├── .gitignore ├── README.md ├── compare_results_real.pdf ├── data ├── goodreads │ ├── data │ │ ├── test │ │ │ └── test.json │ │ └── train │ │ │ └── train.json │ ├── data_text │ │ ├── test │ │ │ └── test.json │ │ └── train │ │ │ └── train.json │ └── generate.ipynb ├── mex │ ├── data │ │ ├── test │ │ │ └── test.json │ │ └── train │ │ │ └── train.json │ └── generate.py ├── mnist │ ├── README.md │ └── generate_niid.py ├── nist │ ├── README.md │ ├── data │ │ └── my_sample.py │ ├── preprocess.sh │ ├── preprocess │ │ ├── data_to_json.py │ │ ├── data_to_json.sh │ │ ├── get_data.sh │ │ ├── get_file_dirs.py │ │ ├── get_hashes.py │ │ ├── group_by_writer.py │ │ └── match_hashes.py │ └── stats.sh ├── synthetic_0.25_0.25 │ ├── data │ │ ├── test │ │ │ └── mytest.json │ │ └── train │ │ │ └── mytrain.json │ └── synthetic_0.25_0.25.zip ├── synthetic_0.5_0.5 │ ├── README.md │ ├── data │ │ ├── test │ │ │ └── mytest.json │ │ └── train │ │ │ └── mytrain.json │ └── generate_synthetic.py ├── synthetic_0.75_0.75 │ └── data │ │ ├── test │ │ └── mytest.json │ │ └── train │ │ └── mytrain.json ├── synthetic_0_0 │ ├── README.md │ ├── data │ │ ├── test │ │ │ └── mytest.json │ │ └── train │ │ │ └── mytrain.json │ ├── generate_synthetic.py │ └── synthetic_0_0.zip ├── synthetic_1_1 │ ├── README.md │ ├── data │ │ ├── test │ │ │ └── mytest.json │ │ └── train │ │ │ └── mytrain.json │ └── generate_synthetic.py └── synthetic_iid │ ├── README.md │ ├── data │ ├── test │ │ └── mytest.json │ └── train │ │ └── mytrain.json │ └── generate_iid.py ├── fedavg_original ├── goodreads_20.csv ├── goodreads_20_prox.csv.csv ├── mex_10.csv ├── mex_10_prox.csv ├── mnist_20.csv ├── mnist_20_prox.csv ├── nist_20.csv └── nist_20_prox.csv ├── flearn ├── models │ ├── __init__.py │ ├── client.py │ ├── goodreads │ │ ├── get_embs.py │ │ ├── get_embs.sh │ │ ├── mclr.py │ │ └── rnn.py │ ├── mex │ │ ├── __init__.py │ │ ├── dnn.py │ │ └── mclr.py │ ├── mnist │ │ ├── __init__.py │ │ ├── cnn.py │ │ └── mclr.py │ ├── nist │ │ ├── __init__.py │ │ ├── cnn.py │ │ └── mclr.py │ └── synthetic │ │ ├── __init__.py │ │ └── mclr.py ├── optimizer │ ├── pgd.py │ └── pggd.py ├── trainers │ ├── __init__.py │ ├── fedavg.py │ ├── fedbase.py │ ├── feddane.py │ ├── fedprox.py │ └── fedsim.py └── utils │ ├── __init__.py │ ├── language_utils.py │ ├── model_utils.py │ ├── tf_utils.py │ └── utils.py ├── full_results_real.pdf ├── full_results_real_other.pdf ├── full_results_synthetic.pdf ├── images ├── compare_results_real.png └── full_results_real.png ├── logs └── sample │ ├── clusters.csv │ ├── fed_sim_g_0.csv │ ├── fed_sim_g_1.csv │ ├── fed_sim_g_2.csv │ ├── fed_sim_g_3.csv │ ├── fed_sim_g_4.csv │ ├── fed_sim_g_5.csv │ ├── fed_sim_g_6.csv │ ├── fed_sim_g_7.csv │ ├── fed_sim_g_8.csv │ ├── nist_0_fedsim.csv │ ├── nist_0_fedsim.pdf │ ├── params.json │ └── timetaken.csv ├── main.py ├── plot_fedsim_improvements.py ├── plot_fedsim_main.py ├── plot_fedsim_other.py ├── requirements.txt ├── results ├── main │ ├── 00.csv │ ├── 0505.csv │ ├── 11.csv │ ├── 2525.csv │ ├── 7575.csv │ ├── IID.csv │ ├── femnist.csv │ ├── goodreads.csv │ ├── mex.csv │ └── mnist.csv └── other │ ├── femnist.csv │ ├── goodreads.csv │ ├── mex.csv │ ├── mex_cnn.csv │ └── mnist.csv ├── run_fedavg.sh ├── run_fedprox.sh ├── run_fedsim.sh └── utils ├── __init__.py ├── csv_log.py ├── language_utils.py ├── model_utils.py ├── preprocess.sh ├── remove_users.py ├── sample.py ├── split_data.py ├── stats.py ├── tf_utils.py └── utils.py /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | data/ 132 | 133 | logs/ 134 | 135 | PNI/logs/ 136 | docs/analysis/logs/ 137 | docs/analysis/logs_cnn/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FedSim 2 | _Similarity Guided Model Aggregation for Federated Learning_ 3 | 4 | This work is published in the **Neurocomputing** journal: **[FedSim: Similarity guided model aggregation for Federated Learning](https://doi.org/10.1016/j.neucom.2021.08.141)** 5 | 6 | BibTeX 7 | ``` 8 | @article{palihawadana2021fedsim, 9 | title={FedSim: Similarity guided model aggregation for Federated Learning}, 10 | author={Palihawadana, Chamath and Wiratunga, Nirmalie and Wijekoon, Anjana and Kalutarage, Harsha}, 11 | journal={Neurocomputing}, 12 | year={2021}, 13 | publisher={Elsevier} 14 | } 15 | ``` 16 | 17 | 18 | ## Usage 19 | ```shell 20 | bash run_fedsim.sh DATASET_NAME DROP_PERC NUM_CLUSTERS NUM_CLIENTS Run_Name 21 | bash run_fedsim.sh mnist 0 9 20 mnist_run 22 | ``` 23 | 24 | The FedSim algorithm implementation is available in [`flearn/trainers/fedsim.py`](https://github.com/chamathpali/FedSim/blob/main/flearn/trainers/fedsim.py). 25 | 26 | ## Reproduce results 27 | 28 | 29 | The experiments on all datasets were carried out with 35 random seeds (0 to 34, incremented by 1) to empirically demonstrate significance. Repeating the same experiment with different random seeds reduces the sampling error of our results. 30 | 31 | For a single dataset, run FedSim, FedAvg and FedProx; each run generates its own folder under `logs`. As a reference, a sample log folder with results is included in `logs/sample/`. 32 | 33 | The hyperparameters used for the experiments are presented in Table 2 of the paper. 34 | 35 | Once the experiments are complete, create the summary log files for the three methods, following the layout of the `results/` folder. Our results are included there for reference. 36 | 37 | 1. Figure 3 - Results on real datasets - `plot_fedsim_main.py` 38 | 39 | 2. Figure 5 - Accuracy improvements of FedSim - `plot_fedsim_improvements.py` 40 | 41 | 3. Figure 6 - Results on synthetic datasets - `plot_fedsim_main.py` (change line #123 to `if(True)`) 42 | 43 | 4. Figure 7 - Results on other learning models - `plot_fedsim_other.py` 44 | 45 | ## Experiment setup 46 | We have adapted the experiment setup from the [FedProx](https://github.com/litian96/FedProx) and [Leaf Benchmark](https://github.com/TalwalkarLab/leaf) work. Thanks to [Tian Li](https://github.com/litian96) for the support. 47 | 48 | ### Dataset generation 49 | 50 | For all datasets, see the `README` files in the separate `data/$dataset` folders for instructions on preprocessing and/or sampling data.
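All generation scripts in `data/` write their train/test splits in the same LEAF-style JSON layout (a `users` list, per-user sample counts, and a `user_data` map keyed by user name). The sketch below is illustrative only; user names, feature dimensions and values are hypothetical, and the real feature vectors are dataset-specific (e.g. flattened 28x28 images for MNIST/FEMNIST).

```python
# Illustrative only: shape of the JSON written by the data generation scripts.
train_data = {
    'users': ['f_00000', 'f_00001'],                # one entry per simulated client
    'num_samples': [2, 1],                          # samples per client, same order as 'users'
    'user_data': {
        'f_00000': {'x': [[0.1, 0.4], [0.0, 0.9]],  # one feature vector per sample
                    'y': [3.0, 7.0]},               # class labels
        'f_00001': {'x': [[0.2, 0.5]], 'y': [1.0]},
    },
}
```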
51 | 52 | For further clarification, follow the guides in [FedProx](https://github.com/litian96/FedProx) and [Leaf](https://github.com/TalwalkarLab/leaf). 53 | 54 | The two datasets produced as part of this work are published together with their generation source code. 55 | - [Fed-Mex](https://github.com/chamathpali/Fed-MEx/) 56 | - [Fed-Goodreads](https://github.com/chamathpali/Fed-Goodreads/) 57 | 58 | ### Downloading dependencies 59 | 60 | ``` 61 | pip3 install -r requirements.txt 62 | ``` 63 | ### Run FedSim Experiments 64 | 65 | ```shell 66 | bash run_fedsim.sh DATASET_NAME 0 NUM_CLUSTERS NUM_CLIENTS Run_Name 67 | bash run_fedavg.sh mnist 0 9 20 mnist_run 68 | ``` 69 | or directly use the Python command: 70 | ```shell 71 | python3 -u main.py --dataset='goodreads' --optimizer='fedsim' --learning_rate=0.0001 72 | --num_rounds=250 --clients_per_round=20 --eval_every=1 --batch_size=10 --num_epochs=10 73 | --model='rnn' --drop_percent=0 --num_groups=11 --ex_name=goodreads_rnn_0 --seed=0 74 | ``` 75 | 76 | When running on a GPU, specify the device id before running the experiments: 77 | ``` 78 | export CUDA_VISIBLE_DEVICES=GPU_ID 79 | ``` 80 | 81 | ### Results 82 | 83 | 84 | ![](https://raw.githubusercontent.com/chamathpali/FedSim/main/images/full_results_real.png) 85 | 86 | _Figure 3: Comparison of performance over communication rounds on real-world datasets_ 87 | 88 | 89 | ![](https://raw.githubusercontent.com/chamathpali/FedSim/main/images/compare_results_real.png) 90 | 91 | _Figure 5: Accuracy improvements of FedSim compared to FedAvg and FedProx for the experiments in Figure 3. Values below zero indicate worse performance than the baseline, and grey vertical lines denote areas with no statistical significance_ 92 | -------------------------------------------------------------------------------- /compare_results_real.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/compare_results_real.pdf -------------------------------------------------------------------------------- /data/mnist/README.md: -------------------------------------------------------------------------------- 1 | # MNIST Dataset 2 | 3 | First download the raw data [here](https://drive.google.com/file/d/1Vp_gJHw4pPqwMUSgodhFOqUglAQyaOGD/view?usp=sharing) and put `mnist-original.mat` under the folder `data/mldata/`. 4 | 5 | To generate non-iid data: 6 | 7 | ``` 8 | mkdir test 9 | mkdir train 10 | python generate_niid.py 11 | ``` 12 | 13 | Or you can download the dataset [here](https://drive.google.com/file/d/1cU_LcBAUZvfZWveOMhG4G5Fg9uFXhVdf/view?usp=sharing), unzip it and put the `train` and `test` folders under `data`.
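Either way, the generated split can be sanity-checked from the `data/mnist` folder before training; a minimal sketch (the file name below is the one written by `generate_niid.py`, the downloaded archive may use a different name):

```python
import json

# Load the generated training split and print basic statistics.
with open('data/train/all_data_0_niid_0_keep_10_train_9.json') as f:
    train = json.load(f)

print('users:', len(train['users']))          # 1000 simulated clients for this script
print('samples:', sum(train['num_samples']))  # total number of training samples
```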
14 | 15 | The layout of the folders under `./mnist` should be: 16 | 17 | ``` 18 | | data 19 | 20 | ----| mldata 21 | 22 | ---- ----| mnist-original.mat 23 | 24 | ----| train 25 | 26 | ---- ----| train_file_name.json 27 | 28 | ----| test 29 | 30 | ---- ----| test_file_name.json 31 | 32 | | generate_niid.py 33 | | README.md 34 | ``` 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /data/mnist/generate_niid.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import fetch_mldata 2 | from tqdm import trange 3 | import numpy as np 4 | import random 5 | import json 6 | import os 7 | 8 | # Setup directory for train/test data 9 | train_path = './data/train/all_data_0_niid_0_keep_10_train_9.json' 10 | test_path = './data/test/all_data_0_niid_0_keep_10_test_9.json' 11 | dir_path = os.path.dirname(train_path) 12 | if not os.path.exists(dir_path): 13 | os.makedirs(dir_path) 14 | dir_path = os.path.dirname(test_path) 15 | if not os.path.exists(dir_path): 16 | os.makedirs(dir_path) 17 | 18 | # Get MNIST data, normalize, and divide by level 19 | mnist = fetch_mldata('MNIST original', data_home='./data') 20 | mu = np.mean(mnist.data.astype(np.float32), 0) 21 | sigma = np.std(mnist.data.astype(np.float32), 0) 22 | mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001) 23 | mnist_data = [] 24 | for i in trange(10): 25 | idx = mnist.target==i 26 | mnist_data.append(mnist.data[idx]) 27 | 28 | print([len(v) for v in mnist_data]) 29 | 30 | ###### CREATE USER DATA SPLIT ####### 31 | # Assign 10 samples to each user 32 | X = [[] for _ in range(1000)] 33 | y = [[] for _ in range(1000)] 34 | idx = np.zeros(10, dtype=np.int64) 35 | for user in range(1000): 36 | for j in range(2): 37 | l = (user+j)%10 38 | X[user] += mnist_data[l][idx[l]:idx[l]+5].tolist() 39 | y[user] += (l*np.ones(5)).tolist() 40 | idx[l] += 5 41 | print(idx) 42 | 43 | # Assign remaining sample by power law 44 | user = 0 45 | props = np.random.lognormal(0, 2.0, (10,100,2)) 46 | props = np.array([[[len(v)-1000]] for v in mnist_data])*props/np.sum(props,(1,2), keepdims=True) 47 | #idx = 1000*np.ones(10, dtype=np.int64) 48 | for user in trange(1000): 49 | for j in range(2): 50 | l = (user+j)%10 51 | num_samples = int(props[l,user//10,j]) 52 | #print(num_samples) 53 | if idx[l] + num_samples < len(mnist_data[l]): 54 | X[user] += mnist_data[l][idx[l]:idx[l]+num_samples].tolist() 55 | y[user] += (l*np.ones(num_samples)).tolist() 56 | idx[l] += num_samples 57 | 58 | print(idx) 59 | 60 | # Create data structure 61 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 62 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 63 | 64 | # Setup 1000 users 65 | for i in trange(1000, ncols=120): 66 | uname = 'f_{0:05d}'.format(i) 67 | 68 | combined = list(zip(X[i], y[i])) 69 | random.shuffle(combined) 70 | X[i][:], y[i][:] = zip(*combined) 71 | num_samples = len(X[i]) 72 | train_len = int(0.9*num_samples) 73 | test_len = num_samples - train_len 74 | 75 | train_data['users'].append(uname) 76 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 77 | train_data['num_samples'].append(train_len) 78 | test_data['users'].append(uname) 79 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 80 | test_data['num_samples'].append(test_len) 81 | 82 | print(train_data['num_samples']) 83 | print(sum(train_data['num_samples'])) 84 | 85 | with open(train_path,'w') as outfile: 86 | 
json.dump(train_data, outfile) 87 | with open(test_path, 'w') as outfile: 88 | json.dump(test_data, outfile) 89 | -------------------------------------------------------------------------------- /data/nist/README.md: -------------------------------------------------------------------------------- 1 | # FEMNIST Dataset 2 | 3 | ## Setup Instructions 4 | 5 | 6 | You can download the dataset [here](https://drive.google.com/file/d/1tCEcJgRJ8NdRo11UJZR6WSKMNdmox4GC/view?usp=sharing), unzip it and put the `train` and `test` folders under `data`. 7 | 8 | 9 | The FEMNIST data we used in the paper is a subsampled (and repartitioned) version of the original full dataset, in order to impose additional statistical heterogeneity. The above dataset was generated with the following instructions: 10 | 11 | (1) First, 12 | 13 | run preprocess.sh with a choice of the following tags: 14 | 15 | - ```-s``` := 'iid' to sample in an i.i.d. manner, or 'niid' to sample in a non-i.i.d. manner; more information on i.i.d. versus non-i.i.d. is included in the 'Notes' section 16 | - ```--iu``` := number of users, if iid sampling; expressed as a fraction of the total number of users; default is 0.01 17 | - ```--sf``` := fraction of data to sample, written as a decimal; default is 0.1 18 | - ```-k``` := minimum number of samples per user 19 | - ```-t``` := 'user' to partition users into train-test groups, or 'sample' to partition each user's samples into train-test groups 20 | - ```--tf``` := fraction of data in training set, written as a decimal; default is 0.9 21 | 22 | 23 | And then run: 24 | 25 | ``` 26 | ./preprocess.sh -s niid --sf 0.5 -k 0 -tf 0.8 -t sample 27 | ``` 28 | 29 | 30 | (Make sure to delete the rem\_user\_data, sampled\_data, test, and train subfolders in the data directory before re-running preprocess.sh.)
31 | 32 | (2) And then re-partition the data: 33 | 34 | ``` 35 | cd data 36 | python my_sample.py 37 | ``` 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /data/nist/data/my_sample.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import json 3 | import math 4 | import numpy as np 5 | import os 6 | import sys 7 | import random 8 | from tqdm import trange 9 | 10 | from PIL import Image 11 | 12 | NUM_USER = 200 13 | CLASS_PER_USER = 3 # from 10 lowercase characters 14 | 15 | 16 | def relabel_class(c): 17 | ''' 18 | maps hexadecimal class value (string) to a decimal number 19 | returns: 20 | - 0 through 9 for classes representing respective numbers 21 | - 10 through 35 for classes representing respective uppercase letters 22 | - 36 through 61 for classes representing respective lowercase letters 23 | ''' 24 | if c.isdigit() and int(c) < 40: 25 | return (int(c) - 30) 26 | elif int(c, 16) <= 90: # uppercase 27 | return (int(c, 16) - 55) 28 | else: 29 | return (int(c, 16) - 61) # lowercase 30 | 31 | def load_image(file_name): 32 | '''read in a png 33 | Return: a flatted list representing the image 34 | ''' 35 | size = (28, 28) 36 | img = Image.open(file_name) 37 | gray = img.convert('L') 38 | gray.thumbnail(size, Image.ANTIALIAS) 39 | arr = np.asarray(gray).copy() 40 | vec = arr.flatten() 41 | vec = vec / 255 # scale all pixel values to between 0 and 1 42 | vec = vec.tolist() 43 | 44 | return vec 45 | 46 | 47 | def main(): 48 | file_dir = "raw_data/by_class" 49 | 50 | train_path = "train/mytrain.json" 51 | test_path = "test/mytest.json" 52 | 53 | X = [[] for _ in range(NUM_USER)] 54 | y = [[] for _ in range(NUM_USER)] 55 | 56 | nist_data = {} 57 | 58 | 59 | for class_ in os.listdir(file_dir): 60 | 61 | real_class = relabel_class(class_) 62 | if real_class >= 36 and real_class <= 45: 63 | full_img_path = file_dir + "/" + class_ + "/train_" + class_ 64 | all_files_this_class = os.listdir(full_img_path) 65 | random.shuffle(all_files_this_class) 66 | sampled_files_this_class = all_files_this_class[:4000] 67 | imgs = [] 68 | for img in sampled_files_this_class: 69 | imgs.append(load_image(full_img_path + "/" + img)) 70 | class_ = relabel_class(class_) 71 | print(class_) 72 | nist_data[class_-36] = imgs # a list of list, key is (0, 25) 73 | print(len(imgs)) 74 | 75 | num_samples = np.random.lognormal(4, 1, (NUM_USER)) + 5 76 | 77 | idx = np.zeros(10, dtype=np.int64) 78 | 79 | for user in range(NUM_USER): 80 | num_sample_per_class = int(num_samples[user] / CLASS_PER_USER) 81 | if num_sample_per_class < 2: 82 | num_sample_per_class = 2 83 | 84 | for j in range(CLASS_PER_USER): 85 | class_id = (user + j) % 10 86 | if idx[class_id] + num_sample_per_class < len(nist_data[class_id]): 87 | idx[class_id] = 0 88 | X[user] += nist_data[class_id][idx[class_id]: (idx[class_id] + num_sample_per_class)] 89 | y[user] += (class_id * np.ones(num_sample_per_class)).tolist() 90 | idx[class_id] += num_sample_per_class 91 | 92 | # Create data structure 93 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 94 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 95 | 96 | for i in trange(NUM_USER, ncols=120): 97 | uname = 'f_{0:05d}'.format(i) 98 | 99 | combined = list(zip(X[i], y[i])) 100 | random.shuffle(combined) 101 | X[i][:], y[i][:] = zip(*combined) 102 | num_samples = len(X[i]) 103 | train_len = int(0.9 * num_samples) 104 | test_len = num_samples - train_len 105 | 
106 | train_data['users'].append(uname) 107 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 108 | train_data['num_samples'].append(train_len) 109 | test_data['users'].append(uname) 110 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 111 | test_data['num_samples'].append(test_len) 112 | 113 | with open(train_path, 'w') as outfile: 114 | json.dump(train_data, outfile) 115 | with open(test_path, 'w') as outfile: 116 | json.dump(test_data, outfile) 117 | 118 | 119 | if __name__ == "__main__": 120 | main() 121 | 122 | -------------------------------------------------------------------------------- /data/nist/preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #rm -rf rem_user_data sampled_data test train 4 | 5 | # download data and convert to .json format 6 | 7 | if [ ! -d "data/all_data" ] || [ ! "$(ls -A data/all_data)" ]; then 8 | cd preprocess 9 | ./data_to_json.sh 10 | cd .. 11 | fi 12 | 13 | NAME="nist" # name of the dataset, equivalent to directory name 14 | 15 | cd ../../utils 16 | 17 | # ./preprocess.sh -s niid --sf 0.05 -k 64 -t sample 18 | # ./preprocess.sh --name nist -s niid --sf 1.0 -k 0 -t sample 19 | # ./preprocess.sh --name sent140 -s niid --sf 1.0 -k 1 -t sample 20 | ./preprocess.sh --name $NAME $@ 21 | 22 | cd ../data/$NAME 23 | -------------------------------------------------------------------------------- /data/nist/preprocess/data_to_json.py: -------------------------------------------------------------------------------- 1 | # Converts a list of (writer, [list of (file,class)]) tuples into a json object 2 | # of the form: 3 | # {users: [bob, etc], num_samples: [124, etc.], 4 | # user_data: {bob : {x:[img1,img2,etc], y:[class1,class2,etc]}, etc}} 5 | # where 'img_' is a vectorized representation of the corresponding image 6 | 7 | from __future__ import division 8 | import json 9 | import math 10 | import numpy as np 11 | import os 12 | import sys 13 | 14 | from PIL import Image 15 | 16 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 17 | utils_dir = os.path.join(utils_dir, 'utils') 18 | sys.path.append(utils_dir) 19 | 20 | import utils 21 | 22 | 23 | MAX_WRITERS = 100 # max number of writers per json file. 
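# Added for clarity -- worked example of the hexadecimal-to-label mapping implemented by
# relabel_class() below (the NIST by_class folder names are hexadecimal ASCII codes):
#   '30' -> 0    digit '0'       (int('30') - 30 = 0)
#   '41' -> 10   uppercase 'A'   (int('41', 16) = 65, 65 - 55 = 10)
#   '61' -> 36   lowercase 'a'   (int('61', 16) = 97, 97 - 61 = 36)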
24 | 25 | 26 | def relabel_class(c): 27 | ''' 28 | maps hexadecimal class value (string) to a decimal number 29 | returns: 30 | - 0 through 9 for classes representing respective numbers 31 | - 10 through 35 for classes representing respective uppercase letters 32 | - 36 through 61 for classes representing respective lowercase letters 33 | ''' 34 | if c.isdigit() and int(c) < 40: 35 | return (int(c) - 30) 36 | elif int(c, 16) <= 90: # uppercase 37 | return (int(c, 16) - 55) 38 | else: 39 | return (int(c, 16) - 61) 40 | 41 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 42 | 43 | ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer') 44 | writers = utils.load_obj(ibwd) 45 | 46 | num_json = int(math.ceil(len(writers) / MAX_WRITERS)) 47 | 48 | users = [[] for _ in range(num_json)] 49 | num_samples = [[] for _ in range(num_json)] 50 | user_data = [{} for _ in range(num_json)] 51 | 52 | writer_count = 0 53 | json_index = 0 54 | for (w, l) in writers: 55 | 56 | users[json_index].append(w) 57 | num_samples[json_index].append(len(l)) 58 | user_data[json_index][w] = {'x': [], 'y': []} 59 | 60 | size = 28, 28 # original image size is 128, 128 61 | for (f, c) in l: 62 | file_path = os.path.join(parent_path, f) 63 | img = Image.open(file_path) 64 | gray = img.convert('L') 65 | gray.thumbnail(size, Image.ANTIALIAS) 66 | arr = np.asarray(gray).copy() 67 | vec = arr.flatten() 68 | vec = vec / 255 # scale all pixel values to between 0 and 1 69 | vec = vec.tolist() 70 | 71 | nc = relabel_class(c) 72 | 73 | user_data[json_index][w]['x'].append(vec) 74 | user_data[json_index][w]['y'].append(nc) 75 | 76 | writer_count += 1 77 | if writer_count == MAX_WRITERS: 78 | 79 | all_data = {} 80 | all_data['users'] = users[json_index] 81 | all_data['num_samples'] = num_samples[json_index] 82 | all_data['user_data'] = user_data[json_index] 83 | 84 | file_name = 'all_data_%d.json' % json_index 85 | file_path = os.path.join(parent_path, 'data', 'all_data', file_name) 86 | 87 | print('writing %s' % file_name) 88 | 89 | with open(file_path, 'w') as outfile: 90 | json.dump(all_data, outfile) 91 | 92 | writer_count = 0 93 | json_index += 1 94 | -------------------------------------------------------------------------------- /data/nist/preprocess/data_to_json.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # assumes that the script is run in the preprocess folder 4 | 5 | if [ ! -d "../data" ]; then 6 | mkdir ../data 7 | fi 8 | if [ ! -d "../data/raw_data" ]; then 9 | echo "------------------------------" 10 | echo "downloading data" 11 | mkdir ../data/raw_data 12 | ./get_data.sh 13 | echo "finished downloading data" 14 | fi 15 | 16 | if [ ! -d "../data/intermediate" ]; then # stores .pkl files during preprocessing 17 | mkdir ../data/intermediate 18 | fi 19 | 20 | if [ ! -f ../data/intermediate/class_file_dirs.pkl ]; then 21 | echo "------------------------------" 22 | echo "extracting file directories of images" 23 | python3 get_file_dirs.py 24 | echo "finished extracting file directories of images" 25 | fi 26 | 27 | if [ ! -f ../data/intermediate/class_file_hashes.pkl ]; then 28 | echo "------------------------------" 29 | echo "calculating image hashes" 30 | python3 get_hashes.py 31 | echo "finished calculating image hashes" 32 | fi 33 | 34 | if [ ! 
-f ../data/intermediate/write_with_class.pkl ]; then 35 | echo "------------------------------" 36 | echo "assigning class labels to write images" 37 | python3 match_hashes.py 38 | echo "finished assigning class labels to write images" 39 | fi 40 | 41 | if [ ! -f ../data/intermediate/images_by_writer.pkl ]; then 42 | echo "------------------------------" 43 | echo "grouping images by writer" 44 | python3 group_by_writer.py 45 | echo "finished grouping images by writer" 46 | fi 47 | 48 | if [ ! -d "../data/all_data" ]; then 49 | mkdir ../data/all_data 50 | fi 51 | if [ ! "$(ls -A ../data/all_data)" ]; then 52 | echo "------------------------------" 53 | echo "converting data to .json format" 54 | python3 data_to_json.py 55 | echo "finished converting data to .json format" 56 | fi 57 | -------------------------------------------------------------------------------- /data/nist/preprocess/get_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # assumes that the script is run in the preprocess folder 4 | 5 | cd ../data/raw_data 6 | wget https://s3.amazonaws.com/nist-srd/SD19/by_class.zip 7 | wget https://s3.amazonaws.com/nist-srd/SD19/by_write.zip 8 | unzip by_class.zip 9 | rm by_class.zip 10 | unzip by_write.zip 11 | rm by_write.zip 12 | cd ../../preprocess 13 | -------------------------------------------------------------------------------- /data/nist/preprocess/get_file_dirs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Creates .pkl files for: 3 | 1. list of directories of every image in 'by_class' 4 | 2. list of directories of every image in 'by_write' 5 | the hierarchal structure of the data is as follows: 6 | - by_class -> classes -> folders containing images -> images 7 | - by_write -> folders containing writers -> writer -> types of images -> images 8 | the directories written into the files are of the form 'raw_data/...' 
9 | ''' 10 | 11 | import os 12 | import sys 13 | 14 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 15 | utils_dir = os.path.join(utils_dir, 'utils') 16 | sys.path.append(utils_dir) 17 | 18 | import utils 19 | 20 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 21 | 22 | class_files = [] # (class, file directory) 23 | write_files = [] # (writer, file directory) 24 | 25 | class_dir = os.path.join(parent_path, 'data', 'raw_data', 'by_class') 26 | rel_class_dir = os.path.join('data', 'raw_data', 'by_class') 27 | classes = os.listdir(class_dir) 28 | 29 | for cl in classes: 30 | cldir = os.path.join(class_dir, cl) 31 | rel_cldir = os.path.join(rel_class_dir, cl) 32 | subcls = os.listdir(cldir) 33 | 34 | subcls = [s for s in subcls if (('hsf' in s) and ('mit' not in s))] 35 | 36 | for subcl in subcls: 37 | subcldir = os.path.join(cldir, subcl) 38 | rel_subcldir = os.path.join(rel_cldir, subcl) 39 | images = os.listdir(subcldir) 40 | image_dirs = [os.path.join(rel_subcldir, i) for i in images] 41 | 42 | for image_dir in image_dirs: 43 | class_files.append((cl, image_dir)) 44 | 45 | write_dir = os.path.join(parent_path, 'data', 'raw_data', 'by_write') 46 | rel_write_dir = os.path.join('data', 'raw_data', 'by_write') 47 | write_parts = os.listdir(write_dir) 48 | 49 | for write_part in write_parts: 50 | writers_dir = os.path.join(write_dir, write_part) 51 | rel_writers_dir = os.path.join(rel_write_dir, write_part) 52 | writers = os.listdir(writers_dir) 53 | 54 | for writer in writers: 55 | writer_dir = os.path.join(writers_dir, writer) 56 | rel_writer_dir = os.path.join(rel_writers_dir, writer) 57 | wtypes = os.listdir(writer_dir) 58 | 59 | for wtype in wtypes: 60 | type_dir = os.path.join(writer_dir, wtype) 61 | rel_type_dir = os.path.join(rel_writer_dir, wtype) 62 | images = os.listdir(type_dir) 63 | image_dirs = [os.path.join(rel_type_dir, i) for i in images] 64 | 65 | for image_dir in image_dirs: 66 | write_files.append((writer, image_dir)) 67 | 68 | utils.save_obj( 69 | class_files, 70 | os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs')) 71 | utils.save_obj( 72 | write_files, 73 | os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs')) 74 | -------------------------------------------------------------------------------- /data/nist/preprocess/get_hashes.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import sys 4 | 5 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 6 | utils_dir = os.path.join(utils_dir, 'utils') 7 | sys.path.append(utils_dir) 8 | 9 | import utils 10 | 11 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 12 | 13 | cfd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs') 14 | wfd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs') 15 | class_file_dirs = utils.load_obj(cfd) 16 | write_file_dirs = utils.load_obj(wfd) 17 | 18 | class_file_hashes = [] 19 | write_file_hashes = [] 20 | 21 | count = 0 22 | for tup in class_file_dirs: 23 | if (count%100000 == 0): 24 | print('hashed %d class images' % count) 25 | 26 | (cclass, cfile) = tup 27 | file_path = os.path.join(parent_path, cfile) 28 | 29 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest() 30 | 31 | class_file_hashes.append((cclass, cfile, chash)) 32 | 33 | count += 1 34 | 35 | cfhd = 
os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes') 36 | utils.save_obj(class_file_hashes, cfhd) 37 | 38 | count = 0 39 | for tup in write_file_dirs: 40 | if (count%100000 == 0): 41 | print('hashed %d write images' % count) 42 | 43 | (cclass, cfile) = tup 44 | file_path = os.path.join(parent_path, cfile) 45 | 46 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest() 47 | 48 | write_file_hashes.append((cclass, cfile, chash)) 49 | 50 | count += 1 51 | 52 | wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes') 53 | utils.save_obj(write_file_hashes, wfhd) 54 | -------------------------------------------------------------------------------- /data/nist/preprocess/group_by_writer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | utils_dir = os.path.join(utils_dir, 'utils') 6 | sys.path.append(utils_dir) 7 | 8 | import utils 9 | 10 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 11 | 12 | wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class') 13 | write_class = utils.load_obj(wwcd) 14 | 15 | writers = [] # each entry is a (writer, [list of (file, class)]) tuple 16 | cimages = [] 17 | (cw, _, _) = write_class[0] 18 | for (w, f, c) in write_class: 19 | if w != cw: 20 | writers.append((cw, cimages)) 21 | cw = w 22 | cimages = [(f, c)] 23 | cimages.append((f, c)) 24 | writers.append((cw, cimages)) 25 | 26 | ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer') 27 | utils.save_obj(writers, ibwd) 28 | -------------------------------------------------------------------------------- /data/nist/preprocess/match_hashes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | utils_dir = os.path.join(utils_dir, 'utils') 6 | sys.path.append(utils_dir) 7 | 8 | import utils 9 | 10 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 11 | 12 | cfhd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes') 13 | wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes') 14 | class_file_hashes = utils.load_obj(cfhd) # each elem is (class, file dir, hash) 15 | write_file_hashes = utils.load_obj(wfhd) # each elem is (writer, file dir, hash) 16 | 17 | class_hash_dict = {} 18 | for i in range(len(class_file_hashes)): 19 | (c, f, h) = class_file_hashes[len(class_file_hashes)-i-1] 20 | class_hash_dict[h] = (c, f) 21 | 22 | write_classes = [] 23 | for tup in write_file_hashes: 24 | (w, f, h) = tup 25 | write_classes.append((w, f, class_hash_dict[h][0])) 26 | 27 | wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class') 28 | utils.save_obj(write_classes, wwcd) 29 | -------------------------------------------------------------------------------- /data/nist/stats.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NAME="nist" 4 | 5 | cd ../../utils 6 | 7 | python3 stats.py --name $NAME 8 | 9 | cd ../data/$NAME -------------------------------------------------------------------------------- /data/synthetic_0.25_0.25/synthetic_0.25_0.25.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/data/synthetic_0.25_0.25/synthetic_0.25_0.25.zip -------------------------------------------------------------------------------- /data/synthetic_0.5_0.5/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | python generate_synthetic.py 3 | ``` -------------------------------------------------------------------------------- /data/synthetic_0.5_0.5/generate_synthetic.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import numpy as np 4 | import os 5 | import sys 6 | import random 7 | from tqdm import trange 8 | import math 9 | 10 | 11 | NUM_USER = 30 12 | 13 | def softmax(x): 14 | ex = np.exp(x) 15 | sum_ex = np.sum( np.exp(x)) 16 | return ex/sum_ex 17 | 18 | 19 | def generate_synthetic(alpha, beta, iid): 20 | 21 | dimension = 60 22 | NUM_CLASS = 10 23 | 24 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 50 25 | print(samples_per_user) 26 | num_samples = np.sum(samples_per_user) 27 | 28 | X_split = [[] for _ in range(NUM_USER)] 29 | y_split = [[] for _ in range(NUM_USER)] 30 | 31 | 32 | #### define some eprior #### 33 | mean_W = np.random.normal(0, alpha, NUM_USER) 34 | mean_b = mean_W 35 | B = np.random.normal(0, beta, NUM_USER) 36 | mean_x = np.zeros((NUM_USER, dimension)) 37 | 38 | diagonal = np.zeros(dimension) 39 | for j in range(dimension): 40 | diagonal[j] = np.power((j+1), -1.2) 41 | cov_x = np.diag(diagonal) 42 | 43 | for i in range(NUM_USER): 44 | if iid == 1: 45 | mean_x[i] = np.ones(dimension) * B[i] # all zeros 46 | else: 47 | mean_x[i] = np.random.normal(B[i], 1, dimension) 48 | print(mean_x[i]) 49 | 50 | if iid == 1: 51 | W_global = np.random.normal(0, 1, (dimension, NUM_CLASS)) 52 | b_global = np.random.normal(0, 1, NUM_CLASS) 53 | 54 | for i in range(NUM_USER): 55 | 56 | W = np.random.normal(mean_W[i], 1, (dimension, NUM_CLASS)) 57 | b = np.random.normal(mean_b[i], 1, NUM_CLASS) 58 | 59 | if iid == 1: 60 | W = W_global 61 | b = b_global 62 | 63 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i]) 64 | yy = np.zeros(samples_per_user[i]) 65 | 66 | for j in range(samples_per_user[i]): 67 | tmp = np.dot(xx[j], W) + b 68 | yy[j] = np.argmax(softmax(tmp)) 69 | 70 | X_split[i] = xx.tolist() 71 | y_split[i] = yy.tolist() 72 | 73 | print("{}-th users has {} exampls".format(i, len(y_split[i]))) 74 | 75 | 76 | return X_split, y_split 77 | 78 | 79 | 80 | def main(): 81 | 82 | 83 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 84 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 85 | 86 | train_path = "data/train/mytrain.json" 87 | test_path = "data/test/mytest.json" 88 | 89 | #X, y = generate_synthetic(alpha=0, beta=0, iid=0) # synthetiv (0,0) 90 | X, y = generate_synthetic(alpha=0.5, beta=0.5, iid=0) # synthetic (0.5, 0.5) 91 | #X, y = generate_synthetic(alpha=1, beta=1, iid=0) # synthetic (1,1) 92 | #X, y = generate_synthetic(alpha=0, beta=0, iid=1) # synthetic_IID 93 | 94 | 95 | # Create data structure 96 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 97 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 98 | 99 | for i in trange(NUM_USER, ncols=120): 100 | 101 | uname = 'f_{0:05d}'.format(i) 102 | combined = list(zip(X[i], y[i])) 103 | random.shuffle(combined) 104 | X[i][:], y[i][:] = 
zip(*combined) 105 | num_samples = len(X[i]) 106 | train_len = int(0.9 * num_samples) 107 | test_len = num_samples - train_len 108 | 109 | train_data['users'].append(uname) 110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 111 | train_data['num_samples'].append(train_len) 112 | test_data['users'].append(uname) 113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 114 | test_data['num_samples'].append(test_len) 115 | 116 | 117 | with open(train_path,'w') as outfile: 118 | json.dump(train_data, outfile) 119 | with open(test_path, 'w') as outfile: 120 | json.dump(test_data, outfile) 121 | 122 | 123 | if __name__ == "__main__": 124 | main() 125 | 126 | -------------------------------------------------------------------------------- /data/synthetic_0_0/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | python generate_synthetic.py 3 | ``` -------------------------------------------------------------------------------- /data/synthetic_0_0/generate_synthetic.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import numpy as np 4 | import os 5 | import sys 6 | import random 7 | from tqdm import trange 8 | import math 9 | 10 | 11 | NUM_USER = 30 12 | 13 | def softmax(x): 14 | ex = np.exp(x) 15 | sum_ex = np.sum( np.exp(x)) 16 | return ex/sum_ex 17 | 18 | 19 | def generate_synthetic(alpha, beta, iid): 20 | 21 | dimension = 60 22 | NUM_CLASS = 10 23 | 24 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 50 25 | print(samples_per_user) 26 | num_samples = np.sum(samples_per_user) 27 | 28 | X_split = [[] for _ in range(NUM_USER)] 29 | y_split = [[] for _ in range(NUM_USER)] 30 | 31 | 32 | #### define some eprior #### 33 | mean_W = np.random.normal(0, alpha, NUM_USER) 34 | mean_b = mean_W 35 | B = np.random.normal(0, beta, NUM_USER) 36 | mean_x = np.zeros((NUM_USER, dimension)) 37 | 38 | diagonal = np.zeros(dimension) 39 | for j in range(dimension): 40 | diagonal[j] = np.power((j+1), -1.2) 41 | cov_x = np.diag(diagonal) 42 | 43 | for i in range(NUM_USER): 44 | if iid == 1: 45 | mean_x[i] = np.ones(dimension) * B[i] # all zeros 46 | else: 47 | mean_x[i] = np.random.normal(B[i], 1, dimension) 48 | print(mean_x[i]) 49 | 50 | if iid == 1: 51 | W_global = np.random.normal(0, 1, (dimension, NUM_CLASS)) 52 | b_global = np.random.normal(0, 1, NUM_CLASS) 53 | 54 | for i in range(NUM_USER): 55 | 56 | W = np.random.normal(mean_W[i], 1, (dimension, NUM_CLASS)) 57 | b = np.random.normal(mean_b[i], 1, NUM_CLASS) 58 | 59 | if iid == 1: 60 | W = W_global 61 | b = b_global 62 | 63 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i]) 64 | yy = np.zeros(samples_per_user[i]) 65 | 66 | for j in range(samples_per_user[i]): 67 | tmp = np.dot(xx[j], W) + b 68 | yy[j] = np.argmax(softmax(tmp)) 69 | 70 | X_split[i] = xx.tolist() 71 | y_split[i] = yy.tolist() 72 | 73 | print("{}-th users has {} exampls".format(i, len(y_split[i]))) 74 | 75 | 76 | return X_split, y_split 77 | 78 | 79 | 80 | def main(): 81 | 82 | 83 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 84 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 85 | 86 | train_path = "data/train/mytrain.json" 87 | test_path = "data/test/mytest.json" 88 | 89 | X, y = generate_synthetic(alpha=0, beta=0, iid=0) # synthetiv (0,0) 90 | #X, y = generate_synthetic(alpha=0.5, beta=0.5, iid=0) # synthetic (0.5, 0.5) 91 | #X, y = 
generate_synthetic(alpha=1, beta=1, iid=0) # synthetic (1,1) 92 | #X, y = generate_synthetic(alpha=0, beta=0, iid=1) # synthetic_IID 93 | 94 | 95 | # Create data structure 96 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 97 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 98 | 99 | for i in trange(NUM_USER, ncols=120): 100 | 101 | uname = 'f_{0:05d}'.format(i) 102 | combined = list(zip(X[i], y[i])) 103 | random.shuffle(combined) 104 | X[i][:], y[i][:] = zip(*combined) 105 | num_samples = len(X[i]) 106 | train_len = int(0.9 * num_samples) 107 | test_len = num_samples - train_len 108 | 109 | train_data['users'].append(uname) 110 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 111 | train_data['num_samples'].append(train_len) 112 | test_data['users'].append(uname) 113 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 114 | test_data['num_samples'].append(test_len) 115 | 116 | 117 | with open(train_path,'w') as outfile: 118 | json.dump(train_data, outfile) 119 | with open(test_path, 'w') as outfile: 120 | json.dump(test_data, outfile) 121 | 122 | 123 | if __name__ == "__main__": 124 | main() 125 | 126 | -------------------------------------------------------------------------------- /data/synthetic_0_0/synthetic_0_0.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/data/synthetic_0_0/synthetic_0_0.zip -------------------------------------------------------------------------------- /data/synthetic_1_1/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | python generate_synthetic.py 3 | ``` -------------------------------------------------------------------------------- /data/synthetic_1_1/generate_synthetic.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import numpy as np 4 | import os 5 | import sys 6 | import random 7 | from tqdm import trange 8 | import math 9 | 10 | 11 | NUM_USER = 30 12 | 13 | 14 | def softmax(x): 15 | ex = np.exp(x) 16 | sum_ex = np.sum( np.exp(x)) 17 | return ex/sum_ex 18 | 19 | 20 | def generate_synthetic(alpha, beta, iid): 21 | 22 | dimension = 60 23 | NUM_CLASS = 10 24 | 25 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 50 26 | print(samples_per_user) 27 | num_samples = np.sum(samples_per_user) 28 | 29 | X_split = [[] for _ in range(NUM_USER)] 30 | y_split = [[] for _ in range(NUM_USER)] 31 | 32 | 33 | #### define some eprior #### 34 | mean_W = np.random.normal(0, alpha, NUM_USER) 35 | mean_b = mean_W 36 | B = np.random.normal(0, beta, NUM_USER) 37 | mean_x = np.zeros((NUM_USER, dimension)) 38 | 39 | diagonal = np.zeros(dimension) 40 | for j in range(dimension): 41 | diagonal[j] = np.power((j+1), -1.2) 42 | cov_x = np.diag(diagonal) 43 | 44 | for i in range(NUM_USER): 45 | mean_x[i] = np.random.normal(B[i], 1, dimension) 46 | print(mean_x[i]) 47 | 48 | 49 | for i in range(NUM_USER): 50 | 51 | W = np.random.normal(mean_W[i], 1, (dimension, NUM_CLASS)) 52 | b = np.random.normal(mean_b[i], 1, NUM_CLASS) 53 | 54 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i]) 55 | yy = np.zeros(samples_per_user[i]) 56 | 57 | for j in range(samples_per_user[i]): 58 | tmp = np.dot(xx[j], W) + b 59 | yy[j] = np.argmax(softmax(tmp)) 60 | 61 | X_split[i] = xx.tolist() 62 | y_split[i] = yy.tolist() 
63 | 64 | print("{}-th users has {} exampls".format(i, len(y_split[i]))) 65 | 66 | 67 | return X_split, y_split 68 | 69 | 70 | 71 | def main(): 72 | 73 | 74 | train_path = "data/train/mytrain.json" 75 | test_path = "data/test/mytest.json" 76 | 77 | X, y = generate_synthetic(alpha=1, beta=1, iid=0) # synthetic (1,1) 78 | 79 | 80 | # Create data structure 81 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 82 | test_data = {'users': [], 'user_data':{}, 'num_samples':[]} 83 | 84 | for i in trange(NUM_USER, ncols=120): 85 | 86 | uname = 'f_{0:05d}'.format(i) 87 | combined = list(zip(X[i], y[i])) 88 | random.shuffle(combined) 89 | X[i][:], y[i][:] = zip(*combined) 90 | num_samples = len(X[i]) 91 | train_len = int(0.9 * num_samples) 92 | test_len = num_samples - train_len 93 | 94 | train_data['users'].append(uname) 95 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 96 | train_data['num_samples'].append(train_len) 97 | test_data['users'].append(uname) 98 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 99 | test_data['num_samples'].append(test_len) 100 | 101 | 102 | with open(train_path,'w') as outfile: 103 | json.dump(train_data, outfile) 104 | with open(test_path, 'w') as outfile: 105 | json.dump(test_data, outfile) 106 | 107 | 108 | if __name__ == "__main__": 109 | main() 110 | 111 | -------------------------------------------------------------------------------- /data/synthetic_iid/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | python generate_synthetic.py 3 | ``` -------------------------------------------------------------------------------- /data/synthetic_iid/generate_iid.py: -------------------------------------------------------------------------------- 1 | import json, math, os, sys 2 | import numpy as np 3 | import random 4 | from tqdm import trange 5 | 6 | 7 | NUM_USER = 30 8 | 9 | def softmax(x): 10 | ex = np.exp(x) 11 | sum_ex = np.sum(np.exp(x)) 12 | return ex/sum_ex 13 | 14 | def generate_synthetic(alpha, beta, iid): 15 | dimension = 60 16 | NUM_CLASS = 10 17 | 18 | samples_per_user = np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 50 19 | print(samples_per_user) 20 | num_samples = np.sum(samples_per_user) 21 | 22 | X_split = [[] for _ in range(NUM_USER)] 23 | y_split = [[] for _ in range(NUM_USER)] 24 | 25 | #### define some eprior #### 26 | mean_x = np.zeros((NUM_USER, dimension)) 27 | 28 | diagonal = np.zeros(dimension) 29 | for j in range(dimension): 30 | diagonal[j] = np.power((j+1), -1.2) 31 | cov_x = np.diag(diagonal) 32 | 33 | for i in range(NUM_USER): 34 | mean_x[i] = np.zeros(dimension) 35 | 36 | W = np.random.normal(0, 1, (dimension, NUM_CLASS)) 37 | b = np.random.normal(0, 1, NUM_CLASS) 38 | 39 | for i in range(NUM_USER): 40 | xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i]) 41 | yy = np.zeros(samples_per_user[i]) 42 | 43 | for j in range(samples_per_user[i]): 44 | tmp = np.dot(xx[j], W) + b 45 | yy[j] = np.argmax(softmax(tmp)) 46 | 47 | X_split[i] = xx.tolist() 48 | y_split[i] = yy.tolist() 49 | 50 | print("{}-th users has {} exampls".format(i, len(y_split[i]))) 51 | 52 | return X_split, y_split 53 | 54 | 55 | 56 | def main(): 57 | train_path = "data/train/mytrain.json" 58 | test_path = "data/test/mytest.json" 59 | 60 | X, y = generate_synthetic(alpha=0, beta=0, iid=1) 61 | 62 | # Create data structure 63 | train_data = {'users': [], 'user_data':{}, 'num_samples':[]} 64 | test_data = {'users': [], 
'user_data':{}, 'num_samples':[]} 65 | 66 | for i in trange(NUM_USER, ncols=120): 67 | 68 | uname = 'f_{0:05d}'.format(i) 69 | combined = list(zip(X[i], y[i])) 70 | random.shuffle(combined) 71 | X[i][:], y[i][:] = zip(*combined) 72 | num_samples = len(X[i]) 73 | train_len = int(0.9 * num_samples) 74 | test_len = num_samples - train_len 75 | 76 | train_data['users'].append(uname) 77 | train_data['user_data'][uname] = {'x': X[i][:train_len], 'y': y[i][:train_len]} 78 | train_data['num_samples'].append(train_len) 79 | test_data['users'].append(uname) 80 | test_data['user_data'][uname] = {'x': X[i][train_len:], 'y': y[i][train_len:]} 81 | test_data['num_samples'].append(test_len) 82 | 83 | with open(train_path, 'w') as outfile: 84 | json.dump(train_data, outfile) 85 | with open(test_path, 'w') as outfile: 86 | json.dump(test_data, outfile) 87 | 88 | 89 | if __name__ == "__main__": 90 | main() 91 | 92 | -------------------------------------------------------------------------------- /flearn/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/models/__init__.py -------------------------------------------------------------------------------- /flearn/models/client.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Client(object): 4 | 5 | def __init__(self, id, group=None, train_data={'x':[],'y':[]}, eval_data={'x':[],'y':[]}, model=None): 6 | self.model = model 7 | self.id = id # integer 8 | self.group = group 9 | self.train_data = {k: np.array(v) for k, v in train_data.items()} 10 | self.eval_data = {k: np.array(v) for k, v in eval_data.items()} 11 | self.num_samples = len(self.train_data['y']) 12 | self.test_samples = len(self.eval_data['y']) 13 | 14 | def set_params(self, model_params): 15 | '''set model parameters''' 16 | self.model.set_params(model_params) 17 | 18 | def get_params(self): 19 | '''get model parameters''' 20 | return self.model.get_params() 21 | 22 | def get_grads(self, model_len): 23 | '''get model gradient''' 24 | return self.model.get_gradients(self.train_data, model_len) 25 | 26 | def solve_grad(self): 27 | '''get model gradient with cost''' 28 | bytes_w = self.model.size 29 | grads = self.model.get_gradients(self.train_data) 30 | comp = self.model.flops * self.num_samples 31 | bytes_r = self.model.size 32 | return ((self.num_samples, grads), (bytes_w, comp, bytes_r)) 33 | 34 | def solve_inner(self, num_epochs=1, batch_size=10): 35 | '''Solves local optimization problem 36 | 37 | Return: 38 | 1: num_samples: number of samples used in training 39 | 1: soln: local optimization solution 40 | 2: bytes read: number of bytes received 41 | 2: comp: number of FLOPs executed in training process 42 | 2: bytes_write: number of bytes transmitted 43 | ''' 44 | 45 | bytes_w = self.model.size 46 | soln, comp = self.model.solve_inner(self.train_data, num_epochs, batch_size) 47 | bytes_r = self.model.size 48 | return (self.num_samples, soln), (bytes_w, comp, bytes_r) 49 | 50 | def solve_iters(self, num_iters=1, batch_size=10): 51 | '''Solves local optimization problem 52 | 53 | Return: 54 | 1: num_samples: number of samples used in training 55 | 1: soln: local optimization solution 56 | 2: bytes read: number of bytes received 57 | 2: comp: number of FLOPs executed in training process 58 | 2: bytes_write: number of bytes transmitted 59 | ''' 60 | 61 | bytes_w = 
self.model.size 62 | soln, comp = self.model.solve_iters(self.train_data, num_iters, batch_size) 63 | bytes_r = self.model.size 64 | return (self.num_samples, soln), (bytes_w, comp, bytes_r) 65 | 66 | def train_error_and_loss(self): 67 | tot_correct, loss = self.model.test(self.train_data) 68 | return tot_correct, loss, self.num_samples 69 | 70 | 71 | def test(self): 72 | '''tests current model on local eval_data 73 | 74 | Return: 75 | tot_correct: total #correct predictions 76 | test_samples: int 77 | ''' 78 | tot_correct, loss = self.model.test(self.eval_data) 79 | return tot_correct, self.test_samples 80 | -------------------------------------------------------------------------------- /flearn/models/goodreads/get_embs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | parser = argparse.ArgumentParser() 5 | 6 | parser.add_argument('-f', 7 | help='path to .txt file containing word embedding information;', 8 | type=str, 9 | default='glove.6B.300d.txt') 10 | 11 | args = parser.parse_args() 12 | 13 | lines = [] 14 | with open(args.f, 'r') as inf: 15 | lines = inf.readlines() 16 | lines = [l.split() for l in lines] 17 | vocab = [l[0] for l in lines] 18 | emb_floats = [[float(n) for n in l[1:]] for l in lines] 19 | emb_floats.append([0.0 for _ in range(300)]) # for unknown word 20 | js = {'vocab': vocab, 'emba': emb_floats} 21 | with open('embs.json', 'w') as ouf: 22 | json.dump(js, ouf) 23 | -------------------------------------------------------------------------------- /flearn/models/goodreads/get_embs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd sent140 4 | 5 | if [ ! -f 'glove.6B.300d.txt' ]; then 6 | wget http://nlp.stanford.edu/data/glove.6B.zip 7 | unzip glove.6B.zip 8 | rm glove.6B.50d.txt glove.6B.100d.txt glove.6B.200d.txt glove.6B.zip 9 | fi 10 | 11 | if [ ! 
-f embs.json ]; then 12 | python3 get_embs.py 13 | fi -------------------------------------------------------------------------------- /flearn/models/goodreads/mclr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | def __init__(self, num_classes, optimizer, seed=1): 12 | 13 | # params 14 | self.num_classes = num_classes 15 | 16 | # create computation graph 17 | self.graph = tf.Graph() 18 | with self.graph.as_default(): 19 | tf.set_random_seed(123 + seed) 20 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model( 21 | optimizer) 22 | self.saver = tf.train.Saver() 23 | self.sess = tf.Session(graph=self.graph) 24 | 25 | # find memory footprint and compute cost of the model 26 | self.size = graph_size(self.graph) 27 | with self.graph.as_default(): 28 | self.sess.run(tf.global_variables_initializer()) 29 | metadata = tf.RunMetadata() 30 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 31 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 32 | 33 | def create_model(self, optimizer): 34 | """Model function for Logistic Regression.""" 35 | features = tf.placeholder(tf.float32, shape=[None, 2517], name='features') 36 | labels = tf.placeholder(tf.int64, shape=[None, ], name='labels') 37 | logits = tf.layers.dense(inputs=features, units=self.num_classes, 38 | kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 39 | predictions = { 40 | "classes": tf.argmax(input=logits, axis=1), 41 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 42 | } 43 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 44 | 45 | grads_and_vars = optimizer.compute_gradients(loss) 46 | grads, _ = zip(*grads_and_vars) 47 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 48 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 49 | return features, labels, train_op, grads, eval_metric_ops, loss 50 | 51 | def set_params(self, model_params=None): 52 | if model_params is not None: 53 | with self.graph.as_default(): 54 | all_vars = tf.trainable_variables() 55 | for variable, value in zip(all_vars, model_params): 56 | variable.load(value, self.sess) 57 | 58 | def get_params(self): 59 | with self.graph.as_default(): 60 | model_params = self.sess.run(tf.trainable_variables()) 61 | return model_params 62 | 63 | def get_gradients(self, data, model_len): 64 | 65 | grads = np.zeros(model_len) 66 | num_samples = len(data['y']) 67 | 68 | with self.graph.as_default(): 69 | model_grads = self.sess.run(self.grads, 70 | feed_dict={self.features: data['x'], self.labels: data['y']}) 71 | grads = process_grad(model_grads) 72 | 73 | return num_samples, grads 74 | 75 | def solve_inner(self, data, num_epochs=1, batch_size=32): 76 | '''Solves local optimization problem''' 77 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 78 | for X, y in batch_data(data, batch_size): 79 | with self.graph.as_default(): 80 | self.sess.run(self.train_op, 81 | feed_dict={self.features: X, self.labels: y}) 82 | soln = self.get_params() 83 | comp = num_epochs * (len(data['y']) // 
batch_size) * batch_size * self.flops 84 | return soln, comp 85 | 86 | def solve_iters(self, data, num_iters=1, batch_size=32): 87 | '''Solves local optimization problem''' 88 | 89 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 90 | with self.graph.as_default(): 91 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 92 | soln = self.get_params() 93 | comp = 0 94 | return soln, comp 95 | 96 | def test(self, data): 97 | ''' 98 | Args: 99 | data: dict of the form {'x': [list], 'y': [list]} 100 | ''' 101 | with self.graph.as_default(): 102 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 103 | feed_dict={self.features: data['x'], self.labels: data['y']}) 104 | return tot_correct, loss 105 | 106 | def close(self): 107 | self.sess.close() 108 | -------------------------------------------------------------------------------- /flearn/models/mex/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/models/mex/__init__.py -------------------------------------------------------------------------------- /flearn/models/mex/dnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | 12 | def __init__(self, num_classes, optimizer, seed=1): 13 | 14 | # params 15 | self.num_classes = num_classes 16 | 17 | # create computation graph 18 | self.graph = tf.Graph() 19 | with self.graph.as_default(): 20 | tf.set_random_seed(123 + seed) 21 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model( 22 | optimizer) 23 | self.saver = tf.train.Saver() 24 | self.sess = tf.Session(graph=self.graph) 25 | 26 | # find memory footprint and compute cost of the model 27 | self.size = graph_size(self.graph) 28 | with self.graph.as_default(): 29 | self.sess.run(tf.global_variables_initializer()) 30 | metadata = tf.RunMetadata() 31 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 32 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 33 | 34 | def create_model(self, optimizer): 35 | """Model function for Deep neural network.""" 36 | features = tf.placeholder(tf.float32, shape=[None, 1280], name='features') 37 | labels = tf.placeholder(tf.int64, shape=[None, ], name='labels') 38 | 39 | second_layer = tf.layers.dense(units=1280, activation='relu', inputs=features) 40 | third_layer = tf.layers.dense(units=640, activation='relu', inputs=second_layer) 41 | fourth_layer = tf.layers.dense(units=120, activation='relu', inputs=third_layer) 42 | 43 | # second_layer = tf.layers.dense(units=1280, activation='relu', inputs=features) 44 | # second_layer_bn = tf.layers.batch_normalization(second_layer) 45 | # third_layer = tf.layers.dense(units=640, activation='relu', inputs=second_layer_bn) 46 | # third_layer_bn = tf.layers.batch_normalization(third_layer) 47 | # fourth_layer = tf.layers.dense(units=120, activation='relu', inputs=third_layer_bn) 48 | # fourth_layer_bn = tf.layers.batch_normalization(fourth_layer) 49 | 50 | logits = tf.layers.dense(inputs=fourth_layer, units=self.num_classes, 
kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 51 | 52 | predictions = { 53 | "classes": tf.argmax(input=logits, axis=1), 54 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 55 | } 56 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 57 | 58 | grads_and_vars = optimizer.compute_gradients(loss) 59 | grads, _ = zip(*grads_and_vars) 60 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 61 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 62 | return features, labels, train_op, grads, eval_metric_ops, loss 63 | 64 | def set_params(self, model_params=None): 65 | if model_params is not None: 66 | with self.graph.as_default(): 67 | all_vars = tf.trainable_variables() 68 | for variable, value in zip(all_vars, model_params): 69 | variable.load(value, self.sess) 70 | 71 | def get_params(self): 72 | with self.graph.as_default(): 73 | model_params = self.sess.run(tf.trainable_variables()) 74 | return model_params 75 | 76 | def get_gradients(self, data, model_len): 77 | 78 | grads = np.zeros(model_len) 79 | num_samples = len(data['y']) 80 | 81 | with self.graph.as_default(): 82 | model_grads = self.sess.run(self.grads, 83 | feed_dict={self.features: data['x'], self.labels: data['y']}) 84 | grads = process_grad(model_grads) 85 | 86 | return num_samples, grads 87 | 88 | def solve_inner(self, data, num_epochs=1, batch_size=32): 89 | '''Solves local optimization problem''' 90 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 91 | for X, y in batch_data(data, batch_size): 92 | with self.graph.as_default(): 93 | self.sess.run(self.train_op, 94 | feed_dict={self.features: X, self.labels: y}) 95 | soln = self.get_params() 96 | comp = num_epochs * (len(data['y']) // batch_size) * batch_size * self.flops 97 | return soln, comp 98 | 99 | def solve_iters(self, data, num_iters=1, batch_size=32): 100 | '''Solves local optimization problem''' 101 | 102 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 103 | with self.graph.as_default(): 104 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 105 | soln = self.get_params() 106 | comp = 0 107 | return soln, comp 108 | 109 | def test(self, data): 110 | ''' 111 | Args: 112 | data: dict of the form {'x': [list], 'y': [list]} 113 | ''' 114 | with self.graph.as_default(): 115 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 116 | feed_dict={self.features: data['x'], self.labels: data['y']}) 117 | return tot_correct, loss 118 | 119 | def close(self): 120 | self.sess.close() 121 | -------------------------------------------------------------------------------- /flearn/models/mex/mclr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | 12 | 13 | def __init__(self, num_classes, optimizer, seed=1): 14 | 15 | # params 16 | self.num_classes = num_classes 17 | 18 | # create computation graph 19 | self.graph = tf.Graph() 20 | with self.graph.as_default(): 21 | tf.set_random_seed(123+seed) 22 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model(optimizer) 23 | self.saver = 
tf.train.Saver() 24 | self.sess = tf.Session(graph=self.graph) 25 | 26 | # find memory footprint and compute cost of the model 27 | self.size = graph_size(self.graph) 28 | with self.graph.as_default(): 29 | self.sess.run(tf.global_variables_initializer()) 30 | metadata = tf.RunMetadata() 31 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 32 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 33 | 34 | def create_model(self, optimizer): 35 | """Model function for Logistic Regression.""" 36 | features = tf.placeholder(tf.float32, shape=[None, 1280], name='features') 37 | labels = tf.placeholder(tf.int64, shape=[None,], name='labels') 38 | logits = tf.layers.dense(inputs=features, units=self.num_classes, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 39 | predictions = { 40 | "classes": tf.argmax(input=logits, axis=1), 41 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 42 | } 43 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 44 | 45 | grads_and_vars = optimizer.compute_gradients(loss) 46 | grads, _ = zip(*grads_and_vars) 47 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 48 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 49 | return features, labels, train_op, grads, eval_metric_ops, loss 50 | 51 | def set_params(self, model_params=None): 52 | if model_params is not None: 53 | with self.graph.as_default(): 54 | all_vars = tf.trainable_variables() 55 | for variable, value in zip(all_vars, model_params): 56 | variable.load(value, self.sess) 57 | 58 | def get_params(self): 59 | with self.graph.as_default(): 60 | model_params = self.sess.run(tf.trainable_variables()) 61 | return model_params 62 | 63 | def get_gradients(self, data, model_len): 64 | 65 | grads = np.zeros(model_len) 66 | num_samples = len(data['y']) 67 | 68 | with self.graph.as_default(): 69 | model_grads = self.sess.run(self.grads, 70 | feed_dict={self.features: data['x'], self.labels: data['y']}) 71 | grads = process_grad(model_grads) 72 | 73 | return num_samples, grads 74 | 75 | def solve_inner(self, data, num_epochs=1, batch_size=32): 76 | '''Solves local optimization problem''' 77 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 78 | for X, y in batch_data(data, batch_size): 79 | with self.graph.as_default(): 80 | self.sess.run(self.train_op, 81 | feed_dict={self.features: X, self.labels: y}) 82 | soln = self.get_params() 83 | comp = num_epochs * (len(data['y'])//batch_size) * batch_size * self.flops 84 | return soln, comp 85 | 86 | def solve_iters(self, data, num_iters=1, batch_size=32): 87 | '''Solves local optimization problem''' 88 | 89 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 90 | with self.graph.as_default(): 91 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 92 | soln = self.get_params() 93 | comp = 0 94 | return soln, comp 95 | 96 | def test(self, data): 97 | ''' 98 | Args: 99 | data: dict of the form {'x': [list], 'y': [list]} 100 | ''' 101 | with self.graph.as_default(): 102 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 103 | feed_dict={self.features: data['x'], self.labels: data['y']}) 104 | return tot_correct, loss 105 | 106 | def close(self): 107 | self.sess.close() 108 | -------------------------------------------------------------------------------- /flearn/models/mnist/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/models/mnist/__init__.py -------------------------------------------------------------------------------- /flearn/models/mnist/cnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | ''' 12 | Assumes that images are 28px by 28px 13 | ''' 14 | 15 | def __init__(self, num_classes, optimizer, seed=1): 16 | 17 | # params 18 | self.num_classes = num_classes 19 | 20 | # create computation graph 21 | self.graph = tf.Graph() 22 | with self.graph.as_default(): 23 | tf.set_random_seed(123 + seed) 24 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model( 25 | optimizer) 26 | self.saver = tf.train.Saver() 27 | self.sess = tf.Session(graph=self.graph) 28 | 29 | # find memory footprint and compute cost of the model 30 | self.size = graph_size(self.graph) 31 | with self.graph.as_default(): 32 | self.sess.run(tf.global_variables_initializer()) 33 | metadata = tf.RunMetadata() 34 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 35 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 36 | 37 | def create_model(self, optimizer): 38 | """Model function for CNN.""" 39 | features = tf.placeholder(tf.float32, shape=[None, 28 * 28], name='features') 40 | labels = tf.placeholder(tf.int64, shape=[None], name='labels') 41 | input_layer = tf.reshape(features, [-1, 28, 28, 1]) 42 | conv1 = tf.layers.conv2d( 43 | inputs=input_layer, 44 | filters=32, 45 | kernel_size=[5, 5], 46 | padding="same", 47 | activation=tf.nn.relu) 48 | pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2) 49 | conv2 = tf.layers.conv2d( 50 | inputs=pool1, 51 | filters=64, 52 | kernel_size=[5, 5], 53 | padding="same", 54 | activation=tf.nn.relu) 55 | pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2) 56 | pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64]) 57 | dense = tf.layers.dense(inputs=pool2_flat, units=2048, activation=tf.nn.relu) 58 | logits = tf.layers.dense(inputs=dense, units=self.num_classes) 59 | predictions = { 60 | "classes": tf.argmax(input=logits, axis=1), 61 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 62 | } 63 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 64 | grads_and_vars = optimizer.compute_gradients(loss) 65 | grads, _ = zip(*grads_and_vars) 66 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 67 | 68 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 69 | return features, labels, train_op, grads, eval_metric_ops, loss 70 | 71 | def set_params(self, model_params=None): 72 | if model_params is not None: 73 | with self.graph.as_default(): 74 | all_vars = tf.trainable_variables() 75 | for variable, value in zip(all_vars, model_params): 76 | variable.load(value, self.sess) 77 | 78 | def get_params(self): 79 | with self.graph.as_default(): 80 | model_params = self.sess.run(tf.trainable_variables()) 81 | return model_params 82 | 83 | def 
get_gradients(self, data, model_len): 84 | 85 | grads = np.zeros(model_len) 86 | num_samples = len(data['y']) 87 | 88 | with self.graph.as_default(): 89 | model_grads = self.sess.run(self.grads, 90 | feed_dict={self.features: data['x'], self.labels: data['y']}) 91 | grads = process_grad(model_grads) 92 | 93 | return num_samples, grads 94 | 95 | def solve_inner(self, data, num_epochs=1, batch_size=32): 96 | '''Solves local optimization problem''' 97 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 98 | for X, y in batch_data(data, batch_size): 99 | with self.graph.as_default(): 100 | self.sess.run(self.train_op, 101 | feed_dict={self.features: X, self.labels: y}) 102 | soln = self.get_params() 103 | comp = num_epochs * (len(data['y']) // batch_size) * batch_size * self.flops 104 | return soln, comp 105 | 106 | def solve_iters(self, data, num_iters=1, batch_size=32): 107 | '''Solves local optimization problem''' 108 | 109 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 110 | with self.graph.as_default(): 111 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 112 | soln = self.get_params() 113 | comp = 0 114 | return soln, comp 115 | 116 | def test(self, data): 117 | ''' 118 | Args: 119 | data: dict of the form {'x': [list], 'y': [list]} 120 | ''' 121 | with self.graph.as_default(): 122 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 123 | feed_dict={self.features: data['x'], self.labels: data['y']}) 124 | return tot_correct, loss 125 | 126 | def close(self): 127 | self.sess.close() 128 | -------------------------------------------------------------------------------- /flearn/models/mnist/mclr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | ''' 12 | Assumes that images are 28px by 28px 13 | ''' 14 | 15 | def __init__(self, num_classes, optimizer, seed=1): 16 | 17 | # params 18 | self.num_classes = num_classes 19 | 20 | # create computation graph 21 | self.graph = tf.Graph() 22 | with self.graph.as_default(): 23 | tf.set_random_seed(123+seed) 24 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model(optimizer) 25 | self.saver = tf.train.Saver() 26 | self.sess = tf.Session(graph=self.graph) 27 | 28 | # find memory footprint and compute cost of the model 29 | self.size = graph_size(self.graph) 30 | with self.graph.as_default(): 31 | self.sess.run(tf.global_variables_initializer()) 32 | metadata = tf.RunMetadata() 33 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 34 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 35 | 36 | def create_model(self, optimizer): 37 | """Model function for Logistic Regression.""" 38 | features = tf.placeholder(tf.float32, shape=[None, 784], name='features') 39 | labels = tf.placeholder(tf.int64, shape=[None,], name='labels') 40 | logits = tf.layers.dense(inputs=features, units=self.num_classes, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 41 | predictions = { 42 | "classes": tf.argmax(input=logits, axis=1), 43 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 44 | } 45 | loss = 
tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 46 | 47 | grads_and_vars = optimizer.compute_gradients(loss) 48 | grads, _ = zip(*grads_and_vars) 49 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 50 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 51 | return features, labels, train_op, grads, eval_metric_ops, loss 52 | 53 | def set_params(self, model_params=None): 54 | if model_params is not None: 55 | with self.graph.as_default(): 56 | all_vars = tf.trainable_variables() 57 | for variable, value in zip(all_vars, model_params): 58 | variable.load(value, self.sess) 59 | 60 | def get_params(self): 61 | with self.graph.as_default(): 62 | model_params = self.sess.run(tf.trainable_variables()) 63 | return model_params 64 | 65 | def get_gradients(self, data, model_len): 66 | 67 | grads = np.zeros(model_len) 68 | num_samples = len(data['y']) 69 | 70 | with self.graph.as_default(): 71 | model_grads = self.sess.run(self.grads, 72 | feed_dict={self.features: data['x'], self.labels: data['y']}) 73 | grads = process_grad(model_grads) 74 | 75 | return num_samples, grads 76 | 77 | def solve_inner(self, data, num_epochs=1, batch_size=32): 78 | '''Solves local optimization problem''' 79 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 80 | for X, y in batch_data(data, batch_size): 81 | with self.graph.as_default(): 82 | self.sess.run(self.train_op, 83 | feed_dict={self.features: X, self.labels: y}) 84 | soln = self.get_params() 85 | comp = num_epochs * (len(data['y'])//batch_size) * batch_size * self.flops 86 | return soln, comp 87 | 88 | def solve_iters(self, data, num_iters=1, batch_size=32): 89 | '''Solves local optimization problem''' 90 | 91 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 92 | with self.graph.as_default(): 93 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 94 | soln = self.get_params() 95 | comp = 0 96 | return soln, comp 97 | 98 | def test(self, data): 99 | ''' 100 | Args: 101 | data: dict of the form {'x': [list], 'y': [list]} 102 | ''' 103 | with self.graph.as_default(): 104 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 105 | feed_dict={self.features: data['x'], self.labels: data['y']}) 106 | return tot_correct, loss 107 | 108 | def close(self): 109 | self.sess.close() 110 | -------------------------------------------------------------------------------- /flearn/models/nist/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/models/nist/__init__.py -------------------------------------------------------------------------------- /flearn/models/nist/cnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | ''' 12 | Assumes that images are 28px by 28px 13 | ''' 14 | 15 | def __init__(self, num_classes, optimizer, seed=1): 16 | 17 | # params 18 | self.num_classes = num_classes 19 | 20 | # create computation graph 21 | self.graph = tf.Graph() 22 | with self.graph.as_default(): 23 | tf.set_random_seed(123 + seed) 24 | 
self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model( 25 | optimizer) 26 | self.saver = tf.train.Saver() 27 | self.sess = tf.Session(graph=self.graph) 28 | 29 | # find memory footprint and compute cost of the model 30 | self.size = graph_size(self.graph) 31 | with self.graph.as_default(): 32 | self.sess.run(tf.global_variables_initializer()) 33 | metadata = tf.RunMetadata() 34 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 35 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 36 | 37 | def create_model(self, optimizer): 38 | """Model function for CNN.""" 39 | features = tf.placeholder(tf.float32, shape=[None, 28 * 28], name='features') 40 | labels = tf.placeholder(tf.int64, shape=[None], name='labels') 41 | input_layer = tf.reshape(features, [-1, 28, 28, 1]) 42 | conv1 = tf.layers.conv2d( 43 | inputs=input_layer, 44 | filters=32, 45 | kernel_size=[5, 5], 46 | padding="same", 47 | activation=tf.nn.relu) 48 | pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2) 49 | conv2 = tf.layers.conv2d( 50 | inputs=pool1, 51 | filters=64, 52 | kernel_size=[5, 5], 53 | padding="same", 54 | activation=tf.nn.relu) 55 | pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2) 56 | pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64]) 57 | dense = tf.layers.dense(inputs=pool2_flat, units=2048, activation=tf.nn.relu) 58 | logits = tf.layers.dense(inputs=dense, units=self.num_classes) 59 | predictions = { 60 | "classes": tf.argmax(input=logits, axis=1), 61 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 62 | } 63 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 64 | grads_and_vars = optimizer.compute_gradients(loss) 65 | grads, _ = zip(*grads_and_vars) 66 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 67 | 68 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 69 | return features, labels, train_op, grads, eval_metric_ops, loss 70 | 71 | def set_params(self, model_params=None): 72 | if model_params is not None: 73 | with self.graph.as_default(): 74 | all_vars = tf.trainable_variables() 75 | for variable, value in zip(all_vars, model_params): 76 | variable.load(value, self.sess) 77 | 78 | def get_params(self): 79 | with self.graph.as_default(): 80 | model_params = self.sess.run(tf.trainable_variables()) 81 | return model_params 82 | 83 | def get_gradients(self, data, model_len): 84 | 85 | grads = np.zeros(model_len) 86 | num_samples = len(data['y']) 87 | 88 | with self.graph.as_default(): 89 | model_grads = self.sess.run(self.grads, 90 | feed_dict={self.features: data['x'], self.labels: data['y']}) 91 | grads = process_grad(model_grads) 92 | 93 | return num_samples, grads 94 | 95 | def solve_inner(self, data, num_epochs=1, batch_size=32): 96 | '''Solves local optimization problem''' 97 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 98 | for X, y in batch_data(data, batch_size): 99 | with self.graph.as_default(): 100 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 101 | soln = self.get_params() 102 | comp = num_epochs * (len(data['y']) // batch_size) * batch_size * self.flops 103 | return soln, comp 104 | 105 | def solve_iters(self, data, num_iters=1, batch_size=32): 106 | '''Solves local optimization problem''' 107 | 108 | for X, y in batch_data_multiple_iters(data, batch_size, 
num_iters): 109 | with self.graph.as_default(): 110 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 111 | soln = self.get_params() 112 | comp = 0 113 | return soln, comp 114 | 115 | def test(self, data): 116 | ''' 117 | Args: 118 | data: dict of the form {'x': [list], 'y': [list]} 119 | ''' 120 | with self.graph.as_default(): 121 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 122 | feed_dict={self.features: data['x'], self.labels: data['y']}) 123 | return tot_correct, loss 124 | 125 | def close(self): 126 | self.sess.close() 127 | -------------------------------------------------------------------------------- /flearn/models/nist/mclr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import trange 4 | 5 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 6 | from flearn.utils.tf_utils import graph_size 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | 10 | class Model(object): 11 | ''' 12 | Assumes that images are 28px by 28px 13 | ''' 14 | 15 | def __init__(self, num_classes, optimizer, seed=1): 16 | 17 | # params 18 | self.num_classes = num_classes 19 | 20 | # create computation graph 21 | self.graph = tf.Graph() 22 | with self.graph.as_default(): 23 | tf.set_random_seed(123+seed) 24 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss = self.create_model(optimizer) 25 | self.saver = tf.train.Saver() 26 | self.sess = tf.Session(graph=self.graph) 27 | 28 | # find memory footprint and compute cost of the model 29 | self.size = graph_size(self.graph) 30 | with self.graph.as_default(): 31 | self.sess.run(tf.global_variables_initializer()) 32 | metadata = tf.RunMetadata() 33 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 34 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 35 | 36 | def create_model(self, optimizer): 37 | """Model function for Logistic Regression.""" 38 | features = tf.placeholder(tf.float32, shape=[None, 784], name='features') 39 | labels = tf.placeholder(tf.int64, shape=[None,], name='labels') 40 | logits = tf.layers.dense(inputs=features, units=self.num_classes, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 41 | predictions = { 42 | "classes": tf.argmax(input=logits, axis=1), 43 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 44 | } 45 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 46 | 47 | grads_and_vars = optimizer.compute_gradients(loss) 48 | grads, _ = zip(*grads_and_vars) 49 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 50 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 51 | return features, labels, train_op, grads, eval_metric_ops, loss 52 | 53 | def set_params(self, model_params=None): 54 | if model_params is not None: 55 | with self.graph.as_default(): 56 | all_vars = tf.trainable_variables() 57 | for variable, value in zip(all_vars, model_params): 58 | variable.load(value, self.sess) 59 | 60 | def get_params(self): 61 | with self.graph.as_default(): 62 | model_params = self.sess.run(tf.trainable_variables()) 63 | return model_params 64 | 65 | def get_gradients(self, data, model_len): 66 | 67 | grads = np.zeros(model_len) 68 | num_samples = len(data['y']) 69 | 70 | with self.graph.as_default(): 71 | model_grads = 
self.sess.run(self.grads, 72 | feed_dict={self.features: data['x'], self.labels: data['y']}) 73 | grads = process_grad(model_grads) 74 | 75 | return num_samples, grads 76 | 77 | def solve_inner(self, data, num_epochs=1, batch_size=32): 78 | '''Solves local optimization problem''' 79 | for _ in trange(num_epochs, desc='Epoch: ', leave=False, ncols=120): 80 | for X, y in batch_data(data, batch_size): 81 | with self.graph.as_default(): 82 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 83 | soln = self.get_params() 84 | comp = num_epochs * (len(data['y'])//batch_size) * batch_size * self.flops 85 | return soln, comp 86 | 87 | def solve_iters(self, data, num_iters=1, batch_size=32): 88 | '''Solves local optimization problem''' 89 | 90 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 91 | with self.graph.as_default(): 92 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 93 | soln = self.get_params() 94 | comp = 0 95 | return soln, comp 96 | 97 | def test(self, data): 98 | ''' 99 | Args: 100 | data: dict of the form {'x': [list], 'y': [list]} 101 | ''' 102 | with self.graph.as_default(): 103 | tot_correct, loss = self.sess.run([self.eval_metric_ops, self.loss], 104 | feed_dict={self.features: data['x'], self.labels: data['y']}) 105 | return tot_correct, loss 106 | 107 | def close(self): 108 | self.sess.close() 109 | -------------------------------------------------------------------------------- /flearn/models/synthetic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/models/synthetic/__init__.py -------------------------------------------------------------------------------- /flearn/models/synthetic/mclr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from flearn.utils.model_utils import batch_data, batch_data_multiple_iters 5 | from flearn.utils.tf_utils import graph_size 6 | from flearn.utils.tf_utils import process_grad 7 | 8 | 9 | class Model(object): 10 | ''' 11 | Assumes that images are 28px by 28px 12 | ''' 13 | 14 | def __init__(self, num_classes, optimizer, seed=1): 15 | 16 | # params 17 | self.num_classes = num_classes 18 | 19 | # create computation graph 20 | self.graph = tf.Graph() 21 | with self.graph.as_default(): 22 | tf.set_random_seed(123+seed) 23 | self.features, self.labels, self.train_op, self.grads, self.eval_metric_ops, self.loss, self.pred = self.create_model(optimizer) 24 | self.saver = tf.train.Saver() 25 | self.sess = tf.Session(graph=self.graph) 26 | 27 | # find memory footprint and compute cost of the model 28 | self.size = graph_size(self.graph) 29 | with self.graph.as_default(): 30 | self.sess.run(tf.global_variables_initializer()) 31 | metadata = tf.RunMetadata() 32 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 33 | self.flops = tf.profiler.profile(self.graph, run_meta=metadata, cmd='scope', options=opts).total_float_ops 34 | 35 | def create_model(self, optimizer): 36 | """Model function for Logistic Regression.""" 37 | features = tf.placeholder(tf.float32, shape=[None, 60], name='features') 38 | labels = tf.placeholder(tf.int64, shape=[None,], name='labels') 39 | logits = tf.layers.dense(inputs=features, units=self.num_classes, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001)) 40 | predictions = { 41 | "classes": 
tf.argmax(input=logits, axis=1), 42 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor") 43 | } 44 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 45 | 46 | grads_and_vars = optimizer.compute_gradients(loss) 47 | grads, _ = zip(*grads_and_vars) 48 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step()) 49 | eval_metric_ops = tf.count_nonzero(tf.equal(labels, predictions["classes"])) 50 | return features, labels, train_op, grads, eval_metric_ops, loss, predictions["classes"] 51 | 52 | def set_params(self, model_params=None): 53 | if model_params is not None: 54 | with self.graph.as_default(): 55 | all_vars = tf.trainable_variables() 56 | for variable, value in zip(all_vars, model_params): 57 | variable.load(value, self.sess) 58 | 59 | def get_params(self): 60 | with self.graph.as_default(): 61 | model_params = self.sess.run(tf.trainable_variables()) 62 | return model_params 63 | 64 | def get_gradients(self, data, model_len): 65 | 66 | grads = np.zeros(model_len) 67 | num_samples = len(data['y']) 68 | 69 | with self.graph.as_default(): 70 | model_grads = self.sess.run(self.grads, 71 | feed_dict={self.features: data['x'], self.labels: data['y']}) 72 | grads = process_grad(model_grads) 73 | 74 | return num_samples, grads 75 | 76 | def solve_inner(self, data, num_epochs=1, batch_size=32): 77 | '''Solves local optimization problem''' 78 | 79 | for _ in range(num_epochs): 80 | for X, y in batch_data(data, batch_size): 81 | with self.graph.as_default(): 82 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 83 | soln = self.get_params() 84 | comp = num_epochs * (len(data['y'])//batch_size) * batch_size * self.flops 85 | return soln, comp 86 | 87 | def solve_iters(self, data, num_iters=1, batch_size=32): 88 | '''Solves local optimization problem''' 89 | 90 | for X, y in batch_data_multiple_iters(data, batch_size, num_iters): 91 | with self.graph.as_default(): 92 | self.sess.run(self.train_op, feed_dict={self.features: X, self.labels: y}) 93 | soln = self.get_params() 94 | comp = 0 95 | return soln, comp 96 | 97 | def test(self, data): 98 | ''' 99 | Args: 100 | data: dict of the form {'x': [list], 'y': [list]} 101 | ''' 102 | with self.graph.as_default(): 103 | tot_correct, loss, pred = self.sess.run([self.eval_metric_ops, self.loss, self.pred], 104 | feed_dict={self.features: data['x'], self.labels: data['y']}) 105 | return tot_correct, loss 106 | 107 | def close(self): 108 | self.sess.close() 109 | -------------------------------------------------------------------------------- /flearn/optimizer/pgd.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.ops import control_flow_ops 2 | from tensorflow.python.ops import math_ops 3 | from tensorflow.python.ops import state_ops 4 | from tensorflow.python.framework import ops 5 | from tensorflow.python.training import optimizer 6 | import tensorflow as tf 7 | 8 | 9 | class PerturbedGradientDescent(optimizer.Optimizer): 10 | """Implementation of Perturbed Gradient Descent, i.e., FedProx optimizer""" 11 | def __init__(self, learning_rate=0.001, mu=0.01, use_locking=False, name="PGD"): 12 | super(PerturbedGradientDescent, self).__init__(use_locking, name) 13 | self._lr = learning_rate 14 | self._mu = mu 15 | 16 | # Tensor versions of the constructor arguments, created in _prepare(). 
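        # In _apply_dense below, lr_t and mu_t drive the FedProx proximal step
        #     var <- var - lr * (grad + mu * (var - vstar)),
        # where the "vstar" slot holds the global model loaded via set_params(),
        # so mu controls how strongly each local update is pulled back towards
        # the server solution (mu = 0 reduces to plain gradient descent).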
17 | self._lr_t = None 18 | self._mu_t = None 19 | 20 | def _prepare(self): 21 | self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate") 22 | self._mu_t = ops.convert_to_tensor(self._mu, name="prox_mu") 23 | 24 | def _create_slots(self, var_list): 25 | # Create slots for the global solution. 26 | for v in var_list: 27 | self._zeros_slot(v, "vstar", self._name) 28 | 29 | def _apply_dense(self, grad, var): 30 | lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) 31 | mu_t = math_ops.cast(self._mu_t, var.dtype.base_dtype) 32 | vstar = self.get_slot(var, "vstar") 33 | 34 | var_update = state_ops.assign_sub(var, lr_t*(grad + mu_t*(var-vstar))) 35 | 36 | return control_flow_ops.group(*[var_update,]) 37 | 38 | 39 | def _apply_sparse_shared(self, grad, var, indices, scatter_add): 40 | 41 | lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) 42 | mu_t = math_ops.cast(self._mu_t, var.dtype.base_dtype) 43 | vstar = self.get_slot(var, "vstar") 44 | 45 | v_diff = state_ops.assign(vstar, mu_t * (var - vstar), use_locking=self._use_locking) 46 | 47 | with ops.control_dependencies([v_diff]): # run v_diff operation before scatter_add 48 | scaled_grad = scatter_add(vstar, indices, grad) 49 | var_update = state_ops.assign_sub(var, lr_t * scaled_grad) 50 | 51 | return control_flow_ops.group(*[var_update,]) 52 | 53 | def _apply_sparse(self, grad, var): 54 | return self._apply_sparse_shared( 55 | grad.values, var, grad.indices, 56 | lambda x, i, v: state_ops.scatter_add(x, i, v)) 57 | 58 | 59 | def set_params(self, cog, client): 60 | with client.graph.as_default(): 61 | all_vars = tf.trainable_variables() 62 | for variable, value in zip(all_vars, cog): 63 | # print(value) 64 | # print(variable) 65 | vstar = self.get_slot(variable, "vstar") 66 | # print(vstar) 67 | vstar.load(value, client.sess) 68 | -------------------------------------------------------------------------------- /flearn/optimizer/pggd.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.ops import control_flow_ops 2 | from tensorflow.python.ops import math_ops 3 | from tensorflow.python.ops import state_ops 4 | from tensorflow.python.framework import ops 5 | from tensorflow.python.training import optimizer 6 | import tensorflow as tf 7 | 8 | 9 | class PerGodGradientDescent(optimizer.Optimizer): 10 | """Implementation of Perturbed gold Gradient Descent""" 11 | def __init__(self, learning_rate=0.001, mu=0.01, use_locking=False, name="PGD"): 12 | super(PerGodGradientDescent, self).__init__(use_locking, name) 13 | self._lr = learning_rate 14 | self._mu = mu 15 | 16 | # Tensor versions of the constructor arguments, created in _prepare(). 17 | self._lr_t = None 18 | self._mu_t = None 19 | 20 | def _prepare(self): 21 | self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate") 22 | self._mu_t = ops.convert_to_tensor(self._mu, name="prox_mu") 23 | 24 | def _create_slots(self, var_list): 25 | # Create slots for the global solution. 26 | for v in var_list: 27 | self._zeros_slot(v, "vstar", self._name) 28 | self._zeros_slot(v, "gold", self._name) 29 | 30 | def _apply_dense(self, grad, var): 31 | lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) 32 | mu_t = math_ops.cast(self._mu_t, var.dtype.base_dtype) 33 | 34 | vstar = self.get_slot(var, "vstar") 35 | gold = self.get_slot(var, "gold") 36 | 37 | var_update = state_ops.assign_sub(var, lr_t*(grad + gold + mu_t*(var-vstar))) #Update 'ref' by subtracting 'value 38 | #Create an op that groups multiple operations. 
39 | #When this op finishes, all ops in input have finished 40 | return control_flow_ops.group(*[var_update,]) 41 | 42 | def _apply_sparse(self, grad, var): 43 | raise NotImplementedError("Sparse gradient updates are not supported.") 44 | 45 | def set_params(self, cog, avg_gradient, client): 46 | with client.model.graph.as_default(): 47 | all_vars = tf.trainable_variables() 48 | for variable, value in zip(all_vars, cog): 49 | vstar = self.get_slot(variable, "vstar") 50 | vstar.load(value, client.model.sess) 51 | 52 | # get old gradient 53 | gprev = client.get_grads() 54 | 55 | # Find g_t - F'(old) 56 | gdiff = [g1-g2 for g1,g2 in zip(avg_gradient, gprev)] 57 | 58 | with client.model.graph.as_default(): 59 | all_vars = tf.trainable_variables() 60 | for variable, grad in zip(all_vars, gdiff): 61 | gold = self.get_slot(variable, "gold") 62 | gold.load(grad, client.model.sess) 63 | -------------------------------------------------------------------------------- /flearn/trainers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/trainers/__init__.py -------------------------------------------------------------------------------- /flearn/trainers/fedavg.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import time 3 | 4 | import numpy as np 5 | from tqdm import trange, tqdm 6 | import tensorflow as tf 7 | 8 | from utils import csv_log 9 | from .fedbase import BaseFedarated 10 | from flearn.utils.tf_utils import process_grad 11 | 12 | 13 | class Server(BaseFedarated): 14 | def __init__(self, params, learner, dataset): 15 | print('Using Federated avg to Train') 16 | self.inner_opt = tf.train.GradientDescentOptimizer(params['learning_rate']) 17 | 18 | # Setup Log 19 | self.params_log = params 20 | # self.run_name = str(params["ex_name"])+"_fedavg_"+ str(datetime.datetime.now().strftime("%m%d-%H%M%S")) 21 | self.run_name = str(params["ex_name"])+"_fedavg" 22 | self.log_main = [] 23 | csv_log.log_start('avg',params,1, self.run_name) 24 | 25 | super(Server, self).__init__(params, learner, dataset) 26 | 27 | def train(self): 28 | '''Train using Federated Proximal''' 29 | print('Training with {} workers ---'.format(self.clients_per_round)) 30 | elapsed = [] 31 | for i in range(self.num_rounds): 32 | # test model 33 | if i % self.eval_every == 0: 34 | stats = self.test() # have set the latest model for all clients 35 | stats_train = self.train_error_and_loss() 36 | 37 | train_loss = np.dot(stats_train[4], stats_train[2]) * 1.0 / np.sum(stats_train[2]) 38 | train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2]) 39 | test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2]) 40 | 41 | self.log_main.append([i, train_loss, train_acc, test_acc]) 42 | 43 | tqdm.write('At round {} accuracy: {}'.format(i, test_acc )) # testing accuracy 44 | tqdm.write('At round {} training accuracy: {}'.format(i,train_acc )) 45 | tqdm.write('At round {} training loss: {}'.format(i,train_loss )) 46 | 47 | start_time = time.time() 48 | 49 | indices, selected_clients = self.select_clients(i, num_clients=self.clients_per_round) # uniform sampling 50 | np.random.seed(i) 51 | active_clients = np.random.choice(selected_clients, round(self.clients_per_round * (1-self.drop_percent)), replace=False) 52 | 53 | csolns = [] # buffer for receiving client solutions 54 | 55 | for idx, c in enumerate(active_clients.tolist()): # simply drop 
the slow devices 56 | # communicate the latest model 57 | c.set_params(self.latest_model) 58 | 59 | # solve minimization locally 60 | soln, stats = c.solve_inner(num_epochs=self.num_epochs, batch_size=self.batch_size) 61 | 62 | # gather solutions from client 63 | csolns.append(soln) 64 | 65 | # track communication cost 66 | self.metrics.update(rnd=i, cid=c.id, stats=stats) 67 | 68 | # update models 69 | self.latest_model = self.aggregate(csolns) 70 | elapsed_time = time.time() - start_time 71 | elapsed.append(elapsed_time) 72 | 73 | # final test model 74 | stats = self.test() 75 | stats_train = self.train_error_and_loss() 76 | self.metrics.accuracies.append(stats) 77 | self.metrics.train_accuracies.append(stats_train) 78 | 79 | test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2]) 80 | train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2]) 81 | 82 | tqdm.write('At round {} accuracy: {}'.format(self.num_rounds, test_acc)) 83 | tqdm.write('At round {} training accuracy: {}'.format(self.num_rounds, train_acc)) 84 | 85 | self.log_main.append([self.num_rounds, train_loss, train_acc, test_acc]) 86 | csv_log.write_all('avg', self.log_main, [], 1, self.run_name) 87 | csv_log.graph_print('avg',self.params_log, 1, self.run_name) 88 | 89 | print("Time Taken Each Round: ") 90 | print(elapsed) 91 | print(np.mean(elapsed)) 92 | csv_log.write_time_taken(elapsed, self.run_name) 93 | 94 | -------------------------------------------------------------------------------- /flearn/trainers/fedbase.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tqdm import tqdm 4 | 5 | from flearn.models.client import Client 6 | from flearn.utils.model_utils import Metrics 7 | from flearn.utils.tf_utils import process_grad 8 | 9 | class BaseFedarated(object): 10 | def __init__(self, params, learner, dataset): 11 | # transfer parameters to self 12 | for key, val in params.items(): setattr(self, key, val); 13 | 14 | # create worker nodes 15 | tf.reset_default_graph() 16 | self.client_model = learner(*params['model_params'], self.inner_opt, self.seed) 17 | self.clients = self.setup_clients(dataset, self.client_model) 18 | print('{} Clients in Total'.format(len(self.clients))) 19 | self.latest_model = self.client_model.get_params() 20 | self.params_ = params 21 | # initialize system metrics 22 | self.metrics = Metrics(self.clients, params) 23 | 24 | def __del__(self): 25 | self.client_model.close() 26 | 27 | def setup_clients(self, dataset, model=None): 28 | '''instantiates clients based on given train and test data directories 29 | 30 | Return: 31 | list of Clients 32 | ''' 33 | users, groups, train_data, test_data = dataset 34 | if len(groups) == 0: 35 | groups = [None for _ in users] 36 | all_clients = [Client(u, g, train_data[u], test_data[u], model) for u, g in zip(users, groups)] 37 | return all_clients 38 | 39 | def train_error_and_loss(self): 40 | num_samples = [] 41 | tot_correct = [] 42 | losses = [] 43 | 44 | for c in self.clients: 45 | ct, cl, ns = c.train_error_and_loss() 46 | tot_correct.append(ct*1.0) 47 | num_samples.append(ns) 48 | losses.append(cl*1.0) 49 | 50 | ids = [c.id for c in self.clients] 51 | groups = [c.group for c in self.clients] 52 | 53 | return ids, groups, num_samples, tot_correct, losses 54 | 55 | 56 | def show_grads(self): 57 | ''' 58 | Return: 59 | gradients on all workers and the global gradient 60 | ''' 61 | 62 | model_len = process_grad(self.latest_model).size 63 | global_grads = 
np.zeros(model_len) 64 | 65 | intermediate_grads = [] 66 | samples=[] 67 | 68 | self.client_model.set_params(self.latest_model) 69 | for c in self.clients: 70 | num_samples, client_grads = c.get_grads(self.latest_model) 71 | samples.append(num_samples) 72 | global_grads = np.add(global_grads, client_grads * num_samples) 73 | intermediate_grads.append(client_grads) 74 | 75 | global_grads = global_grads * 1.0 / np.sum(np.asarray(samples)) 76 | intermediate_grads.append(global_grads) 77 | 78 | return intermediate_grads 79 | 80 | 81 | def test(self): 82 | '''tests self.latest_model on given clients 83 | ''' 84 | num_samples = [] 85 | tot_correct = [] 86 | self.client_model.set_params(self.latest_model) 87 | for c in self.clients: 88 | ct, ns = c.test() 89 | tot_correct.append(ct*1.0) 90 | num_samples.append(ns) 91 | ids = [c.id for c in self.clients] 92 | groups = [c.group for c in self.clients] 93 | return ids, groups, num_samples, tot_correct 94 | 95 | def save(self): 96 | pass 97 | 98 | def select_clients(self, round, num_clients=20): 99 | '''selects num_clients clients weighted by number of samples from possible_clients 100 | 101 | Args: 102 | num_clients: number of clients to select; default 20 103 | note that within function, num_clients is set to 104 | min(num_clients, len(possible_clients)) 105 | 106 | Return: 107 | list of selected clients objects 108 | ''' 109 | 110 | num_clients = min(num_clients, len(self.clients)) 111 | np.random.seed(round+self.params_['seed']) # make sure for each comparison, we are selecting the same clients each round 112 | indices = np.random.choice(range(len(self.clients)), num_clients, replace=False) 113 | return indices, np.asarray(self.clients)[indices] 114 | 115 | def aggregate(self, wsolns): 116 | total_weight = 0.0 117 | base = [0]*len(wsolns[0][1]) 118 | for (w, soln) in wsolns: # w is the number of local samples 119 | total_weight += w 120 | for i, v in enumerate(soln): 121 | base[i] += w*v.astype(np.float64) 122 | 123 | averaged_soln = [v / total_weight for v in base] 124 | 125 | return averaged_soln 126 | 127 | -------------------------------------------------------------------------------- /flearn/trainers/feddane.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import trange, tqdm 3 | import tensorflow as tf 4 | 5 | from .fedbase import BaseFedarated 6 | from flearn.optimizer.pggd import PerGodGradientDescent 7 | from flearn.utils.tf_utils import process_grad, process_sparse_grad 8 | 9 | 10 | class Server(BaseFedarated): 11 | def __init__(self, params, learner, dataset): 12 | print('Using Federated Dane to Train') 13 | self.inner_opt = PerGodGradientDescent(params['learning_rate'], params['mu']) 14 | super(Server, self).__init__(params, learner, dataset) 15 | 16 | def train(self): 17 | '''Train using Federated Proximal''' 18 | print('Training with {} workers ---'.format(self.clients_per_round)) 19 | for i in trange(self.num_rounds, desc='Round: ', ncols=120): 20 | # test model 21 | if i % self.eval_every == 0: 22 | stats = self.test() # have set the latest model for all clients 23 | stats_train = self.train_error_and_loss() 24 | 25 | tqdm.write('At round {} accuracy: {}'.format(i, np.sum(stats[3])*1.0/np.sum(stats[2]))) # testing accuracy 26 | tqdm.write('At round {} training accuracy: {}'.format(i, np.sum(stats_train[3])*1.0/np.sum(stats_train[2]))) 27 | tqdm.write('At round {} training loss: {}'.format(i, np.dot(stats_train[4], stats_train[2])*1.0/np.sum(stats_train[2]))) 
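            # Each FedDANE round below runs two passes over sampled clients: the
            # first pass only collects local gradients at the current global model
            # and averages them into avg_gradient; the second pass hands that
            # average (together with the latest model) to PerGodGradientDescent via
            # set_params, so every client solves its local problem with the DANE
            # gradient-correction term before the solutions are re-aggregated.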
28 | 29 | # choose K clients prop to data size 30 | selected_clients = self.select_clients(i, num_clients=self.clients_per_round) 31 | 32 | cgrads = [] # buffer for receiving client solutions 33 | for c in tqdm(selected_clients, desc='Grads: ', leave=False, ncols=120): 34 | # communicate the latest model 35 | c.set_params(self.latest_model) 36 | 37 | # get the gradients 38 | grad, stats = c.solve_grad() 39 | 40 | # gather gradient from client 41 | cgrads.append(grad) 42 | 43 | # Total gradient 44 | avg_gradient = self.aggregate(cgrads) 45 | 46 | # Choose K clients prop to data size 47 | selected_clients = self.select_clients(i, num_clients=self.clients_per_round) 48 | 49 | csolns = [] # buffer for receiving client solutions 50 | for c in tqdm(selected_clients, desc='Solve: ', leave=False, ncols=120): 51 | # communicate the latest model 52 | c.set_params(self.latest_model) # w_{t-1} 53 | 54 | # setup local optimizer 55 | self.inner_opt.set_params(self.latest_model, avg_gradient, c) 56 | 57 | # solve minimization locally 58 | soln, stats = c.solve_inner(num_epochs=self.num_epochs, batch_size=self.batch_size) 59 | 60 | # gather solutions from client 61 | csolns.append(soln) 62 | 63 | # update model 64 | self.latest_model = self.aggregate(csolns) 65 | 66 | # final test model 67 | stats = self.test() 68 | stats_train = self.train_error_and_loss() 69 | tqdm.write('At round {} accuracy: {}'.format(self.num_rounds, np.sum(stats[3])*1.0/np.sum(stats[2]))) 70 | tqdm.write('At round {} training accuracy: {}'.format(self.num_rounds, np.sum(stats_train[3])*1.0/np.sum(stats_train[2]))) 71 | -------------------------------------------------------------------------------- /flearn/trainers/fedprox.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import time 3 | 4 | import numpy as np 5 | from tqdm import trange, tqdm 6 | import tensorflow as tf 7 | 8 | from utils import csv_log 9 | from .fedbase import BaseFedarated 10 | from flearn.optimizer.pgd import PerturbedGradientDescent 11 | from flearn.utils.tf_utils import process_grad, process_sparse_grad 12 | 13 | 14 | class Server(BaseFedarated): 15 | def __init__(self, params, learner, dataset): 16 | print('Using Federated prox to Train') 17 | self.inner_opt = PerturbedGradientDescent(params['learning_rate'], params['mu']) 18 | 19 | # Setup Log 20 | self.params_log = params 21 | # self.run_name = str(params["ex_name"])+"_fedprox_"+ str(datetime.datetime.now().strftime("%m%d-%H%M%S")) 22 | self.run_name = str(params["ex_name"])+"_fedprox" 23 | self.log_main = [] 24 | csv_log.log_start('prox',params,1, self.run_name) 25 | 26 | super(Server, self).__init__(params, learner, dataset) 27 | 28 | def train(self): 29 | '''Train using Federated Proximal''' 30 | print('Training with {} workers ---'.format(self.clients_per_round)) 31 | 32 | elapsed = [] 33 | 34 | for i in range(self.num_rounds): 35 | # test model 36 | if i % self.eval_every == 0: 37 | stats = self.test() # have set the latest model for all clients 38 | stats_train = self.train_error_and_loss() 39 | 40 | test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2]) 41 | train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2]) 42 | train_loss = np.dot(stats_train[4], stats_train[2]) * 1.0 / np.sum(stats_train[2]) 43 | 44 | tqdm.write('At round {} accuracy: {}'.format(i, test_acc )) # testing accuracy 45 | tqdm.write('At round {} training accuracy: {}'.format(i,train_acc )) 46 | tqdm.write('At round {} training loss: {}'.format(i,train_loss )) 47 | 
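                # The metrics above are sample-weighted across all clients:
                # train_loss is np.dot(losses, num_samples) / sum(num_samples)
                # (stats_train[4] and stats_train[2] respectively), and both
                # accuracies divide the summed correct predictions by the same
                # total, so clients with more data contribute proportionally more.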
48 | self.log_main.append([i, train_loss, train_acc, test_acc]) 49 | 50 | start_time = time.time() 51 | 52 | model_len = process_grad(self.latest_model).size 53 | global_grads = np.zeros(model_len) 54 | client_grads = np.zeros(model_len) 55 | num_samples = [] 56 | local_grads = [] 57 | 58 | for c in self.clients: 59 | num, client_grad = c.get_grads(model_len) 60 | local_grads.append(client_grad) 61 | num_samples.append(num) 62 | global_grads = np.add(global_grads, client_grad * num) 63 | global_grads = global_grads * 1.0 / np.sum(np.asarray(num_samples)) 64 | 65 | difference = 0 66 | for idx in range(len(self.clients)): 67 | difference += np.sum(np.square(global_grads - local_grads[idx])) 68 | difference = difference * 1.0 / len(self.clients) 69 | tqdm.write('gradient difference: {}'.format(difference)) 70 | 71 | indices, selected_clients = self.select_clients(i, num_clients=self.clients_per_round) # uniform sampling 72 | np.random.seed(i) # make sure that the stragglers are the same for FedProx and FedAvg 73 | active_clients = np.random.choice(selected_clients, round(self.clients_per_round * (1 - self.drop_percent)), replace=False) 74 | 75 | csolns = [] # buffer for receiving client solutions 76 | 77 | self.inner_opt.set_params(self.latest_model, self.client_model) 78 | 79 | for idx, c in enumerate(selected_clients.tolist()): 80 | # communicate the latest model 81 | c.set_params(self.latest_model) 82 | 83 | total_iters = int(self.num_epochs * c.num_samples / self.batch_size)+2 # randint(low,high)=[low,high) 84 | 85 | # solve minimization locally 86 | if c in active_clients: 87 | soln, stats = c.solve_inner(num_epochs=self.num_epochs, batch_size=self.batch_size) 88 | else: 89 | #soln, stats = c.solve_iters(num_iters=np.random.randint(low=1, high=total_iters), batch_size=self.batch_size) 90 | soln, stats = c.solve_inner(num_epochs=np.random.randint(low=1, high=self.num_epochs), batch_size=self.batch_size) 91 | 92 | # gather solutions from client 93 | csolns.append(soln) 94 | 95 | # track communication cost 96 | self.metrics.update(rnd=i, cid=c.id, stats=stats) 97 | 98 | # update models 99 | self.latest_model = self.aggregate(csolns) 100 | self.client_model.set_params(self.latest_model) 101 | elapsed_time = time.time() - start_time 102 | elapsed.append(elapsed_time) 103 | 104 | # final test model 105 | stats = self.test() 106 | stats_train = self.train_error_and_loss() 107 | self.metrics.accuracies.append(stats) 108 | self.metrics.train_accuracies.append(stats_train) 109 | 110 | test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2]) 111 | train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2]) 112 | 113 | tqdm.write('At round {} accuracy: {}'.format(self.num_rounds, test_acc)) 114 | tqdm.write('At round {} training accuracy: {}'.format(self.num_rounds, train_acc)) 115 | 116 | self.log_main.append([self.num_rounds, train_loss, train_acc, test_acc]) 117 | csv_log.write_all('prox', self.log_main, [], 1, self.run_name) 118 | csv_log.graph_print('prox',self.params_log, 1, self.run_name) 119 | 120 | # tqdm.write('At round {} accuracy: {}'.format(self.num_rounds, np.sum(stats[3])*1.0/np.sum(stats[2]))) 121 | # tqdm.write('At round {} training accuracy: {}'.format(self.num_rounds, np.sum(stats_train[3])*1.0/np.sum(stats_train[2]))) 122 | print("Time Taken Each Round: ") 123 | print(elapsed) 124 | print(np.mean(elapsed)) 125 | csv_log.write_time_taken(elapsed, self.run_name) 126 | -------------------------------------------------------------------------------- /flearn/utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/flearn/utils/__init__.py -------------------------------------------------------------------------------- /flearn/utils/language_utils.py: -------------------------------------------------------------------------------- 1 | """Utils for language models.""" 2 | 3 | import re 4 | 5 | 6 | # ------------------------ 7 | # utils for shakespeare dataset 8 | 9 | ALL_LETTERS = "\n !\"&'(),-.0123456789:;>?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]abcdefghijklmnopqrstuvwxyz}" 10 | NUM_LETTERS = len(ALL_LETTERS) 11 | 12 | 13 | def _one_hot(index, size): 14 | '''returns one-hot vector with given size and value 1 at given index 15 | ''' 16 | vec = [0 for _ in range(size)] 17 | vec[int(index)] = 1 18 | return vec 19 | 20 | 21 | def letter_to_vec(letter): 22 | '''returns one-hot representation of given letter 23 | ''' 24 | index = ALL_LETTERS.find(letter) 25 | return _one_hot(index, NUM_LETTERS) 26 | 27 | 28 | def word_to_indices(word): 29 | '''returns a list of character indices 30 | 31 | Args: 32 | word: string 33 | 34 | Return: 35 | indices: int list with length len(word) 36 | ''' 37 | indices = [] 38 | for c in word: 39 | indices.append(ALL_LETTERS.find(c)) 40 | return indices 41 | 42 | 43 | # ------------------------ 44 | # utils for sent140 dataset 45 | 46 | 47 | def split_line(line): 48 | '''split given line/phrase into list of words 49 | 50 | Args: 51 | line: string representing phrase to be split 52 | 53 | Return: 54 | list of strings, with each string representing a word 55 | ''' 56 | return re.findall(r"[\w']+|[.,!?;]", line) 57 | 58 | 59 | def _word_to_index(word, indd): 60 | '''returns index of given word based on given lookup dictionary 61 | 62 | returns the length of the lookup dictionary if word not found 63 | 64 | Args: 65 | word: string 66 | indd: dictionary with string words as keys and int indices as values 67 | ''' 68 | if word in indd: 69 | return indd[word] 70 | else: 71 | return len(indd) 72 | 73 | 74 | def line_to_indices(line, word2id, max_words=25): 75 | '''converts given phrase into list of word indices 76 | 77 | if the phrase has more than max_words words, returns a list containing 78 | indices of the first max_words words 79 | if the phrase has less than max_words words, repeatedly appends integer 80 | representing unknown index to returned list until the list's length is 81 | max_words 82 | 83 | Args: 84 | line: string representing phrase/sequence of words 85 | word2id: dictionary with string words as keys and int indices as values 86 | max_words: maximum number of word indices in returned list 87 | 88 | Return: 89 | indl: list of word indices, one index for each word in phrase 90 | ''' 91 | unk_id = len(word2id) 92 | line_list = split_line(line) # split phrase in words 93 | indl = [word2id[w] if w in word2id else unk_id for w in line_list[:max_words]] 94 | indl += [unk_id]*(max_words-len(indl)) 95 | return indl 96 | 97 | 98 | def bag_of_words(line, vocab): 99 | '''returns bag of words representation of given phrase using given vocab 100 | 101 | Args: 102 | line: string representing phrase to be parsed 103 | vocab: dictionary with words as keys and indices as values 104 | 105 | Return: 106 | integer list 107 | ''' 108 | bag = [0]*len(vocab) 109 | words = split_line(line) 110 | for w in words: 111 | if w in vocab: 112 | bag[vocab[w]] += 1 113 | return bag 114 | 
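# A minimal usage sketch; the toy vocabulary below is illustrative only and is
# not part of the repository's data. line_to_indices maps the first max_words
# tokens to vocabulary indices and pads with the unknown index len(word2id),
# while bag_of_words counts occurrences of in-vocabulary tokens.
if __name__ == '__main__':
    demo_vocab = {'i': 0, 'love': 1, 'book': 2}  # hypothetical toy vocab
    print(line_to_indices('i love this book', demo_vocab, max_words=5))
    # -> [0, 1, 3, 2, 3]   ('this' and the padding both map to unk_id == 3)
    print(bag_of_words('i love love this book', demo_vocab))
    # -> [1, 2, 1]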
-------------------------------------------------------------------------------- /flearn/utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | 5 | def batch_data(data, batch_size): 6 | ''' 7 | data is a dict := {'x': [numpy array], 'y': [numpy array]} (on one client) 8 | returns x, y, which are both numpy array of length: batch_size 9 | ''' 10 | data_x = data['x'] 11 | data_y = data['y'] 12 | 13 | # randomly shuffle data 14 | np.random.seed(100) 15 | rng_state = np.random.get_state() 16 | np.random.shuffle(data_x) 17 | np.random.set_state(rng_state) 18 | np.random.shuffle(data_y) 19 | 20 | # loop through mini-batches 21 | for i in range(0, len(data_x), batch_size): 22 | batched_x = data_x[i:i+batch_size] 23 | batched_y = data_y[i:i+batch_size] 24 | yield (batched_x, batched_y) 25 | 26 | def batch_data_multiple_iters(data, batch_size, num_iters): 27 | data_x = data['x'] 28 | data_y = data['y'] 29 | 30 | np.random.seed(100) 31 | rng_state = np.random.get_state() 32 | np.random.shuffle(data_x) 33 | np.random.set_state(rng_state) 34 | np.random.shuffle(data_y) 35 | 36 | idx = 0 37 | 38 | for i in range(num_iters): 39 | if idx+batch_size >= len(data_x): 40 | idx = 0 41 | rng_state = np.random.get_state() 42 | np.random.shuffle(data_x) 43 | np.random.set_state(rng_state) 44 | np.random.shuffle(data_y) 45 | batched_x = data_x[idx: idx+batch_size] 46 | batched_y = data_y[idx: idx+batch_size] 47 | idx += batch_size 48 | yield (batched_x, batched_y) 49 | 50 | def read_data(train_data_dir, test_data_dir): 51 | '''parses data in given train and test data directories 52 | 53 | assumes: 54 | - the data in the input directories are .json files with 55 | keys 'users' and 'user_data' 56 | - the set of train set users is the same as the set of test set users 57 | 58 | Return: 59 | clients: list of client ids 60 | groups: list of group ids; empty list if none found 61 | train_data: dictionary of train data 62 | test_data: dictionary of test data 63 | ''' 64 | clients = [] 65 | groups = [] 66 | train_data = {} 67 | test_data = {} 68 | 69 | train_files = os.listdir(train_data_dir) 70 | train_files = [f for f in train_files if f.endswith('.json')] 71 | for f in train_files: 72 | file_path = os.path.join(train_data_dir,f) 73 | with open(file_path, 'r') as inf: 74 | cdata = json.load(inf) 75 | clients.extend(cdata['users']) 76 | if 'hierarchies' in cdata: 77 | groups.extend(cdata['hierarchies']) 78 | train_data.update(cdata['user_data']) 79 | 80 | test_files = os.listdir(test_data_dir) 81 | test_files = [f for f in test_files if f.endswith('.json')] 82 | for f in test_files: 83 | file_path = os.path.join(test_data_dir,f) 84 | with open(file_path, 'r') as inf: 85 | cdata = json.load(inf) 86 | test_data.update(cdata['user_data']) 87 | 88 | clients = list(sorted(train_data.keys())) 89 | 90 | return clients, groups, train_data, test_data 91 | 92 | 93 | class Metrics(object): 94 | def __init__(self, clients, params): 95 | self.params = params 96 | num_rounds = params['num_rounds'] 97 | self.bytes_written = {c.id: [0] * num_rounds for c in clients} 98 | self.client_computations = {c.id: [0] * num_rounds for c in clients} 99 | self.bytes_read = {c.id: [0] * num_rounds for c in clients} 100 | self.accuracies = [] 101 | self.train_accuracies = [] 102 | 103 | def update(self, rnd, cid, stats): 104 | bytes_w, comp, bytes_r = stats 105 | self.bytes_written[cid][rnd] += bytes_w 106 | self.client_computations[cid][rnd] += 
comp 107 | self.bytes_read[cid][rnd] += bytes_r 108 | 109 | def write(self): 110 | metrics = {} 111 | metrics['dataset'] = self.params['dataset'] 112 | metrics['num_rounds'] = self.params['num_rounds'] 113 | metrics['eval_every'] = self.params['eval_every'] 114 | metrics['learning_rate'] = self.params['learning_rate'] 115 | metrics['mu'] = self.params['mu'] 116 | metrics['num_epochs'] = self.params['num_epochs'] 117 | metrics['batch_size'] = self.params['batch_size'] 118 | metrics['accuracies'] = self.accuracies 119 | metrics['train_accuracies'] = self.train_accuracies 120 | metrics['client_computations'] = self.client_computations 121 | metrics['bytes_written'] = self.bytes_written 122 | metrics['bytes_read'] = self.bytes_read 123 | metrics_dir = os.path.join('out', self.params['dataset'], 'metrics_{}_{}_{}_{}_{}.json'.format(self.params['seed'], self.params['optimizer'], self.params['learning_rate'], self.params['num_epochs'], self.params['mu'])) 124 | #os.mkdir(os.path.join('out', self.params['dataset'])) 125 | if not os.path.exists(os.path.join('out', self.params['dataset'])): 126 | os.mkdir(os.path.join('out', self.params['dataset'])) 127 | with open(metrics_dir, 'w') as ouf: 128 | json.dump(metrics, ouf) 129 | -------------------------------------------------------------------------------- /flearn/utils/tf_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import tensorflow as tf 4 | 5 | def __num_elems(shape): 6 | '''Returns the number of elements in the given shape 7 | 8 | Args: 9 | shape: TensorShape 10 | 11 | Return: 12 | tot_elems: int 13 | ''' 14 | tot_elems = 1 15 | for s in shape: 16 | tot_elems *= int(s) 17 | return tot_elems 18 | 19 | def graph_size(graph): 20 | '''Returns the size of the given graph in bytes 21 | 22 | The size of the graph is calculated by summing up the sizes of each 23 | trainable variable. 
The sizes of variables are calculated by multiplying 24 | the number of bytes in their dtype with their number of elements, captured 25 | in their shape attribute 26 | 27 | Args: 28 | graph: TF graph 29 | Return: 30 | integer representing size of graph (in bytes) 31 | ''' 32 | tot_size = 0 33 | with graph.as_default(): 34 | vs = tf.trainable_variables() 35 | for v in vs: 36 | tot_elems = __num_elems(v.shape) 37 | dtype_size = int(v.dtype.size) 38 | var_size = tot_elems * dtype_size 39 | tot_size += var_size 40 | return tot_size 41 | 42 | def process_sparse_grad(grads): 43 | ''' 44 | Args: 45 | grads: grad returned by LSTM model (only for the shakespaere dataset) 46 | Return: 47 | a flattened grad in numpy (1-D array) 48 | ''' 49 | 50 | indices = grads[0].indices 51 | values = grads[0].values 52 | first_layer_dense = np.zeros((80,8)) 53 | for i in range(indices.shape[0]): 54 | first_layer_dense[indices[i], :] = values[i, :] 55 | 56 | client_grads = first_layer_dense 57 | for i in range(1, len(grads)): 58 | client_grads = np.append(client_grads, grads[i]) # output a flattened array 59 | 60 | 61 | return client_grads 62 | 63 | def process_grad(grads): 64 | ''' 65 | Args: 66 | grads: grad 67 | Return: 68 | a flattened grad in numpy (1-D array) 69 | ''' 70 | 71 | client_grads = grads[0] 72 | 73 | for i in range(1, len(grads)): 74 | client_grads = np.append(client_grads, grads[i]) # output a flattened array 75 | 76 | 77 | return client_grads 78 | 79 | def cosine_sim(a, b): 80 | '''Returns the cosine similarity between two arrays a and b 81 | ''' 82 | dot_product = np.dot(a, b) 83 | norm_a = np.linalg.norm(a) 84 | norm_b = np.linalg.norm(b) 85 | return dot_product * 1.0 / (norm_a * norm_b) 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /flearn/utils/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | def save_obj(obj, name): 4 | with open(name + '.pkl', 'wb') as f: 5 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 6 | 7 | def load_obj(name): 8 | with open(name + '.pkl', 'rb') as f: 9 | return pickle.load(f) 10 | 11 | def iid_divide(l, g): 12 | ''' 13 | divide list l among g groups 14 | each group has either int(len(l)/g) or int(len(l)/g)+1 elements 15 | returns a list of groups 16 | ''' 17 | num_elems = len(l) 18 | group_size = int(len(l)/g) 19 | num_big_groups = num_elems - g * group_size 20 | num_small_groups = g - num_big_groups 21 | glist = [] 22 | for i in range(num_small_groups): 23 | glist.append(l[group_size*i:group_size*(i+1)]) 24 | bi = group_size*num_small_groups 25 | group_size += 1 26 | for i in range(num_big_groups): 27 | glist.append(l[bi+group_size*i:bi+group_size*(i+1)]) 28 | return glist -------------------------------------------------------------------------------- /full_results_real.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/full_results_real.pdf -------------------------------------------------------------------------------- /full_results_real_other.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/full_results_real_other.pdf -------------------------------------------------------------------------------- /full_results_synthetic.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/full_results_synthetic.pdf -------------------------------------------------------------------------------- /images/compare_results_real.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/images/compare_results_real.png -------------------------------------------------------------------------------- /images/full_results_real.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/images/full_results_real.png -------------------------------------------------------------------------------- /logs/sample/nist_0_fedsim.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/logs/sample/nist_0_fedsim.pdf -------------------------------------------------------------------------------- /logs/sample/params.json: -------------------------------------------------------------------------------- 1 | {"eval_every": 1, "batch_size": 10, "num_rounds": 200, "seed": 0, "clients_per_round": 20, "model_params": [26], "mu": 0, "ex_name": "nist_0", "num_groups": 9, "model": "mclr", "dataset": "nist", "num_epochs": 20, "num_iters": 1, "learning_rate": 0.003, "drop_percent": 0.0, "optimizer": "fedsim"} -------------------------------------------------------------------------------- /logs/sample/timetaken.csv: -------------------------------------------------------------------------------- 1 | 4.214725017547607 2 | 5.838357448577881 3 | 2.27826189994812 4 | 3.640329360961914 5 | 3.9433250427246094 6 | 4.371510982513428 7 | 4.614236116409302 8 | 3.6348283290863037 9 | 4.996238946914673 10 | 4.2246363162994385 11 | 3.8461761474609375 12 | 3.9423208236694336 13 | 4.81032657623291 14 | 4.6327292919158936 15 | 4.864189863204956 16 | 4.644549608230591 17 | 3.357804775238037 18 | 3.611405849456787 19 | 4.387576103210449 20 | 3.3639280796051025 21 | 5.5443501472473145 22 | 2.9019081592559814 23 | 2.857591152191162 24 | 3.4291489124298096 25 | 3.949673652648926 26 | 3.728832483291626 27 | 3.3412296772003174 28 | 3.3335158824920654 29 | 4.09697413444519 30 | 6.109966278076172 31 | 4.153619766235352 32 | 2.502737283706665 33 | 3.415215015411377 34 | 2.3686652183532715 35 | 3.5048797130584717 36 | 3.1543667316436768 37 | 4.710537672042847 38 | 3.802248477935791 39 | 2.779290199279785 40 | 3.647050380706787 41 | 2.7246694564819336 42 | 3.9133896827697754 43 | 3.7519407272338867 44 | 2.7095303535461426 45 | 3.036364793777466 46 | 3.8586699962615967 47 | 5.336363315582275 48 | 3.487077236175537 49 | 3.1062116622924805 50 | 4.089224338531494 51 | 4.3005688190460205 52 | 3.6237943172454834 53 | 5.833637237548828 54 | 4.375678300857544 55 | 3.372389316558838 56 | 5.044728994369507 57 | 3.2794342041015625 58 | 3.5190436840057373 59 | 3.564743995666504 60 | 3.7481768131256104 61 | 3.776792049407959 62 | 3.156827449798584 63 | 4.273126602172852 64 | 3.272702693939209 65 | 3.364558219909668 66 | 3.6134274005889893 67 | 4.213018178939819 68 | 3.009146213531494 69 | 2.7177224159240723 70 | 3.214456558227539 71 | 2.91222882270813 72 | 5.4962992668151855 73 | 5.531466484069824 74 | 
4.0989015102386475 75 | 4.856179475784302 76 | 3.8664450645446777 77 | 4.387919187545776 78 | 2.3184447288513184 79 | 3.5940182209014893 80 | 5.098527908325195 81 | 3.8258328437805176 82 | 3.7295711040496826 83 | 3.320307731628418 84 | 5.78113055229187 85 | 3.460848808288574 86 | 3.7840187549591064 87 | 3.936537504196167 88 | 4.583587884902954 89 | 4.4354448318481445 90 | 3.77303147315979 91 | 5.793760299682617 92 | 3.4721591472625732 93 | 4.305266618728638 94 | 3.124553680419922 95 | 4.849768400192261 96 | 6.200813055038452 97 | 4.458336591720581 98 | 5.42701268196106 99 | 4.517723083496094 100 | 3.4376020431518555 101 | 5.089099645614624 102 | 4.69443416595459 103 | 4.045530557632446 104 | 3.4828686714172363 105 | 3.4498486518859863 106 | 4.347341299057007 107 | 3.4853549003601074 108 | 3.4179494380950928 109 | 4.953992605209351 110 | 4.152960538864136 111 | 2.546747922897339 112 | 5.421801328659058 113 | 6.776229381561279 114 | 3.7474520206451416 115 | 4.845839262008667 116 | 2.8177096843719482 117 | 3.4865877628326416 118 | 3.222196102142334 119 | 3.5015692710876465 120 | 4.133359432220459 121 | 4.52689003944397 122 | 3.3103301525115967 123 | 4.601098299026489 124 | 3.8205862045288086 125 | 4.177612543106079 126 | 4.034154653549194 127 | 3.4860522747039795 128 | 3.5163798332214355 129 | 3.654008388519287 130 | 3.204397439956665 131 | 3.4267284870147705 132 | 4.247891664505005 133 | 3.4885201454162598 134 | 3.1615872383117676 135 | 4.064049243927002 136 | 3.8435089588165283 137 | 3.8735852241516113 138 | 4.101383209228516 139 | 3.107581615447998 140 | 3.2751734256744385 141 | 4.465871334075928 142 | 3.7008581161499023 143 | 3.2754745483398438 144 | 3.2202935218811035 145 | 4.118159770965576 146 | 4.30841326713562 147 | 3.80255389213562 148 | 3.1343297958374023 149 | 3.8692893981933594 150 | 4.835238933563232 151 | 3.2457480430603027 152 | 3.8979451656341553 153 | 4.613762855529785 154 | 2.955164909362793 155 | 3.2703709602355957 156 | 4.195127248764038 157 | 4.027691125869751 158 | 3.856401205062866 159 | 3.0798890590667725 160 | 2.8761911392211914 161 | 4.519295692443848 162 | 4.205658912658691 163 | 3.9707353115081787 164 | 4.714170694351196 165 | 4.722447633743286 166 | 3.4032065868377686 167 | 3.8246989250183105 168 | 4.494217157363892 169 | 3.7432897090911865 170 | 4.892650842666626 171 | 4.120609998703003 172 | 4.131703615188599 173 | 4.355011701583862 174 | 4.092770576477051 175 | 3.4949283599853516 176 | 3.3183343410491943 177 | 3.649127721786499 178 | 3.1163415908813477 179 | 3.616954803466797 180 | 5.093567132949829 181 | 2.825251579284668 182 | 3.7706644535064697 183 | 3.6947431564331055 184 | 4.169904470443726 185 | 3.7774462699890137 186 | 5.93481183052063 187 | 3.4171574115753174 188 | 3.385265588760376 189 | 2.6730124950408936 190 | 4.493683099746704 191 | 2.276113748550415 192 | 3.825378179550171 193 | 3.441279888153076 194 | 4.027428150177002 195 | 3.4155771732330322 196 | 4.0339367389678955 197 | 4.612143039703369 198 | 3.1436209678649902 199 | 3.6796154975891113 200 | 3.641611337661743 201 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import importlib 4 | import random 5 | import os 6 | import tensorflow as tf 7 | from flearn.utils.model_utils import read_data 8 | # print(tf.__version__) 9 | # tf = tf.compat.v1 10 | # GLOBAL PARAMETERS 11 | OPTIMIZERS = ['fedavg', 'fedprox', 'feddane', 
'fedddane', 'fedsgd', 'fedprox_origin', 'fedsim'] 12 | DATASETS = ['sent140', 'nist', 'shakespeare', 'mnist','mex' , 13 | 'synthetic_iid', 'synthetic_0_0', 'synthetic_0.5_0.5', 'synthetic_1_1', 14 | 'synthetic_0.25_0.25', 15 | 'synthetic_0.75_0.75', 16 | 'synthetic_0.25_0.75', 17 | 'synthetic_0.75_0.25', 18 | 'news', 19 | 'goodreads' 20 | 21 | ] # NIST is EMNIST in the paepr 22 | 23 | 24 | MODEL_PARAMS = { 25 | 'sent140.bag_dnn': (2,), # num_classes 26 | 'sent140.stacked_lstm': (25, 2, 100), # seq_len, num_classes, num_hidden 27 | 'sent140.stacked_lstm_no_embeddings': (25, 2, 100), # seq_len, num_classes, num_hidden 28 | 'nist.mclr': (26,), # num_classes 29 | 'nist.cnn': (26,), # num_classes 30 | 'mex.mclr': (7,), # num_classes 31 | 'mex.cnn': (7,), # num_classes 32 | 'mex.dnn': (7,), # num_classes 33 | 'mnist.mclr': (10,), # num_classes 34 | 'mnist.cnn': (10,), # num_classes 35 | 'shakespeare.stacked_lstm': (80, 80, 256), # seq_len, emb_dim, num_hidden 36 | 'synthetic.mclr': (10, ), # num_classes 37 | 'news.mclr': (20,), # num_classes 38 | 'goodreads.mclr': (2,), # num_classes 39 | 'goodreads.stacked_lstm': (25,2,10), # seq_len, num_classes, num_hidden 40 | 'goodreads.rnn': (40, 2, 128), # seq_len, num_classes, num_units 41 | 'goodreads.dnn': (2,), # num_classes 42 | 43 | } 44 | 45 | 46 | def read_options(): 47 | ''' Parse command line arguments or load defaults ''' 48 | parser = argparse.ArgumentParser() 49 | 50 | parser.add_argument('--optimizer', 51 | help='name of optimizer;', 52 | type=str, 53 | choices=OPTIMIZERS, 54 | default='fedavg') 55 | parser.add_argument('--dataset', 56 | help='name of dataset;', 57 | type=str, 58 | choices=DATASETS, 59 | default='nist') 60 | parser.add_argument('--model', 61 | help='name of model;', 62 | type=str, 63 | default='stacked_lstm.py') 64 | parser.add_argument('--num_rounds', 65 | help='number of rounds to simulate;', 66 | type=int, 67 | default=-1) 68 | parser.add_argument('--eval_every', 69 | help='evaluate every ____ rounds;', 70 | type=int, 71 | default=-1) 72 | parser.add_argument('--clients_per_round', 73 | help='number of clients trained per round;', 74 | type=int, 75 | default=-1) 76 | parser.add_argument('--batch_size', 77 | help='batch size when clients train on data;', 78 | type=int, 79 | default=10) 80 | parser.add_argument('--num_epochs', 81 | help='number of epochs when clients train on data;', 82 | type=int, 83 | default=1) 84 | parser.add_argument('--num_iters', 85 | help='number of iterations when clients train on data;', 86 | type=int, 87 | default=1) 88 | parser.add_argument('--learning_rate', 89 | help='learning rate for inner solver;', 90 | type=float, 91 | default=0.003) 92 | parser.add_argument('--mu', 93 | help='constant for prox;', 94 | type=float, 95 | default=0) 96 | parser.add_argument('--seed', 97 | help='seed for randomness;', 98 | type=int, 99 | default=0) 100 | parser.add_argument('--drop_percent', 101 | help='percentage of slow devices', 102 | type=float, 103 | default=0.1) 104 | parser.add_argument('--num_groups', 105 | help='Number of groups;', 106 | type=int, 107 | default=1) 108 | parser.add_argument('--ex_name', 109 | help='Run Name to identify;', 110 | type=str, 111 | default='dev') 112 | 113 | try: parsed = vars(parser.parse_args()) 114 | except IOError as msg: parser.error(str(msg)) 115 | 116 | # Set seeds 117 | random.seed(1 + parsed['seed']) 118 | np.random.seed(12 + parsed['seed']) 119 | tf.set_random_seed(123 + parsed['seed']) 120 | 121 | 122 | # load selected model 123 | if 
parsed['dataset'].startswith("synthetic"): # all synthetic datasets use the same model 124 | model_path = '%s.%s.%s.%s' % ('flearn', 'models', 'synthetic', parsed['model']) 125 | else: 126 | model_path = '%s.%s.%s.%s' % ('flearn', 'models', parsed['dataset'], parsed['model']) 127 | 128 | mod = importlib.import_module(model_path) 129 | learner = getattr(mod, 'Model') 130 | print(model_path) 131 | print(learner) 132 | 133 | # load selected trainer 134 | opt_path = 'flearn.trainers.%s' % parsed['optimizer'] 135 | mod = importlib.import_module(opt_path) 136 | optimizer = getattr(mod, 'Server') 137 | 138 | print(opt_path) 139 | print(mod) 140 | print(optimizer) 141 | 142 | # add selected model parameter 143 | parsed['model_params'] = MODEL_PARAMS['.'.join(model_path.split('.')[2:])] 144 | 145 | # print and return 146 | maxLen = max([len(ii) for ii in parsed.keys()]); 147 | fmtString = '\t%' + str(maxLen) + 's : %s'; 148 | print('Arguments:') 149 | for keyPair in sorted(parsed.items()): print(fmtString % keyPair) 150 | 151 | return parsed, learner, optimizer 152 | 153 | def main(): 154 | # suppress tf warnings 155 | # tf.logging.set_verbosity(tf.logging.WARN) 156 | tf.logging.set_verbosity(tf.logging.ERROR) 157 | # parse command line arguments 158 | options, learner, optimizer = read_options() 159 | 160 | # read data 161 | train_path = os.path.join('data', options['dataset'], 'data', 'train') 162 | test_path = os.path.join('data', options['dataset'], 'data', 'test') 163 | dataset = read_data(train_path, test_path) 164 | print(learner) 165 | print(options) 166 | # call appropriate trainer 167 | t = optimizer(options, learner, dataset) 168 | t.train() 169 | 170 | if __name__ == '__main__': 171 | main() 172 | -------------------------------------------------------------------------------- /plot_fedsim_other.py: -------------------------------------------------------------------------------- 1 | # Used to generate - full_results_real_other.pdf 2 | 3 | import json 4 | import os 5 | import csv 6 | import matplotlib.pyplot as plt 7 | import matplotlib 8 | 9 | color_avg ="#ff7f0e" 10 | color_prox ="#13CA91" 11 | color_sim ="#17becf" 12 | 13 | color_avg ="#ff7f0e" 14 | color_prox ="#fb99bc" 15 | color_sim ="#17becf" 16 | linewidth = 1.8 17 | 18 | ROUNDS = 501 19 | rounds = [i for i in range(ROUNDS)] 20 | 21 | datasets = ["mnist","femnist", "mex","goodreads"] 22 | 23 | all = {} 24 | for ds in datasets: 25 | dataset = "results/other/"+ds+".csv" 26 | avg_rounds = [] 27 | avg_test_acc = [] 28 | prox_rounds = [] 29 | prox_test_acc = [] 30 | sim_rounds = [] 31 | sim_test_acc = [] 32 | with open(dataset, 33 | mode='r') as csv_file: 34 | csv_reader = csv.DictReader(csv_file) 35 | line_count = 0 36 | for row in csv_reader: 37 | if line_count == 0: 38 | line_count += 1 39 | avg_rounds.append(float(row["round"])) 40 | avg_test_acc.append(float(row["avg"])) 41 | prox_rounds.append(float(row["round"])) 42 | prox_test_acc.append(float(row["prox"])) 43 | sim_rounds.append(float(row["round"])) 44 | sim_test_acc.append(float(row["sim"])) 45 | 46 | line_count += 1 47 | 48 | all[ds] = {"avg": [], "sim": [], "prox": []} 49 | all[ds]["avg"] = avg_test_acc 50 | all[ds]["sim"] = sim_test_acc 51 | all[ds]["prox"] = prox_test_acc 52 | 53 | 54 | if(False): 55 | print(False) 56 | else: 57 | fig, ax = plt.subplots(2, 2, figsize=[10, 8]) 58 | # linewidth = 1.2 59 | 60 | ax[0,0].plot([i for i in range(31)], all["mnist"]["sim"], linewidth=linewidth, color=color_sim, label="FedSim") 61 | ax[0,0].plot([i for i in range(31)], 
all["mnist"]["avg"],":", alpha=1, linewidth=linewidth, color=color_avg, label="FedAvg") 62 | ax[0,0].plot([i for i in range(31)], all["mnist"]["prox"], "-.", alpha=1, linewidth=linewidth, color=color_prox, label="FedProx") 63 | ax[0,0].set_title("MNIST - CNN",fontweight='bold') 64 | ax[0,0].set_xlim(0, 31) 65 | ax[0,0].set_ylim(0.2, 0.9) 66 | 67 | ax[0,1].plot([i for i in range(501)], all["femnist"]["sim"], linewidth=linewidth, color=color_sim) 68 | ax[0,1].plot([i for i in range(501)], all["femnist"]["avg"],":", alpha=1, linewidth=linewidth, color=color_avg) 69 | ax[0,1].plot([i for i in range(501)], all["femnist"]["prox"], "-.", alpha=1, linewidth=linewidth, color=color_prox) 70 | ax[0,1].set_title("FEMNIST - CNN", fontweight='bold') 71 | ax[0,1].set_xlim(0, 501) 72 | ax[0,1].set_ylim(0.6, 0.95) 73 | 74 | # 75 | ax[1,0].plot([i for i in range(201)], all["mex"]["sim"], linewidth=linewidth, color=color_sim) 76 | ax[1,0].plot([i for i in range(201)], all["mex"]["avg"],":", alpha=1, linewidth=linewidth, color=color_avg) 77 | ax[1,0].plot([i for i in range(201)], all["mex"]["prox"], "-.", alpha=1, linewidth=linewidth, color=color_prox) 78 | ax[1,0].set_title("Fed-MEx - MLP", fontweight='bold') 79 | ax[1,0].set_xlim(0, 201) 80 | ax[1,0].set_ylim(0.68, 0.98) 81 | # 82 | # 83 | ax[1,1].plot([i for i in range(251)], all["goodreads"]["sim"], linewidth=linewidth, color=color_sim) 84 | ax[1,1].plot([i for i in range(251)], all["goodreads"]["avg"],":", alpha=1, linewidth=linewidth, color=color_avg) 85 | ax[1,1].plot([i for i in range(251)], all["goodreads"]["prox"], "-.", alpha=1, linewidth=linewidth, color=color_prox) 86 | ax[1,1].set_title("Fed-Goodreads - RNN", fontweight='bold') 87 | ax[1,1].set_xlim(0, 251) 88 | ax[1,1].set_ylim(0.45, 0.6) 89 | 90 | 91 | plt.subplots_adjust(hspace=0.5) 92 | ax[0,0].set_xlabel("# Rounds") 93 | ax[0,0].set_ylabel('Test Accuracy') 94 | ax[1,0].set_xlabel("# Rounds") 95 | ax[1,0].set_ylabel('Test Accuracy') 96 | 97 | for i in range(2): 98 | for j in range(2): 99 | ax[j, i].spines['bottom'].set_color('#dddddd') 100 | ax[j, i].spines['top'].set_color('#dddddd') 101 | ax[j, i].spines['right'].set_color('#dddddd') 102 | ax[j, i].spines['left'].set_color('#dddddd') 103 | ax[j, i].tick_params(color='#dddddd') 104 | # ax[j, i].set_xlim(0, ROUNDS) 105 | 106 | fig.legend(frameon=False, loc='lower center', ncol=3, prop=dict(weight='normal', size=13), 107 | borderaxespad=-0.3) # note: different from plt.legend 108 | 109 | plt.tight_layout() 110 | plt.subplots_adjust(bottom=0.09) 111 | plt.subplots_adjust(left=0.061, wspace=0.11) 112 | 113 | plt.show() 114 | fig.savefig("full_results_real_cnn.pdf") 115 | 116 | exit(0) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | tensorflow-gpu==1.10 4 | Pillow 5 | matplotlib 6 | jupyter 7 | tqdm 8 | -------------------------------------------------------------------------------- /results/main/00.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.10686600221483941,0.10686600221483941,0.10686600221483941 3 | 1,0.19728682170542636,0.150609080841639,0.15498338870431894 4 | 2,0.28405315614617943,0.20049833887043192,0.20775193798449615 5 | 3,0.37547065337763014,0.2719822812846069,0.26904761904761904 6 | 4,0.42480620155038756,0.3246954595791805,0.3268549280177187 7 | 5,0.4662236987818384,0.37563676633444076,0.3945182724252492 8 | 
6,0.49689922480620163,0.4135658914728682,0.45127353266888154 9 | 7,0.5301218161683278,0.456312292358804,0.5066998892580287 10 | 8,0.5513289036544851,0.4846622369878184,0.5493355481727576 11 | 9,0.570764119601329,0.5093023255813953,0.5907530454042083 12 | 10,0.588704318936877,0.5301218161683279,0.6199889258028792 13 | 11,0.6077519379844961,0.5534330011074197,0.6521594684385382 14 | 12,0.6207087486157252,0.5707641196013289,0.6713178294573644 15 | 13,0.6317829457364342,0.5864894795127353,0.6911406423034328 16 | 14,0.6397009966777407,0.5996124031007751,0.7073643410852712 17 | 15,0.6477851605758582,0.6109634551495017,0.725249169435216 18 | 16,0.6532668881506091,0.620874861572536,0.7402547065337762 19 | 17,0.6604097452934662,0.6337763012181619,0.7567552602436324 20 | 18,0.6623477297895903,0.6428017718715393,0.7708748615725362 21 | 19,0.6667774086378738,0.6513842746400886,0.7817829457364341 22 | 20,0.6704872646733112,0.658139534883721,0.7903100775193798 23 | 21,0.6742524916943521,0.6646179401993355,0.8044296788482836 24 | 22,0.6779069767441862,0.6698228128460686,0.8104651162790698 25 | 23,0.6820044296788482,0.6768549280177188,0.8137873754152825 26 | 24,0.6867663344407531,0.6838870431893688,0.8161129568106312 27 | 25,0.6909745293466224,0.6895348837209302,0.8234219269102991 28 | 26,0.6937430786267996,0.6973975636766335,0.8285714285714285 29 | 27,0.6987264673311187,0.7022702104097452,0.8290697674418606 30 | 28,0.703266888150609,0.7080287929125139,0.8312292358803987 31 | 29,0.7064784053156146,0.7134551495016611,0.832281284606866 32 | 30,0.7094130675526025,0.7184939091915836,0.833499446290144 33 | 31,0.7136212624584718,0.7230343300110741,0.8342746400885938 34 | 32,0.7163898117386488,0.7266888150609081,0.8348837209302326 35 | 33,0.7213178294573643,0.7311184939091916,0.8361572535991141 36 | 34,0.7244739756367663,0.7364341085271319,0.8365448504983389 37 | 35,0.7274640088593576,0.74296788482835,0.8370431893687708 38 | 36,0.7304540420819491,0.7461240310077519,0.8398671096345515 39 | 37,0.7317275747508305,0.7501107419712071,0.8409745293466224 40 | 38,0.733610188261351,0.7538759689922481,0.8414174972314508 41 | 39,0.7357142857142857,0.7580287929125138,0.8410299003322259 42 | 40,0.736655592469546,0.7620155038759691,0.8426356589147287 43 | 41,0.7392580287929125,0.7665559246954596,0.8454595791805093 44 | 42,0.7423034330011075,0.7696566998892581,0.8468992248062016 45 | 43,0.743798449612403,0.7721483942414176,0.8476190476190476 46 | 44,0.7460132890365447,0.774418604651163,0.8479512735326691 47 | 45,0.7475083056478405,0.7775747508305647,0.8476190476190476 48 | 46,0.749390919158361,0.7799557032115171,0.8468992248062016 49 | 47,0.7510520487264674,0.7829457364341087,0.8472868217054264 50 | 48,0.7537652270210411,0.7868770764119601,0.8476190476190476 51 | 49,0.7552602436323366,0.7900332225913621,0.850498338870432 52 | 50,0.7580287929125138,0.7925249169435217,0.8503322259136213 53 | 51,0.759468438538206,0.7947397563676635,0.851550387596899 54 | 52,0.7617386489479513,0.7977297895902546,0.8501107419712072 55 | 53,0.7645071982281285,0.8,0.8493909191583611 56 | 54,0.7672203765227021,0.8032115171650055,0.8484496124031008 57 | 55,0.7688815060908084,0.8050387596899224,0.8501661129568107 58 | 56,0.7711517165005538,0.8075304540420819,0.8509966777408637 59 | 57,0.7724806201550388,0.8102436323366555,0.8516057585825028 60 | 58,0.7746954595791806,0.8132890365448503,0.8518826135105204 61 | 59,0.7766888150609081,0.8150055370985602,0.8532668881506089 62 | 60,0.7776301218161684,0.8167220376522701,0.8529346622369878 63 | 
61,0.7796788482834995,0.8182724252491694,0.8517165005537097 64 | 62,0.7821151716500554,0.8204872646733112,0.8513289036544851 65 | 63,0.7843853820598007,0.8217607973421929,0.8515503875968992 66 | 64,0.7863787375415283,0.8231450719822814,0.8523809523809524 67 | 65,0.7888704318936878,0.8241971207087487,0.8533776301218162 68 | 66,0.788372093023256,0.825249169435216,0.8553709856035437 69 | 67,0.7893687707641196,0.8265227021040974,0.8563676633444076 70 | 68,0.7904761904761906,0.8272978959025472,0.8575858250276854 71 | 69,0.7923034330011074,0.8287929125138428,0.8589147286821704 72 | 70,0.7946290143964563,0.8309523809523809,0.8598006644518272 73 | 71,0.796124031007752,0.8324473975636766,0.8606312292358803 74 | 72,0.7973421926910299,0.8334440753045403,0.8613510520487264 75 | 73,0.7987264673311185,0.8353266888150609,0.8617386489479514 76 | 74,0.7998338870431894,0.8368770764119602,0.8618493909191584 77 | 75,0.8021594684385382,0.837984496124031,0.8622923588039867 78 | 76,0.803156146179402,0.8379844961240309,0.861904761904762 79 | 77,0.8047065337763013,0.8393133997785162,0.8606866002214838 80 | 78,0.8062015503875968,0.8397563676633445,0.8615725359911407 81 | 79,0.8068106312292359,0.8395348837209302,0.8612956810631229 82 | 80,0.8078626799557033,0.8405869324473976,0.8621816168327796 83 | 81,0.8081949058693245,0.8414728682170542,0.8612956810631229 84 | 82,0.8084163898117386,0.8419158361018825,0.862126245847176 85 | 83,0.810077519379845,0.8431339977851606,0.8638981173864895 86 | 84,0.8106312292358804,0.8433554817275747,0.8640088593576967 87 | 85,0.8105758582502768,0.8434662236987818,0.8632890365448506 88 | 86,0.8119601328903654,0.8434108527131783,0.8636766334440752 89 | 87,0.8123477297895901,0.8441306755260243,0.8632336655592469 90 | 88,0.8130675526024363,0.8440753045404209,0.8638981173864896 91 | 89,0.8129014396456256,0.8454042081949058,0.8630675526024362 92 | 90,0.813233665559247,0.8460686600221484,0.8653377630121816 93 | 91,0.8131782945736433,0.8453488372093023,0.8666666666666666 94 | 92,0.813233665559247,0.8459025470653377,0.8666112956810631 95 | 93,0.8138427464008858,0.8460686600221484,0.8652823920265781 96 | 94,0.8140088593576965,0.8466223698781837,0.8659468438538206 97 | 95,0.8147840531561462,0.8466223698781837,0.8658361018826135 98 | 96,0.8152270210409746,0.8477297895902547,0.8661683277962346 99 | 97,0.8150055370985603,0.848671096345515,0.8668327796234773 100 | 98,0.8166112956810632,0.8494462901439643,0.8669435215946844 101 | 99,0.8160022148394241,0.8493909191583611,0.867940199335548 102 | 100,0.8158361018826136,0.8492248062015503,0.8684939091915835 103 | -------------------------------------------------------------------------------- /results/main/0505.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.09072681704260653,0.09072681704260653,0.09072681704260653 3 | 1,0.24445488721804512,0.17781954887218046,0.13348997493734335 4 | 2,0.3483709273182957,0.2517857142857143,0.1599624060150376 5 | 3,0.41810776942355893,0.3097431077694236,0.22465538847117797 6 | 4,0.4887218045112783,0.3969924812030075,0.31130952380952376 7 | 5,0.5201441102756892,0.4345238095238095,0.35037593984962406 8 | 6,0.5390037593984962,0.4683897243107769,0.40673558897243106 9 | 7,0.5485902255639097,0.4948621553884711,0.43170426065162903 10 | 8,0.5528508771929825,0.5153822055137843,0.4629385964912281 11 | 9,0.5605263157894737,0.5253446115288222,0.4854949874686717 12 | 10,0.5708959899749374,0.5400375939849623,0.5228070175438597 13 | 
11,0.5766917293233084,0.5589598997493734,0.5434523809523809 14 | 12,0.5821428571428572,0.5641290726817043,0.5586152882205514 15 | 13,0.5852443609022556,0.5731516290726816,0.5788847117794488 16 | 14,0.5906641604010026,0.5820175438596491,0.5803571428571428 17 | 15,0.5949561403508771,0.5899749373433583,0.5952694235588972 18 | 16,0.6005325814536341,0.6005325814536341,0.6047619047619048 19 | 17,0.6053884711779449,0.605983709273183,0.6109962406015036 20 | 18,0.6090852130325815,0.6094924812030075,0.6142543859649123 21 | 19,0.612092731829574,0.6137844611528822,0.6180764411027567 22 | 20,0.6142857142857144,0.619204260651629,0.6272556390977444 23 | 21,0.6162280701754387,0.6265977443609023,0.6341165413533835 24 | 22,0.6196741854636592,0.6302944862155389,0.6427318295739348 25 | 23,0.6208333333333333,0.6356516290726817,0.6488721804511278 26 | 24,0.6243421052631578,0.637750626566416,0.6519110275689224 27 | 25,0.6271929824561403,0.6433270676691729,0.6565162907268169 28 | 26,0.6286340852130325,0.6491541353383459,0.6625313283208021 29 | 27,0.6284774436090226,0.6515664160401003,0.6670739348370928 30 | 28,0.6306390977443609,0.6551065162907268,0.6715852130325815 31 | 29,0.6306704260651629,0.6569548872180451,0.6766290726817042 32 | 30,0.6312656641604011,0.6570175438596492,0.6796992481203008 33 | 31,0.6310150375939849,0.6579260651629072,0.6843984962406015 34 | 32,0.6324561403508772,0.6607769423558897,0.6880639097744362 35 | 33,0.6324874686716793,0.6630952380952381,0.6892230576441103 36 | 34,0.6328634085213032,0.6670426065162907,0.6920426065162907 37 | 35,0.6329260651629073,0.669016290726817,0.6954260651629074 38 | 36,0.6342105263157894,0.6716478696741854,0.6962719298245614 39 | 37,0.6366541353383459,0.6741228070175439,0.6974310776942356 40 | 38,0.6382205513784461,0.6788220551378448,0.6985275689223057 41 | 39,0.6385651629072682,0.680795739348371,0.7010964912280703 42 | 40,0.6403195488721805,0.6828320802005011,0.7032894736842105 43 | 41,0.6414473684210525,0.6838345864661654,0.7030075187969925 44 | 42,0.6416040100250626,0.6838972431077694,0.7036340852130327 45 | 43,0.6426378446115288,0.6844611528822055,0.7045426065162909 46 | 44,0.6445175438596491,0.6850250626566415,0.7037593984962406 47 | 45,0.6451441102756892,0.6864974937343359,0.7056390977443608 48 | 46,0.6458020050125313,0.6887531328320803,0.7061090225563909 49 | 47,0.6476817042606515,0.6892857142857143,0.706547619047619 50 | 48,0.649279448621554,0.6897869674185464,0.7080200501253133 51 | 49,0.6501253132832081,0.6914473684210527,0.7082393483709273 52 | 50,0.6510338345864662,0.6928884711779449,0.7099310776942356 53 | 51,0.650407268170426,0.6943295739348371,0.7090538847117794 54 | 52,0.6503446115288222,0.6966478696741855,0.7104010025062657 55 | 53,0.6494674185463658,0.6972431077694236,0.7110902255639098 56 | 54,0.650093984962406,0.699498746867168,0.7108082706766917 57 | 55,0.649780701754386,0.6997807017543859,0.7128759398496242 58 | 56,0.6503132832080201,0.7006892230576443,0.7130952380952381 59 | 57,0.6506265664160401,0.7012844611528822,0.7139097744360904 60 | 58,0.6508145363408523,0.7021616541353385,0.7148809523809524 61 | 59,0.6515037593984963,0.7016290726817042,0.7156641604010027 62 | 60,0.65,0.7035714285714286,0.7162280701754387 63 | 61,0.6513784461152882,0.7062969924812028,0.7163220551378447 64 | 62,0.6522556390977444,0.7083020050125314,0.7166666666666667 65 | 63,0.6521616541353383,0.7119674185463658,0.7180137844611528 66 | 64,0.6523496240601503,0.7130952380952381,0.718389724310777 67 | 65,0.6532581453634085,0.7153195488721804,0.718796992481203 68 | 
66,0.6536340852130327,0.7162593984962405,0.718984962406015 69 | 67,0.6536027568922306,0.7144736842105264,0.7197681704260651 70 | 68,0.6531641604010026,0.7185776942355889,0.7201754385964912 71 | 69,0.6533834586466165,0.7210526315789473,0.7205827067669174 72 | 70,0.6525062656641604,0.7226190476190476,0.7197994987468673 73 | 71,0.6514097744360902,0.7234335839598998,0.7222431077694236 74 | 72,0.6506578947368422,0.7256578947368421,0.724561403508772 75 | 73,0.64984335839599,0.7270989974937343,0.7246240601503761 76 | 74,0.6490601503759397,0.7283834586466165,0.7265664160401003 77 | 75,0.6490288220551379,0.7293859649122807,0.7272243107769425 78 | 76,0.6505012531328321,0.730482456140351,0.7276002506265664 79 | 77,0.6513471177944863,0.7319862155388471,0.7279448621553885 80 | 78,0.6525689223057644,0.7308583959899749,0.7290100250626566 81 | 79,0.6524122807017544,0.7328007518796993,0.7293546365914787 82 | 80,0.6539786967418547,0.7321428571428571,0.7302944862155388 83 | 81,0.655764411027569,0.7354010025062656,0.731265664160401 84 | 82,0.6556704260651629,0.7346177944862156,0.731766917293233 85 | 83,0.655451127819549,0.7359022556390978,0.7331140350877193 86 | 84,0.6561716791979949,0.736685463659148,0.7338345864661655 87 | 85,0.656234335839599,0.7375000000000002,0.7347431077694236 88 | 86,0.6555764411027569,0.7393170426065164,0.7350250626566417 89 | 87,0.6572368421052632,0.7400375939849624,0.7350877192982457 90 | 88,0.6591165413533834,0.7404448621553885,0.7354323308270676 91 | 89,0.661810776942356,0.7419172932330825,0.7355889724310779 92 | 90,0.6634711779448621,0.7439223057644112,0.7364661654135338 93 | 91,0.6654135338345865,0.7448308270676692,0.73640350877193 94 | 92,0.6691416040100251,0.7464285714285714,0.7354323308270676 95 | 93,0.6720864661654135,0.7473684210526315,0.7361528822055139 96 | 94,0.675281954887218,0.7473997493734337,0.73687343358396 97 | 95,0.6777568922305764,0.7484962406015038,0.7357769423558896 98 | 96,0.6766290726817044,0.7490914786967416,0.7360588972431078 99 | 97,0.676347117794486,0.7501566416040102,0.7348370927318296 100 | 98,0.6764724310776943,0.7509711779448622,0.7351817042606517 101 | 99,0.6766917293233082,0.7525689223057644,0.7351503759398496 102 | 100,0.6765037593984963,0.7526002506265663,0.7354949874686717 103 | -------------------------------------------------------------------------------- /results/main/11.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.10363732208750658,0.10363732208750658,0.10363732208750658 3 | 1,0.2700316288877175,0.2264891934633632,0.15213494992092777 4 | 2,0.3386399578281497,0.27108592514496577,0.22132314180284657 5 | 3,0.39891934633632053,0.3394306800210859,0.25761729045861886 6 | 4,0.42698998418555606,0.35972588297311553,0.3022667369530838 7 | 5,0.4405376910911966,0.3814443858724301,0.3090142329994729 8 | 6,0.4704533473906167,0.4059040590405904,0.3151818661043753 9 | 7,0.4796257248286768,0.413995782814971,0.3357933579335794 10 | 8,0.4809963099630997,0.434343700579863,0.36731681602530314 11 | 9,0.4937796520822352,0.4537954665260938,0.39156562994201377 12 | 10,0.48943068002108603,0.4445967316816026,0.4137322087506589 13 | 11,0.49583552978386924,0.4593832366895097,0.39973642593568787 14 | 12,0.49525566684238265,0.4679230363732209,0.4363468634686347 15 | 13,0.4990247759620452,0.4785714285714286,0.44064312071692147 16 | 14,0.4979177648919347,0.478360569319979,0.465919873484449 17 | 15,0.5045071164997363,0.49088033737480236,0.4722192936215077 18 | 
16,0.5127833421191356,0.5017923036373221,0.4804691618344755 19 | 17,0.5225355824986823,0.5089878755930416,0.4707432788613601 20 | 18,0.5233526620980496,0.5227464417501319,0.4813916710595678 21 | 19,0.5268318397469689,0.5226937269372693,0.49053769109119655 22 | 20,0.5406167633104904,0.5442540853979968,0.49095940959409595 23 | 21,0.5447285187137586,0.539799683711123,0.5121244069583553 24 | 22,0.5487348444913019,0.5357933579335793,0.49501845018450186 25 | 23,0.5534264628360569,0.5397469688982603,0.5076963626779125 26 | 24,0.559040590405904,0.5387453874538745,0.5034791776489194 27 | 25,0.5594623089088033,0.5386663152345809,0.5087243015287295 28 | 26,0.5601476014760148,0.5370848708487084,0.510411175540327 29 | 27,0.5624670532419611,0.540590405904059,0.513837638376384 30 | 28,0.5661834475487613,0.5385081707959937,0.512967843964154 31 | 29,0.5657880864522932,0.5330785450711648,0.5151555086979441 32 | 30,0.5696889826041118,0.5298892988929889,0.513995782814971 33 | 31,0.5737480231945178,0.5365050079072219,0.5191881918819188 34 | 32,0.5766736953083816,0.5496837111228255,0.5197416974169743 35 | 33,0.578492356352135,0.5531101739588824,0.5189509752240379 36 | 34,0.5808645229309436,0.5606220347917764,0.5162098049551925 37 | 35,0.582683183974697,0.5721929362150765,0.5173695308381655 38 | 36,0.5810753821823932,0.5747759620453348,0.5170795993674222 39 | 37,0.5853452820242488,0.5689509752240379,0.5087770163415919 40 | 38,0.5885081707959937,0.5779652082235108,0.5144702161307327 41 | 39,0.5952029520295202,0.593384290985767,0.5186083289404323 42 | 40,0.5986030574591461,0.5921982076963626,0.522509225092251 43 | 41,0.5952556668423828,0.5883500263574064,0.5198734844491302 44 | 42,0.5958882445967316,0.5838692672641012,0.5183447548761202 45 | 43,0.5954665260938324,0.5865050079072219,0.5216921454928836 46 | 44,0.594148655772272,0.594122298365841,0.5181075382182393 47 | 45,0.5919082762256194,0.5897469688982604,0.5137058513442276 48 | 46,0.6023194517659461,0.6024512387981024,0.5173695308381655 49 | 47,0.6032419609910384,0.5985767000527149,0.5187664733790195 50 | 48,0.6084080126515552,0.6055877701634158,0.5188982604111755 51 | 49,0.6109646810753823,0.6091460200316289,0.5225355824986821 52 | 50,0.6129151291512915,0.6134686346863468,0.5258566157090143 53 | 51,0.6136794939377965,0.6205587770163415,0.5225882973115445 54 | 52,0.6156826568265682,0.6253558249868212,0.5268845545598312 55 | 53,0.6143384290985766,0.6264628360569321,0.5294148655772272 56 | 54,0.61462836056932,0.6302319451765946,0.5343173431734318 57 | 55,0.6150764364786504,0.635239852398524,0.5350026357406431 58 | 56,0.615155508697944,0.6368212967843965,0.5400105429625726 59 | 57,0.6110173958882447,0.6427253558249868,0.5396151818661044 60 | 58,0.6124934106483924,0.6511070110701106,0.535740643120717 61 | 59,0.6074327886136004,0.6515814443858724,0.5371639430680022 62 | 60,0.6040590405904059,0.6511333684765419,0.5375856615709014 63 | 61,0.6079599367422244,0.653690036900369,0.5428307854507116 64 | 62,0.6108328940432262,0.6510279388508171,0.5505535055350554 65 | 63,0.6101739588824461,0.651897733263047,0.5512915129151292 66 | 64,0.6099367422245651,0.65392725355825,0.5512387981022667 67 | 65,0.6131523458091723,0.6544016868740117,0.5515550869794413 68 | 66,0.6105166051660517,0.6575118608328939,0.5531365313653136 69 | 67,0.6076963626779126,0.6504744333157617,0.5528202424881391 70 | 68,0.6022667369530837,0.6341328413284133,0.5575909330521878 71 | 69,0.6038481813389562,0.6340274117026886,0.558144438587243 72 | 70,0.6043753294675803,0.6538745387453874,0.5592250922509225 73 | 
71,0.6063257775434896,0.6521349499209278,0.5631259884027411 74 | 72,0.6079599367422247,0.6604638903531892,0.5588297311544543 75 | 73,0.6088033737480231,0.6615709014232999,0.5602793885081707 76 | 74,0.6081707959936741,0.6510806536636795,0.56004217185029 77 | 75,0.6076172904586188,0.6412229836584081,0.5653663679493938 78 | 76,0.6093305218766474,0.632577754348972,0.5670005271481285 79 | 77,0.6100421718502899,0.6287032156035846,0.5723510806536636 80 | 78,0.6076172904586188,0.6247759620453347,0.5748286768581973 81 | 79,0.6020822351080654,0.625303110173959,0.5699525566684238 82 | 80,0.6006062203479177,0.6178703215603586,0.5699525566684239 83 | 81,0.6038745387453874,0.6240115972588298,0.5685028993147073 84 | 82,0.607116499736426,0.6204269899841856,0.5692409066947812 85 | 83,0.6118608328940432,0.6275171323141802,0.5712177121771218 86 | 84,0.6216921454928835,0.6465735371639431,0.5710068529256722 87 | 85,0.6225882973115445,0.6595677385345283,0.5766473379019504 88 | 86,0.6228518713758566,0.6595413811280969,0.5786768581971535 89 | 87,0.6234580917237743,0.6676594623089088,0.5797575118608329 90 | 88,0.6206378492356354,0.6714022140221403,0.5823405376910912 91 | 89,0.6195308381655246,0.6719293621507644,0.5828413284132841 92 | 90,0.6206114918292039,0.6730890880337375,0.5814971006852926 93 | 91,0.6217185028993147,0.6721665788086453,0.5766736953083818 94 | 92,0.6236162361623616,0.6695835529783869,0.5771481286241434 95 | 93,0.6219293621507643,0.6693726937269372,0.5779124934106483 96 | 94,0.6179493937796521,0.6709541381128098,0.5767791249341064 97 | 95,0.6141539272535582,0.6710859251449658,0.5795466526093832 98 | 96,0.6111491829204004,0.6741433842909857,0.5835529783869267 99 | 97,0.6119926199261995,0.6785977859778599,0.5838165524512388 100 | 98,0.6115445440168686,0.6813916710595678,0.5914602003162889 101 | 99,0.610964681075382,0.6816552451238798,0.5910648392198209 102 | 100,0.6135477069056405,0.6783342119135476,0.5897733263046915 103 | -------------------------------------------------------------------------------- /results/main/2525.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.09173881673881674,0.09173881673881674,0.09173881673881674 3 | 1,0.2435425685425685,0.2174242424242424,0.17453102453102454 4 | 2,0.3255050505050505,0.2979076479076479,0.21313131313131312 5 | 3,0.3713924963924964,0.3396825396825397,0.27575757575757576 6 | 4,0.40584415584415584,0.38582251082251084,0.319011544011544 7 | 5,0.44062049062049063,0.42676767676767674,0.3725108225108225 8 | 6,0.4653318903318904,0.4591991341991343,0.41396103896103903 9 | 7,0.483982683982684,0.4845598845598844,0.45371572871572874 10 | 8,0.5003968253968254,0.5044733044733044,0.4821789321789322 11 | 9,0.5178932178932178,0.5377705627705627,0.5086940836940836 12 | 10,0.5334415584415585,0.5523088023088022,0.5408008658008657 13 | 11,0.536075036075036,0.5648268398268398,0.565873015873016 14 | 12,0.5435064935064935,0.5750360750360749,0.58997113997114 15 | 13,0.5496031746031745,0.5857864357864356,0.6139610389610388 16 | 14,0.5545454545454546,0.5962842712842713,0.6323232323232323 17 | 15,0.5563131313131312,0.6073953823953823,0.6495310245310244 18 | 16,0.5616161616161616,0.6179292929292929,0.666017316017316 19 | 17,0.5634559884559884,0.6275974025974026,0.6742784992784995 20 | 18,0.5669913419913419,0.6356060606060606,0.6850649350649352 21 | 19,0.5691558441558441,0.6441558441558441,0.6968975468975469 22 | 20,0.5748556998556997,0.6488455988455988,0.7085137085137084 23 | 
21,0.5788961038961038,0.6563131313131314,0.7137806637806637 24 | 22,0.5828643578643578,0.6627705627705628,0.7206349206349205 25 | 23,0.5854617604617605,0.6697691197691197,0.7276695526695528 26 | 24,0.5892496392496392,0.6744588744588744,0.7322510822510822 27 | 25,0.5939393939393939,0.6806637806637806,0.7375180375180374 28 | 26,0.5966810966810966,0.685064935064935,0.7399711399711398 29 | 27,0.6015873015873016,0.6893578643578643,0.7474025974025973 30 | 28,0.6053391053391052,0.6936147186147187,0.7512626262626262 31 | 29,0.6066017316017316,0.6975468975468976,0.7537157287157288 32 | 30,0.6103535353535352,0.7007575757575757,0.7601370851370851 33 | 31,0.6145743145743144,0.7052669552669553,0.7636363636363637 34 | 32,0.6153318903318903,0.7084776334776334,0.765981240981241 35 | 33,0.6179292929292929,0.7126984126984125,0.7686147186147186 36 | 34,0.6191919191919192,0.7161616161616161,0.7721139971139971 37 | 35,0.6210317460317459,0.7202741702741702,0.7752164502164501 38 | 36,0.6226190476190475,0.7229437229437229,0.7754329004329006 39 | 37,0.6234126984126984,0.7261904761904762,0.7795093795093796 40 | 38,0.624098124098124,0.729040404040404,0.780916305916306 41 | 39,0.6261904761904761,0.7331529581529582,0.7844155844155846 42 | 40,0.6258297258297257,0.7370851370851371,0.7843795093795095 43 | 41,0.6266233766233764,0.7413419913419914,0.7866883116883118 44 | 42,0.6272727272727272,0.7457792207792208,0.7908369408369409 45 | 43,0.6282106782106782,0.7491702741702742,0.7902597402597404 46 | 44,0.6271645021645021,0.7533189033189034,0.7931096681096682 47 | 45,0.6274531024531024,0.7554112554112554,0.7930375180375182 48 | 46,0.6275252525252525,0.7580086580086581,0.7946248196248197 49 | 47,0.6272727272727273,0.7605339105339106,0.7924963924963926 50 | 48,0.6283910533910534,0.7641053391053392,0.7919552669552671 51 | 49,0.6293650793650793,0.7653318903318903,0.7942279942279943 52 | 50,0.6295454545454545,0.7678571428571429,0.7955627705627707 53 | 51,0.6308441558441558,0.7703823953823955,0.796933621933622 54 | 52,0.6319624819624821,0.771789321789322,0.7982323232323234 55 | 53,0.6325757575757576,0.7731601731601732,0.7968614718614719 56 | 54,0.6332972582972584,0.7756854256854258,0.7970779220779223 57 | 55,0.6329365079365079,0.7772727272727273,0.7972582972582973 58 | 56,0.6348124098124098,0.7793290043290044,0.796933621933622 59 | 57,0.6375541125541127,0.7804112554112554,0.7969336219336219 60 | 58,0.6385281385281385,0.7821789321789323,0.7976551226551227 61 | 59,0.6414862914862915,0.7836940836940839,0.7993506493506496 62 | 60,0.6424963924963926,0.7847402597402598,0.8004329004329005 63 | 61,0.6447691197691199,0.7860750360750361,0.8024531024531025 64 | 62,0.6480519480519482,0.7875901875901877,0.8040043290043292 65 | 63,0.6510461760461762,0.7877344877344878,0.80487012987013 66 | 64,0.6537518037518036,0.7890331890331891,0.8065656565656566 67 | 65,0.6567460317460316,0.7897546897546898,0.8061688311688312 68 | 66,0.6594155844155842,0.7910533910533912,0.8069624819624821 69 | 67,0.663023088023088,0.7933261183261185,0.8077200577200578 70 | 68,0.666053391053391,0.7944083694083696,0.8064574314574315 71 | 69,0.6686868686868687,0.7959235209235208,0.8061688311688312 72 | 70,0.6734126984126984,0.7970418470418471,0.8075396825396826 73 | 71,0.6761544011544011,0.797979797979798,0.8082611832611833 74 | 72,0.6799062049062048,0.8003607503607505,0.8097402597402595 75 | 73,0.6836219336219335,0.8011183261183262,0.8103535353535353 76 | 74,0.6867604617604617,0.8024170274170274,0.8099567099567099 77 | 
75,0.690079365079365,0.8026695526695525,0.8123015873015872 78 | 76,0.6929292929292928,0.8037157287157288,0.8134920634920634 79 | 77,0.6961038961038961,0.8055916305916304,0.8129870129870128 80 | 78,0.6976551226551226,0.8072510822510821,0.8131673881673881 81 | 79,0.6995310245310244,0.8088023088023086,0.8137445887445888 82 | 80,0.7012626262626261,0.8099206349206348,0.8151515151515152 83 | 81,0.7027056277056277,0.8107503607503606,0.8159812409812413 84 | 82,0.7056998556998556,0.8119408369408367,0.8160533910533913 85 | 83,0.7078643578643578,0.8128066378066376,0.8157287157287157 86 | 84,0.7117965367965368,0.8139249639249638,0.8171717171717172 87 | 85,0.7125901875901876,0.8146464646464645,0.8175685425685426 88 | 86,0.7156565656565655,0.8151515151515152,0.8174963924963926 89 | 87,0.7175685425685424,0.814935064935065,0.8194083694083695 90 | 88,0.7203823953823953,0.8155122655122654,0.8203823953823954 91 | 89,0.7228354978354977,0.8163059163059162,0.8208874458874459 92 | 90,0.7254689754689754,0.8171717171717172,0.8216450216450214 93 | 91,0.7272005772005772,0.8172799422799424,0.8214285714285712 94 | 92,0.7288961038961039,0.8182900432900433,0.8216810966810965 95 | 93,0.7303030303030302,0.8190115440115441,0.8217171717171717 96 | 94,0.7313492063492063,0.820165945165945,0.8209235209235209 97 | 95,0.7337301587301587,0.820093795093795,0.82012987012987 98 | 96,0.7353896103896103,0.8206349206349206,0.8203823953823954 99 | 97,0.7342352092352092,0.8207431457431457,0.8203823953823953 100 | 98,0.7353174603174603,0.8211399711399711,0.82012987012987 101 | 99,0.7369047619047618,0.8208152958152957,0.82027417027417 102 | 100,0.738059163059163,0.8207792207792206,0.8208513708513707 103 | -------------------------------------------------------------------------------- /results/main/7575.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.08952205882352941,0.08952205882352941,0.08952205882352941 3 | 1,0.2142331932773109,0.1603991596638656,0.1470325630252101 4 | 2,0.30354516806722687,0.2290703781512605,0.1746323529411765 5 | 3,0.34868697478991606,0.2792804621848739,0.21701680672268908 6 | 4,0.39845063025210087,0.32796743697478986,0.25829831932773106 7 | 5,0.4363970588235294,0.3702993697478991,0.28332457983193277 8 | 6,0.44708508403361347,0.3951155462184874,0.30795693277310926 9 | 7,0.471218487394958,0.4271008403361345,0.32555147058823525 10 | 8,0.48981092436974794,0.4507352941176471,0.34908088235294116 11 | 9,0.49892331932773104,0.469905462184874,0.3680672268907563 12 | 10,0.522452731092437,0.4919117647058824,0.3959033613445378 13 | 11,0.5404149159663867,0.5110556722689076,0.42171743697478986 14 | 12,0.548765756302521,0.5204306722689076,0.439968487394958 15 | 13,0.551969537815126,0.5248949579831933,0.46302521008403363 16 | 14,0.5618172268907564,0.5340336134453781,0.47938550420168063 17 | 15,0.5740283613445378,0.5451943277310924,0.4914915966386555 18 | 16,0.5821691176470588,0.556827731092437,0.5103203781512605 19 | 17,0.5951418067226892,0.5686974789915966,0.5239758403361344 20 | 18,0.6059086134453782,0.5735819327731092,0.5410714285714285 21 | 19,0.6131302521008405,0.5841386554621848,0.5473476890756305 22 | 20,0.6197216386554621,0.5919117647058822,0.5544117647058824 23 | 21,0.6237394957983194,0.598844537815126,0.5591649159663864 24 | 22,0.6294117647058824,0.6061974789915968,0.5713760504201683 25 | 23,0.6328518907563024,0.6116596638655463,0.5852415966386555 26 | 24,0.6338497899159663,0.6178571428571429,0.5950892857142858 27 | 
25,0.6370798319327732,0.623424369747899,0.5972951680672268 28 | 26,0.6377888655462186,0.6293329831932774,0.5955094537815125 29 | 27,0.6387867647058824,0.6359506302521007,0.5985819327731093 30 | 28,0.6405987394957983,0.6394957983193278,0.6110294117647059 31 | 29,0.6407300420168067,0.6431460084033613,0.6109768907563027 32 | 30,0.6372636554621849,0.6464548319327732,0.6170693277310924 33 | 31,0.6387079831932775,0.6492647058823529,0.6209033613445378 34 | 32,0.6387342436974791,0.6518382352941176,0.6298844537815125 35 | 33,0.6405724789915966,0.6546218487394958,0.6314075630252101 36 | 34,0.6431985294117647,0.6580619747899159,0.6334033613445378 37 | 35,0.6461134453781513,0.6602678571428572,0.6369485294117646 38 | 36,0.6468224789915966,0.6630514705882352,0.6376838235294118 39 | 37,0.6479254201680672,0.6654936974789916,0.638813025210084 40 | 38,0.6498424369747899,0.666701680672269,0.6454831932773109 41 | 39,0.6508403361344537,0.6676207983193279,0.6427258403361343 42 | 40,0.6519695378151261,0.670561974789916,0.6432510504201682 43 | 41,0.6539128151260503,0.672951680672269,0.645745798319328 44 | 42,0.6534926470588235,0.6758665966386554,0.6477415966386555 45 | 43,0.652389705882353,0.6785976890756302,0.6514705882352941 46 | 44,0.6499474789915967,0.6817752100840336,0.6504726890756303 47 | 45,0.6492384453781511,0.6847689075630252,0.647531512605042 48 | 46,0.6474264705882353,0.6882615546218488,0.6496060924369749 49 | 47,0.6478466386554622,0.6898634453781513,0.6510504201680671 50 | 48,0.6493172268907562,0.6923581932773111,0.6509453781512604 51 | 49,0.651155462184874,0.6962447478991596,0.652127100840336 52 | 50,0.6530462184873951,0.6997636554621849,0.6529936974789915 53 | 51,0.6535976890756302,0.7014180672268907,0.6488970588235293 54 | 52,0.6536764705882353,0.7022321428571429,0.651811974789916 55 | 53,0.654359243697479,0.7033088235294118,0.6499212184873948 56 | 54,0.6546743697478992,0.7056460084033613,0.6545168067226891 57 | 55,0.6530724789915966,0.7067752100840335,0.6515493697478991 58 | 56,0.6511292016806722,0.7074579831932772,0.6495010504201681 59 | 57,0.6500525210084033,0.7096113445378152,0.6488182773109245 60 | 58,0.6492647058823529,0.710267857142857,0.6475840336134452 61 | 59,0.64968487394958,0.7115808823529411,0.6522846638655462 62 | 60,0.6492647058823529,0.7141806722689076,0.6503939075630252 63 | 61,0.6480829831932773,0.716281512605042,0.6551207983193278 64 | 62,0.6470325630252101,0.7173319327731092,0.6543855042016806 65 | 63,0.6470063025210084,0.719485294117647,0.6552258403361344 66 | 64,0.6489495798319329,0.7206932773109244,0.6561186974789915 67 | 65,0.6518644957983194,0.721796218487395,0.6572216386554622 68 | 66,0.6535714285714287,0.7226365546218487,0.6517594537815127 69 | 67,0.6552783613445379,0.7215861344537816,0.6505252100840336 70 | 68,0.6544905462184873,0.7237132352941177,0.6476102941176471 71 | 69,0.6541228991596638,0.7245273109243697,0.6434611344537815 72 | 70,0.6543067226890756,0.7277310924369749,0.6435399159663865 73 | 71,0.6564075630252102,0.7301207983193279,0.6418067226890757 74 | 72,0.6555672268907563,0.7304884453781512,0.6383928571428572 75 | 73,0.6566439075630253,0.7308035714285714,0.6384191176470588 76 | 74,0.6571953781512606,0.730908613445378,0.6362132352941177 77 | 75,0.6571165966386556,0.7312762605042018,0.635031512605042 78 | 76,0.6571691176470587,0.7308298319327732,0.6341386554621848 79 | 77,0.6576155462184874,0.7295693277310924,0.6344012605042016 80 | 78,0.6585084033613445,0.7300157563025209,0.6353203781512606 81 | 79,0.6604254201680672,0.7305147058823529,0.6372899159663865 82 | 
80,0.6595063025210084,0.731171218487395,0.6397058823529411 83 | 81,0.6568539915966387,0.7334821428571429,0.6371848739495797 84 | 82,0.6535976890756301,0.7341386554621848,0.6373686974789916 85 | 83,0.6554884453781512,0.7352153361344538,0.6379726890756302 86 | 84,0.6561974789915966,0.7357930672268908,0.6404149159663864 87 | 85,0.6573004201680672,0.7361607142857142,0.6392594537815124 88 | 86,0.6580882352941178,0.7375262605042017,0.6441439075630252 89 | 87,0.6607142857142857,0.7375262605042017,0.645640756302521 90 | 88,0.6634191176470589,0.7376050420168068,0.6487132352941177 91 | 89,0.6668329831932772,0.7373949579831932,0.6509716386554624 92 | 90,0.6717436974789915,0.7377626050420169,0.6519170168067226 93 | 91,0.673686974789916,0.7378939075630252,0.6540441176470589 94 | 92,0.676811974789916,0.7380514705882353,0.6541491596638656 95 | 93,0.6772321428571428,0.7382878151260505,0.6560136554621849 96 | 94,0.6781775210084036,0.7387342436974789,0.6535714285714287 97 | 95,0.6774947478991598,0.7395483193277309,0.6543329831932774 98 | 96,0.676890756302521,0.7401785714285716,0.6535714285714286 99 | 97,0.6757615546218487,0.7403886554621847,0.6537289915966387 100 | 98,0.671638655462185,0.7406775210084033,0.6519432773109243 101 | 99,0.6711922268907563,0.7419905462184874,0.6523897058823529 102 | 100,0.6710346638655462,0.742436974789916,0.6547531512605043 103 | -------------------------------------------------------------------------------- /results/main/IID.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.10736468500443656,0.10736468500443656,0.10736468500443656 3 | 1,0.5493877551020409,0.2987045252883762,0.5269210292812776 4 | 2,0.588890860692103,0.4537000887311447,0.5488198757763976 5 | 3,0.6124933451641527,0.5083939662821653,0.5590771960958296 6 | 4,0.6319432120674356,0.5220940550133096,0.5699378881987579 7 | 5,0.6501863354037266,0.52773735581189,0.5810115350488022 8 | 6,0.6649511978704525,0.5311446317657498,0.5907719609582964 9 | 7,0.6774090505767525,0.5336291038154393,0.6009937888198759 10 | 8,0.6871694764862466,0.5367879325643301,0.6100443655723159 11 | 9,0.6956166814551908,0.5388110026619343,0.6173558118899733 12 | 10,0.7048092280390418,0.5423957409050578,0.6253061224489797 13 | 11,0.7144631765749777,0.5459804791481812,0.6319432120674355 14 | 12,0.7219520851818989,0.5490683229813663,0.6384738243123336 15 | 13,0.7295119787045253,0.55226264418811,0.6441526175687667 16 | 14,0.7374622892635314,0.556131322094055,0.6497249334516415 17 | 15,0.7455545696539487,0.5592546583850931,0.6545519077196095 18 | 16,0.7527595385980479,0.5616681455190772,0.6602661934338954 19 | 17,0.7600709849157053,0.5647914818101154,0.6650576752440107 20 | 18,0.7663886424134871,0.5674889086069208,0.6693167701863354 21 | 19,0.7720319432120675,0.5708606921029281,0.6729724933451641 22 | 20,0.7786335403726709,0.573522626441881,0.6761668145519077 23 | 21,0.7837799467613131,0.5758651286601597,0.6788642413487133 24 | 22,0.7885714285714285,0.577639751552795,0.6825909494232475 25 | 23,0.7924401064773736,0.5795208518189885,0.6853593611357587 26 | 24,0.79527950310559,0.5819698314108253,0.6887666370896184 27 | 25,0.8001419698314107,0.5843478260869565,0.6919964507542146 28 | 26,0.8034782608695652,0.5876131322094055,0.6954037267080745 29 | 27,0.8064241348713398,0.5901330967169477,0.6976397515527951 30 | 28,0.809192546583851,0.5932209405501332,0.6995208518189885 31 | 29,0.8127772848269743,0.5953859804791481,0.7019343389529725 32 | 
30,0.8153682342502218,0.5972670807453415,0.7044543034605147 33 | 31,0.8188464951197871,0.5992546583850931,0.7062999112688554 34 | 32,0.820798580301686,0.6013132209405501,0.7074001774622892 35 | 33,0.8236379769299024,0.6040106477373559,0.709849157054126 36 | 34,0.8257320319432121,0.6054658385093168,0.7114463176574978 37 | 35,0.828039041703638,0.6070275066548358,0.7133274179236914 38 | 36,0.8301685891748004,0.6083762200532387,0.7155634427684118 39 | 37,0.8310204081632653,0.6105767524401065,0.7176574977817214 40 | 38,0.8333274179236912,0.6119254658385094,0.7194321206743568 41 | 39,0.835634427684117,0.61377107364685,0.7216326530612245 42 | 40,0.8370186335403726,0.6151907719609583,0.7236202307009761 43 | 41,0.8384738243123337,0.6166459627329193,0.7256787932564331 44 | 42,0.8413487133984028,0.6183141082519965,0.7280567879325643 45 | 43,0.8431233362910381,0.6200887311446319,0.7310736468500443 46 | 44,0.8435847382431233,0.6212954747116237,0.732919254658385 47 | 45,0.8457142857142856,0.6228216503992903,0.7362555456965396 48 | 46,0.8463531499556343,0.6238864241348714,0.7385625554569654 49 | 47,0.8475953859804791,0.6255190771960957,0.7405856255545696 50 | 48,0.8486601597160603,0.6269387755102039,0.7425022182786158 51 | 49,0.8506122448979592,0.6278970718722271,0.7448802129547472 52 | 50,0.8514640638864241,0.6290328305235138,0.7467258207630878 53 | 51,0.8524578527062999,0.6305590062111801,0.7495652173913042 54 | 52,0.8530257320319432,0.6313753327417924,0.7511978704525287 55 | 53,0.8537355811889975,0.6325110913930789,0.7537178349600708 56 | 54,0.8554037267080746,0.6338952972493346,0.7559893522626443 57 | 55,0.856184560780834,0.6351730257320318,0.7586157941437445 58 | 56,0.8578172138420586,0.6357763975155281,0.7600709849157053 59 | 57,0.8590239574090506,0.6371606033717835,0.7620585625554568 60 | 58,0.8605501330967169,0.637941437444543,0.7642590949423247 61 | 59,0.8615439219165927,0.6385803016858919,0.7661047027506654 62 | 60,0.8624312333629105,0.6395385980479149,0.7686601597160603 63 | 61,0.863815439219166,0.6402839396628217,0.7700088731144633 64 | 62,0.8648092280390418,0.6413842058562556,0.7718189884649512 65 | 63,0.8664063886424136,0.6423779946761313,0.7740550133096717 66 | 64,0.8667613132209405,0.643513753327418,0.7764685004436559 67 | 65,0.8683584738243124,0.644614019520852,0.7780656610470276 68 | 66,0.8691038154392191,0.6455013309671696,0.7796628216503994 69 | 67,0.8701330967169477,0.6467080745341613,0.7822892635314995 70 | 68,0.8706654835847383,0.6477728482697427,0.7839219165927241 71 | 69,0.8711978704525288,0.6484826974267968,0.7856965394853593 72 | 70,0.8720851818988465,0.6495829636202307,0.7869387755102041 73 | 71,0.872404614019521,0.6502573203194321,0.7891393078970719 74 | 72,0.8730434782608696,0.6511446317657498,0.7896007098491571 75 | 73,0.8729015084294587,0.6518189884649512,0.7908074534161491 76 | 74,0.8740017746228925,0.6527772848269743,0.7919432120674356 77 | 75,0.8742857142857142,0.6537355811889972,0.7937888198757763 78 | 76,0.8748890860692103,0.6543744454303462,0.7953504880212955 79 | 77,0.8753149955634427,0.6550488021295475,0.7960958296362023 80 | 78,0.8754569653948535,0.6557586512866017,0.7981898846495119 81 | 79,0.8762732919254658,0.6562555456965395,0.7988287488908608 82 | 80,0.876379769299024,0.6565039929015084,0.799751552795031 83 | 81,0.8769121561668145,0.6570718722271518,0.8013487133984029 84 | 82,0.8771251109139308,0.6574977817213842,0.8029103815439219 85 | 83,0.8777994676131321,0.658243123336291,0.8034782608695652 86 | 84,0.8782253771073647,0.6587755102040818,0.8047204968944098 
87 | 85,0.8785448092280391,0.6595563442768413,0.8054303460514641 88 | 86,0.878935226264419,0.6602661934338953,0.8064596273291927 89 | 87,0.8790771960958297,0.6612244897959183,0.8076308784383318 90 | 88,0.8793611357586515,0.6620053238686779,0.8090505767524401 91 | 89,0.8800354924578527,0.6624312333629103,0.8094764862466727 92 | 90,0.8803904170363798,0.6629991126885538,0.8108961845607807 93 | 91,0.8804968944099378,0.6640993788819874,0.8114640638864241 94 | 92,0.8807453416149068,0.6645962732919255,0.8122448979591838 95 | 93,0.8809937888198759,0.6649157054125997,0.8132741792369121 96 | 94,0.8816681455190771,0.6655190771960959,0.8143389529724933 97 | 95,0.882058562555457,0.6657675244010648,0.815581188997338 98 | 96,0.8820940550133096,0.6665483584738244,0.8158651286601597 99 | 97,0.8832653061224491,0.6675421472937001,0.81678793256433 100 | 98,0.8833007985803015,0.6680745341614907,0.8171073646850044 101 | 99,0.8839396628216504,0.6687133984028394,0.8184205856255545 102 | 100,0.8844365572315884,0.6695297249334515,0.8190594498669034 103 | -------------------------------------------------------------------------------- /results/main/mnist.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.10386650386650387,0.10386650386650387,0.10386650386650387 3 | 1,0.33560478322383086,0.2569761807857046,0.36892842607128323 4 | 2,0.46288350097873904,0.3878675116770354,0.5368722987770607 5 | 3,0.533531019245305,0.4857646762408666,0.639897668469097 6 | 4,0.5789871504157218,0.5537957633195727,0.6957109909490863 7 | 5,0.616016435064054,0.6107641917165727,0.7300928348547396 8 | 6,0.6454251216155978,0.6527976432738338,0.7580557009128438 9 | 7,0.6697598697598697,0.6853305424733998,0.781739248405915 10 | 8,0.6895517181231466,0.7119599976742833,0.7946857375428804 11 | 9,0.7086264705312323,0.7347791538267728,0.8068376068376069 12 | 10,0.7267360505455744,0.751853789949028,0.8137449851735565 13 | 11,0.7415624939434462,0.7665368141558618,0.8212958117720024 14 | 12,0.7505281314805123,0.7781692734073686,0.8269434269434269 15 | 13,0.7566641471403376,0.7872511967750062,0.8330096711049092 16 | 14,0.7602224935558268,0.7934724887105838,0.8378471616566854 17 | 15,0.7658662325328992,0.8009109056728103,0.8426691474310521 18 | 16,0.7717309145880575,0.8071903405236739,0.8458359982169505 19 | 17,0.7768087291896816,0.8138884043645948,0.8496075353218211 20 | 18,0.7811384382812955,0.8183150183150184,0.8478865050293622 21 | 19,0.7873984921603969,0.8229199372056516,0.8516192801907086 22 | 20,0.7918289823051727,0.8280210089733899,0.856084656084656 23 | 21,0.7985154175630367,0.8318274318274318,0.8583289726146869 24 | 22,0.8059344535535012,0.8357695214838072,0.8603794794270984 25 | 23,0.8123069170688219,0.8401418687132973,0.8627400817877008 26 | 24,0.8157993681803206,0.8430141287284145,0.8644223501366358 27 | 25,0.8177994844661511,0.8446227493846542,0.864100626005388 28 | 26,0.819570905285191,0.8465530941721418,0.8656588561350466 29 | 27,0.817869255964494,0.8484950675426866,0.8680233346900014 30 | 28,0.8187607806655426,0.8507975269880031,0.869015640444212 31 | 29,0.8183421516754851,0.8525340620578715,0.8699342985057271 32 | 30,0.8192142954047716,0.8537085489466443,0.8707676802914899 33 | -------------------------------------------------------------------------------- /results/other/mnist.csv: -------------------------------------------------------------------------------- 1 | round,avg,prox,sim 2 | 0,0.09983913793437602,0.0998430141287284,0.09983913793437602 3 | 
1,0.19298796441653582,0.15719518576661437,0.1989379227474465 4 | 2,0.2186328662519139,0.18673178673178675,0.35703626179816655 5 | 3,0.24948737329689713,0.22424171947981472,0.47979533693819415 6 | 4,0.29881969881969883,0.26790704885942984,0.5500668643525786 7 | 5,0.35224916177297133,0.3162199352675543,0.6149659863945578 8 | 6,0.41871039013896166,0.36516851754946994,0.6573405430548288 9 | 7,0.4712056902533092,0.412570498284784,0.6916022249355582 10 | 8,0.5089598232455376,0.4419598038645658,0.7210070352927496 11 | 9,0.5529546291451052,0.47181425276663375,0.7384305289067192 12 | 10,0.5857239762001667,0.5139368567939997,0.7665872046824428 13 | 11,0.6176599414694652,0.5475085760800046,0.7657460705079753 14 | 12,0.6410992887183364,0.5525708859042192,0.7767195767195768 15 | 13,0.6699885652266605,0.5666337190146714,0.7943833943833943 16 | 14,0.685706533325581,0.5988061321394654,0.8004418861561718 17 | 15,0.6956761051999146,0.6290908386146482,0.8097292478244859 18 | 16,0.7112506541077969,0.6392387154291918,0.8210593639165068 19 | 17,0.7231195612147993,0.6671473147663624,0.8231486326724422 20 | 18,0.7406477120762835,0.6668527239955813,0.8191057619629049 21 | 19,0.7549082310987072,0.7028974552784076,0.8266953505048742 22 | 20,0.760365912746865,0.718723956819195,0.8400876019923639 23 | 21,0.7743822315250887,0.7304378161521019,0.8363858363858364 24 | 22,0.7887939221272553,0.7402833498071595,0.84097525049906 25 | 23,0.7947942709847471,0.7481403957594434,0.8497044401806307 26 | 24,0.8018450685117352,0.7621799717037813,0.8571544857259142 27 | 25,0.8108300870205633,0.7662848615229568,0.8632711204139776 28 | 26,0.8150822722251293,0.765180146132527,0.8630269201697772 29 | 27,0.8106711630521154,0.7673469387755102,0.8563288563288562 30 | 28,0.8151326627517103,0.7656569180378704,0.8681396205205728 31 | 29,0.8194274860941527,0.7711843711843711,0.8620578715816811 32 | 30,0.8227455084597942,0.7720720196910673,0.8614376804852995 33 | -------------------------------------------------------------------------------- /run_fedavg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -u main.py --dataset=$1 --optimizer='fedavg' \ 3 | --learning_rate=0.01 --num_rounds=200 --clients_per_round=$4 \ 4 | --eval_every=1 --batch_size=10 \ 5 | --num_epochs=20 \ 6 | --model='mclr' \ 7 | --drop_percent=$2 \ 8 | --num_groups=$3 \ 9 | --ex_name=$5 \ 10 | --seed=0 -------------------------------------------------------------------------------- /run_fedprox.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -u main.py --dataset=$1 --optimizer='fedprox' \ 3 | --learning_rate=0.01 --num_rounds=200 --clients_per_round=$4 \ 4 | --eval_every=1 --batch_size=10 \ 5 | --num_epochs=20 \ 6 | --model='mclr' \ 7 | --drop_percent=$2 \ 8 | --mu=$3 \ 9 | --ex_name=$5 \ 10 | 11 | -------------------------------------------------------------------------------- /run_fedsim.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -u main.py --dataset=$1 --optimizer='fedsim' \ 3 | --learning_rate=0.01 --num_rounds=200 --clients_per_round=$4 \ 4 | --eval_every=1 --batch_size=10 \ 5 | --num_epochs=20 \ 6 | --model='mclr' \ 7 | --drop_percent=$2 \ 8 | --num_groups=$3 \ 9 | --ex_name=$5 \ 10 | --seed=0 -------------------------------------------------------------------------------- /utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamathpali/FedSim/982b5db099c6e4b3b6758b3b5091b6f7fe5e5d15/utils/__init__.py -------------------------------------------------------------------------------- /utils/csv_log.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import csv 5 | import matplotlib.pyplot as plt 6 | import matplotlib 7 | 8 | matplotlib.rc('xtick', labelsize=17) 9 | matplotlib.rc('ytick', labelsize=17) 10 | 11 | def log_start(method, params, num_groups = 1, name="non"): 12 | logdir = "logs/"+name 13 | if not os.path.exists(logdir): 14 | os.makedirs(logdir) 15 | 16 | with open(logdir+"/params" + '.json', 'w') as json_file: 17 | json.dump(params, json_file) 18 | 19 | def write_dataset(arr, name="non"): 20 | logdir = "logs/"+name 21 | with open(logdir + '/dataset_analysis.csv', mode='a+', newline='') as log_file: 22 | writer = csv.DictWriter(log_file, fieldnames=arr[0].keys()) 23 | writer.writeheader() 24 | for data in arr: 25 | writer.writerow(data) 26 | 27 | 28 | def write_clusters(arr, name="non"): 29 | logdir = "logs/"+name 30 | with open(logdir + '/clusters.csv', mode='a+', newline='') as log_file: 31 | writer = csv.writer(log_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) 32 | writer.writerow(arr) 33 | 34 | def write_all(method, log_data, log_groups, num_groups = 1, name="non"): 35 | logdir = "logs/"+name 36 | # with open(logdir + '/fed_' +method +'_'+str(num_groups)+'.csv', mode='w', newline='') as log_file: 37 | with open(logdir + '/' + name + '.csv', mode='w', newline='') as log_file: 38 | writer = csv.writer(log_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) 39 | writer.writerow(['round', 'train_loss', 'train_acc', 'test_acc']) 40 | for line in log_data: 41 | writer.writerow(line) 42 | 43 | for idx,group in enumerate(log_groups): 44 | with open(logdir + '/fed_' +method +'_g_'+str(idx)+'.csv', mode='w', newline='') as log_file: 45 | writer = csv.writer(log_file) 46 | writer.writerow(['round', 'train_loss', 'train_acc', 'test_acc']) 47 | for line in log_groups[idx]: 48 | writer.writerow(line) 49 | 50 | def graph_print(method, params, num_groups = 1, name="non"): 51 | sim_rounds = [] 52 | sim_test_acc = [] 53 | avg_rounds = [] 54 | avg_test_acc = [] 55 | prox_rounds = [] 56 | prox_test_acc = [] 57 | groups = [] 58 | logdir = "logs/"+name 59 | 60 | with open(logdir + '/' + name + '.csv', mode='r') as csv_file: 61 | csv_reader = csv.DictReader(csv_file) 62 | line_count = 0 63 | for row in csv_reader: 64 | if line_count == 0: 65 | line_count += 1 66 | sim_rounds.append(float(row["round"])) 67 | sim_test_acc.append(float(row["test_acc"])) 68 | line_count += 1 69 | # print(f'Fed Processed {line_count} lines.') 70 | 71 | if method == "sim": 72 | for i in range(num_groups): 73 | group_data = [] 74 | with open(logdir + '/fed_sim_g_'+ str(i)+'.csv', mode='r') as csv_file: 75 | csv_reader = csv.DictReader(csv_file) 76 | line_count = 0 77 | for row in csv_reader: 78 | if line_count == 0: 79 | line_count += 1 80 | group_data.append(float(row["test_acc"])) 81 | line_count += 1 82 | groups.append(group_data) 83 | # print(f'FedSim Groups {line_count} lines.') 84 | 85 | with open('fedavg_original/'+str(params["dataset"])+'_'+str(params["clients_per_round"])+'.csv', mode='r') as csv_file: 86 | # with 
open('fedavg_original/seeds/'+str(params["dataset"])+'_'+str(params["seed"])+'_fedavg/'+str(params["dataset"])+'_'+str(params["seed"])+'_fedavg.csv', mode='r') as csv_file: 87 | csv_reader = csv.DictReader(csv_file) 88 | line_count = 0 89 | for row in csv_reader: 90 | if line_count == 0: 91 | line_count += 1 92 | avg_rounds.append(float(row["round"])) 93 | avg_test_acc.append(float(row["test_acc"])) 94 | line_count += 1 95 | # print(f'FedAvg log Processed {line_count} lines.') 96 | 97 | with open('fedavg_original/'+str(params["dataset"])+'_'+str(params["clients_per_round"])+'_prox.csv', mode='r') as csv_file: 98 | # with open('fedavg_original/seeds/'+str(params["dataset"])+'_'+str(params["seed"])+'_fedprox/'+str(params["dataset"])+'_'+str(params["seed"])+'_fedprox.csv', mode='r') as csv_file: 99 | csv_reader = csv.DictReader(csv_file) 100 | line_count = 0 101 | for row in csv_reader: 102 | if line_count == 0: 103 | line_count += 1 104 | prox_rounds.append(float(row["round"])) 105 | prox_test_acc.append(float(row["test_acc"])) 106 | line_count += 1 107 | # print(f'FedProx log Processed {line_count} lines.') 108 | 109 | fig, ax = plt.subplots(2,1, figsize=[12, 16]) 110 | 111 | ax[0].plot(sim_rounds, sim_test_acc, linewidth=3.0, color="#17becf", label="FedSim - G - "+str(num_groups)) 112 | ax[0].plot(avg_rounds, avg_test_acc, ":",alpha=0.6, linewidth=3.0, color="#ff7f0e", label="FedAvg") 113 | ax[0].plot(prox_rounds, prox_test_acc, "-",alpha=0.6, linewidth=3.0, color="#90C978", label="FedProx") 114 | 115 | ax[1].plot(sim_rounds, sim_test_acc, "-",linewidth=1.0, alpha=0.8, color="#0000ff", label="FedSim - G - "+str(num_groups)) 116 | ax[1].plot(avg_rounds, avg_test_acc, "-",linewidth=1.0, alpha=0.8, color="#ff0000", label="FedAvg") 117 | for idx,g in enumerate(groups): 118 | ax[1].plot(sim_rounds, g, linewidth=1.5, alpha=0.3, label="Group - " + str(idx)) 119 | 120 | ax[0].set_xlabel("# Rounds", fontsize=22) 121 | ax[0].set_ylabel('Testing Accuracy', fontsize=22) 122 | ax[0].set_title("FedSim comparison - Data:"+str(params["dataset"]) 123 | + " Clients/round: "+ str(params["clients_per_round"]) 124 | + " E: " + str(params["num_epochs"]) 125 | + " Groups: " + str(params["num_groups"]), fontsize=18) 126 | 127 | ax[0].legend(fontsize=22, loc='lower center') 128 | ax[0].grid() 129 | 130 | ax[1].set_xlabel("# Rounds", fontsize=22) 131 | ax[1].set_ylabel('Testing Accuracy', fontsize=22) 132 | ax[1].set_title("Group Accuracies", fontsize=22) 133 | ax[1].legend(fontsize=22, loc='lower right') 134 | ax[1].grid() 135 | 136 | # plt.xticks(fontsize=17) 137 | # plt.yticks(fontsize=17) 138 | # ax.tick_params(color='#dddddd') 139 | # ax.spines['bottom'].set_color('#dddddd') 140 | # ax.spines['top'].set_color('#dddddd') 141 | # ax.spines['right'].set_color('#dddddd') 142 | # ax.spines['left'].set_color('#dddddd') 143 | 144 | # fig.show() 145 | # fig.savefig(logdir+"/fed"+method+"_acc_"+str(num_groups)+".pdf") 146 | fig.savefig(logdir+"/"+name+".pdf") 147 | 148 | 149 | plt.close(fig) 150 | 151 | def write_time_taken(elapsed, name="non"): 152 | logdir = "logs/"+name 153 | with open(logdir + '/timetaken.csv', mode='w', newline='') as log_file: 154 | writer = csv.writer(log_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) 155 | for line in elapsed: 156 | writer.writerow([line]) 157 | -------------------------------------------------------------------------------- /utils/language_utils.py: -------------------------------------------------------------------------------- 1 | """Utils for
language models.""" 2 | 3 | import re 4 | 5 | 6 | # ------------------------ 7 | # utils for shakespeare dataset 8 | 9 | ALL_LETTERS = "\n !\"&'(),-.0123456789:;>?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]abcdefghijklmnopqrstuvwxyz}" 10 | NUM_LETTERS = len(ALL_LETTERS) 11 | 12 | 13 | def _one_hot(index, size): 14 | '''returns one-hot vector with given size and value 1 at given index 15 | ''' 16 | vec = [0 for _ in range(size)] 17 | vec[int(index)] = 1 18 | return vec 19 | 20 | 21 | def letter_to_vec(letter): 22 | '''returns one-hot representation of given letter 23 | ''' 24 | index = ALL_LETTERS.find(letter) 25 | return _one_hot(index, NUM_LETTERS) 26 | 27 | 28 | def word_to_indices(word): 29 | '''returns a list of character indices 30 | 31 | Args: 32 | word: string 33 | 34 | Return: 35 | indices: int list with length len(word) 36 | ''' 37 | indices = [] 38 | for c in word: 39 | indices.append(ALL_LETTERS.find(c)) 40 | return indices 41 | 42 | 43 | # ------------------------ 44 | # utils for sent140 dataset 45 | 46 | 47 | def split_line(line): 48 | '''split given line/phrase into list of words 49 | 50 | Args: 51 | line: string representing phrase to be split 52 | 53 | Return: 54 | list of strings, with each string representing a word 55 | ''' 56 | return re.findall(r"[\w']+|[.,!?;]", line) 57 | 58 | 59 | def _word_to_index(word, indd): 60 | '''returns index of given word based on given lookup dictionary 61 | 62 | returns the length of the lookup dictionary if word not found 63 | 64 | Args: 65 | word: string 66 | indd: dictionary with string words as keys and int indices as values 67 | ''' 68 | if word in indd: 69 | return indd[word] 70 | else: 71 | return len(indd) 72 | 73 | 74 | def line_to_indices(line, indd, max_words=25): 75 | '''converts given phrase into list of word indices 76 | 77 | if the phrase has more than max_words words, returns a list containing 78 | indices of the first max_words words 79 | if the phrase has less than max_words words, repeatedly appends integer 80 | representing unknown index to returned list until the list's length is 81 | max_words 82 | 83 | Args: 84 | line: string representing phrase/sequence of words 85 | indd: dictionary with string words as keys and int indices as values 86 | max_words: maximum number of word indices in returned list 87 | 88 | Return: 89 | indl: list of word indices, one index for each word in phrase 90 | ''' 91 | line_list = split_line(line) # split phrase in words 92 | indl = [] 93 | for word in line_list: 94 | cind = _word_to_index(word, indd) 95 | indl.append(cind) 96 | if (len(indl) == max_words): 97 | break 98 | for i in range(max_words - len(indl)): 99 | indl.append(len(indd)) 100 | return indl 101 | 102 | 103 | def bag_of_words(line, vocab): 104 | '''returns bag of words representation of given phrase using given vocab 105 | 106 | Args: 107 | line: string representing phrase to be parsed 108 | vocab: dictionary with words as keys and indices as values 109 | 110 | Return: 111 | integer list 112 | ''' 113 | bag = [0]*len(vocab) 114 | words = split_line(line) 115 | for w in words: 116 | if w in vocab: 117 | bag[vocab[w]] += 1 118 | return bag 119 | -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | import re 5 | import sys 6 | 7 | models_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 8 | models_dir = os.path.join(models_dir, 'models') 9 
| sys.path.append(models_dir) 10 | 11 | from client import Client 12 | 13 | def batch_data(data, batch_size): 14 | ''' 15 | data is a dict := {'x': [list], 'y': [list]} 16 | returns x, y, which are both lists of size-batch_size lists 17 | ''' 18 | raw_x = data['x'] 19 | raw_y = data['y'] 20 | batched_x = [] 21 | batched_y = [] 22 | for i in range(0, len(raw_x), batch_size): 23 | batched_x.append(raw_x[i:i+batch_size]) 24 | batched_y.append(raw_y[i:i+batch_size]) 25 | return batched_x, batched_y 26 | 27 | def read_data(train_data_dir, test_data_dir): 28 | '''parses data in given train and test data directories 29 | 30 | assumes: 31 | - the data in the input directories are .json files with 32 | keys 'users' and 'user_data' 33 | - the set of train set users is the same as the set of test set users 34 | 35 | Return: 36 | clients: list of client ids 37 | groups: list of group ids; empty list if none found 38 | train_data: dictionary of train data 39 | test_data: dictionary of test data 40 | ''' 41 | clients = [] 42 | groups = [] 43 | train_data = {} 44 | test_data = {} 45 | 46 | train_files = os.listdir(train_data_dir) 47 | train_files = [f for f in train_files if f.endswith('.json')] 48 | for f in train_files: 49 | file_path = os.path.join(train_data_dir,f) 50 | with open(file_path, 'r') as inf: 51 | cdata = json.load(inf) 52 | clients.extend(cdata['users']) 53 | if 'hierarchies' in cdata: 54 | groups.extend(cdata['hierarchies']) 55 | train_data.update(cdata['user_data']) 56 | 57 | test_files = os.listdir(test_data_dir) 58 | test_files = [f for f in test_files if f.endswith('.json')] 59 | for f in test_files: 60 | file_path = os.path.join(test_data_dir,f) 61 | with open(file_path, 'r') as inf: 62 | cdata = json.load(inf) 63 | test_data.update(cdata['user_data']) 64 | 65 | clients = list(train_data.keys()) 66 | 67 | return clients, groups, train_data, test_data 68 | 69 | def setup_clients(train_data_dir, test_data_dir, model=None): 70 | '''instantiates clients based on given train and test data directories 71 | 72 | Return: 73 | list of Clients 74 | ''' 75 | users, groups, train_data, test_data = read_data(train_data_dir, test_data_dir) 76 | if len(groups) == 0: 77 | groups = [None for _ in users] 78 | all_clients = [Client(u, g, train_data[u], test_data[u], model) for u, g in zip(users, groups)] 79 | return all_clients 80 | 81 | -------------------------------------------------------------------------------- /utils/preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # script to preprocess data 4 | 5 | # -------------------- 6 | # parse arguments 7 | 8 | NAME="sent140" # name of the dataset, equivalent to directory name 9 | SAMPLE="na" # -s tag, iid or niid 10 | IUSER="" # --iu tag, # of users if iid sampling 11 | SFRAC="" # --sf tag, fraction of data to sample 12 | MINSAMPLES="na" # -k tag, minimum allowable # of samples per user 13 | TRAIN="na" # -t tag, user or sample 14 | TFRAC="" # --tf tag, fraction of data in training set 15 | 16 | while [[ $# -gt 0 ]] 17 | do 18 | key="$1" 19 | 20 | case $key in 21 | --name) 22 | NAME="$2" 23 | shift # past argument 24 | if [ ${SAMPLE:0:1} = "-" ]; then 25 | NAME="sent140" 26 | else 27 | shift # past value 28 | fi 29 | ;; 30 | -s) 31 | SAMPLE="$2" 32 | shift # past argument 33 | if [ ${SAMPLE:0:1} = "-" ]; then 34 | SAMPLE="" 35 | else 36 | shift # past value 37 | fi 38 | ;; 39 | --iu) 40 | IUSER="$2" 41 | shift # past argument 42 | if [ ${IUSER:0:1} = "-" ]; then 43 | 
IUSER="" 44 | else 45 | shift # past value 46 | fi 47 | ;; 48 | --sf) 49 | SFRAC="$2" 50 | shift # past argument 51 | if [ ${SFRAC:0:1} = "-" ]; then 52 | SFRAC="" 53 | else 54 | shift # past value 55 | fi 56 | ;; 57 | -k) 58 | MINSAMPLES="$2" 59 | shift # past argument 60 | if [ ${MINSAMPLES:0:1} = "-" ]; then 61 | MINSAMPLES="" 62 | else 63 | shift # past value 64 | fi 65 | ;; 66 | -t) 67 | TRAIN="$2" 68 | shift # past argument 69 | if [ -z "$TRAIN" ] || [ ${TRAIN:0:1} = "-" ]; then 70 | TRAIN="" 71 | else 72 | shift # past value 73 | fi 74 | ;; 75 | --tf) 76 | TFRAC="$2" 77 | shift # past argument 78 | if [ ${TFRAC:0:1} = "-" ]; then 79 | TFRAC="" 80 | else 81 | shift # past value 82 | fi 83 | ;; 84 | *) # unknown option 85 | shift # past argument 86 | ;; 87 | esac 88 | done 89 | 90 | # -------------------- 91 | # preprocess data 92 | 93 | CONT_SCRIPT=true 94 | cd ../data/$NAME 95 | 96 | # download data and convert to .json format 97 | 98 | if [ ! -d "data/all_data" ]; then 99 | cd preprocess 100 | ./data_to_json.sh 101 | cd .. 102 | fi 103 | 104 | NAMETAG="--name $NAME" 105 | 106 | # sample data 107 | IUSERTAG="" 108 | if [ ! -z $IUSER ]; then 109 | IUSERTAG="--u $IUSER" 110 | fi 111 | SFRACTAG="" 112 | if [ ! -z $SFRAC ]; then 113 | SFRACTAG="--fraction $SFRAC" 114 | fi 115 | 116 | if [ "$CONT_SCRIPT" = true ] && [ ! $SAMPLE = "na" ]; then 117 | if [ -d "data/sampled_data" ] && [ "$(ls -A data/sampled_data)" ]; then 118 | CONT_SCRIPT=false 119 | else 120 | if [ ! -d "data/sampled_data" ]; then 121 | mkdir data/sampled_data 122 | fi 123 | 124 | cd ../../utils 125 | 126 | if [ $SAMPLE = "iid" ]; then 127 | python3 sample.py $NAMETAG --iid $IUSERTAG $SFRACTAG 128 | else 129 | python3 sample.py $NAMETAG $SFRACTAG 130 | fi 131 | 132 | cd ../data/$NAME 133 | fi 134 | fi 135 | 136 | # remove users with less then given number of samples 137 | if [ "$CONT_SCRIPT" = true ] && [ ! $MINSAMPLES = "na" ]; then 138 | if [ -d "data/rem_user_data" ] && [ "$(ls -A data/rem_user_data)" ]; then 139 | CONT_SCRIPT=false 140 | else 141 | if [ ! -d "data/rem_user_data" ]; then 142 | mkdir data/rem_user_data 143 | fi 144 | 145 | cd ../../utils 146 | 147 | if [ -z $MINSAMPLES ]; then 148 | python3 remove_users.py $NAMETAG 149 | else 150 | python3 remove_users.py $NAMETAG --min_samples $MINSAMPLES 151 | fi 152 | 153 | cd ../data/$NAME 154 | fi 155 | fi 156 | 157 | # create train-test split 158 | TFRACTAG="" 159 | if [ ! -z $TFRAC ]; then 160 | TFRACTAG="--frac $TFRAC" 161 | fi 162 | 163 | if [ "$CONT_SCRIPT" = true ] && [ ! $TRAIN = "na" ]; then 164 | if [ -d "data/train" ] && [ "$(ls -A data/train)" ]; then 165 | CONT_SCRIPT=false 166 | else 167 | if [ ! -d "data/train" ]; then 168 | mkdir data/train 169 | fi 170 | if [ ! -d "data/test" ]; then 171 | mkdir data/test 172 | fi 173 | 174 | cd ../../utils 175 | 176 | if [ -z $TRAIN ]; then 177 | python3 split_data.py $NAMETAG $TFRACTAG 178 | elif [ $TRAIN = "user" ]; then 179 | python3 split_data.py $NAMETAG --by_user $TFRACTAG 180 | elif [ $TRAIN = "sample" ]; then 181 | python3 split_data.py $NAMETAG --by_sample $TFRACTAG 182 | fi 183 | 184 | cd ../data/$NAME 185 | fi 186 | fi 187 | 188 | if [ "$CONT_SCRIPT" = false ]; then 189 | echo "Data for one of the specified preprocessing tasks has already been" 190 | echo "generated. If you would like to re-generate data for this directory," 191 | echo "please delete the existing one. Otherwise, please remove the" 192 | echo "respective tag(s) from the preprocessing command." 
193 | fi -------------------------------------------------------------------------------- /utils/remove_users.py: -------------------------------------------------------------------------------- 1 | 2 | ''' 3 | removes users with less than the given number of samples 4 | ''' 5 | 6 | import argparse 7 | import json 8 | import os 9 | 10 | import numpy as np 11 | 12 | parser = argparse.ArgumentParser() 13 | 14 | parser.add_argument('--name', 15 | help='name of dataset to parse; default: sent140;', 16 | type=str, 17 | default='sent140') 18 | 19 | parser.add_argument('--min_samples', 20 | help='users with less than x samples are discarded; default: 10;', 21 | type=int, 22 | default=10) 23 | 24 | args = parser.parse_args() 25 | 26 | 27 | print('------------------------------') 28 | 29 | 30 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 31 | dir = os.path.join(parent_path, 'data', args.name, 'data') 32 | subdir = os.path.join(dir, 'sampled_data') 33 | files = [] 34 | if os.path.exists(subdir): 35 | files = os.listdir(subdir) 36 | if len(files) == 0: 37 | subdir = os.path.join(dir, 'all_data') 38 | files = os.listdir(subdir) 39 | files = [f for f in files if f.endswith('.json')] 40 | 41 | for f in files: 42 | users = [] 43 | hierarchies = [] 44 | num_samples = [] 45 | user_data = {} 46 | 47 | min_number = 0 48 | max_number = 0 49 | 50 | file_dir = os.path.join(subdir, f) 51 | with open(file_dir, 'r') as inf: 52 | data = json.load(inf) 53 | 54 | num_users = len(data['users']) 55 | for i in range(num_users): 56 | curr_user = data['users'][i] 57 | curr_hierarchy = None 58 | if 'hierarchies' in data: 59 | curr_hierarchy = data['hierarchies'][i] 60 | curr_num_samples = data['num_samples'][i] 61 | 62 | if curr_num_samples > args.min_samples: 63 | user_data[curr_user] = data['user_data'][curr_user] 64 | users.append(curr_user) 65 | max_number += 1 66 | if curr_hierarchy is not None: 67 | hierarchies.append(curr_hierarchy) 68 | num_samples.append(data['num_samples'][i]) 69 | 70 | 71 | all_data = {} 72 | all_data['users'] = users 73 | if len(hierarchies) == len(users): 74 | all_data['hierarchies'] = hierarchies 75 | all_data['num_samples'] = num_samples 76 | all_data['user_data'] = user_data 77 | 78 | file_name = '%s_keep_%d.json' % ((f[:-5]), args.min_samples) 79 | ouf_dir = os.path.join(dir, 'rem_user_data', file_name) 80 | 81 | print('writing %s' % file_name) 82 | with open(ouf_dir, 'w') as outfile: 83 | json.dump(all_data, outfile) 84 | 85 | -------------------------------------------------------------------------------- /utils/sample.py: -------------------------------------------------------------------------------- 1 | ''' 2 | samples from all raw data; 3 | by default samples in a non-iid manner; namely, randomly selects users from 4 | raw data until their cumulative amount of data exceeds the given number of 5 | datapoints to sample (specified by --fraction argument); 6 | ordering of original data points is not preserved in sampled data 7 | ''' 8 | 9 | import argparse 10 | import json 11 | import os 12 | import random 13 | 14 | from utils import iid_divide 15 | 16 | parser = argparse.ArgumentParser() 17 | 18 | parser.add_argument('--name', 19 | help='name of dataset to parse; default: sent140;', 20 | type=str, 21 | default='sent140') 22 | parser.add_argument('--iid', 23 | help='sample iid;', 24 | action="store_true") 25 | parser.add_argument('--niid', 26 | help="sample niid;", 27 | dest='iid', action='store_false') 28 | parser.add_argument('--fraction', 29 | 
help='fraction of all data to sample; default: 0.1;', 30 | type=float, 31 | default=0.1) 32 | parser.add_argument('--u', 33 | help=('number of users in iid data set; ignored in niid case;' 34 | 'represented as fraction of original total number of users; ' 35 | 'default: 0.01;'), 36 | type=float, 37 | default=0.01) 38 | parser.set_defaults(iid=False) 39 | 40 | args = parser.parse_args() 41 | 42 | print('------------------------------') 43 | print('sampling data') 44 | 45 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 46 | data_dir = os.path.join(parent_path, 'data', args.name, 'data') 47 | subdir = os.path.join(data_dir, 'all_data') 48 | files = os.listdir(subdir) 49 | files = [f for f in files if f.endswith('.json')] 50 | 51 | new_user_count = 0 # for iid case 52 | for f in files: 53 | file_dir = os.path.join(subdir, f) 54 | with open(file_dir, 'r') as inf: 55 | data = json.load(inf) 56 | 57 | num_users = len(data['users']) 58 | 59 | tot_num_samples = sum(data['num_samples']) 60 | print('Fraction: ', args.fraction) 61 | num_new_samples = int(args.fraction * tot_num_samples) 62 | 63 | hierarchies = None 64 | 65 | if(args.iid): 66 | raw_list = list(data['user_data'].values()) 67 | raw_x = [elem['x'] for elem in raw_list] 68 | raw_y = [elem['y'] for elem in raw_list] 69 | x_list = [item for sublist in raw_x for item in sublist] # flatten raw_x 70 | y_list = [item for sublist in raw_y for item in sublist] # flatten raw_y 71 | 72 | num_new_users = int(round(args.u * num_users)) 73 | if num_new_users == 0: 74 | num_new_users += 1 75 | 76 | indices = [i for i in range(tot_num_samples)] 77 | new_indices = random.sample(indices, num_new_samples) 78 | # TODO: seed this random 79 | 80 | users = [str(i+new_user_count) for i in range(num_new_users)] 81 | 82 | user_data = {} 83 | for user in users: 84 | user_data[user] = {'x': [], 'y': []} 85 | all_x_samples = [x_list[i] for i in new_indices] 86 | all_y_samples = [y_list[i] for i in new_indices] 87 | x_groups = iid_divide(all_x_samples, num_new_users) 88 | y_groups = iid_divide(all_y_samples, num_new_users) 89 | for i in range(num_new_users): 90 | user_data[users[i]]['x'] = x_groups[i] 91 | user_data[users[i]]['y'] = y_groups[i] 92 | 93 | num_samples = [len(user_data[u]['y']) for u in users] 94 | 95 | new_user_count += num_new_users 96 | 97 | else: 98 | 99 | ctot_num_samples = 0 100 | 101 | users = data['users'] 102 | users_and_hiers = None 103 | if 'hierarchies' in data: 104 | users_and_hiers = list(zip(users, data['hierarchies'])) 105 | random.shuffle(users_and_hiers) 106 | else: 107 | random.shuffle(users) 108 | user_i = 0 109 | num_samples = [] 110 | user_data = {} 111 | 112 | if 'hierarchies' in data: 113 | hierarchies = [] 114 | 115 | while(ctot_num_samples < num_new_samples): 116 | hierarchy = None 117 | if users_and_hiers is not None: 118 | user, hier = users_and_hiers[user_i] 119 | else: 120 | user = users[user_i] 121 | 122 | cdata = data['user_data'][user] 123 | 124 | cnum_samples = len(data['user_data'][user]['y']) 125 | 126 | if (ctot_num_samples + cnum_samples > num_new_samples): 127 | cnum_samples = num_new_samples - ctot_num_samples 128 | indices = [i for i in range(cnum_samples)] 129 | new_indices = random.sample(indices, cnum_samples) 130 | x = [] 131 | y = [] 132 | for i in new_indices: 133 | x.append(data['user_data'][user]['x'][i]) 134 | y.append(data['user_data'][user]['y'][i]) 135 | cdata = {'x': x, 'y': y} 136 | 137 | if 'hierarchies' in data: 138 | hierarchies.append(hier) 139 | 140 | 
num_samples.append(cnum_samples) 141 | user_data[user] = cdata 142 | 143 | ctot_num_samples += cnum_samples 144 | user_i += 1 145 | 146 | if 'hierarchies' in data: 147 | users = [u for u, h in users_and_hiers][:user_i] 148 | else: 149 | users = users[:user_i] 150 | 151 | # ------------ 152 | # create .json file 153 | 154 | all_data = {} 155 | all_data['users'] = users 156 | if hierarchies is not None: 157 | all_data['hierarchies'] = hierarchies 158 | all_data['num_samples'] = num_samples 159 | all_data['user_data'] = user_data 160 | 161 | slabel = '' 162 | if(args.iid): 163 | slabel = 'iid' 164 | else: 165 | slabel = 'niid' 166 | 167 | arg_frac = str(args.fraction) 168 | arg_frac = arg_frac[2:] 169 | arg_nu = str(args.u) 170 | arg_nu = arg_nu[2:] 171 | arg_label = arg_frac 172 | if(args.iid): 173 | arg_label = '%s_%s' % (arg_nu, arg_label) 174 | file_name = '%s_%s_%s.json' % ((f[:-5]), slabel, arg_label) 175 | ouf_dir = os.path.join(data_dir, 'sampled_data', file_name) 176 | 177 | print('writing %s' % file_name) 178 | with open(ouf_dir, 'w') as outfile: 179 | json.dump(all_data, outfile) 180 | -------------------------------------------------------------------------------- /utils/stats.py: -------------------------------------------------------------------------------- 1 | ''' 2 | assumes that the user has already generated .json file(s) containing data 3 | ''' 4 | 5 | import argparse 6 | import json 7 | import matplotlib.pyplot as plt 8 | import math 9 | import numpy as np 10 | import os 11 | 12 | from scipy import io 13 | from scipy import stats 14 | 15 | parser = argparse.ArgumentParser() 16 | 17 | parser.add_argument('--name', 18 | help='name of dataset to parse; default: sent140;', 19 | type=str, 20 | default='sent140') 21 | 22 | args = parser.parse_args() 23 | 24 | 25 | def load_data(name): 26 | 27 | users = [] 28 | num_samples = [] 29 | 30 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 31 | data_dir = os.path.join(parent_path, 'data', name, 'data') 32 | subdir = os.path.join(data_dir, 'all_data') 33 | 34 | files = os.listdir(subdir) 35 | files = [f for f in files if f.endswith('.json')] 36 | 37 | for f in files: 38 | file_dir = os.path.join(subdir, f) 39 | 40 | with open(file_dir) as inf: 41 | data = json.load(inf) 42 | 43 | users.extend(data['users']) 44 | num_samples.extend(data['num_samples']) 45 | 46 | return users, num_samples 47 | 48 | def print_dataset_stats(name): 49 | users, num_samples = load_data(name) 50 | num_users = len(users) 51 | 52 | print('####################################') 53 | print('DATASET: %s' % name) 54 | print('%d users' % num_users) 55 | print('%d samples (total)' % np.sum(num_samples)) 56 | print('%.2f samples per user (mean)' % np.mean(num_samples)) 57 | print('num_samples (std): %.2f' % np.std(num_samples)) 58 | print('num_samples (std/mean): %.2f' % (np.std(num_samples)/np.mean(num_samples))) 59 | print('num_samples (skewness): %.2f' % stats.skew(num_samples)) 60 | 61 | bins = [0,20,40,60,80,100,120,140,160,180,200] 62 | if args.name == 'shakespeare': 63 | bins = [0,2000,4000,6000,8000,10000,12000,14000,16000,18000,20000] 64 | if args.name == 'nist': 65 | bins = [0,20,40,60,80,100,120,140,160,180,200,220,240,260,280,300,320,340,360,380,400,420,440,460,480,500] 66 | 67 | hist, edges = np.histogram(num_samples,bins=bins) 68 | print("\nnum_sam\tnum_users") 69 | for e, h in zip(edges, hist): 70 | print(e, "\t", h) 71 | 72 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 73 | data_dir = 
os.path.join(parent_path, 'data', name, 'data') 74 | 75 | plt.hist(num_samples, bins = bins) 76 | fig_name = "%s_hist_nolabel.png" % name 77 | fig_dir = os.path.join(data_dir, fig_name) 78 | plt.savefig(fig_dir) 79 | plt.title(name) 80 | plt.xlabel("number of samples") 81 | plt.ylabel("number of users") 82 | fig_name = "%s_hist.png" % name 83 | fig_dir = os.path.join(data_dir, fig_name) 84 | plt.savefig(fig_dir) 85 | 86 | print_dataset_stats(args.name) -------------------------------------------------------------------------------- /utils/tf_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | 4 | def __num_elems(shape): 5 | '''Returns the number of elements in the given shape 6 | 7 | Args: 8 | shape: TensorShape 9 | 10 | Return: 11 | tot_elems: int 12 | ''' 13 | tot_elems = 1 14 | for s in shape: 15 | tot_elems *= int(s) 16 | return tot_elems 17 | 18 | def graph_size(graph): 19 | '''Returns the size of the given graph in bytes 20 | 21 | The size of the graph is calculated by summing up the sizes of each 22 | trainable variable. The sizes of variables are calculated by multiplying 23 | the number of bytes in their dtype with their number of elements, captured 24 | in their shape attribute 25 | 26 | Args: 27 | graph: TF graph 28 | Return: 29 | integer representing size of graph (in bytes) 30 | ''' 31 | tot_size = 0 32 | with graph.as_default(): 33 | vs = tf.trainable_variables() 34 | for v in vs: 35 | tot_elems = __num_elems(v.shape) 36 | dtype_size = int(v.dtype.size) 37 | var_size = tot_elems * dtype_size 38 | tot_size += var_size 39 | return tot_size 40 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | def save_obj(obj, name): 4 | with open(name + '.pkl', 'wb') as f: 5 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 6 | 7 | def load_obj(name): 8 | with open(name + '.pkl', 'rb') as f: 9 | return pickle.load(f) 10 | 11 | def iid_divide(l, g): 12 | ''' 13 | divide list l among g groups 14 | each group has either int(len(l)/g) or int(len(l)/g)+1 elements 15 | returns a list of groups 16 | ''' 17 | num_elems = len(l) 18 | group_size = int(len(l)/g) 19 | num_big_groups = num_elems - g * group_size 20 | num_small_groups = g - num_big_groups 21 | glist = [] 22 | for i in range(num_small_groups): 23 | glist.append(l[group_size*i:group_size*(i+1)]) 24 | bi = group_size*num_small_groups 25 | group_size += 1 26 | for i in range(num_big_groups): 27 | glist.append(l[bi+group_size*i:bi+group_size*(i+1)]) 28 | return glist --------------------------------------------------------------------------------