├── requirements.txt ├── data ├── synthetic │ ├── README.md │ ├── generate_data.sh │ ├── split_data.py │ └── generate_data.py ├── femnist │ ├── README.md │ ├── group_by_writer.py │ ├── match_hashes.py │ ├── split_data.sh │ ├── get_hashes.py │ ├── preprocess.sh │ ├── get_file_dirs.py │ └── split_data.py ├── sent140 │ ├── README.md │ ├── combine_data.py │ ├── preprocess.sh │ ├── split_data.sh │ └── split_data.py ├── shakespeare │ ├── README.md │ ├── preprocess.sh │ ├── split_data.sh │ ├── split_data.py │ └── preprocess_shakespeare.py ├── inaturalist │ ├── README.md │ ├── preprocess.sh │ └── split_data.py └── README.md ├── utils ├── metrics.py ├── logger.py ├── optim.py └── args.py ├── graph_utils ├── generate_all_networks.sh ├── data │ ├── Read_me_gml.txt │ └── gaia.gml ├── README.md ├── show_networks.py ├── utils │ ├── mbst.py │ ├── evaluate_throughput.py │ ├── tsp_christofides.py │ ├── matcha.py │ ├── matching_decomposition.py │ └── utils.py ├── time_simulator.py └── generate_networks.py ├── .gitignore ├── loaders ├── synthetic.py ├── sent140.py ├── shakespeare.py ├── femnist.py └── inaturalist.py ├── reproduce_results.py ├── make_table3.py ├── communication_module ├── worker.py └── manager.py ├── models ├── inaturalist │ └── resnet.py ├── model.py ├── synthetic │ └── linear.py ├── femnist │ └── cnn.py ├── sent140 │ └── lstm.py └── shakespeare │ └── gru.py ├── main.py ├── make_figure2.py ├── README.md └── LICENSE /requirements.txt: -------------------------------------------------------------------------------- 1 | cvxpy 2 | tensorboard 3 | geopy 4 | PIL 5 | scikit-learn 6 | networkx == 2.4 7 | numpy 8 | torch 9 | torchvision 10 | scipy 11 | matplotlib 12 | jupyter 13 | torchtext 14 | spacy 15 | mplleaflet -------------------------------------------------------------------------------- /data/synthetic/README.md: -------------------------------------------------------------------------------- 1 | # Synthetic Dataset 2 | 3 | ## Setup Instructions 4 | 5 | Run generate_data.sh with a choice of the following tags: 6 | 7 | - ```-nw```: number of workers, written as integer 8 | - ```-nc``` : number of classes, written as integer 9 | - ```-dim```: dimension of the data, written as integer 10 | - ```--tf``` := fraction of data in training set, written as a decimal; default is 0.9 11 | - ```--seed``` := seed to be used before random sampling of data 12 | 13 | i.e. 
14 | - ```./generate_data.sh -nw 11 -nc 2 -dim 10 --tf 0.8 --seed 1234``` 15 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def binary_accuracy(preds, y): 5 | """ 6 | Compute accuracy for binary classification from raw model outputs. 7 | :param preds: raw predictions (logits), same shape as y 8 | :param y: binary ground-truth labels 9 | :return: accuracy as a scalar torch tensor in [0, 1] 10 | """ 11 | # round predictions to the closest integer 12 | rounded_preds = torch.round(torch.sigmoid(preds)) 13 | correct = (rounded_preds == y).float() 14 | acc = correct.sum() / len(correct) 15 | return acc 16 | 17 | 18 | def accuracy(preds, y): 19 | """ 20 | Compute top-1 accuracy for multi-class classification. 21 | :param preds: model outputs of shape (batch_size, n_classes) 22 | :param y: ground-truth class indices of shape (batch_size,) 23 | :return: accuracy as a scalar torch tensor in [0, 1] 24 | """ 25 | _, predicted = torch.max(preds, 1) 26 | correct = (predicted == y).float() 27 | acc = correct.sum() / len(correct) 28 | return acc -------------------------------------------------------------------------------- /data/femnist/README.md: -------------------------------------------------------------------------------- 1 | # FEMNIST Dataset 2 | 3 | ## Setup Instructions 4 | 5 | Run preprocess.sh with a choice of the following tags: 6 | 7 | - ```-nw```: number of workers, written as integer 8 | - ```-s``` := 'iid' to sample in an i.i.d. manner, or 'niid' to sample 9 | in a non-i.i.d. manner; more information on i.i.d. versus non-i.i.d. 10 | is included in the 'Notes' section 11 | - ```--sf``` := fraction of data to sample, written as a decimal; 12 | default is 0.1 13 | - ```--tf``` := fraction of data in training set, written as a decimal; default is 0.9 14 | - ```--seed``` := seed to be used before random sampling of data 15 | 16 | e.g. 17 | - ```./preprocess.sh -s iid -nw 11 --sf 1.0``` (full-sized 18 | dataset partitioned on Gaia)
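For a quicker, non-i.i.d. experiment the same flags can be combined to subsample the dataset; the worker count, fractions and seed below are illustrative values only, not recommended settings:

```
# non-i.i.d. sampling of 10% of FEMNIST across 11 workers, 80/20 train/test split, fixed seed
./preprocess.sh -s niid -nw 11 --sf 0.1 --tf 0.8 --seed 1234
```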
-------------------------------------------------------------------------------- /data/sent140/README.md: -------------------------------------------------------------------------------- 1 | # Sentiment140 Dataset 2 | 3 | ## Setup Instructions 4 | 5 | Run preprocess.sh with a choice of the following tags: 6 | 7 | - ```-nw```: number of workers, written as integer 8 | - ```-s``` := 'iid' to sample in an i.i.d. manner, or 'niid' to sample 9 | in a non-i.i.d. manner; more information on i.i.d. versus non-i.i.d. 10 | is included in the 'Notes' section 11 | - ```--sf``` := fraction of data to sample, written as a decimal; 12 | default is 0.1 13 | - ```--tf``` := fraction of data in training set, written as a decimal; default is 0.9 14 | - ```--seed``` := seed to be used before random sampling of data 15 | 16 | e.g. 17 | - ```./preprocess.sh -s iid -nw 11 --sf 1.0``` (full-sized 18 | dataset partitioned on Gaia)
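For a quick smoke test of the pipeline it can be convenient to sample only a small fraction of the tweets first; the values below are illustrative, not recommended settings:

```
# i.i.d. sampling of 1% of Sentiment140 across 11 workers with a fixed seed
./preprocess.sh -s iid -nw 11 --sf 0.01 --seed 1234
```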
19 | 20 | 21 | -------------------------------------------------------------------------------- /data/shakespeare/README.md: -------------------------------------------------------------------------------- 1 | # Shakespeare Dataset 2 | 3 | ## Setup Instructions 4 | 5 | Run preprocess.sh with a choice of the following tags: 6 | 7 | - ```-nw```: number of workers, written as integer 8 | - ```-s``` := 'iid' to sample in an i.i.d. manner, or 'niid' to sample 9 | in a non-i.i.d. manner; more information on i.i.d. versus non-i.i.d. 10 | is included in the 'Notes' section 11 | - ```--sf``` := fraction of data to sample, written as a decimal; 12 | default is 0.1 13 | - ```--tf``` := fraction of data in training set, written as a decimal; default is 0.9 14 | - ```--seed``` := seed to be used before random sampling of data 15 | 16 | e.g. 17 | - ```./preprocess.sh -s iid -nw 11 --sf 1.0``` (full-sized 18 | dataset partitioned on Gaia)
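Only the ```-s``` flag differs between the i.i.d. and non-i.i.d. settings; the pair of commands below is an illustration with arbitrary worker count, fractions and seed:

```
# identical subsample, two sampling modes
./preprocess.sh -s iid -nw 11 --sf 0.2 --tf 0.9 --seed 1234
./preprocess.sh -s niid -nw 11 --sf 0.2 --tf 0.9 --seed 1234
```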
19 | 20 | 21 | -------------------------------------------------------------------------------- /graph_utils/generate_all_networks.sh: -------------------------------------------------------------------------------- 1 | echo "################" 2 | echo "gaia" 3 | python generate_networks.py gaia --experiment inaturalist --upload_capacity 1e10 --download_capacity 1e10 4 | echo "################" 5 | echo "amazon_us" 6 | python generate_networks.py amazon_us --experiment inaturalist --upload_capacity 1e10 --download_capacity 1e10 7 | echo "################" 8 | echo "geantdistance" 9 | python generate_networks.py geantdistance --experiment inaturalist --upload_capacity 1e10 --download_capacity 1e10 10 | echo "################" 11 | echo "ebone" 12 | python generate_networks.py ebone --experiment inaturalist --upload_capacity 1e10 --download_capacity 1e10 13 | echo "################" 14 | echo "exodus" 15 | python generate_networks.py exodus --experiment inaturalist --upload_capacity 1e10 --download_capacity 1e10 -------------------------------------------------------------------------------- /data/inaturalist/README.md: -------------------------------------------------------------------------------- 1 | # iNaturalist Dataset 2 | 3 | ## Setup Instructions 4 | 5 | * Download iNaturalist 6 | [here](https://storage.googleapis.com/inat_data_2018_eu/train_val2018.tar.gz), 7 | unzip it and place its content in the ``raw_data`` folder. 8 | 9 | * Run preprocess.sh with a choice of the following tags: 10 | 11 | - ```--network``` := name of the network to use, should be present in 12 | ``/graph_utils/data``; default is amazon_us 13 | - ```--sf``` := fraction of data to sample, written as a decimal; 14 | default is 0.1 15 | - ```--tf``` := fraction of data in training set, written as a decimal; default is 0.9 16 | - ```--seed``` := seed to be used before random sampling of data 17 | 18 | e.g. 19 | - ```./preprocess.sh --network gaia --sf 1.0 --tf 0.9 --seed 1234``` (full-sized 20 | dataset partitioned on Gaia)
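A faster, subsampled run can rely on the default network and use only part of the images; the fraction and seed below are illustrative values, not recommended settings:

```
# 10% subsample on the default connectivity graph, 90/10 train/test split
./preprocess.sh --sf 0.1 --tf 0.9 --seed 1234
```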
21 | -------------------------------------------------------------------------------- /graph_utils/data/Read_me_gml.txt: -------------------------------------------------------------------------------- 1 | In GML file the distance is the latency indicated in the original files (latencies.intra). 2 | The details of each instance is as follows (which is a little bit different from the statistics in the paper): 3 | num_of_nodes num_of_links 4 | 1221 108 153 5 | 1239 315 972 6 | 1755 87 161 7 | 3257 161 328 8 | 3967 79 147 9 | 6461 141 374 10 | 1755+3967 166 327 11 | 12 | 13 | The combined one "1755+3967": 19 random edges are added. The latency is calculated by distance which is normalized compared with the original maximum latency weight. 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import json 4 | 5 | 6 | class Logger(object): 7 | def __init__(self, logdir): 8 | self.logdir = logdir 9 | 10 | def write_model(self, model_params, iteration=0, mode="json"): 11 | """ 12 | save model parameters as .pt file 13 | :param model_params: torch.tensor 14 | :param iteration: integer 15 | :param mode: 16 | """ 17 | if mode == "torch": 18 | file_path = os.path.join(self.logdir, 19 | "model_{}.pt".format(iteration)) 20 | torch.save(model_params, file_path) 21 | 22 | elif mode == "json": 23 | file_path = os.path.join(self.logdir, 24 | "model_{}.json".format(iteration)) 25 | 26 | with open(file_path, "w") as f: 27 | f.write(json.dumps(model_params.tolist())) -------------------------------------------------------------------------------- /data/femnist/group_by_writer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | 5 | def load_obj(name): 6 | with open(name + '.pkl', 'rb') as f: 7 | return pickle.load(f) 8 | 9 | 10 | def save_obj(obj, name): 11 | with open(name + '.pkl', 'wb') as f: 12 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 13 | 14 | 15 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 16 | 17 | wwcd = os.path.join('intermediate', 'write_with_class') 18 | write_class = load_obj(wwcd) 19 | 20 | writers = [] # each entry is a (writer, [list of (file, class)]) tuple 21 | cimages = [] 22 | (cw, _, _) = write_class[0] 23 | for (w, f, c) in write_class: 24 | if w != cw: 25 | writers.append((cw, cimages)) 26 | cw = w 27 | cimages = [(f, c)] 28 | cimages.append((f, c)) 29 | writers.append((cw, cimages)) 30 | 31 | ibwd = os.path.join('intermediate', 'images_by_writer') 32 | save_obj(writers, ibwd) -------------------------------------------------------------------------------- /data/sent140/combine_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | each row of created .csv file is of the form: 3 | polarity, id, date, query, user, comment, test_or_training 4 | """ 5 | 6 | import csv 7 | import os 8 | 9 | 10 | train_file_name = os.path.join('raw_data', 'training.csv') 11 | 12 | training = [] 13 | with open(train_file_name, 'rt', encoding='ISO-8859-1') as f: 14 | reader = csv.reader(f) 15 | training = list(reader) 16 | 17 | test_file_name = os.path.join('raw_data', 'test.csv') 18 | 19 | test = [] 20 | with open(test_file_name, 'rt', encoding='ISO-8859-1') as f: 21 | reader = csv.reader(f) 22 | test = list(reader) 23 | 24 | out_file_name = os.path.join('raw_data', 
'all_data.csv') 25 | 26 | with open(out_file_name, 'w') as f: 27 | writer = csv.writer(f) 28 | 29 | for row in training: 30 | row.append('training') 31 | writer.writerow(row) 32 | 33 | for row in test: 34 | row.append('test') 35 | writer.writerow(row) -------------------------------------------------------------------------------- /data/shakespeare/preprocess.sh: -------------------------------------------------------------------------------- 1 | if [ ! -d "all_data" ] || [ ! "$(ls -A all_data)" ]; then 2 | if [ ! -d "raw_data" ]; then 3 | mkdir raw_data 4 | fi 5 | 6 | if [ ! -f raw_data/raw_data.txt ]; then 7 | echo "------------------------------" 8 | echo "retrieving raw data" 9 | cd raw_data 10 | 11 | wget http://www.gutenberg.org/files/100/old/1994-01-100.zip 12 | unzip 1994-01-100.zip 13 | rm 1994-01-100.zip 14 | mv 100.txt raw_data.txt 15 | 16 | cd ../ 17 | fi 18 | if [ ! -d "raw_data/by_play_and_character" ]; then 19 | echo "dividing txt data between users" 20 | python3 preprocess_shakespeare.py raw_data/raw_data.txt raw_data/ 21 | fi 22 | fi 23 | if [ ! -f test/test.json ]; then 24 | echo "------------------------------" 25 | echo "spliting data" 26 | mkdir train 27 | mkdir test 28 | 29 | ./split_data.sh "$@" 30 | 31 | echo "finished splitting data" 32 | fi -------------------------------------------------------------------------------- /data/femnist/match_hashes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | 5 | def load_obj(name): 6 | with open(name + '.pkl', 'rb') as f: 7 | return pickle.load(f) 8 | 9 | 10 | def save_obj(obj, name): 11 | with open(name + '.pkl', 'wb') as f: 12 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 13 | 14 | 15 | cfhd = os.path.join('intermediate', 'class_file_hashes') 16 | wfhd = os.path.join('intermediate', 'write_file_hashes') 17 | class_file_hashes = load_obj(cfhd) # each elem is (class, file dir, hash) 18 | write_file_hashes = load_obj(wfhd) # each elem is (writer, file dir, hash) 19 | 20 | class_hash_dict = {} 21 | for i in range(len(class_file_hashes)): 22 | (c, f, h) = class_file_hashes[len(class_file_hashes)-i-1] 23 | class_hash_dict[h] = (c, f) 24 | 25 | write_classes = [] 26 | for tup in write_file_hashes: 27 | (w, f, h) = tup 28 | write_classes.append((w, f, class_hash_dict[h][0])) 29 | 30 | wwcd = os.path.join('intermediate', 'write_with_class') 31 | save_obj(write_classes, wwcd) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Don't track content of these folders 2 | .idea/ 3 | log/ 4 | 5 | *.ipynb 6 | *.npy 7 | *.pth 8 | *.csv 9 | *.json 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | env/ 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *,cover 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | -------------------------------------------------------------------------------- /data/sent140/preprocess.sh: -------------------------------------------------------------------------------- 1 | if [ ! -d "raw_data" ]; then 2 | mkdir raw_data 3 | fi 4 | 5 | if [ ! -f raw_data/test.csv ]; then 6 | echo "------------------------------" 7 | echo "retrieving raw data" 8 | 9 | cd raw_data 10 | 11 | if [ ! -f trainingandtestdata.zip ]; then 12 | wget --no-check-certificate http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip 13 | fi 14 | 15 | unzip trainingandtestdata.zip 16 | 17 | mv training.1600000.processed.noemoticon.csv training.csv 18 | mv testdata.manual.2009.06.14.csv test.csv 19 | 20 | rm trainingandtestdata.zip 21 | 22 | cd ../ 23 | echo "finished retrieving raw data" 24 | 25 | echo "------------------------------" 26 | echo "combining raw_data .csv files" 27 | 28 | python3 combine_data.py 29 | 30 | echo "finished combining raw_data .csv files" 31 | 32 | fi 33 | if [ ! -f test/test.json ]; then 34 | echo "------------------------------" 35 | echo "spliting data" 36 | mkdir train 37 | mkdir test 38 | 39 | ./split_data.sh "$@" 40 | 41 | echo "finished splitting data" 42 | fi -------------------------------------------------------------------------------- /loaders/synthetic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import json 3 | from torch.utils.data import Dataset, DataLoader 4 | 5 | 6 | class SyntheticDataset(Dataset): 7 | def __init__(self, json_file, device): 8 | self.device = device 9 | 10 | with open(json_file, "r") as f: 11 | data = json.load(f) 12 | 13 | self.X = torch.tensor(data["x"]).to(device) 14 | self.y = torch.tensor(data["y"]).to(device) 15 | 16 | self.num_classes = data["num_classes"] 17 | if self.num_classes == 2: 18 | self.num_classes = 1 19 | self.dimension = self.X.shape[1] 20 | 21 | def __len__(self): 22 | return self.X.shape[0] 23 | 24 | def __getitem__(self, idx): 25 | return self.X[idx], torch.unsqueeze(self.y[idx], 0) 26 | 27 | 28 | def get_iterator_synthetic(file_path, device, batch_size=1): 29 | """ 30 | 31 | :param file_path: 32 | :param device: 33 | :param batch_size 34 | :return: 35 | """ 36 | dataset = SyntheticDataset(file_path, device) 37 | iterator = DataLoader(dataset, shuffle=True, batch_size=batch_size) 38 | 39 | return iterator -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | ## Leaf Datasets 4 | 1. FEMNIST 5 | 6 | * **Overview:** Image Dataset 7 | * **Details:** 62 different classes (10 digits, 26 lowercase, 26 uppercase), images are 28 by 28 pixels (with option to make them all 128 by 128 pixels), 3500 users 8 | * **Task:** Image Classification 9 | 10 | 2. Sentiment140 11 | 12 | * **Overview:** Text Dataset of Tweets 13 | * **Details** 660120 users 14 | * **Task:** Sentiment Analysis 15 | 16 | 3. 
Shakespeare 17 | 18 | * **Overview:** Text Dataset of Shakespeare Dialogues 19 | * **Details:** 1129 users 20 | * **Task:** Next-Character Prediction 21 | 22 | 23 | ## Cross-silo Datasets 24 | 1. iNaturalist Dataset 25 | 26 | * **Overview:** We preprocess the iNaturalist data released by 27 | [inaturalist.org](https://www.inaturalist.org/pages/developers). 28 | * **Details:** 859,000 samples with geo-location information. 29 | * **Task:** Image classification. 30 | 31 | ## References 32 | 33 | 34 | @misc{caldas2018leaf, title={LEAF: A Benchmark for Federated Settings}, 35 | author={Sebastian Caldas and Sai Meher Karthik Duddu and Peter Wu and Tian Li and Jakub Konečný and H. Brendan McMahan and Virginia Smith and Ameet Talwalkar}, 36 | year={2018}, 37 | eprint={1812.01097}, 38 | archivePrefix={arXiv}, 39 | primaryClass={cs.LG} 40 | } 41 | -------------------------------------------------------------------------------- /graph_utils/README.md: -------------------------------------------------------------------------------- 1 | # Graph Generator 2 | 3 | Generate different overlays given a connectivity graph. The connectivity 4 | graph should be stored in ``data`` as a ``.gml`` file. 5 | ## Setup Instructions 6 | 7 | Run ```generate_networks.py``` with a choice of the following arguments: 8 | 9 | - ```name```: name of the underlay network to use; 10 | - ```--experiment```: name of the experiment that will be run on the 11 | network; possible values are femnist, inaturalist, synthetic, shakespeare, 12 | sent140; if not specified, --model_size will be used as the model size; 13 | - ``--model_size``: size of the model that will be transmitted on the 14 | network in bits; will be ignored if --experiment is specified; default 15 | is 1e8; 16 | - ``--default_capacity``: default capacity (in bit/s) to use on links 17 | with unknown capacity; default is 1e9; 18 | - ```--centrality```: type of centrality to use in order to select the 19 | central node of the network; possible values are: "load", "distance" 20 | and "information"; default is "load"; 21 | 22 | 23 | e.g. 24 | - ```python3 generate_networks.py amazon_us --experiment inaturalist``` 25 | (generate different overlays with Amazon North America as the connectivity 26 | graph for the iNaturalist experiment)
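When ```--experiment``` is omitted, the transmitted model size is taken from ```--model_size``` instead; the command below is an illustrative sketch combining the arguments documented above (the centrality choice is arbitrary):

```
# build overlays for the Gaia graph assuming a 1e8-bit model, selecting the hub by information centrality
python3 generate_networks.py gaia --model_size 1e8 --centrality information
```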
27 | 28 | To generate all the topologies for all the networks run 29 | 30 | ``` 31 | .\generate_all_networks.sh 32 | ``` -------------------------------------------------------------------------------- /data/shakespeare/split_data.sh: -------------------------------------------------------------------------------- 1 | while [[ $# -gt 0 ]] 2 | do 3 | key="$1" 4 | 5 | case $key in 6 | -nw) 7 | NUM_WORKERS="$2" 8 | shift # past argument 9 | shift # past value 10 | ;; 11 | -s) 12 | SAMPLE="$2" 13 | shift # past argument 14 | shift # past value 15 | ;; 16 | --sf) 17 | SFRAC="$2" 18 | shift # past argument 19 | shift # past value 20 | ;; 21 | --tf) 22 | TFRAC="$2" 23 | shift # past argument 24 | shift # past value 25 | ;; 26 | --seed) 27 | SEED="$2" 28 | shift # past argument 29 | ;; 30 | --default) 31 | DEFAULT=YES 32 | shift # past argument 33 | ;; 34 | *) # unknown option 35 | POSITIONAL+=("$1") # save it in an array for later 36 | shift # past argument 37 | ;; 38 | esac 39 | done 40 | 41 | if [ ! -z $NUM_WORKERS ]; then 42 | NUM_WORKERS_TAG="--num_workers $NUM_WORKERS" 43 | fi 44 | 45 | SFRAC_TAG="" 46 | if [ ! -z $SFRAC ]; then 47 | SFRAC_TAG="--s_frac $SFRAC" 48 | fi 49 | 50 | TFRAC_TAG="" 51 | if [ ! -z $TFRAC ]; then 52 | TFRAC_TAG="--tr_frac $TFRAC" 53 | fi 54 | 55 | SEED_TAG="" 56 | if [ ! -z $SEED ]; then 57 | SEED_TAG="--seed $SEED" 58 | fi 59 | 60 | if [ $SAMPLE = "iid" ]; then 61 | python3 split_data.py --iid $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG 62 | else 63 | python3 split_data.py $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG 64 | fi -------------------------------------------------------------------------------- /data/femnist/split_data.sh: -------------------------------------------------------------------------------- 1 | while [[ $# -gt 0 ]] 2 | do 3 | key="$1" 4 | 5 | case $key in 6 | -nw) 7 | NUM_WORKERS="$2" 8 | shift # past argument 9 | shift # past value 10 | ;; 11 | -s) 12 | SAMPLE="$2" 13 | shift # past argument 14 | shift # past value 15 | ;; 16 | --sf) 17 | SFRAC="$2" 18 | shift # past argument 19 | shift # past value 20 | ;; 21 | --tf) 22 | TFRAC="$2" 23 | shift # past argument 24 | shift # past value 25 | ;; 26 | --seed) 27 | SEED="$2" 28 | shift # past argument 29 | ;; 30 | --default) 31 | DEFAULT=YES 32 | shift # past argument 33 | ;; 34 | *) # unknown option 35 | POSITIONAL+=("$1") # save it in an array for later 36 | shift # past argument 37 | ;; 38 | esac 39 | done 40 | 41 | NUM_WORKERS_TAG="" 42 | if [ ! -z $NUM_WORKERS ]; then 43 | NUM_WORKERS_TAG="--num_workers $NUM_WORKERS" 44 | fi 45 | 46 | SFRAC_TAG="" 47 | if [ ! -z $SFRAC ]; then 48 | SFRAC_TAG="--s_frac $SFRAC" 49 | fi 50 | 51 | TFRAC_TAG="" 52 | if [ ! -z $TFRAC ]; then 53 | TFRAC_TAG="--tr_frac $TFRAC" 54 | fi 55 | 56 | SEED_TAG="" 57 | if [ ! 
-z $SEED ]; then 58 | SEED_TAG="--seed $SEED" 59 | fi 60 | 61 | if [ $SAMPLE = "iid" ]; then 62 | python3 split_data.py --iid $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG 63 | else 64 | python3 split_data.py $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG 65 | fi -------------------------------------------------------------------------------- /data/sent140/split_data.sh: -------------------------------------------------------------------------------- 1 | while [[ $# -gt 0 ]] 2 | do 3 | key="$1" 4 | 5 | case $key in 6 | -nw) 7 | NUM_WORKERS="$2" 8 | shift # past argument 9 | shift # past value 10 | ;; 11 | -s) 12 | SAMPLE="$2" 13 | shift # past argument 14 | shift # past value 15 | ;; 16 | --sf) 17 | SFRAC="$2" 18 | shift # past argument 19 | shift # past value 20 | ;; 21 | --tf) 22 | TFRAC="$2" 23 | shift # past argument 24 | shift # past value 25 | ;; 26 | --seed) 27 | SEED="$2" 28 | shift # past argument 29 | ;; 30 | --default) 31 | DEFAULT=YES 32 | shift # past argument 33 | ;; 34 | *) # unknown option 35 | POSITIONAL+=("$1") # save it in an array for later 36 | shift # past argument 37 | ;; 38 | esac 39 | done 40 | 41 | NUM_WORKERS_TAG="" 42 | if [ ! -z $NUM_WORKERS ]; then 43 | NUM_WORKERS_TAG="--num_workers $NUM_WORKERS" 44 | fi 45 | 46 | SFRAC_TAG="" 47 | if [ ! -z $SFRAC ]; then 48 | SFRAC_TAG="--s_frac $SFRAC" 49 | fi 50 | 51 | TFRAC_TAG="" 52 | if [ ! -z $TFRAC ]; then 53 | TFRAC_TAG="--tr_frac $TFRAC" 54 | fi 55 | 56 | SEED_TAG="" 57 | if [ ! -z $SEED ]; then 58 | SEED_TAG="--seed $SEED" 59 | fi 60 | 61 | if [ $SAMPLE = "iid" ]; then 62 | python3 split_data.py --iid $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG 63 | else 64 | python3 split_data.py $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG 65 | fi -------------------------------------------------------------------------------- /utils/optim.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optim 2 | import numpy as np 3 | 4 | 5 | def get_optimizer(optimizer_name, net, lr_initial=1e-3): 6 | """ 7 | 8 | :param optimizer_name: 9 | :param net: 10 | :param lr_initial: 11 | :return: 12 | """ 13 | if optimizer_name == "adam": 14 | return optim.Adam([param for param in net.parameters() if param.requires_grad], lr=lr_initial) 15 | 16 | elif optimizer_name == "sgd": 17 | return optim.SGD([param for param in net.parameters() if param.requires_grad], lr=lr_initial) 18 | 19 | else: 20 | raise NotImplementedError("Other optimizer are not implemented") 21 | 22 | 23 | def get_lr_scheduler(optimizer, scheduler_name, epoch_size): 24 | if scheduler_name == "sqrt": 25 | return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1/np.sqrt(x) if x > 0 else 1) 26 | 27 | elif scheduler_name == "linear": 28 | return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1 / x if x > 0 else 1) 29 | 30 | elif scheduler_name == "constant": 31 | return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1) 32 | 33 | elif scheduler_name == "cyclic": 34 | return optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-5, max_lr=0.1) 35 | 36 | elif scheduler_name == "custom": 37 | return optim.lr_scheduler.StepLR(optimizer, step_size=30*int(epoch_size), gamma=0.1) 38 | else: 39 | raise NotImplementedError("Other learning rate schedulers are not implemented") 40 | 41 | -------------------------------------------------------------------------------- /data/femnist/get_hashes.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 
| import os 3 | import pickle 4 | 5 | 6 | def load_obj(name): 7 | with open(name + '.pkl', 'rb') as f: 8 | return pickle.load(f) 9 | 10 | 11 | def save_obj(obj, name): 12 | with open(name + '.pkl', 'wb') as f: 13 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 14 | 15 | 16 | cfd = os.path.join('intermediate', 'class_file_dirs') 17 | wfd = os.path.join('intermediate', 'write_file_dirs') 18 | 19 | class_file_dirs = load_obj(cfd) 20 | write_file_dirs = load_obj(wfd) 21 | 22 | class_file_hashes = [] 23 | write_file_hashes = [] 24 | 25 | count = 0 26 | for tup in class_file_dirs: 27 | if count % 100000 == 0: 28 | print('hashed %d class images' % count) 29 | 30 | (cclass, cfile) = tup 31 | file_path = os.path.join(cfile) 32 | 33 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest() 34 | 35 | class_file_hashes.append((cclass, cfile, chash)) 36 | 37 | count += 1 38 | 39 | cfhd = os.path.join('intermediate', 'class_file_hashes') 40 | save_obj(class_file_hashes, cfhd) 41 | 42 | count = 0 43 | for tup in write_file_dirs: 44 | if (count % 100000 == 0): 45 | print('hashed %d write images' % count) 46 | 47 | (cclass, cfile) = tup 48 | file_path = os.path.join(cfile) 49 | 50 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest() 51 | 52 | write_file_hashes.append((cclass, cfile, chash)) 53 | 54 | count += 1 55 | 56 | wfhd = os.path.join('intermediate', 'write_file_hashes') 57 | save_obj(write_file_hashes, wfhd) -------------------------------------------------------------------------------- /loaders/sent140.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchtext import data 3 | 4 | 5 | def get_iterator_sent140(path, all_data_path, device, max_vocab_size=25_000, batch_size=64): 6 | """ 7 | Build text iterator to be use with LSTM model, 8 | :param path: path to .json file used to build the iterator, see TorchText for .json file format. 9 | :param all_data_path: path to .json file containing all train data 10 | :param device: 11 | :param max_vocab_size: 12 | :param batch_size: 13 | :return: iterator over sent140 samples, each sample has two attributes "text" and "label" 14 | """ 15 | TEXT = data.Field(tokenize='spacy', include_lengths=True) 16 | LABEL = data.LabelField(dtype=torch.float) 17 | 18 | fields = {'text': ('text', TEXT), 'label': ('label', LABEL)} 19 | 20 | text_data = data.TabularDataset(path=path, format='json', fields=fields) 21 | 22 | text_data.sort_key = lambda x: len(x.text) 23 | 24 | # Fix the seed 25 | torch.manual_seed(0) 26 | torch.backends.cudnn.deterministic = True 27 | torch.backends.cudnn.benchmark = False 28 | 29 | all_text_data = data.TabularDataset(path=all_data_path, format='json', fields=fields) 30 | 31 | # vocab is built using all data, in order to have the same mapping from words to indexes across workers 32 | TEXT.build_vocab(all_text_data, 33 | max_size=max_vocab_size, 34 | vectors="glove.6B.100d", 35 | unk_init=torch.Tensor.normal_) 36 | LABEL.build_vocab(text_data) 37 | 38 | iterator = data.BucketIterator( 39 | text_data, 40 | batch_size=batch_size, 41 | sort_within_batch=True, 42 | device=device) 43 | 44 | return iterator 45 | 46 | -------------------------------------------------------------------------------- /data/femnist/preprocess.sh: -------------------------------------------------------------------------------- 1 | if [ ! 
-d "raw_data" ]; then 2 | echo "------------------------------" 3 | echo "downloading data" 4 | mkdir raw_data 5 | cd raw_data 6 | wget https://s3.amazonaws.com/nist-srd/SD19/by_class.zip 7 | wget https://s3.amazonaws.com/nist-srd/SD19/by_write.zip 8 | unzip by_class.zip 9 | rm by_class.zip 10 | unzip by_write.zip 11 | rm by_write.zip 12 | cd ../ 13 | echo "finished downloading data" 14 | fi 15 | if [ ! -d "intermediate" ]; then # stores .pkl files during preprocessing 16 | mkdir intermediate 17 | fi 18 | 19 | if [ ! -f ntermediate/class_file_dirs.pkl ]; then 20 | echo "------------------------------" 21 | echo "extracting file directories of images" 22 | python3 get_file_dirs.py 23 | echo "finished extracting file directories of images" 24 | fi 25 | 26 | if [ ! -f intermediate/class_file_hashes.pkl ]; then 27 | echo "------------------------------" 28 | echo "calculating image hashes" 29 | python3 get_hashes.py 30 | echo "finished calculating image hashes" 31 | fi 32 | 33 | if [ ! -f intermediate/write_with_class.pkl ]; then 34 | echo "------------------------------" 35 | echo "assigning class labels to write images" 36 | python3 match_hashes.py 37 | echo "finished assigning class labels to write images" 38 | fi 39 | 40 | if [ ! -f intermediate/images_by_writer.pkl ]; then 41 | echo "------------------------------" 42 | echo "grouping images by writer" 43 | python3 group_by_writer.py 44 | echo "finished grouping images by writer" 45 | fi 46 | if [ ! -f test/test.json ]; then 47 | echo "------------------------------" 48 | echo "spliting data" 49 | mkdir train 50 | mkdir test 51 | 52 | ./split_data.sh "$@" 53 | 54 | echo "finished splitting data" 55 | fi -------------------------------------------------------------------------------- /data/synthetic/generate_data.sh: -------------------------------------------------------------------------------- 1 | # Parse arguments 2 | while [[ $# -gt 0 ]] 3 | do 4 | key="$1" 5 | 6 | case $key in 7 | -nw) 8 | NUM_WORKERS="$2" 9 | shift # past argument 10 | shift # past value 11 | ;; 12 | -nc) 13 | NUM_CLASSES="$2" 14 | shift # past argument 15 | shift # past value 16 | ;; 17 | -dim) 18 | DIMENSION="$2" 19 | shift # past argument 20 | shift # past value 21 | ;; 22 | --tf) 23 | TFRAC="$2" 24 | shift # past argument 25 | shift # past value 26 | ;; 27 | --seed) 28 | SEED="$2" 29 | shift # past argument 30 | ;; 31 | --default) 32 | DEFAULT=YES 33 | shift # past argument 34 | ;; 35 | *) # unknown option 36 | POSITIONAL+=("$1") # save it in an array for later 37 | shift # past argument 38 | ;; 39 | esac 40 | done 41 | 42 | NUM_WORKERS_TAG="" 43 | if [ ! -z $NUM_WORKERS ]; then 44 | NUM_WORKERS_TAG="--num_workers $NUM_WORKERS" 45 | fi 46 | 47 | NUM_CLASSE_TAG="" 48 | if [ ! -z $NUM_CLASSES ]; then 49 | NUM_CLASSES_TAG="--num_classes $NUM_CLASSES" 50 | fi 51 | 52 | DIMENSION_TAG="" 53 | if [ ! -z $DIMENSION ]; then 54 | DIMENSION_TAG="--dimension $DIMENSION" 55 | fi 56 | 57 | TFRACTAG="" 58 | if [ ! -z $TFRAC ]; then 59 | TFRAC_TAG="--tr_frac $TFRAC" 60 | fi 61 | 62 | SEED_TAG="" 63 | if [ ! -z $SEED ]; then 64 | SEED_TAG="--seed $SEED" 65 | fi 66 | 67 | 68 | if [ ! -d "all_data" ]; then 69 | mkdir all_data 70 | fi 71 | 72 | 73 | if [ ! -f all_data/all_data.json ]; then 74 | echo "------------------------------" 75 | echo "generating data" 76 | 77 | python3 generate_data.py $NUM_WORKERS_TAG $NUM_CLASSES_TAG $DIMENSION_TAG $SEED_TAG 78 | 79 | echo "finished generating data" 80 | fi 81 | 82 | if [ ! 
-f test/test.json ]; then 83 | echo "------------------------------" 84 | echo "spliting data" 85 | mkdir train 86 | mkdir test 87 | 88 | python3 split_data.py $TFRACTAG $SEED_TAG 89 | 90 | echo "finished splitting data" 91 | fi -------------------------------------------------------------------------------- /loaders/shakespeare.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import string 4 | 5 | 6 | class CharacterDataset(Dataset): 7 | def __init__(self, file_path, chunk_len, device): 8 | """ 9 | Dataset for next character prediction, each sample represents an input sequence of characters 10 | and a target sequence of characters representing to next sequence of the input 11 | :param file_path: path to .txt file containing the training corpus 12 | :param chunk_len: (int) the length of the input and target sequences 13 | :param device: 14 | """ 15 | self.all_characters = string.printable 16 | self.n_characters = len(self.all_characters) 17 | self.chunk_len = chunk_len 18 | self.device = device 19 | f = open(file_path, 'r') 20 | self.text = f.read() 21 | 22 | def __len__(self): 23 | return len(self.text) // (self.chunk_len + 1) 24 | 25 | def __getitem__(self, idx): 26 | input_ = torch.zeros(self.chunk_len).long() 27 | for c in range(self.chunk_len): 28 | input_[c] = self.all_characters.index(self.text[idx + c]) 29 | 30 | target = torch.zeros(self.chunk_len).long() 31 | for c in range(self.chunk_len): 32 | target[c] = self.all_characters.index(self.text[idx + c + 1]) 33 | 34 | return input_.to(self.device), target.to(self.device) 35 | 36 | 37 | def get_iterator_shakespeare(file_path, device, batch_size, chunk_len=200): 38 | """ 39 | get next character prediction DataLoader, yields `batch_size` batches of `CharacterDataset` samples 40 | :param file_path: path to .txt file containing the training corpus 41 | :param chunk_len: (int) the length of the input and target sequences 42 | :param device: 43 | :param batch_size 44 | :return: iterator over shakespeare dataset samples 45 | """ 46 | dataset = CharacterDataset(file_path, chunk_len, device) 47 | iterator = DataLoader(dataset, shuffle=True, batch_size=batch_size) 48 | 49 | return iterator 50 | -------------------------------------------------------------------------------- /reproduce_results.py: -------------------------------------------------------------------------------- 1 | from utils.utils import args_to_string, loggs_to_json 2 | from utils.args import parse_args 3 | 4 | import os 5 | import json 6 | 7 | 8 | trsh_dict = {"gaia": 0.65, 9 | "amazon_us": 0.55, 10 | "geantdistance": 0.55, 11 | "exodus": 0.5, 12 | "ebone": 0.5} 13 | 14 | lr_dict = {"gaia": "1e-3", 15 | "amazon_us": "1e-3", 16 | "geantdistance": "1e-3", 17 | "exodus": "1e-1", 18 | "ebone": "1e-1"} 19 | 20 | for network_name in ["gaia", "amazon_us", "geantdistance", "exodus", "ebone"]: 21 | print("{}:".format(network_name)) 22 | args = parse_args(["inaturalist", 23 | "--network", network_name, 24 | "--bz", "16", 25 | "--lr", lr_dict[network_name], 26 | "--decay", "sqrt", 27 | "--local_steps", "1"]) 28 | 29 | args_string = args_to_string(args) 30 | 31 | loggs_dir = os.path.join("loggs", args_to_string(args)) 32 | loggs_to_json(loggs_dir) 33 | 34 | loggs_dir_path = os.path.join("loggs", args_to_string(args)) 35 | path_to_json = os.path.join("results", "json", "{}.json".format(os.path.split(loggs_dir_path)[1])) 36 | with open(path_to_json, "r") as f: 37 | data = 
json.load(f) 38 | 39 | for architecture in ["centralized", "ring", "matcha"]: 40 | values = data['Train/Acc'][architecture] 41 | rounds = data["Round"][architecture] 42 | 43 | ii = -1 44 | for ii, value in enumerate(values): 45 | if value > trsh_dict[network_name]: 46 | break 47 | 48 | try: 49 | print("Number of steps to achieve {}% is {} on {} using {}".format(int(trsh_dict[network_name] * 100), 50 | rounds[ii], network_name, architecture)) 51 | except IndexError: 52 | print("Number of steps to achieve {}% is {} on {} using {}".format(int(trsh_dict[network_name] * 100), 53 | rounds[-1], network_name, architecture)) 54 | 55 | print("#" * 10) 56 | -------------------------------------------------------------------------------- /loaders/femnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | import torch 5 | from torch.utils.data import Dataset, DataLoader 6 | from torchvision.transforms import Compose, ToTensor, Normalize, Resize 7 | from PIL import Image 8 | 9 | 10 | class FEMNIST(Dataset): 11 | def __init__(self, pickle_file, root_path, device, transforms=None): 12 | """ 13 | FEMNIST Dataset generated from a .pkl containing a list of tuples 14 | each of them representing a path to an image and it class 15 | :param pickle_file: path to .pkl file 16 | :param root_path: path to the directory containing images 17 | :param device: 18 | :param transforms: list of transformation to apply to images 19 | """ 20 | self.root_path = root_path 21 | self.device = device 22 | with open(pickle_file, 'rb') as f: 23 | self.data = pickle.load(f) 24 | 25 | self.transforms = transforms 26 | 27 | def __getitem__(self, idx): 28 | img_path, label = self.data[idx] 29 | 30 | img = Image.open(os.path.join(self.root_path, img_path)) 31 | label = torch.tensor(label).to(self.device) 32 | 33 | if self.transforms: 34 | img = self.transforms(img).to(self.device) 35 | 36 | return img, label 37 | 38 | def __len__(self): 39 | return len(self.data) 40 | 41 | 42 | def get_iterator_femnist(file_path, device, batch_size=1): 43 | """ 44 | returns an iterator over FEMNIST dataset batches 45 | :param file_path: path to .pkl file containing a list of tuples 46 | each of them representing a path to an image and it class 47 | :param device: 48 | :param batch_size: 49 | :return: torch.utils.DataLoader object constructed from FEMNIST dataset object 50 | """ 51 | root_path = os.path.join("data", "femnist") 52 | 53 | transforms = Compose([Resize(28), 54 | ToTensor(), 55 | Normalize((0.1307,), (0.3081,)) 56 | ]) 57 | 58 | dataset = FEMNIST(file_path, device=device, root_path=root_path, transforms=transforms) 59 | iterator = DataLoader(dataset, shuffle=True, batch_size=batch_size) 60 | 61 | return iterator 62 | -------------------------------------------------------------------------------- /loaders/inaturalist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | import torch 5 | from torch.utils.data import Dataset, DataLoader 6 | from torchvision.transforms import Compose, ToTensor, Normalize, CenterCrop 7 | from PIL import Image 8 | 9 | 10 | class INaturalist(Dataset): 11 | def __init__(self, pickle_file, root_path, device, transforms=None): 12 | """ 13 | iNaturalist Dataset generated from a .pkl containing a list of tuples 14 | each of them representing a path to an image and it class 15 | :param pickle_file: path to .pkl file 16 | :param root_path: path to the directory containing images 
17 | :param device: 18 | :param transforms: list of transformation to apply to images 19 | """ 20 | self.root_path = root_path 21 | self.device = device 22 | with open(pickle_file, 'rb') as f: 23 | self.data = pickle.load(f) 24 | 25 | self.transforms = transforms 26 | 27 | def __getitem__(self, idx): 28 | img_path, label = self.data[idx] 29 | 30 | img = Image.open(os.path.join(self.root_path, img_path)).convert("RGB") 31 | label = torch.tensor(label).to(self.device) 32 | 33 | if self.transforms: 34 | img = self.transforms(img).to(self.device) 35 | 36 | return img, label 37 | 38 | def __len__(self): 39 | return len(self.data) 40 | 41 | 42 | def get_iterator_inaturalist(file_path, device, batch_size=1): 43 | """ 44 | returns an iterator over iNaturalist dataset batches 45 | :param file_path: path to .pkl file containing a list of tuples 46 | each of them representing a path to an image and it class 47 | :param device: 48 | :param batch_size: 49 | :return: torch.utils.DataLoader object constructed from INaturalist dataset object 50 | """ 51 | root_path = os.path.join("data", "inaturalist") 52 | 53 | transforms = Compose([CenterCrop((224, 224)), 54 | ToTensor(), 55 | Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 56 | ]) 57 | 58 | dataset = INaturalist(file_path, device=device, root_path=root_path, transforms=transforms) 59 | iterator = DataLoader(dataset, shuffle=True, batch_size=batch_size) 60 | 61 | return iterator 62 | -------------------------------------------------------------------------------- /data/synthetic/split_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import json 4 | import random 5 | import time 6 | import numpy as np 7 | from sklearn.model_selection import train_test_split 8 | 9 | parser = argparse.ArgumentParser() 10 | 11 | 12 | parser.add_argument('--tr_frac', 13 | help='fraction in training set; default: 0.8;', 14 | type=float, 15 | default=0.8) 16 | parser.add_argument('--seed', 17 | help='args.seed for random partitioning of test/train data', 18 | type=int, 19 | default=None) 20 | 21 | parser.set_defaults(user=False) 22 | 23 | args = parser.parse_args() 24 | 25 | 26 | if __name__ == "__main__": 27 | rng_seed = (args.seed if (args.seed is not None and args.seed >= 0) else int(time.time())) 28 | rng = random.Random(rng_seed) 29 | 30 | data_file = os.path.join('all_data', 'all_data.json') 31 | 32 | with open(data_file, 'r') as inf: 33 | data = json.load(inf) 34 | 35 | X_list = {"train": [], "test": []} 36 | y_list = {"train": [], "test": []} 37 | 38 | num_classes = data['num_classes'] 39 | 40 | for worker in data['users']: 41 | train_file = os.path.join("train", "{}.json".format(worker)) 42 | 43 | worker_data = data['user_data'][worker] 44 | X = np.array(worker_data['x']) 45 | y = np.array(worker_data['y']) 46 | 47 | X_train, X_test, y_train, y_test = train_test_split( 48 | X, y, train_size=args.tr_frac, random_state=args.seed) 49 | 50 | X_list["train"].append(X_train) 51 | y_list["train"].append(y_train) 52 | X_list["test"].append(X_test) 53 | y_list["test"].append(y_test) 54 | 55 | json_data_train = {"x": X_train.tolist(), "y": y_train.tolist(), "num_classes": num_classes} 56 | 57 | with open(train_file, 'w') as outfile: 58 | json.dump(json_data_train, outfile) 59 | 60 | for key in ["train", "test"]: 61 | X = np.vstack(X_list[key]) 62 | y = np.concatenate(y_list[key]) 63 | 64 | file = os.path.join(key, "{}.json".format(key)) 65 | json_data = {"x": X.tolist(), "y": y.tolist(), 
"num_classes": num_classes} 66 | with open(file, 'w') as outfile: 67 | json.dump(json_data, outfile) 68 | 69 | -------------------------------------------------------------------------------- /data/inaturalist/preprocess.sh: -------------------------------------------------------------------------------- 1 | while [[ $# -gt 0 ]] 2 | do 3 | key="$1" 4 | 5 | case $key in 6 | --network) 7 | NETWORK_NAME="$2" 8 | shift # past argument 9 | shift # past value 10 | ;; 11 | --sf) 12 | SFRAC="$2" 13 | shift # past argument 14 | shift # past value 15 | ;; 16 | --tf) 17 | TFRAC="$2" 18 | shift # past argument 19 | shift # past value 20 | ;; 21 | --seed) 22 | SEED="$2" 23 | shift # past argument 24 | ;; 25 | --default) 26 | DEFAULT=YES 27 | shift # past argument 28 | ;; 29 | *) # unknown option 30 | POSITIONAL+=("$1") # save it in an array for later 31 | shift # past argument 32 | ;; 33 | esac 34 | done 35 | 36 | NETWORK_NAME_TAG="" 37 | if [ ! -z $NETWORK_NAME ]; then 38 | NETWORK_NAME_TAG="--network $NETWORK_NAME" 39 | fi 40 | 41 | SFRAC_TAG="" 42 | if [ ! -z $SFRAC ]; then 43 | SFRAC_TAG="--s_frac $SFRAC" 44 | fi 45 | 46 | TFRAC_TAG="" 47 | if [ ! -z $TFRAC ]; then 48 | TFRAC_TAG="--tr_frac $TFRAC" 49 | fi 50 | 51 | SEED_TAG="" 52 | if [ ! -z $SEED ]; then 53 | SEED_TAG="--seed $SEED" 54 | fi 55 | 56 | if [ ! -f raw_data/train2018.json ]; then 57 | echo "------------------------------" 58 | echo "downloading annotations and locations" 59 | 60 | cd raw_data 61 | wget http://www.vision.caltech.edu/~gvanhorn/datasets/inaturalist/fgvc5_competition/val2018.json.tar.gz 62 | wget http://www.vision.caltech.edu/~gvanhorn/datasets/inaturalist/fgvc5_competition/inat2018_locations.zip 63 | wget http://www.vision.caltech.edu/~gvanhorn/datasets/inaturalist/fgvc5_competition/train2018.json.tar.gz 64 | unzip inat2018_locations.zip -d . 65 | tar -xf val2018.json.tar.gz -C . 66 | tar -xf train2018.json.tar.gz -C . 67 | 68 | rm inat2018_locations.zip 69 | rm val2018.json.tar.gz 70 | rm train2018.json.tar.gz 71 | mv inat2018_locations/* . 72 | rm -r inat2018_locations 73 | echo "finished downloading annotations and locations" 74 | cd ../ 75 | fi 76 | 77 | if [ ! 
-f test/test.json ]; then 78 | echo "------------------------------" 79 | echo "spliting data" 80 | mkdir train 81 | mkdir test 82 | 83 | python3 split_data.py $NETWORK_NAME_TAG $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG 84 | 85 | echo "finished splitting data" 86 | fi -------------------------------------------------------------------------------- /graph_utils/show_networks.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate .html file with world map and positions of workers and links used in the overlay 3 | """ 4 | import argparse 5 | import os 6 | import time 7 | import mplleaflet 8 | import matplotlib.pyplot as plt 9 | import networkx as nx 10 | from geopy.geocoders import Nominatim 11 | 12 | 13 | geolocator = Nominatim(user_agent="delay", timeout=20) 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument( 17 | 'underlay', 18 | help='name of the underlay network; should be present in "/data"', 19 | type=str) 20 | parser.add_argument( 21 | 'architecture', 22 | help='name of the architecture; should be present in "results/$UNDERLAY"', 23 | type=str) 24 | 25 | parser.set_defaults(user=False) 26 | 27 | args = parser.parse_args() 28 | 29 | if __name__ == "__main__": 30 | underlay_path = os.path.join("data", "{}.gml".format(args.underlay)) 31 | overlay_path = os.path.join("results", args.underlay, "{}.gml".format(args.architecture)) 32 | 33 | underlay = nx.read_gml(underlay_path) 34 | 35 | pos_dict = {} 36 | for node in underlay.nodes(): 37 | try: 38 | pos_dict[node] = [underlay.nodes(data=True)[node]["Longitude"], 39 | underlay.nodes(data=True)[node]["Latitude"]] 40 | 41 | except KeyError: 42 | time.sleep(1.2) # To avoid Service time out Error 43 | 44 | geo = geolocator.geocode(node, timeout=20) 45 | pos_dict[node] = [geo.longitude, geo.latitude] 46 | 47 | overlay = nx.read_gml(overlay_path).to_undirected() 48 | 49 | mapping = {} 50 | for ii, node in enumerate(underlay.nodes()): 51 | mapping[str(ii)] = node 52 | 53 | overlay = nx.relabel_nodes(overlay, mapping).to_undirected() 54 | 55 | fig, ax = plt.subplots() 56 | 57 | nx.draw_networkx_nodes(overlay, pos=pos_dict, node_size=10, node_color='red', edge_color='k', alpha=.5, 58 | with_labels=True) 59 | nx.draw_networkx_edges(overlay, pos=pos_dict, edge_color='blue', alpha=1, width=5.0) 60 | nx.draw_networkx_labels(overlay, pos=pos_dict, label_pos=10.3) 61 | 62 | mplleaflet.display(fig=ax.figure) 63 | mplleaflet.save_html(fig=ax.figure, 64 | fileobj=os.path.join("results", args.underlay, "{}.html".format(args.architecture))) 65 | -------------------------------------------------------------------------------- /graph_utils/utils/mbst.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | from networkx.algorithms.tournament import hamiltonian_path 4 | 5 | 6 | def cube_algorithm(G_complete): 7 | """ 8 | Use cube algorithm to build an approximation for the 2-MBST problem on G: 9 | 1. Add edges to G to build complete graph G_complete 10 | 2. Build an MST T of G_complete 11 | 3. Build the the cube of T 12 | 4. 
find a Hamiltonian path in the cube of T 13 | :param G : (nx.Graph()) 14 | """ 15 | T = nx.minimum_spanning_tree(G_complete, weight="weight") 16 | 17 | T_cube = nx.Graph() 18 | T_cube.add_nodes_from(T.nodes(data=True)) 19 | 20 | shortest_paths = nx.shortest_path_length(T) 21 | for source, lengths_dict in shortest_paths: 22 | for target in lengths_dict: 23 | if lengths_dict[target] <= 3: 24 | T_cube.add_edge(source, target, 25 | weight=G_complete.get_edge_data(source, target)["weight"]) 26 | 27 | ham_path = hamiltonian_path(T_cube.to_directed()) 28 | 29 | result = nx.Graph() 30 | result.add_nodes_from(G_complete.nodes(data=True)) 31 | 32 | for idx in range(len(ham_path) - 1): 33 | result.add_edge(ham_path[idx], ham_path[idx + 1], 34 | weight=G_complete.get_edge_data(ham_path[idx], ham_path[idx + 1])['weight']) 35 | 36 | return result 37 | 38 | 39 | def delta_prim(G_complete, delta): 40 | """ 41 | implementation of delta prim algorithm from https://ieeexplore.ieee.org/document/850653 42 | :param G: (nx.Graph()) 43 | :param delta: (int) 44 | :return: a tree T with degree at most delta 45 | """ 46 | N = G_complete.number_of_nodes() 47 | T = nx.Graph() 48 | 49 | T.add_node(list(G_complete.nodes)[0]) 50 | 51 | while len(T.edges) < N - 1: 52 | smallest_weight = np.inf 53 | edge_to_add = None 54 | for u in T.nodes: 55 | for v in G_complete.nodes: 56 | if (v not in T.nodes) and (T.degree[u] < delta): 57 | weight = G_complete.get_edge_data(u, v)["weight"] 58 | if weight < smallest_weight: 59 | smallest_weight = weight 60 | edge_to_add = (u, v) 61 | 62 | T.add_edge(*edge_to_add, weight=smallest_weight) 63 | 64 | T.add_nodes_from(G_complete.nodes(data=True)) 65 | 66 | return T 67 | -------------------------------------------------------------------------------- /make_table3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from utils.args import parse_args 5 | from utils.utils import args_to_string, loggs_to_json 6 | 7 | trsh_dict = {"gaia": 0.65, 8 | "amazon_us": 0.55, 9 | "geantdistance": 0.55, 10 | "exodus": 0.5, 11 | "ebone": 0.5} 12 | 13 | lr_dict = {"gaia": "1e-3", 14 | "amazon_us": "1e-3", 15 | "geantdistance": "1e-3", 16 | "exodus": "1e-1", 17 | "ebone": "1e-1"} 18 | 19 | if __name__ == "__main__": 20 | for network_name in ["gaia", "amazon_us", "geantdistance", "exodus", "ebone"]: 21 | print("{}:".format(network_name)) 22 | args = parse_args(["inaturalist", 23 | "--network", network_name, 24 | "--bz", "16", 25 | "--lr", lr_dict[network_name], 26 | "--decay", "sqrt", 27 | "--local_steps", "1"]) 28 | 29 | args_string = args_to_string(args) 30 | 31 | loggs_dir = os.path.join("loggs", args_to_string(args)) 32 | loggs_to_json(loggs_dir) 33 | 34 | loggs_dir_path = os.path.join("loggs", args_to_string(args)) 35 | path_to_json = os.path.join("results", "json", "{}.json".format(os.path.split(loggs_dir_path)[1])) 36 | with open(path_to_json, "r") as f: 37 | data = json.load(f) 38 | 39 | for architecture in ["centralized", "ring", "matcha"]: 40 | values = data['Train/Acc'][architecture] 41 | rounds = data["Round"][architecture] 42 | 43 | for ii, value in enumerate(values): 44 | if value > trsh_dict[network_name]: 45 | break 46 | 47 | try: 48 | print("Number of steps to achieve {}% is {} on {} using {}".format(int(trsh_dict[network_name] * 100), 49 | rounds[ii], network_name, 50 | architecture)) 51 | except IndexError: 52 | print("Number of steps to achieve {}% is {} on {} using {}".format(int(trsh_dict[network_name] * 100), 53 | 
rounds[-1], network_name, 54 | architecture)) 55 | 56 | print("#" * 10) -------------------------------------------------------------------------------- /communication_module/worker.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | from utils.utils import get_network, get_iterator, get_model 7 | 8 | 9 | EXTENSIONS = {"synthetic": ".json", "sent140": ".json", "femnist": ".pkl", "shakespeare": ".txt"} 10 | 11 | 12 | class Worker(object): 13 | def __init__(self, args, rank): 14 | self.rank = rank 15 | self.local_steps = args.local_steps 16 | self.device = args.device 17 | self.num_gpu = torch.cuda.device_count() 18 | self.batch_size = args.bz 19 | self.network = get_network(args.network_name, args.architecture) 20 | self.world_size = self.network.number_of_nodes() + 1 # we add node representing the network manager 21 | self.fit_by_epoch = args.fit_by_epoch 22 | self.initial_lr = args.lr 23 | self.optimizer_name = args.optimizer 24 | self.lr_scheduler_name = args.decay 25 | 26 | if self.device == "cuda": 27 | if torch.cuda.is_available(): 28 | print(f"{rank} get gpu {self.rank % self.num_gpu}") 29 | self.device = "cuda:"+str(self.rank % self.num_gpu) 30 | else: 31 | print("No GPU is available on the system") 32 | raise TypeError 33 | elif self.device != "cpu": 34 | print("Please choose device be either cuda or cpu") 35 | raise TypeError 36 | 37 | self.data_dir = os.path.join("data", args.experiment, "train") 38 | self.data_path = os.path.join(self.data_dir, str(rank) + EXTENSIONS[args.experiment]) 39 | 40 | self.iterator = get_iterator(args.experiment, self.data_path, self.device, self.batch_size) 41 | 42 | self.model = get_model(args.experiment, self.device, self.iterator, 43 | optimizer_name=self.optimizer_name, lr_scheduler=self.lr_scheduler_name, 44 | initial_lr=self.initial_lr) 45 | 46 | def communicate(self): 47 | 48 | if self.fit_by_epoch: 49 | self.model.fit_iterator(train_iterator=self.iterator, n_epochs=self.local_steps) 50 | else: 51 | self.model.fit_batches(iterator=self.iterator, n_steps=self.local_steps) 52 | 53 | for ii, param in enumerate(self.model.net.parameters()): 54 | dist.gather(tensor=param.data, dst=self.world_size - 1) 55 | 56 | for ii, param in enumerate(self.model.net.parameters()): 57 | dist.scatter(tensor=param.data, src=self.world_size - 1) 58 | -------------------------------------------------------------------------------- /data/femnist/get_file_dirs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creates .pkl files for: 3 | 1. list of directories of every image in 'by_class' 4 | 2. list of directories of every image in 'by_write' 5 | the hierarchal structure of the data is as follows: 6 | - by_class -> classes -> folders containing images -> images 7 | - by_write -> folders containing writers -> writer -> types of images -> images 8 | the directories written into the files are of the form 'raw_data/...' 
9 | """ 10 | import os 11 | import pickle 12 | 13 | 14 | def save_obj(obj, name): 15 | with open(name + '.pkl', 'wb') as f: 16 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 17 | 18 | 19 | class_files = [] # (class, file directory) 20 | write_files = [] # (writer, file directory) 21 | 22 | class_dir = os.path.join('raw_data', 'by_class') 23 | classes = os.listdir(class_dir) 24 | classes = [c for c in classes if len(c) == 2] 25 | 26 | for cl in classes: 27 | cldir = os.path.join(class_dir, cl) 28 | rel_cldir = os.path.join(class_dir, cl) 29 | subcls = os.listdir(cldir) 30 | 31 | subcls = [s for s in subcls if (('hsf' in s) and ('mit' not in s))] 32 | 33 | for subcl in subcls: 34 | subcldir = os.path.join(cldir, subcl) 35 | rel_subcldir = os.path.join(rel_cldir, subcl) 36 | images = os.listdir(subcldir) 37 | image_dirs = [os.path.join(rel_subcldir, i) for i in images] 38 | 39 | for image_dir in image_dirs: 40 | class_files.append((cl, image_dir)) 41 | 42 | 43 | write_dir = os.path.join('raw_data', 'by_write') 44 | write_parts = os.listdir(write_dir) 45 | 46 | for write_part in write_parts: 47 | writers_dir = os.path.join(write_dir, write_part) 48 | rel_writers_dir = os.path.join(write_dir, write_part) 49 | writers = os.listdir(writers_dir) 50 | 51 | for writer in writers: 52 | writer_dir = os.path.join(writers_dir, writer) 53 | rel_writer_dir = os.path.join(rel_writers_dir, writer) 54 | wtypes = os.listdir(writer_dir) 55 | 56 | for wtype in wtypes: 57 | type_dir = os.path.join(writer_dir, wtype) 58 | rel_type_dir = os.path.join(rel_writer_dir, wtype) 59 | images = os.listdir(type_dir) 60 | image_dirs = [os.path.join(rel_type_dir, i) for i in images] 61 | 62 | for image_dir in image_dirs: 63 | write_files.append((writer, image_dir)) 64 | 65 | save_obj( 66 | class_files, 67 | os.path.join('intermediate', 'class_file_dirs')) 68 | save_obj( 69 | write_files, 70 | os.path.join('intermediate', 'write_file_dirs')) -------------------------------------------------------------------------------- /models/inaturalist/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.optim import get_optimizer, get_lr_scheduler 4 | from torchvision.models import resnet18 5 | from ..model import Model 6 | 7 | NUMBER_CLASSES = 80 8 | 9 | 10 | class INaturalistCNN(Model): 11 | def __init__(self, criterion, metric, device, 12 | optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3, epoch_size=1, coeff=1): 13 | super(Model, self).__init__() 14 | 15 | self.net = resnet18(pretrained=True) 16 | self.net.fc = nn.Linear(self.net.fc.in_features, NUMBER_CLASSES) 17 | self.net = self.net.to(device) 18 | self.criterion = criterion 19 | self.metric = metric 20 | self.device = device 21 | self.coeff = coeff 22 | 23 | self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr) 24 | self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size) 25 | 26 | def fit_iterator_one_epoch(self, iterator): 27 | epoch_loss = 0 28 | epoch_acc = 0 29 | 30 | self.net.train() 31 | 32 | for x, y in iterator: 33 | self.optimizer.zero_grad() 34 | 35 | predictions = self.net(x) 36 | 37 | loss = self.coeff * self.criterion(predictions, y) 38 | 39 | acc = self.metric(predictions, y) 40 | 41 | loss.backward() 42 | 43 | self.optimizer.step() 44 | self.lr_scheduler.step() 45 | 46 | epoch_loss += loss.item() 47 | epoch_acc += acc.item() 48 | 49 | return epoch_loss / len(iterator), epoch_acc / len(iterator) 50 | 51 | def 
fit_batch(self, iterator, update=True): 52 | self.net.train() 53 | 54 | x, y = next(iter(iterator)) 55 | 56 | self.optimizer.zero_grad() 57 | 58 | predictions = self.net(x) 59 | 60 | loss = self.criterion(predictions, y) 61 | 62 | acc = self.metric(predictions, y) 63 | 64 | loss.backward() 65 | 66 | if update: 67 | self.optimizer.step() 68 | self.lr_scheduler.step() 69 | 70 | batch_loss = loss.item() 71 | batch_acc = acc.item() 72 | 73 | return batch_loss, batch_acc 74 | 75 | def evaluate_iterator(self, iterator): 76 | epoch_loss = 0 77 | epoch_acc = 0 78 | 79 | self.net.eval() 80 | 81 | with torch.no_grad(): 82 | for x, y in iterator: 83 | predictions = self.net(x) 84 | 85 | loss = self.criterion(predictions, y) 86 | 87 | acc = self.metric(predictions, y) 88 | 89 | epoch_loss += loss.item() 90 | epoch_acc += acc.item() 91 | 92 | return epoch_loss / len(iterator), epoch_acc / len(iterator) -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | from torch.multiprocessing import Process 3 | import torch.distributed as dist 4 | import torch 5 | 6 | from utils.args import parse_args 7 | from utils.utils import loggs_to_json, args_to_string 8 | from communication_module.worker import Worker 9 | from communication_module.manager import Peer2PeerManager, CentralizedManager 10 | from communication import CentralizedNetwork, Peer2PeerNetwork, MATCHANetwork, RingNetwork 11 | 12 | 13 | def run(rank, size, arguments): 14 | torch.manual_seed(0) 15 | torch.backends.cudnn.deterministic = True 16 | torch.backends.cudnn.benchmark = False 17 | 18 | if rank == size - 1: 19 | if arguments.architecture == "centralized": 20 | node = CentralizedManager(arguments) 21 | else: 22 | node = Peer2PeerManager(arguments) 23 | else: 24 | node = Worker(arguments, rank) 25 | 26 | for _ in range(arguments.n_rounds): 27 | node.communicate() 28 | 29 | 30 | def init_process(rank, size, arguments, fn, backend='gloo'): 31 | """ Initialize the distributed environment. 
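    Illustrative launch, with example values only (the dataset, network name and hyper-parameters
    below are placeholders, not a prescribed configuration):

        python main.py shakespeare --network_name gaia --architecture ring --parallel --n_rounds 100 --bz 128

    With --parallel, the __main__ block below spawns num_workers + 1 processes; each process calls
    this function, joins the same 'gloo' process group through MASTER_ADDR/MASTER_PORT, and then
    executes `fn` (here `run`), where the last rank acts as the network manager and the remaining
    ranks act as Workers.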
""" 32 | os.environ['MASTER_ADDR'] = '127.0.0.1' 33 | os.environ['MASTER_PORT'] = '29500' 34 | dist.init_process_group(backend, rank=rank, world_size=size) 35 | fn(rank, size, arguments) 36 | 37 | 38 | if __name__ == "__main__": 39 | torch.manual_seed(0) 40 | torch.backends.cudnn.deterministic = True 41 | torch.backends.cudnn.benchmark = False 42 | 43 | args = parse_args() 44 | 45 | if args.parallel: 46 | print("Run experiment in parallel settings using torch.dist..") 47 | 48 | processes = [] 49 | world_size = args.num_workers + 1 # We add an extra node that plays the role of network manager 50 | for rank_ in range(world_size): 51 | p = Process(target=init_process, args=(rank_, world_size, args, run)) 52 | p.start() 53 | processes.append(p) 54 | 55 | for p in processes: 56 | p.join() 57 | 58 | else: 59 | print("Run experiment in sequential setting..") 60 | 61 | if args.architecture == "centralized": 62 | network = CentralizedNetwork(args) 63 | elif args.architecture == "matcha" or args.architecture == "matcha+" or\ 64 | args.architecture == "matcha+mst" or args.architecture == "matcha+ring" or\ 65 | args.architecture == "matcha+delta_mbst": 66 | network = MATCHANetwork(args) 67 | elif args.architecture == "dynamic_ring": 68 | network = RingNetwork(args) 69 | else: 70 | network = Peer2PeerNetwork(args) 71 | 72 | for k in range(args.n_rounds): 73 | network.mix() 74 | 75 | network.write_logs() 76 | 77 | loggs_dir = os.path.join("loggs", args_to_string(args)) 78 | loggs_to_json(loggs_dir) 79 | -------------------------------------------------------------------------------- /models/model.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | import torch 3 | import time 4 | 5 | 6 | def epoch_time(start_time, end_time): 7 | elapsed_time = end_time - start_time 8 | elapsed_mins = int(elapsed_time / 60) 9 | elapsed_secs = int(elapsed_time - (elapsed_mins * 60)) 10 | return elapsed_mins, elapsed_secs 11 | 12 | 13 | class Model(ABC): 14 | @abstractmethod 15 | def __init__(self): 16 | pass 17 | 18 | @abstractmethod 19 | def fit_iterator_one_epoch(self, iterator): 20 | pass 21 | 22 | @abstractmethod 23 | def fit_batch(self, iterator): 24 | pass 25 | 26 | @abstractmethod 27 | def evaluate_iterator(self, iterator): 28 | pass 29 | 30 | def update_from_model(self, model): 31 | """ 32 | update parameters using gradients from another model 33 | :param model: Model() object, gradients should be precomputed; 34 | """ 35 | for param_idx, param in enumerate(self.net.parameters()): 36 | param.grad = list(model.net.parameters())[param_idx].grad.data.clone() 37 | 38 | self.optimizer.step() 39 | self.lr_scheduler.step() 40 | 41 | def fit_batches(self, iterator, n_steps): 42 | global_loss = 0 43 | global_acc = 0 44 | 45 | for step in range(n_steps): 46 | batch_loss, batch_acc = self.fit_batch(iterator) 47 | global_loss += batch_loss 48 | global_acc += batch_acc 49 | 50 | return global_loss / n_steps, global_acc / n_steps 51 | 52 | def fit_iterator(self, train_iterator, val_iterator=None, n_epochs=1, path=None, verbose=0): 53 | best_valid_loss = float('inf') 54 | 55 | for epoch in range(n_epochs): 56 | 57 | start_time = time.time() 58 | 59 | train_loss, train_acc = self.fit_iterator_one_epoch(train_iterator) 60 | if val_iterator: 61 | valid_loss, valid_acc = self.evaluate_iterator(val_iterator) 62 | 63 | end_time = time.time() 64 | 65 | epoch_mins, epoch_secs = epoch_time(start_time, end_time) 66 | 67 | if val_iterator: 68 | if valid_loss < 
best_valid_loss: 69 | best_valid_loss = valid_loss 70 | if path: 71 | torch.save(self.net, path) 72 | 73 | if verbose: 74 | print(f'Epoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s') 75 | print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%') 76 | if val_iterator: 77 | print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc * 100:.2f}%') 78 | 79 | def get_param_tensor(self): 80 | param_list = [] 81 | 82 | for param in self.net.parameters(): 83 | param_list.append(param.data.view(-1, )) 84 | 85 | return torch.cat(param_list) 86 | -------------------------------------------------------------------------------- /graph_utils/utils/evaluate_throughput.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | 4 | np.seterr(all="ignore") 5 | 6 | 7 | def cycle_time_decision(G, lambda_0): 8 | """ 9 | Answers the cycle time decision problem question: Is the throughput of G at most lambda ? 10 | :param G: (nx.DiGraph) Strong Weighted Digraph 11 | :param lambda_0: (numerical) 12 | """ 13 | A = nx.adjacency_matrix(G).toarray() 14 | new_A = lambda_0 - A 15 | 16 | new_G = nx.from_numpy_matrix(new_A, create_using=nx.DiGraph()) 17 | 18 | answer = True 19 | try: 20 | nx.bellman_ford_predecessor_and_distance(new_G, 0) 21 | except nx.NetworkXUnbounded: 22 | answer = False 23 | return answer 24 | 25 | 26 | def evaluate_cycle_time(G, s=0): 27 | """ 28 | Evaluate the cycle time of a strong weighted digraph. For now the implementation only supports integer delays 29 | :param G: (nx.DiGraph) strong weighted digraph 30 | :param s: starting point 31 | :return: lambda_G 32 | The cycle time of G 33 | """ 34 | n = len(G) 35 | nodes_to_indices = {node: idx for idx, node in enumerate(G.nodes)} 36 | 37 | # Head 38 | D = np.zeros((n + 1, n)) - np.inf 39 | pi = np.zeros((n + 1, n), dtype=np.int64) - 1 40 | D[0, s] = 0 41 | 42 | # Body 43 | for k in range(1, n + 1): 44 | for v in G.nodes: 45 | for u in G.predecessors(v): 46 | if D[k, nodes_to_indices[v]] < D[k - 1, nodes_to_indices[u]] + G.get_edge_data(u, v)['weight']: 47 | D[k, nodes_to_indices[v]] = D[k - 1, nodes_to_indices[u]] \ 48 | + G.get_edge_data(u, v)['weight'] 49 | 50 | pi[k, nodes_to_indices[v]] = nodes_to_indices[u] 51 | 52 | # Tail 53 | lambda_ = -np.inf 54 | M = np.zeros((n,)) + np.inf 55 | K = np.zeros((n,), dtype=np.int64) - 1 56 | for v in G.nodes: 57 | for k in range(0, n): 58 | if M[nodes_to_indices[v]] > (D[n, nodes_to_indices[v]] - D[k, nodes_to_indices[v]]) / (n - k): 59 | M[nodes_to_indices[v]] = (D[n, nodes_to_indices[v]] - D[k, nodes_to_indices[v]]) / (n - k) 60 | K[nodes_to_indices[v]] = k 61 | 62 | if lambda_ < M[nodes_to_indices[v]]: 63 | lambda_ = M[nodes_to_indices[v]] 64 | v_star = nodes_to_indices[v] 65 | 66 | # Get critical cycle 67 | path = [] 68 | actual = v_star 69 | for i in range(n, -1, -1): 70 | path.append(actual) 71 | actual = pi[i, actual] 72 | 73 | path.reverse() 74 | 75 | return lambda_, path, n - K[v_star] 76 | 77 | 78 | def evaluate_throughput(G): 79 | """ 80 | Evaluate the throughput of a strong weighted digraph. 
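    Illustrative example (hand-picked integer delays): for the two-node digraph below, the only
    cycle has mean weight (3 + 2) / 2 = 2.5, so the cycle time is 2.5 and the throughput is 1 / 2.5 = 0.4.

        >>> G = nx.DiGraph()
        >>> G.add_edge(0, 1, weight=3)
        >>> G.add_edge(1, 0, weight=2)
        >>> evaluate_throughput(G)  # maximum cycle mean is (3 + 2) / 2 = 2.5, hence 1 / 2.5 = 0.4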
For now the implementation only supports integer delays 81 | :param G: (nx.DiGraph) strong weighted digraph 82 | :return: The throughput of G 83 | """ 84 | lambda_, _, _ = evaluate_cycle_time(G) 85 | return 1 / lambda_ 86 | -------------------------------------------------------------------------------- /models/synthetic/linear.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.optim import get_lr_scheduler, get_optimizer 4 | from ..model import Model 5 | 6 | 7 | class LinearLayer(nn.Module): 8 | def __init__(self, input_dimension, num_classes): 9 | super(LinearLayer, self).__init__() 10 | self.input_dimension = input_dimension 11 | self.num_classes = num_classes 12 | self.fc = nn.Linear(input_dimension, num_classes) 13 | 14 | def forward(self, x): 15 | return self.fc(x) 16 | 17 | 18 | class LinearModel(Model): 19 | def __init__(self, criterion, metric, device, input_dimension, num_classes, 20 | optimizer_name="adam", lr_scheduler="cyclic", initial_lr=1e-3, epoch_size=1): 21 | super(Model, self).__init__() 22 | 23 | self.criterion = criterion 24 | self.metric = metric 25 | self.device = device 26 | 27 | self.net = LinearLayer(input_dimension, num_classes).to(self.device) 28 | 29 | self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr) 30 | self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size) 31 | 32 | def fit_iterator_one_epoch(self, iterator): 33 | epoch_loss = 0 34 | epoch_acc = 0 35 | 36 | self.net.train() 37 | 38 | for x, y in iterator: 39 | self.optimizer.zero_grad() 40 | 41 | predictions = self.net(x) 42 | 43 | loss = self.criterion(predictions, y.float()) 44 | 45 | acc = self.metric(predictions, y) 46 | 47 | loss.backward() 48 | 49 | self.optimizer.step() 50 | self.lr_scheduler.step() 51 | 52 | epoch_loss += loss.item() 53 | epoch_acc += acc.item() 54 | 55 | return epoch_loss / len(iterator), epoch_acc / len(iterator) 56 | 57 | def fit_batch(self, iterator, update=True): 58 | self.net.train() 59 | 60 | x, y = next(iter(iterator)) 61 | 62 | self.optimizer.zero_grad() 63 | 64 | predictions = self.net(x) 65 | 66 | loss = self.criterion(predictions, y.float()) 67 | 68 | acc = self.metric(predictions, y) 69 | 70 | loss.backward() 71 | 72 | if update: 73 | self.optimizer.step() 74 | self.lr_scheduler.step() 75 | 76 | batch_loss = loss.item() 77 | batch_acc = acc.item() 78 | 79 | return batch_loss, batch_acc 80 | 81 | def evaluate_iterator(self, iterator): 82 | epoch_loss = 0 83 | epoch_acc = 0 84 | 85 | self.net.eval() 86 | 87 | with torch.no_grad(): 88 | for x, y in iterator: 89 | predictions = self.net(x) 90 | 91 | loss = self.criterion(predictions, y.float()) 92 | 93 | acc = self.metric(predictions, y) 94 | 95 | epoch_loss += loss.item() 96 | epoch_acc += acc.item() 97 | 98 | return epoch_loss / len(iterator), epoch_acc / len(iterator) 99 | 100 | -------------------------------------------------------------------------------- /models/femnist/cnn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from utils.optim import get_optimizer, get_lr_scheduler 4 | import torch 5 | from ..model import Model 6 | 7 | 8 | class CNN(nn.Module): 9 | def __init__(self): 10 | super(CNN, self).__init__() 11 | self.conv1 = nn.Conv2d(3, 32, 3, 1) 12 | self.conv2 = nn.Conv2d(32, 64, 3, 1) 13 | self.dropout1 = nn.Dropout2d(0.25) 14 | self.dropout2 = nn.Dropout2d(0.5) 15 | self.fc1 
= nn.Linear(9216, 128) 16 | self.fc2 = nn.Linear(128, 62) 17 | 18 | def forward(self, x): 19 | x = self.conv1(x) 20 | x = F.relu(x) 21 | x = self.conv2(x) 22 | x = F.relu(x) 23 | x = F.max_pool2d(x, 2) 24 | x = self.dropout1(x) 25 | x = torch.flatten(x, 1) 26 | x = self.fc1(x) 27 | x = F.relu(x) 28 | x = self.dropout2(x) 29 | x = self.fc2(x) 30 | return x 31 | 32 | 33 | class FemnistCNN(Model): 34 | def __init__(self, criterion, metric, device, 35 | optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3, epoch_size=1): 36 | super(Model, self).__init__() 37 | 38 | self.net = CNN().to(device) 39 | self.criterion = criterion 40 | self.metric = metric 41 | self.device = device 42 | 43 | self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr) 44 | self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size) 45 | 46 | def fit_iterator_one_epoch(self, iterator): 47 | epoch_loss = 0 48 | epoch_acc = 0 49 | 50 | self.net.train() 51 | 52 | for x, y in iterator: 53 | self.optimizer.zero_grad() 54 | 55 | predictions = self.net(x) 56 | 57 | loss = self.criterion(predictions, y) 58 | 59 | acc = self.metric(predictions, y) 60 | 61 | loss.backward() 62 | 63 | self.optimizer.step() 64 | self.lr_scheduler.step() 65 | 66 | epoch_loss += loss.item() 67 | epoch_acc += acc.item() 68 | 69 | return epoch_loss / len(iterator), epoch_acc / len(iterator) 70 | 71 | def fit_batch(self, iterator, update=True): 72 | self.net.train() 73 | 74 | x, y = next(iter(iterator)) 75 | 76 | self.optimizer.zero_grad() 77 | 78 | predictions = self.net(x) 79 | 80 | loss = self.criterion(predictions, y) 81 | 82 | acc = self.metric(predictions, y) 83 | 84 | loss.backward() 85 | 86 | if update: 87 | self.optimizer.step() 88 | self.lr_scheduler.step() 89 | 90 | batch_loss = loss.item() 91 | batch_acc = acc.item() 92 | 93 | return batch_loss, batch_acc 94 | 95 | def evaluate_iterator(self, iterator): 96 | epoch_loss = 0 97 | epoch_acc = 0 98 | 99 | self.net.eval() 100 | 101 | with torch.no_grad(): 102 | for x, y in iterator: 103 | predictions = self.net(x) 104 | 105 | loss = self.criterion(predictions, y) 106 | 107 | acc = self.metric(predictions, y) 108 | 109 | epoch_loss += loss.item() 110 | epoch_acc += acc.item() 111 | 112 | return epoch_loss / len(iterator), epoch_acc / len(iterator) -------------------------------------------------------------------------------- /utils/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from utils.utils import get_network 3 | 4 | 5 | def parse_args(args_list=None): 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument( 8 | 'experiment', 9 | help='name of experiment', 10 | type=str) 11 | parser.add_argument( 12 | "--use_weighted_average", 13 | help="if used the weighted average will be optimized, otherwise the average is optimized," 14 | " i,e, all the local functions are treated the same.", 15 | action='store_true' 16 | ) 17 | parser.add_argument( 18 | '--network_name', 19 | help='name of the network;', 20 | type=str 21 | ) 22 | parser.add_argument( 23 | '--architecture', 24 | help='architecture to use, possible: complete, centralized, ring, mst, original and matcha;', 25 | default='original' 26 | ) 27 | parser.add_argument( 28 | '--communication_budget', 29 | type=float, 30 | help='used to fix communication budget when architecture is matcha;', 31 | default=0.5 32 | ) 33 | parser.add_argument( 34 | "--random_ring_proba", 35 | type=float, 36 | help="the probability of using a random ring at 
each step; only used if architecture is ring", 37 | default=0.5 38 | ) 39 | parser.add_argument( 40 | '--parallel', 41 | help='if chosen the training well be run in parallel,' 42 | 'otherwise the training will be run in a sequential fashion;', 43 | action='store_true' 44 | ) 45 | parser.add_argument( 46 | '--fit_by_epoch', 47 | help='if chosen each local step corresponds to one epoch,' 48 | ' otherwise each local step corresponds to one gradient step', 49 | action='store_true' 50 | ) 51 | parser.add_argument( 52 | '--n_rounds', 53 | help='number of communication rounds;', 54 | type=int, 55 | default=1 56 | ) 57 | parser.add_argument( 58 | '--bz', 59 | help='batch_size;', 60 | type=int, 61 | default=1 62 | ) 63 | parser.add_argument( 64 | '--local_steps', 65 | help='number of local steps before communication;', 66 | type=int, 67 | default=1 68 | ) 69 | parser.add_argument( 70 | '--log_freq', 71 | help='number of local steps before communication;', 72 | type=int, 73 | default=1 74 | ) 75 | parser.add_argument( 76 | '--device', 77 | help='device to use, either cpu or gpu;', 78 | type=str, 79 | default="cpu" 80 | ) 81 | parser.add_argument( 82 | '--optimizer', 83 | help='optimizer to be used for the training;', 84 | type=str, 85 | default="adam" 86 | ) 87 | parser.add_argument( 88 | "--lr", 89 | type=float, 90 | help='learning rate', 91 | default=1e-3 92 | ) 93 | parser.add_argument( 94 | "--decay", 95 | help='learning rate decay scheme to be used;' 96 | ' possible are "cyclic", "sqrt", "linear" and "constant"(no learning rate decay);' 97 | 'default is "cyclic"', 98 | type=str, 99 | default="constant" 100 | ) 101 | 102 | if args_list: 103 | args = parser.parse_args(args_list) 104 | else: 105 | args = parser.parse_args() 106 | 107 | network = get_network(args.network_name, args.architecture) 108 | args.num_workers = network.number_of_nodes() 109 | 110 | return args 111 | -------------------------------------------------------------------------------- /graph_utils/utils/tsp_christofides.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from random import randint 3 | 4 | import numpy as np 5 | import networkx as nx 6 | 7 | from networkx.algorithms.matching import max_weight_matching 8 | from networkx.algorithms.euler import eulerian_circuit 9 | 10 | 11 | def christofides_tsp(graph, starting_node=0): 12 | """ 13 | Christofides TSP algorithm 14 | http://www.dtic.mil/dtic/tr/fulltext/u2/a025602.pdf 15 | Args: 16 | graph: 2d numpy array matrix 17 | starting_node: of the TSP 18 | Returns: 19 | tour given by christofies TSP algorithm 20 | Examples: 21 | >>> import numpy as np 22 | >>> graph = np.array([[ 0, 300, 250, 190, 230], 23 | >>> [300, 0, 230, 330, 150], 24 | >>> [250, 230, 0, 240, 120], 25 | >>> [190, 330, 240, 0, 220], 26 | >>> [230, 150, 120, 220, 0]]) 27 | >>> christofides_tsp(graph) 28 | """ 29 | 30 | mst = minimal_spanning_tree(graph, 'Prim', starting_node=0) 31 | odd_degree_nodes = list(_get_odd_degree_vertices(mst)) 32 | odd_degree_nodes_ix = np.ix_(odd_degree_nodes, odd_degree_nodes) 33 | nx_graph = nx.from_numpy_array(-1 * graph[odd_degree_nodes_ix]) 34 | matching = max_weight_matching(nx_graph, maxcardinality=True) 35 | euler_multigraph = nx.MultiGraph(mst) 36 | for edge in matching: 37 | euler_multigraph.add_edge(odd_degree_nodes[edge[0]], odd_degree_nodes[edge[1]], 38 | weight=graph[odd_degree_nodes[edge[0]]][odd_degree_nodes[edge[1]]]) 39 | euler_tour = list(eulerian_circuit(euler_multigraph, source=starting_node)) 40 | 
path = list(itertools.chain.from_iterable(euler_tour)) 41 | return _remove_repeated_vertices(path, starting_node)[:-1] 42 | 43 | 44 | def _get_odd_degree_vertices(graph): 45 | """ 46 | Finds all the odd degree vertices in graph 47 | Args: 48 | graph: 2d np array as adj. matrix 49 | Returns: 50 | Set of vertices that have odd degree 51 | """ 52 | odd_degree_vertices = set() 53 | for index, row in enumerate(graph): 54 | if len(np.nonzero(row)[0]) % 2 != 0: 55 | odd_degree_vertices.add(index) 56 | return odd_degree_vertices 57 | 58 | 59 | def _remove_repeated_vertices(path, starting_node): 60 | path = list(dict.fromkeys(path).keys()) 61 | path.append(starting_node) 62 | return path 63 | 64 | 65 | def minimal_spanning_tree(graph, mode='Prim', starting_node=None): 66 | """ 67 | Args: 68 | graph: weighted adjacency matrix as 2d np.array 69 | mode: method for calculating minimal spanning tree 70 | starting_node: node number to start construction of minimal spanning tree (Prim) 71 | Returns: 72 | minimal spanning tree as 2d array 73 | """ 74 | 75 | if mode == 'Prim': 76 | return _minimal_spanning_tree_prim(graph, starting_node) 77 | 78 | 79 | def _minimal_spanning_tree_prim(graph, starting_node): 80 | """ 81 | Args: 82 | graph: weighted adj. matrix as 2d np.array 83 | starting_node: node number to start construction of minimal spanning tree 84 | Returns: 85 | minimal spanning tree as 2d array calculted by Prim 86 | """ 87 | 88 | node_count = len(graph) 89 | all_nodes = [i for i in range(node_count)] 90 | 91 | if starting_node is None: 92 | starting_node = randint(0, node_count-1) 93 | 94 | unvisited_nodes = all_nodes 95 | visited_nodes = [starting_node] 96 | unvisited_nodes.remove(starting_node) 97 | mst = np.zeros((node_count, node_count)) 98 | 99 | while len(visited_nodes) != node_count: 100 | selected_subgraph = graph[np.array(visited_nodes)[:, None], np.array(unvisited_nodes)] 101 | # we mask non-exist edges with -- so it doesn't crash the argmin 102 | min_edge_index = np.unravel_index(np.ma.masked_equal(selected_subgraph, 0, copy=False).argmin(), 103 | selected_subgraph.shape) 104 | edge_from = visited_nodes[min_edge_index[0]] 105 | edge_to = unvisited_nodes[min_edge_index[1]] 106 | mst[edge_from, edge_to] = graph[edge_from, edge_to] 107 | mst[edge_to, edge_from] = graph[edge_from, edge_to] 108 | unvisited_nodes.remove(edge_to) 109 | visited_nodes.append(edge_to) 110 | return mst -------------------------------------------------------------------------------- /data/synthetic/generate_data.py: -------------------------------------------------------------------------------- 1 | """ From https://github.com/TalwalkarLab/leaf/blob/master/data/synthetic/""" 2 | import argparse 3 | import json 4 | import os 5 | import numpy as np 6 | from scipy.special import softmax 7 | 8 | NUM_DIM = 10 9 | PROB_CLUSTERS = [1.0] 10 | 11 | 12 | class SyntheticDataset: 13 | def __init__( 14 | self, 15 | num_classes=2, 16 | seed=931231, 17 | num_dim=NUM_DIM, 18 | prob_clusters=[0.5, 0.5]): 19 | 20 | np.random.seed(seed) 21 | 22 | self.num_classes = num_classes 23 | self.num_dim = num_dim 24 | self.num_clusters = len(prob_clusters) 25 | self.prob_clusters = prob_clusters 26 | 27 | self.side_info_dim = self.num_clusters 28 | 29 | self.Q = np.random.normal( 30 | loc=0.0, scale=1.0, size=(self.num_dim + 1, self.num_classes, self.side_info_dim)) 31 | 32 | self.Sigma = np.zeros((self.num_dim, self.num_dim)) 33 | for i in range(self.num_dim): 34 | self.Sigma[i, i] = (i + 1) ** (-1.2) 35 | 36 | self.means = 
self._generate_clusters() 37 | 38 | def get_task(self, num_samples): 39 | cluster_idx = np.random.choice( 40 | range(self.num_clusters), size=None, replace=True, p=self.prob_clusters) 41 | new_task = self._generate_task(self.means[cluster_idx], cluster_idx, num_samples) 42 | return new_task 43 | 44 | def _generate_clusters(self): 45 | means = [] 46 | for i in range(self.num_clusters): 47 | loc = np.random.normal(loc=0, scale=1., size=None) 48 | mu = np.random.normal(loc=loc, scale=1., size=self.side_info_dim) 49 | means.append(mu) 50 | return means 51 | 52 | def _generate_x(self, num_samples): 53 | B = np.random.normal(loc=0.0, scale=1.0, size=None) 54 | loc = np.random.normal(loc=B, scale=1.0, size=self.num_dim) 55 | 56 | samples = np.ones((num_samples, self.num_dim + 1)) 57 | samples[:, 1:] = np.random.multivariate_normal( 58 | mean=loc, cov=self.Sigma, size=num_samples) 59 | 60 | return samples 61 | 62 | def _generate_y(self, x, cluster_mean): 63 | model_info = np.random.normal(loc=cluster_mean, scale=0.1, size=cluster_mean.shape) 64 | w = np.matmul(self.Q, model_info) 65 | 66 | num_samples = x.shape[0] 67 | prob = softmax(np.matmul(x, w) + np.random.normal(loc=0., scale=0.1, size=(num_samples, self.num_classes)), 68 | axis=1) 69 | 70 | y = np.argmax(prob, axis=1) 71 | return y, w, model_info 72 | 73 | def _generate_task(self, cluster_mean, cluster_id, num_samples): 74 | x = self._generate_x(num_samples) 75 | y, w, model_info = self._generate_y(x, cluster_mean) 76 | 77 | # now that we have y, we can remove the bias coeff 78 | x = x[:, 1:] 79 | 80 | return {'x': x, 'y': y, 'w': w, 'model_info': model_info, 'cluster': cluster_id} 81 | 82 | 83 | def main(): 84 | args = parse_args() 85 | np.random.seed(args.seed) 86 | 87 | num_samples = get_num_samples(args.num_workers) 88 | dataset = SyntheticDataset( 89 | num_classes=args.num_classes, prob_clusters=PROB_CLUSTERS, num_dim=args.dimension, seed=args.seed) 90 | tasks = [dataset.get_task(s) for s in num_samples] 91 | users, num_samples, user_data = to_leaf_format(tasks) 92 | save_json('all_data', 'all_data.json', users, num_samples, user_data, args.num_classes) 93 | 94 | 95 | def get_num_samples(num_tasks, min_num_samples=5, max_num_samples=1000): 96 | num_samples = np.random.lognormal(3, 2, (num_tasks)).astype(int) 97 | num_samples = [min(s + min_num_samples, max_num_samples) for s in num_samples] 98 | return num_samples 99 | 100 | 101 | def to_leaf_format(tasks): 102 | users, num_samples, user_data = [], [], {} 103 | 104 | for i, t in enumerate(tasks): 105 | x, y = t['x'].tolist(), t['y'].tolist() 106 | u_id = str(i) 107 | 108 | users.append(u_id) 109 | num_samples.append(len(y)) 110 | user_data[u_id] = {'x': x, 'y': y} 111 | 112 | return users, num_samples, user_data 113 | 114 | 115 | def save_json(json_dir, json_name, users, num_samples, user_data, num_classes): 116 | if not os.path.exists(json_dir): 117 | os.makedirs(json_dir) 118 | 119 | json_file = { 120 | 'users': users, 121 | 'num_samples': num_samples, 122 | 'user_data': user_data, 123 | "num_classes": num_classes 124 | } 125 | 126 | with open(os.path.join(json_dir, json_name), 'w') as outfile: 127 | json.dump(json_file, outfile) 128 | 129 | 130 | def parse_args(): 131 | parser = argparse.ArgumentParser() 132 | 133 | parser.add_argument( 134 | '--num_workers', 135 | help='number of workers;', 136 | type=int, 137 | required=True) 138 | parser.add_argument( 139 | '--num_classes', 140 | help='number of classes;', 141 | type=int, 142 | required=True) 143 | parser.add_argument( 144 | 
'--dimension', 145 | help='data dimension;', 146 | type=int, 147 | required=True) 148 | parser.add_argument( 149 | '--seed', 150 | help='seed for the random processes;', 151 | type=int, 152 | default=931231, 153 | required=False) 154 | return parser.parse_args() 155 | 156 | 157 | if __name__ == '__main__': 158 | main() 159 | -------------------------------------------------------------------------------- /models/sent140/lstm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.optim import get_optimizer, get_lr_scheduler 4 | from ..model import Model 5 | 6 | 7 | class LSTM(nn.Module): 8 | def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, 9 | bidirectional, dropout, pad_idx): 10 | super().__init__() 11 | 12 | self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx) 13 | 14 | self.lstm = nn.LSTM(embedding_dim, 15 | hidden_dim, 16 | num_layers=n_layers, 17 | bidirectional=bidirectional, 18 | dropout=dropout) 19 | 20 | self.fc = nn.Linear(hidden_dim * 2, output_dim) 21 | 22 | self.dropout = nn.Dropout(dropout) 23 | 24 | def forward(self, text, text_lengths): 25 | # text = [sent len, batch size] 26 | self.lstm.flatten_parameters() 27 | embedded = self.dropout(self.embedding(text)) 28 | 29 | # pack sequence 30 | packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths) 31 | 32 | packed_output, (hidden, cell) = self.lstm(packed_embedded) 33 | 34 | # unpack sequence 35 | _, _ = nn.utils.rnn.pad_packed_sequence(packed_output) 36 | 37 | hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)) 38 | 39 | return self.fc(hidden) 40 | 41 | 42 | class LSTMSentiment(Model): 43 | def __init__(self, iterator, criterion, metric, device, optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3, 44 | epoch_size=1, embedding_dim=100, hidden_dim=256, output_dim=1, n_layers=2, bidirectional=True, 45 | dropout=0.5): 46 | """ 47 | 48 | :param iterator: 49 | :param criterion: 50 | :param metric: 51 | :param device: 52 | :param optimizer_name: 53 | :param lr_scheduler: 54 | :param initial_lr: 55 | :param embedding_dim: 56 | :param hidden_dim: 57 | :param output_dim: 58 | :param n_layers: 59 | :param bidirectional: 60 | :param dropout: 61 | """ 62 | super(Model, self).__init__() 63 | 64 | self.device = device 65 | self.criterion = criterion 66 | self.metric = metric 67 | 68 | text_field = iterator.dataset.fields['text'] 69 | 70 | pad_idx = text_field.vocab.stoi[text_field.pad_token] 71 | unk_idx = text_field.vocab.stoi[text_field.unk_token] 72 | 73 | self.net = LSTM(vocab_size=len(text_field.vocab), 74 | embedding_dim=embedding_dim, 75 | hidden_dim=hidden_dim, 76 | output_dim=output_dim, 77 | n_layers=n_layers, 78 | bidirectional=bidirectional, 79 | dropout=dropout, 80 | pad_idx=pad_idx).to(device) 81 | 82 | # initialize embeddings 83 | pretrained_embeddings = text_field.vocab.vectors 84 | self.net.embedding.weight.data.copy_(pretrained_embeddings) 85 | 86 | self.net.embedding.weight.data[unk_idx] = torch.zeros(embedding_dim).to(self.device) 87 | self.net.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim).to(self.device) 88 | 89 | # Freeze embedding 90 | self.net.embedding.weight.requires_grad = False 91 | 92 | self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr) 93 | self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size) 94 | 95 | def fit_iterator_one_epoch(self, iterator): 96 | epoch_loss 
= 0 97 | epoch_acc = 0 98 | 99 | self.net.train() 100 | 101 | for batch in iterator: 102 | self.optimizer.zero_grad() 103 | 104 | text, text_lengths = batch.text 105 | 106 | predictions = self.net(text, text_lengths).squeeze(1) 107 | 108 | loss = self.criterion(predictions, batch.label) 109 | 110 | acc = self.metric(predictions, batch.label) 111 | 112 | loss.backward() 113 | 114 | self.optimizer.step() 115 | 116 | self.lr_scheduler.step() 117 | 118 | epoch_loss += loss.item() 119 | epoch_acc += acc.item() 120 | 121 | return epoch_loss / len(iterator), epoch_acc / len(iterator) 122 | 123 | def fit_batch(self, iterator, update=True): 124 | self.net.train() 125 | 126 | batch = next(iter(iterator)) 127 | self.optimizer.zero_grad() 128 | 129 | text, text_lengths = batch.text 130 | 131 | predictions = self.net(text, text_lengths).squeeze(1) 132 | 133 | loss = self.criterion(predictions, batch.label) 134 | 135 | acc = self.metric(predictions, batch.label) 136 | 137 | loss.backward() 138 | 139 | if update: 140 | self.optimizer.step() 141 | self.lr_scheduler.step() 142 | 143 | return loss.item(), acc.item() 144 | 145 | def evaluate_iterator(self, iterator): 146 | epoch_loss = 0 147 | epoch_acc = 0 148 | 149 | self.net.eval() 150 | 151 | with torch.no_grad(): 152 | for batch in iterator: 153 | text, text_lengths = batch.text 154 | 155 | predictions = self.net(text, text_lengths).squeeze(1) 156 | 157 | loss = self.criterion(predictions, batch.label) 158 | 159 | acc = self.metric(predictions, batch.label) 160 | 161 | epoch_loss += loss.item() 162 | epoch_acc += acc.item() 163 | 164 | return epoch_loss / len(iterator), epoch_acc / len(iterator) 165 | -------------------------------------------------------------------------------- /data/shakespeare/split_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import random 4 | import time 5 | 6 | 7 | def iid_divide(l, g): 8 | """ 9 | divide list l among g groups 10 | each group has either int(len(l)/g) or int(len(l)/g)+1 elements 11 | returns a list of groups 12 | 13 | """ 14 | num_elems = len(l) 15 | group_size = int(len(l)/g) 16 | num_big_groups = num_elems - g * group_size 17 | num_small_groups = g - num_big_groups 18 | glist = [] 19 | for i in range(num_small_groups): 20 | glist.append(l[group_size * i : group_size * (i + 1)]) 21 | bi = group_size*num_small_groups 22 | group_size += 1 23 | for i in range(num_big_groups): 24 | glist.append(l[bi + group_size * i:bi + group_size * (i + 1)]) 25 | return glist 26 | 27 | 28 | parser = argparse.ArgumentParser() 29 | 30 | parser.add_argument('--num_workers', 31 | help=('number of workers/users;' 32 | 'default: 1;'), 33 | type=int, 34 | default=1) 35 | parser.add_argument('--iid', 36 | help='sample iid;', 37 | action="store_true") 38 | parser.add_argument('--niid', 39 | help="sample niid;", 40 | dest='iid', action='store_false') 41 | parser.add_argument('--s_frac', 42 | help='fraction of all data to sample; default: 0.1;', 43 | type=float, 44 | default=0.1) 45 | parser.add_argument('--tr_frac', 46 | help='fraction in training set; default: 0.8;', 47 | type=float, 48 | default=0.8) 49 | parser.add_argument('--seed', 50 | help='args.seed for random partitioning of test/train data', 51 | type=int, 52 | default=None) 53 | 54 | parser.set_defaults(user=False) 55 | 56 | args = parser.parse_args() 57 | 58 | 59 | if __name__ == "__main__": 60 | print('------------------------------') 61 | print('generating training and test sets') 62 | 63 | 
rng_seed = (args.seed if (args.seed is not None and args.seed >= 0) else int(time.time())) 64 | rng = random.Random(rng_seed) 65 | 66 | train_file = os.path.join("train", "train.txt") 67 | test_file = os.path.join("test", "test.txt") 68 | 69 | data_dir = os.path.join('raw_data', 'by_play_and_character') 70 | 71 | if args.iid: 72 | # TO DO: Factorize this part 73 | all_lines = [] 74 | for file_name in os.listdir(data_dir): 75 | file_path = os.path.join(data_dir, file_name) 76 | with open(file_path, "r") as f: 77 | lines = f.readlines() 78 | all_lines += lines 79 | 80 | tot_num_samples = len(all_lines) 81 | num_new_samples = int(args.s_frac * tot_num_samples) 82 | 83 | indices = [i for i in range(tot_num_samples)] 84 | new_indices = rng.sample(indices, num_new_samples) 85 | 86 | indices_groups = iid_divide(new_indices, args.num_workers) 87 | 88 | for id_w, worker_indices in enumerate(indices_groups): 89 | curr_num_samples = len(worker_indices) 90 | 91 | num_train_samples = max(1, int(args.tr_frac * curr_num_samples)) 92 | num_test_samples = curr_num_samples - num_train_samples 93 | 94 | train_indices = rng.sample(worker_indices, num_train_samples) 95 | test_indices = list(set(worker_indices) - set(train_indices)) 96 | 97 | local_train_file = os.path.join("train", "{}.txt".format(id_w)) 98 | 99 | for (file_, indices) in [(train_file, train_indices), 100 | (local_train_file, train_indices), 101 | (test_file, test_indices)]: 102 | 103 | for sample_idx in indices: 104 | sample = all_lines[sample_idx] 105 | 106 | with open(file_, "a") as f: 107 | f.write(sample) 108 | else: 109 | writers = os.listdir(data_dir) 110 | 111 | rng.shuffle(writers) 112 | writers_by_workers = iid_divide(writers, args.num_workers) 113 | 114 | for id_w, worker_writers in enumerate(writers_by_workers): 115 | all_worker_lines = [] 116 | for writer in worker_writers: 117 | file_path = os.path.join(data_dir, writer) 118 | with open(file_path, "r") as f: 119 | lines = f.readlines() 120 | 121 | all_worker_lines += lines 122 | 123 | tot_num_samples = len(all_worker_lines) 124 | num_new_samples = int(args.s_frac * tot_num_samples) 125 | 126 | indices = [i for i in range(tot_num_samples)] 127 | new_indices = rng.sample(indices, num_new_samples) 128 | 129 | new_worker_lines = [all_worker_lines[i] for i in new_indices] 130 | 131 | num_train_samples = max(1, int(args.tr_frac * num_new_samples)) 132 | num_test_samples = num_new_samples - num_train_samples 133 | 134 | train_indices = rng.sample(new_indices, num_train_samples) 135 | test_indices = list(set(new_indices) - set(train_indices)) 136 | 137 | local_train_file = os.path.join("train", "{}.txt".format(id_w)) 138 | 139 | for (file_, indices) in [(train_file, train_indices), 140 | (local_train_file, train_indices), 141 | (test_file, test_indices)]: 142 | 143 | for sample_idx in indices: 144 | sample = all_worker_lines[sample_idx] 145 | 146 | with open(file_, "a") as f: 147 | f.write(sample) 148 | -------------------------------------------------------------------------------- /graph_utils/time_simulator.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import networkx as nx 4 | import numpy as np 5 | 6 | import geopy.distance 7 | from geopy.geocoders import Nominatim 8 | 9 | geolocator = Nominatim(user_agent="delay", timeout=20) 10 | 11 | 12 | def get_zoo_topology(file_path, 13 | bandwidth=1e9, 14 | upload_capacity_at_edge=35 * 1e6, 15 | download_capacity_at_edge=144 * 1e6): 16 | """ 17 | Read zoo_topology data into 
nx.DiGraph(); 18 | in the output graph each edge has two information: "capacity" and "distance"; 19 | each node has two information: "upload capacity" and "download capacity"; 20 | :param file_path : path to .gml file with topology information 21 | :param bandwidth: (float) represent links capacity, 22 | used when information not available in .gml file 23 | :param upload_capacity_at_edge: https://en.wikipedia.org/wiki/Bit_rate for information 24 | :param download_capacity_at_edge: https://en.wikipedia.org/wiki/Bit_rate for information 25 | :return: G_z (nx.DiGraph) 26 | """ 27 | 28 | network_data = nx.read_gml(file_path) 29 | 30 | G_z = nx.Graph() 31 | G_z.add_nodes_from(network_data) 32 | 33 | # add nodes capacity 34 | nx.set_node_attributes(G_z, upload_capacity_at_edge * 1e-3, 'upload_capacity') 35 | nx.set_node_attributes(G_z, download_capacity_at_edge * 1e-3, "download_capacity") 36 | 37 | # add edges data 38 | for u, v, data in network_data.edges.data(): 39 | # get distance 40 | try: 41 | distance = data["distance"] 42 | 43 | except AttributeError: 44 | try: 45 | coords_1 = (network_data.nodes(data=True)[u]["Latitude"], 46 | network_data.nodes(data=True)[u]["Longitude"]) 47 | 48 | coords_2 = (network_data.nodes(data=True)[v]["Latitude"], 49 | network_data.nodes(data=True)[v]["Longitude"]) 50 | 51 | except KeyError: 52 | time.sleep(1.2) # To avoid Service time out Error 53 | 54 | geo = geolocator.geocode(u, timeout=20) 55 | 56 | coords_1 = (geo.latitude, geo.longitude) 57 | 58 | time.sleep(1.2) # To avoid Service time out Error 59 | 60 | geo = geolocator.geocode(v, timeout=20) 61 | 62 | coords_2 = (geo.latitude, geo.longitude) 63 | 64 | distance = geopy.distance.distance(coords_1, coords_2).km 65 | 66 | # add_edge 67 | G_z.add_edge(u, v, capacity=bandwidth * 1e-3, distance=distance) 68 | 69 | return G_z 70 | 71 | 72 | def initialize_delays(underlay, overlay, model_size): 73 | """ 74 | compute delays between nodes ignoring download congestion effect 75 | :param underlay: (nx.Graph()) 76 | :param overlay: (nx.Graph()) 77 | :param model_size: message_size in bits, see https://keras.io/applications/ for examples 78 | :return: nxGraph() 79 | """ 80 | for u, v, data in overlay.edges(data=True): 81 | overlay.edges[u, v]["delay"] = overlay.edges[u, v]["weight"] 82 | 83 | return overlay 84 | 85 | 86 | def init_iteration_end_time(overlay, computation_time=0): 87 | """ 88 | 89 | :param overlay: 90 | :param computation_time: 91 | :return: 92 | """ 93 | nx.set_node_attributes(overlay, computation_time, "end_time") 94 | return overlay 95 | 96 | 97 | def get_iteration_end_time(underlay, overlay, model_size, computation_time): 98 | """ 99 | Compute the end times of next iteration having the end times for current iteration. 
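    Informally, and as a simplified reading of the code below rather than an exact specification:

        t_ij           = end_time_i + delay_ij     (when i's message becomes available at j)
        receive_time_j = arrival time of the last incoming message, with consecutive receptions
                         serialized at model_size / download_capacity_j each
        end_time_j     = receive_time_j + computation_time
                         + model_size * out_degree(j) / upload_capacity_j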
100 | :param underlay: 101 | :param overlay: 102 | :param model_size: 103 | :param computation_time 104 | :return: 105 | """ 106 | out_degrees = dict(overlay.out_degree()) 107 | for i, j in overlay.edges: 108 | overlay.edges[i, j]["t"] = overlay.edges[i, j]["delay"] + overlay.nodes[i]["end_time"] 109 | 110 | def get_edge_time(e): 111 | return overlay.edges[e[0], e[1]]["t"] 112 | 113 | for j in overlay.nodes: 114 | overlay.nodes[j]["end_time"] = 0 115 | 116 | # get all the input edges to "j" sorted by t_{ij} 117 | edges = [] 118 | for i in overlay.predecessors(j): 119 | edges.append((i, j)) 120 | 121 | if len(edges) > 0: 122 | edges.sort(key=get_edge_time) 123 | 124 | t_prev = get_edge_time(edges[0]) + model_size / underlay.nodes[j]["download_capacity"] 125 | 126 | for edge in edges[1:]: 127 | if get_edge_time(edge) <= t_prev + model_size / underlay.nodes[j]["download_capacity"]: 128 | t_prev = t_prev + model_size / underlay.nodes[j]["download_capacity"] 129 | else: 130 | t_prev = get_edge_time(edge) 131 | 132 | else: 133 | t_prev = 0 134 | 135 | overlay.nodes[j]["end_time"] = t_prev + computation_time + \ 136 | (model_size * out_degrees[j]) / underlay.nodes[j]["upload_capacity"] 137 | 138 | return overlay 139 | 140 | 141 | def simulate_network(underlay, overlay, n_iterations, model_size=1e8, computation_time=0): 142 | """ 143 | 144 | :param underlay: 145 | :param overlay: 146 | :param n_iterations: 147 | :param model_size: 148 | :param computation_time 149 | :return: 150 | """ 151 | time_evolution = np.zeros((overlay.number_of_nodes(), n_iterations)) 152 | 153 | overlay = initialize_delays(underlay, overlay, model_size) 154 | overlay = init_iteration_end_time(overlay, computation_time) 155 | 156 | for iteration in range(n_iterations): 157 | overlay = get_iteration_end_time(underlay, overlay, model_size, computation_time) 158 | for ii, (_, end_time) in enumerate(overlay.nodes.data("end_time")): 159 | time_evolution[ii, iteration] = end_time 160 | 161 | return time_evolution 162 | -------------------------------------------------------------------------------- /models/shakespeare/gru.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.optim import get_optimizer, get_lr_scheduler 4 | from torch.autograd import Variable 5 | import string 6 | from ..model import Model 7 | 8 | 9 | class RNN(nn.Module): 10 | def __init__(self, input_size, embed_size, hidden_size, output_size, n_layers): 11 | super(RNN, self).__init__() 12 | self.input_size = input_size 13 | self.hidden_size = hidden_size 14 | self.embed_size = embed_size 15 | self.output_size = output_size 16 | self.n_layers = n_layers 17 | 18 | self.encoder = nn.Embedding(input_size, embed_size) 19 | self.gru = nn.GRU(embed_size, hidden_size, n_layers) 20 | self.decoder = nn.Linear(hidden_size, output_size) 21 | 22 | def forward(self, input_, hidden): 23 | self.gru.flatten_parameters() 24 | batch_size = input_.size(0) 25 | encoded = self.encoder(input_) 26 | output, hidden = self.gru(encoded.view(1, batch_size, -1), hidden) 27 | output = self.decoder(output.view(batch_size, -1)) 28 | return output, hidden 29 | 30 | def init_hidden(self, batch_size): 31 | return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)) 32 | 33 | 34 | class NextCharDecoder(Model): 35 | def __init__(self, criterion, metric, device, 36 | optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3, epoch_size=1, 37 | embed_size=16, hidden_size=256, n_layers=2): 38 | 
super(Model, self).__init__() 39 | 40 | vocab_size = len(string.printable) 41 | self.net = RNN(vocab_size, embed_size, hidden_size, vocab_size, n_layers).to(device) 42 | self.criterion = criterion 43 | self.metric = metric 44 | self.device = device 45 | 46 | self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr) 47 | self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size) 48 | 49 | def fit_iterator_one_epoch(self, iterator): 50 | self.net.train() 51 | 52 | epoch_loss = 0 53 | epoch_acc = 0 54 | 55 | for inp, target in iterator: 56 | 57 | inp = inp.to(self.device) 58 | target = target.to(self.device) 59 | 60 | hidden = self.net.init_hidden(inp.size(0)).to(self.device) 61 | self.optimizer.zero_grad() 62 | 63 | loss = 0 64 | acc = 0 65 | 66 | for c in range(iterator.dataset.chunk_len): 67 | output, hidden = self.net(inp[:, c], hidden) 68 | loss += self.criterion(output.view(inp.size(0), -1), target[:, c]) 69 | acc += self.metric(output, target[:, c]).item() 70 | 71 | loss /= iterator.dataset.chunk_len 72 | acc /= iterator.dataset.chunk_len 73 | 74 | loss.backward() 75 | 76 | self.optimizer.step() 77 | self.lr_scheduler.step() 78 | 79 | epoch_loss += loss.item() 80 | epoch_acc += acc 81 | 82 | return epoch_loss / len(iterator), epoch_acc / len(iterator) 83 | 84 | def fit_batch(self, iterator, update=True): 85 | self.net.train() 86 | 87 | inp, target = next(iter(iterator)) 88 | inp = inp.to(self.device) 89 | target = target.to(self.device) 90 | 91 | hidden = self.net.init_hidden(inp.size(0)).to(self.device) 92 | self.optimizer.zero_grad() 93 | 94 | loss = 0 95 | acc = 0 96 | 97 | for c in range(iterator.dataset.chunk_len): 98 | output, hidden = self.net(inp[:, c], hidden) 99 | loss += self.criterion(output.view(inp.size(0), -1), target[:, c]) 100 | acc += self.metric(output, target[:, c]).item() 101 | 102 | loss /= iterator.dataset.chunk_len 103 | acc /= iterator.dataset.chunk_len 104 | 105 | loss.backward() 106 | 107 | if update: 108 | self.optimizer.step() 109 | self.lr_scheduler.step() 110 | 111 | return loss.item(), acc 112 | 113 | def evaluate_iterator(self, iterator): 114 | self.net.eval() 115 | 116 | epoch_loss = 0 117 | epoch_acc = 0 118 | 119 | for inp, target in iterator: 120 | 121 | inp = inp.to(self.device) 122 | target = target.to(self.device) 123 | 124 | hidden = self.net.init_hidden(inp.size(0)).to(self.device) 125 | 126 | loss = 0 127 | acc = 0 128 | for c in range(iterator.dataset.chunk_len): 129 | output, hidden = self.net(inp[:, c], hidden) 130 | loss += self.criterion(output.view(inp.size(0), -1), target[:, c]) 131 | acc += self.metric(output, target[:, c]).item() 132 | 133 | loss /= iterator.dataset.chunk_len 134 | acc /= iterator.dataset.chunk_len 135 | 136 | epoch_loss += loss.item() 137 | epoch_acc += acc 138 | return epoch_loss / len(iterator), epoch_acc / len(iterator) 139 | 140 | def generate(self, prime_str="Wh", predict_len=200, temperature=0.8): 141 | all_characters = string.printable 142 | hidden = self.net.init_hidden(1).to(self.device) 143 | 144 | prime_input = torch.zeros(1, len(prime_str)).long().to(self.device) 145 | for c in range(len(prime_str)): 146 | prime_input[0, c] = all_characters.index(prime_str[c]) 147 | 148 | predicted = prime_str 149 | 150 | for p in range(len(prime_str) - 1): 151 | _, hidden = self.net(prime_input[:, p], hidden) 152 | 153 | inp = prime_input[:, -1] 154 | 155 | for p in range(predict_len): 156 | output, hidden = self.net(inp, hidden) 157 | 158 | output_dist = 
output.data.view(-1).div(temperature).exp() 159 | top_i = torch.multinomial(output_dist, 1)[0] 160 | 161 | predicted_char = all_characters[top_i] 162 | predicted += predicted_char 163 | 164 | inp = torch.zeros(1, len(predicted_char)).long().to(self.device) 165 | for c in range(len(predicted_char)): 166 | inp[0, c] = all_characters.index(predicted_char[c]) 167 | 168 | return predicted 169 | -------------------------------------------------------------------------------- /data/inaturalist/split_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import pickle 5 | import time 6 | import random 7 | from collections import Counter 8 | 9 | import networkx as nx 10 | import numpy as np 11 | 12 | import geopy.distance 13 | from geopy.geocoders import Nominatim 14 | 15 | 16 | class FileException(FileNotFoundError): 17 | def __init__(self, message): 18 | super().__init__(message) 19 | 20 | 21 | parser = argparse.ArgumentParser() 22 | 23 | parser.add_argument('--network', 24 | help="name of the network to use, should be present in /graph_utils/data; default: amazon_us", 25 | type=str, 26 | default="amazon_us") 27 | parser.add_argument('--num_categories', 28 | help="number of classes to include, default: 80", 29 | type=int, 30 | default="80") 31 | parser.add_argument('--s_frac', 32 | help='fraction of all data to sample; default: 0.1;', 33 | type=float, 34 | default=1) 35 | parser.add_argument('--tr_frac', 36 | help='fraction in training set; default: 0.8;', 37 | type=float, 38 | default=0.9) 39 | parser.add_argument('--seed', 40 | help='args.seed for random partitioning of test/train data', 41 | type=int, 42 | default=None) 43 | 44 | args = parser.parse_args() 45 | 46 | 47 | if __name__ == "__main__": 48 | network_path = os.path.abspath(os.path.join(os.getcwd(), "..", "..", "graph_utils/data", args.network + ".gml")) 49 | 50 | if not os.path.isfile(network_path): 51 | raise FileException("The network with name {} is not found!".format(network_path)) 52 | 53 | rng_seed = (args.seed if (args.seed is not None and args.seed >= 0) else int(time.time())) 54 | rng = random.Random(rng_seed) 55 | np.random.seed(rng_seed) 56 | 57 | # Get workers locations 58 | network_path = os.path.abspath(os.path.join(os.getcwd(), "..", "..", "graph_utils/data", args.network + ".gml")) 59 | workers_network = nx.read_gml(network_path, label="label") 60 | nodes_locs = [] 61 | geolocator = Nominatim(user_agent="delay", timeout=20) 62 | for node in workers_network.nodes(): 63 | time.sleep(1.0) # To avoid Service time out Error 64 | geo = geolocator.geocode(node, timeout=20) 65 | nodes_locs.append((geo.latitude, geo.longitude)) 66 | 67 | # Get the information for images and locations 68 | with open(os.path.join("raw_data", "train2018_locations.json")) as f: 69 | train_imgs_locations = json.load(f) 70 | 71 | with open(os.path.join("raw_data", "val2018_locations.json")) as f: 72 | val_imgs_locations = json.load(f) 73 | 74 | with open(os.path.join("raw_data", "train2018.json")) as f: 75 | train_images_data = json.load(f) 76 | 77 | with open(os.path.join("raw_data", "val2018.json")) as f: 78 | val_images_data = json.load(f) 79 | 80 | all_data = dict() 81 | for images_data in [train_images_data, val_images_data]: 82 | for img, annotation in zip(images_data["images"], images_data["annotations"]): 83 | img_id = img["id"] 84 | img_path = ["raw_data/"] + img["file_name"].split("/")[1:] 85 | img_path = "/".join(img_path) 86 | category_id = 
annotation["category_id"] 87 | 88 | all_data[img_id] = {"path": img_path, "class": category_id} 89 | 90 | for imgs_locations in [train_imgs_locations, val_imgs_locations]: 91 | for location in imgs_locations: 92 | img_id = location["id"] 93 | all_data[img_id]["lat"] = location["lat"] 94 | all_data[img_id]["lon"] = location["lon"] 95 | 96 | # Get most common categories 97 | all_categories = [] 98 | for img_id in all_data: 99 | all_categories.append(all_data[img_id]['class']) 100 | 101 | c = Counter(all_categories) 102 | most_common_categories = c.most_common(args.num_categories) 103 | most_common_categories = [i for (i, j) in most_common_categories] 104 | 105 | relabel_categories = {category: idx for idx, category in enumerate(most_common_categories)} 106 | most_common_categories = set(most_common_categories) 107 | 108 | # Assign images to closest workers 109 | imgs_by_workers = {worker_id: [] for worker_id in range(workers_network.number_of_nodes())} 110 | 111 | for img_id in all_data: 112 | category = all_data[img_id]['class'] 113 | if category in most_common_categories: 114 | # Get closest worker to node 115 | coord_img = (all_data[img_id]['lat'], all_data[img_id]['lon']) 116 | distances = np.array([geopy.distance.distance(coord_img, coord_node).km for coord_node in nodes_locs]) 117 | worker_id = np.argmin(distances) 118 | 119 | img_data = (all_data[img_id]["path"], relabel_categories[category]) 120 | 121 | imgs_by_workers[worker_id].append(img_data) 122 | 123 | # Split data to train and test 124 | train_data = [] 125 | test_data = [] 126 | 127 | for worker_id in imgs_by_workers.keys(): 128 | all_worker_data = imgs_by_workers[worker_id] 129 | 130 | tot_num_samples = len(all_worker_data) 131 | num_new_samples = int(args.s_frac * tot_num_samples) 132 | 133 | indices = [i for i in range(tot_num_samples)] 134 | new_indices = rng.sample(indices, num_new_samples) 135 | 136 | num_train_samples = max(1, int(args.tr_frac * num_new_samples)) 137 | num_test_samples = num_new_samples - num_train_samples 138 | 139 | train_indices = rng.sample(new_indices, num_train_samples) 140 | test_indices = list(set(new_indices) - set(train_indices)) 141 | 142 | worker_data = [all_worker_data[ii] for ii in train_indices] 143 | train_data += [all_worker_data[ii] for ii in train_indices] 144 | test_data += [all_worker_data[ii] for ii in test_indices] 145 | 146 | with open('train/{}.pkl'.format(worker_id), 'wb') as f: 147 | pickle.dump(worker_data, f, pickle.HIGHEST_PROTOCOL) 148 | 149 | with open('train/train.pkl', 'wb') as f: 150 | pickle.dump(train_data, f, pickle.HIGHEST_PROTOCOL) 151 | 152 | with open('test/test.pkl', 'wb') as f: 153 | pickle.dump(test_data, f, pickle.HIGHEST_PROTOCOL) 154 | -------------------------------------------------------------------------------- /graph_utils/utils/matcha.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | 4 | import cvxpy as cp 5 | import numpy as np 6 | import networkx as nx 7 | 8 | from .matching_decomposition import matching_decomposition 9 | 10 | 11 | class RandomTopologyGenerator(object): 12 | """ 13 | Attributes: 14 | - laplacian_matrices: List of numpy arrays; each array represents the laplacian matrix of a matching; 15 | - communication_budget: Constraint controlling the sum of the weights, 16 | and equivalently controlling the expected communication time; 17 | - path_to_history_file: path to .csv file used to save the history of selected matching at each step 18 | - activation_probas: np.array 
of the same size as "laplacian_matrices"; 19 | - current_matching_activations: list of booleans, each of them represent if a matching is used; 20 | - matching_list: list of nx.Graph() objects; 21 | - alpha: float to be use in generating mixing matrix 22 | """ 23 | def __init__(self, network, communication_budget, network_save_path=None, path_to_history_file=None): 24 | self.network = network 25 | self.communication_budget = communication_budget 26 | self.path_to_history_file = path_to_history_file 27 | self.network_save_path = network_save_path 28 | 29 | # eliminate self loops 30 | self.network.remove_edges_from(nx.selfloop_edges(self.network)) 31 | 32 | self.matching_list, self.laplacian_matrices = matching_decomposition(self.network) 33 | 34 | self.number_workers = self.laplacian_matrices[0].shape[0] 35 | self.number_matching = len(self.laplacian_matrices) 36 | 37 | # Initialize generator parameters 38 | self.activation_probas = self.get_matching_activation_probabilities() 39 | self.activation_probas = np.clip(self.activation_probas, 0., 1.) 40 | 41 | self.alpha, self.spectral_norm = self.get_mixing_matrix_parameter() 42 | 43 | # Initialize 44 | self.current_step = -1 45 | self.current_matching_activations = np.ones(self.number_workers) 46 | self.current_topology = self.network 47 | 48 | if self.network_save_path: 49 | nx.write_gml(self.network, self.network_save_path) 50 | 51 | def get_matching_activation_probabilities(self): 52 | """ 53 | Computes a set of activation probabilities that maximize the connectivity of the expected graph 54 | given a communication time constraint; 55 | For given Laplacian matrices, it computes optimal weights to sum them, in order to maximize 56 | the second largest eigenvalue of their weighted sum; 57 | See https://arxiv.org/pdf/1905.09435.pdf (Formula 5) for details; 58 | and equivalently controlling the expected communication time; 59 | :return: np.array of the same size as "laplacian_matrices"; each entry represents the probability 60 | of activating a sub-graph; 61 | """ 62 | p = cp.Variable(self.number_matching) 63 | gamma = cp.Variable() 64 | beta = cp.Variable() 65 | constraints = [p <= 1, p >= 0, 66 | p.T @ np.ones(self.number_matching) <= self.communication_budget * self.number_matching, 67 | gamma * np.eye(self.number_workers) - beta * np.ones((self.number_workers, self.number_workers)) 68 | << cp.sum([p[i] * self.laplacian_matrices[i] for i in range(self.number_matching)])] 69 | objective = cp.Maximize(gamma) 70 | problem = cp.Problem(objective, constraints) 71 | 72 | problem.solve() 73 | 74 | return p.value 75 | 76 | def get_mixing_matrix_parameter(self): 77 | """ 78 | Computes optimal equal weight mixing matrix parameter; 79 | i.e. 
computes alpha in order to optimize the spectral gap of the mixing matrix W, where 80 | W = I - alpha * L_bar, with being identity matrix and L_bar is the expected Laplacian matrix; 81 | See https://arxiv.org/pdf/1905.09435.pdf (Formula 6 and 7) for details; 82 | each entry represents the probability of activating a sub-graph; 83 | :return: alpha (float) 84 | """ 85 | L_bar = np.zeros((self.number_workers, self.number_workers)) 86 | L_tilde = np.zeros((self.number_workers, self.number_workers)) 87 | 88 | for idx in range(self.number_matching): 89 | L_bar += self.activation_probas[idx] * self.laplacian_matrices[idx] 90 | L_tilde += self.activation_probas[idx] * (1 - self.activation_probas[idx]) * self.laplacian_matrices[idx] 91 | 92 | rho = cp.Variable() 93 | alpha = cp.Variable() 94 | beta = cp.Variable() 95 | 96 | objective = cp.Minimize(rho) 97 | 98 | constraints = [alpha ** 2 - beta <= 0, 99 | np.eye(self.number_workers) - 2 * alpha * L_bar + beta * (L_bar @ L_bar + 2 * L_tilde) 100 | - (1 / self.number_workers) * np.ones((self.number_workers, self.number_workers)) 101 | << rho * np.eye(self.number_workers)] 102 | 103 | prob = cp.Problem(objective, constraints) 104 | prob.solve() 105 | 106 | return alpha.value, rho.value 107 | 108 | def step(self): 109 | """ 110 | Generating random topology at any iteration: given activation probabilities, generates an independent 111 | Bernoulli random variable Bj for each matching in "matching_list", 112 | the activated topology is the concatenation of the activated matching. 113 | The mixing matrix is then computed as W = I - alpha * L, where L is the Laplacian matrix 114 | of the activated topology; 115 | """ 116 | self.current_topology = nx.Graph() 117 | laplacian_matrix = np.zeros((self.number_workers, self.number_workers)) 118 | 119 | self.current_matching_activations = np.random.binomial(n=1, p=self.activation_probas) 120 | while self.current_matching_activations.sum() == 0: 121 | self.current_matching_activations = np.random.binomial(n=1, p=self.activation_probas) 122 | 123 | for idx, matching_activation in enumerate(self.current_matching_activations): 124 | if matching_activation: 125 | self.current_topology = nx.compose(self.current_topology, self.matching_list[idx]) 126 | laplacian_matrix += self.laplacian_matrices[idx] 127 | 128 | mixing_matrix = np.eye(self.number_workers) - self.alpha * laplacian_matrix 129 | 130 | self.current_topology = nx.from_numpy_matrix(mixing_matrix) 131 | 132 | self.current_step += 1 133 | 134 | if self.path_to_history_file: 135 | with open(self.path_to_history_file, "a") as csvfile: 136 | writer = csv.writer(csvfile, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL) 137 | writer.writerow(self.current_matching_activations.tolist()) 138 | -------------------------------------------------------------------------------- /data/sent140/split_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import argparse 4 | import json 5 | import random 6 | import time 7 | import numpy as np 8 | 9 | 10 | def iid_divide(l, g): 11 | """ 12 | divide list l among g groups 13 | each group has either int(len(l)/g) or int(len(l)/g)+1 elements 14 | returns a list of groups 15 | 16 | """ 17 | num_elems = len(l) 18 | group_size = int(len(l)/g) 19 | num_big_groups = num_elems - g * group_size 20 | num_small_groups = g - num_big_groups 21 | glist = [] 22 | for i in range(num_small_groups): 23 | glist.append(l[group_size * i : group_size * (i + 1)]) 24 | bi = 
group_size*num_small_groups 25 | group_size += 1 26 | for i in range(num_big_groups): 27 | glist.append(l[bi + group_size * i:bi + group_size * (i + 1)]) 28 | return glist 29 | 30 | 31 | parser = argparse.ArgumentParser() 32 | 33 | parser.add_argument('--num_workers', 34 | help=('number of workers/users;' 35 | 'default: 1;'), 36 | type=int, 37 | default=1) 38 | parser.add_argument('--iid', 39 | help='sample iid;', 40 | action="store_true") 41 | parser.add_argument('--niid', 42 | help="sample niid;", 43 | dest='iid', action='store_false') 44 | parser.add_argument('--s_frac', 45 | help='fraction of all data to sample; default: 0.1;', 46 | type=float, 47 | default=0.01) 48 | parser.add_argument('--tr_frac', 49 | help='fraction in training set; default: 0.8;', 50 | type=float, 51 | default=0.8) 52 | parser.add_argument('--seed', 53 | help='args.seed for random partitioning of test/train data', 54 | type=int, 55 | default=None) 56 | 57 | parser.set_defaults(user=False) 58 | 59 | args = parser.parse_args() 60 | 61 | if __name__ == "__main__": 62 | print('------------------------------') 63 | print('generating training and test sets') 64 | 65 | rng_seed = (args.seed if (args.seed is not None and args.seed >= 0) else int(time.time())) 66 | rng = random.Random(rng_seed) 67 | np.random.seed(rng_seed) 68 | 69 | train_file = os.path.join("train", "train.json") 70 | test_file = os.path.join("test", "test.json") 71 | 72 | data_dir = os.path.join('raw_data', 'all_data.csv') 73 | with open(data_dir, 'rt', encoding='ISO-8859-1') as f: 74 | reader = csv.reader(f) 75 | data = list(reader) 76 | 77 | data = sorted(data, key=lambda x: x[4]) 78 | 79 | if args.iid: 80 | tot_num_samples = len(data) 81 | num_new_samples = int(args.s_frac * tot_num_samples) 82 | 83 | indices = [i for i in range(tot_num_samples)] 84 | new_indices = rng.sample(indices, num_new_samples) 85 | 86 | indices_groups = iid_divide(new_indices, args.num_workers) 87 | 88 | for id_w, worker_indices in enumerate(indices_groups): 89 | curr_num_samples = len(worker_indices) 90 | 91 | num_train_samples = max(1, int(args.tr_frac * curr_num_samples)) 92 | num_test_samples = curr_num_samples - num_train_samples 93 | 94 | train_indices = rng.sample(worker_indices, num_train_samples) 95 | test_indices = list(set(worker_indices) - set(train_indices)) 96 | 97 | local_train_file = os.path.join("train", "{}.json".format(id_w)) 98 | 99 | for (file_, indices) in [(local_train_file, train_indices), 100 | (train_file, train_indices), 101 | (test_file, test_indices)]: 102 | 103 | for sample_idx in indices: 104 | sample = data[sample_idx] 105 | row = dict() 106 | 107 | row['idx'] = sample[1] 108 | row["time"] = sample[2] 109 | row['query'] = sample[3] 110 | row["user"] = sample[4] 111 | row["text"] = sample[5] 112 | row["label"] = 1 if sample[0] == "4" else 0 113 | 114 | with open(file_, "a") as f: 115 | json.dump(row, f) 116 | f.write("\n") 117 | 118 | else: 119 | all_writers = set() 120 | 121 | for i in range(len(data)): 122 | row = data[i] 123 | all_writers.add(row[4]) 124 | 125 | all_writers = list(all_writers) 126 | 127 | data_by_writers = {k: [] for k in all_writers} 128 | 129 | for i in range(len(data)): 130 | row = data[i] 131 | data_by_writers[row[4]].append(row) 132 | 133 | num_writers_by_user = np.random.lognormal(5, 1.5, args.num_workers) + 5 134 | num_writers_by_user *= (len(all_writers) / num_writers_by_user.sum()) 135 | num_samples = np.floor(num_writers_by_user).astype(np.int64) 136 | 137 | writers_by_workers = [] 138 | current_idx = 0 139 | for 
worker_id in range(args.num_workers): 140 | writers_by_workers.append(all_writers[current_idx: current_idx + num_samples[worker_id]]) 141 | current_idx = num_samples[worker_id] 142 | 143 | for id_w, writers in enumerate(writers_by_workers): 144 | all_worker_data = [] 145 | for writer in writers: 146 | all_worker_data += data_by_writers[writer] 147 | 148 | tot_num_samples = len(all_worker_data) 149 | curr_num_samples = int(args.s_frac * tot_num_samples) 150 | 151 | indices = [i for i in range(tot_num_samples)] 152 | worker_indices = rng.sample(indices, curr_num_samples) 153 | 154 | num_train_samples = max(1, int(args.tr_frac * curr_num_samples)) 155 | num_test_samples = curr_num_samples - num_train_samples 156 | 157 | train_indices = rng.sample(worker_indices, num_train_samples) 158 | test_indices = list(set(worker_indices) - set(train_indices)) 159 | 160 | local_train_file = os.path.join("train", "{}.json".format(id_w)) 161 | 162 | for (file_, indices) in [(local_train_file, train_indices), 163 | (train_file, train_indices), 164 | (test_file, test_indices)]: 165 | 166 | for sample_idx in indices: 167 | sample = data[sample_idx] 168 | row = dict() 169 | 170 | row['idx'] = sample[1] 171 | row["time"] = sample[2] 172 | row['query'] = sample[3] 173 | row["user"] = sample[4] 174 | row["text"] = sample[5] 175 | row["label"] = 1 if sample[0] == "4" else 0 176 | 177 | with open(file_, "a") as f: 178 | json.dump(row, f) 179 | f.write("\n") 180 | 181 | 182 | -------------------------------------------------------------------------------- /data/femnist/split_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import argparse 4 | import random 5 | import time 6 | import numpy as np 7 | 8 | 9 | def relabel_class(c): 10 | """ 11 | maps hexadecimal class value (string) to a decimal number 12 | returns: 13 | - 0 through 9 for classes representing respective numbers 14 | - 10 through 35 for classes representing respective uppercase letters 15 | - 36 through 61 for classes representing respective lowercase letters 16 | """ 17 | if c.isdigit() and int(c) < 40: 18 | return int(c) - 30 19 | elif int(c, 16) <= 90: # uppercase 20 | return int(c, 16) - 55 21 | else: 22 | return int(c, 16) - 61 23 | 24 | 25 | def iid_divide(l, g): 26 | """ 27 | divide list l among g groups 28 | each group has either int(len(l)/g) or int(len(l)/g)+1 elements 29 | returns a list of groups 30 | 31 | """ 32 | num_elems = len(l) 33 | group_size = int(len(l)/g) 34 | num_big_groups = num_elems - g * group_size 35 | num_small_groups = g - num_big_groups 36 | glist = [] 37 | for i in range(num_small_groups): 38 | glist.append(l[group_size * i : group_size * (i + 1)]) 39 | bi = group_size*num_small_groups 40 | group_size += 1 41 | for i in range(num_big_groups): 42 | glist.append(l[bi + group_size * i:bi + group_size * (i + 1)]) 43 | return glist 44 | 45 | 46 | parser = argparse.ArgumentParser() 47 | 48 | parser.add_argument('--num_workers', 49 | help=('number of workers/users;' 50 | 'default: 1;'), 51 | type=int, 52 | default=1) 53 | parser.add_argument('--iid', 54 | help='sample iid;', 55 | action="store_true") 56 | parser.add_argument('--niid', 57 | help="sample niid;", 58 | dest='iid', action='store_false') 59 | parser.add_argument('--s_frac', 60 | help='fraction of all data to sample; default: 0.1;', 61 | type=float, 62 | default=0.01) 63 | parser.add_argument('--tr_frac', 64 | help='fraction in training set; default: 0.8;', 65 | type=float, 66 | default=0.8) 67 
| parser.add_argument('--seed', 68 | help='args.seed for random partitioning of test/train data', 69 | type=int, 70 | default=None) 71 | 72 | parser.set_defaults(user=False) 73 | 74 | args = parser.parse_args() 75 | 76 | if __name__ == "__main__": 77 | print('------------------------------') 78 | print('generating training and test sets') 79 | 80 | rng_seed = (args.seed if (args.seed is not None and args.seed >= 0) else int(time.time())) 81 | rng = random.Random(rng_seed) 82 | np.random.seed(rng_seed) 83 | 84 | train_file = os.path.join("train", "train.json") 85 | test_file = os.path.join("test", "test.json") 86 | 87 | data_dir = os.path.join('intermediate', 'images_by_writer.pkl') 88 | with open(data_dir, 'rb') as f: 89 | all_data = pickle.load(f) 90 | 91 | if args.iid: 92 | combined_data = [] 93 | 94 | for (writer_id, l) in all_data: 95 | combined_data += l 96 | 97 | for ii, (path, c) in enumerate(combined_data): 98 | combined_data[ii] = (path, relabel_class(c)) 99 | 100 | tot_num_samples = len(combined_data) 101 | num_new_samples = int(args.s_frac * tot_num_samples) 102 | 103 | indices = [i for i in range(tot_num_samples)] 104 | new_indices = rng.sample(indices, num_new_samples) 105 | 106 | indices_groups = iid_divide(new_indices, args.num_workers) 107 | 108 | train_data = [] 109 | test_data = [] 110 | 111 | for id_w, worker_indices in enumerate(indices_groups): 112 | curr_num_samples = len(worker_indices) 113 | 114 | num_train_samples = max(1, int(args.tr_frac * curr_num_samples)) 115 | num_test_samples = curr_num_samples - num_train_samples 116 | 117 | train_indices = rng.sample(worker_indices, num_train_samples) 118 | test_indices = list(set(indices) - set(train_indices)) 119 | 120 | worker_data = [combined_data[ii] for ii in train_indices] 121 | train_data += [combined_data[ii] for ii in train_indices] 122 | test_data += [combined_data[ii] for ii in test_indices] 123 | 124 | with open('train/{}.pkl'.format(id_w), 'wb') as f: 125 | pickle.dump(worker_data, f, pickle.HIGHEST_PROTOCOL) 126 | 127 | with open('train/train.pkl', 'wb') as f: 128 | pickle.dump(train_data, f, pickle.HIGHEST_PROTOCOL) 129 | 130 | with open('test/test.pkl', 'wb') as f: 131 | pickle.dump(test_data, f, pickle.HIGHEST_PROTOCOL) 132 | 133 | else: 134 | writer_ids = [i for i in range(len(all_data))] 135 | rng.shuffle(writer_ids) 136 | 137 | num_writers_by_user = np.random.lognormal(5, 1.5, args.num_workers) + 5 138 | num_writers_by_user *= (len(writer_ids) / num_writers_by_user.sum()) 139 | num_samples = np.floor(num_writers_by_user).astype(np.int64) 140 | 141 | writers_by_workers = [] 142 | current_idx = 0 143 | for worker_id in range(args.num_workers): 144 | writers_by_workers.append(writer_ids[current_idx: current_idx + num_samples[worker_id]]) 145 | current_idx = num_samples[worker_id] 146 | 147 | train_data = [] 148 | test_data = [] 149 | 150 | for id_w, writer_indices in enumerate(writers_by_workers): 151 | all_worker_data = [] 152 | for writer_id in writer_indices: 153 | all_worker_data += all_data[writer_id][1] 154 | 155 | for ii, (path, c) in enumerate(all_worker_data): 156 | all_worker_data[ii] = (path, relabel_class(c)) 157 | 158 | tot_num_samples = len(all_worker_data) 159 | curr_num_samples = int(args.s_frac * tot_num_samples) 160 | 161 | indices = [i for i in range(tot_num_samples)] 162 | worker_indices = rng.sample(indices, curr_num_samples) 163 | 164 | num_train_samples = max(1, int(args.tr_frac * curr_num_samples)) 165 | num_test_samples = curr_num_samples - num_train_samples 166 | 167 | 
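# per-worker train/test split (illustrative annotation): draw num_train_samples of this
# worker's sampled indices at random for training; the remaining local samples form its test set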
train_indices = rng.sample(worker_indices, num_train_samples) 168 | test_indices = list(set(worker_indices) - set(train_indices)) 169 | 170 | worker_data = [all_worker_data[ii] for ii in train_indices] 171 | train_data += [all_worker_data[ii] for ii in train_indices] 172 | test_data += [all_worker_data[ii] for ii in test_indices] 173 | 174 | with open('train/{}.pkl'.format(id_w), 'wb') as f: 175 | pickle.dump(worker_data, f, pickle.HIGHEST_PROTOCOL) 176 | 177 | with open('train/train.pkl', 'wb') as f: 178 | pickle.dump(train_data, f, pickle.HIGHEST_PROTOCOL) 179 | 180 | with open('test/test.pkl', 'wb') as f: 181 | pickle.dump(test_data, f, pickle.HIGHEST_PROTOCOL) 182 | -------------------------------------------------------------------------------- /graph_utils/generate_networks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import networkx as nx 5 | 6 | from utils.evaluate_throughput import evaluate_cycle_time 7 | from utils.utils import get_connectivity_graph, add_upload_download_delays, get_delta_mbst_overlay,\ 8 | get_star_overlay, get_ring_overlay, get_matcha_cycle_time 9 | 10 | # Model size in bit 11 | MODEL_SIZE_DICT = {"synthetic": 4354, 12 | "shakespeare": 3385747, 13 | "femnist": 4843243, 14 | "sent140": 19269416, 15 | "inaturalist": 44961717} 16 | 17 | # Model computation time in ms 18 | COMPUTATION_TIME_DICT = {"synthetic": 1.5, 19 | "shakespeare": 389.6, 20 | "femnist": 4.6, 21 | "sent140": 9.8, 22 | "inaturalist": 25.4} 23 | 24 | 25 | parser = argparse.ArgumentParser() 26 | 27 | parser.add_argument('name', 28 | help='name of the network to use;') 29 | parser.add_argument("--experiment", 30 | type=str, 31 | help="name of the experiment that will be run on the network;" 32 | "possible are femnist, inaturalist, synthetic, shakespeare, sent140;" 33 | "if not precised --model_size will be used as model size;", 34 | default=None) 35 | parser.add_argument('--model_size', 36 | type=float, 37 | help="size of the model that will be transmitted on the network in bit;" 38 | "ignored if --experiment is precised;", 39 | default=1e8) 40 | parser.add_argument("--local_steps", 41 | type=int, 42 | help="number of local steps, used to get computation time", 43 | default=1) 44 | parser.add_argument("--upload_capacity", 45 | type=float, 46 | help="upload capacity at edge in bit/s; default=1e32", 47 | default=1e32) 48 | parser.add_argument("--download_capacity", 49 | type=float, 50 | help="download capacity at edge in bit/s; default=1e32", 51 | default=1e32) 52 | parser.add_argument("--communication_budget", 53 | type=float, 54 | help="communication budget to use with matcha; will be ignored if name is not matcha", 55 | default=0.5) 56 | parser.add_argument("--default_capacity", 57 | type=float, 58 | help="default capacity (in bit/s) to use on links with unknown capacity", 59 | default=1e9) 60 | parser.add_argument('--centrality', 61 | help="centrality type; default: load;", 62 | default="load") 63 | 64 | parser.set_defaults(user=False) 65 | 66 | args = parser.parse_args() 67 | args.default_capacity *= 1e-3 68 | 69 | if __name__ == "__main__": 70 | if args.experiment is not None: 71 | args.model_size = MODEL_SIZE_DICT[args.experiment] 72 | args.computation_time = args.local_steps * COMPUTATION_TIME_DICT[args.experiment] 73 | 74 | upload_delay = (args.model_size / args.upload_capacity) * 1e3 75 | download_delay = (args.model_size / args.download_capacity) * 1e3 76 | 77 | result_dir = "./results/{}".format(args.name) 78 
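# outputs for this underlay are collected under ./results/<name>/:
# cycle_time.txt (one line per architecture) plus one GML file per generated
# overlay (original, mst, mct_congest, centralized, ring)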
| if not os.path.exists(result_dir): 79 | os.makedirs(result_dir) 80 | 81 | results_txt_path = os.path.join(result_dir, "cycle_time.txt") 82 | results_file = open(results_txt_path, "w") 83 | 84 | path_to_graph = "./data/{}.gml".format(args.name) 85 | 86 | underlay = nx.read_gml(path_to_graph) 87 | 88 | print("Number of Workers: {}".format(underlay.number_of_nodes())) 89 | print("Number of links: {}".format(underlay.number_of_edges())) 90 | 91 | nx.set_node_attributes(underlay, upload_delay, 'uploadDelay') 92 | nx.set_node_attributes(underlay, download_delay, "downloadDelay") 93 | 94 | nx.write_gml(underlay.copy(), os.path.join(result_dir, "original.gml")) 95 | 96 | connectivity_graph = get_connectivity_graph(underlay, args.default_capacity) 97 | 98 | # MST 99 | for u, v, data in connectivity_graph.edges(data=True): 100 | weight = args.computation_time + data["latency"] + args.model_size / data["availableBandwidth"] 101 | connectivity_graph.add_edge(u, v, weight=weight) 102 | 103 | MST = nx.minimum_spanning_tree(connectivity_graph.copy(), weight="weight") 104 | 105 | MST = MST.to_directed() 106 | 107 | cycle_time, _, _ = evaluate_cycle_time(add_upload_download_delays(MST, args.computation_time, args.model_size)) 108 | 109 | nx.write_gml(MST, os.path.join(result_dir, "mst.gml")) 110 | print("Cycle time for MST architecture: {0:.1f}".format(cycle_time)) 111 | results_file.write("MST {}\n".format(cycle_time)) 112 | 113 | # delta-MBST 114 | delta_mbst, best_cycle_time, best_delta = \ 115 | get_delta_mbst_overlay(connectivity_graph.copy(), args.computation_time, args.model_size) 116 | 117 | delta_mbst = add_upload_download_delays(delta_mbst, args.computation_time, args.model_size) 118 | cycle_time, _, _ = evaluate_cycle_time(delta_mbst) 119 | 120 | nx.write_gml(delta_mbst, os.path.join(result_dir, "mct_congest.gml")) 121 | print("Cycle time for delta-MBST architecture: {0:.1f} ms".format(cycle_time)) 122 | results_file.write("MCT_congest {}\n".format(cycle_time)) 123 | 124 | # Star 125 | star = get_star_overlay(connectivity_graph.copy(), args.centrality) 126 | 127 | cycle_time, _, _ = evaluate_cycle_time(add_upload_download_delays(star, args.computation_time, args.model_size)) 128 | 129 | cycle_time = (cycle_time - args.computation_time) * 2 + args.computation_time 130 | 131 | nx.write_gml(star, os.path.join(result_dir, "centralized.gml")) 132 | print("Cycle time for STAR architecture: {0:.1f} ms".format(cycle_time)) 133 | results_file.write("Server {}\n".format(cycle_time)) 134 | 135 | # Ring 136 | ring = get_ring_overlay(connectivity_graph.copy(), args.computation_time, args.model_size) 137 | 138 | cycle_time, _, _ = evaluate_cycle_time(add_upload_download_delays(ring, args.computation_time, args.model_size)) 139 | 140 | nx.write_gml(ring, os.path.join(result_dir, "ring.gml")) 141 | print("Cycle time for RING architecture: {0:.1f} ms".format(cycle_time)) 142 | results_file.write("Ring graph {}\n".format(cycle_time)) 143 | 144 | # MATCHA 145 | cycle_time = get_matcha_cycle_time(underlay.copy(), connectivity_graph.copy(), 146 | args.computation_time, args.model_size, args.communication_budget) 147 | 148 | print("Cycle time for MATCHA architecture: {0:.1f} ms".format(cycle_time)) 149 | results_file.write("MATCHA {}\n".format(cycle_time)) 150 | 151 | # MATCHA+ 152 | cycle_time = get_matcha_cycle_time(connectivity_graph.copy(), connectivity_graph.copy(), 153 | args.computation_time, args.model_size, args.communication_budget) 154 | 155 | print("Cycle time for MATCHA+ architecture: {0:.1f} 
ms".format(cycle_time)) 156 | results_file.write("MATCHA {}\n".format(cycle_time)) 157 | -------------------------------------------------------------------------------- /make_figure2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | from utils.utils import args_to_string, loggs_to_json 8 | from utils.args import parse_args 9 | 10 | 11 | cycle_time_dict = {"gaia": {"ring": 522.8, 12 | "centralized": 9293.3, 13 | "mst": 1442.0, 14 | "mct_congest": 1018.8, 15 | "matcha": 2612.8}, 16 | "amazon_us": {"ring": 485.9, 17 | "centralized": 18983.2, 18 | "mst": 1385.7, 19 | "mct_congest": 952.8, 20 | "matcha": 5036.7}, 21 | "geantdistance": {"ring": 491.1, 22 | "centralized": 35188.4, 23 | "mst": 2753.8, 24 | "mct_congest": 984.7, 25 | "matcha": 2658.9}, 26 | "exodus": {"ring": 488.1, 27 | "centralized": 70350.7, 28 | "mst": 3176.9, 29 | "mct_congest": 1023.5, 30 | "matcha": 2874.3}, 31 | "ebone": {"ring": 482.2, 32 | "centralized": 77462.5, 33 | "mst": 4123.4, 34 | "mct_congest": 984.8, 35 | "matcha": 2660.3}} 36 | 37 | EXTENSIONS = {"synthetic": ".json", 38 | "sent140": ".json", 39 | "femnist": ".pkl", 40 | "shakespeare": ".txt", 41 | "inaturalist": ".pkl"} 42 | 43 | # Model size in bit 44 | MODEL_SIZE_DICT = {"synthetic": 4354, 45 | "shakespeare": 3385747, 46 | "femnist": 4843243, 47 | "sent140": 19269416, 48 | "inaturalist": 44961717} 49 | 50 | # Model computation time in ms 51 | COMPUTATION_TIME_DICT = {"synthetic": 1.5, 52 | "shakespeare": 389.6, 53 | "femnist": 4.6, 54 | "sent140": 9.8, 55 | "inaturalist": 25.4} 56 | 57 | # Tags list 58 | TAGS = ["Train/Loss", "Train/Acc", "Test/Loss", "Test/Acc", "Consensus"] 59 | 60 | labels_dict = {"matcha": "MATCHA$^{+}$", 61 | "mst": "MST", 62 | "centralized": "STAR", 63 | 'mct_congest': "$\delta$-MBST", 64 | "ring": "RING"} 65 | 66 | tag_dict = {"Train/Loss": "Train loss", 67 | "Train/Acc": "Train acc", 68 | "Test/Loss": "Test loss", 69 | "Test/Acc": "Test acc", 70 | "Consensus": "Consensus"} 71 | 72 | path_dict = {"Train/Loss": "Train_loss", 73 | "Train/Acc": "Train_acc", 74 | "Test/Loss": "Test_loss", 75 | "Test/Acc": "Test_acc", 76 | "Consensus": "Consensus"} 77 | 78 | trsh_dict = {"gaia": 0.65, 79 | "amazon_us": 0.55, 80 | "geantdistance": 0.55, 81 | "exodus": 0.5, 82 | "ebone": 0.5} 83 | 84 | lr_dict = {"gaia": "1e-3", 85 | "amazon_us": "1e-3", 86 | "geantdistance": "1e-3", 87 | "exodus": "1e-1", 88 | "ebone": "1e-1"} 89 | 90 | bz_dict = {"shakespeare": 512, 91 | "femnist": 128, 92 | "sent140": 512, 93 | "inaturalist": 16} 94 | 95 | 96 | def make_plots(args, mode=0): 97 | os.makedirs(os.path.join("results", "plots", args.experiment), exist_ok=True) 98 | 99 | loggs_dir_path = os.path.join("loggs", args_to_string(args)) 100 | path_to_json = os.path.join("results", "json", "{}.json".format(os.path.split(loggs_dir_path)[1])) 101 | with open(path_to_json, "r") as f: 102 | data = json.load(f) 103 | 104 | # fig, axs = plt.subplots(2, 5, figsize=(20, 8)) 105 | x_lim = np.inf 106 | for idx, tag in enumerate(TAGS): 107 | fig = plt.figure(figsize=(12, 10)) 108 | for architecture in ["centralized", "matcha", "mst", "mct_congest", "ring"]: 109 | try: 110 | values = data[tag][architecture] 111 | rounds = data["Round"][architecture] 112 | except: 113 | continue 114 | 115 | if mode == 0: 116 | min_len = min(len(values), len(rounds)) 117 | 118 | if rounds[-1] * cycle_time_dict[network_name][architecture] < x_lim: 119 | x_lim = 
rounds[-1] * cycle_time_dict[network_name][architecture] 120 | 121 | plt.plot(cycle_time_dict[network_name][architecture] * np.array(rounds) / 1000, 122 | values[:min_len], label=labels_dict[architecture], 123 | linewidth=5.0) 124 | plt.grid(True, linewidth=2) 125 | plt.xlim(0, x_lim / 1000) 126 | plt.ylabel("{}".format(tag_dict[tag]), fontsize=50) 127 | plt.xlabel("time (s)", fontsize=50) 128 | plt.tick_params(axis='both', labelsize=40) 129 | plt.tick_params(axis='x') 130 | plt.legend(fontsize=35) 131 | 132 | else: 133 | min_len = min(len(values), len(rounds)) 134 | 135 | if rounds[:min_len][-1] < x_lim: 136 | x_lim = rounds[:min_len][-1] 137 | 138 | plt.plot(rounds[:min_len], 139 | values[:min_len], label=labels_dict[architecture], 140 | linewidth=5.0) 141 | plt.ylabel("{}".format(tag_dict[tag]), fontsize=50) 142 | plt.xlabel("Rounds", fontsize=50) 143 | plt.tick_params(axis='both', labelsize=40) 144 | plt.legend(fontsize=35) 145 | plt.grid(True, linewidth=2) 146 | plt.xlim(0, x_lim) 147 | 148 | if mode == 0: 149 | fig_path = os.path.join("results", "plots", args.experiment, 150 | "{}_{}_vs_time.png".format(args.network_name, path_dict[tag])) 151 | plt.savefig(fig_path, bbox_inches='tight') 152 | else: 153 | fig_path = os.path.join("results", "plots", args.experiment, 154 | "{}_{}_vs_iteration.png".format(args.network_name, path_dict[tag])) 155 | plt.savefig(fig_path, bbox_inches='tight') 156 | 157 | 158 | if __name__ == "__main__": 159 | network_name = "amazon_us" 160 | 161 | for experiment in [ "inaturalist", "shakespeare", "sent140", "femnist"]: 162 | args = parse_args([experiment, 163 | "--network", network_name, 164 | "--bz", str(bz_dict[experiment]), 165 | "--lr", str(lr_dict[network_name]), 166 | "--decay", "sqrt", 167 | "--local_steps", "1"]) 168 | 169 | args_string = args_to_string(args) 170 | 171 | loggs_dir = os.path.join("loggs", args_to_string(args)) 172 | loggs_to_json(loggs_dir) 173 | 174 | print("{}:".format(experiment)) 175 | 176 | make_plots(args, mode=0) 177 | make_plots(args, mode=1) 178 | 179 | print("#" * 10) 180 | 181 | 182 | 183 | 184 | -------------------------------------------------------------------------------- /communication_module/manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | from abc import ABC, abstractmethod 3 | 4 | import torch 5 | import torch.distributed as dist 6 | from torch.utils.tensorboard import SummaryWriter 7 | 8 | from graph_utils.utils.matcha import RandomTopologyGenerator 9 | from utils.utils import get_network, get_iterator, get_model, args_to_string 10 | 11 | 12 | EXTENSIONS = {"synthetic": ".json", "sent140": ".json", "femnist": ".pkl", "shakespeare": ".txt"} 13 | 14 | 15 | class Manager(ABC): 16 | def __init__(self, args): 17 | self.device = args.device 18 | self.batch_size = args.bz 19 | self.network = get_network(args.network_name, args.architecture) 20 | self.world_size = self.network.number_of_nodes() + 1 # we add node representing the network manager 21 | self.log_freq = args.log_freq 22 | 23 | # create logger 24 | logger_path = os.path.join("loggs", args_to_string(args), args.architecture) 25 | self.logger = SummaryWriter(logger_path) 26 | 27 | self.round_idx = 0 # index of the current communication round 28 | 29 | self.train_dir = os.path.join("data", args.experiment, "train") 30 | self.test_dir = os.path.join("data", args.experiment, "test") 31 | 32 | self.train_path = os.path.join(self.train_dir, "train" + EXTENSIONS[args.experiment]) 33 | self.test_path = 
os.path.join(self.test_dir, "test" + EXTENSIONS[args.experiment]) 34 | 35 | self.train_iterator = get_iterator(args.experiment, self.train_path, self.device, self.batch_size) 36 | self.test_iterator = get_iterator(args.experiment, self.test_path, self.device, self.batch_size) 37 | 38 | self.gather_list = [get_model(args.experiment, self.device, self.train_iterator) 39 | for _ in range(self.world_size)] 40 | 41 | self.scatter_list = [get_model(args.experiment, self.device, self.train_iterator) 42 | for _ in range(self.world_size)] 43 | 44 | # print initial logs 45 | self.write_logs() 46 | 47 | def communicate(self): 48 | for ii, param in enumerate(self.gather_list[-1].net.parameters()): 49 | param_list = [list(self.gather_list[idx].net.parameters())[ii].data 50 | for idx in range(self.world_size)] 51 | 52 | dist.gather(tensor=param.data, dst=self.world_size - 1, gather_list=param_list) 53 | 54 | self.mix() 55 | 56 | if (self.round_idx - 1) % self.log_freq == 0: 57 | self.write_logs() 58 | 59 | for ii, param in enumerate(self.scatter_list[-1].net.parameters()): 60 | param_list = [list(self.scatter_list[idx].net.parameters())[ii].data 61 | for idx in range(self.world_size)] 62 | 63 | dist.scatter(tensor=param.data, src=self.world_size - 1, scatter_list=param_list) 64 | 65 | @abstractmethod 66 | def mix(self): 67 | pass 68 | 69 | def write_logs(self): 70 | """ 71 | write train/test loss, train/tet accuracy for average model and local models 72 | and intra-workers parameters variance (consensus) adn save average model 73 | """ 74 | train_loss, train_acc = self.scatter_list[-1].evaluate_iterator(self.train_iterator) 75 | test_loss, test_acc = self.scatter_list[-1].evaluate_iterator(self.train_iterator) 76 | 77 | self.logger.add_scalar("Train/Loss", train_loss, self.round_idx) 78 | self.logger.add_scalar("Train/Acc", train_acc, self.round_idx) 79 | self.logger.add_scalar("Test/Loss", test_loss, self.round_idx) 80 | self.logger.add_scalar("Test/Acc", test_acc, self.round_idx) 81 | 82 | # write parameter variance 83 | average_parameter = self.scatter_list[-1].get_param_tensor() 84 | 85 | param_tensors_by_workers = torch.zeros((average_parameter.shape[0], self.world_size - 1)) 86 | 87 | for ii, model in enumerate(self.scatter_list[:-1]): 88 | param_tensors_by_workers[:, ii] = model.get_param_tensor() - average_parameter 89 | 90 | consensus = (param_tensors_by_workers ** 2).sum() 91 | self.logger.add_scalar("Consensus", consensus, self.round_idx) 92 | 93 | print(f'\t Round: {self.round_idx} |Test Loss: {test_loss:.3f} | Test Acc: {test_acc * 100:.2f}%') 94 | 95 | 96 | class Peer2PeerManager(Manager): 97 | def mix(self): 98 | for ii, model in enumerate(self.scatter_list): 99 | if ii == self.world_size - 1: 100 | for param_idx, param in enumerate(model.net.parameters()): 101 | param.data.fill_(0.) 102 | for local_model in self.scatter_list[:-1]: 103 | param.data += (1 / (self.world_size - 1)) * list(local_model.net.parameters())[param_idx] 104 | else: 105 | for param_idx, param in enumerate(model.net.parameters()): 106 | param.data.fill_(0.) 
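# decentralized (gossip) mixing step: worker ii's parameters are rebuilt as the
# weighted combination of the gathered parameters of its overlay neighbours,
#     param_i <- sum_j w_ij * param_j,
# where w_ij is the "weight" attribute of edge (i, j) in the overlay; if the
# overlay carries a self-loop at ii, the worker's own w_ii term is included as well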
107 | for neighbour in self.network.neighbors(ii): 108 | coeff = self.network.get_edge_data(ii, neighbour)["weight"] 109 | param.data += coeff * list(self.gather_list[neighbour].net.parameters())[param_idx] 110 | 111 | self.round_idx += 1 112 | 113 | 114 | class MATCHAManager(Manager): 115 | def __init__(self, args): 116 | super(MATCHAManager, self).__init__(args) 117 | path_to_save_network = os.path.join("loggs", args_to_string(args), "matcha", "colored_network.gml") 118 | path_to_matching_history_file = os.path.join("loggs", args_to_string(args), "matcha", "matching_history.csv") 119 | self.topology_generator = RandomTopologyGenerator(self.network, 120 | args.communication_budget, 121 | network_save_path=path_to_save_network, 122 | path_to_history_file=path_to_matching_history_file) 123 | 124 | def mix(self): 125 | # update topology 126 | self.topology_generator.step() 127 | 128 | for ii, model in enumerate(self.scatter_list): 129 | if ii == self.world_size - 1: 130 | for param_idx, param in enumerate(model.net.parameters()): 131 | param.data.fill_(0.) 132 | for local_model in self.scatter_list[:-1]: 133 | param.data += (1 / (self.world_size - 1)) * list(local_model.net.parameters())[param_idx] 134 | else: 135 | for param_idx, param in enumerate(model.net.parameters()): 136 | param.data.fill_(0.) 137 | for neighbour in self.topology_generator.current_topology.neighbors(ii): 138 | coeff = self.topology_generator.current_topology.get_edge_data(ii, neighbour)["weight"] 139 | param.data += coeff * list(self.gather_list[neighbour].net.parameters())[param_idx] 140 | 141 | self.round_idx += 1 142 | 143 | 144 | class CentralizedManager(Manager): 145 | def mix(self): 146 | for param_idx, param in enumerate(self.scatter_list[-1].net.parameters()): 147 | param.data.fill_(0.) 
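# star (server) aggregation: the last entry of scatter_list acts as the central
# server; its parameters are rebuilt below as the uniform average
# (1 / number of workers) of all gathered worker models, and the following loop
# over scatter_list[:-1] copies this average back into every worker's model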
148 | for local_model in self.gather_list[:-1]: 149 | param.data += (1 / (self.world_size - 1)) * list(local_model.net.parameters())[param_idx] 150 | 151 | for ii, model in enumerate(self.scatter_list[:-1]): 152 | for param_idx, param in enumerate(model.net.parameters()): 153 | param.data = list(self.scatter_list[-1].net.parameters())[param_idx] 154 | 155 | self.round_idx += 1 156 | -------------------------------------------------------------------------------- /graph_utils/data/gaia.gml: -------------------------------------------------------------------------------- 1 | graph [ 2 | node [ 3 | id 0 4 | label "Virginia" 5 | ] 6 | node [ 7 | id 1 8 | label "California" 9 | ] 10 | node [ 11 | id 2 12 | label "Oregon" 13 | ] 14 | node [ 15 | id 3 16 | label "Dublin" 17 | ] 18 | node [ 19 | id 4 20 | label "Frankfurt" 21 | ] 22 | node [ 23 | id 5 24 | label "Tokyo" 25 | ] 26 | node [ 27 | id 6 28 | label "Seoul" 29 | ] 30 | node [ 31 | id 7 32 | label "Singapore" 33 | ] 34 | node [ 35 | id 8 36 | label "Sydney" 37 | ] 38 | node [ 39 | id 9 40 | label "Mumbai" 41 | ] 42 | node [ 43 | id 10 44 | label "Sao Paulo" 45 | ] 46 | edge [ 47 | source 0 48 | target 1 49 | distance 3560.859824767453 50 | capacity 1000000000.0 51 | ] 52 | edge [ 53 | source 0 54 | target 2 55 | distance 3617.1058525455455 56 | capacity 1000000000.0 57 | ] 58 | edge [ 59 | source 0 60 | target 3 61 | distance 5683.746538162422 62 | capacity 100000000.0 63 | ] 64 | edge [ 65 | source 0 66 | target 4 67 | distance 6774.62010172149 68 | capacity 100000000.0 69 | ] 70 | edge [ 71 | source 0 72 | target 5 73 | distance 11032.403521116341 74 | capacity 100000000.0 75 | ] 76 | edge [ 77 | source 0 78 | target 6 79 | distance 11331.528778910633 80 | capacity 100000000.0 81 | ] 82 | edge [ 83 | source 0 84 | target 7 85 | distance 15737.083172377323 86 | capacity 100000000.0 87 | ] 88 | edge [ 89 | source 0 90 | target 8 91 | distance 15550.74835546916 92 | capacity 100000000.0 93 | ] 94 | edge [ 95 | source 0 96 | target 9 97 | distance 13113.161300492078 98 | capacity 100000000.0 99 | ] 100 | edge [ 101 | source 0 102 | target 10 103 | distance 7500.898168816753 104 | capacity 500000000.0 105 | ] 106 | edge [ 107 | source 1 108 | target 2 109 | distance 825.4130940774442 110 | capacity 1500000000.0 111 | ] 112 | edge [ 113 | source 1 114 | target 3 115 | distance 8111.218768362535 116 | capacity 100000000.0 117 | ] 118 | edge [ 119 | source 1 120 | target 4 121 | distance 9096.865584257743 122 | capacity 100000000.0 123 | ] 124 | edge [ 125 | source 1 126 | target 5 127 | distance 8620.547632468602 128 | capacity 100000000.0 129 | ] 130 | edge [ 131 | source 1 132 | target 6 133 | distance 9370.063077937788 134 | capacity 300000000.0 135 | ] 136 | edge [ 137 | source 1 138 | target 7 139 | distance 13930.612571776204 140 | capacity 100000000.0 141 | ] 142 | edge [ 143 | source 1 144 | target 8 145 | distance 12160.544494528913 146 | capacity 300000000.0 147 | ] 148 | edge [ 149 | source 1 150 | target 9 151 | distance 13727.28776621854 152 | capacity 100000000.0 153 | ] 154 | edge [ 155 | source 1 156 | target 10 157 | distance 10079.072989313954 158 | capacity 100000000.0 159 | ] 160 | edge [ 161 | source 2 162 | target 3 163 | distance 7551.970732123231 164 | capacity 300000000.0 165 | ] 166 | edge [ 167 | source 2 168 | target 4 169 | distance 8488.896086335717 170 | capacity 100000000.0 171 | ] 172 | edge [ 173 | source 2 174 | target 5 175 | distance 8028.469388699873 176 | capacity 100000000.0 177 | ] 178 | edge [ 179 | 
source 2 180 | target 6 181 | distance 8700.031091458462 182 | capacity 300000000.0 183 | ] 184 | edge [ 185 | source 2 186 | target 7 187 | distance 13325.366070623815 188 | capacity 100000000.0 189 | ] 190 | edge [ 191 | source 2 192 | target 8 193 | distance 12383.076161347562 194 | capacity 100000000.0 195 | ] 196 | edge [ 197 | source 2 198 | target 9 199 | distance 12902.319229980723 200 | capacity 100000000.0 201 | ] 202 | edge [ 203 | source 2 204 | target 10 205 | distance 10610.577959918295 206 | capacity 100000000.0 207 | ] 208 | edge [ 209 | source 3 210 | target 4 211 | distance 1091.0035398064083 212 | capacity 1500000000.0 213 | ] 214 | edge [ 215 | source 3 216 | target 5 217 | distance 9611.133798789571 218 | capacity 100000000.0 219 | ] 220 | edge [ 221 | source 3 222 | target 6 223 | distance 8974.589549377932 224 | capacity 100000000.0 225 | ] 226 | edge [ 227 | source 3 228 | target 7 229 | distance 11203.776982156216 230 | capacity 100000000.0 231 | ] 232 | edge [ 233 | source 3 234 | target 8 235 | distance 17207.312372624874 236 | capacity 100000000.0 237 | ] 238 | edge [ 239 | source 3 240 | target 9 241 | distance 7620.843594967312 242 | capacity 300000000.0 243 | ] 244 | edge [ 245 | source 3 246 | target 10 247 | distance 9366.555606476215 248 | capacity 100000000.0 249 | ] 250 | edge [ 251 | source 4 252 | target 5 253 | distance 9358.521215366647 254 | capacity 100000000.0 255 | ] 256 | edge [ 257 | source 4 258 | target 6 259 | distance 8571.5714609335 260 | capacity 100000000.0 261 | ] 262 | edge [ 263 | source 4 264 | target 7 265 | distance 10260.83044153216 266 | capacity 100000000.0 267 | ] 268 | edge [ 269 | source 4 270 | target 8 271 | distance 16478.1341044152 272 | capacity 100000000.0 273 | ] 274 | edge [ 275 | source 4 276 | target 9 277 | distance 6578.168093870104 278 | capacity 500000000.0 279 | ] 280 | edge [ 281 | source 4 282 | target 10 283 | distance 9807.409376220698 284 | capacity 100000000.0 285 | ] 286 | edge [ 287 | source 5 288 | target 6 289 | distance 1161.2277477992284 290 | capacity 1000000000.0 291 | ] 292 | edge [ 293 | source 5 294 | target 7 295 | distance 5311.118309037953 296 | capacity 1000000000.0 297 | ] 298 | edge [ 299 | source 5 300 | target 8 301 | distance 7789.739742827469 302 | capacity 300000000.0 303 | ] 304 | edge [ 305 | source 5 306 | target 9 307 | distance 6751.3514540143415 308 | capacity 300000000.0 309 | ] 310 | edge [ 311 | source 5 312 | target 10 313 | distance 18528.65557840507 314 | capacity 100000000.0 315 | ] 316 | edge [ 317 | source 6 318 | target 7 319 | distance 4658.7421490548095 320 | capacity 1000000000.0 321 | ] 322 | edge [ 323 | source 6 324 | target 8 325 | distance 8296.033168577038 326 | capacity 100000000.0 327 | ] 328 | edge [ 329 | source 6 330 | target 9 331 | distance 5613.893433078432 332 | capacity 500000000.0 333 | ] 334 | edge [ 335 | source 6 336 | target 10 337 | distance 18337.930813275976 338 | capacity 100000000.0 339 | ] 340 | edge [ 341 | source 7 342 | target 8 343 | distance 6301.111688839916 344 | capacity 300000000.0 345 | ] 346 | edge [ 347 | source 7 348 | target 9 349 | distance 3899.1833741194805 350 | capacity 500000000.0 351 | ] 352 | edge [ 353 | source 7 354 | target 10 355 | distance 16000.059238393498 356 | capacity 100000000.0 357 | ] 358 | edge [ 359 | source 8 360 | target 9 361 | distance 10144.778814121693 362 | capacity 100000000.0 363 | ] 364 | edge [ 365 | source 8 366 | target 10 367 | distance 13377.864263189238 368 | capacity 100000000.0 369 | 
] 370 | edge [ 371 | source 9 372 | target 10 373 | distance 13772.602629233716 374 | capacity 100000000.0 375 | ] 376 | ] 377 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Throughput-Optimal Topology Design for Cross-Silo Federated Learning 2 | 3 | This repository is the official implementation of [Throughput-Optimal Topology 4 | Design for Cross-Silo Federated Learning](https://arxiv.org/abs/2010.12229). 5 | 6 | Federated learning usually employs a master-slave architecture where an 7 | orchestrator iteratively aggregates model updates from remote clients 8 | and pushes them back a refined model. This approach may be inefficient 9 | in cross-silo settings, as close-by data silos with high-speed access 10 | links may exchange information faster than with the orchestrator, and 11 | the orchestrator may become a communication bottleneck. In this paper we 12 | define the problem of topology design for cross-silo federated learning 13 | using the theory of max-plus linear systems to compute the system 14 | throughput---number of communication rounds per time unit. We also 15 | propose practical algorithms that, under the knowledge of measurable 16 | network characteristics, find a topology with the largest throughput or 17 | with provable throughput guarantees. In realistic Internet networks with 18 | 10 Gbps access links for silos, our algorithms speed up training by a 19 | factor 9 and 1.5 in comparison to the master-slave architecture and to 20 | state-of-the-art MATCHA, respectively. Speedups are even larger with 21 | slower access links. 22 | 23 | ## Requirements 24 | 25 | To install requirements: 26 | 27 | ```setup 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | ## Datasets 32 | 33 | We provide four datasets that are used in the paper under corresponding 34 | folders. For all datasets, see the README files in separate 35 | data/$dataset folders for instructions on preprocessing and/or sampling 36 | data. 37 | 38 | ## Networks and Topologies 39 | 40 | A main part of the paper is related to topology design. In 41 | `graph_utils/` details on generating different topologies for each 42 | network are provided. Scripts to compute the cycle time of each topology 43 | are also provided in `graph_utils/` 44 | 45 | ## Training 46 | 47 | Run on one dataset, with a specific topology choice on on network. 48 | Specify the name of the dataset (experiment), the name of the network 49 | and the used architecture, and configure all other hyper-parameters (see 50 | all hyper-parameters values in the appendix of the paper) 51 | 52 | ```train 53 | python3 main.py experiment ----network_name \ 54 | --architecture=original (--parallel) (--fit_by_epoch) \ 55 | --n_rounds=1 --bz=1 56 | --local_steps=1 --log_freq=1 \ 57 | --device="cpu" --lr=1e-3\ 58 | --optimizer='adam' --decay="constant" 59 | ``` 60 | 61 | And the test and training accuracy and loss will be saved in the log files. 
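Concretely, the logs are TensorBoard summaries written under `loggs/` (one sub-directory per run configuration and architecture), so the training curves can also be browsed interactively, for example with:

```
tensorboard --logdir loggs
```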
62 | 63 | ## Evaluation 64 | 65 | ### iNaturalist Speed-ups 66 | To evaluate the speed-ups obtained when training iNaturalist on the proposed topology architectures (generate Table 3) fora given network, run 67 | 68 | ```eval 69 | python3 main.py inaturalist --network_name gaia --architecture $ARCHITECTURE --n_rounds 5600 --bz 16 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt 70 | python3 main.py inaturalist --network_name amazon_us --architecture $ARCHITECTURE --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt 71 | python3 main.py inaturalist --network_name geantdistance --architecture $ARCHITECTURE --n_rounds 4000 --bz 16 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt 72 | python3 main.py inaturalist --network_name exodus --architecture $ARCHITECTURE --n_rounds 4800 --bz 16 --device cuda --log_freq 100 --local_steps 1 --lr 0.1 --decay sqrt --optimizer sgd 73 | python3 main.py inaturalist --network_name ebone --architecture $ARCHITECTURE --n_rounds 6000 --bz 16 --device cuda --log_freq 100 --local_steps 1 --lr 0.1 --decay sqrt --optimizer sgd 74 | ``` 75 | 76 | And the test and training accuracy and loss for the corresponding experiment will be saved in the log files. 77 | 78 | Do this operation for all architectures ($ARCHITECTURE=ring, centralized, matcha, exodus, ebone). 79 | Remind that for every network, a new generation of dataset (data/$dataset folders) is required to distribute data into silos. 80 | 81 | Then run 82 | 83 | ```eval 84 | python3 make_table3.py 85 | ``` 86 | 87 | To generate the values from Table 3. 88 | 89 | ### Effect of the topology on the convergence 90 | 91 | To evaluate the influence of topology on the training evolution for the different datasets when trained on AWS-NA network, run 92 | 93 | ```eval 94 | python main.py inaturalist --network_name amazon_us --architecture ring --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt 95 | python main.py inaturalist --network_name amazon_us --architecture centralized --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt 96 | python main.py inaturalist --network_name amazon_us --architecture matcha --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt 97 | python main.py inaturalist --network_name amazon_us --architecture mst --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt 98 | python main.py inaturalist --network_name amazon_us --architecture mct_congest --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt 99 | 100 | python main.py femnist --network_name amazon_us --architecture ring --n_rounds 6400 --bz 128 --device cuda --log_freq 80 --local_steps 1 --lr 0.001 --decay sqrt 101 | python main.py femnist --network_name amazon_us --architecture centralized --n_rounds 6400 --bz 128 --device cuda --log_freq 80 --local_steps 1 --lr 0.001 --decay sqrt 102 | python main.py femnist --network_name amazon_us --architecture matcha --n_rounds 6400 --bz 128 --device cuda --log_freq 80 --local_steps 1 --lr 0.001 --decay sqrt 103 | python main.py femnist --network_name amazon_us --architecture mst --n_rounds 6400 --bz 128 --device cuda --log_freq 80 --local_steps 1 --lr 0.001 --decay sqrt 104 | python main.py femnist --network_name amazon_us --architecture mct_congest --n_rounds 6400 --bz 128 --device cuda --log_freq 80 --local_steps 1 --lr 0.001 --decay 
sqrt 105 | 106 | python main.py sent140 --network_name amazon_us --architecture ring --n_rounds 20000 --bz 512 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt 107 | python main.py sent140 --network_name amazon_us --architecture centralized --n_rounds 20000 --bz 512 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt 108 | python main.py sent140 --network_name amazon_us --architecture matcha --n_rounds 20000 --bz 512 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt 109 | python main.py sent140 --network_name amazon_us --architecture mst --n_rounds 20000 --bz 512 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt 110 | python main.py sent140 --network_name amazon_us --architecture mct_congest --n_rounds 20000 --bz 512 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt 111 | 112 | python main.py shakespeare --network_name amazon_us --architecture ring --n_rounds 1500 --bz 512 --decay sqrt --lr 1e-3 --device cuda --local_steps 1 --log_freq 30 113 | python main.py shakespeare --network_name amazon_us --architecture centralized --n_rounds 1500 --bz 512 --decay sqrt --lr 1e-3 --device cuda --local_steps 1 --log_freq 30 114 | python main.py shakespeare --network_name amazon_us --architecture matcha --n_rounds 1500 --bz 512 --decay sqrt --lr 1e-3 --device cuda --local_steps 1 --log_freq 30 115 | python main.py shakespeare --network_name amazon_us --architecture mst --n_rounds 1500 --bz 512 --decay sqrt --lr 1e-3 --device cuda --local_steps 1 --log_freq 30 116 | python main.py shakespeare --network_name amazon_us --architecture mct_congest --n_rounds 1500 --bz 512 --decay sqrt --lr 1e-3 --device cuda --local_steps 1 --log_freq 30 117 | ``` 118 | 119 | to generate the log files for each experiment. Tne run 120 | 121 | ```eval 122 | python3 make_figure2.py 123 | ``` 124 | 125 | to generate Figure 2. (Figures will be found in `results/plots`) 126 | 127 | ## Results 128 | 129 | ### iNaturalist Speed-ups 130 | Our topology design achieves the following speed-ups when training 131 | iNaturalist dataset over different networks: 132 | 133 | 134 | |Network Name | Silos | Links | Ring vs Star speed-up | Ring vs MATCHA speed-up| 135 | | ------------------ | ------|-------|---------------- | -------------- | 136 | | Gaia | 11 | 55 |2.65 | 1.54 | 137 | | AWS NA | 22 | 321 |3.41 |1.47| 138 | | Géant | 40 | 61 |4.85 |0.81| 139 | | Exodus | 79 | 147 |8.78 |1.37| 140 | | Ebone | 87 | 161 |8.83 |1.29| 141 | 142 | ### Effect of the topology on the convergence 143 | 144 | Effect of overlays on the convergence w.r.t. communication rounds (top row) 145 | and wall-clock time(bottom row) when training four different datasets on 146 | AWS North America underlay.1Gbps core links capacities, 100Mbps access 147 | links capacities,s= 1. 148 | 149 | ![](https://user-images.githubusercontent.com/42912620/84382812-7e215780-abeb-11ea-94f5-e08e506ace89.PNG) 150 | -------------------------------------------------------------------------------- /data/shakespeare/preprocess_shakespeare.py: -------------------------------------------------------------------------------- 1 | """Preprocesses the Shakespeare dataset for federated training. 2 | Copyright 2017 Google Inc. 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | https://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | To run: 13 | python preprocess_shakespeare.py path/to/raw/shakespeare.txt output_directory/ 14 | The raw data can be downloaded from: 15 | http://www.gutenberg.org/cache/epub/100/pg100.txt 16 | (The Plain Text UTF-8 file format, md5sum: 036d0f9cf7296f41165c2e6da1e52a0e) 17 | Note that The Comedy of Errors has a incorrect indentation compared to all the 18 | other plays in the file. The code below reflects that issue. To make the code 19 | cleaner, you could fix the indentation in the raw shakespeare file and remove 20 | the special casing for that play in the code below. 21 | Authors: loeki@google.com, mcmahan@google.com 22 | Disclaimer: This is not an official Google product. 23 | """ 24 | import collections 25 | import json 26 | import os 27 | import random 28 | import re 29 | import sys 30 | RANDOM_SEED = 1234 31 | # Regular expression to capture an actors name, and line continuation 32 | CHARACTER_RE = re.compile(r'^ ([a-zA-Z][a-zA-Z ]*)\. (.*)') 33 | CONT_RE = re.compile(r'^ (.*)') 34 | # The Comedy of Errors has errors in its indentation so we need to use 35 | # different regular expressions. 36 | COE_CHARACTER_RE = re.compile(r'^([a-zA-Z][a-zA-Z ]*)\. (.*)') 37 | COE_CONT_RE = re.compile(r'^(.*)') 38 | 39 | 40 | def _match_character_regex(line, comedy_of_errors=False): 41 | return (COE_CHARACTER_RE.match(line) if comedy_of_errors 42 | else CHARACTER_RE.match(line)) 43 | 44 | 45 | def _match_continuation_regex(line, comedy_of_errors=False): 46 | return ( 47 | COE_CONT_RE.match(line) if comedy_of_errors else CONT_RE.match(line)) 48 | 49 | 50 | def _split_into_plays(shakespeare_full): 51 | """Splits the full data by play.""" 52 | # List of tuples (play_name, dict from character to list of lines) 53 | plays = [] 54 | discarded_lines = [] # Track discarded lines. 55 | slines = shakespeare_full.splitlines(True)[1:] 56 | 57 | # skip contents, the sonnets, and all's well that ends well 58 | author_count = 0 59 | start_i = 0 60 | for i, l in enumerate(slines): 61 | if 'by William Shakespeare' in l: 62 | author_count += 1 63 | if author_count == 2: 64 | start_i = i - 5 65 | break 66 | slines = slines[start_i:] 67 | 68 | current_character = None 69 | comedy_of_errors = False 70 | for i, line in enumerate(slines): 71 | # This marks the end of the plays in the file. 72 | if i > 124195 - start_i: 73 | break 74 | # This is a pretty good heuristic for detecting the start of a new play: 75 | if 'by William Shakespeare' in line: 76 | current_character = None 77 | characters = collections.defaultdict(list) 78 | # The title will be 2, 3, 4, 5, 6, or 7 lines above "by William Shakespeare". 79 | if slines[i - 2].strip(): 80 | title = slines[i - 2] 81 | elif slines[i - 3].strip(): 82 | title = slines[i - 3] 83 | elif slines[i - 4].strip(): 84 | title = slines[i - 4] 85 | elif slines[i - 5].strip(): 86 | title = slines[i - 5] 87 | elif slines[i - 6].strip(): 88 | title = slines[i - 6] 89 | else: 90 | title = slines[i - 7] 91 | title = title.strip() 92 | 93 | assert title, ( 94 | 'Parsing error on line %d. Expecting title 2 or 3 lines above.' 
% 95 | i) 96 | comedy_of_errors = (title == 'THE COMEDY OF ERRORS') 97 | # Degenerate plays are removed at the end of the method. 98 | plays.append((title, characters)) 99 | continue 100 | match = _match_character_regex(line, comedy_of_errors) 101 | if match: 102 | character, snippet = match.group(1), match.group(2) 103 | # Some character names are written with multiple casings, e.g., SIR_Toby 104 | # and SIR_TOBY. To normalize the character names, we uppercase each name. 105 | # Note that this was not done in the original preprocessing and is a 106 | # recent fix. 107 | character = character.upper() 108 | if not (comedy_of_errors and character.startswith('ACT ')): 109 | characters[character].append(snippet) 110 | current_character = character 111 | continue 112 | else: 113 | current_character = None 114 | continue 115 | elif current_character: 116 | match = _match_continuation_regex(line, comedy_of_errors) 117 | if match: 118 | if comedy_of_errors and match.group(1).startswith('<'): 119 | current_character = None 120 | continue 121 | else: 122 | characters[current_character].append(match.group(1)) 123 | continue 124 | # Didn't consume the line. 125 | line = line.strip() 126 | if line and i > 2646: 127 | # Before 2646 are the sonnets, which we expect to discard. 128 | discarded_lines.append('%d:%s' % (i, line)) 129 | # Remove degenerate "plays". 130 | return [play for play in plays if len(play[1]) > 1], discarded_lines 131 | 132 | 133 | def _remove_nonalphanumerics(filename): 134 | return re.sub('\\W+', '_', filename) 135 | 136 | 137 | def play_and_character(play, character): 138 | return _remove_nonalphanumerics((play + '_' + character).replace(' ', '_')) 139 | 140 | 141 | def _get_train_test_by_character(plays, test_fraction=0.2): 142 | """ 143 | Splits character data into train and test sets. 144 | if test_fraction <= 0, returns {} for all_test_examples 145 | plays := list of (play, dict) tuples where play is a string and dict 146 | is a dictionary with character names as keys 147 | """ 148 | skipped_characters = 0 149 | all_train_examples = collections.defaultdict(list) 150 | all_test_examples = collections.defaultdict(list) 151 | 152 | def add_examples(example_dict, example_tuple_list): 153 | for play, character, sound_bite in example_tuple_list: 154 | example_dict[play_and_character( 155 | play, character)].append(sound_bite) 156 | 157 | users_and_plays = {} 158 | for play, characters in plays: 159 | curr_characters = list(characters.keys()) 160 | for c in curr_characters: 161 | users_and_plays[play_and_character(play, c)] = play 162 | for character, sound_bites in characters.items(): 163 | examples = [(play, character, sound_bite) 164 | for sound_bite in sound_bites] 165 | if len(examples) <= 2: 166 | skipped_characters += 1 167 | # Skip characters with fewer than 2 lines since we need at least one 168 | # train and one test line. 
169 | continue 170 | train_examples = examples 171 | if test_fraction > 0: 172 | num_test = max(int(len(examples) * test_fraction), 1) 173 | train_examples = examples[:-num_test] 174 | test_examples = examples[-num_test:] 175 | assert len(test_examples) == num_test 176 | assert len(train_examples) >= len(test_examples) 177 | add_examples(all_test_examples, test_examples) 178 | add_examples(all_train_examples, train_examples) 179 | return users_and_plays, all_train_examples, all_test_examples 180 | 181 | 182 | def _write_data_by_character(examples, output_directory): 183 | """Writes a collection of data files by play & character.""" 184 | if not os.path.exists(output_directory): 185 | os.makedirs(output_directory) 186 | for character_name, sound_bites in examples.items(): 187 | filename = os.path.join(output_directory, character_name + '.txt') 188 | with open(filename, 'w') as output: 189 | for sound_bite in sound_bites: 190 | output.write(sound_bite + '\n') 191 | 192 | 193 | def main(argv): 194 | print('Splitting .txt data between users') 195 | input_filename = argv[0] 196 | with open(input_filename, 'r') as input_file: 197 | shakespeare_full = input_file.read() 198 | plays, discarded_lines = _split_into_plays(shakespeare_full) 199 | print('Discarded %d lines' % len(discarded_lines)) 200 | users_and_plays, all_examples, _ = _get_train_test_by_character(plays, test_fraction=-1.0) 201 | output_directory = argv[1] 202 | with open(os.path.join(output_directory, 'users_and_plays.json'), 'w') as ouf: 203 | json.dump(users_and_plays, ouf) 204 | _write_data_by_character(all_examples, 205 | os.path.join(output_directory, 206 | 'by_play_and_character/')) 207 | 208 | 209 | if __name__ == '__main__': 210 | main(sys.argv[1:]) -------------------------------------------------------------------------------- /graph_utils/utils/matching_decomposition.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | 4 | def matching_decomposition(graph): 5 | """ 6 | Implementing Misra & Gries edge coloring algorithm; 7 | The coloring produces uses at most Delta +1 colors, where Delta is the maximum degree of the graph; 8 | By Vizing's theorem it uses at most one color more than the optimal for all others; 9 | See http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.4452 for details 10 | :param graph: nx.Graph() 11 | :return: - List of matching; each matching is an nx.Graph() representing a sub-graph of "graph" 12 | - list of laplacian matrices, a laplacian matrix for each matching 13 | """ 14 | # Initialize the graph with a greedy coloring of less then degree + 1 colors 15 | nx.set_edge_attributes(graph, None, 'color') 16 | 17 | # edge coloring 18 | for u, v in graph.edges: 19 | if u != v: 20 | graph = color_edge(graph, u, v) 21 | 22 | # matching decomposition 23 | matching_list = get_matching_list_from_graph(graph) 24 | 25 | # compute laplacian matrices 26 | laplacian_matrices = [nx.laplacian_matrix(matching, nodelist=graph.nodes(), weight=None).toarray() 27 | for matching in matching_list] 28 | 29 | return matching_list, laplacian_matrices 30 | 31 | 32 | def get_matching_list_from_graph(graph): 33 | """ 34 | 35 | :param graph: nx.Graph(); each edge should have an attribute "color" 36 | :return: List of matching; each matching is an nx.Graph() representing a sub-graph of "graph" 37 | """ 38 | degree = get_graph_degree(graph) 39 | colors = [i for i in range(degree + 1)] 40 | 41 | matching_list = [nx.Graph() for _ in colors] 42 | 43 | for (u, v, 
data) in graph.edges(data=True): 44 | color = data["color"] 45 | idx = colors.index(color) 46 | matching_list[idx].add_edges_from([(u, v, data)]) 47 | 48 | return matching_list 49 | 50 | 51 | def color_edge(graph, u, v): 52 | """ 53 | color edge (u, v) if uncolored following Misra & Gries procedure; 54 | :param graph: nx.Graph(); each edge should have an attribute "color" 55 | :param u: node in "graph" 56 | :param v: node in "graph" 57 | :return: nx.Graph() where edge (u, v) has an attribute "color", the generated coloring is valid 58 | """ 59 | degree = get_graph_degree(graph) 60 | colors = [i for i in range(degree + 1)] 61 | 62 | if graph.get_edge_data(u, v)["color"] is not None: 63 | return graph 64 | 65 | else: 66 | maximal_fan = get_maximal_fan(graph, u, v) 67 | 68 | for color in colors: 69 | if is_color_free(graph, u, color): 70 | c = color 71 | break 72 | 73 | for color in colors: 74 | if is_color_free(graph, maximal_fan[-1], color): 75 | d = color 76 | break 77 | 78 | cd_path = get_cd_path(graph, u, c, d) 79 | 80 | sub_fan = get_sub_fan(graph, maximal_fan, u, v, cd_path, d) 81 | 82 | graph = invert_cd_path(graph, cd_path, c, d) 83 | 84 | graph = rotate_fan(graph, sub_fan, u) 85 | 86 | graph.add_edge(u, sub_fan[-1], color=d) 87 | 88 | return graph 89 | 90 | 91 | def get_maximal_fan(graph, u, v): 92 | """ 93 | constructs a maximal fan starting from v; 94 | A fan of a vertex u is a sequence of vertices F[1:k] that satisfies the following conditions: 95 | 1) F[1:k] is a non-empty sequence of distinct neighbors of u 96 | 2) (F[1],u) in E(G) is uncolored 97 | 3) The color of (F[i+1],u) is free on F[i] for 1 ≤ i < k 98 | A fan is maximal if it can't be extended; 99 | :param graph: nx.Graph(); each edge should have an attribute "color" 100 | :param u: node in "graph" 101 | :param v: node in "graph" 102 | :return: list of nodes of "graph" representing a maximal fan starting from "v" 103 | """ 104 | maximal_fan = [v] 105 | 106 | is_maximal = False 107 | 108 | while not is_maximal: 109 | is_maximal = True 110 | for node in graph.neighbors(u): 111 | edge_color = graph.get_edge_data(u, node)["color"] 112 | if (node not in maximal_fan) and \ 113 | is_color_free(graph, maximal_fan[-1], edge_color) and \ 114 | (edge_color is not None): 115 | maximal_fan.append(node) 116 | is_maximal = False 117 | break 118 | 119 | return maximal_fan 120 | 121 | 122 | def get_sub_fan(graph, maximal_fan, u, v, cd_path, d): 123 | """ 124 | constructs a sub-fan of "maximal_fan" such that color `d` is free on its last node; 125 | :param graph: nx.Graph(); each edge should have an attribute "color" 126 | :param maximal_fan: maxmial resulting from `get_maximal_fan` 127 | :param u: node in "graph" 128 | :param v: node in "graph" 129 | :param cd_path: nx.Graph() representing a path with edges colored only with c and d 130 | :param d: integer representing a color 131 | :return: sub-list of maximal fan such that its last node is free on d 132 | """ 133 | sub_fan = [v] 134 | for node in maximal_fan[1:]: 135 | if graph.get_edge_data(u, node)['color'] == d: 136 | break 137 | else: 138 | sub_fan.append(node) 139 | 140 | if cd_path.has_node(sub_fan[-1]): 141 | sub_fan = maximal_fan 142 | 143 | return sub_fan 144 | 145 | 146 | def rotate_fan(graph, fan, u): 147 | """ 148 | 149 | :param graph: nx.Graph(); each edge should have an attribute "color" 150 | :param fan: list of nodes of "graph" representing a fan 151 | :param u: node in "graph" 152 | :return: 153 | """ 154 | for idx in range(len(fan)-1): 155 | current_edge = (u, 
fan[idx]) 156 | next_edge = (u, fan[idx+1]) 157 | color = graph.get_edge_data(*next_edge)["color"] 158 | graph.add_edge(*current_edge, color=color) 159 | 160 | graph.add_edge(u, fan[-1], color=None) 161 | 162 | return graph 163 | 164 | 165 | def is_color_free(graph, node, color): 166 | """ 167 | check if the color is free on a vertex; 168 | a color is said to be incident on a vertex if an edge incident on that vertex has that color; 169 | otherwise, the color is free on that vertex 170 | :param graph: nx.Graph(); each edge should have an attribute "color" 171 | :param node: node of "graph" 172 | :param color: integer no larger than the degree of "graph", or None 173 | :return: boolean True if "color" is free on "node" and False otherwise 174 | """ 175 | for neighbor in graph.neighbors(node): 176 | current_color = graph.get_edge_data(node, neighbor)["color"] 177 | 178 | if current_color == color: 179 | return False 180 | 181 | return True 182 | 183 | 184 | def get_cd_path(graph, u, c, d): 185 | """ 186 | Construct a cd-path: a maximal path that includes vertex u and has edges colored only c or d 187 | :param graph: nx.Graph(); each edge should have an attribute "color" 188 | :param u: node of "graph" 189 | :param c: integer no larger than the degree of "graph", or None; represents a color 190 | :param d: integer no larger than the degree of "graph", or None; represents a color 191 | :return: nx.Graph() representing a cd-path 192 | """ 193 | path = nx.Graph() 194 | 195 | current_color = d 196 | current_node = u 197 | is_maximal = False 198 | 199 | while not is_maximal: 200 | is_maximal = True 201 | for neighbor in graph.neighbors(current_node): 202 | 203 | try: 204 | color = graph.get_edge_data(current_node, neighbor)["color"] 205 | except (TypeError, KeyError): 206 | color = None 207 | 208 | if color == current_color: 209 | path.add_edge(current_node, neighbor) 210 | current_node = neighbor 211 | is_maximal = False 212 | if current_color == c: 213 | current_color = d 214 | else: 215 | current_color = c 216 | break 217 | 218 | return path 219 | 220 | 221 | def invert_cd_path(graph, path, c, d): 222 | """ 223 | Switch the colors of the edges on the cd-path: c to d and d to c. 
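For example, if the consecutive edges of "path" are colored (c, d, c), they are recolored (d, c, d) after the inversion; the colors of all edges outside the path are left unchanged. 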
224 | :param graph: nx.Graph(); each edge should have an attribute "color" 225 | :param path: nx.Graph() representing a cd-path 226 | :param c: integer no larger than the degree of "graph", or None; represents a color 227 | :param d: integer no larger than the degree of "graph", or None; represents a color 228 | :return: graph with switched colors 229 | """ 230 | for edge in path.edges: 231 | current_color = graph.get_edge_data(*edge)["color"] 232 | if current_color == c: 233 | graph.add_edge(*edge, color=d) 234 | if current_color == d: 235 | graph.add_edge(*edge, color=c) 236 | 237 | return graph 238 | 239 | 240 | def get_graph_degree(graph): 241 | """ 242 | get maximal degree of nodes of "graph" 243 | :param graph: nx.Graph() 244 | :return: integer representing the degree of the graph 245 | """ 246 | degrees = graph.degree() 247 | 248 | graph_degree = 0 249 | for _, degree in degrees: 250 | if degree > graph_degree: 251 | graph_degree = degree 252 | 253 | return graph_degree 254 | 255 | 256 | def is_coloring_valid(graph): 257 | """ 258 | check if the coloring of a graph is valid, 259 | i.e., two adjacent edges shouldn't have the same color; 260 | :param graph: nx.Graph() each edge should have an attribute 'color' 261 | """ 262 | for u, v, data in graph.edges(data=True): 263 | color = data['color'] 264 | 265 | if color is None: continue 266 | 267 | for _, v_, data_ in graph.edges(u, data=True): 268 | if v_ != v and data_['color'] == color: 269 | return False 270 | 271 | for _, u_, data_ in graph.edges(v, data=True): 272 | if u_ != u and data_['color'] == color: 273 | return False 274 | 275 | return True 276 | 277 | 278 | def is_coloring_correct(graph): 279 | """ 280 | check if the coloring of a graph is correct, 281 | i.e., two adjacent edges shouldn't have the same color and all edges are colored; 282 | :param graph: nx.Graph() each edge should have an attribute 'color' 283 | """ 284 | if is_coloring_valid(graph): 285 | for u, v, data in graph.edges(data=True): 286 | color = data['color'] 287 | 288 | if color is None: return False # a correct coloring leaves no edge uncolored 289 | 290 | for _, v_, data_ in graph.edges(u, data=True): 291 | if v_ != v and data_['color'] == color: 292 | return False 293 | 294 | for _, u_, data_ in graph.edges(v, data=True): 295 | if u_ != u and data_['color'] == color: 296 | return False 297 | 298 | return True 299 | else: return False 300 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /graph_utils/utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import shutil 4 | import random 5 | 6 | import networkx as nx 7 | import numpy as np 8 | 9 | from .evaluate_throughput import evaluate_cycle_time 10 | from .mbst import cube_algorithm, delta_prim 11 | from .tsp_christofides import christofides_tsp 12 | from .matcha import RandomTopologyGenerator 13 | from .matching_decomposition import get_matching_list_from_graph 14 | 15 | 16 | def get_connectivity_graph(underlay, default_capacity=1e9): 17 | """ 18 | Build the connectivity graph of "underlay": every pair of distinct nodes is connected by an edge whose latency and available bandwidth are derived from the shortest (by "distance") underlay path between them 19 | :param underlay: nx.Graph(); each edge should have an attribute "distance" 20 | :param default_capacity: capacity assumed for underlay links; the available bandwidth of a path is default_capacity divided by its number of hops 21 | :return: nx.Graph(); each edge has attributes "availableBandwidth" and "latency" 22 | """ 23 | connectivity_graph = nx.Graph() 24 | connectivity_graph.add_nodes_from(underlay.nodes(data=True)) 25 | 26 | dijkstra_result = nx.all_pairs_dijkstra(underlay.copy(), weight="distance") 27 | 28 | for node, (weights_dict, paths_dict) in dijkstra_result: 29 | for neighbour in paths_dict.keys(): 30 | if node != neighbour: 31 | path = paths_dict[neighbour] 32 | 33 | distance = 0. 34 | for idx in range(len(path) - 1): 35 | u = path[idx] 36 | v = path[idx + 1] 37 | 38 | data = underlay.get_edge_data(u, v) 39 | distance += data["distance"] 40 | 41 | available_bandwidth = default_capacity / (len(path) - 1) 42 | 43 | latency = 0.0085 * distance + 4 44 | 45 | connectivity_graph.add_edge(node, neighbour, availableBandwidth=available_bandwidth, latency=latency) 46 | 47 | return connectivity_graph 48 | 49 | 50 | def add_upload_download_delays(overlay, computation_time, model_size): 51 | """ 52 | Takes as input an nx.Graph(); each edge should have attributes "latency" and "availableBandwidth", 53 | and each node should have attributes "uploadDelay" and "downloadDelay"; 54 | The weight (delay) of edge (i, j) is computed as: 55 | d(i, j) = computation_time + latency(i, j) + max(out_degree(i) * uploadDelay(i), in_degree(j) * downloadDelay(j), M / availableBandwidth(i, j)), where M is the model size 56 | :param overlay: nx.Graph() 57 | :param computation_time: time spent on local computation in one round 58 | :param model_size: size M of the exchanged model 59 | :return: directed copy of "overlay" with an attribute "weight" on each edge 60 | """ 61 | overlay = overlay.to_directed() 62 | 63 | out_degree_dict = dict(overlay.out_degree) 64 | in_degree_dict = dict(overlay.in_degree) 65 | 66 | for u, v, data in overlay.edges(data=True): 67 | upload_delay = out_degree_dict[u] * overlay.nodes[u]["uploadDelay"] 68 | download_delay = in_degree_dict[v] * overlay.nodes[v]["downloadDelay"] 69 | 70 | weight = computation_time + data["latency"] +\ 71 | max(upload_delay, download_delay, model_size/data["availableBandwidth"]) 72 | 73 | overlay.add_edge(u, v, weight=weight) 74 | 75 | return overlay 76 | 77 | 78 | def get_star_overlay(connectivity_graph, centrality): 79 | """ 80 | Generate a star overlay, centered at the most central node (the "server"), given a connectivity graph 81 | :param connectivity_graph: nx.Graph() object; each edge should have attributes 82 | "latency", "availableBandwidth" and "weight"; 83 | :param centrality: mode of centrality to use, possible: "load", "distance", "information", default="load" 84 | :return: nx.Graph() 85 | """ 86 | if centrality == "distance": 87 | centrality_dict = nx.algorithms.centrality.closeness_centrality(connectivity_graph, distance="latency") 88 | server_node = max(centrality_dict, key=centrality_dict.get) 89 | 90 | elif centrality == "information": 91 | centrality_dict = nx.algorithms.centrality.information_centrality(connectivity_graph, weight="latency") 92 | server_node = max(centrality_dict, key=centrality_dict.get) 93 | 94 | else: 95 
| # centrality = load_centrality 96 | centrality_dict = nx.algorithms.centrality.load_centrality(connectivity_graph, weight="latency") 97 | server_node = max(centrality_dict, key=centrality_dict.get) 98 | 99 | weights, paths = nx.single_source_dijkstra(connectivity_graph, source=server_node, weight="weight") 100 | 101 | star = nx.Graph() 102 | star.add_nodes_from(connectivity_graph.nodes(data=True)) 103 | 104 | for node in paths.keys(): 105 | if node != server_node: 106 | 107 | latency = 0. 108 | available_bandwidth = 1e32 109 | for idx in range(len(paths[node]) - 1): 110 | u = paths[node][idx] 111 | v = paths[node][idx + 1] 112 | 113 | data = connectivity_graph.get_edge_data(u, v) 114 | latency += data["latency"] 115 | available_bandwidth = data["availableBandwidth"] 116 | 117 | star.add_edge(server_node, node, availableBandwidth=available_bandwidth, latency=latency) 118 | 119 | return star 120 | 121 | 122 | def get_ring_overlay(connectivity_graph, computation_time, model_size): 123 | """ 124 | 125 | :param connectivity_graph: 126 | :param computation_time: 127 | :param model_size: 128 | :return: 129 | """ 130 | for u, v, data in connectivity_graph.edges(data=True): 131 | upload_delay = connectivity_graph.nodes[u]["uploadDelay"] 132 | download_delay = connectivity_graph.nodes[v]["downloadDelay"] 133 | 134 | weight = computation_time + data["latency"] + max(upload_delay, 135 | download_delay, 136 | model_size / data["availableBandwidth"]) 137 | 138 | connectivity_graph.add_edge(u, v, weight=weight) 139 | 140 | adjacency_matrix = nx.adjacency_matrix(connectivity_graph, weight="weight").toarray() 141 | tsp_nodes = christofides_tsp(adjacency_matrix) 142 | 143 | ring = nx.DiGraph() 144 | ring.add_nodes_from(connectivity_graph.nodes(data=True)) 145 | 146 | for idx in range(len(tsp_nodes) - 1): 147 | # get the label of source and sink nodes from the original graph 148 | source_node = list(connectivity_graph.nodes())[tsp_nodes[idx]] 149 | sink_node = list(connectivity_graph.nodes())[tsp_nodes[idx + 1]] 150 | 151 | ring.add_edge(source_node, sink_node, 152 | latency=connectivity_graph.get_edge_data(source_node, sink_node)['latency'], 153 | availableBandwidth=connectivity_graph.get_edge_data(source_node, sink_node)['availableBandwidth'], 154 | weight=connectivity_graph.get_edge_data(source_node, sink_node)['weight']) 155 | 156 | # add final link to close the circuit 157 | source_node = list(connectivity_graph.nodes())[tsp_nodes[-1]] 158 | sink_node = list(connectivity_graph.nodes())[tsp_nodes[0]] 159 | ring.add_edge(source_node, sink_node, 160 | latency=connectivity_graph.get_edge_data(source_node, sink_node)['latency'], 161 | availableBandwidth=connectivity_graph.get_edge_data(source_node, sink_node)['availableBandwidth'], 162 | weight=connectivity_graph.get_edge_data(source_node, sink_node)['weight']) 163 | 164 | return ring 165 | 166 | 167 | def generate_random_ring(list_of_nodes): 168 | """ 169 | Generate a random ring graph connecting a list of nodes 170 | :param list_of_nodes: 171 | :return: nx.DiGraph() 172 | """ 173 | ring = nx.DiGraph() 174 | 175 | ring.add_nodes_from(list_of_nodes) 176 | 177 | random.shuffle(list_of_nodes) 178 | 179 | for idx in range(len(list_of_nodes) - 1): 180 | # get the label of source and sink nodes from the original graph 181 | source_node = list_of_nodes[idx] 182 | sink_node = list_of_nodes[idx + 1] 183 | 184 | ring.add_edge(source_node, sink_node) 185 | 186 | # add final link to close the circuit 187 | source_node = list_of_nodes[-1] 188 | sink_node = 
list_of_nodes[0] 189 | ring.add_edge(source_node, sink_node) 190 | 191 | mixing_matrix = nx.adjacency_matrix(ring, weight=None).todense().astype(np.float64) 192 | 193 | mixing_matrix += np.eye(mixing_matrix.shape[0]) 194 | mixing_matrix *= 0.5 195 | 196 | return nx.from_numpy_matrix(mixing_matrix, create_using=nx.DiGraph()) 197 | 198 | 199 | def get_delta_mbst_overlay(connectivity_graph, computation_time, model_size): 200 | """ 201 | 202 | :param connectivity_graph: 203 | :param computation_time: 204 | :param model_size: 205 | :return: 206 | """ 207 | for u, v, data in connectivity_graph.edges(data=True): 208 | weight = computation_time + data["latency"] + \ 209 | max(connectivity_graph.nodes[u]["uploadDelay"], connectivity_graph.nodes[v]["downloadDelay"], 210 | model_size / data["availableBandwidth"]) + \ 211 | max(connectivity_graph.nodes[v]["uploadDelay"], connectivity_graph.nodes[u]["downloadDelay"], 212 | model_size / data["availableBandwidth"]) 213 | 214 | connectivity_graph.add_edge(u, v, weight=weight, latency=data["latency"], 215 | availableBandwidth=data["availableBandwidth"]) 216 | 217 | for u in connectivity_graph.nodes: 218 | connectivity_graph.add_edge(u, u, weight=0, latency=0, availableBandwidth=1e32) 219 | 220 | best_result = cube_algorithm(connectivity_graph.copy()).to_directed() 221 | 222 | for u, v in best_result.edges: 223 | best_result.add_edge(u, v, 224 | latency=connectivity_graph.get_edge_data(u, v)['latency'], 225 | availableBandwidth=connectivity_graph.get_edge_data(u, v)['availableBandwidth']) 226 | 227 | best_cycle_time, _, _ = evaluate_cycle_time(add_upload_download_delays(best_result, computation_time, model_size)) 228 | best_delta = 2 229 | 230 | n_nodes = connectivity_graph.number_of_nodes() 231 | for delta in range(2, n_nodes): 232 | result = delta_prim(connectivity_graph.copy(), delta).to_directed() 233 | 234 | for u, v, data in result.edges(data=True): 235 | weight = data["weight"] - (result.nodes[u]["uploadDelay"] + result.nodes[v]["downloadDelay"]) 236 | 237 | result.add_edge(u, v, weight=weight, 238 | latency=connectivity_graph.get_edge_data(u, v)['latency'], 239 | availableBandwidth=connectivity_graph.get_edge_data(u, v)['availableBandwidth']) 240 | 241 | cycle_time, _, _ = evaluate_cycle_time(add_upload_download_delays(result, computation_time, model_size)) 242 | 243 | if cycle_time < best_cycle_time: 244 | best_result = result 245 | best_cycle_time = cycle_time 246 | best_delta = delta 247 | 248 | return best_result, best_cycle_time, best_delta 249 | 250 | 251 | def get_matcha_cycle_time(underlay, connectivity_graph, computation_time, model_size, communication_budget): 252 | """ 253 | 254 | :param underlay: 255 | :param connectivity_graph: 256 | :param computation_time: 257 | :param model_size: 258 | :param communication_budget: 259 | :return: 260 | """ 261 | path_to_save_network = os.path.join("temp", "colored_network.gml") 262 | path_to_matching_history_file = os.path.join("temp", "matching_history.csv") 263 | 264 | try: 265 | shutil.rmtree("temp") 266 | except FileNotFoundError: 267 | pass 268 | 269 | os.makedirs("temp", exist_ok=True) 270 | 271 | topology_generator = RandomTopologyGenerator(underlay.copy(), 272 | communication_budget, 273 | network_save_path=path_to_save_network, 274 | path_to_history_file=path_to_matching_history_file) 275 | 276 | n_rounds = 1000 277 | np.random.seed(0) 278 | for _ in range(n_rounds): 279 | topology_generator.step() 280 | 281 | path_to_colored_network = os.path.join("temp", "colored_network.gml") 282 | 
path_to_matching_history_file = os.path.join("temp", "matching_history.csv") 283 | 284 | colored_network = nx.read_gml(path_to_colored_network) 285 | matching_list = get_matching_list_from_graph(colored_network) 286 | 287 | simulated_time = np.zeros(n_rounds) 288 | with open(path_to_matching_history_file) as csv_file: 289 | csv_reader = csv.reader(csv_file, delimiter=' ') 290 | 291 | for ii, row in enumerate(csv_reader): 292 | overlay = nx.Graph() 293 | overlay.add_nodes_from(connectivity_graph.nodes(data=True)) 294 | 295 | current_matching_activations = row 296 | for matching_idx, matching_activation in enumerate(current_matching_activations): 297 | if int(matching_activation): 298 | overlay = nx.compose(overlay, matching_list[matching_idx]) 299 | 300 | for u, v in overlay.edges(): 301 | overlay.add_edge(u, v, 302 | latency=connectivity_graph.get_edge_data(u, v)["latency"], 303 | availableBandwidth=connectivity_graph.get_edge_data(u, v)['availableBandwidth'] 304 | ) 305 | 306 | if nx.is_empty(overlay): 307 | # If overlay is empty, then no communication cost is added 308 | simulated_time[ii] = computation_time 309 | 310 | else: 311 | overlay = add_upload_download_delays(overlay, computation_time, model_size) 312 | 313 | cycle_time = 0 314 | for u, v, data in overlay.edges(data=True): 315 | if data["weight"] > cycle_time: 316 | cycle_time = data["weight"] 317 | 318 | simulated_time[ii] = cycle_time 319 | 320 | simulated_time = simulated_time.cumsum() 321 | 322 | try: 323 | shutil.rmtree("temp") 324 | except FileNotFoundError: 325 | pass 326 | 327 | return simulated_time[-1] / (n_rounds - 1) 328 | --------------------------------------------------------------------------------
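The files above are dumped without usage notes, so here is a minimal, hypothetical sketch (not part of the repository) of how the matching decomposition in graph_utils/utils/matching_decomposition.py might be exercised on a toy topology. The toy graph and the import path are illustrative assumptions; the import assumes the repository root is on PYTHONPATH and that graph_utils.utils is importable as a package.

```python
import networkx as nx

# assumed import path; adjust to how graph_utils is located on your system
from graph_utils.utils.matching_decomposition import (
    matching_decomposition,
    is_coloring_correct,
)

# toy topology: a 5-node ring with one chord (maximum degree Delta = 3)
graph = nx.cycle_graph(5)
graph.add_edge(0, 2)

# decompose the edges into matchings; at most Delta + 1 matchings are returned,
# together with one Laplacian matrix per matching
matchings, laplacians = matching_decomposition(graph)

for idx, matching in enumerate(matchings):
    print(idx, sorted(matching.edges()))

# the call colors `graph` in place; the coloring should be valid and complete
print("coloring correct:", is_coloring_correct(graph))
```

Note that some of the returned matchings may be empty, since one matching is created per color even when fewer than Delta + 1 colors end up being used.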