├── requirements.txt
├── data
│   ├── synthetic
│   │   ├── README.md
│   │   ├── generate_data.sh
│   │   ├── split_data.py
│   │   └── generate_data.py
│   ├── femnist
│   │   ├── README.md
│   │   ├── group_by_writer.py
│   │   ├── match_hashes.py
│   │   ├── split_data.sh
│   │   ├── get_hashes.py
│   │   ├── preprocess.sh
│   │   ├── get_file_dirs.py
│   │   └── split_data.py
│   ├── sent140
│   │   ├── README.md
│   │   ├── combine_data.py
│   │   ├── preprocess.sh
│   │   ├── split_data.sh
│   │   └── split_data.py
│   ├── shakespeare
│   │   ├── README.md
│   │   ├── preprocess.sh
│   │   ├── split_data.sh
│   │   ├── split_data.py
│   │   └── preprocess_shakespeare.py
│   ├── inaturalist
│   │   ├── README.md
│   │   ├── preprocess.sh
│   │   └── split_data.py
│   └── README.md
├── utils
│   ├── metrics.py
│   ├── logger.py
│   ├── optim.py
│   └── args.py
├── graph_utils
│   ├── generate_all_networks.sh
│   ├── data
│   │   ├── Read_me_gml.txt
│   │   └── gaia.gml
│   ├── README.md
│   ├── show_networks.py
│   ├── utils
│   │   ├── mbst.py
│   │   ├── evaluate_throughput.py
│   │   ├── tsp_christofides.py
│   │   ├── matcha.py
│   │   ├── matching_decomposition.py
│   │   └── utils.py
│   ├── time_simulator.py
│   └── generate_networks.py
├── .gitignore
├── loaders
│   ├── synthetic.py
│   ├── sent140.py
│   ├── shakespeare.py
│   ├── femnist.py
│   └── inaturalist.py
├── reproduce_results.py
├── make_table3.py
├── communication_module
│   ├── worker.py
│   └── manager.py
├── models
│   ├── inaturalist
│   │   └── resnet.py
│   ├── model.py
│   ├── synthetic
│   │   └── linear.py
│   ├── femnist
│   │   └── cnn.py
│   ├── sent140
│   │   └── lstm.py
│   └── shakespeare
│       └── gru.py
├── main.py
├── make_figure2.py
├── README.md
└── LICENSE
/requirements.txt:
--------------------------------------------------------------------------------
1 | cvxpy
2 | tensorboard
3 | geopy
4 | Pillow
5 | scikit-learn
6 | networkx == 2.4
7 | numpy
8 | torch
9 | torchvision
10 | scipy
11 | matplotlib
12 | jupyter
13 | torchtext
14 | spacy
15 | mplleaflet
--------------------------------------------------------------------------------
/data/synthetic/README.md:
--------------------------------------------------------------------------------
1 | # Synthetic Dataset
2 |
3 | ## Setup Instructions
4 |
5 | Run generate_data.sh with a choice of the following flags:
6 |
7 | - ```-nw```: number of workers, written as an integer
8 | - ```-nc```: number of classes, written as an integer
9 | - ```-dim```: dimension of the data, written as an integer
10 | - ```--tf```: fraction of data in the training set, written as a decimal; default is 0.8
11 | - ```--seed```: seed to be used before random sampling of the data
12 |
13 | e.g.
14 | - ```./generate_data.sh -nw 11 -nc 2 -dim 10 --tf 0.8 --seed 1234```
15 |
--------------------------------------------------------------------------------
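Each worker shard produced by ```split_data.py``` is a JSON file with ```x```, ```y``` and ```num_classes``` keys (see ```data/synthetic/split_data.py``` and ```loaders/synthetic.py```). A minimal sketch for inspecting one shard; the per-worker file name ```0.json``` is an assumption (workers are indexed by rank in ```communication_module/worker.py```):

```python
import json

# Hypothetical shard path for worker 0; adjust to the files actually produced in train/
shard_path = "data/synthetic/train/0.json"

with open(shard_path, "r") as f:
    shard = json.load(f)

# Each shard stores the features, the labels and the number of classes
print(len(shard["x"]), "samples of dimension", len(shard["x"][0]))
print("labels:", sorted(set(shard["y"])))
print("num_classes:", shard["num_classes"])
```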
/utils/metrics.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def binary_accuracy(preds, y):
5 | """
6 | Compute accuracy for binary classification from raw logits.
7 | :param preds: tensor of raw logits
8 | :param y: tensor of binary targets (0. or 1.)
9 | :return: accuracy as a scalar tensor
10 | """
11 | # round predictions to the closest integer
12 | rounded_preds = torch.round(torch.sigmoid(preds))
13 | correct = (rounded_preds == y).float()
14 | acc = correct.sum() / len(correct)
15 | return acc
16 |
17 |
18 | def accuracy(preds, y):
19 | """
20 | Compute top-1 accuracy for multi-class classification.
21 | :param preds: tensor of shape (batch_size, num_classes) with class scores
22 | :param y: tensor of class indices
23 | :return: accuracy as a scalar tensor
24 | """
25 | _, predicted = torch.max(preds, 1)
26 | correct = (predicted == y).float()
27 | acc = correct.sum() / len(correct)
28 | return acc
--------------------------------------------------------------------------------
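A quick sanity check of the two metrics on toy tensors (a minimal sketch; it assumes the repository root is on ```PYTHONPATH``` so that ```utils.metrics``` is importable):

```python
import torch

from utils.metrics import binary_accuracy, accuracy

# binary case: raw logits against 0/1 targets
logits = torch.tensor([2.0, -1.0, 0.5, -3.0])
targets = torch.tensor([1.0, 0.0, 0.0, 0.0])
print(binary_accuracy(logits, targets))  # tensor(0.7500): sigmoid(0.5) rounds to 1, the rest match

# multi-class case: (batch_size, num_classes) scores against class indices
scores = torch.tensor([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
labels = torch.tensor([1, 0, 0])
print(accuracy(scores, labels))  # tensor(0.6667): the last prediction is wrong
```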
/data/femnist/README.md:
--------------------------------------------------------------------------------
1 | # FEMNIST Dataset
2 |
3 | ## Setup Instructions
4 |
5 | Run preprocess.sh with a choice of the following flags:
6 |
7 | - ```-nw```: number of workers, written as an integer
8 | - ```-s```: 'iid' to sample in an i.i.d. manner, or 'niid' to sample
9 | in a non-i.i.d. manner
10 | - ```--sf```: fraction of data to sample, written as a decimal;
11 | default is 0.1
12 | - ```--tf```: fraction of data in the training set, written as a decimal; default is 0.9
13 | - ```--seed```: seed to be used before random sampling of data
14 |
15 | e.g.
16 | - ```./preprocess.sh -s iid -nw 11 --sf 1.0``` (full-sized
17 | dataset partitioned on Gaia)
--------------------------------------------------------------------------------
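After preprocessing, ```loaders/femnist.py``` expects each per-worker ```.pkl``` file to contain a list of ```(image path, class)``` tuples. A minimal sketch for inspecting one shard; the rank-based file name ```0.pkl``` is an assumption taken from ```communication_module/worker.py```:

```python
import pickle
from collections import Counter

# Hypothetical shard for worker 0, as produced by split_data.py
with open("data/femnist/train/0.pkl", "rb") as f:
    samples = pickle.load(f)  # list of (image path, class) tuples

print(len(samples), "images for this worker")
print("first sample:", samples[0])
print("most common classes:", Counter(label for _, label in samples).most_common(5))
```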
/data/sent140/README.md:
--------------------------------------------------------------------------------
1 | # Sentiment140 Dataset
2 |
3 | ## Setup Instructions
4 |
5 | Run preprocess.sh with a choice of the following flags:
6 |
7 | - ```-nw```: number of workers, written as an integer
8 | - ```-s```: 'iid' to sample in an i.i.d. manner, or 'niid' to sample
9 | in a non-i.i.d. manner
10 | - ```--sf```: fraction of data to sample, written as a decimal;
11 | default is 0.1
12 | - ```--tf```: fraction of data in the training set, written as a decimal; default is 0.9
13 | - ```--seed```: seed to be used before random sampling of data
14 |
15 | e.g.
16 | - ```./preprocess.sh -s iid -nw 11 --sf 1.0``` (full-sized
17 | dataset partitioned on Gaia)
19 |
20 |
21 |
--------------------------------------------------------------------------------
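```loaders/sent140.py``` reads the per-worker files as TorchText JSON datasets, i.e. one JSON object per line with ```text``` and ```label``` keys. A minimal sketch for checking a shard's label balance, assuming that layout and a rank-based file name:

```python
import json
from collections import Counter

label_counts = Counter()

# Hypothetical shard for worker 0; one JSON object per line with "text" and "label" keys
with open("data/sent140/train/0.json", "r") as f:
    for line in f:
        sample = json.loads(line)
        label_counts[sample["label"]] += 1

print(label_counts)
```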
/data/shakespeare/README.md:
--------------------------------------------------------------------------------
1 | # Shakespeare Dataset
2 |
3 | ## Setup Instructions
4 |
5 | Run preprocess.sh with a choice of the following flags:
6 |
7 | - ```-nw```: number of workers, written as an integer
8 | - ```-s```: 'iid' to sample in an i.i.d. manner, or 'niid' to sample
9 | in a non-i.i.d. manner
10 | - ```--sf```: fraction of data to sample, written as a decimal;
11 | default is 0.1
12 | - ```--tf```: fraction of data in the training set, written as a decimal; default is 0.9
13 | - ```--seed```: seed to be used before random sampling of data
14 |
15 | e.g.
16 | - ```./preprocess.sh -s iid -nw 11 --sf 1.0``` (full-sized
17 | dataset partitioned on Gaia)
19 |
20 |
21 |
--------------------------------------------------------------------------------
/graph_utils/generate_all_networks.sh:
--------------------------------------------------------------------------------
1 | echo "################"
2 | echo "gaia"
3 | python generate_networks.py gaia --experiment inaturalist --upload_capacity 1e10 --download_capacity 1e10
4 | echo "################"
5 | echo "amazon_us"
6 | python generate_networks.py amazon_us --experiment inaturalist --upload_capacity 1e10 --download_capacity 1e10
7 | echo "################"
8 | echo "geantdistance"
9 | python generate_networks.py geantdistance --experiment inaturalist --upload_capacity 1e10 --download_capacity 1e10
10 | echo "################"
11 | echo "ebone"
12 | python generate_networks.py ebone --experiment inaturalist --upload_capacity 1e10 --download_capacity 1e10
13 | echo "################"
14 | echo "exodus"
15 | python generate_networks.py exodus --experiment inaturalist --upload_capacity 1e10 --download_capacity 1e10
--------------------------------------------------------------------------------
/data/inaturalist/README.md:
--------------------------------------------------------------------------------
1 | # iNaturalist Dataset
2 |
3 | ## Setup Instructions
4 |
5 | * Download iNaturalist
6 | [here](https://storage.googleapis.com/inat_data_2018_eu/train_val2018.tar.gz),
7 | unzip it, and place its contents in the ``raw_data`` folder.
8 |
9 | * Run preprocess.sh with a choice of the following flags:
10 |
11 | - ```--network```: name of the network to use, should be present in
12 | ``/graph_utils/data``; default is amazon_us
13 | - ```--sf```: fraction of data to sample, written as a decimal;
14 | default is 0.1
15 | - ```--tf```: fraction of data in the training set, written as a decimal; default is 0.9
16 | - ```--seed```: seed to be used before random sampling of data
17 |
18 | e.g.
19 | - ```./preprocess.sh --network gaia --sf 1.0 --tf 0.9 --seed 1234``` (full-sized
20 | dataset partitioned on Gaia)
21 |
--------------------------------------------------------------------------------
/graph_utils/data/Read_me_gml.txt:
--------------------------------------------------------------------------------
1 | In the GML files, the distance is the latency indicated in the original files (latencies.intra).
2 | The details of each instance are as follows (slightly different from the statistics in the paper):
3 | instance    num_of_nodes  num_of_links
4 | 1221        108           153
5 | 1239        315           972
6 | 1755        87            161
7 | 3257        161           328
8 | 3967        79            147
9 | 6461        141           374
10 | 1755+3967   166           327
11 |
12 |
13 | For the combined instance "1755+3967", 19 random edges were added. Its latency is computed from distance, normalized with respect to the original maximum latency weight.
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/utils/logger.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import json
4 |
5 |
6 | class Logger(object):
7 | def __init__(self, logdir):
8 | self.logdir = logdir
9 |
10 | def write_model(self, model_params, iteration=0, mode="json"):
11 | """
12 | save model parameters as a .pt or .json file, depending on `mode`
13 | :param model_params: torch.tensor
14 | :param iteration: integer
15 | :param mode: "torch" or "json"
16 | """
17 | if mode == "torch":
18 | file_path = os.path.join(self.logdir,
19 | "model_{}.pt".format(iteration))
20 | torch.save(model_params, file_path)
21 |
22 | elif mode == "json":
23 | file_path = os.path.join(self.logdir,
24 | "model_{}.json".format(iteration))
25 |
26 | with open(file_path, "w") as f:
27 | f.write(json.dumps(model_params.tolist()))
--------------------------------------------------------------------------------
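A minimal usage sketch for ```Logger```; note that the log directory is not created by the class itself, so it is created here first:

```python
import os

import torch

from utils.logger import Logger

os.makedirs("log", exist_ok=True)  # Logger does not create the directory itself
logger = Logger(logdir="log")

params = torch.randn(5)
logger.write_model(params, iteration=0, mode="json")   # writes log/model_0.json
logger.write_model(params, iteration=0, mode="torch")  # writes log/model_0.pt
```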
/data/femnist/group_by_writer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 |
4 |
5 | def load_obj(name):
6 | with open(name + '.pkl', 'rb') as f:
7 | return pickle.load(f)
8 |
9 |
10 | def save_obj(obj, name):
11 | with open(name + '.pkl', 'wb') as f:
12 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
13 |
14 |
15 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
16 |
17 | wwcd = os.path.join('intermediate', 'write_with_class')
18 | write_class = load_obj(wwcd)
19 |
20 | writers = [] # each entry is a (writer, [list of (file, class)]) tuple
21 | cimages = []
22 | (cw, _, _) = write_class[0]
23 | for (w, f, c) in write_class:
24 | if w != cw:
25 | writers.append((cw, cimages))
26 | cw = w
27 | cimages = []  # reset for the new writer; (f, c) is appended below
28 | cimages.append((f, c))
29 | writers.append((cw, cimages))
30 |
31 | ibwd = os.path.join('intermediate', 'images_by_writer')
32 | save_obj(writers, ibwd)
--------------------------------------------------------------------------------
/data/sent140/combine_data.py:
--------------------------------------------------------------------------------
1 | """
2 | each row of created .csv file is of the form:
3 | polarity, id, date, query, user, comment, test_or_training
4 | """
5 |
6 | import csv
7 | import os
8 |
9 |
10 | train_file_name = os.path.join('raw_data', 'training.csv')
11 |
12 | training = []
13 | with open(train_file_name, 'rt', encoding='ISO-8859-1') as f:
14 | reader = csv.reader(f)
15 | training = list(reader)
16 |
17 | test_file_name = os.path.join('raw_data', 'test.csv')
18 |
19 | test = []
20 | with open(test_file_name, 'rt', encoding='ISO-8859-1') as f:
21 | reader = csv.reader(f)
22 | test = list(reader)
23 |
24 | out_file_name = os.path.join('raw_data', 'all_data.csv')
25 |
26 | with open(out_file_name, 'w') as f:
27 | writer = csv.writer(f)
28 |
29 | for row in training:
30 | row.append('training')
31 | writer.writerow(row)
32 |
33 | for row in test:
34 | row.append('test')
35 | writer.writerow(row)
--------------------------------------------------------------------------------
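A minimal sketch for checking the combined file, assuming it is run from ```data/sent140``` after ```combine_data.py``` has produced ```raw_data/all_data.csv```:

```python
import csv

with open("raw_data/all_data.csv", "rt", encoding="ISO-8859-1") as f:
    reader = csv.reader(f)
    first_row = next(reader)

# expected row layout: polarity, id, date, query, user, comment, test_or_training
polarity, tweet_id, date, query, user, comment, split = first_row
print(split)                   # 'training' or 'test'
print(polarity, comment[:50])  # sentiment label and the start of the tweet
```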
/data/shakespeare/preprocess.sh:
--------------------------------------------------------------------------------
1 | if [ ! -d "all_data" ] || [ ! "$(ls -A all_data)" ]; then
2 | if [ ! -d "raw_data" ]; then
3 | mkdir raw_data
4 | fi
5 |
6 | if [ ! -f raw_data/raw_data.txt ]; then
7 | echo "------------------------------"
8 | echo "retrieving raw data"
9 | cd raw_data
10 |
11 | wget http://www.gutenberg.org/files/100/old/1994-01-100.zip
12 | unzip 1994-01-100.zip
13 | rm 1994-01-100.zip
14 | mv 100.txt raw_data.txt
15 |
16 | cd ../
17 | fi
18 | if [ ! -d "raw_data/by_play_and_character" ]; then
19 | echo "dividing txt data between users"
20 | python3 preprocess_shakespeare.py raw_data/raw_data.txt raw_data/
21 | fi
22 | fi
23 | if [ ! -f test/test.json ]; then
24 | echo "------------------------------"
25 | echo "splitting data"
26 | mkdir train
27 | mkdir test
28 |
29 | ./split_data.sh "$@"
30 |
31 | echo "finished splitting data"
32 | fi
--------------------------------------------------------------------------------
/data/femnist/match_hashes.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 |
4 |
5 | def load_obj(name):
6 | with open(name + '.pkl', 'rb') as f:
7 | return pickle.load(f)
8 |
9 |
10 | def save_obj(obj, name):
11 | with open(name + '.pkl', 'wb') as f:
12 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
13 |
14 |
15 | cfhd = os.path.join('intermediate', 'class_file_hashes')
16 | wfhd = os.path.join('intermediate', 'write_file_hashes')
17 | class_file_hashes = load_obj(cfhd) # each elem is (class, file dir, hash)
18 | write_file_hashes = load_obj(wfhd) # each elem is (writer, file dir, hash)
19 |
20 | class_hash_dict = {}
21 | for i in range(len(class_file_hashes)):
22 | (c, f, h) = class_file_hashes[len(class_file_hashes)-i-1]
23 | class_hash_dict[h] = (c, f)
24 |
25 | write_classes = []
26 | for tup in write_file_hashes:
27 | (w, f, h) = tup
28 | write_classes.append((w, f, class_hash_dict[h][0]))
29 |
30 | wwcd = os.path.join('intermediate', 'write_with_class')
31 | save_obj(write_classes, wwcd)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Don't track content of these folders
2 | .idea/
3 | log/
4 |
5 | *.ipynb
6 | *.npy
7 | *.pth
8 | *.csv
9 | *.json
10 |
11 | # Byte-compiled / optimized / DLL files
12 | __pycache__/
13 | *.py[cod]
14 |
15 | # C extensions
16 | *.so
17 |
18 | # Distribution / packaging
19 | .Python
20 | env/
21 | build/
22 | develop-eggs/
23 | dist/
24 | downloads/
25 | eggs/
26 | .eggs/
27 | lib/
28 | lib64/
29 | parts/
30 | sdist/
31 | var/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | *.log
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 |
--------------------------------------------------------------------------------
/data/sent140/preprocess.sh:
--------------------------------------------------------------------------------
1 | if [ ! -d "raw_data" ]; then
2 | mkdir raw_data
3 | fi
4 |
5 | if [ ! -f raw_data/test.csv ]; then
6 | echo "------------------------------"
7 | echo "retrieving raw data"
8 |
9 | cd raw_data
10 |
11 | if [ ! -f trainingandtestdata.zip ]; then
12 | wget --no-check-certificate http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip
13 | fi
14 |
15 | unzip trainingandtestdata.zip
16 |
17 | mv training.1600000.processed.noemoticon.csv training.csv
18 | mv testdata.manual.2009.06.14.csv test.csv
19 |
20 | rm trainingandtestdata.zip
21 |
22 | cd ../
23 | echo "finished retrieving raw data"
24 |
25 | echo "------------------------------"
26 | echo "combining raw_data .csv files"
27 |
28 | python3 combine_data.py
29 |
30 | echo "finished combining raw_data .csv files"
31 |
32 | fi
33 | if [ ! -f test/test.json ]; then
34 | echo "------------------------------"
35 | echo "splitting data"
36 | mkdir train
37 | mkdir test
38 |
39 | ./split_data.sh "$@"
40 |
41 | echo "finished splitting data"
42 | fi
--------------------------------------------------------------------------------
/loaders/synthetic.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import json
3 | from torch.utils.data import Dataset, DataLoader
4 |
5 |
6 | class SyntheticDataset(Dataset):
7 | def __init__(self, json_file, device):
8 | self.device = device
9 |
10 | with open(json_file, "r") as f:
11 | data = json.load(f)
12 |
13 | self.X = torch.tensor(data["x"]).to(device)
14 | self.y = torch.tensor(data["y"]).to(device)
15 |
16 | self.num_classes = data["num_classes"]
17 | if self.num_classes == 2:
18 | self.num_classes = 1
19 | self.dimension = self.X.shape[1]
20 |
21 | def __len__(self):
22 | return self.X.shape[0]
23 |
24 | def __getitem__(self, idx):
25 | return self.X[idx], torch.unsqueeze(self.y[idx], 0)
26 |
27 |
28 | def get_iterator_synthetic(file_path, device, batch_size=1):
29 | """
30 | returns an iterator over synthetic dataset batches
31 | :param file_path: path to a .json file with keys "x", "y" and "num_classes"
32 | :param device:
33 | :param batch_size:
34 | :return: torch.utils.data.DataLoader constructed from SyntheticDataset
35 | """
36 | dataset = SyntheticDataset(file_path, device)
37 | iterator = DataLoader(dataset, shuffle=True, batch_size=batch_size)
38 |
39 | return iterator
--------------------------------------------------------------------------------
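A minimal usage sketch, assuming the synthetic data has already been generated and split (see ```data/synthetic/README.md```) and that the repository root is the working directory:

```python
from loaders.synthetic import get_iterator_synthetic

# train/train.json is the aggregated training set written by data/synthetic/split_data.py
iterator = get_iterator_synthetic("data/synthetic/train/train.json", device="cpu", batch_size=32)

for x, y in iterator:
    print(x.shape, y.shape)  # (batch_size, dimension) features, (batch_size, 1) labels
    break
```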
/data/README.md:
--------------------------------------------------------------------------------
1 | # Datasets
2 |
3 | ## Leaf Datasets
4 | 1. FEMNIST
5 |
6 | * **Overview:** Image Dataset
7 | * **Details:** 62 different classes (10 digits, 26 lowercase, 26 uppercase), images are 28 by 28 pixels (with option to make them all 128 by 128 pixels), 3500 users
8 | * **Task:** Image Classification
9 |
10 | 2. Sentiment140
11 |
12 | * **Overview:** Text Dataset of Tweets
13 | * **Details:** 660120 users
14 | * **Task:** Sentiment Analysis
15 |
16 | 3. Shakespeare
17 |
18 | * **Overview:** Text Dataset of Shakespeare Dialogues
19 | * **Details:** 1129 users
20 | * **Task:** Next-Character Prediction
21 |
22 |
23 | ## Cross-silo Datasets
24 | 1. iNaturalist Dataset
25 |
26 | * **Overview:** We preprocess the iNaturalist data released by
27 | [inaturalist.org](https://www.inaturalist.org/pages/developers).
28 | * **Details:** 859,000 samples with geo-location information.
29 | * **Task:** Image classification.
30 |
31 | ## References
32 |
33 |
34 | @misc{caldas2018leaf,
35 | title={LEAF: A Benchmark for Federated Settings},
36 | author={Sebastian Caldas and Sai Meher Karthik Duddu and Peter Wu and Tian Li and Jakub Konečný and H. Brendan McMahan and Virginia Smith and Ameet Talwalkar},
37 | year={2018},
38 | eprint={1812.01097},
39 | archivePrefix={arXiv},
40 | primaryClass={cs.LG}
41 | }
--------------------------------------------------------------------------------
/graph_utils/README.md:
--------------------------------------------------------------------------------
1 | # Graph Generator
2 |
3 | Generate different overlays given a connectivity graph. The connectivity
4 | graph should be stored in ``data`` as a ``.gml`` file.
5 | ## Setup Instructions
6 |
7 | Run ```generate_networks.py``` with a choice of the following arguments:
8 |
9 | - ```name```: name of the network to use;
10 | - ```--experiment```: name of the experiment that will be run on the
11 | network; possible values are femnist, inaturalist, synthetic, shakespeare,
12 | sent140; if not specified, --model_size will be used as the model size;
13 | - ``--model_size``: size of the model that will be transmitted on the
14 | network, in bits; ignored if --experiment is specified; default
15 | is 1e8;
16 | - ``--default_capacity``: default capacity (in bits/s) to use on links
17 | with unknown capacity; default is 1e9;
18 | - ```--centrality```: type of centrality to use in order to select the
19 | central node of the network; possible values are "load", "distance"
20 | and "information"; default is "load";
21 |
22 |
23 | e.g.
24 | - ```python3 generate_networks.py amazon_us --experiment inaturalist```
25 | (generates different overlays with Amazon North America as the connectivity
26 | graph for the iNaturalist experiment)
27 |
28 | To generate all the topologies for all the networks, run
29 |
30 | ```
31 | ./generate_all_networks.sh
32 | ```
--------------------------------------------------------------------------------
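A minimal sketch for inspecting one of the connectivity graphs stored in ```data```, assuming it is run from the ```graph_utils``` directory:

```python
import networkx as nx

underlay = nx.read_gml("data/gaia.gml")

print(underlay.number_of_nodes(), "nodes /", underlay.number_of_edges(), "links")
print("first nodes:", list(underlay.nodes())[:5])
```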
/data/shakespeare/split_data.sh:
--------------------------------------------------------------------------------
1 | while [[ $# -gt 0 ]]
2 | do
3 | key="$1"
4 |
5 | case $key in
6 | -nw)
7 | NUM_WORKERS="$2"
8 | shift # past argument
9 | shift # past value
10 | ;;
11 | -s)
12 | SAMPLE="$2"
13 | shift # past argument
14 | shift # past value
15 | ;;
16 | --sf)
17 | SFRAC="$2"
18 | shift # past argument
19 | shift # past value
20 | ;;
21 | --tf)
22 | TFRAC="$2"
23 | shift # past argument
24 | shift # past value
25 | ;;
26 | --seed)
27 | SEED="$2"
28 | shift # past argument
29 | ;;
30 | --default)
31 | DEFAULT=YES
32 | shift # past argument
33 | ;;
34 | *) # unknown option
35 | POSITIONAL+=("$1") # save it in an array for later
36 | shift # past argument
37 | ;;
38 | esac
39 | done
40 |
41 | if [ ! -z $NUM_WORKERS ]; then
42 | NUM_WORKERS_TAG="--num_workers $NUM_WORKERS"
43 | fi
44 |
45 | SFRAC_TAG=""
46 | if [ ! -z $SFRAC ]; then
47 | SFRAC_TAG="--s_frac $SFRAC"
48 | fi
49 |
50 | TFRAC_TAG=""
51 | if [ ! -z $TFRAC ]; then
52 | TFRAC_TAG="--tr_frac $TFRAC"
53 | fi
54 |
55 | SEED_TAG=""
56 | if [ ! -z $SEED ]; then
57 | SEED_TAG="--seed $SEED"
58 | fi
59 |
60 | if [ $SAMPLE = "iid" ]; then
61 | python3 split_data.py --iid $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG
62 | else
63 | python3 split_data.py $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG
64 | fi
--------------------------------------------------------------------------------
/data/femnist/split_data.sh:
--------------------------------------------------------------------------------
1 | while [[ $# -gt 0 ]]
2 | do
3 | key="$1"
4 |
5 | case $key in
6 | -nw)
7 | NUM_WORKERS="$2"
8 | shift # past argument
9 | shift # past value
10 | ;;
11 | -s)
12 | SAMPLE="$2"
13 | shift # past argument
14 | shift # past value
15 | ;;
16 | --sf)
17 | SFRAC="$2"
18 | shift # past argument
19 | shift # past value
20 | ;;
21 | --tf)
22 | TFRAC="$2"
23 | shift # past argument
24 | shift # past value
25 | ;;
26 | --seed)
27 | SEED="$2"
28 | shift # past argument
29 | ;;
30 | --default)
31 | DEFAULT=YES
32 | shift # past argument
33 | ;;
34 | *) # unknown option
35 | POSITIONAL+=("$1") # save it in an array for later
36 | shift # past argument
37 | ;;
38 | esac
39 | done
40 |
41 | NUM_WORKERS_TAG=""
42 | if [ ! -z $NUM_WORKERS ]; then
43 | NUM_WORKERS_TAG="--num_workers $NUM_WORKERS"
44 | fi
45 |
46 | SFRAC_TAG=""
47 | if [ ! -z $SFRAC ]; then
48 | SFRAC_TAG="--s_frac $SFRAC"
49 | fi
50 |
51 | TFRAC_TAG=""
52 | if [ ! -z $TFRAC ]; then
53 | TFRAC_TAG="--tr_frac $TFRAC"
54 | fi
55 |
56 | SEED_TAG=""
57 | if [ ! -z $SEED ]; then
58 | SEED_TAG="--seed $SEED"
59 | fi
60 |
61 | if [ $SAMPLE = "iid" ]; then
62 | python3 split_data.py --iid $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG
63 | else
64 | python3 split_data.py $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG
65 | fi
--------------------------------------------------------------------------------
/data/sent140/split_data.sh:
--------------------------------------------------------------------------------
1 | while [[ $# -gt 0 ]]
2 | do
3 | key="$1"
4 |
5 | case $key in
6 | -nw)
7 | NUM_WORKERS="$2"
8 | shift # past argument
9 | shift # past value
10 | ;;
11 | -s)
12 | SAMPLE="$2"
13 | shift # past argument
14 | shift # past value
15 | ;;
16 | --sf)
17 | SFRAC="$2"
18 | shift # past argument
19 | shift # past value
20 | ;;
21 | --tf)
22 | TFRAC="$2"
23 | shift # past argument
24 | shift # past value
25 | ;;
26 | --seed)
27 | SEED="$2"
28 | shift # past argument
29 | ;;
30 | --default)
31 | DEFAULT=YES
32 | shift # past argument
33 | ;;
34 | *) # unknown option
35 | POSITIONAL+=("$1") # save it in an array for later
36 | shift # past argument
37 | ;;
38 | esac
39 | done
40 |
41 | NUM_WORKERS_TAG=""
42 | if [ ! -z $NUM_WORKERS ]; then
43 | NUM_WORKERS_TAG="--num_workers $NUM_WORKERS"
44 | fi
45 |
46 | SFRAC_TAG=""
47 | if [ ! -z $SFRAC ]; then
48 | SFRAC_TAG="--s_frac $SFRAC"
49 | fi
50 |
51 | TFRAC_TAG=""
52 | if [ ! -z $TFRAC ]; then
53 | TFRAC_TAG="--tr_frac $TFRAC"
54 | fi
55 |
56 | SEED_TAG=""
57 | if [ ! -z $SEED ]; then
58 | SEED_TAG="--seed $SEED"
59 | fi
60 |
61 | if [ $SAMPLE = "iid" ]; then
62 | python3 split_data.py --iid $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG
63 | else
64 | python3 split_data.py $NUM_WORKERS_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG
65 | fi
--------------------------------------------------------------------------------
/utils/optim.py:
--------------------------------------------------------------------------------
1 | import torch.optim as optim
2 | import numpy as np
3 |
4 |
5 | def get_optimizer(optimizer_name, net, lr_initial=1e-3):
6 | """
7 | returns an optimizer over the trainable parameters of `net`
8 | :param optimizer_name: "adam" or "sgd"
9 | :param net: nn.Module
10 | :param lr_initial: initial learning rate
11 | :return: torch.optim.Optimizer
12 | """
13 | if optimizer_name == "adam":
14 | return optim.Adam([param for param in net.parameters() if param.requires_grad], lr=lr_initial)
15 |
16 | elif optimizer_name == "sgd":
17 | return optim.SGD([param for param in net.parameters() if param.requires_grad], lr=lr_initial)
18 |
19 | else:
20 | raise NotImplementedError("Other optimizers are not implemented")
21 |
22 |
23 | def get_lr_scheduler(optimizer, scheduler_name, epoch_size):
24 | if scheduler_name == "sqrt":
25 | return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1/np.sqrt(x) if x > 0 else 1)
26 |
27 | elif scheduler_name == "linear":
28 | return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1 / x if x > 0 else 1)
29 |
30 | elif scheduler_name == "constant":
31 | return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1)
32 |
33 | elif scheduler_name == "cyclic":
34 | return optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-5, max_lr=0.1)
35 |
36 | elif scheduler_name == "custom":
37 | return optim.lr_scheduler.StepLR(optimizer, step_size=30*int(epoch_size), gamma=0.1)
38 | else:
39 | raise NotImplementedError("Other learning rate schedulers are not implemented")
40 |
41 |
--------------------------------------------------------------------------------
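A minimal sketch of the two helpers on a toy model; the gradients are omitted, only the learning-rate schedule is exercised:

```python
import torch.nn as nn

from utils.optim import get_optimizer, get_lr_scheduler

net = nn.Linear(10, 2)
optimizer = get_optimizer("sgd", net, lr_initial=1e-2)
lr_scheduler = get_lr_scheduler(optimizer, "sqrt", epoch_size=100)  # epoch_size only matters for "custom"

for step in range(3):
    optimizer.step()   # no gradients here, so parameters are left untouched
    lr_scheduler.step()
    print(optimizer.param_groups[0]["lr"])  # decays as 1/sqrt(step)
```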
/data/femnist/get_hashes.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import os
3 | import pickle
4 |
5 |
6 | def load_obj(name):
7 | with open(name + '.pkl', 'rb') as f:
8 | return pickle.load(f)
9 |
10 |
11 | def save_obj(obj, name):
12 | with open(name + '.pkl', 'wb') as f:
13 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
14 |
15 |
16 | cfd = os.path.join('intermediate', 'class_file_dirs')
17 | wfd = os.path.join('intermediate', 'write_file_dirs')
18 |
19 | class_file_dirs = load_obj(cfd)
20 | write_file_dirs = load_obj(wfd)
21 |
22 | class_file_hashes = []
23 | write_file_hashes = []
24 |
25 | count = 0
26 | for tup in class_file_dirs:
27 | if count % 100000 == 0:
28 | print('hashed %d class images' % count)
29 |
30 | (cclass, cfile) = tup
31 | file_path = os.path.join(cfile)
32 |
33 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest()
34 |
35 | class_file_hashes.append((cclass, cfile, chash))
36 |
37 | count += 1
38 |
39 | cfhd = os.path.join('intermediate', 'class_file_hashes')
40 | save_obj(class_file_hashes, cfhd)
41 |
42 | count = 0
43 | for tup in write_file_dirs:
44 | if (count % 100000 == 0):
45 | print('hashed %d write images' % count)
46 |
47 | (cclass, cfile) = tup
48 | file_path = os.path.join(cfile)
49 |
50 | chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest()
51 |
52 | write_file_hashes.append((cclass, cfile, chash))
53 |
54 | count += 1
55 |
56 | wfhd = os.path.join('intermediate', 'write_file_hashes')
57 | save_obj(write_file_hashes, wfhd)
--------------------------------------------------------------------------------
/loaders/sent140.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchtext import data
3 |
4 |
5 | def get_iterator_sent140(path, all_data_path, device, max_vocab_size=25_000, batch_size=64):
6 | """
7 | Build a text iterator to be used with the LSTM model.
8 | :param path: path to .json file used to build the iterator, see TorchText for .json file format.
9 | :param all_data_path: path to .json file containing all train data
10 | :param device:
11 | :param max_vocab_size:
12 | :param batch_size:
13 | :return: iterator over sent140 samples, each sample has two attributes "text" and "label"
14 | """
15 | TEXT = data.Field(tokenize='spacy', include_lengths=True)
16 | LABEL = data.LabelField(dtype=torch.float)
17 |
18 | fields = {'text': ('text', TEXT), 'label': ('label', LABEL)}
19 |
20 | text_data = data.TabularDataset(path=path, format='json', fields=fields)
21 |
22 | text_data.sort_key = lambda x: len(x.text)
23 |
24 | # Fix the seed
25 | torch.manual_seed(0)
26 | torch.backends.cudnn.deterministic = True
27 | torch.backends.cudnn.benchmark = False
28 |
29 | all_text_data = data.TabularDataset(path=all_data_path, format='json', fields=fields)
30 |
31 | # vocab is built using all data, in order to have the same mapping from words to indexes across workers
32 | TEXT.build_vocab(all_text_data,
33 | max_size=max_vocab_size,
34 | vectors="glove.6B.100d",
35 | unk_init=torch.Tensor.normal_)
36 | LABEL.build_vocab(text_data)
37 |
38 | iterator = data.BucketIterator(
39 | text_data,
40 | batch_size=batch_size,
41 | sort_within_batch=True,
42 | device=device)
43 |
44 | return iterator
45 |
46 |
--------------------------------------------------------------------------------
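A minimal usage sketch, assuming the Sentiment140 data has been preprocessed (see ```data/sent140/README.md```); the per-worker and aggregated file names below are assumptions:

```python
import torch

from loaders.sent140 import get_iterator_sent140

device = torch.device("cpu")
iterator = get_iterator_sent140(path="data/sent140/train/0.json",
                                all_data_path="data/sent140/train/train.json",
                                device=device,
                                batch_size=64)

for batch in iterator:
    text, text_lengths = batch.text  # include_lengths=True yields (token indices, lengths)
    print(text.shape, batch.label.shape)
    break
```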
/data/femnist/preprocess.sh:
--------------------------------------------------------------------------------
1 | if [ ! -d "raw_data" ]; then
2 | echo "------------------------------"
3 | echo "downloading data"
4 | mkdir raw_data
5 | cd raw_data
6 | wget https://s3.amazonaws.com/nist-srd/SD19/by_class.zip
7 | wget https://s3.amazonaws.com/nist-srd/SD19/by_write.zip
8 | unzip by_class.zip
9 | rm by_class.zip
10 | unzip by_write.zip
11 | rm by_write.zip
12 | cd ../
13 | echo "finished downloading data"
14 | fi
15 | if [ ! -d "intermediate" ]; then # stores .pkl files during preprocessing
16 | mkdir intermediate
17 | fi
18 |
19 | if [ ! -f intermediate/class_file_dirs.pkl ]; then
20 | echo "------------------------------"
21 | echo "extracting file directories of images"
22 | python3 get_file_dirs.py
23 | echo "finished extracting file directories of images"
24 | fi
25 |
26 | if [ ! -f intermediate/class_file_hashes.pkl ]; then
27 | echo "------------------------------"
28 | echo "calculating image hashes"
29 | python3 get_hashes.py
30 | echo "finished calculating image hashes"
31 | fi
32 |
33 | if [ ! -f intermediate/write_with_class.pkl ]; then
34 | echo "------------------------------"
35 | echo "assigning class labels to write images"
36 | python3 match_hashes.py
37 | echo "finished assigning class labels to write images"
38 | fi
39 |
40 | if [ ! -f intermediate/images_by_writer.pkl ]; then
41 | echo "------------------------------"
42 | echo "grouping images by writer"
43 | python3 group_by_writer.py
44 | echo "finished grouping images by writer"
45 | fi
46 | if [ ! -f test/test.json ]; then
47 | echo "------------------------------"
48 | echo "splitting data"
49 | mkdir train
50 | mkdir test
51 |
52 | ./split_data.sh "$@"
53 |
54 | echo "finished splitting data"
55 | fi
--------------------------------------------------------------------------------
/data/synthetic/generate_data.sh:
--------------------------------------------------------------------------------
1 | # Parse arguments
2 | while [[ $# -gt 0 ]]
3 | do
4 | key="$1"
5 |
6 | case $key in
7 | -nw)
8 | NUM_WORKERS="$2"
9 | shift # past argument
10 | shift # past value
11 | ;;
12 | -nc)
13 | NUM_CLASSES="$2"
14 | shift # past argument
15 | shift # past value
16 | ;;
17 | -dim)
18 | DIMENSION="$2"
19 | shift # past argument
20 | shift # past value
21 | ;;
22 | --tf)
23 | TFRAC="$2"
24 | shift # past argument
25 | shift # past value
26 | ;;
27 | --seed)
28 | SEED="$2"
29 | shift # past argument
30 | ;;
31 | --default)
32 | DEFAULT=YES
33 | shift # past argument
34 | ;;
35 | *) # unknown option
36 | POSITIONAL+=("$1") # save it in an array for later
37 | shift # past argument
38 | ;;
39 | esac
40 | done
41 |
42 | NUM_WORKERS_TAG=""
43 | if [ ! -z $NUM_WORKERS ]; then
44 | NUM_WORKERS_TAG="--num_workers $NUM_WORKERS"
45 | fi
46 |
47 | NUM_CLASSES_TAG=""
48 | if [ ! -z $NUM_CLASSES ]; then
49 | NUM_CLASSES_TAG="--num_classes $NUM_CLASSES"
50 | fi
51 |
52 | DIMENSION_TAG=""
53 | if [ ! -z $DIMENSION ]; then
54 | DIMENSION_TAG="--dimension $DIMENSION"
55 | fi
56 |
57 | TFRAC_TAG=""
58 | if [ ! -z $TFRAC ]; then
59 | TFRAC_TAG="--tr_frac $TFRAC"
60 | fi
61 |
62 | SEED_TAG=""
63 | if [ ! -z $SEED ]; then
64 | SEED_TAG="--seed $SEED"
65 | fi
66 |
67 |
68 | if [ ! -d "all_data" ]; then
69 | mkdir all_data
70 | fi
71 |
72 |
73 | if [ ! -f all_data/all_data.json ]; then
74 | echo "------------------------------"
75 | echo "generating data"
76 |
77 | python3 generate_data.py $NUM_WORKERS_TAG $NUM_CLASSES_TAG $DIMENSION_TAG $SEED_TAG
78 |
79 | echo "finished generating data"
80 | fi
81 |
82 | if [ ! -f test/test.json ]; then
83 | echo "------------------------------"
84 | echo "splitting data"
85 | mkdir train
86 | mkdir test
87 |
88 | python3 split_data.py $TFRAC_TAG $SEED_TAG
89 |
90 | echo "finished splitting data"
91 | fi
--------------------------------------------------------------------------------
/loaders/shakespeare.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data import Dataset, DataLoader
3 | import string
4 |
5 |
6 | class CharacterDataset(Dataset):
7 | def __init__(self, file_path, chunk_len, device):
8 | """
9 | Dataset for next-character prediction: each sample is an input sequence of characters
10 | and a target sequence consisting of the input shifted by one character
11 | :param file_path: path to .txt file containing the training corpus
12 | :param chunk_len: (int) the length of the input and target sequences
13 | :param device:
14 | """
15 | self.all_characters = string.printable
16 | self.n_characters = len(self.all_characters)
17 | self.chunk_len = chunk_len
18 | self.device = device
19 | with open(file_path, 'r') as f:
20 | self.text = f.read()
21 |
22 | def __len__(self):
23 | return len(self.text) // (self.chunk_len + 1)
24 |
25 | def __getitem__(self, idx):
26 | input_ = torch.zeros(self.chunk_len).long()
27 | for c in range(self.chunk_len):
28 | input_[c] = self.all_characters.index(self.text[idx + c])
29 |
30 | target = torch.zeros(self.chunk_len).long()
31 | for c in range(self.chunk_len):
32 | target[c] = self.all_characters.index(self.text[idx + c + 1])
33 |
34 | return input_.to(self.device), target.to(self.device)
35 |
36 |
37 | def get_iterator_shakespeare(file_path, device, batch_size, chunk_len=200):
38 | """
39 | get next character prediction DataLoader, yields `batch_size` batches of `CharacterDataset` samples
40 | :param file_path: path to .txt file containing the training corpus
41 | :param chunk_len: (int) the length of the input and target sequences
42 | :param device:
43 | :param batch_size
44 | :return: iterator over shakespeare dataset samples
45 | """
46 | dataset = CharacterDataset(file_path, chunk_len, device)
47 | iterator = DataLoader(dataset, shuffle=True, batch_size=batch_size)
48 |
49 | return iterator
50 |
--------------------------------------------------------------------------------
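A minimal usage sketch, assuming the Shakespeare data has been preprocessed (see ```data/shakespeare/README.md```); the rank-based file name is an assumption taken from ```communication_module/worker.py```:

```python
import torch

from loaders.shakespeare import get_iterator_shakespeare

device = torch.device("cpu")
iterator = get_iterator_shakespeare("data/shakespeare/train/0.txt", device, batch_size=8, chunk_len=200)

for input_, target in iterator:
    # both tensors hold character indices into string.printable
    print(input_.shape, target.shape)  # (8, 200) and (8, 200)
    break
```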
/reproduce_results.py:
--------------------------------------------------------------------------------
1 | from utils.utils import args_to_string, loggs_to_json
2 | from utils.args import parse_args
3 |
4 | import os
5 | import json
6 |
7 |
8 | trsh_dict = {"gaia": 0.65,
9 | "amazon_us": 0.55,
10 | "geantdistance": 0.55,
11 | "exodus": 0.5,
12 | "ebone": 0.5}
13 |
14 | lr_dict = {"gaia": "1e-3",
15 | "amazon_us": "1e-3",
16 | "geantdistance": "1e-3",
17 | "exodus": "1e-1",
18 | "ebone": "1e-1"}
19 |
20 | for network_name in ["gaia", "amazon_us", "geantdistance", "exodus", "ebone"]:
21 | print("{}:".format(network_name))
22 | args = parse_args(["inaturalist",
23 | "--network", network_name,
24 | "--bz", "16",
25 | "--lr", lr_dict[network_name],
26 | "--decay", "sqrt",
27 | "--local_steps", "1"])
28 |
29 | args_string = args_to_string(args)
30 |
31 | loggs_dir = os.path.join("loggs", args_to_string(args))
32 | loggs_to_json(loggs_dir)
33 |
34 | loggs_dir_path = os.path.join("loggs", args_to_string(args))
35 | path_to_json = os.path.join("results", "json", "{}.json".format(os.path.split(loggs_dir_path)[1]))
36 | with open(path_to_json, "r") as f:
37 | data = json.load(f)
38 |
39 | for architecture in ["centralized", "ring", "matcha"]:
40 | values = data['Train/Acc'][architecture]
41 | rounds = data["Round"][architecture]
42 |
43 | ii = -1
44 | for ii, value in enumerate(values):
45 | if value > trsh_dict[network_name]:
46 | break
47 |
48 | try:
49 | print("Number of steps to achieve {}% is {} on {} using {}".format(int(trsh_dict[network_name] * 100),
50 | rounds[ii], network_name, architecture))
51 | except IndexError:
52 | print("Number of steps to achieve {}% is {} on {} using {}".format(int(trsh_dict[network_name] * 100),
53 | rounds[-1], network_name, architecture))
54 |
55 | print("#" * 10)
56 |
--------------------------------------------------------------------------------
/loaders/femnist.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 |
4 | import torch
5 | from torch.utils.data import Dataset, DataLoader
6 | from torchvision.transforms import Compose, ToTensor, Normalize, Resize
7 | from PIL import Image
8 |
9 |
10 | class FEMNIST(Dataset):
11 | def __init__(self, pickle_file, root_path, device, transforms=None):
12 | """
13 | FEMNIST Dataset generated from a .pkl file containing a list of tuples,
14 | each representing a path to an image and its class
15 | :param pickle_file: path to .pkl file
16 | :param root_path: path to the directory containing images
17 | :param device:
18 | :param transforms: transformations to apply to the images
19 | """
20 | self.root_path = root_path
21 | self.device = device
22 | with open(pickle_file, 'rb') as f:
23 | self.data = pickle.load(f)
24 |
25 | self.transforms = transforms
26 |
27 | def __getitem__(self, idx):
28 | img_path, label = self.data[idx]
29 |
30 | img = Image.open(os.path.join(self.root_path, img_path))
31 | label = torch.tensor(label).to(self.device)
32 |
33 | if self.transforms:
34 | img = self.transforms(img).to(self.device)
35 |
36 | return img, label
37 |
38 | def __len__(self):
39 | return len(self.data)
40 |
41 |
42 | def get_iterator_femnist(file_path, device, batch_size=1):
43 | """
44 | returns an iterator over FEMNIST dataset batches
45 | :param file_path: path to a .pkl file containing a list of tuples,
46 | each representing a path to an image and its class
47 | :param device:
48 | :param batch_size:
49 | :return: torch.utils.data.DataLoader constructed from the FEMNIST dataset object
50 | """
51 | root_path = os.path.join("data", "femnist")
52 |
53 | transforms = Compose([Resize(28),
54 | ToTensor(),
55 | Normalize((0.1307,), (0.3081,))
56 | ])
57 |
58 | dataset = FEMNIST(file_path, device=device, root_path=root_path, transforms=transforms)
59 | iterator = DataLoader(dataset, shuffle=True, batch_size=batch_size)
60 |
61 | return iterator
62 |
--------------------------------------------------------------------------------
/loaders/inaturalist.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 |
4 | import torch
5 | from torch.utils.data import Dataset, DataLoader
6 | from torchvision.transforms import Compose, ToTensor, Normalize, CenterCrop
7 | from PIL import Image
8 |
9 |
10 | class INaturalist(Dataset):
11 | def __init__(self, pickle_file, root_path, device, transforms=None):
12 | """
13 | iNaturalist Dataset generated from a .pkl file containing a list of tuples,
14 | each representing a path to an image and its class
15 | :param pickle_file: path to .pkl file
16 | :param root_path: path to the directory containing images
17 | :param device:
18 | :param transforms: transformations to apply to the images
19 | """
20 | self.root_path = root_path
21 | self.device = device
22 | with open(pickle_file, 'rb') as f:
23 | self.data = pickle.load(f)
24 |
25 | self.transforms = transforms
26 |
27 | def __getitem__(self, idx):
28 | img_path, label = self.data[idx]
29 |
30 | img = Image.open(os.path.join(self.root_path, img_path)).convert("RGB")
31 | label = torch.tensor(label).to(self.device)
32 |
33 | if self.transforms:
34 | img = self.transforms(img).to(self.device)
35 |
36 | return img, label
37 |
38 | def __len__(self):
39 | return len(self.data)
40 |
41 |
42 | def get_iterator_inaturalist(file_path, device, batch_size=1):
43 | """
44 | returns an iterator over iNaturalist dataset batches
45 | :param file_path: path to a .pkl file containing a list of tuples,
46 | each representing a path to an image and its class
47 | :param device:
48 | :param batch_size:
49 | :return: torch.utils.data.DataLoader constructed from the INaturalist dataset object
50 | """
51 | root_path = os.path.join("data", "inaturalist")
52 |
53 | transforms = Compose([CenterCrop((224, 224)),
54 | ToTensor(),
55 | Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
56 | ])
57 |
58 | dataset = INaturalist(file_path, device=device, root_path=root_path, transforms=transforms)
59 | iterator = DataLoader(dataset, shuffle=True, batch_size=batch_size)
60 |
61 | return iterator
62 |
--------------------------------------------------------------------------------
/data/synthetic/split_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import json
4 | import random
5 | import time
6 | import numpy as np
7 | from sklearn.model_selection import train_test_split
8 |
9 | parser = argparse.ArgumentParser()
10 |
11 |
12 | parser.add_argument('--tr_frac',
13 | help='fraction in training set; default: 0.8;',
14 | type=float,
15 | default=0.8)
16 | parser.add_argument('--seed',
17 | help='args.seed for random partitioning of test/train data',
18 | type=int,
19 | default=None)
20 |
21 | parser.set_defaults(user=False)
22 |
23 | args = parser.parse_args()
24 |
25 |
26 | if __name__ == "__main__":
27 | rng_seed = (args.seed if (args.seed is not None and args.seed >= 0) else int(time.time()))
28 | rng = random.Random(rng_seed)
29 |
30 | data_file = os.path.join('all_data', 'all_data.json')
31 |
32 | with open(data_file, 'r') as inf:
33 | data = json.load(inf)
34 |
35 | X_list = {"train": [], "test": []}
36 | y_list = {"train": [], "test": []}
37 |
38 | num_classes = data['num_classes']
39 |
40 | for worker in data['users']:
41 | train_file = os.path.join("train", "{}.json".format(worker))
42 |
43 | worker_data = data['user_data'][worker]
44 | X = np.array(worker_data['x'])
45 | y = np.array(worker_data['y'])
46 |
47 | X_train, X_test, y_train, y_test = train_test_split(
48 | X, y, train_size=args.tr_frac, random_state=rng_seed)
49 |
50 | X_list["train"].append(X_train)
51 | y_list["train"].append(y_train)
52 | X_list["test"].append(X_test)
53 | y_list["test"].append(y_test)
54 |
55 | json_data_train = {"x": X_train.tolist(), "y": y_train.tolist(), "num_classes": num_classes}
56 |
57 | with open(train_file, 'w') as outfile:
58 | json.dump(json_data_train, outfile)
59 |
60 | for key in ["train", "test"]:
61 | X = np.vstack(X_list[key])
62 | y = np.concatenate(y_list[key])
63 |
64 | file = os.path.join(key, "{}.json".format(key))
65 | json_data = {"x": X.tolist(), "y": y.tolist(), "num_classes": num_classes}
66 | with open(file, 'w') as outfile:
67 | json.dump(json_data, outfile)
68 |
69 |
--------------------------------------------------------------------------------
/data/inaturalist/preprocess.sh:
--------------------------------------------------------------------------------
1 | while [[ $# -gt 0 ]]
2 | do
3 | key="$1"
4 |
5 | case $key in
6 | --network)
7 | NETWORK_NAME="$2"
8 | shift # past argument
9 | shift # past value
10 | ;;
11 | --sf)
12 | SFRAC="$2"
13 | shift # past argument
14 | shift # past value
15 | ;;
16 | --tf)
17 | TFRAC="$2"
18 | shift # past argument
19 | shift # past value
20 | ;;
21 | --seed)
22 | SEED="$2"
23 | shift # past argument
24 | ;;
25 | --default)
26 | DEFAULT=YES
27 | shift # past argument
28 | ;;
29 | *) # unknown option
30 | POSITIONAL+=("$1") # save it in an array for later
31 | shift # past argument
32 | ;;
33 | esac
34 | done
35 |
36 | NETWORK_NAME_TAG=""
37 | if [ ! -z $NETWORK_NAME ]; then
38 | NETWORK_NAME_TAG="--network $NETWORK_NAME"
39 | fi
40 |
41 | SFRAC_TAG=""
42 | if [ ! -z $SFRAC ]; then
43 | SFRAC_TAG="--s_frac $SFRAC"
44 | fi
45 |
46 | TFRAC_TAG=""
47 | if [ ! -z $TFRAC ]; then
48 | TFRAC_TAG="--tr_frac $TFRAC"
49 | fi
50 |
51 | SEED_TAG=""
52 | if [ ! -z $SEED ]; then
53 | SEED_TAG="--seed $SEED"
54 | fi
55 |
56 | if [ ! -f raw_data/train2018.json ]; then
57 | echo "------------------------------"
58 | echo "downloading annotations and locations"
59 |
60 | cd raw_data
61 | wget http://www.vision.caltech.edu/~gvanhorn/datasets/inaturalist/fgvc5_competition/val2018.json.tar.gz
62 | wget http://www.vision.caltech.edu/~gvanhorn/datasets/inaturalist/fgvc5_competition/inat2018_locations.zip
63 | wget http://www.vision.caltech.edu/~gvanhorn/datasets/inaturalist/fgvc5_competition/train2018.json.tar.gz
64 | unzip inat2018_locations.zip -d .
65 | tar -xf val2018.json.tar.gz -C .
66 | tar -xf train2018.json.tar.gz -C .
67 |
68 | rm inat2018_locations.zip
69 | rm val2018.json.tar.gz
70 | rm train2018.json.tar.gz
71 | mv inat2018_locations/* .
72 | rm -r inat2018_locations
73 | echo "finished downloading annotations and locations"
74 | cd ../
75 | fi
76 |
77 | if [ ! -f test/test.json ]; then
78 | echo "------------------------------"
79 | echo "splitting data"
80 | mkdir train
81 | mkdir test
82 |
83 | python3 split_data.py $NETWORK_NAME_TAG $SFRAC_TAG $TFRAC_TAG $SEED_TAG
84 |
85 | echo "finished splitting data"
86 | fi
--------------------------------------------------------------------------------
/graph_utils/show_networks.py:
--------------------------------------------------------------------------------
1 | """
2 | Generate .html file with world map and positions of workers and links used in the overlay
3 | """
4 | import argparse
5 | import os
6 | import time
7 | import mplleaflet
8 | import matplotlib.pyplot as plt
9 | import networkx as nx
10 | from geopy.geocoders import Nominatim
11 |
12 |
13 | geolocator = Nominatim(user_agent="delay", timeout=20)
14 |
15 | parser = argparse.ArgumentParser()
16 | parser.add_argument(
17 | 'underlay',
18 | help='name of the underlay network; should be present in "/data"',
19 | type=str)
20 | parser.add_argument(
21 | 'architecture',
22 | help='name of the architecture; should be present in "results/$UNDERLAY"',
23 | type=str)
24 |
25 | parser.set_defaults(user=False)
26 |
27 | args = parser.parse_args()
28 |
29 | if __name__ == "__main__":
30 | underlay_path = os.path.join("data", "{}.gml".format(args.underlay))
31 | overlay_path = os.path.join("results", args.underlay, "{}.gml".format(args.architecture))
32 |
33 | underlay = nx.read_gml(underlay_path)
34 |
35 | pos_dict = {}
36 | for node in underlay.nodes():
37 | try:
38 | pos_dict[node] = [underlay.nodes(data=True)[node]["Longitude"],
39 | underlay.nodes(data=True)[node]["Latitude"]]
40 |
41 | except KeyError:
42 | time.sleep(1.2) # To avoid Service time out Error
43 |
44 | geo = geolocator.geocode(node, timeout=20)
45 | pos_dict[node] = [geo.longitude, geo.latitude]
46 |
47 | overlay = nx.read_gml(overlay_path).to_undirected()
48 |
49 | mapping = {}
50 | for ii, node in enumerate(underlay.nodes()):
51 | mapping[str(ii)] = node
52 |
53 | overlay = nx.relabel_nodes(overlay, mapping).to_undirected()
54 |
55 | fig, ax = plt.subplots()
56 |
57 | nx.draw_networkx_nodes(overlay, pos=pos_dict, node_size=10, node_color='red', edge_color='k', alpha=.5,
58 | with_labels=True)
59 | nx.draw_networkx_edges(overlay, pos=pos_dict, edge_color='blue', alpha=1, width=5.0)
60 | nx.draw_networkx_labels(overlay, pos=pos_dict, label_pos=10.3)
61 |
62 | mplleaflet.display(fig=ax.figure)
63 | mplleaflet.save_html(fig=ax.figure,
64 | fileobj=os.path.join("results", args.underlay, "{}.html".format(args.architecture)))
65 |
--------------------------------------------------------------------------------
/graph_utils/utils/mbst.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | from networkx.algorithms.tournament import hamiltonian_path
4 |
5 |
6 | def cube_algorithm(G_complete):
7 | """
8 | Use the cube algorithm to build an approximation for the 2-MBST problem:
9 | 1. Add edges to G to build the complete graph G_complete
10 | 2. Build an MST T of G_complete
11 | 3. Build the cube of T
12 | 4. Find a Hamiltonian path in the cube of T
13 | :param G_complete: (nx.Graph()) complete graph with "weight" edge attributes
14 | """
15 | T = nx.minimum_spanning_tree(G_complete, weight="weight")
16 |
17 | T_cube = nx.Graph()
18 | T_cube.add_nodes_from(T.nodes(data=True))
19 |
20 | shortest_paths = nx.shortest_path_length(T)
21 | for source, lengths_dict in shortest_paths:
22 | for target in lengths_dict:
23 | if lengths_dict[target] <= 3:
24 | T_cube.add_edge(source, target,
25 | weight=G_complete.get_edge_data(source, target)["weight"])
26 |
27 | ham_path = hamiltonian_path(T_cube.to_directed())
28 |
29 | result = nx.Graph()
30 | result.add_nodes_from(G_complete.nodes(data=True))
31 |
32 | for idx in range(len(ham_path) - 1):
33 | result.add_edge(ham_path[idx], ham_path[idx + 1],
34 | weight=G_complete.get_edge_data(ham_path[idx], ham_path[idx + 1])['weight'])
35 |
36 | return result
37 |
38 |
39 | def delta_prim(G_complete, delta):
40 | """
41 | implementation of the delta-Prim algorithm from https://ieeexplore.ieee.org/document/850653
42 | :param G_complete: (nx.Graph()) complete graph with "weight" edge attributes
43 | :param delta: (int) maximum allowed degree
44 | :return: a spanning tree T with maximum degree at most delta
45 | """
46 | N = G_complete.number_of_nodes()
47 | T = nx.Graph()
48 |
49 | T.add_node(list(G_complete.nodes)[0])
50 |
51 | while len(T.edges) < N - 1:
52 | smallest_weight = np.inf
53 | edge_to_add = None
54 | for u in T.nodes:
55 | for v in G_complete.nodes:
56 | if (v not in T.nodes) and (T.degree[u] < delta):
57 | weight = G_complete.get_edge_data(u, v)["weight"]
58 | if weight < smallest_weight:
59 | smallest_weight = weight
60 | edge_to_add = (u, v)
61 |
62 | T.add_edge(*edge_to_add, weight=smallest_weight)
63 |
64 | T.add_nodes_from(G_complete.nodes(data=True))
65 |
66 | return T
67 |
--------------------------------------------------------------------------------
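A minimal sketch of ```delta_prim``` on a small random complete graph (run from ```graph_utils/utils``` or with that directory on ```PYTHONPATH```); with ```delta=2``` the returned spanning tree has maximum degree 2, i.e. it is a path:

```python
import random

import networkx as nx

from mbst import delta_prim

random.seed(0)
G = nx.complete_graph(6)
for u, v in G.edges():
    G[u][v]["weight"] = random.random()

T = delta_prim(G, delta=2)
print(T.number_of_edges())             # 5 edges for 6 nodes: a spanning tree
print(max(dict(T.degree()).values()))  # never exceeds delta
```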
/make_table3.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 |
4 | from utils.args import parse_args
5 | from utils.utils import args_to_string, loggs_to_json
6 |
7 | trsh_dict = {"gaia": 0.65,
8 | "amazon_us": 0.55,
9 | "geantdistance": 0.55,
10 | "exodus": 0.5,
11 | "ebone": 0.5}
12 |
13 | lr_dict = {"gaia": "1e-3",
14 | "amazon_us": "1e-3",
15 | "geantdistance": "1e-3",
16 | "exodus": "1e-1",
17 | "ebone": "1e-1"}
18 |
19 | if __name__ == "__main__":
20 | for network_name in ["gaia", "amazon_us", "geantdistance", "exodus", "ebone"]:
21 | print("{}:".format(network_name))
22 | args = parse_args(["inaturalist",
23 | "--network", network_name,
24 | "--bz", "16",
25 | "--lr", lr_dict[network_name],
26 | "--decay", "sqrt",
27 | "--local_steps", "1"])
28 |
29 | args_string = args_to_string(args)
30 |
31 | loggs_dir = os.path.join("loggs", args_to_string(args))
32 | loggs_to_json(loggs_dir)
33 |
34 | loggs_dir_path = os.path.join("loggs", args_to_string(args))
35 | path_to_json = os.path.join("results", "json", "{}.json".format(os.path.split(loggs_dir_path)[1]))
36 | with open(path_to_json, "r") as f:
37 | data = json.load(f)
38 |
39 | for architecture in ["centralized", "ring", "matcha"]:
40 | values = data['Train/Acc'][architecture]
41 | rounds = data["Round"][architecture]
42 |
43 | for ii, value in enumerate(values):
44 | if value > trsh_dict[network_name]:
45 | break
46 |
47 | try:
48 | print("Number of steps to achieve {}% is {} on {} using {}".format(int(trsh_dict[network_name] * 100),
49 | rounds[ii], network_name,
50 | architecture))
51 | except IndexError:
52 | print("Number of steps to achieve {}% is {} on {} using {}".format(int(trsh_dict[network_name] * 100),
53 | rounds[-1], network_name,
54 | architecture))
55 |
56 | print("#" * 10)
--------------------------------------------------------------------------------
/communication_module/worker.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 | import torch.distributed as dist
5 |
6 | from utils.utils import get_network, get_iterator, get_model
7 |
8 |
9 | EXTENSIONS = {"synthetic": ".json", "sent140": ".json", "femnist": ".pkl", "shakespeare": ".txt", "inaturalist": ".pkl"}
10 |
11 |
12 | class Worker(object):
13 | def __init__(self, args, rank):
14 | self.rank = rank
15 | self.local_steps = args.local_steps
16 | self.device = args.device
17 | self.num_gpu = torch.cuda.device_count()
18 | self.batch_size = args.bz
19 | self.network = get_network(args.network_name, args.architecture)
20 | self.world_size = self.network.number_of_nodes() + 1 # we add a node representing the network manager
21 | self.fit_by_epoch = args.fit_by_epoch
22 | self.initial_lr = args.lr
23 | self.optimizer_name = args.optimizer
24 | self.lr_scheduler_name = args.decay
25 |
26 | if self.device == "cuda":
27 | if torch.cuda.is_available():
28 | print(f"{rank} get gpu {self.rank % self.num_gpu}")
29 | self.device = "cuda:"+str(self.rank % self.num_gpu)
30 | else:
31 | print("No GPU is available on the system")
32 | raise TypeError
33 | elif self.device != "cpu":
34 | print("Please choose the device to be either 'cuda' or 'cpu'")
35 | raise TypeError
36 |
37 | self.data_dir = os.path.join("data", args.experiment, "train")
38 | self.data_path = os.path.join(self.data_dir, str(rank) + EXTENSIONS[args.experiment])
39 |
40 | self.iterator = get_iterator(args.experiment, self.data_path, self.device, self.batch_size)
41 |
42 | self.model = get_model(args.experiment, self.device, self.iterator,
43 | optimizer_name=self.optimizer_name, lr_scheduler=self.lr_scheduler_name,
44 | initial_lr=self.initial_lr)
45 |
46 | def communicate(self):
47 |
48 | if self.fit_by_epoch:
49 | self.model.fit_iterator(train_iterator=self.iterator, n_epochs=self.local_steps)
50 | else:
51 | self.model.fit_batches(iterator=self.iterator, n_steps=self.local_steps)
52 |
53 | for ii, param in enumerate(self.model.net.parameters()):
54 | dist.gather(tensor=param.data, dst=self.world_size - 1)
55 |
56 | for ii, param in enumerate(self.model.net.parameters()):
57 | dist.scatter(tensor=param.data, src=self.world_size - 1)
58 |
--------------------------------------------------------------------------------
/data/femnist/get_file_dirs.py:
--------------------------------------------------------------------------------
1 | """
2 | Creates .pkl files for:
3 | 1. list of directories of every image in 'by_class'
4 | 2. list of directories of every image in 'by_write'
5 | the hierarchical structure of the data is as follows:
6 | - by_class -> classes -> folders containing images -> images
7 | - by_write -> folders containing writers -> writer -> types of images -> images
8 | the directories written into the files are of the form 'raw_data/...'
9 | """
10 | import os
11 | import pickle
12 |
13 |
14 | def save_obj(obj, name):
15 | with open(name + '.pkl', 'wb') as f:
16 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
17 |
18 |
19 | class_files = [] # (class, file directory)
20 | write_files = [] # (writer, file directory)
21 |
22 | class_dir = os.path.join('raw_data', 'by_class')
23 | classes = os.listdir(class_dir)
24 | classes = [c for c in classes if len(c) == 2]
25 |
26 | for cl in classes:
27 | cldir = os.path.join(class_dir, cl)
28 | rel_cldir = os.path.join(class_dir, cl)
29 | subcls = os.listdir(cldir)
30 |
31 | subcls = [s for s in subcls if (('hsf' in s) and ('mit' not in s))]
32 |
33 | for subcl in subcls:
34 | subcldir = os.path.join(cldir, subcl)
35 | rel_subcldir = os.path.join(rel_cldir, subcl)
36 | images = os.listdir(subcldir)
37 | image_dirs = [os.path.join(rel_subcldir, i) for i in images]
38 |
39 | for image_dir in image_dirs:
40 | class_files.append((cl, image_dir))
41 |
42 |
43 | write_dir = os.path.join('raw_data', 'by_write')
44 | write_parts = os.listdir(write_dir)
45 |
46 | for write_part in write_parts:
47 | writers_dir = os.path.join(write_dir, write_part)
48 | rel_writers_dir = os.path.join(write_dir, write_part)
49 | writers = os.listdir(writers_dir)
50 |
51 | for writer in writers:
52 | writer_dir = os.path.join(writers_dir, writer)
53 | rel_writer_dir = os.path.join(rel_writers_dir, writer)
54 | wtypes = os.listdir(writer_dir)
55 |
56 | for wtype in wtypes:
57 | type_dir = os.path.join(writer_dir, wtype)
58 | rel_type_dir = os.path.join(rel_writer_dir, wtype)
59 | images = os.listdir(type_dir)
60 | image_dirs = [os.path.join(rel_type_dir, i) for i in images]
61 |
62 | for image_dir in image_dirs:
63 | write_files.append((writer, image_dir))
64 |
65 | save_obj(
66 | class_files,
67 | os.path.join('intermediate', 'class_file_dirs'))
68 | save_obj(
69 | write_files,
70 | os.path.join('intermediate', 'write_file_dirs'))
--------------------------------------------------------------------------------
/models/inaturalist/resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from utils.optim import get_optimizer, get_lr_scheduler
4 | from torchvision.models import resnet18
5 | from ..model import Model
6 |
7 | NUMBER_CLASSES = 80
8 |
9 |
10 | class INaturalistCNN(Model):
11 | def __init__(self, criterion, metric, device,
12 | optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3, epoch_size=1, coeff=1):
13 | super(Model, self).__init__()
14 |
15 | self.net = resnet18(pretrained=True)
16 | self.net.fc = nn.Linear(self.net.fc.in_features, NUMBER_CLASSES)
17 | self.net = self.net.to(device)
18 | self.criterion = criterion
19 | self.metric = metric
20 | self.device = device
21 | self.coeff = coeff
22 |
23 | self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr)
24 | self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size)
25 |
26 | def fit_iterator_one_epoch(self, iterator):
27 | epoch_loss = 0
28 | epoch_acc = 0
29 |
30 | self.net.train()
31 |
32 | for x, y in iterator:
33 | self.optimizer.zero_grad()
34 |
35 | predictions = self.net(x)
36 |
37 | loss = self.coeff * self.criterion(predictions, y)
38 |
39 | acc = self.metric(predictions, y)
40 |
41 | loss.backward()
42 |
43 | self.optimizer.step()
44 | self.lr_scheduler.step()
45 |
46 | epoch_loss += loss.item()
47 | epoch_acc += acc.item()
48 |
49 | return epoch_loss / len(iterator), epoch_acc / len(iterator)
50 |
51 | def fit_batch(self, iterator, update=True):
52 | self.net.train()
53 |
54 | x, y = next(iter(iterator))
55 |
56 | self.optimizer.zero_grad()
57 |
58 | predictions = self.net(x)
59 |
60 | loss = self.criterion(predictions, y)
61 |
62 | acc = self.metric(predictions, y)
63 |
64 | loss.backward()
65 |
66 | if update:
67 | self.optimizer.step()
68 | self.lr_scheduler.step()
69 |
70 | batch_loss = loss.item()
71 | batch_acc = acc.item()
72 |
73 | return batch_loss, batch_acc
74 |
75 | def evaluate_iterator(self, iterator):
76 | epoch_loss = 0
77 | epoch_acc = 0
78 |
79 | self.net.eval()
80 |
81 | with torch.no_grad():
82 | for x, y in iterator:
83 | predictions = self.net(x)
84 |
85 | loss = self.criterion(predictions, y)
86 |
87 | acc = self.metric(predictions, y)
88 |
89 | epoch_loss += loss.item()
90 | epoch_acc += acc.item()
91 |
92 | return epoch_loss / len(iterator), epoch_acc / len(iterator)
--------------------------------------------------------------------------------
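Usage sketch (illustrative, not part of the repository): INaturalistCNN wraps a pretrained resnet18 whose last layer is replaced by an 80-way classifier; the loss and metric are injected by the caller. A minimal sketch of constructing it by hand, assuming the accuracy metric from utils/metrics.py and an assumed iterator yielding (x, y) batches already on the chosen device; loading the pretrained weights requires network access the first time:

    import torch.nn as nn
    from utils.metrics import accuracy
    from models.inaturalist.resnet import INaturalistCNN

    model = INaturalistCNN(criterion=nn.CrossEntropyLoss(), metric=accuracy, device="cpu",
                           optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3)

    # one local update phase of 5 gradient steps on an assumed iterator `train_iterator`
    # train_loss, train_acc = model.fit_batches(iterator=train_iterator, n_steps=5)

--------------------------------------------------------------------------------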
/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | from torch.multiprocessing import Process
3 | import torch.distributed as dist
4 | import torch
5 |
6 | from utils.args import parse_args
7 | from utils.utils import loggs_to_json, args_to_string
8 | from communication_module.worker import Worker
9 | from communication_module.manager import Peer2PeerManager, CentralizedManager
10 | from communication import CentralizedNetwork, Peer2PeerNetwork, MATCHANetwork, RingNetwork
11 |
12 |
13 | def run(rank, size, arguments):
14 | torch.manual_seed(0)
15 | torch.backends.cudnn.deterministic = True
16 | torch.backends.cudnn.benchmark = False
17 |
18 | if rank == size - 1:
19 | if arguments.architecture == "centralized":
20 | node = CentralizedManager(arguments)
21 | else:
22 | node = Peer2PeerManager(arguments)
23 | else:
24 | node = Worker(arguments, rank)
25 |
26 | for _ in range(arguments.n_rounds):
27 | node.communicate()
28 |
29 |
30 | def init_process(rank, size, arguments, fn, backend='gloo'):
31 | """ Initialize the distributed environment. """
32 | os.environ['MASTER_ADDR'] = '127.0.0.1'
33 | os.environ['MASTER_PORT'] = '29500'
34 | dist.init_process_group(backend, rank=rank, world_size=size)
35 | fn(rank, size, arguments)
36 |
37 |
38 | if __name__ == "__main__":
39 | torch.manual_seed(0)
40 | torch.backends.cudnn.deterministic = True
41 | torch.backends.cudnn.benchmark = False
42 |
43 | args = parse_args()
44 |
45 | if args.parallel:
46 | print("Run experiment in parallel settings using torch.dist..")
47 |
48 | processes = []
49 | world_size = args.num_workers + 1 # We add an extra node that plays the role of network manager
50 | for rank_ in range(world_size):
51 | p = Process(target=init_process, args=(rank_, world_size, args, run))
52 | p.start()
53 | processes.append(p)
54 |
55 | for p in processes:
56 | p.join()
57 |
58 | else:
59 | print("Run experiment in sequential setting..")
60 |
61 | if args.architecture == "centralized":
62 | network = CentralizedNetwork(args)
63 | elif args.architecture == "matcha" or args.architecture == "matcha+" or\
64 | args.architecture == "matcha+mst" or args.architecture == "matcha+ring" or\
65 | args.architecture == "matcha+delta_mbst":
66 | network = MATCHANetwork(args)
67 | elif args.architecture == "dynamic_ring":
68 | network = RingNetwork(args)
69 | else:
70 | network = Peer2PeerNetwork(args)
71 |
72 | for k in range(args.n_rounds):
73 | network.mix()
74 |
75 | network.write_logs()
76 |
77 | loggs_dir = os.path.join("loggs", args_to_string(args))
78 | loggs_to_json(loggs_dir)
79 |
--------------------------------------------------------------------------------
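Usage sketch (illustrative, not part of the repository): an example invocation in the sequential setting, with placeholder values; it assumes the shakespeare data has been preprocessed and that the chosen network (here gaia) has been generated under graph_utils. Adding --parallel instead spawns one process per worker plus a manager via torch.distributed:

    python main.py shakespeare --network_name gaia --architecture matcha \
        --communication_budget 0.5 --n_rounds 100 --bz 128 --local_steps 1 \
        --lr 1e-3 --device cpu

--------------------------------------------------------------------------------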
/models/model.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | import torch
3 | import time
4 |
5 |
6 | def epoch_time(start_time, end_time):
7 | elapsed_time = end_time - start_time
8 | elapsed_mins = int(elapsed_time / 60)
9 | elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
10 | return elapsed_mins, elapsed_secs
11 |
12 |
13 | class Model(ABC):
14 | @abstractmethod
15 | def __init__(self):
16 | pass
17 |
18 | @abstractmethod
19 | def fit_iterator_one_epoch(self, iterator):
20 | pass
21 |
22 | @abstractmethod
23 | def fit_batch(self, iterator):
24 | pass
25 |
26 | @abstractmethod
27 | def evaluate_iterator(self, iterator):
28 | pass
29 |
30 | def update_from_model(self, model):
31 | """
32 | update parameters using gradients from another model
33 | :param model: Model() object, gradients should be precomputed;
34 | """
35 | for param_idx, param in enumerate(self.net.parameters()):
36 | param.grad = list(model.net.parameters())[param_idx].grad.data.clone()
37 |
38 | self.optimizer.step()
39 | self.lr_scheduler.step()
40 |
41 | def fit_batches(self, iterator, n_steps):
42 | global_loss = 0
43 | global_acc = 0
44 |
45 | for step in range(n_steps):
46 | batch_loss, batch_acc = self.fit_batch(iterator)
47 | global_loss += batch_loss
48 | global_acc += batch_acc
49 |
50 | return global_loss / n_steps, global_acc / n_steps
51 |
52 | def fit_iterator(self, train_iterator, val_iterator=None, n_epochs=1, path=None, verbose=0):
53 | best_valid_loss = float('inf')
54 |
55 | for epoch in range(n_epochs):
56 |
57 | start_time = time.time()
58 |
59 | train_loss, train_acc = self.fit_iterator_one_epoch(train_iterator)
60 | if val_iterator:
61 | valid_loss, valid_acc = self.evaluate_iterator(val_iterator)
62 |
63 | end_time = time.time()
64 |
65 | epoch_mins, epoch_secs = epoch_time(start_time, end_time)
66 |
67 | if val_iterator:
68 | if valid_loss < best_valid_loss:
69 | best_valid_loss = valid_loss
70 | if path:
71 | torch.save(self.net, path)
72 |
73 | if verbose:
74 | print(f'Epoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
75 | print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%')
76 | if val_iterator:
77 | print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc * 100:.2f}%')
78 |
79 | def get_param_tensor(self):
80 | param_list = []
81 |
82 | for param in self.net.parameters():
83 | param_list.append(param.data.view(-1, ))
84 |
85 | return torch.cat(param_list)
86 |
--------------------------------------------------------------------------------
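Usage sketch (illustrative, not part of the repository): Model is the abstract training wrapper shared by all tasks; concrete subclasses implement the constructor and the three abstract methods, while the base class provides the multi-step and multi-epoch loops plus parameter flattening. Typical call pattern, with `model` standing for any concrete subclass and `train_iter` / `val_iter` for assumed data iterators:

    # local update phase of 5 gradient steps (what workers do between communications)
    avg_loss, avg_acc = model.fit_batches(iterator=train_iter, n_steps=5)

    # or fit full epochs, checkpointing the best validation model to an assumed path
    model.fit_iterator(train_iterator=train_iter, val_iterator=val_iter,
                       n_epochs=3, path="best_model.pt", verbose=1)

    # flatten all parameters into a single 1-D tensor (useful when mixing models)
    flat_params = model.get_param_tensor()

--------------------------------------------------------------------------------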
/graph_utils/utils/evaluate_throughput.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 |
4 | np.seterr(all="ignore")
5 |
6 |
7 | def cycle_time_decision(G, lambda_0):
8 | """
9 |     Answers the cycle time decision problem: is the cycle time of G at most lambda_0?
10 | :param G: (nx.DiGraph) Strong Weighted Digraph
11 | :param lambda_0: (numerical)
12 | """
13 | A = nx.adjacency_matrix(G).toarray()
14 | new_A = lambda_0 - A
15 |
16 | new_G = nx.from_numpy_matrix(new_A, create_using=nx.DiGraph())
17 |
18 | answer = True
19 | try:
20 | nx.bellman_ford_predecessor_and_distance(new_G, 0)
21 | except nx.NetworkXUnbounded:
22 | answer = False
23 | return answer
24 |
25 |
26 | def evaluate_cycle_time(G, s=0):
27 | """
28 | Evaluate the cycle time of a strong weighted digraph. For now the implementation only supports integer delays
29 | :param G: (nx.DiGraph) strong weighted digraph
30 | :param s: starting point
31 | :return: lambda_G
32 | The cycle time of G
33 | """
34 | n = len(G)
35 | nodes_to_indices = {node: idx for idx, node in enumerate(G.nodes)}
36 |
37 | # Head
38 | D = np.zeros((n + 1, n)) - np.inf
39 | pi = np.zeros((n + 1, n), dtype=np.int64) - 1
40 | D[0, s] = 0
41 |
42 | # Body
43 | for k in range(1, n + 1):
44 | for v in G.nodes:
45 | for u in G.predecessors(v):
46 | if D[k, nodes_to_indices[v]] < D[k - 1, nodes_to_indices[u]] + G.get_edge_data(u, v)['weight']:
47 | D[k, nodes_to_indices[v]] = D[k - 1, nodes_to_indices[u]] \
48 | + G.get_edge_data(u, v)['weight']
49 |
50 | pi[k, nodes_to_indices[v]] = nodes_to_indices[u]
51 |
52 | # Tail
53 | lambda_ = -np.inf
54 | M = np.zeros((n,)) + np.inf
55 | K = np.zeros((n,), dtype=np.int64) - 1
56 | for v in G.nodes:
57 | for k in range(0, n):
58 | if M[nodes_to_indices[v]] > (D[n, nodes_to_indices[v]] - D[k, nodes_to_indices[v]]) / (n - k):
59 | M[nodes_to_indices[v]] = (D[n, nodes_to_indices[v]] - D[k, nodes_to_indices[v]]) / (n - k)
60 | K[nodes_to_indices[v]] = k
61 |
62 | if lambda_ < M[nodes_to_indices[v]]:
63 | lambda_ = M[nodes_to_indices[v]]
64 | v_star = nodes_to_indices[v]
65 |
66 | # Get critical cycle
67 | path = []
68 | actual = v_star
69 | for i in range(n, -1, -1):
70 | path.append(actual)
71 | actual = pi[i, actual]
72 |
73 | path.reverse()
74 |
75 | return lambda_, path, n - K[v_star]
76 |
77 |
78 | def evaluate_throughput(G):
79 | """
80 | Evaluate the throughput of a strong weighted digraph. For now the implementation only supports integer delays
81 | :param G: (nx.DiGraph) strong weighted digraph
82 | :return: The throughput of G
83 | """
84 | lambda_, _, _ = evaluate_cycle_time(G)
85 | return 1 / lambda_
86 |
--------------------------------------------------------------------------------
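Usage sketch (illustrative, not part of the repository): evaluate_cycle_time runs a Karp-style dynamic program over a strongly connected weighted digraph, and the throughput is the inverse of the resulting cycle time (the maximum cycle mean). A small self-contained check on a two-node cycle with integer delays 2 and 4, whose cycle mean is 3; the import path assumes the package layout shown above:

    import networkx as nx
    from graph_utils.utils.evaluate_throughput import evaluate_cycle_time, evaluate_throughput

    G = nx.DiGraph()
    G.add_edge(0, 1, weight=2)
    G.add_edge(1, 0, weight=4)

    cycle_time, critical_cycle, cycle_length = evaluate_cycle_time(G)
    print(cycle_time)              # expected: 3.0 (maximum mean over all cycles)
    print(evaluate_throughput(G))  # expected: 1/3

--------------------------------------------------------------------------------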
/models/synthetic/linear.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from utils.optim import get_lr_scheduler, get_optimizer
4 | from ..model import Model
5 |
6 |
7 | class LinearLayer(nn.Module):
8 | def __init__(self, input_dimension, num_classes):
9 | super(LinearLayer, self).__init__()
10 | self.input_dimension = input_dimension
11 | self.num_classes = num_classes
12 | self.fc = nn.Linear(input_dimension, num_classes)
13 |
14 | def forward(self, x):
15 | return self.fc(x)
16 |
17 |
18 | class LinearModel(Model):
19 | def __init__(self, criterion, metric, device, input_dimension, num_classes,
20 | optimizer_name="adam", lr_scheduler="cyclic", initial_lr=1e-3, epoch_size=1):
21 | super(Model, self).__init__()
22 |
23 | self.criterion = criterion
24 | self.metric = metric
25 | self.device = device
26 |
27 | self.net = LinearLayer(input_dimension, num_classes).to(self.device)
28 |
29 | self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr)
30 | self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size)
31 |
32 | def fit_iterator_one_epoch(self, iterator):
33 | epoch_loss = 0
34 | epoch_acc = 0
35 |
36 | self.net.train()
37 |
38 | for x, y in iterator:
39 | self.optimizer.zero_grad()
40 |
41 | predictions = self.net(x)
42 |
43 | loss = self.criterion(predictions, y.float())
44 |
45 | acc = self.metric(predictions, y)
46 |
47 | loss.backward()
48 |
49 | self.optimizer.step()
50 | self.lr_scheduler.step()
51 |
52 | epoch_loss += loss.item()
53 | epoch_acc += acc.item()
54 |
55 | return epoch_loss / len(iterator), epoch_acc / len(iterator)
56 |
57 | def fit_batch(self, iterator, update=True):
58 | self.net.train()
59 |
60 | x, y = next(iter(iterator))
61 |
62 | self.optimizer.zero_grad()
63 |
64 | predictions = self.net(x)
65 |
66 | loss = self.criterion(predictions, y.float())
67 |
68 | acc = self.metric(predictions, y)
69 |
70 | loss.backward()
71 |
72 | if update:
73 | self.optimizer.step()
74 | self.lr_scheduler.step()
75 |
76 | batch_loss = loss.item()
77 | batch_acc = acc.item()
78 |
79 | return batch_loss, batch_acc
80 |
81 | def evaluate_iterator(self, iterator):
82 | epoch_loss = 0
83 | epoch_acc = 0
84 |
85 | self.net.eval()
86 |
87 | with torch.no_grad():
88 | for x, y in iterator:
89 | predictions = self.net(x)
90 |
91 | loss = self.criterion(predictions, y.float())
92 |
93 | acc = self.metric(predictions, y)
94 |
95 | epoch_loss += loss.item()
96 | epoch_acc += acc.item()
97 |
98 | return epoch_loss / len(iterator), epoch_acc / len(iterator)
99 |
100 |
--------------------------------------------------------------------------------
/models/femnist/cnn.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | from utils.optim import get_optimizer, get_lr_scheduler
4 | import torch
5 | from ..model import Model
6 |
7 |
8 | class CNN(nn.Module):
9 | def __init__(self):
10 | super(CNN, self).__init__()
11 | self.conv1 = nn.Conv2d(3, 32, 3, 1)
12 | self.conv2 = nn.Conv2d(32, 64, 3, 1)
13 | self.dropout1 = nn.Dropout2d(0.25)
14 | self.dropout2 = nn.Dropout2d(0.5)
15 | self.fc1 = nn.Linear(9216, 128)
16 | self.fc2 = nn.Linear(128, 62)
17 |
18 | def forward(self, x):
19 | x = self.conv1(x)
20 | x = F.relu(x)
21 | x = self.conv2(x)
22 | x = F.relu(x)
23 | x = F.max_pool2d(x, 2)
24 | x = self.dropout1(x)
25 | x = torch.flatten(x, 1)
26 | x = self.fc1(x)
27 | x = F.relu(x)
28 | x = self.dropout2(x)
29 | x = self.fc2(x)
30 | return x
31 |
32 |
33 | class FemnistCNN(Model):
34 | def __init__(self, criterion, metric, device,
35 | optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3, epoch_size=1):
36 | super(Model, self).__init__()
37 |
38 | self.net = CNN().to(device)
39 | self.criterion = criterion
40 | self.metric = metric
41 | self.device = device
42 |
43 | self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr)
44 | self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size)
45 |
46 | def fit_iterator_one_epoch(self, iterator):
47 | epoch_loss = 0
48 | epoch_acc = 0
49 |
50 | self.net.train()
51 |
52 | for x, y in iterator:
53 | self.optimizer.zero_grad()
54 |
55 | predictions = self.net(x)
56 |
57 | loss = self.criterion(predictions, y)
58 |
59 | acc = self.metric(predictions, y)
60 |
61 | loss.backward()
62 |
63 | self.optimizer.step()
64 | self.lr_scheduler.step()
65 |
66 | epoch_loss += loss.item()
67 | epoch_acc += acc.item()
68 |
69 | return epoch_loss / len(iterator), epoch_acc / len(iterator)
70 |
71 | def fit_batch(self, iterator, update=True):
72 | self.net.train()
73 |
74 | x, y = next(iter(iterator))
75 |
76 | self.optimizer.zero_grad()
77 |
78 | predictions = self.net(x)
79 |
80 | loss = self.criterion(predictions, y)
81 |
82 | acc = self.metric(predictions, y)
83 |
84 | loss.backward()
85 |
86 | if update:
87 | self.optimizer.step()
88 | self.lr_scheduler.step()
89 |
90 | batch_loss = loss.item()
91 | batch_acc = acc.item()
92 |
93 | return batch_loss, batch_acc
94 |
95 | def evaluate_iterator(self, iterator):
96 | epoch_loss = 0
97 | epoch_acc = 0
98 |
99 | self.net.eval()
100 |
101 | with torch.no_grad():
102 | for x, y in iterator:
103 | predictions = self.net(x)
104 |
105 | loss = self.criterion(predictions, y)
106 |
107 | acc = self.metric(predictions, y)
108 |
109 | epoch_loss += loss.item()
110 | epoch_acc += acc.item()
111 |
112 | return epoch_loss / len(iterator), epoch_acc / len(iterator)
--------------------------------------------------------------------------------
/utils/args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from utils.utils import get_network
3 |
4 |
5 | def parse_args(args_list=None):
6 | parser = argparse.ArgumentParser()
7 | parser.add_argument(
8 | 'experiment',
9 | help='name of experiment',
10 | type=str)
11 | parser.add_argument(
12 | "--use_weighted_average",
13 | help="if used the weighted average will be optimized, otherwise the average is optimized,"
14 | " i,e, all the local functions are treated the same.",
15 | action='store_true'
16 | )
17 | parser.add_argument(
18 | '--network_name',
19 | help='name of the network;',
20 | type=str
21 | )
22 | parser.add_argument(
23 | '--architecture',
24 |         help='architecture to use; possible: complete, centralized, ring, dynamic_ring, mst, original, matcha and its variants (matcha+, matcha+mst, matcha+ring, matcha+delta_mbst);',
25 | default='original'
26 | )
27 | parser.add_argument(
28 | '--communication_budget',
29 | type=float,
30 | help='used to fix communication budget when architecture is matcha;',
31 | default=0.5
32 | )
33 | parser.add_argument(
34 | "--random_ring_proba",
35 | type=float,
36 | help="the probability of using a random ring at each step; only used if architecture is ring",
37 | default=0.5
38 | )
39 | parser.add_argument(
40 | '--parallel',
41 |         help='if chosen, the training will be run in parallel,'
42 |              ' otherwise the training will be run in a sequential fashion;',
43 | action='store_true'
44 | )
45 | parser.add_argument(
46 | '--fit_by_epoch',
47 | help='if chosen each local step corresponds to one epoch,'
48 | ' otherwise each local step corresponds to one gradient step',
49 | action='store_true'
50 | )
51 | parser.add_argument(
52 | '--n_rounds',
53 | help='number of communication rounds;',
54 | type=int,
55 | default=1
56 | )
57 | parser.add_argument(
58 | '--bz',
59 | help='batch_size;',
60 | type=int,
61 | default=1
62 | )
63 | parser.add_argument(
64 | '--local_steps',
65 | help='number of local steps before communication;',
66 | type=int,
67 | default=1
68 | )
69 | parser.add_argument(
70 | '--log_freq',
71 |         help='frequency of logging, in communication rounds;',
72 | type=int,
73 | default=1
74 | )
75 | parser.add_argument(
76 | '--device',
77 |         help='device to use, either "cpu" or "cuda";',
78 | type=str,
79 | default="cpu"
80 | )
81 | parser.add_argument(
82 | '--optimizer',
83 | help='optimizer to be used for the training;',
84 | type=str,
85 | default="adam"
86 | )
87 | parser.add_argument(
88 | "--lr",
89 | type=float,
90 | help='learning rate',
91 | default=1e-3
92 | )
93 | parser.add_argument(
94 | "--decay",
95 | help='learning rate decay scheme to be used;'
96 |              ' possible are "cyclic", "sqrt", "linear" and "constant" (no learning rate decay);'
97 |              ' default is "constant"',
98 | type=str,
99 | default="constant"
100 | )
101 |
102 | if args_list:
103 | args = parser.parse_args(args_list)
104 | else:
105 | args = parser.parse_args()
106 |
107 | network = get_network(args.network_name, args.architecture)
108 | args.num_workers = network.number_of_nodes()
109 |
110 | return args
111 |
--------------------------------------------------------------------------------
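Usage sketch (illustrative, not part of the repository): parse_args also accepts an explicit argument list, which is convenient for driving experiments programmatically; note that it calls get_network, so the corresponding network files must already exist. The values below are placeholders:

    from utils.args import parse_args

    args = parse_args(["synthetic",
                       "--network_name", "gaia",
                       "--architecture", "ring",
                       "--n_rounds", "10",
                       "--bz", "32",
                       "--lr", "0.01"])

    print(args.num_workers)  # inferred from the number of nodes in the chosen network

--------------------------------------------------------------------------------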
/graph_utils/utils/tsp_christofides.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | from random import randint
3 |
4 | import numpy as np
5 | import networkx as nx
6 |
7 | from networkx.algorithms.matching import max_weight_matching
8 | from networkx.algorithms.euler import eulerian_circuit
9 |
10 |
11 | def christofides_tsp(graph, starting_node=0):
12 | """
13 | Christofides TSP algorithm
14 | http://www.dtic.mil/dtic/tr/fulltext/u2/a025602.pdf
15 | Args:
16 | graph: 2d numpy array matrix
17 |         starting_node: starting node of the TSP tour
18 |     Returns:
19 |         tour given by the Christofides TSP algorithm
20 | Examples:
21 | >>> import numpy as np
22 | >>> graph = np.array([[ 0, 300, 250, 190, 230],
23 | >>> [300, 0, 230, 330, 150],
24 | >>> [250, 230, 0, 240, 120],
25 | >>> [190, 330, 240, 0, 220],
26 | >>> [230, 150, 120, 220, 0]])
27 | >>> christofides_tsp(graph)
28 | """
29 |
30 | mst = minimal_spanning_tree(graph, 'Prim', starting_node=0)
31 | odd_degree_nodes = list(_get_odd_degree_vertices(mst))
32 | odd_degree_nodes_ix = np.ix_(odd_degree_nodes, odd_degree_nodes)
33 | nx_graph = nx.from_numpy_array(-1 * graph[odd_degree_nodes_ix])
34 | matching = max_weight_matching(nx_graph, maxcardinality=True)
35 | euler_multigraph = nx.MultiGraph(mst)
36 | for edge in matching:
37 | euler_multigraph.add_edge(odd_degree_nodes[edge[0]], odd_degree_nodes[edge[1]],
38 | weight=graph[odd_degree_nodes[edge[0]]][odd_degree_nodes[edge[1]]])
39 | euler_tour = list(eulerian_circuit(euler_multigraph, source=starting_node))
40 | path = list(itertools.chain.from_iterable(euler_tour))
41 | return _remove_repeated_vertices(path, starting_node)[:-1]
42 |
43 |
44 | def _get_odd_degree_vertices(graph):
45 | """
46 | Finds all the odd degree vertices in graph
47 | Args:
48 | graph: 2d np array as adj. matrix
49 | Returns:
50 | Set of vertices that have odd degree
51 | """
52 | odd_degree_vertices = set()
53 | for index, row in enumerate(graph):
54 | if len(np.nonzero(row)[0]) % 2 != 0:
55 | odd_degree_vertices.add(index)
56 | return odd_degree_vertices
57 |
58 |
59 | def _remove_repeated_vertices(path, starting_node):
60 | path = list(dict.fromkeys(path).keys())
61 | path.append(starting_node)
62 | return path
63 |
64 |
65 | def minimal_spanning_tree(graph, mode='Prim', starting_node=None):
66 | """
67 | Args:
68 | graph: weighted adjacency matrix as 2d np.array
69 | mode: method for calculating minimal spanning tree
70 | starting_node: node number to start construction of minimal spanning tree (Prim)
71 | Returns:
72 | minimal spanning tree as 2d array
73 | """
74 |
75 | if mode == 'Prim':
76 | return _minimal_spanning_tree_prim(graph, starting_node)
77 |
78 |
79 | def _minimal_spanning_tree_prim(graph, starting_node):
80 | """
81 | Args:
82 | graph: weighted adj. matrix as 2d np.array
83 | starting_node: node number to start construction of minimal spanning tree
84 | Returns:
85 |         minimal spanning tree as 2d array calculated by Prim's algorithm
86 | """
87 |
88 | node_count = len(graph)
89 | all_nodes = [i for i in range(node_count)]
90 |
91 | if starting_node is None:
92 | starting_node = randint(0, node_count-1)
93 |
94 | unvisited_nodes = all_nodes
95 | visited_nodes = [starting_node]
96 | unvisited_nodes.remove(starting_node)
97 | mst = np.zeros((node_count, node_count))
98 |
99 | while len(visited_nodes) != node_count:
100 | selected_subgraph = graph[np.array(visited_nodes)[:, None], np.array(unvisited_nodes)]
101 |         # mask non-existent edges (zero entries) so they don't break the argmin
102 | min_edge_index = np.unravel_index(np.ma.masked_equal(selected_subgraph, 0, copy=False).argmin(),
103 | selected_subgraph.shape)
104 | edge_from = visited_nodes[min_edge_index[0]]
105 | edge_to = unvisited_nodes[min_edge_index[1]]
106 | mst[edge_from, edge_to] = graph[edge_from, edge_to]
107 | mst[edge_to, edge_from] = graph[edge_from, edge_to]
108 | unvisited_nodes.remove(edge_to)
109 | visited_nodes.append(edge_to)
110 | return mst
--------------------------------------------------------------------------------
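Usage sketch (illustrative, not part of the repository): the docstring example above, made runnable; christofides_tsp returns the tour as a list of node indices starting from starting_node, without repeating the start at the end:

    import numpy as np
    from graph_utils.utils.tsp_christofides import christofides_tsp

    graph = np.array([[  0, 300, 250, 190, 230],
                      [300,   0, 230, 330, 150],
                      [250, 230,   0, 240, 120],
                      [190, 330, 240,   0, 220],
                      [230, 150, 120, 220,   0]])

    tour = christofides_tsp(graph, starting_node=0)
    print(tour)  # a permutation of [0, 1, 2, 3, 4] beginning with node 0

--------------------------------------------------------------------------------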
/data/synthetic/generate_data.py:
--------------------------------------------------------------------------------
1 | """ From https://github.com/TalwalkarLab/leaf/blob/master/data/synthetic/"""
2 | import argparse
3 | import json
4 | import os
5 | import numpy as np
6 | from scipy.special import softmax
7 |
8 | NUM_DIM = 10
9 | PROB_CLUSTERS = [1.0]
10 |
11 |
12 | class SyntheticDataset:
13 | def __init__(
14 | self,
15 | num_classes=2,
16 | seed=931231,
17 | num_dim=NUM_DIM,
18 | prob_clusters=[0.5, 0.5]):
19 |
20 | np.random.seed(seed)
21 |
22 | self.num_classes = num_classes
23 | self.num_dim = num_dim
24 | self.num_clusters = len(prob_clusters)
25 | self.prob_clusters = prob_clusters
26 |
27 | self.side_info_dim = self.num_clusters
28 |
29 | self.Q = np.random.normal(
30 | loc=0.0, scale=1.0, size=(self.num_dim + 1, self.num_classes, self.side_info_dim))
31 |
32 | self.Sigma = np.zeros((self.num_dim, self.num_dim))
33 | for i in range(self.num_dim):
34 | self.Sigma[i, i] = (i + 1) ** (-1.2)
35 |
36 | self.means = self._generate_clusters()
37 |
38 | def get_task(self, num_samples):
39 | cluster_idx = np.random.choice(
40 | range(self.num_clusters), size=None, replace=True, p=self.prob_clusters)
41 | new_task = self._generate_task(self.means[cluster_idx], cluster_idx, num_samples)
42 | return new_task
43 |
44 | def _generate_clusters(self):
45 | means = []
46 | for i in range(self.num_clusters):
47 | loc = np.random.normal(loc=0, scale=1., size=None)
48 | mu = np.random.normal(loc=loc, scale=1., size=self.side_info_dim)
49 | means.append(mu)
50 | return means
51 |
52 | def _generate_x(self, num_samples):
53 | B = np.random.normal(loc=0.0, scale=1.0, size=None)
54 | loc = np.random.normal(loc=B, scale=1.0, size=self.num_dim)
55 |
56 | samples = np.ones((num_samples, self.num_dim + 1))
57 | samples[:, 1:] = np.random.multivariate_normal(
58 | mean=loc, cov=self.Sigma, size=num_samples)
59 |
60 | return samples
61 |
62 | def _generate_y(self, x, cluster_mean):
63 | model_info = np.random.normal(loc=cluster_mean, scale=0.1, size=cluster_mean.shape)
64 | w = np.matmul(self.Q, model_info)
65 |
66 | num_samples = x.shape[0]
67 | prob = softmax(np.matmul(x, w) + np.random.normal(loc=0., scale=0.1, size=(num_samples, self.num_classes)),
68 | axis=1)
69 |
70 | y = np.argmax(prob, axis=1)
71 | return y, w, model_info
72 |
73 | def _generate_task(self, cluster_mean, cluster_id, num_samples):
74 | x = self._generate_x(num_samples)
75 | y, w, model_info = self._generate_y(x, cluster_mean)
76 |
77 | # now that we have y, we can remove the bias coeff
78 | x = x[:, 1:]
79 |
80 | return {'x': x, 'y': y, 'w': w, 'model_info': model_info, 'cluster': cluster_id}
81 |
82 |
83 | def main():
84 | args = parse_args()
85 | np.random.seed(args.seed)
86 |
87 | num_samples = get_num_samples(args.num_workers)
88 | dataset = SyntheticDataset(
89 | num_classes=args.num_classes, prob_clusters=PROB_CLUSTERS, num_dim=args.dimension, seed=args.seed)
90 | tasks = [dataset.get_task(s) for s in num_samples]
91 | users, num_samples, user_data = to_leaf_format(tasks)
92 | save_json('all_data', 'all_data.json', users, num_samples, user_data, args.num_classes)
93 |
94 |
95 | def get_num_samples(num_tasks, min_num_samples=5, max_num_samples=1000):
96 | num_samples = np.random.lognormal(3, 2, (num_tasks)).astype(int)
97 | num_samples = [min(s + min_num_samples, max_num_samples) for s in num_samples]
98 | return num_samples
99 |
100 |
101 | def to_leaf_format(tasks):
102 | users, num_samples, user_data = [], [], {}
103 |
104 | for i, t in enumerate(tasks):
105 | x, y = t['x'].tolist(), t['y'].tolist()
106 | u_id = str(i)
107 |
108 | users.append(u_id)
109 | num_samples.append(len(y))
110 | user_data[u_id] = {'x': x, 'y': y}
111 |
112 | return users, num_samples, user_data
113 |
114 |
115 | def save_json(json_dir, json_name, users, num_samples, user_data, num_classes):
116 | if not os.path.exists(json_dir):
117 | os.makedirs(json_dir)
118 |
119 | json_file = {
120 | 'users': users,
121 | 'num_samples': num_samples,
122 | 'user_data': user_data,
123 | "num_classes": num_classes
124 | }
125 |
126 | with open(os.path.join(json_dir, json_name), 'w') as outfile:
127 | json.dump(json_file, outfile)
128 |
129 |
130 | def parse_args():
131 | parser = argparse.ArgumentParser()
132 |
133 | parser.add_argument(
134 | '--num_workers',
135 | help='number of workers;',
136 | type=int,
137 | required=True)
138 | parser.add_argument(
139 | '--num_classes',
140 | help='number of classes;',
141 | type=int,
142 | required=True)
143 | parser.add_argument(
144 | '--dimension',
145 | help='data dimension;',
146 | type=int,
147 | required=True)
148 | parser.add_argument(
149 | '--seed',
150 | help='seed for the random processes;',
151 | type=int,
152 | default=931231,
153 | required=False)
154 | return parser.parse_args()
155 |
156 |
157 | if __name__ == '__main__':
158 | main()
159 |
--------------------------------------------------------------------------------
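Usage sketch (illustrative, not part of the repository): generate_data.py can be invoked directly (generate_data.sh presumably wraps it); the example below uses placeholder values and writes all_data/all_data.json:

    python generate_data.py --num_workers 20 --num_classes 3 --dimension 10 --seed 0

--------------------------------------------------------------------------------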
/models/sent140/lstm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from utils.optim import get_optimizer, get_lr_scheduler
4 | from ..model import Model
5 |
6 |
7 | class LSTM(nn.Module):
8 | def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
9 | bidirectional, dropout, pad_idx):
10 | super().__init__()
11 |
12 | self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
13 |
14 | self.lstm = nn.LSTM(embedding_dim,
15 | hidden_dim,
16 | num_layers=n_layers,
17 | bidirectional=bidirectional,
18 | dropout=dropout)
19 |
20 | self.fc = nn.Linear(hidden_dim * 2, output_dim)
21 |
22 | self.dropout = nn.Dropout(dropout)
23 |
24 | def forward(self, text, text_lengths):
25 | # text = [sent len, batch size]
26 | self.lstm.flatten_parameters()
27 | embedded = self.dropout(self.embedding(text))
28 |
29 | # pack sequence
30 | packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)
31 |
32 | packed_output, (hidden, cell) = self.lstm(packed_embedded)
33 |
34 | # unpack sequence
35 | _, _ = nn.utils.rnn.pad_packed_sequence(packed_output)
36 |
37 | hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))
38 |
39 | return self.fc(hidden)
40 |
41 |
42 | class LSTMSentiment(Model):
43 | def __init__(self, iterator, criterion, metric, device, optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3,
44 | epoch_size=1, embedding_dim=100, hidden_dim=256, output_dim=1, n_layers=2, bidirectional=True,
45 | dropout=0.5):
46 | """
47 |
48 |         :param iterator: torchtext iterator whose 'text' field provides the vocabulary and pretrained vectors
49 |         :param criterion: loss function
50 |         :param metric: metric function, e.g. binary_accuracy
51 |         :param device: "cpu" or "cuda:x"
52 |         :param optimizer_name: name of the optimizer, see utils/optim.py
53 |         :param lr_scheduler: name of the learning rate decay scheme, see utils/optim.py
54 |         :param initial_lr: initial learning rate
55 |         :param embedding_dim: dimension of the (frozen) word embeddings
56 |         :param hidden_dim: hidden dimension of the LSTM
57 |         :param output_dim: output dimension; 1 for binary sentiment classification
58 |         :param n_layers: number of LSTM layers
59 |         :param bidirectional: whether the LSTM is bidirectional
60 |         :param dropout: dropout probability
61 | """
62 | super(Model, self).__init__()
63 |
64 | self.device = device
65 | self.criterion = criterion
66 | self.metric = metric
67 |
68 | text_field = iterator.dataset.fields['text']
69 |
70 | pad_idx = text_field.vocab.stoi[text_field.pad_token]
71 | unk_idx = text_field.vocab.stoi[text_field.unk_token]
72 |
73 | self.net = LSTM(vocab_size=len(text_field.vocab),
74 | embedding_dim=embedding_dim,
75 | hidden_dim=hidden_dim,
76 | output_dim=output_dim,
77 | n_layers=n_layers,
78 | bidirectional=bidirectional,
79 | dropout=dropout,
80 | pad_idx=pad_idx).to(device)
81 |
82 | # initialize embeddings
83 | pretrained_embeddings = text_field.vocab.vectors
84 | self.net.embedding.weight.data.copy_(pretrained_embeddings)
85 |
86 | self.net.embedding.weight.data[unk_idx] = torch.zeros(embedding_dim).to(self.device)
87 | self.net.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim).to(self.device)
88 |
89 | # Freeze embedding
90 | self.net.embedding.weight.requires_grad = False
91 |
92 | self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr)
93 | self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size)
94 |
95 | def fit_iterator_one_epoch(self, iterator):
96 | epoch_loss = 0
97 | epoch_acc = 0
98 |
99 | self.net.train()
100 |
101 | for batch in iterator:
102 | self.optimizer.zero_grad()
103 |
104 | text, text_lengths = batch.text
105 |
106 | predictions = self.net(text, text_lengths).squeeze(1)
107 |
108 | loss = self.criterion(predictions, batch.label)
109 |
110 | acc = self.metric(predictions, batch.label)
111 |
112 | loss.backward()
113 |
114 | self.optimizer.step()
115 |
116 | self.lr_scheduler.step()
117 |
118 | epoch_loss += loss.item()
119 | epoch_acc += acc.item()
120 |
121 | return epoch_loss / len(iterator), epoch_acc / len(iterator)
122 |
123 | def fit_batch(self, iterator, update=True):
124 | self.net.train()
125 |
126 | batch = next(iter(iterator))
127 | self.optimizer.zero_grad()
128 |
129 | text, text_lengths = batch.text
130 |
131 | predictions = self.net(text, text_lengths).squeeze(1)
132 |
133 | loss = self.criterion(predictions, batch.label)
134 |
135 | acc = self.metric(predictions, batch.label)
136 |
137 | loss.backward()
138 |
139 | if update:
140 | self.optimizer.step()
141 | self.lr_scheduler.step()
142 |
143 | return loss.item(), acc.item()
144 |
145 | def evaluate_iterator(self, iterator):
146 | epoch_loss = 0
147 | epoch_acc = 0
148 |
149 | self.net.eval()
150 |
151 | with torch.no_grad():
152 | for batch in iterator:
153 | text, text_lengths = batch.text
154 |
155 | predictions = self.net(text, text_lengths).squeeze(1)
156 |
157 | loss = self.criterion(predictions, batch.label)
158 |
159 | acc = self.metric(predictions, batch.label)
160 |
161 | epoch_loss += loss.item()
162 | epoch_acc += acc.item()
163 |
164 | return epoch_loss / len(iterator), epoch_acc / len(iterator)
165 |
--------------------------------------------------------------------------------
/data/shakespeare/split_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import random
4 | import time
5 |
6 |
7 | def iid_divide(l, g):
8 | """
9 | divide list l among g groups
10 | each group has either int(len(l)/g) or int(len(l)/g)+1 elements
11 | returns a list of groups
12 |
13 | """
14 | num_elems = len(l)
15 | group_size = int(len(l)/g)
16 | num_big_groups = num_elems - g * group_size
17 | num_small_groups = g - num_big_groups
18 | glist = []
19 | for i in range(num_small_groups):
20 | glist.append(l[group_size * i : group_size * (i + 1)])
21 | bi = group_size*num_small_groups
22 | group_size += 1
23 | for i in range(num_big_groups):
24 | glist.append(l[bi + group_size * i:bi + group_size * (i + 1)])
25 | return glist
26 |
27 |
28 | parser = argparse.ArgumentParser()
29 |
30 | parser.add_argument('--num_workers',
31 | help=('number of workers/users;'
32 | 'default: 1;'),
33 | type=int,
34 | default=1)
35 | parser.add_argument('--iid',
36 | help='sample iid;',
37 | action="store_true")
38 | parser.add_argument('--niid',
39 | help="sample niid;",
40 | dest='iid', action='store_false')
41 | parser.add_argument('--s_frac',
42 | help='fraction of all data to sample; default: 0.1;',
43 | type=float,
44 | default=0.1)
45 | parser.add_argument('--tr_frac',
46 | help='fraction in training set; default: 0.8;',
47 | type=float,
48 | default=0.8)
49 | parser.add_argument('--seed',
50 | help='args.seed for random partitioning of test/train data',
51 | type=int,
52 | default=None)
53 |
54 | parser.set_defaults(user=False)
55 |
56 | args = parser.parse_args()
57 |
58 |
59 | if __name__ == "__main__":
60 | print('------------------------------')
61 | print('generating training and test sets')
62 |
63 | rng_seed = (args.seed if (args.seed is not None and args.seed >= 0) else int(time.time()))
64 | rng = random.Random(rng_seed)
65 |
66 | train_file = os.path.join("train", "train.txt")
67 | test_file = os.path.join("test", "test.txt")
68 |
69 | data_dir = os.path.join('raw_data', 'by_play_and_character')
70 |
71 | if args.iid:
72 |         # TODO: factor out this part
73 | all_lines = []
74 | for file_name in os.listdir(data_dir):
75 | file_path = os.path.join(data_dir, file_name)
76 | with open(file_path, "r") as f:
77 | lines = f.readlines()
78 | all_lines += lines
79 |
80 | tot_num_samples = len(all_lines)
81 | num_new_samples = int(args.s_frac * tot_num_samples)
82 |
83 | indices = [i for i in range(tot_num_samples)]
84 | new_indices = rng.sample(indices, num_new_samples)
85 |
86 | indices_groups = iid_divide(new_indices, args.num_workers)
87 |
88 | for id_w, worker_indices in enumerate(indices_groups):
89 | curr_num_samples = len(worker_indices)
90 |
91 | num_train_samples = max(1, int(args.tr_frac * curr_num_samples))
92 | num_test_samples = curr_num_samples - num_train_samples
93 |
94 | train_indices = rng.sample(worker_indices, num_train_samples)
95 | test_indices = list(set(worker_indices) - set(train_indices))
96 |
97 | local_train_file = os.path.join("train", "{}.txt".format(id_w))
98 |
99 | for (file_, indices) in [(train_file, train_indices),
100 | (local_train_file, train_indices),
101 | (test_file, test_indices)]:
102 |
103 | for sample_idx in indices:
104 | sample = all_lines[sample_idx]
105 |
106 | with open(file_, "a") as f:
107 | f.write(sample)
108 | else:
109 | writers = os.listdir(data_dir)
110 |
111 | rng.shuffle(writers)
112 | writers_by_workers = iid_divide(writers, args.num_workers)
113 |
114 | for id_w, worker_writers in enumerate(writers_by_workers):
115 | all_worker_lines = []
116 | for writer in worker_writers:
117 | file_path = os.path.join(data_dir, writer)
118 | with open(file_path, "r") as f:
119 | lines = f.readlines()
120 |
121 | all_worker_lines += lines
122 |
123 | tot_num_samples = len(all_worker_lines)
124 | num_new_samples = int(args.s_frac * tot_num_samples)
125 |
126 | indices = [i for i in range(tot_num_samples)]
127 | new_indices = rng.sample(indices, num_new_samples)
128 |
129 | new_worker_lines = [all_worker_lines[i] for i in new_indices]
130 |
131 | num_train_samples = max(1, int(args.tr_frac * num_new_samples))
132 | num_test_samples = num_new_samples - num_train_samples
133 |
134 | train_indices = rng.sample(new_indices, num_train_samples)
135 | test_indices = list(set(new_indices) - set(train_indices))
136 |
137 | local_train_file = os.path.join("train", "{}.txt".format(id_w))
138 |
139 | for (file_, indices) in [(train_file, train_indices),
140 | (local_train_file, train_indices),
141 | (test_file, test_indices)]:
142 |
143 | for sample_idx in indices:
144 | sample = all_worker_lines[sample_idx]
145 |
146 | with open(file_, "a") as f:
147 | f.write(sample)
148 |
--------------------------------------------------------------------------------
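Worked example (illustrative, not part of the repository): iid_divide is the partitioning helper used above; it splits a list into g groups whose sizes differ by at most one:

    iid_divide(list(range(10)), 3)
    # -> [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]

--------------------------------------------------------------------------------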
/graph_utils/time_simulator.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import networkx as nx
4 | import numpy as np
5 |
6 | import geopy.distance
7 | from geopy.geocoders import Nominatim
8 |
9 | geolocator = Nominatim(user_agent="delay", timeout=20)
10 |
11 |
12 | def get_zoo_topology(file_path,
13 | bandwidth=1e9,
14 | upload_capacity_at_edge=35 * 1e6,
15 | download_capacity_at_edge=144 * 1e6):
16 | """
17 | Read zoo_topology data into nx.DiGraph();
18 | in the output graph each edge has two information: "capacity" and "distance";
19 | each node has two information: "upload capacity" and "download capacity";
20 | :param file_path : path to .gml file with topology information
21 | :param bandwidth: (float) represent links capacity,
22 | used when information not available in .gml file
23 | :param upload_capacity_at_edge: https://en.wikipedia.org/wiki/Bit_rate for information
24 | :param download_capacity_at_edge: https://en.wikipedia.org/wiki/Bit_rate for information
25 | :return: G_z (nx.DiGraph)
26 | """
27 |
28 | network_data = nx.read_gml(file_path)
29 |
30 | G_z = nx.Graph()
31 | G_z.add_nodes_from(network_data)
32 |
33 | # add nodes capacity
34 | nx.set_node_attributes(G_z, upload_capacity_at_edge * 1e-3, 'upload_capacity')
35 | nx.set_node_attributes(G_z, download_capacity_at_edge * 1e-3, "download_capacity")
36 |
37 | # add edges data
38 | for u, v, data in network_data.edges.data():
39 | # get distance
40 | try:
41 | distance = data["distance"]
42 |
43 |         except KeyError:  # edge data is a dict, so a missing "distance" raises KeyError
44 | try:
45 | coords_1 = (network_data.nodes(data=True)[u]["Latitude"],
46 | network_data.nodes(data=True)[u]["Longitude"])
47 |
48 | coords_2 = (network_data.nodes(data=True)[v]["Latitude"],
49 | network_data.nodes(data=True)[v]["Longitude"])
50 |
51 | except KeyError:
52 | time.sleep(1.2) # To avoid Service time out Error
53 |
54 | geo = geolocator.geocode(u, timeout=20)
55 |
56 | coords_1 = (geo.latitude, geo.longitude)
57 |
58 | time.sleep(1.2) # To avoid Service time out Error
59 |
60 | geo = geolocator.geocode(v, timeout=20)
61 |
62 | coords_2 = (geo.latitude, geo.longitude)
63 |
64 | distance = geopy.distance.distance(coords_1, coords_2).km
65 |
66 | # add_edge
67 | G_z.add_edge(u, v, capacity=bandwidth * 1e-3, distance=distance)
68 |
69 | return G_z
70 |
71 |
72 | def initialize_delays(underlay, overlay, model_size):
73 | """
74 | compute delays between nodes ignoring download congestion effect
75 | :param underlay: (nx.Graph())
76 | :param overlay: (nx.Graph())
77 | :param model_size: message_size in bits, see https://keras.io/applications/ for examples
78 | :return: nxGraph()
79 | """
80 | for u, v, data in overlay.edges(data=True):
81 | overlay.edges[u, v]["delay"] = overlay.edges[u, v]["weight"]
82 |
83 | return overlay
84 |
85 |
86 | def init_iteration_end_time(overlay, computation_time=0):
87 | """
88 |
90 |     :param overlay: (nx.DiGraph) communication graph
91 |     :param computation_time: initial "end_time" value assigned to every node
92 |     :return: overlay with the "end_time" node attribute initialized
92 | """
93 | nx.set_node_attributes(overlay, computation_time, "end_time")
94 | return overlay
95 |
96 |
97 | def get_iteration_end_time(underlay, overlay, model_size, computation_time):
98 | """
99 | Compute the end times of next iteration having the end times for current iteration.
100 |     :param underlay: (nx.Graph) physical topology providing per-node upload/download capacities
101 |     :param overlay: (nx.DiGraph) communication graph with per-edge "delay" and per-node "end_time"
102 |     :param model_size: message size in bits
103 |     :param computation_time: local computation time per iteration
104 |     :return: overlay with updated "end_time" node attributes
105 | """
106 | out_degrees = dict(overlay.out_degree())
107 | for i, j in overlay.edges:
108 | overlay.edges[i, j]["t"] = overlay.edges[i, j]["delay"] + overlay.nodes[i]["end_time"]
109 |
110 | def get_edge_time(e):
111 | return overlay.edges[e[0], e[1]]["t"]
112 |
113 | for j in overlay.nodes:
114 | overlay.nodes[j]["end_time"] = 0
115 |
116 | # get all the input edges to "j" sorted by t_{ij}
117 | edges = []
118 | for i in overlay.predecessors(j):
119 | edges.append((i, j))
120 |
121 | if len(edges) > 0:
122 | edges.sort(key=get_edge_time)
123 |
124 | t_prev = get_edge_time(edges[0]) + model_size / underlay.nodes[j]["download_capacity"]
125 |
126 | for edge in edges[1:]:
127 | if get_edge_time(edge) <= t_prev + model_size / underlay.nodes[j]["download_capacity"]:
128 | t_prev = t_prev + model_size / underlay.nodes[j]["download_capacity"]
129 | else:
130 | t_prev = get_edge_time(edge)
131 |
132 | else:
133 | t_prev = 0
134 |
135 | overlay.nodes[j]["end_time"] = t_prev + computation_time + \
136 | (model_size * out_degrees[j]) / underlay.nodes[j]["upload_capacity"]
137 |
138 | return overlay
139 |
140 |
141 | def simulate_network(underlay, overlay, n_iterations, model_size=1e8, computation_time=0):
142 | """
143 |
144 |     :param underlay: (nx.Graph) physical topology with per-node capacities
145 |     :param overlay: (nx.DiGraph) communication graph; edge "weight" is interpreted as the link delay
146 |     :param n_iterations: number of iterations to simulate
147 |     :param model_size: message size in bits
148 |     :param computation_time: local computation time per iteration
149 |     :return: np.array of shape (number of nodes, n_iterations) with per-node iteration end times
150 | """
151 | time_evolution = np.zeros((overlay.number_of_nodes(), n_iterations))
152 |
153 | overlay = initialize_delays(underlay, overlay, model_size)
154 | overlay = init_iteration_end_time(overlay, computation_time)
155 |
156 | for iteration in range(n_iterations):
157 | overlay = get_iteration_end_time(underlay, overlay, model_size, computation_time)
158 | for ii, (_, end_time) in enumerate(overlay.nodes.data("end_time")):
159 | time_evolution[ii, iteration] = end_time
160 |
161 | return time_evolution
162 |
--------------------------------------------------------------------------------
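Usage sketch (illustrative, not part of the repository): simulate_network propagates per-node iteration end times over an overlay whose edges carry a "weight" delay, while the underlay provides per-node upload/download capacities. A minimal sketch on a two-node overlay with toy capacities and delays (not taken from any .gml file); importing the module requires geopy, as pinned in requirements.txt:

    import networkx as nx
    from graph_utils.time_simulator import simulate_network

    # toy underlay with per-node capacities (illustrative values)
    underlay = nx.Graph()
    underlay.add_nodes_from([0, 1])
    nx.set_node_attributes(underlay, 35e3, "upload_capacity")
    nx.set_node_attributes(underlay, 144e3, "download_capacity")

    # directed overlay whose edge "weight" is interpreted as the link delay
    overlay = nx.DiGraph()
    overlay.add_edge(0, 1, weight=5.0)
    overlay.add_edge(1, 0, weight=5.0)

    times = simulate_network(underlay, overlay, n_iterations=3, model_size=1e6)
    print(times.shape)  # (2, 3): end time of each node at each of the 3 iterations

--------------------------------------------------------------------------------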
/models/shakespeare/gru.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from utils.optim import get_optimizer, get_lr_scheduler
4 | from torch.autograd import Variable
5 | import string
6 | from ..model import Model
7 |
8 |
9 | class RNN(nn.Module):
10 | def __init__(self, input_size, embed_size, hidden_size, output_size, n_layers):
11 | super(RNN, self).__init__()
12 | self.input_size = input_size
13 | self.hidden_size = hidden_size
14 | self.embed_size = embed_size
15 | self.output_size = output_size
16 | self.n_layers = n_layers
17 |
18 | self.encoder = nn.Embedding(input_size, embed_size)
19 | self.gru = nn.GRU(embed_size, hidden_size, n_layers)
20 | self.decoder = nn.Linear(hidden_size, output_size)
21 |
22 | def forward(self, input_, hidden):
23 | self.gru.flatten_parameters()
24 | batch_size = input_.size(0)
25 | encoded = self.encoder(input_)
26 | output, hidden = self.gru(encoded.view(1, batch_size, -1), hidden)
27 | output = self.decoder(output.view(batch_size, -1))
28 | return output, hidden
29 |
30 | def init_hidden(self, batch_size):
31 | return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))
32 |
33 |
34 | class NextCharDecoder(Model):
35 | def __init__(self, criterion, metric, device,
36 | optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3, epoch_size=1,
37 | embed_size=16, hidden_size=256, n_layers=2):
38 | super(Model, self).__init__()
39 |
40 | vocab_size = len(string.printable)
41 | self.net = RNN(vocab_size, embed_size, hidden_size, vocab_size, n_layers).to(device)
42 | self.criterion = criterion
43 | self.metric = metric
44 | self.device = device
45 |
46 | self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr)
47 | self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size)
48 |
49 | def fit_iterator_one_epoch(self, iterator):
50 | self.net.train()
51 |
52 | epoch_loss = 0
53 | epoch_acc = 0
54 |
55 | for inp, target in iterator:
56 |
57 | inp = inp.to(self.device)
58 | target = target.to(self.device)
59 |
60 | hidden = self.net.init_hidden(inp.size(0)).to(self.device)
61 | self.optimizer.zero_grad()
62 |
63 | loss = 0
64 | acc = 0
65 |
66 | for c in range(iterator.dataset.chunk_len):
67 | output, hidden = self.net(inp[:, c], hidden)
68 | loss += self.criterion(output.view(inp.size(0), -1), target[:, c])
69 | acc += self.metric(output, target[:, c]).item()
70 |
71 | loss /= iterator.dataset.chunk_len
72 | acc /= iterator.dataset.chunk_len
73 |
74 | loss.backward()
75 |
76 | self.optimizer.step()
77 | self.lr_scheduler.step()
78 |
79 | epoch_loss += loss.item()
80 | epoch_acc += acc
81 |
82 | return epoch_loss / len(iterator), epoch_acc / len(iterator)
83 |
84 | def fit_batch(self, iterator, update=True):
85 | self.net.train()
86 |
87 | inp, target = next(iter(iterator))
88 | inp = inp.to(self.device)
89 | target = target.to(self.device)
90 |
91 | hidden = self.net.init_hidden(inp.size(0)).to(self.device)
92 | self.optimizer.zero_grad()
93 |
94 | loss = 0
95 | acc = 0
96 |
97 | for c in range(iterator.dataset.chunk_len):
98 | output, hidden = self.net(inp[:, c], hidden)
99 | loss += self.criterion(output.view(inp.size(0), -1), target[:, c])
100 | acc += self.metric(output, target[:, c]).item()
101 |
102 | loss /= iterator.dataset.chunk_len
103 | acc /= iterator.dataset.chunk_len
104 |
105 | loss.backward()
106 |
107 | if update:
108 | self.optimizer.step()
109 | self.lr_scheduler.step()
110 |
111 | return loss.item(), acc
112 |
113 | def evaluate_iterator(self, iterator):
114 | self.net.eval()
115 |
116 | epoch_loss = 0
117 | epoch_acc = 0
118 |
119 | for inp, target in iterator:
120 |
121 | inp = inp.to(self.device)
122 | target = target.to(self.device)
123 |
124 | hidden = self.net.init_hidden(inp.size(0)).to(self.device)
125 |
126 | loss = 0
127 | acc = 0
128 | for c in range(iterator.dataset.chunk_len):
129 | output, hidden = self.net(inp[:, c], hidden)
130 | loss += self.criterion(output.view(inp.size(0), -1), target[:, c])
131 | acc += self.metric(output, target[:, c]).item()
132 |
133 | loss /= iterator.dataset.chunk_len
134 | acc /= iterator.dataset.chunk_len
135 |
136 | epoch_loss += loss.item()
137 | epoch_acc += acc
138 | return epoch_loss / len(iterator), epoch_acc / len(iterator)
139 |
140 | def generate(self, prime_str="Wh", predict_len=200, temperature=0.8):
141 | all_characters = string.printable
142 | hidden = self.net.init_hidden(1).to(self.device)
143 |
144 | prime_input = torch.zeros(1, len(prime_str)).long().to(self.device)
145 | for c in range(len(prime_str)):
146 | prime_input[0, c] = all_characters.index(prime_str[c])
147 |
148 | predicted = prime_str
149 |
150 | for p in range(len(prime_str) - 1):
151 | _, hidden = self.net(prime_input[:, p], hidden)
152 |
153 | inp = prime_input[:, -1]
154 |
155 | for p in range(predict_len):
156 | output, hidden = self.net(inp, hidden)
157 |
158 | output_dist = output.data.view(-1).div(temperature).exp()
159 | top_i = torch.multinomial(output_dist, 1)[0]
160 |
161 | predicted_char = all_characters[top_i]
162 | predicted += predicted_char
163 |
164 | inp = torch.zeros(1, len(predicted_char)).long().to(self.device)
165 | for c in range(len(predicted_char)):
166 | inp[0, c] = all_characters.index(predicted_char[c])
167 |
168 | return predicted
169 |
--------------------------------------------------------------------------------
/data/inaturalist/split_data.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 | import pickle
5 | import time
6 | import random
7 | from collections import Counter
8 |
9 | import networkx as nx
10 | import numpy as np
11 |
12 | import geopy.distance
13 | from geopy.geocoders import Nominatim
14 |
15 |
16 | class FileException(FileNotFoundError):
17 | def __init__(self, message):
18 | super().__init__(message)
19 |
20 |
21 | parser = argparse.ArgumentParser()
22 |
23 | parser.add_argument('--network',
24 | help="name of the network to use, should be present in /graph_utils/data; default: amazon_us",
25 | type=str,
26 | default="amazon_us")
27 | parser.add_argument('--num_categories',
28 | help="number of classes to include, default: 80",
29 | type=int,
30 | default="80")
31 | parser.add_argument('--s_frac',
32 |                     help='fraction of all data to sample; default: 1.0;',
33 | type=float,
34 | default=1)
35 | parser.add_argument('--tr_frac',
36 |                     help='fraction in training set; default: 0.9;',
37 | type=float,
38 | default=0.9)
39 | parser.add_argument('--seed',
40 | help='args.seed for random partitioning of test/train data',
41 | type=int,
42 | default=None)
43 |
44 | args = parser.parse_args()
45 |
46 |
47 | if __name__ == "__main__":
48 | network_path = os.path.abspath(os.path.join(os.getcwd(), "..", "..", "graph_utils/data", args.network + ".gml"))
49 |
50 | if not os.path.isfile(network_path):
51 |         raise FileException("Network file not found: {}".format(network_path))
52 |
53 | rng_seed = (args.seed if (args.seed is not None and args.seed >= 0) else int(time.time()))
54 | rng = random.Random(rng_seed)
55 | np.random.seed(rng_seed)
56 |
57 | # Get workers locations
58 | network_path = os.path.abspath(os.path.join(os.getcwd(), "..", "..", "graph_utils/data", args.network + ".gml"))
59 | workers_network = nx.read_gml(network_path, label="label")
60 | nodes_locs = []
61 | geolocator = Nominatim(user_agent="delay", timeout=20)
62 | for node in workers_network.nodes():
63 | time.sleep(1.0) # To avoid Service time out Error
64 | geo = geolocator.geocode(node, timeout=20)
65 | nodes_locs.append((geo.latitude, geo.longitude))
66 |
67 | # Get the information for images and locations
68 | with open(os.path.join("raw_data", "train2018_locations.json")) as f:
69 | train_imgs_locations = json.load(f)
70 |
71 | with open(os.path.join("raw_data", "val2018_locations.json")) as f:
72 | val_imgs_locations = json.load(f)
73 |
74 | with open(os.path.join("raw_data", "train2018.json")) as f:
75 | train_images_data = json.load(f)
76 |
77 | with open(os.path.join("raw_data", "val2018.json")) as f:
78 | val_images_data = json.load(f)
79 |
80 | all_data = dict()
81 | for images_data in [train_images_data, val_images_data]:
82 | for img, annotation in zip(images_data["images"], images_data["annotations"]):
83 | img_id = img["id"]
84 | img_path = ["raw_data/"] + img["file_name"].split("/")[1:]
85 | img_path = "/".join(img_path)
86 | category_id = annotation["category_id"]
87 |
88 | all_data[img_id] = {"path": img_path, "class": category_id}
89 |
90 | for imgs_locations in [train_imgs_locations, val_imgs_locations]:
91 | for location in imgs_locations:
92 | img_id = location["id"]
93 | all_data[img_id]["lat"] = location["lat"]
94 | all_data[img_id]["lon"] = location["lon"]
95 |
96 | # Get most common categories
97 | all_categories = []
98 | for img_id in all_data:
99 | all_categories.append(all_data[img_id]['class'])
100 |
101 | c = Counter(all_categories)
102 | most_common_categories = c.most_common(args.num_categories)
103 | most_common_categories = [i for (i, j) in most_common_categories]
104 |
105 | relabel_categories = {category: idx for idx, category in enumerate(most_common_categories)}
106 | most_common_categories = set(most_common_categories)
107 |
108 | # Assign images to closest workers
109 | imgs_by_workers = {worker_id: [] for worker_id in range(workers_network.number_of_nodes())}
110 |
111 | for img_id in all_data:
112 | category = all_data[img_id]['class']
113 | if category in most_common_categories:
114 | # Get closest worker to node
115 | coord_img = (all_data[img_id]['lat'], all_data[img_id]['lon'])
116 | distances = np.array([geopy.distance.distance(coord_img, coord_node).km for coord_node in nodes_locs])
117 | worker_id = np.argmin(distances)
118 |
119 | img_data = (all_data[img_id]["path"], relabel_categories[category])
120 |
121 | imgs_by_workers[worker_id].append(img_data)
122 |
123 | # Split data to train and test
124 | train_data = []
125 | test_data = []
126 |
127 | for worker_id in imgs_by_workers.keys():
128 | all_worker_data = imgs_by_workers[worker_id]
129 |
130 | tot_num_samples = len(all_worker_data)
131 | num_new_samples = int(args.s_frac * tot_num_samples)
132 |
133 | indices = [i for i in range(tot_num_samples)]
134 | new_indices = rng.sample(indices, num_new_samples)
135 |
136 | num_train_samples = max(1, int(args.tr_frac * num_new_samples))
137 | num_test_samples = num_new_samples - num_train_samples
138 |
139 | train_indices = rng.sample(new_indices, num_train_samples)
140 | test_indices = list(set(new_indices) - set(train_indices))
141 |
142 | worker_data = [all_worker_data[ii] for ii in train_indices]
143 | train_data += [all_worker_data[ii] for ii in train_indices]
144 | test_data += [all_worker_data[ii] for ii in test_indices]
145 |
146 | with open('train/{}.pkl'.format(worker_id), 'wb') as f:
147 | pickle.dump(worker_data, f, pickle.HIGHEST_PROTOCOL)
148 |
149 | with open('train/train.pkl', 'wb') as f:
150 | pickle.dump(train_data, f, pickle.HIGHEST_PROTOCOL)
151 |
152 | with open('test/test.pkl', 'wb') as f:
153 | pickle.dump(test_data, f, pickle.HIGHEST_PROTOCOL)
154 |
--------------------------------------------------------------------------------
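Usage sketch (illustrative, not part of the repository): the script is meant to be run from data/inaturalist once the raw iNaturalist 2018 metadata is available under raw_data (presumably fetched by preprocess.sh); it geocodes every node of the chosen network via Nominatim, so it needs internet access. An example invocation with placeholder values:

    python split_data.py --network amazon_us --num_categories 80 --s_frac 1.0 --tr_frac 0.9 --seed 1234

--------------------------------------------------------------------------------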
/graph_utils/utils/matcha.py:
--------------------------------------------------------------------------------
1 | import os
2 | import csv
3 |
4 | import cvxpy as cp
5 | import numpy as np
6 | import networkx as nx
7 |
8 | from .matching_decomposition import matching_decomposition
9 |
10 |
11 | class RandomTopologyGenerator(object):
12 | """
13 | Attributes:
14 | - laplacian_matrices: List of numpy arrays; each array represents the laplacian matrix of a matching;
15 | - communication_budget: Constraint controlling the sum of the weights,
16 | and equivalently controlling the expected communication time;
17 | - path_to_history_file: path to .csv file used to save the history of selected matching at each step
18 | - activation_probas: np.array of the same size as "laplacian_matrices";
19 |     - current_matching_activations: list of booleans, each of them represents whether a matching is used;
20 |     - matching_list: list of nx.Graph() objects;
21 |     - alpha: float to be used in generating the mixing matrix
22 | """
23 | def __init__(self, network, communication_budget, network_save_path=None, path_to_history_file=None):
24 | self.network = network
25 | self.communication_budget = communication_budget
26 | self.path_to_history_file = path_to_history_file
27 | self.network_save_path = network_save_path
28 |
29 | # eliminate self loops
30 | self.network.remove_edges_from(nx.selfloop_edges(self.network))
31 |
32 | self.matching_list, self.laplacian_matrices = matching_decomposition(self.network)
33 |
34 | self.number_workers = self.laplacian_matrices[0].shape[0]
35 | self.number_matching = len(self.laplacian_matrices)
36 |
37 | # Initialize generator parameters
38 | self.activation_probas = self.get_matching_activation_probabilities()
39 | self.activation_probas = np.clip(self.activation_probas, 0., 1.)
40 |
41 | self.alpha, self.spectral_norm = self.get_mixing_matrix_parameter()
42 |
43 | # Initialize
44 | self.current_step = -1
45 |         self.current_matching_activations = np.ones(self.number_matching)
46 | self.current_topology = self.network
47 |
48 | if self.network_save_path:
49 | nx.write_gml(self.network, self.network_save_path)
50 |
51 | def get_matching_activation_probabilities(self):
52 | """
53 | Computes a set of activation probabilities that maximize the connectivity of the expected graph
54 | given a communication time constraint;
55 |         For given Laplacian matrices, it computes optimal weights to sum them, in order to maximize
56 |         the second-smallest eigenvalue (algebraic connectivity) of their weighted sum;
57 |         the budget constraint equivalently controls the expected communication time;
58 |         See https://arxiv.org/pdf/1905.09435.pdf (Formula 5) for details;
59 | :return: np.array of the same size as "laplacian_matrices"; each entry represents the probability
60 | of activating a sub-graph;
61 | """
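   |         # The SDP below maximizes gamma such that sum_i p_i * L_i >= gamma * I - beta * (1 1^T),
   |         # i.e. gamma lower-bounds the algebraic connectivity (second-smallest eigenvalue)
   |         # of the expected Laplacian, under the budget sum_i p_i <= communication_budget * number_matching.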
62 | p = cp.Variable(self.number_matching)
63 | gamma = cp.Variable()
64 | beta = cp.Variable()
65 | constraints = [p <= 1, p >= 0,
66 | p.T @ np.ones(self.number_matching) <= self.communication_budget * self.number_matching,
67 | gamma * np.eye(self.number_workers) - beta * np.ones((self.number_workers, self.number_workers))
68 | << cp.sum([p[i] * self.laplacian_matrices[i] for i in range(self.number_matching)])]
69 | objective = cp.Maximize(gamma)
70 | problem = cp.Problem(objective, constraints)
71 |
72 | problem.solve()
73 |
74 | return p.value
75 |
76 | def get_mixing_matrix_parameter(self):
77 | """
78 | Computes optimal equal weight mixing matrix parameter;
79 | i.e. computes alpha in order to optimize the spectral gap of the mixing matrix W, where
80 |         W = I - alpha * L_bar, with I being the identity matrix and L_bar the expected Laplacian matrix;
81 |         See https://arxiv.org/pdf/1905.09435.pdf (Formula 6 and 7) for details;
82 |         :return: alpha (float), the mixing-matrix parameter;
83 |                  rho (float), the corresponding spectral norm;
84 | """
85 | L_bar = np.zeros((self.number_workers, self.number_workers))
86 | L_tilde = np.zeros((self.number_workers, self.number_workers))
87 |
88 | for idx in range(self.number_matching):
89 | L_bar += self.activation_probas[idx] * self.laplacian_matrices[idx]
90 | L_tilde += self.activation_probas[idx] * (1 - self.activation_probas[idx]) * self.laplacian_matrices[idx]
91 |
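   |         # Roughly: minimize rho, an upper bound (via the relaxation beta >= alpha ** 2) on the spectral
   |         # norm of E[W^T W] - (1/n) 1 1^T, where W = I - alpha * L is the random mixing matrix;
   |         # a smaller rho means a faster consensus (mixing) rate.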
92 | rho = cp.Variable()
93 | alpha = cp.Variable()
94 | beta = cp.Variable()
95 |
96 | objective = cp.Minimize(rho)
97 |
98 | constraints = [alpha ** 2 - beta <= 0,
99 | np.eye(self.number_workers) - 2 * alpha * L_bar + beta * (L_bar @ L_bar + 2 * L_tilde)
100 | - (1 / self.number_workers) * np.ones((self.number_workers, self.number_workers))
101 | << rho * np.eye(self.number_workers)]
102 |
103 | prob = cp.Problem(objective, constraints)
104 | prob.solve()
105 |
106 | return alpha.value, rho.value
107 |
108 | def step(self):
109 | """
110 |         Generates a random topology at each iteration: given the activation probabilities, an independent
111 |         Bernoulli random variable B_j is drawn for each matching in "matching_list";
112 |         the activated topology is the union of the activated matchings.
113 | The mixing matrix is then computed as W = I - alpha * L, where L is the Laplacian matrix
114 | of the activated topology;
115 | """
116 | self.current_topology = nx.Graph()
117 | laplacian_matrix = np.zeros((self.number_workers, self.number_workers))
118 |
119 | self.current_matching_activations = np.random.binomial(n=1, p=self.activation_probas)
120 | while self.current_matching_activations.sum() == 0:
121 | self.current_matching_activations = np.random.binomial(n=1, p=self.activation_probas)
122 |
123 | for idx, matching_activation in enumerate(self.current_matching_activations):
124 | if matching_activation:
125 | self.current_topology = nx.compose(self.current_topology, self.matching_list[idx])
126 | laplacian_matrix += self.laplacian_matrices[idx]
127 |
128 | mixing_matrix = np.eye(self.number_workers) - self.alpha * laplacian_matrix
129 |
130 | self.current_topology = nx.from_numpy_matrix(mixing_matrix)
131 |
132 | self.current_step += 1
133 |
134 | if self.path_to_history_file:
135 | with open(self.path_to_history_file, "a") as csvfile:
136 | writer = csv.writer(csvfile, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
137 | writer.writerow(self.current_matching_activations.tolist())
138 |
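   | # Example usage (a sketch, not part of the original pipeline): build a generator over a small
   | # complete graph with a communication budget of 0.5, then sample a few random topologies.
   | #
   | #   import networkx as nx
   | #   generator = RandomTopologyGenerator(nx.complete_graph(5), communication_budget=0.5)
   | #   for _ in range(3):
   | #       generator.step()
   | #       print(generator.current_matching_activations, generator.current_topology.edges(data=True))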
--------------------------------------------------------------------------------
/data/sent140/split_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import csv
3 | import argparse
4 | import json
5 | import random
6 | import time
7 | import numpy as np
8 |
9 |
10 | def iid_divide(l, g):
11 | """
12 | divide list l among g groups
13 | each group has either int(len(l)/g) or int(len(l)/g)+1 elements
14 | returns a list of groups
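   |     e.g. iid_divide(list(range(10)), 3) -> [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]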
15 |
16 | """
17 | num_elems = len(l)
18 | group_size = int(len(l)/g)
19 | num_big_groups = num_elems - g * group_size
20 | num_small_groups = g - num_big_groups
21 | glist = []
22 | for i in range(num_small_groups):
23 | glist.append(l[group_size * i : group_size * (i + 1)])
24 | bi = group_size*num_small_groups
25 | group_size += 1
26 | for i in range(num_big_groups):
27 | glist.append(l[bi + group_size * i:bi + group_size * (i + 1)])
28 | return glist
29 |
30 |
31 | parser = argparse.ArgumentParser()
32 |
33 | parser.add_argument('--num_workers',
34 | help=('number of workers/users;'
35 | 'default: 1;'),
36 | type=int,
37 | default=1)
38 | parser.add_argument('--iid',
39 | help='sample iid;',
40 | action="store_true")
41 | parser.add_argument('--niid',
42 | help="sample niid;",
43 | dest='iid', action='store_false')
44 | parser.add_argument('--s_frac',
45 |                     help='fraction of all data to sample; default: 0.01;',
46 | type=float,
47 | default=0.01)
48 | parser.add_argument('--tr_frac',
49 | help='fraction in training set; default: 0.8;',
50 | type=float,
51 | default=0.8)
52 | parser.add_argument('--seed',
53 | help='args.seed for random partitioning of test/train data',
54 | type=int,
55 | default=None)
56 |
57 | parser.set_defaults(user=False)
58 |
59 | args = parser.parse_args()
60 |
61 | if __name__ == "__main__":
62 | print('------------------------------')
63 | print('generating training and test sets')
64 |
65 | rng_seed = (args.seed if (args.seed is not None and args.seed >= 0) else int(time.time()))
66 | rng = random.Random(rng_seed)
67 | np.random.seed(rng_seed)
68 |
69 | train_file = os.path.join("train", "train.json")
70 | test_file = os.path.join("test", "test.json")
71 |
72 | data_dir = os.path.join('raw_data', 'all_data.csv')
73 | with open(data_dir, 'rt', encoding='ISO-8859-1') as f:
74 | reader = csv.reader(f)
75 | data = list(reader)
76 |
77 | data = sorted(data, key=lambda x: x[4])
78 |
79 | if args.iid:
80 | tot_num_samples = len(data)
81 | num_new_samples = int(args.s_frac * tot_num_samples)
82 |
83 | indices = [i for i in range(tot_num_samples)]
84 | new_indices = rng.sample(indices, num_new_samples)
85 |
86 | indices_groups = iid_divide(new_indices, args.num_workers)
87 |
88 | for id_w, worker_indices in enumerate(indices_groups):
89 | curr_num_samples = len(worker_indices)
90 |
91 | num_train_samples = max(1, int(args.tr_frac * curr_num_samples))
92 | num_test_samples = curr_num_samples - num_train_samples
93 |
94 | train_indices = rng.sample(worker_indices, num_train_samples)
95 | test_indices = list(set(worker_indices) - set(train_indices))
96 |
97 | local_train_file = os.path.join("train", "{}.json".format(id_w))
98 |
99 | for (file_, indices) in [(local_train_file, train_indices),
100 | (train_file, train_indices),
101 | (test_file, test_indices)]:
102 |
103 | for sample_idx in indices:
104 | sample = data[sample_idx]
105 | row = dict()
106 |
107 | row['idx'] = sample[1]
108 | row["time"] = sample[2]
109 | row['query'] = sample[3]
110 | row["user"] = sample[4]
111 | row["text"] = sample[5]
112 | row["label"] = 1 if sample[0] == "4" else 0
113 |
114 | with open(file_, "a") as f:
115 | json.dump(row, f)
116 | f.write("\n")
117 |
118 | else:
119 | all_writers = set()
120 |
121 | for i in range(len(data)):
122 | row = data[i]
123 | all_writers.add(row[4])
124 |
125 | all_writers = list(all_writers)
126 |
127 | data_by_writers = {k: [] for k in all_writers}
128 |
129 | for i in range(len(data)):
130 | row = data[i]
131 | data_by_writers[row[4]].append(row)
132 |
133 | num_writers_by_user = np.random.lognormal(5, 1.5, args.num_workers) + 5
134 | num_writers_by_user *= (len(all_writers) / num_writers_by_user.sum())
135 | num_samples = np.floor(num_writers_by_user).astype(np.int64)
136 |
137 | writers_by_workers = []
138 | current_idx = 0
139 | for worker_id in range(args.num_workers):
140 | writers_by_workers.append(all_writers[current_idx: current_idx + num_samples[worker_id]])
141 |             current_idx += num_samples[worker_id]
142 |
143 | for id_w, writers in enumerate(writers_by_workers):
144 | all_worker_data = []
145 | for writer in writers:
146 | all_worker_data += data_by_writers[writer]
147 |
148 | tot_num_samples = len(all_worker_data)
149 | curr_num_samples = int(args.s_frac * tot_num_samples)
150 |
151 | indices = [i for i in range(tot_num_samples)]
152 | worker_indices = rng.sample(indices, curr_num_samples)
153 |
154 | num_train_samples = max(1, int(args.tr_frac * curr_num_samples))
155 | num_test_samples = curr_num_samples - num_train_samples
156 |
157 | train_indices = rng.sample(worker_indices, num_train_samples)
158 | test_indices = list(set(worker_indices) - set(train_indices))
159 |
160 | local_train_file = os.path.join("train", "{}.json".format(id_w))
161 |
162 | for (file_, indices) in [(local_train_file, train_indices),
163 | (train_file, train_indices),
164 | (test_file, test_indices)]:
165 |
166 | for sample_idx in indices:
167 | sample = data[sample_idx]
168 | row = dict()
169 |
170 | row['idx'] = sample[1]
171 | row["time"] = sample[2]
172 | row['query'] = sample[3]
173 | row["user"] = sample[4]
174 | row["text"] = sample[5]
175 | row["label"] = 1 if sample[0] == "4" else 0
176 |
177 | with open(file_, "a") as f:
178 | json.dump(row, f)
179 | f.write("\n")
180 |
181 |
182 |
--------------------------------------------------------------------------------
/data/femnist/split_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import argparse
4 | import random
5 | import time
6 | import numpy as np
7 |
8 |
9 | def relabel_class(c):
10 | """
11 | maps hexadecimal class value (string) to a decimal number
12 | returns:
13 | - 0 through 9 for classes representing respective numbers
14 | - 10 through 35 for classes representing respective uppercase letters
15 | - 36 through 61 for classes representing respective lowercase letters
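   |     e.g. relabel_class('30') -> 0, relabel_class('41') -> 10, relabel_class('61') -> 36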
16 | """
17 | if c.isdigit() and int(c) < 40:
18 | return int(c) - 30
19 | elif int(c, 16) <= 90: # uppercase
20 | return int(c, 16) - 55
21 | else:
22 | return int(c, 16) - 61
23 |
24 |
25 | def iid_divide(l, g):
26 | """
27 | divide list l among g groups
28 | each group has either int(len(l)/g) or int(len(l)/g)+1 elements
29 | returns a list of groups
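   |     e.g. iid_divide(list(range(10)), 3) -> [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]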
30 |
31 | """
32 | num_elems = len(l)
33 | group_size = int(len(l)/g)
34 | num_big_groups = num_elems - g * group_size
35 | num_small_groups = g - num_big_groups
36 | glist = []
37 | for i in range(num_small_groups):
38 | glist.append(l[group_size * i : group_size * (i + 1)])
39 | bi = group_size*num_small_groups
40 | group_size += 1
41 | for i in range(num_big_groups):
42 | glist.append(l[bi + group_size * i:bi + group_size * (i + 1)])
43 | return glist
44 |
45 |
46 | parser = argparse.ArgumentParser()
47 |
48 | parser.add_argument('--num_workers',
49 | help=('number of workers/users;'
50 | 'default: 1;'),
51 | type=int,
52 | default=1)
53 | parser.add_argument('--iid',
54 | help='sample iid;',
55 | action="store_true")
56 | parser.add_argument('--niid',
57 | help="sample niid;",
58 | dest='iid', action='store_false')
59 | parser.add_argument('--s_frac',
60 |                     help='fraction of all data to sample; default: 0.01;',
61 | type=float,
62 | default=0.01)
63 | parser.add_argument('--tr_frac',
64 | help='fraction in training set; default: 0.8;',
65 | type=float,
66 | default=0.8)
67 | parser.add_argument('--seed',
68 | help='args.seed for random partitioning of test/train data',
69 | type=int,
70 | default=None)
71 |
72 | parser.set_defaults(user=False)
73 |
74 | args = parser.parse_args()
75 |
76 | if __name__ == "__main__":
77 | print('------------------------------')
78 | print('generating training and test sets')
79 |
80 | rng_seed = (args.seed if (args.seed is not None and args.seed >= 0) else int(time.time()))
81 | rng = random.Random(rng_seed)
82 | np.random.seed(rng_seed)
83 |
84 | train_file = os.path.join("train", "train.json")
85 | test_file = os.path.join("test", "test.json")
86 |
87 | data_dir = os.path.join('intermediate', 'images_by_writer.pkl')
88 | with open(data_dir, 'rb') as f:
89 | all_data = pickle.load(f)
90 |
91 | if args.iid:
92 | combined_data = []
93 |
94 | for (writer_id, l) in all_data:
95 | combined_data += l
96 |
97 | for ii, (path, c) in enumerate(combined_data):
98 | combined_data[ii] = (path, relabel_class(c))
99 |
100 | tot_num_samples = len(combined_data)
101 | num_new_samples = int(args.s_frac * tot_num_samples)
102 |
103 | indices = [i for i in range(tot_num_samples)]
104 | new_indices = rng.sample(indices, num_new_samples)
105 |
106 | indices_groups = iid_divide(new_indices, args.num_workers)
107 |
108 | train_data = []
109 | test_data = []
110 |
111 | for id_w, worker_indices in enumerate(indices_groups):
112 | curr_num_samples = len(worker_indices)
113 |
114 | num_train_samples = max(1, int(args.tr_frac * curr_num_samples))
115 | num_test_samples = curr_num_samples - num_train_samples
116 |
117 | train_indices = rng.sample(worker_indices, num_train_samples)
118 |             test_indices = list(set(worker_indices) - set(train_indices))
119 |
120 | worker_data = [combined_data[ii] for ii in train_indices]
121 | train_data += [combined_data[ii] for ii in train_indices]
122 | test_data += [combined_data[ii] for ii in test_indices]
123 |
124 | with open('train/{}.pkl'.format(id_w), 'wb') as f:
125 | pickle.dump(worker_data, f, pickle.HIGHEST_PROTOCOL)
126 |
127 | with open('train/train.pkl', 'wb') as f:
128 | pickle.dump(train_data, f, pickle.HIGHEST_PROTOCOL)
129 |
130 | with open('test/test.pkl', 'wb') as f:
131 | pickle.dump(test_data, f, pickle.HIGHEST_PROTOCOL)
132 |
133 | else:
134 | writer_ids = [i for i in range(len(all_data))]
135 | rng.shuffle(writer_ids)
136 |
137 | num_writers_by_user = np.random.lognormal(5, 1.5, args.num_workers) + 5
138 | num_writers_by_user *= (len(writer_ids) / num_writers_by_user.sum())
139 | num_samples = np.floor(num_writers_by_user).astype(np.int64)
140 |
141 | writers_by_workers = []
142 | current_idx = 0
143 | for worker_id in range(args.num_workers):
144 | writers_by_workers.append(writer_ids[current_idx: current_idx + num_samples[worker_id]])
145 |             current_idx += num_samples[worker_id]
146 |
147 | train_data = []
148 | test_data = []
149 |
150 | for id_w, writer_indices in enumerate(writers_by_workers):
151 | all_worker_data = []
152 | for writer_id in writer_indices:
153 | all_worker_data += all_data[writer_id][1]
154 |
155 | for ii, (path, c) in enumerate(all_worker_data):
156 | all_worker_data[ii] = (path, relabel_class(c))
157 |
158 | tot_num_samples = len(all_worker_data)
159 | curr_num_samples = int(args.s_frac * tot_num_samples)
160 |
161 | indices = [i for i in range(tot_num_samples)]
162 | worker_indices = rng.sample(indices, curr_num_samples)
163 |
164 | num_train_samples = max(1, int(args.tr_frac * curr_num_samples))
165 | num_test_samples = curr_num_samples - num_train_samples
166 |
167 | train_indices = rng.sample(worker_indices, num_train_samples)
168 | test_indices = list(set(worker_indices) - set(train_indices))
169 |
170 | worker_data = [all_worker_data[ii] for ii in train_indices]
171 | train_data += [all_worker_data[ii] for ii in train_indices]
172 | test_data += [all_worker_data[ii] for ii in test_indices]
173 |
174 | with open('train/{}.pkl'.format(id_w), 'wb') as f:
175 | pickle.dump(worker_data, f, pickle.HIGHEST_PROTOCOL)
176 |
177 | with open('train/train.pkl', 'wb') as f:
178 | pickle.dump(train_data, f, pickle.HIGHEST_PROTOCOL)
179 |
180 | with open('test/test.pkl', 'wb') as f:
181 | pickle.dump(test_data, f, pickle.HIGHEST_PROTOCOL)
182 |
--------------------------------------------------------------------------------
/graph_utils/generate_networks.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 | import networkx as nx
5 |
6 | from utils.evaluate_throughput import evaluate_cycle_time
7 | from utils.utils import get_connectivity_graph, add_upload_download_delays, get_delta_mbst_overlay,\
8 | get_star_overlay, get_ring_overlay, get_matcha_cycle_time
9 |
10 | # Model size in bit
11 | MODEL_SIZE_DICT = {"synthetic": 4354,
12 | "shakespeare": 3385747,
13 | "femnist": 4843243,
14 | "sent140": 19269416,
15 | "inaturalist": 44961717}
16 |
17 | # Model computation time in ms
18 | COMPUTATION_TIME_DICT = {"synthetic": 1.5,
19 | "shakespeare": 389.6,
20 | "femnist": 4.6,
21 | "sent140": 9.8,
22 | "inaturalist": 25.4}
23 |
24 |
25 | parser = argparse.ArgumentParser()
26 |
27 | parser.add_argument('name',
28 | help='name of the network to use;')
29 | parser.add_argument("--experiment",
30 | type=str,
31 | help="name of the experiment that will be run on the network;"
32 | "possible are femnist, inaturalist, synthetic, shakespeare, sent140;"
33 |                          "if not specified, --model_size will be used as the model size;",
34 | default=None)
35 | parser.add_argument('--model_size',
36 | type=float,
37 | help="size of the model that will be transmitted on the network in bit;"
38 |                          "ignored if --experiment is specified;",
39 | default=1e8)
40 | parser.add_argument("--local_steps",
41 | type=int,
42 | help="number of local steps, used to get computation time",
43 | default=1)
44 | parser.add_argument("--upload_capacity",
45 | type=float,
46 | help="upload capacity at edge in bit/s; default=1e32",
47 | default=1e32)
48 | parser.add_argument("--download_capacity",
49 | type=float,
50 | help="download capacity at edge in bit/s; default=1e32",
51 | default=1e32)
52 | parser.add_argument("--communication_budget",
53 | type=float,
54 | help="communication budget to use with matcha; will be ignored if name is not matcha",
55 | default=0.5)
56 | parser.add_argument("--default_capacity",
57 | type=float,
58 | help="default capacity (in bit/s) to use on links with unknown capacity",
59 | default=1e9)
60 | parser.add_argument('--centrality',
61 | help="centrality type; default: load;",
62 | default="load")
63 |
64 | parser.set_defaults(user=False)
65 |
66 | args = parser.parse_args()
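   | # scale the default capacity to bit/ms so that dividing the model size (in bit) by a capacity
   | # yields a delay in ms (cf. the explicit "* 1e3" used for the upload/download delays below)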
67 | args.default_capacity *= 1e-3
68 |
69 | if __name__ == "__main__":
70 | if args.experiment is not None:
71 | args.model_size = MODEL_SIZE_DICT[args.experiment]
72 | args.computation_time = args.local_steps * COMPUTATION_TIME_DICT[args.experiment]
73 |
74 | upload_delay = (args.model_size / args.upload_capacity) * 1e3
75 | download_delay = (args.model_size / args.download_capacity) * 1e3
76 |
77 | result_dir = "./results/{}".format(args.name)
78 | if not os.path.exists(result_dir):
79 | os.makedirs(result_dir)
80 |
81 | results_txt_path = os.path.join(result_dir, "cycle_time.txt")
82 | results_file = open(results_txt_path, "w")
83 |
84 | path_to_graph = "./data/{}.gml".format(args.name)
85 |
86 | underlay = nx.read_gml(path_to_graph)
87 |
88 | print("Number of Workers: {}".format(underlay.number_of_nodes()))
89 | print("Number of links: {}".format(underlay.number_of_edges()))
90 |
91 | nx.set_node_attributes(underlay, upload_delay, 'uploadDelay')
92 | nx.set_node_attributes(underlay, download_delay, "downloadDelay")
93 |
94 | nx.write_gml(underlay.copy(), os.path.join(result_dir, "original.gml"))
95 |
96 | connectivity_graph = get_connectivity_graph(underlay, args.default_capacity)
97 |
98 | # MST
99 | for u, v, data in connectivity_graph.edges(data=True):
100 | weight = args.computation_time + data["latency"] + args.model_size / data["availableBandwidth"]
101 | connectivity_graph.add_edge(u, v, weight=weight)
102 |
103 | MST = nx.minimum_spanning_tree(connectivity_graph.copy(), weight="weight")
104 |
105 | MST = MST.to_directed()
106 |
107 | cycle_time, _, _ = evaluate_cycle_time(add_upload_download_delays(MST, args.computation_time, args.model_size))
108 |
109 | nx.write_gml(MST, os.path.join(result_dir, "mst.gml"))
110 |     print("Cycle time for MST architecture: {0:.1f} ms".format(cycle_time))
111 | results_file.write("MST {}\n".format(cycle_time))
112 |
113 | # delta-MBST
114 | delta_mbst, best_cycle_time, best_delta = \
115 | get_delta_mbst_overlay(connectivity_graph.copy(), args.computation_time, args.model_size)
116 |
117 | delta_mbst = add_upload_download_delays(delta_mbst, args.computation_time, args.model_size)
118 | cycle_time, _, _ = evaluate_cycle_time(delta_mbst)
119 |
120 | nx.write_gml(delta_mbst, os.path.join(result_dir, "mct_congest.gml"))
121 | print("Cycle time for delta-MBST architecture: {0:.1f} ms".format(cycle_time))
122 | results_file.write("MCT_congest {}\n".format(cycle_time))
123 |
124 | # Star
125 | star = get_star_overlay(connectivity_graph.copy(), args.centrality)
126 |
127 | cycle_time, _, _ = evaluate_cycle_time(add_upload_download_delays(star, args.computation_time, args.model_size))
128 |
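   |     # a star round involves two communication phases (workers -> hub, then hub -> workers),
   |     # so the communication part of the cycle time is presumably counted twice here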
129 | cycle_time = (cycle_time - args.computation_time) * 2 + args.computation_time
130 |
131 | nx.write_gml(star, os.path.join(result_dir, "centralized.gml"))
132 | print("Cycle time for STAR architecture: {0:.1f} ms".format(cycle_time))
133 | results_file.write("Server {}\n".format(cycle_time))
134 |
135 | # Ring
136 | ring = get_ring_overlay(connectivity_graph.copy(), args.computation_time, args.model_size)
137 |
138 | cycle_time, _, _ = evaluate_cycle_time(add_upload_download_delays(ring, args.computation_time, args.model_size))
139 |
140 | nx.write_gml(ring, os.path.join(result_dir, "ring.gml"))
141 | print("Cycle time for RING architecture: {0:.1f} ms".format(cycle_time))
142 | results_file.write("Ring graph {}\n".format(cycle_time))
143 |
144 | # MATCHA
145 | cycle_time = get_matcha_cycle_time(underlay.copy(), connectivity_graph.copy(),
146 | args.computation_time, args.model_size, args.communication_budget)
147 |
148 | print("Cycle time for MATCHA architecture: {0:.1f} ms".format(cycle_time))
149 | results_file.write("MATCHA {}\n".format(cycle_time))
150 |
151 | # MATCHA+
152 | cycle_time = get_matcha_cycle_time(connectivity_graph.copy(), connectivity_graph.copy(),
153 | args.computation_time, args.model_size, args.communication_budget)
154 |
155 | print("Cycle time for MATCHA+ architecture: {0:.1f} ms".format(cycle_time))
156 |     results_file.write("MATCHA+ {}\n".format(cycle_time))
157 |
--------------------------------------------------------------------------------
/make_figure2.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 |
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 |
7 | from utils.utils import args_to_string, loggs_to_json
8 | from utils.args import parse_args
9 |
10 |
11 | cycle_time_dict = {"gaia": {"ring": 522.8,
12 | "centralized": 9293.3,
13 | "mst": 1442.0,
14 | "mct_congest": 1018.8,
15 | "matcha": 2612.8},
16 | "amazon_us": {"ring": 485.9,
17 | "centralized": 18983.2,
18 | "mst": 1385.7,
19 | "mct_congest": 952.8,
20 | "matcha": 5036.7},
21 | "geantdistance": {"ring": 491.1,
22 | "centralized": 35188.4,
23 | "mst": 2753.8,
24 | "mct_congest": 984.7,
25 | "matcha": 2658.9},
26 | "exodus": {"ring": 488.1,
27 | "centralized": 70350.7,
28 | "mst": 3176.9,
29 | "mct_congest": 1023.5,
30 | "matcha": 2874.3},
31 | "ebone": {"ring": 482.2,
32 | "centralized": 77462.5,
33 | "mst": 4123.4,
34 | "mct_congest": 984.8,
35 | "matcha": 2660.3}}
36 |
37 | EXTENSIONS = {"synthetic": ".json",
38 | "sent140": ".json",
39 | "femnist": ".pkl",
40 | "shakespeare": ".txt",
41 | "inaturalist": ".pkl"}
42 |
43 | # Model size in bit
44 | MODEL_SIZE_DICT = {"synthetic": 4354,
45 | "shakespeare": 3385747,
46 | "femnist": 4843243,
47 | "sent140": 19269416,
48 | "inaturalist": 44961717}
49 |
50 | # Model computation time in ms
51 | COMPUTATION_TIME_DICT = {"synthetic": 1.5,
52 | "shakespeare": 389.6,
53 | "femnist": 4.6,
54 | "sent140": 9.8,
55 | "inaturalist": 25.4}
56 |
57 | # Tags list
58 | TAGS = ["Train/Loss", "Train/Acc", "Test/Loss", "Test/Acc", "Consensus"]
59 |
60 | labels_dict = {"matcha": "MATCHA$^{+}$",
61 | "mst": "MST",
62 | "centralized": "STAR",
63 |                'mct_congest': r"$\delta$-MBST",
64 | "ring": "RING"}
65 |
66 | tag_dict = {"Train/Loss": "Train loss",
67 | "Train/Acc": "Train acc",
68 | "Test/Loss": "Test loss",
69 | "Test/Acc": "Test acc",
70 | "Consensus": "Consensus"}
71 |
72 | path_dict = {"Train/Loss": "Train_loss",
73 | "Train/Acc": "Train_acc",
74 | "Test/Loss": "Test_loss",
75 | "Test/Acc": "Test_acc",
76 | "Consensus": "Consensus"}
77 |
78 | trsh_dict = {"gaia": 0.65,
79 | "amazon_us": 0.55,
80 | "geantdistance": 0.55,
81 | "exodus": 0.5,
82 | "ebone": 0.5}
83 |
84 | lr_dict = {"gaia": "1e-3",
85 | "amazon_us": "1e-3",
86 | "geantdistance": "1e-3",
87 | "exodus": "1e-1",
88 | "ebone": "1e-1"}
89 |
90 | bz_dict = {"shakespeare": 512,
91 | "femnist": 128,
92 | "sent140": 512,
93 | "inaturalist": 16}
94 |
95 |
96 | def make_plots(args, mode=0):
97 | os.makedirs(os.path.join("results", "plots", args.experiment), exist_ok=True)
98 |
99 | loggs_dir_path = os.path.join("loggs", args_to_string(args))
100 | path_to_json = os.path.join("results", "json", "{}.json".format(os.path.split(loggs_dir_path)[1]))
101 | with open(path_to_json, "r") as f:
102 | data = json.load(f)
103 |
104 | # fig, axs = plt.subplots(2, 5, figsize=(20, 8))
105 | x_lim = np.inf
106 | for idx, tag in enumerate(TAGS):
107 | fig = plt.figure(figsize=(12, 10))
108 | for architecture in ["centralized", "matcha", "mst", "mct_congest", "ring"]:
109 | try:
110 | values = data[tag][architecture]
111 | rounds = data["Round"][architecture]
112 | except:
113 | continue
114 |
115 | if mode == 0:
116 | min_len = min(len(values), len(rounds))
117 |
118 | if rounds[-1] * cycle_time_dict[network_name][architecture] < x_lim:
119 | x_lim = rounds[-1] * cycle_time_dict[network_name][architecture]
120 |
121 | plt.plot(cycle_time_dict[network_name][architecture] * np.array(rounds) / 1000,
122 | values[:min_len], label=labels_dict[architecture],
123 | linewidth=5.0)
124 | plt.grid(True, linewidth=2)
125 | plt.xlim(0, x_lim / 1000)
126 | plt.ylabel("{}".format(tag_dict[tag]), fontsize=50)
127 | plt.xlabel("time (s)", fontsize=50)
128 | plt.tick_params(axis='both', labelsize=40)
129 | plt.tick_params(axis='x')
130 | plt.legend(fontsize=35)
131 |
132 | else:
133 | min_len = min(len(values), len(rounds))
134 |
135 | if rounds[:min_len][-1] < x_lim:
136 | x_lim = rounds[:min_len][-1]
137 |
138 | plt.plot(rounds[:min_len],
139 | values[:min_len], label=labels_dict[architecture],
140 | linewidth=5.0)
141 | plt.ylabel("{}".format(tag_dict[tag]), fontsize=50)
142 | plt.xlabel("Rounds", fontsize=50)
143 | plt.tick_params(axis='both', labelsize=40)
144 | plt.legend(fontsize=35)
145 | plt.grid(True, linewidth=2)
146 | plt.xlim(0, x_lim)
147 |
148 | if mode == 0:
149 | fig_path = os.path.join("results", "plots", args.experiment,
150 | "{}_{}_vs_time.png".format(args.network_name, path_dict[tag]))
151 | plt.savefig(fig_path, bbox_inches='tight')
152 | else:
153 | fig_path = os.path.join("results", "plots", args.experiment,
154 | "{}_{}_vs_iteration.png".format(args.network_name, path_dict[tag]))
155 | plt.savefig(fig_path, bbox_inches='tight')
156 |
157 |
158 | if __name__ == "__main__":
159 | network_name = "amazon_us"
160 |
161 | for experiment in [ "inaturalist", "shakespeare", "sent140", "femnist"]:
162 | args = parse_args([experiment,
163 | "--network", network_name,
164 | "--bz", str(bz_dict[experiment]),
165 | "--lr", str(lr_dict[network_name]),
166 | "--decay", "sqrt",
167 | "--local_steps", "1"])
168 |
169 | args_string = args_to_string(args)
170 |
171 | loggs_dir = os.path.join("loggs", args_to_string(args))
172 | loggs_to_json(loggs_dir)
173 |
174 | print("{}:".format(experiment))
175 |
176 | make_plots(args, mode=0)
177 | make_plots(args, mode=1)
178 |
179 | print("#" * 10)
180 |
181 |
182 |
183 |
184 |
--------------------------------------------------------------------------------
/communication_module/manager.py:
--------------------------------------------------------------------------------
1 | import os
2 | from abc import ABC, abstractmethod
3 |
4 | import torch
5 | import torch.distributed as dist
6 | from torch.utils.tensorboard import SummaryWriter
7 |
8 | from graph_utils.utils.matcha import RandomTopologyGenerator
9 | from utils.utils import get_network, get_iterator, get_model, args_to_string
10 |
11 |
12 | EXTENSIONS = {"synthetic": ".json", "sent140": ".json", "femnist": ".pkl", "shakespeare": ".txt", "inaturalist": ".pkl"}
13 |
14 |
15 | class Manager(ABC):
16 | def __init__(self, args):
17 | self.device = args.device
18 | self.batch_size = args.bz
19 | self.network = get_network(args.network_name, args.architecture)
20 |         self.world_size = self.network.number_of_nodes() + 1  # we add a node representing the network manager
21 | self.log_freq = args.log_freq
22 |
23 | # create logger
24 | logger_path = os.path.join("loggs", args_to_string(args), args.architecture)
25 | self.logger = SummaryWriter(logger_path)
26 |
27 | self.round_idx = 0 # index of the current communication round
28 |
29 | self.train_dir = os.path.join("data", args.experiment, "train")
30 | self.test_dir = os.path.join("data", args.experiment, "test")
31 |
32 | self.train_path = os.path.join(self.train_dir, "train" + EXTENSIONS[args.experiment])
33 | self.test_path = os.path.join(self.test_dir, "test" + EXTENSIONS[args.experiment])
34 |
35 | self.train_iterator = get_iterator(args.experiment, self.train_path, self.device, self.batch_size)
36 | self.test_iterator = get_iterator(args.experiment, self.test_path, self.device, self.batch_size)
37 |
38 | self.gather_list = [get_model(args.experiment, self.device, self.train_iterator)
39 | for _ in range(self.world_size)]
40 |
41 | self.scatter_list = [get_model(args.experiment, self.device, self.train_iterator)
42 | for _ in range(self.world_size)]
43 |
44 | # print initial logs
45 | self.write_logs()
46 |
47 | def communicate(self):
48 | for ii, param in enumerate(self.gather_list[-1].net.parameters()):
49 | param_list = [list(self.gather_list[idx].net.parameters())[ii].data
50 | for idx in range(self.world_size)]
51 |
52 | dist.gather(tensor=param.data, dst=self.world_size - 1, gather_list=param_list)
53 |
54 | self.mix()
55 |
56 | if (self.round_idx - 1) % self.log_freq == 0:
57 | self.write_logs()
58 |
59 | for ii, param in enumerate(self.scatter_list[-1].net.parameters()):
60 | param_list = [list(self.scatter_list[idx].net.parameters())[ii].data
61 | for idx in range(self.world_size)]
62 |
63 | dist.scatter(tensor=param.data, src=self.world_size - 1, scatter_list=param_list)
64 |
65 | @abstractmethod
66 | def mix(self):
67 | pass
68 |
69 | def write_logs(self):
70 | """
71 |         write train/test loss, train/test accuracy for the average model and the local models,
72 |         and intra-workers parameters variance (consensus), and save the average model
73 | """
74 | train_loss, train_acc = self.scatter_list[-1].evaluate_iterator(self.train_iterator)
75 |         test_loss, test_acc = self.scatter_list[-1].evaluate_iterator(self.test_iterator)
76 |
77 | self.logger.add_scalar("Train/Loss", train_loss, self.round_idx)
78 | self.logger.add_scalar("Train/Acc", train_acc, self.round_idx)
79 | self.logger.add_scalar("Test/Loss", test_loss, self.round_idx)
80 | self.logger.add_scalar("Test/Acc", test_acc, self.round_idx)
81 |
82 | # write parameter variance
83 | average_parameter = self.scatter_list[-1].get_param_tensor()
84 |
85 | param_tensors_by_workers = torch.zeros((average_parameter.shape[0], self.world_size - 1))
86 |
87 | for ii, model in enumerate(self.scatter_list[:-1]):
88 | param_tensors_by_workers[:, ii] = model.get_param_tensor() - average_parameter
89 |
90 | consensus = (param_tensors_by_workers ** 2).sum()
91 | self.logger.add_scalar("Consensus", consensus, self.round_idx)
92 |
93 | print(f'\t Round: {self.round_idx} |Test Loss: {test_loss:.3f} | Test Acc: {test_acc * 100:.2f}%')
94 |
95 |
96 | class Peer2PeerManager(Manager):
97 | def mix(self):
98 | for ii, model in enumerate(self.scatter_list):
99 | if ii == self.world_size - 1:
100 | for param_idx, param in enumerate(model.net.parameters()):
101 | param.data.fill_(0.)
102 | for local_model in self.scatter_list[:-1]:
103 | param.data += (1 / (self.world_size - 1)) * list(local_model.net.parameters())[param_idx]
104 | else:
105 | for param_idx, param in enumerate(model.net.parameters()):
106 | param.data.fill_(0.)
107 | for neighbour in self.network.neighbors(ii):
108 | coeff = self.network.get_edge_data(ii, neighbour)["weight"]
109 | param.data += coeff * list(self.gather_list[neighbour].net.parameters())[param_idx]
110 |
111 | self.round_idx += 1
112 |
113 |
114 | class MATCHAManager(Manager):
115 | def __init__(self, args):
116 | super(MATCHAManager, self).__init__(args)
117 | path_to_save_network = os.path.join("loggs", args_to_string(args), "matcha", "colored_network.gml")
118 | path_to_matching_history_file = os.path.join("loggs", args_to_string(args), "matcha", "matching_history.csv")
119 | self.topology_generator = RandomTopologyGenerator(self.network,
120 | args.communication_budget,
121 | network_save_path=path_to_save_network,
122 | path_to_history_file=path_to_matching_history_file)
123 |
124 | def mix(self):
125 | # update topology
126 | self.topology_generator.step()
127 |
128 | for ii, model in enumerate(self.scatter_list):
129 | if ii == self.world_size - 1:
130 | for param_idx, param in enumerate(model.net.parameters()):
131 | param.data.fill_(0.)
132 | for local_model in self.scatter_list[:-1]:
133 | param.data += (1 / (self.world_size - 1)) * list(local_model.net.parameters())[param_idx]
134 | else:
135 | for param_idx, param in enumerate(model.net.parameters()):
136 | param.data.fill_(0.)
137 | for neighbour in self.topology_generator.current_topology.neighbors(ii):
138 | coeff = self.topology_generator.current_topology.get_edge_data(ii, neighbour)["weight"]
139 | param.data += coeff * list(self.gather_list[neighbour].net.parameters())[param_idx]
140 |
141 | self.round_idx += 1
142 |
143 |
144 | class CentralizedManager(Manager):
145 | def mix(self):
146 | for param_idx, param in enumerate(self.scatter_list[-1].net.parameters()):
147 | param.data.fill_(0.)
148 | for local_model in self.gather_list[:-1]:
149 | param.data += (1 / (self.world_size - 1)) * list(local_model.net.parameters())[param_idx]
150 |
151 | for ii, model in enumerate(self.scatter_list[:-1]):
152 | for param_idx, param in enumerate(model.net.parameters()):
153 | param.data = list(self.scatter_list[-1].net.parameters())[param_idx]
154 |
155 | self.round_idx += 1
156 |
--------------------------------------------------------------------------------
/graph_utils/data/gaia.gml:
--------------------------------------------------------------------------------
1 | graph [
2 | node [
3 | id 0
4 | label "Virginia"
5 | ]
6 | node [
7 | id 1
8 | label "California"
9 | ]
10 | node [
11 | id 2
12 | label "Oregon"
13 | ]
14 | node [
15 | id 3
16 | label "Dublin"
17 | ]
18 | node [
19 | id 4
20 | label "Frankfurt"
21 | ]
22 | node [
23 | id 5
24 | label "Tokyo"
25 | ]
26 | node [
27 | id 6
28 | label "Seoul"
29 | ]
30 | node [
31 | id 7
32 | label "Singapore"
33 | ]
34 | node [
35 | id 8
36 | label "Sydney"
37 | ]
38 | node [
39 | id 9
40 | label "Mumbai"
41 | ]
42 | node [
43 | id 10
44 | label "Sao Paulo"
45 | ]
46 | edge [
47 | source 0
48 | target 1
49 | distance 3560.859824767453
50 | capacity 1000000000.0
51 | ]
52 | edge [
53 | source 0
54 | target 2
55 | distance 3617.1058525455455
56 | capacity 1000000000.0
57 | ]
58 | edge [
59 | source 0
60 | target 3
61 | distance 5683.746538162422
62 | capacity 100000000.0
63 | ]
64 | edge [
65 | source 0
66 | target 4
67 | distance 6774.62010172149
68 | capacity 100000000.0
69 | ]
70 | edge [
71 | source 0
72 | target 5
73 | distance 11032.403521116341
74 | capacity 100000000.0
75 | ]
76 | edge [
77 | source 0
78 | target 6
79 | distance 11331.528778910633
80 | capacity 100000000.0
81 | ]
82 | edge [
83 | source 0
84 | target 7
85 | distance 15737.083172377323
86 | capacity 100000000.0
87 | ]
88 | edge [
89 | source 0
90 | target 8
91 | distance 15550.74835546916
92 | capacity 100000000.0
93 | ]
94 | edge [
95 | source 0
96 | target 9
97 | distance 13113.161300492078
98 | capacity 100000000.0
99 | ]
100 | edge [
101 | source 0
102 | target 10
103 | distance 7500.898168816753
104 | capacity 500000000.0
105 | ]
106 | edge [
107 | source 1
108 | target 2
109 | distance 825.4130940774442
110 | capacity 1500000000.0
111 | ]
112 | edge [
113 | source 1
114 | target 3
115 | distance 8111.218768362535
116 | capacity 100000000.0
117 | ]
118 | edge [
119 | source 1
120 | target 4
121 | distance 9096.865584257743
122 | capacity 100000000.0
123 | ]
124 | edge [
125 | source 1
126 | target 5
127 | distance 8620.547632468602
128 | capacity 100000000.0
129 | ]
130 | edge [
131 | source 1
132 | target 6
133 | distance 9370.063077937788
134 | capacity 300000000.0
135 | ]
136 | edge [
137 | source 1
138 | target 7
139 | distance 13930.612571776204
140 | capacity 100000000.0
141 | ]
142 | edge [
143 | source 1
144 | target 8
145 | distance 12160.544494528913
146 | capacity 300000000.0
147 | ]
148 | edge [
149 | source 1
150 | target 9
151 | distance 13727.28776621854
152 | capacity 100000000.0
153 | ]
154 | edge [
155 | source 1
156 | target 10
157 | distance 10079.072989313954
158 | capacity 100000000.0
159 | ]
160 | edge [
161 | source 2
162 | target 3
163 | distance 7551.970732123231
164 | capacity 300000000.0
165 | ]
166 | edge [
167 | source 2
168 | target 4
169 | distance 8488.896086335717
170 | capacity 100000000.0
171 | ]
172 | edge [
173 | source 2
174 | target 5
175 | distance 8028.469388699873
176 | capacity 100000000.0
177 | ]
178 | edge [
179 | source 2
180 | target 6
181 | distance 8700.031091458462
182 | capacity 300000000.0
183 | ]
184 | edge [
185 | source 2
186 | target 7
187 | distance 13325.366070623815
188 | capacity 100000000.0
189 | ]
190 | edge [
191 | source 2
192 | target 8
193 | distance 12383.076161347562
194 | capacity 100000000.0
195 | ]
196 | edge [
197 | source 2
198 | target 9
199 | distance 12902.319229980723
200 | capacity 100000000.0
201 | ]
202 | edge [
203 | source 2
204 | target 10
205 | distance 10610.577959918295
206 | capacity 100000000.0
207 | ]
208 | edge [
209 | source 3
210 | target 4
211 | distance 1091.0035398064083
212 | capacity 1500000000.0
213 | ]
214 | edge [
215 | source 3
216 | target 5
217 | distance 9611.133798789571
218 | capacity 100000000.0
219 | ]
220 | edge [
221 | source 3
222 | target 6
223 | distance 8974.589549377932
224 | capacity 100000000.0
225 | ]
226 | edge [
227 | source 3
228 | target 7
229 | distance 11203.776982156216
230 | capacity 100000000.0
231 | ]
232 | edge [
233 | source 3
234 | target 8
235 | distance 17207.312372624874
236 | capacity 100000000.0
237 | ]
238 | edge [
239 | source 3
240 | target 9
241 | distance 7620.843594967312
242 | capacity 300000000.0
243 | ]
244 | edge [
245 | source 3
246 | target 10
247 | distance 9366.555606476215
248 | capacity 100000000.0
249 | ]
250 | edge [
251 | source 4
252 | target 5
253 | distance 9358.521215366647
254 | capacity 100000000.0
255 | ]
256 | edge [
257 | source 4
258 | target 6
259 | distance 8571.5714609335
260 | capacity 100000000.0
261 | ]
262 | edge [
263 | source 4
264 | target 7
265 | distance 10260.83044153216
266 | capacity 100000000.0
267 | ]
268 | edge [
269 | source 4
270 | target 8
271 | distance 16478.1341044152
272 | capacity 100000000.0
273 | ]
274 | edge [
275 | source 4
276 | target 9
277 | distance 6578.168093870104
278 | capacity 500000000.0
279 | ]
280 | edge [
281 | source 4
282 | target 10
283 | distance 9807.409376220698
284 | capacity 100000000.0
285 | ]
286 | edge [
287 | source 5
288 | target 6
289 | distance 1161.2277477992284
290 | capacity 1000000000.0
291 | ]
292 | edge [
293 | source 5
294 | target 7
295 | distance 5311.118309037953
296 | capacity 1000000000.0
297 | ]
298 | edge [
299 | source 5
300 | target 8
301 | distance 7789.739742827469
302 | capacity 300000000.0
303 | ]
304 | edge [
305 | source 5
306 | target 9
307 | distance 6751.3514540143415
308 | capacity 300000000.0
309 | ]
310 | edge [
311 | source 5
312 | target 10
313 | distance 18528.65557840507
314 | capacity 100000000.0
315 | ]
316 | edge [
317 | source 6
318 | target 7
319 | distance 4658.7421490548095
320 | capacity 1000000000.0
321 | ]
322 | edge [
323 | source 6
324 | target 8
325 | distance 8296.033168577038
326 | capacity 100000000.0
327 | ]
328 | edge [
329 | source 6
330 | target 9
331 | distance 5613.893433078432
332 | capacity 500000000.0
333 | ]
334 | edge [
335 | source 6
336 | target 10
337 | distance 18337.930813275976
338 | capacity 100000000.0
339 | ]
340 | edge [
341 | source 7
342 | target 8
343 | distance 6301.111688839916
344 | capacity 300000000.0
345 | ]
346 | edge [
347 | source 7
348 | target 9
349 | distance 3899.1833741194805
350 | capacity 500000000.0
351 | ]
352 | edge [
353 | source 7
354 | target 10
355 | distance 16000.059238393498
356 | capacity 100000000.0
357 | ]
358 | edge [
359 | source 8
360 | target 9
361 | distance 10144.778814121693
362 | capacity 100000000.0
363 | ]
364 | edge [
365 | source 8
366 | target 10
367 | distance 13377.864263189238
368 | capacity 100000000.0
369 | ]
370 | edge [
371 | source 9
372 | target 10
373 | distance 13772.602629233716
374 | capacity 100000000.0
375 | ]
376 | ]
377 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Throughput-Optimal Topology Design for Cross-Silo Federated Learning
2 |
3 | This repository is the official implementation of [Throughput-Optimal Topology
4 | Design for Cross-Silo Federated Learning](https://arxiv.org/abs/2010.12229).
5 |
6 | Federated learning usually employs a master-slave architecture where an
7 | orchestrator iteratively aggregates model updates from remote clients
8 | and pushes back to them a refined model. This approach may be inefficient
9 | in cross-silo settings, as close-by data silos with high-speed access
10 | links may exchange information faster than with the orchestrator, and
11 | the orchestrator may become a communication bottleneck. In this paper we
12 | define the problem of topology design for cross-silo federated learning
13 | using the theory of max-plus linear systems to compute the system
14 | throughput---number of communication rounds per time unit. We also
15 | propose practical algorithms that, under the knowledge of measurable
16 | network characteristics, find a topology with the largest throughput or
17 | with provable throughput guarantees. In realistic Internet networks with
18 | 10 Gbps access links for silos, our algorithms speed up training by a
19 | factor 9 and 1.5 in comparison to the master-slave architecture and to
20 | state-of-the-art MATCHA, respectively. Speedups are even larger with
21 | slower access links.
22 |
23 | ## Requirements
24 |
25 | To install requirements:
26 |
27 | ```setup
28 | pip install -r requirements.txt
29 | ```
30 |
31 | ## Datasets
32 |
33 | We provide four datasets that are used in the paper under corresponding
34 | folders. For all datasets, see the README files in separate
35 | data/$dataset folders for instructions on preprocessing and/or sampling
36 | data.
37 |
38 | ## Networks and Topologies
39 |
40 | A main part of the paper is related to topology design. The
41 | `graph_utils/` folder provides details on generating the different
42 | topologies for each network, as well as scripts to compute the cycle
43 | time of each topology; see the example below.
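   |
   | For instance, a minimal sketch (the flags below are those defined in
   | `graph_utils/generate_networks.py`; the command is assumed to be run from the
   | repository root) computing the cycle times of all overlays for the Gaia
   | network with 10 Gbps access links:
   |
   | ```
   | cd graph_utils
   | python3 generate_networks.py gaia --experiment inaturalist \
   |     --upload_capacity 1e10 --download_capacity 1e10
   | ```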
44 |
45 | ## Training
46 |
47 | Run training on one dataset, with a specific topology choice, on one network.
48 | Specify the name of the dataset (experiment), the name of the network
49 | and the architecture to use, and configure all other hyper-parameters (see
50 | the hyper-parameter values in the appendix of the paper):
51 |
52 | ```train
53 | python3 main.py experiment --network_name network \
54 |     --architecture=original (--parallel) (--fit_by_epoch) \
55 |     --n_rounds=1 --bz=1 \
56 |     --local_steps=1 --log_freq=1 \
57 |     --device="cpu" --lr=1e-3 \
58 |     --optimizer='adam' --decay="constant"
59 | ```
60 |
61 | And the test and training accuracy and loss will be saved in the log files.
62 |
63 | ## Evaluation
64 |
65 | ### iNaturalist Speed-ups
66 | To evaluate the speed-ups obtained when training iNaturalist on the proposed topology architectures (to generate Table 3) for a given network, run
67 |
68 | ```eval
69 | python3 main.py inaturalist --network_name gaia --architecture $ARCHITECTURE --n_rounds 5600 --bz 16 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt
70 | python3 main.py inaturalist --network_name amazon_us --architecture $ARCHITECTURE --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt
71 | python3 main.py inaturalist --network_name geantdistance --architecture $ARCHITECTURE --n_rounds 4000 --bz 16 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt
72 | python3 main.py inaturalist --network_name exodus --architecture $ARCHITECTURE --n_rounds 4800 --bz 16 --device cuda --log_freq 100 --local_steps 1 --lr 0.1 --decay sqrt --optimizer sgd
73 | python3 main.py inaturalist --network_name ebone --architecture $ARCHITECTURE --n_rounds 6000 --bz 16 --device cuda --log_freq 100 --local_steps 1 --lr 0.1 --decay sqrt --optimizer sgd
74 | ```
75 |
76 | And the test and training accuracy and loss for the corresponding experiment will be saved in the log files.
77 |
78 | Do this operation for all architectures ($ARCHITECTURE=ring, centralized, matcha, mst, mct_congest).
79 | Note that for every network, the dataset must be regenerated (see the data/$dataset folders) to distribute the data into silos.
80 |
81 | Then run
82 |
83 | ```eval
84 | python3 make_table3.py
85 | ```
86 |
87 | to generate the values of Table 3.
88 |
89 | ### Effect of the topology on the convergence
90 |
91 | To evaluate the influence of the topology on the training evolution for the different datasets when trained on the AWS NA (amazon_us) network, run
92 |
93 | ```eval
94 | python main.py inaturalist --network_name amazon_us --architecture ring --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt
95 | python main.py inaturalist --network_name amazon_us --architecture centralized --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt
96 | python main.py inaturalist --network_name amazon_us --architecture matcha --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt
97 | python main.py inaturalist --network_name amazon_us --architecture mst --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt
98 | python main.py inaturalist --network_name amazon_us --architecture mct_congest --n_rounds 1600 --bz 16 --device cuda --log_freq 40 --local_steps 1 --lr 0.001 --decay sqrt
99 |
100 | python main.py femnist --network_name amazon_us --architecture ring --n_rounds 6400 --bz 128 --device cuda --log_freq 80 --local_steps 1 --lr 0.001 --decay sqrt
101 | python main.py femnist --network_name amazon_us --architecture centralized --n_rounds 6400 --bz 128 --device cuda --log_freq 80 --local_steps 1 --lr 0.001 --decay sqrt
102 | python main.py femnist --network_name amazon_us --architecture matcha --n_rounds 6400 --bz 128 --device cuda --log_freq 80 --local_steps 1 --lr 0.001 --decay sqrt
103 | python main.py femnist --network_name amazon_us --architecture mst --n_rounds 6400 --bz 128 --device cuda --log_freq 80 --local_steps 1 --lr 0.001 --decay sqrt
104 | python main.py femnist --network_name amazon_us --architecture mct_congest --n_rounds 6400 --bz 128 --device cuda --log_freq 80 --local_steps 1 --lr 0.001 --decay sqrt
105 |
106 | python main.py sent140 --network_name amazon_us --architecture ring --n_rounds 20000 --bz 512 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt
107 | python main.py sent140 --network_name amazon_us --architecture centralized --n_rounds 20000 --bz 512 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt
108 | python main.py sent140 --network_name amazon_us --architecture matcha --n_rounds 20000 --bz 512 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt
109 | python main.py sent140 --network_name amazon_us --architecture mst --n_rounds 20000 --bz 512 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt
110 | python main.py sent140 --network_name amazon_us --architecture mct_congest --n_rounds 20000 --bz 512 --device cuda --log_freq 100 --local_steps 1 --lr 0.001 --decay sqrt
111 |
112 | python main.py shakespeare --network_name amazon_us --architecture ring --n_rounds 1500 --bz 512 --decay sqrt --lr 1e-3 --device cuda --local_steps 1 --log_freq 30
113 | python main.py shakespeare --network_name amazon_us --architecture centralized --n_rounds 1500 --bz 512 --decay sqrt --lr 1e-3 --device cuda --local_steps 1 --log_freq 30
114 | python main.py shakespeare --network_name amazon_us --architecture matcha --n_rounds 1500 --bz 512 --decay sqrt --lr 1e-3 --device cuda --local_steps 1 --log_freq 30
115 | python main.py shakespeare --network_name amazon_us --architecture mst --n_rounds 1500 --bz 512 --decay sqrt --lr 1e-3 --device cuda --local_steps 1 --log_freq 30
116 | python main.py shakespeare --network_name amazon_us --architecture mct_congest --n_rounds 1500 --bz 512 --decay sqrt --lr 1e-3 --device cuda --local_steps 1 --log_freq 30
117 | ```
118 |
119 | to generate the log files for each experiment. Then run
120 |
121 | ```eval
122 | python3 make_figure2.py
123 | ```
124 |
125 | to generate Figure 2. (Figures will be found in `results/plots`)
126 |
127 | ## Results
128 |
129 | ### iNaturalist Speed-ups
130 | Our topology design achieves the following speed-ups when training the
131 | iNaturalist dataset over different networks:
132 |
133 |
134 | |Network Name | Silos | Links | Ring vs Star speed-up | Ring vs MATCHA speed-up|
135 | | ------------------ | ------|-------|---------------- | -------------- |
136 | | Gaia | 11 | 55 |2.65 | 1.54 |
137 | | AWS NA | 22 | 321 |3.41 |1.47|
138 | | Géant | 40 | 61 |4.85 |0.81|
139 | | Exodus | 79 | 147 |8.78 |1.37|
140 | | Ebone | 87 | 161 |8.83 |1.29|
141 |
142 | ### Effect of the topology on the convergence
143 |
144 | Effect of overlays on the convergence w.r.t. communication rounds (top row)
145 | and wall-clock time (bottom row) when training four different datasets on the
146 | AWS North America underlay. 1 Gbps core link capacities, 100 Mbps access
147 | link capacities, s = 1.
148 |
149 | 
150 |
--------------------------------------------------------------------------------
/data/shakespeare/preprocess_shakespeare.py:
--------------------------------------------------------------------------------
1 | """Preprocesses the Shakespeare dataset for federated training.
2 | Copyright 2017 Google Inc.
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | https://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | To run:
13 | python preprocess_shakespeare.py path/to/raw/shakespeare.txt output_directory/
14 | The raw data can be downloaded from:
15 | http://www.gutenberg.org/cache/epub/100/pg100.txt
16 | (The Plain Text UTF-8 file format, md5sum: 036d0f9cf7296f41165c2e6da1e52a0e)
17 | Note that The Comedy of Errors has an incorrect indentation compared to all the
18 | other plays in the file. The code below reflects that issue. To make the code
19 | cleaner, you could fix the indentation in the raw shakespeare file and remove
20 | the special casing for that play in the code below.
21 | Authors: loeki@google.com, mcmahan@google.com
22 | Disclaimer: This is not an official Google product.
23 | """
24 | import collections
25 | import json
26 | import os
27 | import random
28 | import re
29 | import sys
30 | RANDOM_SEED = 1234
31 | # Regular expression to capture an actors name, and line continuation
32 | CHARACTER_RE = re.compile(r'^ ([a-zA-Z][a-zA-Z ]*)\. (.*)')
33 | CONT_RE = re.compile(r'^ (.*)')
34 | # The Comedy of Errors has errors in its indentation so we need to use
35 | # different regular expressions.
36 | COE_CHARACTER_RE = re.compile(r'^([a-zA-Z][a-zA-Z ]*)\. (.*)')
37 | COE_CONT_RE = re.compile(r'^(.*)')
38 |
39 |
40 | def _match_character_regex(line, comedy_of_errors=False):
41 | return (COE_CHARACTER_RE.match(line) if comedy_of_errors
42 | else CHARACTER_RE.match(line))
43 |
44 |
45 | def _match_continuation_regex(line, comedy_of_errors=False):
46 | return (
47 | COE_CONT_RE.match(line) if comedy_of_errors else CONT_RE.match(line))
48 |
49 |
50 | def _split_into_plays(shakespeare_full):
51 | """Splits the full data by play."""
52 | # List of tuples (play_name, dict from character to list of lines)
53 | plays = []
54 | discarded_lines = [] # Track discarded lines.
55 | slines = shakespeare_full.splitlines(True)[1:]
56 |
57 | # skip contents, the sonnets, and all's well that ends well
58 | author_count = 0
59 | start_i = 0
60 | for i, l in enumerate(slines):
61 | if 'by William Shakespeare' in l:
62 | author_count += 1
63 | if author_count == 2:
64 | start_i = i - 5
65 | break
66 | slines = slines[start_i:]
67 |
68 | current_character = None
69 | comedy_of_errors = False
70 | for i, line in enumerate(slines):
71 | # This marks the end of the plays in the file.
72 | if i > 124195 - start_i:
73 | break
74 | # This is a pretty good heuristic for detecting the start of a new play:
75 | if 'by William Shakespeare' in line:
76 | current_character = None
77 | characters = collections.defaultdict(list)
78 | # The title will be 2, 3, 4, 5, 6, or 7 lines above "by William Shakespeare".
79 | if slines[i - 2].strip():
80 | title = slines[i - 2]
81 | elif slines[i - 3].strip():
82 | title = slines[i - 3]
83 | elif slines[i - 4].strip():
84 | title = slines[i - 4]
85 | elif slines[i - 5].strip():
86 | title = slines[i - 5]
87 | elif slines[i - 6].strip():
88 | title = slines[i - 6]
89 | else:
90 | title = slines[i - 7]
91 | title = title.strip()
92 |
93 | assert title, (
94 | 'Parsing error on line %d. Expecting title 2 or 3 lines above.' %
95 | i)
96 | comedy_of_errors = (title == 'THE COMEDY OF ERRORS')
97 | # Degenerate plays are removed at the end of the method.
98 | plays.append((title, characters))
99 | continue
100 | match = _match_character_regex(line, comedy_of_errors)
101 | if match:
102 | character, snippet = match.group(1), match.group(2)
103 | # Some character names are written with multiple casings, e.g., SIR_Toby
104 | # and SIR_TOBY. To normalize the character names, we uppercase each name.
105 | # Note that this was not done in the original preprocessing and is a
106 | # recent fix.
107 | character = character.upper()
108 | if not (comedy_of_errors and character.startswith('ACT ')):
109 | characters[character].append(snippet)
110 | current_character = character
111 | continue
112 | else:
113 | current_character = None
114 | continue
115 | elif current_character:
116 | match = _match_continuation_regex(line, comedy_of_errors)
117 | if match:
118 | if comedy_of_errors and match.group(1).startswith('<'):
119 | current_character = None
120 | continue
121 | else:
122 | characters[current_character].append(match.group(1))
123 | continue
124 | # Didn't consume the line.
125 | line = line.strip()
126 | if line and i > 2646:
127 | # Before 2646 are the sonnets, which we expect to discard.
128 | discarded_lines.append('%d:%s' % (i, line))
129 | # Remove degenerate "plays".
130 | return [play for play in plays if len(play[1]) > 1], discarded_lines
131 |
132 |
133 | def _remove_nonalphanumerics(filename):
134 | return re.sub('\\W+', '_', filename)
135 |
136 |
137 | def play_and_character(play, character):
138 | return _remove_nonalphanumerics((play + '_' + character).replace(' ', '_'))
139 |
140 |
141 | def _get_train_test_by_character(plays, test_fraction=0.2):
142 | """
143 | Splits character data into train and test sets.
144 | if test_fraction <= 0, returns {} for all_test_examples
145 | plays := list of (play, dict) tuples where play is a string and dict
146 | is a dictionary with character names as keys
147 | """
148 | skipped_characters = 0
149 | all_train_examples = collections.defaultdict(list)
150 | all_test_examples = collections.defaultdict(list)
151 |
152 | def add_examples(example_dict, example_tuple_list):
153 | for play, character, sound_bite in example_tuple_list:
154 | example_dict[play_and_character(
155 | play, character)].append(sound_bite)
156 |
157 | users_and_plays = {}
158 | for play, characters in plays:
159 | curr_characters = list(characters.keys())
160 | for c in curr_characters:
161 | users_and_plays[play_and_character(play, c)] = play
162 | for character, sound_bites in characters.items():
163 | examples = [(play, character, sound_bite)
164 | for sound_bite in sound_bites]
165 | if len(examples) <= 2:
166 | skipped_characters += 1
167 | # Skip characters with 2 or fewer example lines, since we need at least one
168 | # train line and one test line.
169 | continue
170 | train_examples = examples
171 | if test_fraction > 0:
172 | num_test = max(int(len(examples) * test_fraction), 1)
173 | train_examples = examples[:-num_test]
174 | test_examples = examples[-num_test:]
175 | assert len(test_examples) == num_test
176 | assert len(train_examples) >= len(test_examples)
177 | add_examples(all_test_examples, test_examples)
178 | add_examples(all_train_examples, train_examples)
179 | return users_and_plays, all_train_examples, all_test_examples
180 |
181 |
182 | def _write_data_by_character(examples, output_directory):
183 | """Writes a collection of data files by play & character."""
184 | if not os.path.exists(output_directory):
185 | os.makedirs(output_directory)
186 | for character_name, sound_bites in examples.items():
187 | filename = os.path.join(output_directory, character_name + '.txt')
188 | with open(filename, 'w') as output:
189 | for sound_bite in sound_bites:
190 | output.write(sound_bite + '\n')
191 |
192 |
193 | def main(argv):
194 | print('Splitting .txt data between users')
195 | input_filename = argv[0]
196 | with open(input_filename, 'r') as input_file:
197 | shakespeare_full = input_file.read()
198 | plays, discarded_lines = _split_into_plays(shakespeare_full)
199 | print('Discarded %d lines' % len(discarded_lines))
200 | users_and_plays, all_examples, _ = _get_train_test_by_character(plays, test_fraction=-1.0)
201 | output_directory = argv[1]
202 | with open(os.path.join(output_directory, 'users_and_plays.json'), 'w') as ouf:
203 | json.dump(users_and_plays, ouf)
204 | _write_data_by_character(all_examples,
205 | os.path.join(output_directory,
206 | 'by_play_and_character/'))
207 |
208 |
209 | if __name__ == '__main__':
210 | main(sys.argv[1:])
--------------------------------------------------------------------------------
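To make the preprocessing above a bit more concrete, here is a minimal sketch (a hypothetical input line, with the regex and the user-id helper re-stated from preprocess_shakespeare.py for a standalone example) of how a matched speech line is mapped to the per-play-and-character user id used for the output files:

import re

# Re-stated from preprocess_shakespeare.py for a standalone illustration.
CHARACTER_RE = re.compile(r'^ ([a-zA-Z][a-zA-Z ]*)\. (.*)')

def play_and_character(play, character):
    return re.sub(r'\W+', '_', (play + '_' + character).replace(' ', '_'))

match = CHARACTER_RE.match(' HAMLET. To be, or not to be.')  # hypothetical line
character, snippet = match.group(1).upper(), match.group(2)
print(play_and_character('THE TRAGEDY OF HAMLET', character), '->', snippet)
# THE_TRAGEDY_OF_HAMLET_HAMLET -> To be, or not to be.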
/graph_utils/utils/matching_decomposition.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 |
3 |
4 | def matching_decomposition(graph):
5 | """
6 | Implements the Misra & Gries edge-coloring algorithm;
7 | the coloring produced uses at most Delta + 1 colors, where Delta is the maximum degree of the graph;
8 | by Vizing's theorem, this is at most one color more than the optimum;
9 | See http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.4452 for details
10 | :param graph: nx.Graph()
11 | :return: - List of matchings; each matching is an nx.Graph() representing a sub-graph of "graph"
12 | - list of laplacian matrices, one laplacian matrix per matching
13 | """
14 | # Initialize all edge colors to None (uncolored)
15 | nx.set_edge_attributes(graph, None, 'color')
16 |
17 | # edge coloring
18 | for u, v in graph.edges:
19 | if u != v:
20 | graph = color_edge(graph, u, v)
21 |
22 | # matching decomposition
23 | matching_list = get_matching_list_from_graph(graph)
24 |
25 | # compute laplacian matrices
26 | laplacian_matrices = [nx.laplacian_matrix(matching, nodelist=graph.nodes(), weight=None).toarray()
27 | for matching in matching_list]
28 |
29 | return matching_list, laplacian_matrices
30 |
31 |
32 | def get_matching_list_from_graph(graph):
33 | """
34 |
35 | :param graph: nx.Graph(); each edge should have an attribute "color"
36 | :return: List of matchings; each matching is an nx.Graph() representing a sub-graph of "graph"
37 | """
38 | degree = get_graph_degree(graph)
39 | colors = list(range(degree + 1))
40 |
41 | matching_list = [nx.Graph() for _ in colors]
42 |
43 | for (u, v, data) in graph.edges(data=True):
44 | color = data["color"]
45 | idx = colors.index(color)
46 | matching_list[idx].add_edges_from([(u, v, data)])
47 |
48 | return matching_list
49 |
50 |
51 | def color_edge(graph, u, v):
52 | """
53 | color edge (u, v), if it is uncolored, following the Misra & Gries procedure;
54 | :param graph: nx.Graph(); each edge should have an attribute "color"
55 | :param u: node in "graph"
56 | :param v: node in "graph"
57 | :return: nx.Graph() where edge (u, v) has an attribute "color", the generated coloring is valid
58 | """
59 | degree = get_graph_degree(graph)
60 | colors = list(range(degree + 1))
61 |
62 | if graph.get_edge_data(u, v)["color"] is not None:
63 | return graph
64 |
65 | else:
66 | maximal_fan = get_maximal_fan(graph, u, v)
67 |
68 | for color in colors:
69 | if is_color_free(graph, u, color):
70 | c = color
71 | break
72 |
73 | for color in colors:
74 | if is_color_free(graph, maximal_fan[-1], color):
75 | d = color
76 | break
77 |
78 | cd_path = get_cd_path(graph, u, c, d)
79 |
80 | sub_fan = get_sub_fan(graph, maximal_fan, u, v, cd_path, d)
81 |
82 | graph = invert_cd_path(graph, cd_path, c, d)
83 |
84 | graph = rotate_fan(graph, sub_fan, u)
85 |
86 | graph.add_edge(u, sub_fan[-1], color=d)
87 |
88 | return graph
89 |
90 |
91 | def get_maximal_fan(graph, u, v):
92 | """
93 | constructs a maximal fan starting from v;
94 | A fan of a vertex u is a sequence of vertices F[1:k] that satisfies the following conditions:
95 | 1) F[1:k] is a non-empty sequence of distinct neighbors of u
96 | 2) (F[1],u) in E(G) is uncolored
97 | 3) The color of (F[i+1],u) is free on F[i] for 1 ≤ i < k
98 | A fan is maximal if it can't be extended;
99 | :param graph: nx.Graph(); each edge should have an attribute "color"
100 | :param u: node in "graph"
101 | :param v: node in "graph"
102 | :return: list of nodes of "graph" representing a maximal fan starting from "v"
103 | """
104 | maximal_fan = [v]
105 |
106 | is_maximal = False
107 |
108 | while not is_maximal:
109 | is_maximal = True
110 | for node in graph.neighbors(u):
111 | edge_color = graph.get_edge_data(u, node)["color"]
112 | if (node not in maximal_fan) and \
113 | is_color_free(graph, maximal_fan[-1], edge_color) and \
114 | (edge_color is not None):
115 | maximal_fan.append(node)
116 | is_maximal = False
117 | break
118 |
119 | return maximal_fan
120 |
121 |
122 | def get_sub_fan(graph, maximal_fan, u, v, cd_path, d):
123 | """
124 | constructs a sub-fan of "maximal_fan" such that color `d` is free on its last node;
125 | :param graph: nx.Graph(); each edge should have an attribute "color"
126 | :param maximal_fan: maximal fan resulting from `get_maximal_fan`
127 | :param u: node in "graph"
128 | :param v: node in "graph"
129 | :param cd_path: nx.Graph() representing a path with edges colored only with c and d
130 | :param d: integer representing a color
131 | :return: sub-list of maximal fan such that its last node is free on d
132 | """
133 | sub_fan = [v]
134 | for node in maximal_fan[1:]:
135 | if graph.get_edge_data(u, node)['color'] == d:
136 | break
137 | else:
138 | sub_fan.append(node)
139 |
140 | if cd_path.has_node(sub_fan[-1]):
141 | sub_fan = maximal_fan
142 |
143 | return sub_fan
144 |
145 |
146 | def rotate_fan(graph, fan, u):
147 | """
148 |
149 | :param graph: nx.Graph(); each edge should have an attribute "color"
150 | :param fan: list of nodes of "graph" representing a fan
151 | :param u: node in "graph"
152 | :return: nx.Graph() where each fan edge (u, fan[i]) takes the color of (u, fan[i+1]); the last fan edge becomes uncolored
153 | """
154 | for idx in range(len(fan)-1):
155 | current_edge = (u, fan[idx])
156 | next_edge = (u, fan[idx+1])
157 | color = graph.get_edge_data(*next_edge)["color"]
158 | graph.add_edge(*current_edge, color=color)
159 |
160 | graph.add_edge(u, fan[-1], color=None)
161 |
162 | return graph
163 |
164 |
165 | def is_color_free(graph, node, color):
166 | """
167 | check if the color is free on a vertex;
168 | a color is said to be incident on a vertex if an edge incident on that vertex has that color;
169 | otherwise, the color is free on that vertex
170 | :param graph: nx.Graph(); each edge should have an attribute "color"
171 | :param node: node of "graph"
172 | :param color: integer smaller than the degree of "graph" or None
173 | :return: boolean True if "color" is free on "node" and False otherwise
174 | """
175 | for neighbor in graph.neighbors(node):
176 | current_color = graph.get_edge_data(node, neighbor)["color"]
177 |
178 | if current_color == color:
179 | return False
180 |
181 | return True
182 |
183 |
184 | def get_cd_path(graph, u, c, d):
185 | """
186 | Construct a cd-path: a maximal path that includes vertex u and whose edges are colored only c or d
187 | :param graph: nx.Graph(); each edge should have an attribute "color"
188 | :param u: node of "graph"
189 | :param c: integer smaller than the degree of "graph" or None; represents a color
190 | :param d: integer smaller than the degree of "graph" or None; represents a color
191 | :return: List of nodes of "graph" representing a cd-path
192 | """
193 | path = nx.Graph()
194 |
195 | current_color = d
196 | current_node = u
197 | is_maximal = False
198 |
199 | while not is_maximal:
200 | is_maximal = True
201 | for neighbor in graph.neighbors(current_node):
202 |
203 | try:
204 | color = graph.get_edge_data(current_node, neighbor)["color"]
205 | except (KeyError, TypeError):
206 | color = None
207 |
208 | if color == current_color:
209 | path.add_edge(current_node, neighbor)
210 | current_node = neighbor
211 | is_maximal = False
212 | if current_color == c:
213 | current_color = d
214 | else:
215 | current_color = c
216 | break
217 |
218 | return path
219 |
220 |
221 | def invert_cd_path(graph, path, c, d):
222 | """
223 | Switch the colors of the edges on the cd-path: c to d and d to c.
224 | :param graph: nx.Graph(); each edge should have an attribute "color"
225 | :param path: nx.Graph() representing cd-path
226 | :param c: integer smaller than the degree of "graph" or None; represents a color
227 | :param d: integer smaller than the degree of "graph" or None; represents a color
228 | :return: graph with switched colors
229 | """
230 | for edge in path.edges:
231 | current_color = graph.get_edge_data(*edge)["color"]
232 | if current_color == c:
233 | graph.add_edge(*edge, color=d)
234 | if current_color == d:
235 | graph.add_edge(*edge, color=c)
236 |
237 | return graph
238 |
239 |
240 | def get_graph_degree(graph):
241 | """
242 | get the maximum node degree of "graph"
243 | :param graph: nx.Graph()
244 | :return: integer representing the maximum degree of the graph
245 | """
246 | degrees = graph.degree()
247 |
248 | graph_degree = 0
249 | for _, degree in degrees:
250 | if degree > graph_degree:
251 | graph_degree = degree
252 |
253 | return graph_degree
254 |
255 |
256 | def is_coloring_valid(graph):
257 | """
258 | check if the coloring of a graph is valid,
259 | i.e., two adjacent edges shouldn't have the same color;
260 | :param graph: nx.Graph() each edge should have an attribute 'color'
261 | """
262 | for u, v, data in graph.edges(data=True):
263 | color = data['color']
264 |
265 | if color is None: continue
266 |
267 | for _, v_, data_ in graph.edges(u, data=True):
268 | if v_ != v and data_['color'] == color:
269 | return False
270 |
271 | for _, u_, data_ in graph.edges(v, data=True):
272 | if u_ != u and data_['color'] == color:
273 | return False
274 |
275 | return True
276 |
277 |
278 | def is_coloring_correct(graph):
279 | """
280 | check if the coloring of a graph is correct,
281 | i.e., two adjacent edges shouldn't have the same color and all edges are colored;
282 | :param graph: nx.Graph() each edge should have an attribute 'color'
283 | """
284 | if is_coloring_valid(graph):
285 | for u, v, data in graph.edges(data=True):
286 | color = data['color']
287 |
288 | if color is None: return False  # "correct" additionally requires every edge to be colored
289 |
290 | for _, v_, data_ in graph.edges(u, data=True):
291 | if v_ != v and data_['color'] == color:
292 | return False
293 |
294 | for _, u_, data_ in graph.edges(v, data=True):
295 | if u_ != u and data_['color'] == color:
296 | return False
297 |
298 | return True
299 | else: return False
300 |
--------------------------------------------------------------------------------
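A minimal usage sketch of the matching decomposition above (a hypothetical driver, assuming the repository root is on the PYTHONPATH so that `graph_utils.utils` resolves as a package): it edge-colors a small cycle and lists the resulting matchings.

import networkx as nx

from graph_utils.utils.matching_decomposition import matching_decomposition, is_coloring_correct

# Toy topology: a 5-cycle has maximum degree Delta = 2, so at most Delta + 1 = 3 colors are used.
graph = nx.cycle_graph(5)
matchings, laplacians = matching_decomposition(graph)

print("coloring correct:", is_coloring_correct(graph))
print("number of matchings:", len(matchings))
for idx, matching in enumerate(matchings):
    # In a matching no two edges share a node, so every node degree is at most 1.
    max_degree = max((d for _, d in matching.degree()), default=0)
    print(idx, sorted(matching.edges()), "max degree:", max_degree)
print("laplacian shapes:", [lap.shape for lap in laplacians])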
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/graph_utils/utils/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import csv
3 | import shutil
4 | import random
5 |
6 | import networkx as nx
7 | import numpy as np
8 |
9 | from .evaluate_throughput import evaluate_cycle_time
10 | from .mbst import cube_algorithm, delta_prim
11 | from .tsp_christofides import christofides_tsp
12 | from .matcha import RandomTopologyGenerator
13 | from .matching_decomposition import get_matching_list_from_graph
14 |
15 |
16 | def get_connectivity_graph(underlay, default_capacity=1e9):
17 | """
18 |
19 | :param underlay:
20 | :param default_capacity:
21 | :return:
22 | """
23 | connectivity_graph = nx.Graph()
24 | connectivity_graph.add_nodes_from(underlay.nodes(data=True))
25 |
26 | dijkstra_result = nx.all_pairs_dijkstra(underlay.copy(), weight="distance")
27 |
28 | for node, (weights_dict, paths_dict) in dijkstra_result:
29 | for neighbour in paths_dict.keys():
30 | if node != neighbour:
31 | path = paths_dict[neighbour]
32 |
33 | distance = 0.
34 | for idx in range(len(path) - 1):
35 | u = path[idx]
36 | v = path[idx + 1]
37 |
38 | data = underlay.get_edge_data(u, v)
39 | distance += data["distance"]
40 |
41 | available_bandwidth = default_capacity / (len(path) - 1)
42 |
43 | latency = 0.0085 * distance + 4
44 |
45 | connectivity_graph.add_edge(node, neighbour, availableBandwidth=available_bandwidth, latency=latency)
46 |
47 | return connectivity_graph
48 |
49 |
50 | def add_upload_download_delays(overlay, computation_time, model_size):
51 | """
52 | Takes as input an nx.Graph(), each edge should have attributes "latency" and "availableBandwidth";
53 | each node should have attributes "uploadDelay" and "downloadDelay";
54 | The weight (delay) of edge (i, j) is computed as:
55 | d(i, j) = computation_time + latency(i, j) + max(out_degree(i) * uploadDelay(i), in_degree(j) * downloadDelay(j), model_size / availableBandwidth(i, j))
56 | :param overlay:
57 | :param computation_time:
58 | :param model_size:
59 | :return:
60 | """
61 | overlay = overlay.to_directed()
62 |
63 | out_degree_dict = dict(overlay.out_degree)
64 | in_degree_dict = dict(overlay.in_degree)
65 |
66 | for u, v, data in overlay.edges(data=True):
67 | upload_delay = out_degree_dict[u] * overlay.nodes[u]["uploadDelay"]
68 | download_delay = in_degree_dict[v] * overlay.nodes[v]["downloadDelay"]
69 |
70 | weight = computation_time + data["latency"] +\
71 | max(upload_delay, download_delay, model_size/data["availableBandwidth"])
72 |
73 | overlay.add_edge(u, v, weight=weight)
74 |
75 | return overlay
76 |
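# Illustrative numbers (hypothetical, all quantities assumed to be in consistent time units): with
# computation_time = 1, latency(i, j) = 0.02, model_size = 1e9, availableBandwidth(i, j) = 1e9,
# out_degree(i) = 2, uploadDelay(i) = 0.3, in_degree(j) = 1 and downloadDelay(j) = 0.4, the formula above gives
# d(i, j) = 1 + 0.02 + max(2 * 0.3, 1 * 0.4, 1e9 / 1e9) = 1 + 0.02 + 1.0 = 2.02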
77 |
78 | def get_star_overlay(connectivity_graph, centrality):
79 | """
80 | Generate a star overlay centered at the most central node of the connectivity graph
81 | :param connectivity_graph: nx.Graph() object; each edge should have attributes
82 | "latency", "availableBandwidth" and "weight";
83 | :param centrality: centrality measure used to pick the center; possible values: "load", "distance", "information"; default="load"
84 | :return: nx.Graph()
85 | """
86 | if centrality == "distance":
87 | centrality_dict = nx.algorithms.centrality.closeness_centrality(connectivity_graph, distance="latency")
88 | server_node = max(centrality_dict, key=centrality_dict.get)
89 |
90 | elif centrality == "information":
91 | centrality_dict = nx.algorithms.centrality.information_centrality(connectivity_graph, weight="latency")
92 | server_node = max(centrality_dict, key=centrality_dict.get)
93 |
94 | else:
95 | # centrality = load_centrality
96 | centrality_dict = nx.algorithms.centrality.load_centrality(connectivity_graph, weight="latency")
97 | server_node = max(centrality_dict, key=centrality_dict.get)
98 |
99 | weights, paths = nx.single_source_dijkstra(connectivity_graph, source=server_node, weight="weight")
100 |
101 | star = nx.Graph()
102 | star.add_nodes_from(connectivity_graph.nodes(data=True))
103 |
104 | for node in paths.keys():
105 | if node != server_node:
106 |
107 | latency = 0.
108 | available_bandwidth = 1e32
109 | for idx in range(len(paths[node]) - 1):
110 | u = paths[node][idx]
111 | v = paths[node][idx + 1]
112 |
113 | data = connectivity_graph.get_edge_data(u, v)
114 | latency += data["latency"]
115 | available_bandwidth = min(available_bandwidth, data["availableBandwidth"])  # bottleneck bandwidth along the path
116 |
117 | star.add_edge(server_node, node, availableBandwidth=available_bandwidth, latency=latency)
118 |
119 | return star
120 |
121 |
122 | def get_ring_overlay(connectivity_graph, computation_time, model_size):
123 | """
124 |
125 | :param connectivity_graph:
126 | :param computation_time:
127 | :param model_size:
128 | :return:
129 | """
130 | for u, v, data in connectivity_graph.edges(data=True):
131 | upload_delay = connectivity_graph.nodes[u]["uploadDelay"]
132 | download_delay = connectivity_graph.nodes[v]["downloadDelay"]
133 |
134 | weight = computation_time + data["latency"] + max(upload_delay,
135 | download_delay,
136 | model_size / data["availableBandwidth"])
137 |
138 | connectivity_graph.add_edge(u, v, weight=weight)
139 |
140 | adjacency_matrix = nx.adjacency_matrix(connectivity_graph, weight="weight").toarray()
141 | tsp_nodes = christofides_tsp(adjacency_matrix)
142 |
143 | ring = nx.DiGraph()
144 | ring.add_nodes_from(connectivity_graph.nodes(data=True))
145 |
146 | for idx in range(len(tsp_nodes) - 1):
147 | # get the label of source and sink nodes from the original graph
148 | source_node = list(connectivity_graph.nodes())[tsp_nodes[idx]]
149 | sink_node = list(connectivity_graph.nodes())[tsp_nodes[idx + 1]]
150 |
151 | ring.add_edge(source_node, sink_node,
152 | latency=connectivity_graph.get_edge_data(source_node, sink_node)['latency'],
153 | availableBandwidth=connectivity_graph.get_edge_data(source_node, sink_node)['availableBandwidth'],
154 | weight=connectivity_graph.get_edge_data(source_node, sink_node)['weight'])
155 |
156 | # add final link to close the circuit
157 | source_node = list(connectivity_graph.nodes())[tsp_nodes[-1]]
158 | sink_node = list(connectivity_graph.nodes())[tsp_nodes[0]]
159 | ring.add_edge(source_node, sink_node,
160 | latency=connectivity_graph.get_edge_data(source_node, sink_node)['latency'],
161 | availableBandwidth=connectivity_graph.get_edge_data(source_node, sink_node)['availableBandwidth'],
162 | weight=connectivity_graph.get_edge_data(source_node, sink_node)['weight'])
163 |
164 | return ring
165 |
166 |
167 | def generate_random_ring(list_of_nodes):
168 | """
169 | Generate a random ring graph connecting a list of nodes
170 | :param list_of_nodes:
171 | :return: nx.DiGraph()
172 | """
173 | ring = nx.DiGraph()
174 |
175 | ring.add_nodes_from(list_of_nodes)
176 |
177 | random.shuffle(list_of_nodes)
178 |
179 | for idx in range(len(list_of_nodes) - 1):
180 | # get the label of source and sink nodes from the original graph
181 | source_node = list_of_nodes[idx]
182 | sink_node = list_of_nodes[idx + 1]
183 |
184 | ring.add_edge(source_node, sink_node)
185 |
186 | # add final link to close the circuit
187 | source_node = list_of_nodes[-1]
188 | sink_node = list_of_nodes[0]
189 | ring.add_edge(source_node, sink_node)
190 |
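# Turn the directed ring into a mixing matrix: adding the identity and scaling by 0.5 gives
# every node weight 1/2 on itself and 1/2 on its successor, so each row and column sums to 1
# (a doubly stochastic matrix).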
191 | mixing_matrix = nx.adjacency_matrix(ring, weight=None).todense().astype(np.float64)
192 |
193 | mixing_matrix += np.eye(mixing_matrix.shape[0])
194 | mixing_matrix *= 0.5
195 |
196 | return nx.from_numpy_matrix(mixing_matrix, create_using=nx.DiGraph())
197 |
198 |
199 | def get_delta_mbst_overlay(connectivity_graph, computation_time, model_size):
200 | """
201 |
202 | :param connectivity_graph:
203 | :param computation_time:
204 | :param model_size:
205 | :return:
206 | """
207 | for u, v, data in connectivity_graph.edges(data=True):
208 | weight = computation_time + data["latency"] + \
209 | max(connectivity_graph.nodes[u]["uploadDelay"], connectivity_graph.nodes[v]["downloadDelay"],
210 | model_size / data["availableBandwidth"]) + \
211 | max(connectivity_graph.nodes[v]["uploadDelay"], connectivity_graph.nodes[u]["downloadDelay"],
212 | model_size / data["availableBandwidth"])
213 |
214 | connectivity_graph.add_edge(u, v, weight=weight, latency=data["latency"],
215 | availableBandwidth=data["availableBandwidth"])
216 |
217 | for u in connectivity_graph.nodes:
218 | connectivity_graph.add_edge(u, u, weight=0, latency=0, availableBandwidth=1e32)
219 |
220 | best_result = cube_algorithm(connectivity_graph.copy()).to_directed()
221 |
222 | for u, v in best_result.edges:
223 | best_result.add_edge(u, v,
224 | latency=connectivity_graph.get_edge_data(u, v)['latency'],
225 | availableBandwidth=connectivity_graph.get_edge_data(u, v)['availableBandwidth'])
226 |
227 | best_cycle_time, _, _ = evaluate_cycle_time(add_upload_download_delays(best_result, computation_time, model_size))
228 | best_delta = 2
229 |
230 | n_nodes = connectivity_graph.number_of_nodes()
231 | for delta in range(2, n_nodes):
232 | result = delta_prim(connectivity_graph.copy(), delta).to_directed()
233 |
234 | for u, v, data in result.edges(data=True):
235 | weight = data["weight"] - (result.nodes[u]["uploadDelay"] + result.nodes[v]["downloadDelay"])
236 |
237 | result.add_edge(u, v, weight=weight,
238 | latency=connectivity_graph.get_edge_data(u, v)['latency'],
239 | availableBandwidth=connectivity_graph.get_edge_data(u, v)['availableBandwidth'])
240 |
241 | cycle_time, _, _ = evaluate_cycle_time(add_upload_download_delays(result, computation_time, model_size))
242 |
243 | if cycle_time < best_cycle_time:
244 | best_result = result
245 | best_cycle_time = cycle_time
246 | best_delta = delta
247 |
248 | return best_result, best_cycle_time, best_delta
249 |
250 |
251 | def get_matcha_cycle_time(underlay, connectivity_graph, computation_time, model_size, communication_budget):
252 | """
253 |
254 | :param underlay:
255 | :param connectivity_graph:
256 | :param computation_time:
257 | :param model_size:
258 | :param communication_budget:
259 | :return:
260 | """
261 | path_to_save_network = os.path.join("temp", "colored_network.gml")
262 | path_to_matching_history_file = os.path.join("temp", "matching_history.csv")
263 |
264 | try:
265 | shutil.rmtree("temp")
266 | except FileNotFoundError:
267 | pass
268 |
269 | os.makedirs("temp", exist_ok=True)
270 |
271 | topology_generator = RandomTopologyGenerator(underlay.copy(),
272 | communication_budget,
273 | network_save_path=path_to_save_network,
274 | path_to_history_file=path_to_matching_history_file)
275 |
276 | n_rounds = 1000
277 | np.random.seed(0)
278 | for _ in range(n_rounds):
279 | topology_generator.step()
280 |
281 | path_to_colored_network = os.path.join("temp", "colored_network.gml")
282 | path_to_matching_history_file = os.path.join("temp", "matching_history.csv")
283 |
284 | colored_network = nx.read_gml(path_to_colored_network)
285 | matching_list = get_matching_list_from_graph(colored_network)
286 |
287 | simulated_time = np.zeros(n_rounds)
288 | with open(path_to_matching_history_file) as csv_file:
289 | csv_reader = csv.reader(csv_file, delimiter=' ')
290 |
291 | for ii, row in enumerate(csv_reader):
292 | overlay = nx.Graph()
293 | overlay.add_nodes_from(connectivity_graph.nodes(data=True))
294 |
295 | current_matching_activations = row
296 | for matching_idx, matching_activation in enumerate(current_matching_activations):
297 | if int(matching_activation):
298 | overlay = nx.compose(overlay, matching_list[matching_idx])
299 |
300 | for u, v in overlay.edges():
301 | overlay.add_edge(u, v,
302 | latency=connectivity_graph.get_edge_data(u, v)["latency"],
303 | availableBandwidth=connectivity_graph.get_edge_data(u, v)['availableBandwidth']
304 | )
305 |
306 | if nx.is_empty(overlay):
307 | # If overlay is empty, then no communication cost is added
308 | simulated_time[ii] = computation_time
309 |
310 | else:
311 | overlay = add_upload_download_delays(overlay, computation_time, model_size)
312 |
313 | cycle_time = 0
314 | for u, v, data in overlay.edges(data=True):
315 | if data["weight"] > cycle_time:
316 | cycle_time = data["weight"]
317 |
318 | simulated_time[ii] = cycle_time
319 |
320 | simulated_time = simulated_time.cumsum()
321 |
322 | try:
323 | shutil.rmtree("temp")
324 | except FileNotFoundError:
325 | pass
326 |
327 | return simulated_time[-1] / (n_rounds - 1)
328 |
--------------------------------------------------------------------------------
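As a rough end-to-end sketch of the helpers in this file (a hypothetical driver with arbitrary numbers, assuming the repository root is on the PYTHONPATH): build a tiny underlay whose edges carry a "distance" and whose nodes carry upload/download delays, derive the connectivity graph, and inspect the per-edge weights produced by add_upload_download_delays.

import networkx as nx

from graph_utils.utils.utils import get_connectivity_graph, add_upload_download_delays

# Toy 3-node underlay: "distance" drives the latency model, node attributes drive the
# per-node upload/download delays used by add_upload_download_delays.
underlay = nx.Graph()
for node in range(3):
    underlay.add_node(node, uploadDelay=0.3, downloadDelay=0.4)
underlay.add_edge(0, 1, distance=100.)
underlay.add_edge(1, 2, distance=200.)

connectivity = get_connectivity_graph(underlay, default_capacity=1e9)
overlay = add_upload_download_delays(connectivity, computation_time=1., model_size=1e8)

for u, v, data in overlay.edges(data=True):
    print(u, "->", v, "weight:", round(data["weight"], 3))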