├── eval ├── data │ └── .gitkeep ├── graph.adj ├── 6_star.edges ├── ip_addr_3Machines.json ├── 4_node_fullyConnected.edges ├── ip_addr_4Machines.json ├── ip_addr_5Machines.json ├── ip_addr_6Machines.json ├── ip_addr_7Machines.json ├── step_configs │ ├── config_movielens_sharing.ini │ ├── config_cifar_sharing.ini │ ├── config_movielens_subsampling.ini │ ├── config_femnist_sharing.ini │ ├── config_shakespeare_sharing.ini │ ├── config_reddit_sharing.ini │ ├── config_cifar_partialmodel.ini │ ├── config_cifar_subsampling.ini │ ├── config_femnist_partialmodel.ini │ ├── config_reddit_partialmodel.ini │ ├── config_reddit_subsampling.ini │ ├── config_celeba_sharing.ini │ ├── config_shakespeare_subsampling.ini │ ├── config_shakespeare_partialmodel.ini │ ├── config_femnist_subsampling.ini │ ├── config_celeba_partialmodel.ini │ ├── config_celeba_subsampling.ini │ ├── config_movielens_jwins.ini │ ├── config_cifar_jwins.ini │ ├── config_shakespeare_jwins.ini │ ├── config_femnist_jwins.ini │ └── config_celeba_jwins.ini ├── run.sh ├── run_all.sh ├── plot_shared.py ├── testingKNN.py ├── testingKFN.py ├── testing.py ├── 36_nodes.edges ├── testingManual.py ├── testingPeerSampler.py ├── testingPeerSamplerDynamic.py ├── testingPeerSamplerDynamicManual.py ├── testingSTC.py ├── testingFederated.py ├── plot_model.py ├── 96_regular.edges ├── plot_percentile.py ├── 80_nodes.edges ├── run_grid.sh ├── 96_nodes_smallworld.edges ├── run_xtimes_cifar.sh └── 96_nodes_random2.edges ├── requirements.txt ├── src └── decentralizepy │ ├── __init__.py │ ├── models │ ├── __init__.py │ └── Model.py │ ├── node │ ├── __init__.py │ ├── STC │ │ └── __init__.py │ ├── EpidemicLearning │ │ ├── __init__.py │ │ ├── EL_Oracle_TopologyBuilder.py │ │ └── EL_Oracle_Client.py │ ├── PeerSamplerDynamic.py │ └── DPSGDWithPeerSampler.py │ ├── sharing │ ├── __init__.py │ ├── JWINS │ │ ├── __init__.py │ │ └── JWINS.py │ └── PlainAverageSharing.py │ ├── training │ ├── __init__.py │ ├── text │ │ ├── __init__.py │ │ └── LLMTraining.py │ └── Training.py │ ├── communication │ ├── __init__.py │ └── Communication.py │ ├── datasets │ ├── text │ │ ├── __init__.py │ │ └── LLMData.py │ ├── __init__.py │ ├── Data.py │ └── Dataset.py │ ├── mappings │ ├── __init__.py │ ├── Mapping.py │ ├── Linear.py │ └── Manual.py │ ├── graphs │ ├── __init__.py │ ├── Ring.py │ ├── FullyConnected.py │ ├── Star.py │ ├── Regular.py │ ├── SmallWorld.py │ └── Graph.py │ ├── compression │ ├── EliasQuantization.py │ ├── EliasFpzip.py │ ├── EliasFpzipLossy.py │ ├── Compression.py │ ├── Lz4Wrapper.py │ ├── Elias.py │ └── Quantization.py │ └── utils.py ├── tutorial ├── ip.json ├── JWINS │ ├── ip.json │ ├── regular_16.txt │ ├── run_decentralized.sh │ └── config.ini ├── EpidemicLearning │ ├── ip.json │ ├── run_el-local.sh │ ├── run_el-oracle.sh │ ├── config_EL.ini │ ├── testingEL_Local.py │ ├── fullyConnected_16.edges │ └── testingEL_Oracle.py ├── regular_16.txt ├── run_decentralized.sh ├── run_federated.sh └── config.ini ├── pyproject.toml ├── .isort.cfg ├── .gitignore ├── setup.py ├── download_dataset.py ├── split_into_files.py ├── install_nMachines.sh ├── LICENSE ├── setup.cfg ├── generate_graph.py └── README.rst /eval/data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/src/decentralizepy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/decentralizepy/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/decentralizepy/node/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/decentralizepy/sharing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/decentralizepy/node/STC/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/decentralizepy/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/decentralizepy/communication/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/decentralizepy/datasets/text/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/decentralizepy/sharing/JWINS/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/decentralizepy/training/text/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tutorial/ip.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "localhost" 3 | } -------------------------------------------------------------------------------- /src/decentralizepy/node/EpidemicLearning/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tutorial/JWINS/ip.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "localhost" 3 | } -------------------------------------------------------------------------------- /eval/graph.adj: -------------------------------------------------------------------------------- 1 | 6 2 | 1 3 | 0 3 4 4 | 3 5 5 | 1 2 5 6 | 1 7 | 2 3 --------------------------------------------------------------------------------
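The .adj file above is the adjacency-list graph format used by the framework: the first line gives the number of nodes n, and each of the following n lines lists the neighbours of node 0, 1, ..., n-1 in order (the sample is symmetric, e.g. node 0 lists neighbour 1 and node 1 lists neighbour 0 back). A minimal parser sketch for this format; illustrative only, since the framework's own reader lives in Graph.py, which is not part of this listing:

def read_adj(path):
    """Return {node_id: set(neighbour_ids)} parsed from a .adj file."""
    with open(path) as f:
        n = int(f.readline())
        return {i: {int(v) for v in f.readline().split()} for i in range(n)}

adj = read_adj("eval/graph.adj")
assert len(adj) == 6 and 1 in adj[0]  # matches the sample above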
/tutorial/EpidemicLearning/ip.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "localhost" 3 | } -------------------------------------------------------------------------------- /src/decentralizepy/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .Femnist import Femnist 2 | -------------------------------------------------------------------------------- /src/decentralizepy/mappings/__init__.py: -------------------------------------------------------------------------------- 1 | from .Linear import Linear 2 | from .Mapping import Mapping 3 | -------------------------------------------------------------------------------- /src/decentralizepy/graphs/__init__.py: -------------------------------------------------------------------------------- 1 | from .Graph import Graph 2 | from .SmallWorld import SmallWorld 3 | -------------------------------------------------------------------------------- /eval/6_star.edges: -------------------------------------------------------------------------------- 1 | 6 2 | 0 1 3 | 0 2 4 | 0 3 5 | 0 4 6 | 0 5 7 | 1 0 8 | 2 0 9 | 3 0 10 | 4 0 11 | 5 0 -------------------------------------------------------------------------------- /eval/ip_addr_3Machines.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "10.90.41.131", 3 | "1": "10.90.41.132", 4 | "2": "10.90.41.133" 5 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /eval/4_node_fullyConnected.edges: -------------------------------------------------------------------------------- 1 | 4 2 | 0 1 3 | 0 2 4 | 0 3 5 | 1 0 6 | 1 2 7 | 1 3 8 | 2 0 9 | 2 1 10 | 2 3 11 | 3 0 12 | 3 1 13 | 3 2 14 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | multi_line_output=3 3 | include_trailing_comma=True 4 | force_grid_wrap=0 5 | combine_as_imports=True 6 | line_length=88 7 | -------------------------------------------------------------------------------- /eval/ip_addr_4Machines.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "10.90.41.130", 3 | "1": "10.90.41.131", 4 | "2": "10.90.41.132", 5 | "3": "10.90.41.133" 6 | } -------------------------------------------------------------------------------- /eval/ip_addr_5Machines.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "10.90.41.129", 3 | "1": "10.90.41.130", 4 | "2": "10.90.41.131", 5 | "3": "10.90.41.132", 6 | "4": "10.90.41.133" 7 | } -------------------------------------------------------------------------------- /eval/ip_addr_6Machines.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "10.90.41.128", 3 | "1": "10.90.41.129", 4 | "2": "10.90.41.130", 5 | "3": "10.90.41.131", 6 | "4": "10.90.41.132", 7 | "5": "10.90.41.133" 8 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.idea 2 | **/__pycache__/ 3 | **/data/ 4 | **/.DS_Store 5 | **/results/ 6 | **/experiment_results/ 7 | **/.vscode 8 | **/leaf/ 9 | **.egg-info 10 | 202** 11 | eval/data** 12 | **/massif.out* 13 | *swp 14 | build -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!$CONDA_PREFIX/python 2 | from setuptools import setup 3 | 4 | # 
https://packaging.python.org/guides/single-sourcing-package-version/ 5 | # http://blog.ionelmc.ro/2014/05/25/python-packaging/ 6 | setup(setup_cfg=True) 7 | -------------------------------------------------------------------------------- /eval/ip_addr_7Machines.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "10.90.41.127", 3 | "1": "10.90.41.128", 4 | "2": "10.90.41.129", 5 | "3": "10.90.41.130", 6 | "4": "10.90.41.131", 7 | "5": "10.90.41.132", 8 | "6": "10.90.41.133" 9 | } -------------------------------------------------------------------------------- /download_dataset.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | 3 | if __name__ == "__main__": 4 | torchvision.datasets.CIFAR10(root="./eval/data/", train=True, download=True) 5 | torchvision.datasets.CIFAR10(root="./eval/data/", train=False, download=True) 6 | 7 | # TODO: download the other datasets 8 | -------------------------------------------------------------------------------- /split_into_files.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from decentralizepy.datasets.Reddit import Reddit 4 | from decentralizepy.mappings import Linear 5 | 6 | if __name__ == "__main__": 7 | mapping = Linear(6, 16) 8 | f = Reddit(0, 0, mapping) 9 | assert len(sys.argv) == 3 10 | frm = sys.argv[1] 11 | to = sys.argv[2] 12 | f.file_per_user(frm, to) 13 | -------------------------------------------------------------------------------- /src/decentralizepy/node/EpidemicLearning/EL_Oracle_TopologyBuilder.py: -------------------------------------------------------------------------------- 1 | from decentralizepy.node.PeerSamplerDynamic import PeerSamplerDynamic 2 | 3 | 4 | class EL_Oracle_TopologyBuilder(PeerSamplerDynamic): 5 | """ 6 | This class defines the topology builder that responds to neighbor requests from the clients. 7 | 8 | """ 9 | 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | -------------------------------------------------------------------------------- /src/decentralizepy/node/EpidemicLearning/EL_Oracle_Client.py: -------------------------------------------------------------------------------- 1 | from decentralizepy.node.DPSGDWithPeerSampler import DPSGDWithPeerSampler 2 | 3 | 4 | class EL_Oracle_Client(DPSGDWithPeerSampler): 5 | """ 6 | This class defines the client class for Epidemic Learning with Oracle. 7 | The client requests the peer sampler for neighbors each round. 
8 | 9 | """ 10 | 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | -------------------------------------------------------------------------------- /tutorial/regular_16.txt: -------------------------------------------------------------------------------- 1 | 16 2 | 0 12 3 | 0 14 4 | 0 15 5 | 1 8 6 | 1 3 7 | 1 6 8 | 2 9 9 | 2 10 10 | 2 5 11 | 3 1 12 | 3 11 13 | 3 9 14 | 4 9 15 | 4 12 16 | 4 13 17 | 5 2 18 | 5 6 19 | 5 7 20 | 6 1 21 | 6 5 22 | 6 7 23 | 7 5 24 | 7 6 25 | 7 14 26 | 8 1 27 | 8 13 28 | 8 14 29 | 9 2 30 | 9 3 31 | 9 4 32 | 10 2 33 | 10 11 34 | 10 13 35 | 11 10 36 | 11 3 37 | 11 15 38 | 12 0 39 | 12 4 40 | 12 15 41 | 13 8 42 | 13 10 43 | 13 4 44 | 14 0 45 | 14 8 46 | 14 7 47 | 15 0 48 | 15 11 49 | 15 12 50 | -------------------------------------------------------------------------------- /tutorial/JWINS/regular_16.txt: -------------------------------------------------------------------------------- 1 | 16 2 | 0 12 3 | 0 14 4 | 0 15 5 | 1 8 6 | 1 3 7 | 1 6 8 | 2 9 9 | 2 10 10 | 2 5 11 | 3 1 12 | 3 11 13 | 3 9 14 | 4 9 15 | 4 12 16 | 4 13 17 | 5 2 18 | 5 6 19 | 5 7 20 | 6 1 21 | 6 5 22 | 6 7 23 | 7 5 24 | 7 6 25 | 7 14 26 | 8 1 27 | 8 13 28 | 8 14 29 | 9 2 30 | 9 3 31 | 9 4 32 | 10 2 33 | 10 11 34 | 10 13 35 | 11 10 36 | 11 3 37 | 11 15 38 | 12 0 39 | 12 4 40 | 12 15 41 | 13 8 42 | 13 10 43 | 13 4 44 | 14 0 45 | 14 8 46 | 14 7 47 | 15 0 48 | 15 11 49 | 15 12 50 | -------------------------------------------------------------------------------- /src/decentralizepy/graphs/Ring.py: -------------------------------------------------------------------------------- 1 | from decentralizepy.graphs.Graph import Graph 2 | 3 | 4 | class Ring(Graph): 5 | """ 6 | The class for generating a Ring topology 7 | 8 | """ 9 | 10 | def __init__(self, n_procs): 11 | """ 12 | Constructor. Generates a Ring graph 13 | 14 | Parameters 15 | ---------- 16 | n_procs : int 17 | total number of nodes in the graph 18 | 19 | """ 20 | super().__init__(n_procs) 21 | self.connect_graph() 22 | -------------------------------------------------------------------------------- /install_nMachines.sh: -------------------------------------------------------------------------------- 1 | #!\bin\bash 2 | 3 | cd 4 | mkdir -p Gitlab 5 | cd Gitlab 6 | git clone git@gitlab.epfl.ch:risharma/decentralizepy.git 7 | cd decentralizepy 8 | mkdir -p leaf/data/femnist/data/train 9 | mkdir -p leaf/data/femnist/data/test 10 | mkdir -p leaf/data/femnist/per_user_data/train 11 | ~/miniconda3/bin/conda remove --name decpy --all 12 | ~/miniconda3/bin/conda create -n decpy python=3.9 13 | ~/miniconda3/envs/decpy/bin/pip install --upgrade pip --quiet 14 | ~/miniconda3/envs/decpy/bin/pip install --editable .\[dev\] 15 | -------------------------------------------------------------------------------- /src/decentralizepy/graphs/FullyConnected.py: -------------------------------------------------------------------------------- 1 | from decentralizepy.graphs.Graph import Graph 2 | 3 | 4 | class FullyConnected(Graph): 5 | """ 6 | The class for generating a Fully Connected Graph Topology 7 | 8 | """ 9 | 10 | def __init__(self, n_procs): 11 | """ 12 | Constructor. 
/install_nMachines.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd 4 | mkdir -p Gitlab 5 | cd Gitlab 6 | git clone git@gitlab.epfl.ch:risharma/decentralizepy.git 7 | cd decentralizepy 8 | mkdir -p leaf/data/femnist/data/train 9 | mkdir -p leaf/data/femnist/data/test 10 | mkdir -p leaf/data/femnist/per_user_data/train 11 | ~/miniconda3/bin/conda remove --name decpy --all 12 | ~/miniconda3/bin/conda create -n decpy python=3.9 13 | ~/miniconda3/envs/decpy/bin/pip install --upgrade pip --quiet 14 | ~/miniconda3/envs/decpy/bin/pip install --editable .\[dev\] 15 | -------------------------------------------------------------------------------- /src/decentralizepy/graphs/FullyConnected.py: -------------------------------------------------------------------------------- 1 | from decentralizepy.graphs.Graph import Graph 2 | 3 | 4 | class FullyConnected(Graph): 5 | """ 6 | The class for generating a Fully Connected Graph Topology 7 | 8 | """ 9 | 10 | def __init__(self, n_procs): 11 | """ 12 | Constructor. Generates a Fully Connected graph 13 | 14 | Parameters 15 | ---------- 16 | n_procs : int 17 | total number of nodes in the graph 18 | 19 | """ 20 | super().__init__(n_procs) 21 | for node in range(n_procs): 22 | neighbors = set([x for x in range(n_procs) if x != node]) 23 | self.adj_list[node] = neighbors 24 | -------------------------------------------------------------------------------- /src/decentralizepy/compression/EliasQuantization.py: -------------------------------------------------------------------------------- 1 | from decentralizepy.compression.Elias import Elias 2 | from decentralizepy.compression.Quantization import Quantization 3 | 4 | 5 | class EliasQuantization(Elias, Quantization): 6 | """ 7 | Compress metadata and quantize parameters 8 | 9 | """ 10 | 11 | def __init__(self, float_precision: int = 2**15 - 1, *args, **kwargs): 12 | """ 13 | Constructor 14 | 15 | Parameters 16 | ---------- 17 | float_precision : int, optional 18 | Quantization parameter 19 | """ 20 | super().__init__(float_precision=float_precision, *args, **kwargs) 21 | self.k = float_precision 22 | -------------------------------------------------------------------------------- /src/decentralizepy/graphs/Star.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | from decentralizepy.graphs.Graph import Graph 4 | 5 | 6 | class Star(Graph): 7 | """ 8 | The class for generating a Star topology 9 | Adapted from ./Regular.py 10 | 11 | """ 12 | 13 | def __init__(self, n_procs): 14 | """ 15 | Constructor. Generates a Star graph 16 | 17 | Parameters 18 | ---------- 19 | n_procs : int 20 | total number of nodes in the graph 21 | 22 | """ 23 | super().__init__(n_procs) 24 | G = nx.star_graph(n_procs - 1) 25 | adj = G.adjacency() 26 | for i, l in adj: 27 | self.adj_list[i] = set() # new set 28 | for k in l: 29 | self.adj_list[i].add(k) 30 | if not nx.is_connected(G): 31 | self.connect_graph() 32 | -------------------------------------------------------------------------------- /eval/step_configs/config_movielens_sharing.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.MovieLens 3 | dataset_class = MovieLens 4 | model_class = MatrixFactorization 5 | train_dir = /mnt/nfs/shared/leaf/data/movielens 6 | test_dir = /mnt/nfs/shared/leaf/data/movielens 7 | ; python list of fractions below 8 | sizes = 9 | 10 | [OPTIMIZER_PARAMS] 11 | optimizer_package = torch.optim 12 | optimizer_class = SGD 13 | lr = 0.1 14 | 15 | [TRAIN_PARAMS] 16 | training_package = decentralizepy.training.Training 17 | training_class = Training 18 | rounds = 10 19 | full_epochs = False 20 | batch_size = 16 21 | shuffle = True 22 | loss_package = torch.nn 23 | loss_class = MSELoss 24 | 25 | [COMMUNICATION] 26 | comm_package = decentralizepy.communication.TCP 27 | comm_class = TCP 28 | addresses_filepath = ip_addr_6Machines.json 29 | 30 | [SHARING] 31 | sharing_package = decentralizepy.sharing.Sharing 32 | sharing_class = Sharing 33 | --------------------------------------------------------------------------------
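Every file in eval/step_configs follows this same five-section layout, and each *_package / *_class pair names a Python module and a class inside it. A sketch of how such a pair can be resolved with the standard library; this is a hypothetical loader written for illustration, not the framework's actual config handling:

import configparser
import importlib

config = configparser.ConfigParser()
config.read("eval/step_configs/config_movielens_sharing.ini")

section = config["SHARING"]
module = importlib.import_module(section["sharing_package"])  # decentralizepy.sharing.Sharing
sharing_cls = getattr(module, section["sharing_class"])       # the Sharing class inside that module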
/eval/step_configs/config_cifar_sharing.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.CIFAR10 3 | dataset_class = CIFAR10 4 | model_class = LeNet 5 | train_dir = /mnt/nfs/shared/CIFAR 6 | test_dir = /mnt/nfs/shared/CIFAR 7 | ; python list of fractions below 8 | sizes = 9 | random_seed = 99 10 | partition_niid = iid 11 | 12 | [OPTIMIZER_PARAMS] 13 | optimizer_package = torch.optim 14 | optimizer_class = SGD 15 | lr = 0.001 16 | 17 | [TRAIN_PARAMS] 18 | training_package = decentralizepy.training.Training 19 | training_class = Training 20 | rounds = 65 21 | full_epochs = False 22 | batch_size = 8 23 | shuffle = True 24 | loss_package = torch.nn 25 | loss_class = CrossEntropyLoss 26 | 27 | [COMMUNICATION] 28 | comm_package = decentralizepy.communication.TCP 29 | comm_class = TCP 30 | addresses_filepath = ip_addr_6Machines.json 31 | 32 | [SHARING] 33 | sharing_package = decentralizepy.sharing.Sharing 34 | sharing_class = Sharing 35 | -------------------------------------------------------------------------------- /src/decentralizepy/graphs/Regular.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | from decentralizepy.graphs.Graph import Graph 4 | 5 | 6 | class Regular(Graph): 7 | """ 8 | The class for generating a Regular topology 9 | 10 | """ 11 | 12 | def __init__(self, n_procs, degree, seed=None): 13 | """ 14 | Constructor. Generates a Regular graph 15 | 16 | Parameters 17 | ---------- 18 | n_procs : int 19 | total number of nodes in the graph 20 | degree : int 21 | Number of neighbors of each node 22 | 23 | """ 24 | super().__init__(n_procs) 25 | G = nx.random_regular_graph(degree, n_procs, seed) 26 | adj = G.adjacency() 27 | for i, l in adj: 28 | self.adj_list[i] = set() # new set 29 | for k in l: 30 | self.adj_list[i].add(k) 31 | if not nx.is_connected(G): 32 | self.connect_graph() 33 | -------------------------------------------------------------------------------- /eval/step_configs/config_movielens_subsampling.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.MovieLens 3 | dataset_class = MovieLens 4 | model_class = MatrixFactorization 5 | train_dir = /mnt/nfs/shared/leaf/data/movielens 6 | test_dir = /mnt/nfs/shared/leaf/data/movielens 7 | ; python list of fractions below 8 | sizes = 9 | 10 | [OPTIMIZER_PARAMS] 11 | optimizer_package = torch.optim 12 | optimizer_class = SGD 13 | lr = 0.1 14 | 15 | [TRAIN_PARAMS] 16 | training_package = decentralizepy.training.Training 17 | training_class = Training 18 | rounds = 10 19 | full_epochs = False 20 | batch_size = 16 21 | shuffle = True 22 | loss_package = torch.nn 23 | loss_class = MSELoss 24 | 25 | [COMMUNICATION] 26 | comm_package = decentralizepy.communication.TCP 27 | comm_class = TCP 28 | addresses_filepath = ip_addr_6Machines.json 29 | 30 | [SHARING] 31 | sharing_package = decentralizepy.sharing.SubSampling 32 | sharing_class = SubSampling 33 | alpha = 0.3 34 | -------------------------------------------------------------------------------- /eval/step_configs/config_femnist_sharing.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Femnist 3 | dataset_class = Femnist 4 | random_seed = 97 5 | model_class = CNN 6 | train_dir = /mnt/nfs/shared/leaf/data/femnist/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/femnist/data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 47 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle 
= True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.Sharing 33 | sharing_class = Sharing 34 | -------------------------------------------------------------------------------- /eval/step_configs/config_shakespeare_sharing.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Shakespeare 3 | dataset_class = Shakespeare 4 | model_class = LSTM 5 | train_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/per_user_data/train 6 | test_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/data/test 7 | ; python list of fractions below 8 | sizes = 9 | 10 | [OPTIMIZER_PARAMS] 11 | optimizer_package = torch.optim 12 | optimizer_class = SGD 13 | lr = 0.1 14 | 15 | [TRAIN_PARAMS] 16 | training_package = decentralizepy.training.Training 17 | training_class = Training 18 | rounds = 10 19 | full_epochs = False 20 | batch_size = 16 21 | shuffle = True 22 | loss_package = torch.nn 23 | loss_class = CrossEntropyLoss 24 | 25 | [COMMUNICATION] 26 | comm_package = decentralizepy.communication.TCP 27 | comm_class = TCP 28 | addresses_filepath = ip_addr_6Machines.json 29 | 30 | [SHARING] 31 | sharing_package = decentralizepy.sharing.Sharing 32 | sharing_class = Sharing 33 | -------------------------------------------------------------------------------- /eval/step_configs/config_reddit_sharing.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Reddit 3 | dataset_class = Reddit 4 | random_seed = 97 5 | model_class = RNN 6 | train_dir = /mnt/nfs/shared/leaf/data/reddit_new/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/reddit_new/new_small_data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 47 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.Sharing 33 | sharing_class = Sharing 34 | -------------------------------------------------------------------------------- /eval/step_configs/config_cifar_partialmodel.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.CIFAR10 3 | dataset_class = CIFAR10 4 | model_class = LeNet 5 | train_dir = /mnt/nfs/shared/CIFAR 6 | test_dir = /mnt/nfs/shared/CIFAR 7 | ; python list of fractions below 8 | sizes = 9 | random_seed = 99 10 | partition_niid = kshard 11 | shard = 2 12 | 13 | [OPTIMIZER_PARAMS] 14 | optimizer_package = torch.optim 15 | optimizer_class = SGD 16 | lr = 0.001 17 | 18 | [TRAIN_PARAMS] 19 | training_package = decentralizepy.training.Training 20 | training_class = Training 21 | rounds = 65 22 | full_epochs = False 23 | batch_size = 8 24 | shuffle = True 25 | loss_package = torch.nn 26 | loss_class = CrossEntropyLoss 27 | 28 
| [COMMUNICATION] 29 | comm_package = decentralizepy.communication.TCP 30 | comm_class = TCP 31 | addresses_filepath = ip_addr_6Machines.json 32 | 33 | [SHARING] 34 | sharing_package = decentralizepy.sharing.PartialModel 35 | sharing_class = PartialModel 36 | alpha=0.5 37 | -------------------------------------------------------------------------------- /eval/step_configs/config_cifar_subsampling.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.CIFAR10 3 | dataset_class = CIFAR10 4 | model_class = LeNet 5 | train_dir = /mnt/nfs/shared/CIFAR 6 | test_dir = /mnt/nfs/shared/CIFAR 7 | ; python list of fractions below 8 | sizes = 9 | random_seed = 99 10 | partition_niid = dirichlet 11 | alpha = 1 12 | 13 | [OPTIMIZER_PARAMS] 14 | optimizer_package = torch.optim 15 | optimizer_class = SGD 16 | lr = 0.001 17 | 18 | [TRAIN_PARAMS] 19 | training_package = decentralizepy.training.Training 20 | training_class = Training 21 | rounds = 65 22 | full_epochs = False 23 | batch_size = 8 24 | shuffle = True 25 | loss_package = torch.nn 26 | loss_class = CrossEntropyLoss 27 | 28 | [COMMUNICATION] 29 | comm_package = decentralizepy.communication.TCP 30 | comm_class = TCP 31 | addresses_filepath = ip_addr_6Machines.json 32 | 33 | [SHARING] 34 | sharing_package = decentralizepy.sharing.SubSampling 35 | sharing_class = SubSampling 36 | alpha = 0.5 37 | -------------------------------------------------------------------------------- /eval/step_configs/config_femnist_partialmodel.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Femnist 3 | dataset_class = Femnist 4 | random_seed = 97 5 | model_class = CNN 6 | train_dir = /mnt/nfs/shared/leaf/data/femnist/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/femnist/data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 47 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.PartialModel 33 | sharing_class = PartialModel 34 | alpha=0.1 35 | -------------------------------------------------------------------------------- /eval/step_configs/config_reddit_partialmodel.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Reddit 3 | dataset_class = Reddit 4 | random_seed = 97 5 | model_class = RNN 6 | train_dir = /mnt/nfs/shared/leaf/data/reddit_new/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/reddit_new/new_small_data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 47 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | 
[COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.PartialModel 33 | sharing_class = PartialModel 34 | alpha = 0.1 35 | -------------------------------------------------------------------------------- /eval/step_configs/config_reddit_subsampling.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Reddit 3 | dataset_class = Reddit 4 | random_seed = 97 5 | model_class = RNN 6 | train_dir = /mnt/nfs/shared/leaf/data/reddit_new/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/reddit_new/new_small_data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 4 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.SubSampling 33 | sharing_class = SubSampling 34 | alpha = 0.1 35 | -------------------------------------------------------------------------------- /eval/step_configs/config_celeba_sharing.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Celeba 3 | dataset_class = Celeba 4 | model_class = CNN 5 | images_dir = /mnt/nfs/shared/leaf/data/celeba/data/raw/img_align_celeba 6 | train_dir = /mnt/nfs/shared/leaf/data/celeba/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/celeba/data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 4 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.Sharing 33 | sharing_class = Sharing 34 | -------------------------------------------------------------------------------- /eval/step_configs/config_shakespeare_subsampling.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Shakespeare 3 | dataset_class = Shakespeare 4 | random_seed = 97 5 | model_class = LSTM 6 | train_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.1 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 10 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package 
= torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.SubSampling 33 | sharing_class = SubSampling 34 | alpha = 0.1 35 | -------------------------------------------------------------------------------- /eval/step_configs/config_shakespeare_partialmodel.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Shakespeare 3 | dataset_class = Shakespeare 4 | random_seed = 97 5 | model_class = LSTM 6 | train_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.1 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 10 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.PartialModel 33 | sharing_class = PartialModel 34 | alpha = 0.1 35 | -------------------------------------------------------------------------------- /eval/step_configs/config_femnist_subsampling.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Femnist 3 | dataset_class = Femnist 4 | random_seed = 97 5 | model_class = CNN 6 | train_dir = /mnt/nfs/shared/leaf/data/femnist/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/femnist/data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | # There are 734463 femnist samples 17 | [TRAIN_PARAMS] 18 | training_package = decentralizepy.training.Training 19 | training_class = Training 20 | rounds = 47 21 | full_epochs = False 22 | batch_size = 16 23 | shuffle = True 24 | loss_package = torch.nn 25 | loss_class = CrossEntropyLoss 26 | 27 | [COMMUNICATION] 28 | comm_package = decentralizepy.communication.TCP 29 | comm_class = TCP 30 | addresses_filepath = ip_addr_6Machines.json 31 | 32 | [SHARING] 33 | sharing_package = decentralizepy.sharing.SubSampling 34 | sharing_class = SubSampling 35 | alpha = 0.1 36 | -------------------------------------------------------------------------------- /eval/step_configs/config_celeba_partialmodel.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Celeba 3 | dataset_class = Celeba 4 | model_class = CNN 5 | images_dir = /mnt/nfs/shared/leaf/data/celeba/data/raw/img_align_celeba 6 | train_dir = /mnt/nfs/shared/leaf/data/celeba/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/celeba/data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | 
training_class = Training 19 | rounds = 4 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.PartialModel 33 | sharing_class = PartialModel 34 | alpha = 0.1 -------------------------------------------------------------------------------- /eval/step_configs/config_celeba_subsampling.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Celeba 3 | dataset_class = Celeba 4 | model_class = CNN 5 | images_dir = /mnt/nfs/shared/leaf/data/celeba/data/raw/img_align_celeba 6 | train_dir = /mnt/nfs/shared/leaf/data/celeba/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/celeba/data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 4 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.SubSampling 33 | sharing_class = SubSampling 34 | alpha = 0.1 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [2022] [DecentralizePy] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /src/decentralizepy/datasets/Data.py: -------------------------------------------------------------------------------- 1 | class Data: 2 | """ 3 | This class defines the API for Data. 
4 | 5 | """ 6 | 7 | def __init__(self, x, y): 8 | """ 9 | Constructor 10 | 11 | Parameters 12 | ---------- 13 | x : numpy array 14 | A numpy array of data samples 15 | y : numpy array 16 | A numpy array of outputs corresponding to the sample 17 | 18 | """ 19 | self.x = x 20 | self.y = y 21 | 22 | def __len__(self): 23 | """ 24 | Return the number of samples in the dataset 25 | 26 | Returns 27 | ------- 28 | int 29 | Number of samples 30 | 31 | """ 32 | return self.y.shape[0] 33 | 34 | def __getitem__(self, i): 35 | """ 36 | Function to get the item with index i. 37 | 38 | Parameters 39 | ---------- 40 | i : int 41 | Index 42 | 43 | Returns 44 | ------- 45 | 2-tuple 46 | A tuple of the ith data sample and it's corresponding label 47 | 48 | """ 49 | return self.x[i], self.y[i] 50 | -------------------------------------------------------------------------------- /src/decentralizepy/graphs/SmallWorld.py: -------------------------------------------------------------------------------- 1 | import smallworld 2 | 3 | from decentralizepy.graphs.Graph import Graph 4 | 5 | 6 | class SmallWorld(Graph): 7 | """ 8 | The class for generating a SmallWorld topology Graph 9 | 10 | Adapted from https://gitlab.epfl.ch/sacs/ml-rawdatasharing/dnn-recommender/-/blob/master/topologies.py 11 | 12 | """ 13 | 14 | def __init__(self, n_procs, k_over_2, beta): 15 | """ 16 | Constructor. Generates a random connected SmallWorld graph 17 | 18 | Parameters 19 | ---------- 20 | n_procs : int 21 | total number of nodes in the graph 22 | k_over_2 : int 23 | k_over_2 config for smallworld 24 | beta : float 25 | beta config for smallworld. β = 1 is truly equal to the Erdős-Rényi network model 26 | 27 | """ 28 | super().__init__(n_procs) 29 | G = smallworld.get_smallworld_graph(self.n_procs, k_over_2, beta) 30 | for edge in list(G.edges): 31 | node1 = edge[0] 32 | node2 = edge[1] 33 | self.adj_list[node1].add(node2) 34 | self.adj_list[node2].add(node1) 35 | 36 | self.connect_graph() 37 | -------------------------------------------------------------------------------- /tutorial/run_decentralized.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | decpy_path=../eval # Path to eval folder 4 | graph=regular_16.txt # Absolute path of the graph file generated using the generate_graph.py script 5 | run_path=../eval/data # Path to the folder where the graph and config file will be copied and the results will be stored 6 | config_file=config.ini 7 | cp $graph $config_file $run_path 8 | 9 | env_python=~/miniconda3/envs/decpy/bin/python3 # Path to python executable of the environment | conda recommended 10 | machines=1 # number of machines in the runtime 11 | iterations=80 12 | test_after=20 13 | eval_file=$decpy_path/testing.py # decentralized driver code (run on each machine) 14 | log_level=INFO # DEBUG | INFO | WARN | CRITICAL 15 | 16 | m=0 # machine id corresponding consistent with ip.json 17 | echo M is $m 18 | 19 | procs_per_machine=16 # 16 processes on 1 machine 20 | echo procs per machine is $procs_per_machine 21 | 22 | log_dir=$run_path/$(date '+%Y-%m-%dT%H:%M')/machine$m # in the eval folder 23 | mkdir -p $log_dir 24 | 25 | $env_python $eval_file -ro 0 -tea $test_after -ld $log_dir -mid $m -ps $procs_per_machine -ms $machines -is $iterations -gf $run_path/$graph -ta $test_after -cf $run_path/$config_file -ll $log_level -wsd $log_dir -------------------------------------------------------------------------------- /tutorial/JWINS/run_decentralized.sh: 
/src/decentralizepy/graphs/SmallWorld.py: -------------------------------------------------------------------------------- 1 | import smallworld 2 | 3 | from decentralizepy.graphs.Graph import Graph 4 | 5 | 6 | class SmallWorld(Graph): 7 | """ 8 | The class for generating a SmallWorld topology Graph 9 | 10 | Adapted from https://gitlab.epfl.ch/sacs/ml-rawdatasharing/dnn-recommender/-/blob/master/topologies.py 11 | 12 | """ 13 | 14 | def __init__(self, n_procs, k_over_2, beta): 15 | """ 16 | Constructor. Generates a random connected SmallWorld graph 17 | 18 | Parameters 19 | ---------- 20 | n_procs : int 21 | total number of nodes in the graph 22 | k_over_2 : int 23 | k_over_2 config for smallworld 24 | beta : float 25 | beta config for smallworld. With β = 1 the graph approaches the Erdős-Rényi random graph model 26 | 27 | """ 28 | super().__init__(n_procs) 29 | G = smallworld.get_smallworld_graph(self.n_procs, k_over_2, beta) 30 | for edge in list(G.edges): 31 | node1 = edge[0] 32 | node2 = edge[1] 33 | self.adj_list[node1].add(node2) 34 | self.adj_list[node2].add(node1) 35 | 36 | self.connect_graph() 37 | -------------------------------------------------------------------------------- /tutorial/run_decentralized.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | decpy_path=../eval # Path to eval folder 4 | graph=regular_16.txt # Path of the graph file generated using the generate_graph.py script 5 | run_path=../eval/data # Path to the folder where the graph and config file will be copied and the results will be stored 6 | config_file=config.ini 7 | cp $graph $config_file $run_path 8 | 9 | env_python=~/miniconda3/envs/decpy/bin/python3 # Path to python executable of the environment | conda recommended 10 | machines=1 # number of machines in the runtime 11 | iterations=80 12 | test_after=20 13 | eval_file=$decpy_path/testing.py # decentralized driver code (run on each machine) 14 | log_level=INFO # DEBUG | INFO | WARN | CRITICAL 15 | 16 | m=0 # machine id consistent with ip.json 17 | echo M is $m 18 | 19 | procs_per_machine=16 # 16 processes on 1 machine 20 | echo procs per machine is $procs_per_machine 21 | 22 | log_dir=$run_path/$(date '+%Y-%m-%dT%H:%M')/machine$m # in the eval folder 23 | mkdir -p $log_dir 24 | 25 | $env_python $eval_file -ro 0 -tea $test_after -ld $log_dir -mid $m -ps $procs_per_machine -ms $machines -is $iterations -gf $run_path/$graph -ta $test_after -cf $run_path/$config_file -ll $log_level -wsd $log_dir -------------------------------------------------------------------------------- /tutorial/JWINS/run_decentralized.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | decpy_path=../../eval # Path to eval folder 4 | graph=regular_16.txt # Path of the graph file generated using the generate_graph.py script 5 | run_path=../../eval/data # Path to the folder where the graph and config file will be copied and the results will be stored 6 | config_file=config.ini 7 | cp $graph $config_file $run_path 8 | 9 | env_python=~/miniconda3/envs/decpy/bin/python3 # Path to python executable of the environment | conda recommended 10 | machines=1 # number of machines in the runtime 11 | iterations=80 12 | test_after=20 13 | eval_file=$decpy_path/testing.py # decentralized driver code (run on each machine) 14 | log_level=INFO # DEBUG | INFO | WARN | CRITICAL 15 | 16 | m=0 # machine id consistent with ip.json 17 | echo M is $m 18 | 19 | procs_per_machine=16 # 16 processes on 1 machine 20 | echo procs per machine is $procs_per_machine 21 | 22 | log_dir=$run_path/$(date '+%Y-%m-%dT%H:%M')/machine$m # in the eval folder 23 | mkdir -p $log_dir 24 | 25 | $env_python $eval_file -ro 0 -tea $test_after -ld $log_dir -mid $m -ps $procs_per_machine -ms $machines -is $iterations -gf $run_path/$graph -ta $test_after -cf $run_path/$config_file -ll $log_level -wsd $log_dir -------------------------------------------------------------------------------- /eval/step_configs/config_movielens_jwins.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.MovieLens 3 | dataset_class = MovieLens 4 | model_class = MatrixFactorization 5 | train_dir = /mnt/nfs/shared/leaf/data/movielens 6 | test_dir = /mnt/nfs/shared/leaf/data/movielens 7 | ; python list of fractions below 8 | sizes = 9 | 10 | [OPTIMIZER_PARAMS] 11 | optimizer_package = torch.optim 12 | optimizer_class = SGD 13 | lr = 0.1 14 | 15 | [TRAIN_PARAMS] 16 | training_package = decentralizepy.training.Training 17 | training_class = Training 18 | rounds = 10 19 | full_epochs = False 20 | batch_size = 16 21 | shuffle = True 22 | loss_package = torch.nn 23 | loss_class = MSELoss 24 | 25 | [COMMUNICATION] 26 | comm_package = decentralizepy.communication.TCP 27 | comm_class = TCP 28 | addresses_filepath = ip_addr_6Machines.json 29 | 30 | [SHARING] 31 | sharing_package = decentralizepy.sharing.JWINS.JWINS 32 | sharing_class = JWINS 33 | change_based_selection = True 34 | alpha_list = [0.1,0.15,0.2,0.25,0.3,0.4,1.0] 35 | wavelet=sym2 36 | level= 4 37 | accumulation = True 38 | accumulate_averaging_changes = True 39 | metadata_cap = 0.5 40 | compression_package = decentralizepy.compression.EliasFpzip 41 | compression_class = EliasFpzip 42 | compress = True 43 | --------------------------------------------------------------------------------
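The JWINS sharing configured above communicates a sparsified wavelet decomposition of the model update: alpha_list gives the candidate fractions of coefficients to keep, while wavelet and level select the transform. A toy sketch of that idea with PyWavelets; this is illustrative only, not the JWINS implementation, and topk_wavelet is a made-up helper name:

import numpy as np
import pywt

def topk_wavelet(update, alpha, wavelet="sym2", level=4):
    # decompose, then keep only the alpha fraction of coefficients largest in magnitude
    coeffs = pywt.wavedec(update, wavelet, level=level)
    flat, slices = pywt.coeffs_to_array(coeffs)
    k = max(1, int(alpha * flat.size))
    threshold = np.partition(np.abs(flat), flat.size - k)[flat.size - k]
    flat[np.abs(flat) < threshold] = 0.0
    return pywt.waverec(pywt.array_to_coeffs(flat, slices, output_format="wavedec"), wavelet)

approximation = topk_wavelet(np.random.randn(256), alpha=0.2)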
/tutorial/EpidemicLearning/run_el-local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | decpy_path=../../eval # Path to eval folder 4 | graph=fullyConnected_16.edges # Path of the graph file generated using the generate_graph.py script 5 | run_path=../../eval/data # Path to the folder where the graph and config file will be copied and the results will be stored 6 | config_file=config_EL.ini 7 | cp $graph $config_file $run_path 8 | 9 | env_python=~/miniconda3/envs/decpy/bin/python3 # Path to python executable of the environment | conda recommended 10 | machines=1 # number of machines in the runtime 11 | iterations=80 12 | test_after=20 13 | eval_file=testingEL_Local.py # decentralized driver code (run on each machine) 14 | log_level=INFO # DEBUG | INFO | WARN | CRITICAL 15 | 16 | m=0 # machine id consistent with ip.json 17 | echo M is $m 18 | 19 | procs_per_machine=16 # 16 processes on 1 machine 20 | echo procs per machine is $procs_per_machine 21 | 22 | log_dir=$run_path/$(date '+%Y-%m-%dT%H:%M')/machine$m # in the eval folder 23 | mkdir -p $log_dir 24 | 25 | $env_python $eval_file -ro 0 -tea $test_after -ld $log_dir -mid $m -ps $procs_per_machine -ms $machines -is $iterations -gf $run_path/$graph -ta $test_after -cf $run_path/$config_file -ll $log_level -wsd $log_dir -------------------------------------------------------------------------------- /tutorial/EpidemicLearning/run_el-oracle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | decpy_path=../../eval # Path to eval folder 4 | graph=fullyConnected_16.edges # Path of the graph file generated using the generate_graph.py script 5 | run_path=../../eval/data # Path to the folder where the graph and config file will be copied and the results will be stored 6 | config_file=config_EL.ini 7 | cp $graph $config_file $run_path 8 | 9 | env_python=~/miniconda3/envs/decpy/bin/python3 # Path to python executable of the environment | conda recommended 10 | machines=1 # number of machines in the runtime 11 | iterations=80 12 | test_after=20 13 | eval_file=testingEL_Oracle.py # decentralized driver code (run on each machine) 14 | log_level=INFO # DEBUG | INFO | WARN | CRITICAL 15 | 16 | m=0 # machine id consistent with ip.json 17 | echo M is $m 18 | 19 | procs_per_machine=16 # 16 processes on 1 machine 20 | echo procs per machine is $procs_per_machine 21 | 22 | log_dir=$run_path/$(date '+%Y-%m-%dT%H:%M')/machine$m # in the eval folder 23 | mkdir -p $log_dir 24 | 25 | $env_python $eval_file -ro 0 -tea $test_after -ld $log_dir -mid $m -ps $procs_per_machine -ms $machines -is $iterations -gf $run_path/$graph -ta $test_after -cf $run_path/$config_file -ll $log_level -wsd $log_dir -------------------------------------------------------------------------------- /eval/step_configs/config_cifar_jwins.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.CIFAR10 3 | dataset_class = CIFAR10 4 | model_class = LeNet 5 | train_dir = /mnt/nfs/shared/CIFAR 6 | test_dir = /mnt/nfs/shared/CIFAR 7 | ; python list of fractions below 8 | sizes = 9 | random_seed = 99 10 | partition_niid = dirichlet 11 | alpha = 1 12 | 13 | [OPTIMIZER_PARAMS] 14 | optimizer_package = torch.optim 15 | optimizer_class = SGD 16 | lr = 0.01 17 | 18 | [TRAIN_PARAMS] 19 | training_package = decentralizepy.training.Training 20 | training_class = Training 21 | rounds = 3 22 | full_epochs = False 23 | batch_size = 8 24 | shuffle = True 25 | loss_package = torch.nn 26 | loss_class = CrossEntropyLoss 27 | 28 | [COMMUNICATION] 29 | comm_package = decentralizepy.communication.TCP 30 | comm_class = TCP 31 | addresses_filepath = ip_addr_6Machines.json 32 | 33 | [SHARING] 34 | sharing_package = decentralizepy.sharing.JWINS.JWINS 35 | sharing_class = JWINS 36 | change_based_selection = True 37 | alpha_list = [0.1,0.15,0.2,0.25,0.3,0.4,1.0] 38 | wavelet=sym2 39 | level= 4 40 | accumulation = True 41 | accumulate_averaging_changes = True 42 | metadata_cap = 0.5 43 | compression_package = decentralizepy.compression.EliasFpzip 44 | 
compression_class = EliasFpzip 45 | compress = True 46 | -------------------------------------------------------------------------------- /tutorial/run_federated.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | decpy_path=../eval # Path to eval folder 4 | graph=regular_16.txt # Path of the graph file 5 | run_path=../eval/data # Path to the folder where the graph and config file will be copied and the results will be stored 6 | config_file=config.ini 7 | cp $graph $config_file $run_path 8 | 9 | env_python=~/miniconda3/envs/decpy/bin/python3 # Path to python executable of the environment | conda recommended 10 | machines=1 # number of machines in the runtime 11 | iterations=80 12 | test_after=20 13 | eval_file=$decpy_path/testingFederated.py # decentralized driver code (run on each machine) 14 | log_level=INFO # DEBUG | INFO | WARN | CRITICAL 15 | 16 | server_rank=-1 17 | server_machine=0 18 | working_rate=0.5 19 | 20 | m=0 # machine id consistent with ip.json 21 | echo M is $m 22 | 23 | procs_per_machine=16 # 16 processes on 1 machine 24 | echo procs per machine is $procs_per_machine 25 | 26 | log_dir=$run_path/$(date '+%Y-%m-%dT%H:%M')/machine$m # in the eval folder 27 | mkdir -p $log_dir 28 | 29 | $env_python $eval_file -ro 0 -tea $test_after -ld $log_dir -mid $m -ps $procs_per_machine -ms $machines -is $iterations -gf $run_path/$graph -ta $test_after -cf $run_path/$config_file -ll $log_level -sm $server_machine -sr $server_rank -wr $working_rate -------------------------------------------------------------------------------- /eval/step_configs/config_shakespeare_jwins.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Shakespeare 3 | dataset_class = Shakespeare 4 | random_seed = 97 5 | model_class = LSTM 6 | train_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.1 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 10 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.JWINS.JWINS 33 | sharing_class = JWINS 34 | change_based_selection = True 35 | alpha_list = [0.1,0.15,0.2,0.25,0.3,0.4,1.0] 36 | wavelet=sym2 37 | level= 4 38 | accumulation = True 39 | accumulate_averaging_changes = True 40 | metadata_cap = 0.5 41 | compression_package = decentralizepy.compression.EliasFpzip 42 | compression_class = EliasFpzip 43 | compress = True 44 | -------------------------------------------------------------------------------- /eval/step_configs/config_femnist_jwins.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Femnist 3 | dataset_class = Femnist 4 | random_seed = 97 5 | model_class = CNN 6 | train_dir = /mnt/nfs/shared/leaf/data/femnist/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/femnist/data/test 8 | ; 
python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | # There are 734463 femnist samples 17 | [TRAIN_PARAMS] 18 | training_package = decentralizepy.training.Training 19 | training_class = Training 20 | rounds = 47 21 | full_epochs = False 22 | batch_size = 16 23 | shuffle = True 24 | loss_package = torch.nn 25 | loss_class = CrossEntropyLoss 26 | 27 | [COMMUNICATION] 28 | comm_package = decentralizepy.communication.TCP 29 | comm_class = TCP 30 | addresses_filepath = ip_addr_6Machines.json 31 | 32 | [SHARING] 33 | sharing_package = decentralizepy.sharing.JWINS.JWINS 34 | sharing_class = JWINS 35 | change_based_selection = True 36 | alpha_list = [0.1,0.15,0.2,0.25,0.3,0.4,1.0] 37 | wavelet=sym2 38 | level= 4 39 | accumulation = True 40 | accumulate_averaging_changes = True 41 | metadata_cap = 0.5 42 | compression_package = decentralizepy.compression.EliasFpzip 43 | compression_class = EliasFpzip 44 | compress = True 45 | -------------------------------------------------------------------------------- /eval/step_configs/config_celeba_jwins.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.Celeba 3 | dataset_class = Celeba 4 | model_class = CNN 5 | images_dir = /mnt/nfs/shared/leaf/data/celeba/data/raw/img_align_celeba 6 | train_dir = /mnt/nfs/shared/leaf/data/celeba/per_user_data/train 7 | test_dir = /mnt/nfs/shared/leaf/data/celeba/data/test 8 | ; python list of fractions below 9 | sizes = 10 | 11 | [OPTIMIZER_PARAMS] 12 | optimizer_package = torch.optim 13 | optimizer_class = SGD 14 | lr = 0.001 15 | 16 | [TRAIN_PARAMS] 17 | training_package = decentralizepy.training.Training 18 | training_class = Training 19 | rounds = 4 20 | full_epochs = False 21 | batch_size = 16 22 | shuffle = True 23 | loss_package = torch.nn 24 | loss_class = CrossEntropyLoss 25 | 26 | [COMMUNICATION] 27 | comm_package = decentralizepy.communication.TCP 28 | comm_class = TCP 29 | addresses_filepath = ip_addr_6Machines.json 30 | 31 | [SHARING] 32 | sharing_package = decentralizepy.sharing.JWINS.JWINS 33 | sharing_class = JWINS 34 | change_based_selection = True 35 | alpha_list = [0.1,0.15,0.2,0.25,0.3,0.4,1.0] 36 | wavelet=sym2 37 | level= 4 38 | accumulation = True 39 | accumulate_averaging_changes = True 40 | metadata_cap = 0.5 41 | compression_package = decentralizepy.compression.EliasFpzip 42 | compression_class = EliasFpzip 43 | compress = True 44 | 45 | -------------------------------------------------------------------------------- /src/decentralizepy/compression/EliasFpzip.py: -------------------------------------------------------------------------------- 1 | # elias implementation: taken from this stack overflow post: 2 | # https://stackoverflow.com/questions/62843156/python-fast-compression-of-large-amount-of-numbers-with-elias-gamma 3 | import fpzip 4 | 5 | from decentralizepy.compression.Elias import Elias 6 | 7 | 8 | class EliasFpzip(Elias): 9 | """ 10 | Compression API 11 | 12 | """ 13 | 14 | def __init__(self, *args, **kwargs): 15 | """ 16 | Constructor 17 | """ 18 | 19 | def compress_float(self, arr): 20 | """ 21 | compression function for float arrays 22 | 23 | Parameters 24 | ---------- 25 | arr : np.ndarray 26 | Data to compress 27 | 28 | Returns 29 | ------- 30 | bytearray 31 | encoded data as bytes 32 | 33 | """ 34 | return fpzip.compress(arr, precision=0, order="C") 35 | 36 | def 
decompress_float(self, bytes): 37 | """ 38 | decompression function for compressed float arrays 39 | 40 | Parameters 41 | ---------- 42 | bytes :bytearray 43 | compressed data 44 | 45 | Returns 46 | ------- 47 | arr : np.ndarray 48 | decompressed data as array 49 | 50 | """ 51 | return fpzip.decompress(bytes, order="C").squeeze() 52 | -------------------------------------------------------------------------------- /src/decentralizepy/datasets/text/LLMData.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from decentralizepy.datasets.Data import Data 4 | 5 | 6 | class LLMData(Data): 7 | """ 8 | This class defines the API for Data. 9 | 10 | """ 11 | 12 | def __init__(self, x, y): 13 | """ 14 | Constructor 15 | 16 | Parameters 17 | ---------- 18 | x : numpy array 19 | A numpy array of data samples 20 | y : numpy array 21 | A numpy array of outputs corresponding to the sample 22 | 23 | """ 24 | self.x = x 25 | self.y = y 26 | 27 | def __len__(self): 28 | """ 29 | Return the number of samples in the dataset 30 | 31 | Returns 32 | ------- 33 | int 34 | Number of samples 35 | 36 | """ 37 | return len(self.y) 38 | 39 | def __getitem__(self, idx): 40 | """ 41 | Function to get the item with index i. 42 | 43 | Parameters 44 | ---------- 45 | idx : int 46 | Index 47 | 48 | Returns 49 | ------- 50 | dict 51 | A dict of the ith data sample, its attention_mask and label 52 | 53 | """ 54 | item = {key: torch.tensor(val[idx]) for key, val in self.x.items()} 55 | item["labels"] = torch.tensor(self.y[idx]) 56 | return item 57 | -------------------------------------------------------------------------------- /tutorial/EpidemicLearning/config_EL.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.CIFAR10 3 | dataset_class = CIFAR10 4 | model_class = LeNet 5 | ; provide directory containing "cifar-10-batches-py" folder | Pre-download recommended 6 | ; New download does not work with multiple processes | Crashes the first time, just retry 7 | train_dir = ../../eval/data/ 8 | test_dir = ../../eval/data/ 9 | ; python list of fractions below 10 | sizes = 11 | random_seed = 90 12 | partition_niid = dirichlet 13 | alpha = 0.1 ; alpha (dirichlet parameter) 14 | 15 | [OPTIMIZER_PARAMS] 16 | optimizer_package = torch.optim 17 | optimizer_class = SGD 18 | lr = 0.05 ; gamma 19 | 20 | [TRAIN_PARAMS] 21 | training_package = decentralizepy.training.Training 22 | training_class = Training 23 | rounds = 10 ; r 24 | full_epochs = False 25 | batch_size = 5 ; b 26 | shuffle = True 27 | loss_package = torch.nn 28 | loss_class = CrossEntropyLoss 29 | 30 | [COMMUNICATION] 31 | comm_package = decentralizepy.communication.TCP 32 | comm_class = TCP 33 | addresses_filepath = ip.json 34 | 35 | [SHARING] 36 | sharing_package = decentralizepy.sharing.PlainAverageSharing ; Does not use Metropolis-Hastings 37 | sharing_class = PlainAverageSharing 38 | compress = False 39 | 40 | [NODE] 41 | graph_degree = 7 ; s (number of neighbors in EL-Oracle and number of random neighbors picked to send message to in EL-Local) -------------------------------------------------------------------------------- /tutorial/config.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.CIFAR10 3 | dataset_class = CIFAR10 4 | model_class = LeNet 5 | ; provide directory containing "cifar-10-batches-py" folder | Pre-download 
recommended 6 | ; New download does not work with multiple processes | Crashes the first time, just retry 7 | train_dir = ../eval/data/ 8 | test_dir = ../eval/data/ 9 | ; python list of fractions below 10 | sizes = 11 | random_seed = 90 12 | partition_niid = False 13 | shards = 4 14 | validation_source = Test 15 | ; Train or Test: the set from which the validation set is extracted (only on CIFAR-10 and FEMNIST) 16 | ; On FEMNIST, if the validation set is extracted from the test set, it is the same for all clients 17 | validation_size = 0.1 18 | ; fraction of the train or test set used as the validation set; implemented only for the CIFAR-10 and FEMNIST datasets 19 | 20 | 21 | [OPTIMIZER_PARAMS] 22 | optimizer_package = torch.optim 23 | optimizer_class = SGD 24 | lr = 0.01 25 | 26 | [TRAIN_PARAMS] 27 | training_package = decentralizepy.training.Training 28 | training_class = Training 29 | rounds = 3 30 | full_epochs = False 31 | batch_size = 8 32 | shuffle = True 33 | loss_package = torch.nn 34 | loss_class = CrossEntropyLoss 35 | 36 | [COMMUNICATION] 37 | comm_package = decentralizepy.communication.TCP 38 | comm_class = TCP 39 | addresses_filepath = ../tutorial/ip.json 40 | 41 | [SHARING] 42 | sharing_package = decentralizepy.sharing.Sharing 43 | sharing_class = Sharing 44 | -------------------------------------------------------------------------------- /eval/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | script_path=$(realpath $(dirname $0)) 3 | 4 | # Working directory, where config files are read from and logs are written. 5 | decpy_path=/mnt/nfs/$(whoami)/decpy_workingdir 6 | cd $decpy_path 7 | 8 | # Python interpreter 9 | env_python=python3 10 | 11 | # File regular_16.txt is available in /tutorial 12 | graph=$decpy_path/regular_16.txt 13 | 14 | # File config_celeba_sharing.ini is available in /tutorial 15 | # In this config file, change addresses_filepath to correspond to your list of machines (example in /tutorial/ip.json) 16 | original_config=$decpy_path/config_celeba_sharing.ini 17 | 18 | # Local config file 19 | config_file=/tmp/$(basename $original_config) 20 | 21 | # Python script to be executed 22 | eval_file=$script_path/testingPeerSampler.py 23 | 24 | # General parameters 25 | procs_per_machine=8 26 | machines=2 27 | iterations=5 28 | test_after=2 29 | log_level=INFO 30 | 31 | m=`cat $(grep addresses_filepath $original_config | awk '{print $3}') | grep $(/sbin/ifconfig ens785 | grep 'inet ' | awk '{print $2}') | cut -d'"' -f2` 32 | echo M is $m 33 | log_dir=$(date '+%Y-%m-%dT%H:%M')/machine$m 34 | mkdir -p $log_dir 35 | 36 | # Copy and manipulate the local config file 37 | cp $original_config $config_file 38 | # echo "alpha = 0.10" >> $config_file 39 | 40 | $env_python $eval_file -ro 0 -tea $test_after -ld $log_dir -mid $m -ps $procs_per_machine -ms $machines -is $iterations -gf $graph -ta $test_after -cf $config_file -ll $log_level -wsd $log_dir 41 | -------------------------------------------------------------------------------- /tutorial/JWINS/config.ini: -------------------------------------------------------------------------------- 1 | [DATASET] 2 | dataset_package = decentralizepy.datasets.CIFAR10 3 | dataset_class = CIFAR10 4 | model_class = LeNet 5 | ; provide directory containing "cifar-10-batches-py" folder | Pre-download recommended 6 | ; New download does not work with multiple processes | Crashes the first time, just retry 7 | train_dir = ../../eval/data/ 8 | test_dir = ../../eval/data/ 9 | ; python list of fractions below 10
| sizes = 11 | random_seed = 90 12 | partition_niid = kshard 13 | shards = 4 14 | 15 | [OPTIMIZER_PARAMS] 16 | optimizer_package = torch.optim 17 | optimizer_class = SGD 18 | lr = 0.01 19 | 20 | [TRAIN_PARAMS] 21 | training_package = decentralizepy.training.Training 22 | training_class = Training 23 | rounds = 3 24 | full_epochs = False 25 | batch_size = 8 26 | shuffle = True 27 | loss_package = torch.nn 28 | loss_class = CrossEntropyLoss 29 | 30 | [COMMUNICATION] 31 | comm_package = decentralizepy.communication.TCP 32 | comm_class = TCP 33 | addresses_filepath = ip.json 34 | 35 | ; [SHARING] 36 | ; sharing_package = decentralizepy.sharing.Sharing 37 | ; sharing_class = Sharing 38 | 39 | [SHARING] 40 | sharing_package = decentralizepy.sharing.JWINS.JWINS 41 | sharing_class = JWINS 42 | change_based_selection = True 43 | alpha_list = [0.1,0.15,0.2,0.25,0.3,0.4,1.0] 44 | wavelet=sym2 45 | level= 4 46 | accumulation = True 47 | accumulate_averaging_changes = True 48 | metadata_cap = 0.5 49 | compression_package = decentralizepy.compression.EliasFpzip 50 | compression_class = EliasFpzip 51 | compress = True 52 | -------------------------------------------------------------------------------- /src/decentralizepy/compression/EliasFpzipLossy.py: -------------------------------------------------------------------------------- 1 | # elias implementation: taken from this stack overflow post: 2 | # https://stackoverflow.com/questions/62843156/python-fast-compression-of-large-amount-of-numbers-with-elias-gamma 3 | import fpzip 4 | 5 | from decentralizepy.compression.Elias import Elias 6 | 7 | 8 | class EliasFpzipLossy(Elias): 9 | """ 10 | Compression API 11 | 12 | """ 13 | 14 | def __init__(self, float_precision=16, *args, **kwargs): 15 | """ 16 | Constructor 17 | 18 | Parameters 19 | ---------- 20 | float_precision : int, optional 21 | Precision of the compression, by default 16 22 | 23 | """ 24 | self.float_precision = float_precision 25 | 26 | def compress_float(self, arr): 27 | """ 28 | compression function for float arrays 29 | 30 | Parameters 31 | ---------- 32 | arr : np.ndarray 33 | Data to compress 34 | 35 | Returns 36 | ------- 37 | bytearray 38 | encoded data as bytes 39 | 40 | """ 41 | return fpzip.compress(arr, precision=self.float_precision, order="C") 42 | 43 | def decompress_float(self, bytes): 44 | """ 45 | decompression function for compressed float arrays 46 | 47 | Parameters 48 | ---------- 49 | bytes :bytearray 50 | compressed data 51 | 52 | Returns 53 | ------- 54 | arr : np.ndarray 55 | decompressed data as array 56 | 57 | """ 58 | return fpzip.decompress(bytes, order="C").squeeze() 59 | -------------------------------------------------------------------------------- /src/decentralizepy/compression/Compression.py: -------------------------------------------------------------------------------- 1 | class Compression: 2 | """ 3 | Compression API 4 | 5 | """ 6 | 7 | def __init__(self, *args, **kwargs): 8 | """ 9 | Constructor 10 | """ 11 | 12 | def compress(self, arr): 13 | """ 14 | compression function 15 | 16 | Parameters 17 | ---------- 18 | arr : np.ndarray 19 | Data to compress 20 | 21 | Returns 22 | ------- 23 | bytearray 24 | encoded data as bytes 25 | 26 | """ 27 | return arr 28 | 29 | def decompress(self, bytes): 30 | """ 31 | decompression function 32 | 33 | Parameters 34 | ---------- 35 | bytes :bytearray 36 | compressed data 37 | 38 | Returns 39 | ------- 40 | arr : np.ndarray 41 | decompressed data as array 42 | 43 | """ 44 | return bytes 45 | 46 | def 
compress_float(self, arr): 47 | """ 48 | compression function for float arrays 49 | 50 | Parameters 51 | ---------- 52 | arr : np.ndarray 53 | Data to compress 54 | 55 | Returns 56 | ------- 57 | bytearray 58 | encoded data as bytes 59 | 60 | """ 61 | return arr 62 | 63 | def decompress_float(self, bytes): 64 | """ 65 | decompression function for compressed float arrays 66 | 67 | Parameters 68 | ---------- 69 | bytes :bytearray 70 | compressed data 71 | 72 | Returns 73 | ------- 74 | arr : np.ndarray 75 | decompressed data as array 76 | 77 | """ 78 | return bytes 79 | -------------------------------------------------------------------------------- /eval/run_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | nfs_home=$1 3 | python_bin=$2 4 | decpy_path=$nfs_home/decentralizepy/eval 5 | cd $decpy_path 6 | 7 | env_python=$python_bin/python3 8 | graph=96_regular.edges #4_node_fullyConnected.edges 9 | config_file=~/tmp/config.ini 10 | procs_per_machine=16 11 | machines=6 12 | iterations=5 13 | train_evaluate_after=5 14 | test_after=21 # we do not test 15 | eval_file=testing.py 16 | log_level=INFO 17 | 18 | ip_machines=$nfs_home/configs/ip_addr_6Machines.json 19 | 20 | m=`cat $ip_machines | grep $(/sbin/ifconfig ens785 | grep 'inet ' | awk '{print $2}') | cut -d'"' -f2` 21 | export PYTHONFAULTHANDLER=1 22 | tests=("step_configs/config_celeba_partialmodel.ini" "step_configs/config_celeba_sharing.ini" "step_configs/config_celeba_fft.ini" "step_configs/config_celeba_wavelet.ini" 23 | "step_configs/config_celeba_grow.ini" "step_configs/config_celeba_manualadapt.ini" "step_configs/config_celeba_randomalpha.ini" 24 | "step_configs/config_celeba_randomalphainc.ini" "step_configs/config_celeba_roundrobin.ini" "step_configs/config_celeba_subsampling.ini" 25 | "step_configs/config_celeba_topkrandom.ini" "step_configs/config_celeba_topkacc.ini" "step_configs/config_celeba_topkparam.ini") 26 | 27 | for i in "${tests[@]}" 28 | do 29 | echo $i 30 | IFS='_' read -ra NAMES <<< $i 31 | IFS='.' 
read -ra NAME <<< ${NAMES[-1]} 32 | log_dir=$nfs_home/logs/testing/${NAME[0]}$(date '+%Y-%m-%dT%H:%M')/machine$m 33 | mkdir -p $log_dir 34 | cp $i $config_file 35 | $python_bin/crudini --set $config_file COMMUNICATION addresses_filepath $ip_machines 36 | $env_python $eval_file -ro 0 -tea $train_evaluate_after -ld $log_dir -mid $m -ps $procs_per_machine -ms $machines -is $iterations -gf $graph -ta $test_after -cf $config_file -ll $log_level 37 | echo $i is done 38 | sleep 3 39 | echo end of sleep 40 | done 41 | -------------------------------------------------------------------------------- /eval/plot_shared.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | from matplotlib import pyplot as plt 8 | 9 | 10 | def plot(x, y, label, *args): 11 | plt.plot(x, y, *args, label=label) 12 | plt.legend() 13 | 14 | 15 | def plot_shared(path, title): 16 | model_path = os.path.join(path, "plots") 17 | Path(model_path).mkdir(parents=True, exist_ok=True) 18 | files = [f for f in os.listdir(path) if f.endswith("json")] 19 | assert len(files) > 0 20 | for i, file in enumerate(files): 21 | filepath = os.path.join(path, file) 22 | with open(filepath, "r") as inf: 23 | model_vec = json.load(inf) 24 | del model_vec["order"] 25 | if i == 0: 26 | total_params = 0 27 | for l in model_vec["shapes"].values(): 28 | current_params = 1 29 | for v in l: 30 | current_params *= v 31 | total_params += current_params 32 | print("Total Params: ", str(total_params)) 33 | shared_count = np.zeros(total_params, dtype=int) 34 | del model_vec["shapes"] 35 | model_vec = np.array(model_vec[list(model_vec.keys())[0]]) 36 | shared_count[model_vec] += 1 37 | print("sum: ", np.sum(shared_count)) 38 | num_elements = shared_count.shape[0] 39 | x_axis = np.arange(1, num_elements + 1) 40 | plt.clf() 41 | plt.title(title) 42 | plot(x_axis, shared_count, "unsorted", ".") 43 | shared_count = np.sort(shared_count) 44 | plot(x_axis, shared_count, "sorted") 45 | plt.savefig(os.path.join(model_path, "shared_plot.png")) 46 | 47 | 48 | if __name__ == "__main__": 49 | assert len(sys.argv) == 2 50 | plot_shared(sys.argv[1], "Shared Parameters") 51 | -------------------------------------------------------------------------------- /src/decentralizepy/mappings/Mapping.py: -------------------------------------------------------------------------------- 1 | class Mapping: 2 | """ 3 | This class defines the bidirectional mapping between: 4 | 1. The unique identifier 5 | 2. 
machine_id and rank 6 | 7 | """ 8 | 9 | def __init__(self, n_procs): 10 | """ 11 | Constructor 12 | 13 | Parameters 14 | ---------- 15 | n_procs : int 16 | Total number of processes 17 | 18 | """ 19 | self.n_procs = n_procs 20 | 21 | def get_n_procs(self): 22 | """ 23 | Gives the global sum of all processes that are spawned on the machines 24 | 25 | Returns 26 | ------- 27 | int 28 | the number of global processes 29 | 30 | """ 31 | 32 | return self.n_procs 33 | 34 | def get_uid(self, rank: int, machine_id: int): 35 | """ 36 | Gives the global unique identifier of the node 37 | 38 | Parameters 39 | ---------- 40 | rank : int 41 | Node's rank on its machine 42 | machine_id : int 43 | node's machine in the cluster 44 | 45 | Returns 46 | ------- 47 | int 48 | the unique identifier 49 | 50 | """ 51 | 52 | raise NotImplementedError 53 | 54 | def get_machine_and_rank(self, uid: int): 55 | """ 56 | Gives the rank and machine_id of the node 57 | 58 | Parameters 59 | ---------- 60 | uid : int 61 | globally unique identifier of the node 62 | 63 | Returns 64 | ------- 65 | 2-tuple 66 | a tuple of rank and machine_id 67 | 68 | """ 69 | 70 | raise NotImplementedError 71 | 72 | def get_local_procs_count(self): 73 | """ 74 | Gives number of processes that run on the node 75 | 76 | Returns 77 | ------- 78 | int 79 | the number of local processes 80 | 81 | """ 82 | 83 | raise NotImplementedError 84 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = decentralizepy 3 | version = 0.1.dev0 4 | author = Rishi Sharma 5 | author_email = rishi.sharma@epfl.ch 6 | license = MIT 7 | description = A framework to write decentralized machine learning applications 8 | keywords = 9 | python 10 | decentralized 11 | ml 12 | learning 13 | sacs 14 | url = https://rishisharma.netlify.app 15 | download_url = https://gitlab.epfl.ch/risharma/decentralizepy 16 | long_description = file: README.rst 17 | classifiers = 18 | Development Status :: 4 - Beta 19 | Intended Audience :: Education 20 | Intended Audience :: Science/Research 21 | License :: OSI Approved :: MIT License 22 | Operating System :: OS Independent 23 | Programming Language :: Python 24 | Programming Language :: Python :: 3 25 | Programming Language :: Python :: 3.6 26 | Programming Language :: Python :: 3.7 27 | Programming Language :: Python :: 3.8 28 | Topic :: Scientific/Engineering 29 | [options] 30 | package_dir = 31 | = src 32 | packages = find: 33 | zip_safe = False 34 | install_requires = 35 | numpy 36 | torch 37 | torchvision 38 | matplotlib 39 | networkx 40 | zmq 41 | jsonlines 42 | pillow 43 | smallworld 44 | localconfig 45 | PyWavelets 46 | pandas 47 | crudini 48 | scikit-learn 49 | lz4 50 | fpzip 51 | include_package_data = True 52 | python_requires = >=3.6 53 | [options.packages.find] 54 | where = src 55 | [options.extras_require] 56 | dev = 57 | black>22.3.0 58 | coverage 59 | isort 60 | pytest 61 | pytest-xdist 62 | pytest-cov<2.6.0 63 | pycodestyle 64 | sphinx 65 | alabaster 66 | tox 67 | [tool:pytest] 68 | norecursedirs = 69 | .git 70 | dist 71 | build 72 | python_files = 73 | test_*.py 74 | doctest_plus = disabled 75 | addopts = --strict 76 | markers = 77 | slow 78 | remote_data 79 | filterwarnings 80 | mpl_image_compare 81 | [flake8] 82 | ignore = E203, E266, E501, W503 83 | max-line-length = 80 84 | max-complexity = 18 85 | select = B,C,E,F,W,T4,B9 86 | 
-------------------------------------------------------------------------------- /eval/testingKNN.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Linear import Linear 11 | from decentralizepy.node.KNN import KNN 12 | 13 | 14 | def read_ini(file_path): 15 | config = LocalConfig(file_path) 16 | for section in config: 17 | print("Section: ", section) 18 | for key, value in config.items(section): 19 | print((key, value)) 20 | print(dict(config.items("DATASET"))) 21 | return config 22 | 23 | 24 | if __name__ == "__main__": 25 | args = utils.get_args() 26 | 27 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 28 | 29 | log_level = { 30 | "INFO": logging.INFO, 31 | "DEBUG": logging.DEBUG, 32 | "WARNING": logging.WARNING, 33 | "ERROR": logging.ERROR, 34 | "CRITICAL": logging.CRITICAL, 35 | } 36 | 37 | config = read_ini(args.config_file) 38 | my_config = dict() 39 | for section in config: 40 | my_config[section] = dict(config.items(section)) 41 | 42 | copy(args.config_file, args.log_dir) 43 | copy(args.graph_file, args.log_dir) 44 | utils.write_args(args, args.log_dir) 45 | 46 | g = Graph() 47 | g.read_graph_from_file(args.graph_file, args.graph_type) 48 | n_machines = args.machines 49 | procs_per_machine = args.procs_per_machine[0] 50 | l = Linear(n_machines, procs_per_machine) 51 | m_id = args.machine_id 52 | 53 | processes = [] 54 | for r in range(procs_per_machine): 55 | processes.append( 56 | mp.Process( 57 | target=KNN, 58 | args=[ 59 | r, 60 | m_id, 61 | l, 62 | g, 63 | my_config, 64 | args.iterations, 65 | args.log_dir, 66 | args.weights_store_dir, 67 | log_level[args.log_level], 68 | args.test_after, 69 | args.train_evaluate_after, 70 | args.reset_optimizer, 71 | ], 72 | ) 73 | ) 74 | 75 | for p in processes: 76 | p.start() 77 | 78 | for p in processes: 79 | p.join() 80 | -------------------------------------------------------------------------------- /eval/testingKFN.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Linear import Linear 11 | from decentralizepy.node.KFNNode import KFNNode 12 | 13 | 14 | def read_ini(file_path): 15 | config = LocalConfig(file_path) 16 | for section in config: 17 | print("Section: ", section) 18 | for key, value in config.items(section): 19 | print((key, value)) 20 | print(dict(config.items("DATASET"))) 21 | return config 22 | 23 | 24 | if __name__ == "__main__": 25 | args = utils.get_args() 26 | 27 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 28 | 29 | log_level = { 30 | "INFO": logging.INFO, 31 | "DEBUG": logging.DEBUG, 32 | "WARNING": logging.WARNING, 33 | "ERROR": logging.ERROR, 34 | "CRITICAL": logging.CRITICAL, 35 | } 36 | 37 | config = read_ini(args.config_file) 38 | my_config = dict() 39 | for section in config: 40 | my_config[section] = dict(config.items(section)) 41 | 42 | copy(args.config_file, args.log_dir) 43 | copy(args.graph_file, args.log_dir) 44 | utils.write_args(args, args.log_dir) 45 
| 46 | g = Graph() 47 | g.read_graph_from_file(args.graph_file, args.graph_type) 48 | n_machines = args.machines 49 | procs_per_machine = args.procs_per_machine[0] 50 | l = Linear(n_machines, procs_per_machine) 51 | m_id = args.machine_id 52 | 53 | processes = [] 54 | for r in range(procs_per_machine): 55 | processes.append( 56 | mp.Process( 57 | target=KFNNode, 58 | args=[ 59 | r, 60 | m_id, 61 | l, 62 | g, 63 | my_config, 64 | args.iterations, 65 | args.log_dir, 66 | args.weights_store_dir, 67 | log_level[args.log_level], 68 | args.test_after, 69 | args.train_evaluate_after, 70 | args.reset_optimizer, 71 | ], 72 | ) 73 | ) 74 | 75 | for p in processes: 76 | p.start() 77 | 78 | for p in processes: 79 | p.join() 80 | -------------------------------------------------------------------------------- /eval/testing.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Linear import Linear 11 | from decentralizepy.node.DPSGDNode import DPSGDNode 12 | 13 | 14 | def read_ini(file_path): 15 | config = LocalConfig(file_path) 16 | for section in config: 17 | print("Section: ", section) 18 | for key, value in config.items(section): 19 | print((key, value)) 20 | print(dict(config.items("DATASET"))) 21 | return config 22 | 23 | 24 | if __name__ == "__main__": 25 | args = utils.get_args() 26 | 27 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 28 | 29 | log_level = { 30 | "INFO": logging.INFO, 31 | "DEBUG": logging.DEBUG, 32 | "WARNING": logging.WARNING, 33 | "ERROR": logging.ERROR, 34 | "CRITICAL": logging.CRITICAL, 35 | } 36 | 37 | config = read_ini(args.config_file) 38 | my_config = dict() 39 | for section in config: 40 | my_config[section] = dict(config.items(section)) 41 | 42 | copy(args.config_file, args.log_dir) 43 | copy(args.graph_file, args.log_dir) 44 | utils.write_args(args, args.log_dir) 45 | 46 | g = Graph() 47 | g.read_graph_from_file(args.graph_file, args.graph_type) 48 | n_machines = args.machines 49 | procs_per_machine = args.procs_per_machine[0] 50 | 51 | l = Linear(n_machines, procs_per_machine) 52 | m_id = args.machine_id 53 | 54 | processes = [] 55 | for r in range(procs_per_machine): 56 | processes.append( 57 | mp.Process( 58 | target=DPSGDNode, 59 | args=[ 60 | r, 61 | m_id, 62 | l, 63 | g, 64 | my_config, 65 | args.iterations, 66 | args.log_dir, 67 | args.weights_store_dir, 68 | log_level[args.log_level], 69 | args.test_after, 70 | args.train_evaluate_after, 71 | args.reset_optimizer, 72 | ], 73 | ) 74 | ) 75 | 76 | for p in processes: 77 | p.start() 78 | 79 | for p in processes: 80 | p.join() 81 | -------------------------------------------------------------------------------- /eval/36_nodes.edges: -------------------------------------------------------------------------------- 1 | 36 2 | 0 1 3 | 0 2 4 | 0 35 5 | 0 6 6 | 1 0 7 | 1 2 8 | 1 17 9 | 1 28 10 | 1 30 11 | 2 0 12 | 2 1 13 | 2 3 14 | 2 7 15 | 2 8 16 | 2 19 17 | 2 31 18 | 3 2 19 | 3 4 20 | 3 5 21 | 3 23 22 | 3 25 23 | 3 26 24 | 4 34 25 | 4 3 26 | 4 5 27 | 4 16 28 | 4 18 29 | 5 3 30 | 5 4 31 | 5 6 32 | 5 10 33 | 5 23 34 | 6 0 35 | 6 33 36 | 6 5 37 | 6 7 38 | 6 9 39 | 6 20 40 | 6 26 41 | 7 8 42 | 7 2 43 | 7 6 44 | 8 32 45 | 8 2 46 | 8 34 47 | 8 7 48 | 8 9 49 | 9 35 50 | 9 6 51 | 9 8 52 | 9 10 53 | 9 11 
54 | 9 18 55 | 9 23 56 | 9 31 57 | 10 34 58 | 10 5 59 | 10 9 60 | 10 11 61 | 10 17 62 | 10 18 63 | 10 22 64 | 10 23 65 | 11 34 66 | 11 9 67 | 11 10 68 | 11 12 69 | 11 19 70 | 11 25 71 | 11 27 72 | 11 29 73 | 11 30 74 | 12 32 75 | 12 11 76 | 12 13 77 | 12 15 78 | 12 16 79 | 12 23 80 | 13 12 81 | 13 14 82 | 13 15 83 | 13 18 84 | 13 25 85 | 14 35 86 | 14 13 87 | 14 15 88 | 14 16 89 | 14 25 90 | 15 33 91 | 15 12 92 | 15 13 93 | 15 14 94 | 15 16 95 | 15 18 96 | 15 27 97 | 15 30 98 | 16 35 99 | 16 4 100 | 16 12 101 | 16 14 102 | 16 15 103 | 16 17 104 | 17 1 105 | 17 10 106 | 17 16 107 | 17 18 108 | 17 19 109 | 18 32 110 | 18 4 111 | 18 9 112 | 18 10 113 | 18 13 114 | 18 15 115 | 18 17 116 | 18 19 117 | 18 20 118 | 19 2 119 | 19 11 120 | 19 17 121 | 19 18 122 | 19 20 123 | 19 30 124 | 20 35 125 | 20 6 126 | 20 18 127 | 20 19 128 | 20 21 129 | 20 22 130 | 20 27 131 | 21 20 132 | 21 22 133 | 21 23 134 | 21 29 135 | 21 30 136 | 22 10 137 | 22 20 138 | 22 21 139 | 22 23 140 | 22 25 141 | 23 3 142 | 23 5 143 | 23 9 144 | 23 10 145 | 23 12 146 | 23 21 147 | 23 22 148 | 23 24 149 | 23 29 150 | 24 25 151 | 24 23 152 | 25 33 153 | 25 3 154 | 25 35 155 | 25 11 156 | 25 13 157 | 25 14 158 | 25 22 159 | 25 24 160 | 25 26 161 | 25 29 162 | 25 31 163 | 26 27 164 | 26 25 165 | 26 3 166 | 26 6 167 | 27 35 168 | 27 11 169 | 27 15 170 | 27 20 171 | 27 26 172 | 27 28 173 | 28 1 174 | 28 27 175 | 28 29 176 | 29 11 177 | 29 21 178 | 29 23 179 | 29 25 180 | 29 28 181 | 29 30 182 | 30 32 183 | 30 1 184 | 30 11 185 | 30 15 186 | 30 19 187 | 30 21 188 | 30 29 189 | 30 31 190 | 31 32 191 | 31 2 192 | 31 9 193 | 31 25 194 | 31 30 195 | 32 33 196 | 32 8 197 | 32 12 198 | 32 18 199 | 32 30 200 | 32 31 201 | 33 32 202 | 33 34 203 | 33 6 204 | 33 15 205 | 33 25 206 | 34 33 207 | 34 35 208 | 34 4 209 | 34 8 210 | 34 10 211 | 34 11 212 | 35 0 213 | 35 34 214 | 35 9 215 | 35 14 216 | 35 16 217 | 35 20 218 | 35 25 219 | 35 27 220 | -------------------------------------------------------------------------------- /eval/testingManual.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Manual import Manual 11 | from decentralizepy.node.DPSGDNode import DPSGDNode 12 | 13 | 14 | def read_ini(file_path): 15 | config = LocalConfig(file_path) 16 | for section in config: 17 | print("Section: ", section) 18 | for key, value in config.items(section): 19 | print((key, value)) 20 | print(dict(config.items("DATASET"))) 21 | return config 22 | 23 | 24 | if __name__ == "__main__": 25 | args = utils.get_args() 26 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 27 | 28 | log_level = { 29 | "INFO": logging.INFO, 30 | "DEBUG": logging.DEBUG, 31 | "WARNING": logging.WARNING, 32 | "ERROR": logging.ERROR, 33 | "CRITICAL": logging.CRITICAL, 34 | } 35 | 36 | config = read_ini(args.config_file) 37 | my_config = dict() 38 | for section in config: 39 | my_config[section] = dict(config.items(section)) 40 | 41 | copy(args.config_file, args.log_dir) 42 | copy(args.graph_file, args.log_dir) 43 | utils.write_args(args, args.log_dir) 44 | 45 | g = Graph() 46 | g.read_graph_from_file(args.graph_file, args.graph_type) 47 | n_machines = args.machines 48 | procs_per_machine = args.procs_per_machine 49 | m_id = args.machine_id 50 | 51 | l = 
Manual(n_machines, procs_per_machine, current_machine=m_id) 52 | 53 | processes = [] 54 | for r in range(procs_per_machine[m_id]): 55 | processes.append( 56 | mp.Process( 57 | target=DPSGDNode, 58 | args=[ 59 | r, 60 | m_id, 61 | l, 62 | g, 63 | my_config, 64 | args.iterations, 65 | args.log_dir, 66 | args.weights_store_dir, 67 | log_level[args.log_level], 68 | args.test_after, 69 | args.train_evaluate_after, 70 | args.reset_optimizer, 71 | ], 72 | ) 73 | ) 74 | 75 | for p in processes: 76 | p.start() 77 | 78 | for p in processes: 79 | p.join() 80 | -------------------------------------------------------------------------------- /tutorial/EpidemicLearning/testingEL_Local.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Linear import Linear 11 | from decentralizepy.node.EpidemicLearning.EL_Local import EL_Local 12 | 13 | 14 | def read_ini(file_path): 15 | config = LocalConfig(file_path) 16 | for section in config: 17 | print("Section: ", section) 18 | for key, value in config.items(section): 19 | print((key, value)) 20 | print(dict(config.items("DATASET"))) 21 | return config 22 | 23 | 24 | if __name__ == "__main__": 25 | args = utils.get_args() 26 | 27 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 28 | 29 | log_level = { 30 | "INFO": logging.INFO, 31 | "DEBUG": logging.DEBUG, 32 | "WARNING": logging.WARNING, 33 | "ERROR": logging.ERROR, 34 | "CRITICAL": logging.CRITICAL, 35 | } 36 | 37 | config = read_ini(args.config_file) 38 | my_config = dict() 39 | for section in config: 40 | my_config[section] = dict(config.items(section)) 41 | 42 | copy(args.config_file, args.log_dir) 43 | copy(args.graph_file, args.log_dir) 44 | utils.write_args(args, args.log_dir) 45 | 46 | g = Graph() 47 | g.read_graph_from_file(args.graph_file, args.graph_type) 48 | n_machines = args.machines 49 | procs_per_machine = args.procs_per_machine[0] 50 | 51 | l = Linear(n_machines, procs_per_machine) 52 | m_id = args.machine_id 53 | 54 | processes = [] 55 | for r in range(procs_per_machine): 56 | processes.append( 57 | mp.Process( 58 | target=EL_Local, 59 | args=[ 60 | r, 61 | m_id, 62 | l, 63 | g, 64 | my_config, 65 | args.iterations, 66 | args.log_dir, 67 | args.weights_store_dir, 68 | log_level[args.log_level], 69 | args.test_after, 70 | args.train_evaluate_after, 71 | args.reset_optimizer, 72 | ], 73 | ) 74 | ) 75 | 76 | for p in processes: 77 | p.start() 78 | 79 | for p in processes: 80 | p.join() 81 | -------------------------------------------------------------------------------- /tutorial/EpidemicLearning/fullyConnected_16.edges: -------------------------------------------------------------------------------- 1 | 16 2 | 0 1 3 | 0 2 4 | 0 3 5 | 0 4 6 | 0 5 7 | 0 6 8 | 0 7 9 | 0 8 10 | 0 9 11 | 0 10 12 | 0 11 13 | 0 12 14 | 0 13 15 | 0 14 16 | 0 15 17 | 1 0 18 | 1 2 19 | 1 3 20 | 1 4 21 | 1 5 22 | 1 6 23 | 1 7 24 | 1 8 25 | 1 9 26 | 1 10 27 | 1 11 28 | 1 12 29 | 1 13 30 | 1 14 31 | 1 15 32 | 2 0 33 | 2 1 34 | 2 3 35 | 2 4 36 | 2 5 37 | 2 6 38 | 2 7 39 | 2 8 40 | 2 9 41 | 2 10 42 | 2 11 43 | 2 12 44 | 2 13 45 | 2 14 46 | 2 15 47 | 3 0 48 | 3 1 49 | 3 2 50 | 3 4 51 | 3 5 52 | 3 6 53 | 3 7 54 | 3 8 55 | 3 9 56 | 3 10 57 | 3 11 58 | 3 12 59 | 3 13 60 | 3 14 61 | 3 15 62 | 4 0 63 | 4 1 64 | 
4 2 65 | 4 3 66 | 4 5 67 | 4 6 68 | 4 7 69 | 4 8 70 | 4 9 71 | 4 10 72 | 4 11 73 | 4 12 74 | 4 13 75 | 4 14 76 | 4 15 77 | 5 0 78 | 5 1 79 | 5 2 80 | 5 3 81 | 5 4 82 | 5 6 83 | 5 7 84 | 5 8 85 | 5 9 86 | 5 10 87 | 5 11 88 | 5 12 89 | 5 13 90 | 5 14 91 | 5 15 92 | 6 0 93 | 6 1 94 | 6 2 95 | 6 3 96 | 6 4 97 | 6 5 98 | 6 7 99 | 6 8 100 | 6 9 101 | 6 10 102 | 6 11 103 | 6 12 104 | 6 13 105 | 6 14 106 | 6 15 107 | 7 0 108 | 7 1 109 | 7 2 110 | 7 3 111 | 7 4 112 | 7 5 113 | 7 6 114 | 7 8 115 | 7 9 116 | 7 10 117 | 7 11 118 | 7 12 119 | 7 13 120 | 7 14 121 | 7 15 122 | 8 0 123 | 8 1 124 | 8 2 125 | 8 3 126 | 8 4 127 | 8 5 128 | 8 6 129 | 8 7 130 | 8 9 131 | 8 10 132 | 8 11 133 | 8 12 134 | 8 13 135 | 8 14 136 | 8 15 137 | 9 0 138 | 9 1 139 | 9 2 140 | 9 3 141 | 9 4 142 | 9 5 143 | 9 6 144 | 9 7 145 | 9 8 146 | 9 10 147 | 9 11 148 | 9 12 149 | 9 13 150 | 9 14 151 | 9 15 152 | 10 0 153 | 10 1 154 | 10 2 155 | 10 3 156 | 10 4 157 | 10 5 158 | 10 6 159 | 10 7 160 | 10 8 161 | 10 9 162 | 10 11 163 | 10 12 164 | 10 13 165 | 10 14 166 | 10 15 167 | 11 0 168 | 11 1 169 | 11 2 170 | 11 3 171 | 11 4 172 | 11 5 173 | 11 6 174 | 11 7 175 | 11 8 176 | 11 9 177 | 11 10 178 | 11 12 179 | 11 13 180 | 11 14 181 | 11 15 182 | 12 0 183 | 12 1 184 | 12 2 185 | 12 3 186 | 12 4 187 | 12 5 188 | 12 6 189 | 12 7 190 | 12 8 191 | 12 9 192 | 12 10 193 | 12 11 194 | 12 13 195 | 12 14 196 | 12 15 197 | 13 0 198 | 13 1 199 | 13 2 200 | 13 3 201 | 13 4 202 | 13 5 203 | 13 6 204 | 13 7 205 | 13 8 206 | 13 9 207 | 13 10 208 | 13 11 209 | 13 12 210 | 13 14 211 | 13 15 212 | 14 0 213 | 14 1 214 | 14 2 215 | 14 3 216 | 14 4 217 | 14 5 218 | 14 6 219 | 14 7 220 | 14 8 221 | 14 9 222 | 14 10 223 | 14 11 224 | 14 12 225 | 14 13 226 | 14 15 227 | 15 0 228 | 15 1 229 | 15 2 230 | 15 3 231 | 15 4 232 | 15 5 233 | 15 6 234 | 15 7 235 | 15 8 236 | 15 9 237 | 15 10 238 | 15 11 239 | 15 12 240 | 15 13 241 | 15 14 242 | -------------------------------------------------------------------------------- /src/decentralizepy/mappings/Linear.py: -------------------------------------------------------------------------------- 1 | from decentralizepy.mappings.Mapping import Mapping 2 | 3 | 4 | class Linear(Mapping): 5 | """ 6 | This class defines the mapping: 7 | uid = machine_id * procs_per_machine + rank 8 | 9 | """ 10 | 11 | def __init__( 12 | self, n_machines, procs_per_machine, global_service_machine=0, current_machine=0 13 | ): 14 | """ 15 | Constructor 16 | 17 | Parameters 18 | ---------- 19 | n_machines : int 20 | Number of machines involved in learning 21 | procs_per_machine : int 22 | Number of processes spawned per machine 23 | global_service_machine: int, optional 24 | Machine ID on which the server/services are hosted 25 | current_machine: int, optional 26 | Machine ID of local machine 27 | 28 | """ 29 | super().__init__(n_machines * procs_per_machine) 30 | self.n_machines = n_machines 31 | self.procs_per_machine = procs_per_machine 32 | self.global_service_machine = global_service_machine 33 | self.current_machine = current_machine 34 | 35 | def get_uid(self, rank: int, machine_id: int): 36 | """ 37 | Gives the global unique identifier of the node 38 | 39 | Parameters 40 | ---------- 41 | rank : int 42 | Node's rank on its machine 43 | machine_id : int 44 | node's machine in the cluster 45 | 46 | Returns 47 | ------- 48 | int 49 | the unique identifier 50 | 51 | """ 52 | if rank < 0: 53 | return rank 54 | return machine_id * self.procs_per_machine + rank 55 | 56 | def get_machine_and_rank(self, uid: int): 57 | """ 58 | Gives the rank 
and machine_id of the node 59 | 60 | Parameters 61 | ---------- 62 | uid : int 63 | globally unique identifier of the node 64 | 65 | Returns 66 | ------- 67 | 2-tuple 68 | a tuple of rank and machine_id 69 | 70 | """ 71 | if uid < 0: 72 | return uid, self.global_service_machine 73 | return (uid % self.procs_per_machine), (uid // self.procs_per_machine) 74 | 75 | def get_local_procs_count(self): 76 | """ 77 | Gives number of processes that run on the node 78 | 79 | Returns 80 | ------- 81 | int 82 | the number of local processes 83 | 84 | """ 85 | 86 | return self.procs_per_machine 87 | -------------------------------------------------------------------------------- /src/decentralizepy/compression/Lz4Wrapper.py: -------------------------------------------------------------------------------- 1 | import lz4.frame 2 | import numpy as np 3 | 4 | from decentralizepy.compression.Compression import Compression 5 | 6 | 7 | class Lz4Wrapper(Compression): 8 | """ 9 | Compression API 10 | 11 | """ 12 | 13 | def __init__(self, compress_metadata=True, compress_data=False, *args, **kwargs): 14 | """ 15 | Constructor 16 | """ 17 | self.compress_metadata = compress_metadata 18 | self.compress_data = compress_data 19 | 20 | def compress(self, arr): 21 | """ 22 | compression function 23 | 24 | Parameters 25 | ---------- 26 | arr : np.ndarray 27 | Data to compress 28 | 29 | Returns 30 | ------- 31 | bytearray 32 | encoded data as bytes 33 | 34 | """ 35 | if self.compress_metadata: 36 | arr.sort() 37 | diff = np.diff(arr, prepend=0).astype(np.int32) 38 | to_compress = diff.tobytes("C") 39 | return lz4.frame.compress(to_compress) 40 | return arr 41 | 42 | def decompress(self, bytes): 43 | """ 44 | decompression function 45 | 46 | Parameters 47 | ---------- 48 | bytes :bytearray 49 | compressed data 50 | 51 | Returns 52 | ------- 53 | arr : np.ndarray 54 | decompressed data as array 55 | 56 | """ 57 | if self.compress_metadata: 58 | decomp = lz4.frame.decompress(bytes) 59 | return np.cumsum(np.frombuffer(decomp, dtype=np.int32)) 60 | return bytes 61 | 62 | def compress_float(self, arr): 63 | """ 64 | compression function for float arrays 65 | 66 | Parameters 67 | ---------- 68 | arr : np.ndarray 69 | Data to compress 70 | 71 | Returns 72 | ------- 73 | bytearray 74 | encoded data as bytes 75 | 76 | """ 77 | if self.compress_data: 78 | to_compress = arr.tobytes("C") 79 | return lz4.frame.compress(to_compress) 80 | return arr 81 | 82 | def decompress_float(self, bytes): 83 | """ 84 | decompression function for compressed float arrays 85 | 86 | Parameters 87 | ---------- 88 | bytes :bytearray 89 | compressed data 90 | 91 | Returns 92 | ------- 93 | arr : np.ndarray 94 | decompressed data as array 95 | 96 | """ 97 | if self.compress_data: 98 | decomp = lz4.frame.decompress(bytes) 99 | return np.frombuffer(decomp, dtype=np.float32) 100 | return bytes 101 | -------------------------------------------------------------------------------- /src/decentralizepy/models/Model.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from pathlib import Path 3 | 4 | import torch 5 | from torch import nn 6 | 7 | 8 | class Model(nn.Module): 9 | """ 10 | This class wraps the torch model 11 | More fields can be added here 12 | 13 | """ 14 | 15 | def __init__(self): 16 | """ 17 | Constructor 18 | 19 | """ 20 | super().__init__() 21 | self.model_change = None 22 | self._param_count_ot = None 23 | self._param_count_total = None 24 | self.accumulated_changes = None 25 | 
self.shared_parameters_counter = None 26 | 27 | def count_params(self, only_trainable=False): 28 | """ 29 | Counts the total number of params 30 | 31 | Parameters 32 | ---------- 33 | only_trainable : bool 34 | Counts only parameters with gradients when True 35 | 36 | Returns 37 | ------- 38 | int 39 | Total number of parameters 40 | 41 | """ 42 | if only_trainable: 43 | if not self._param_count_ot: 44 | self._param_count_ot = sum( 45 | p.numel() for p in self.parameters() if p.requires_grad 46 | ) 47 | return self._param_count_ot 48 | else: 49 | if not self._param_count_total: 50 | self._param_count_total = sum(p.numel() for p in self.parameters()) 51 | return self._param_count_total 52 | 53 | def rewind_accumulation(self, indices): 54 | """ 55 | resets accumulated_changes at the given indices 56 | 57 | Parameters 58 | ---------- 59 | indices : torch.Tensor 60 | Tensor that contains indices corresponding to the flattened model 61 | 62 | """ 63 | if self.accumulated_changes is not None: 64 | self.accumulated_changes[indices] = 0.0 65 | 66 | def dump_weights(self, directory, uid, round): 67 | """ 68 | saves the current model as a pt file into the specified directory 69 | 70 | Parameters 71 | ---------- 72 | directory : str 73 | directory in which the weights are dumped 74 | uid : int 75 | uid of the node, will be used to give the weight a unique name 76 | round : int 77 | current round, will be used to give the weight a unique name 78 | 79 | """ 80 | torch.save(self.state_dict(), Path(directory) / f"{round}_weight_{uid}.pt") 81 | 82 | def get_weights(self): 83 | """ 84 | flattens the current weights 85 | 86 | """ 87 | with torch.no_grad(): 88 | tensors_to_cat = [] 89 | for _, v in self.state_dict().items(): 90 | tensors_to_cat.append(v.flatten()) 91 | flat = torch.cat(tensors_to_cat) 92 | 93 | return flat 94 | -------------------------------------------------------------------------------- /src/decentralizepy/communication/Communication.py: -------------------------------------------------------------------------------- 1 | class Communication: 2 | """ 3 | Communication API 4 | 5 | """ 6 | 7 | def __init__(self, rank, machine_id, mapping, total_procs): 8 | """ 9 | Constructor 10 | 11 | Parameters 12 | ---------- 13 | rank : int 14 | Local rank of the process 15 | machine_id : int 16 | Machine id of the process 17 | mapping : decentralizepy.mappings.Mapping 18 | uid, rank, machine_id invertible mapping 19 | total_procs : int 20 | Total number of processes 21 | 22 | """ 23 | self.total_procs = total_procs 24 | self.rank = rank 25 | self.machine_id = machine_id 26 | self.mapping = mapping 27 | self.uid = mapping.get_uid(rank, machine_id) 28 | self.total_bytes = 0 29 | 30 | def encrypt(self, data): 31 | """ 32 | Encode/Encrypt data. 33 | 34 | Parameters 35 | ---------- 36 | data : dict 37 | Data dict to send 38 | 39 | Returns 40 | ------- 41 | byte 42 | Encoded data 43 | 44 | """ 45 | raise NotImplementedError 46 | 47 | def decrypt(self, sender, data): 48 | """ 49 | Decodes received data. 50 | 51 | Parameters 52 | ---------- 53 | sender : byte 54 | sender of the data 55 | data : byte 56 | Data received 57 | 58 | Returns 59 | ------- 60 | tuple 61 | (sender: int, data: dict) 62 | 63 | """ 64 | raise NotImplementedError 65 | 66 | def connect_neighbors(self, neighbors): 67 | """ 68 | Connects all neighbors.
69 | 70 | Parameters 71 | ---------- 72 | neighbors : list(int) 73 | List of neighbors 74 | 75 | """ 76 | raise NotImplementedError 77 | 78 | def receive(self): 79 | """ 80 | Returns ONE message received. 81 | 82 | Returns 83 | ---------- 84 | dict 85 | Received and decrypted data 86 | 87 | """ 88 | raise NotImplementedError 89 | 90 | def send(self, uid, data): 91 | """ 92 | Send a message to a process. 93 | 94 | Parameters 95 | ---------- 96 | uid : int 97 | Neighbor's unique ID 98 | data : dict 99 | Message as a Python dictionary 100 | 101 | """ 102 | raise NotImplementedError 103 | 104 | def disconnect_neighbors(self): 105 | """ 106 | Disconnects all neighbors. 107 | 108 | """ 109 | raise NotImplementedError 110 | 111 | def terminate(self): 112 | """ 113 | Terminate the communication sockets. 114 | 115 | """ 116 | return 117 | -------------------------------------------------------------------------------- /src/decentralizepy/compression/Elias.py: -------------------------------------------------------------------------------- 1 | # elias implementation: taken from this stack overflow post: 2 | # https://stackoverflow.com/questions/62843156/python-fast-compression-of-large-amount-of-numbers-with-elias-gamma 3 | import numpy as np 4 | 5 | from decentralizepy.compression.Compression import Compression 6 | 7 | 8 | class Elias(Compression): 9 | """ 10 | Compression API 11 | 12 | """ 13 | 14 | def __init__(self, *args, **kwargs): 15 | """ 16 | Constructor 17 | """ 18 | super().__init__() 19 | 20 | def compress(self, arr): 21 | """ 22 | compression function 23 | 24 | Parameters 25 | ---------- 26 | arr : np.ndarray 27 | Data to compress 28 | 29 | Returns 30 | ------- 31 | bytearray 32 | encoded data as bytes 33 | 34 | """ 35 | arr.sort() 36 | first = arr[0] 37 | arr = np.diff(arr).astype(np.int32) 38 | arr = arr.view(f"u{arr.itemsize}") 39 | l = np.log2(arr).astype("u1") 40 | L = ((l << 1) + 1).cumsum() 41 | out = np.zeros(int(L[-1] + 128), "u1") 42 | for i in range(l.max() + 1): 43 | out[L - i - 1] += (arr >> i) & 1 44 | 45 | s = np.array([out.size], dtype=np.int64) 46 | size = np.ndarray(8, dtype="u1", buffer=s.data) 47 | packed = np.packbits(out) 48 | packed[-8:] = size 49 | s = np.array([first], dtype=np.int64) 50 | size = np.ndarray(8, dtype="u1", buffer=s.data) 51 | packed[-16:-8] = size 52 | return packed 53 | 54 | def decompress(self, bytes): 55 | """ 56 | decompression function 57 | 58 | Parameters 59 | ---------- 60 | bytes :bytearray 61 | compressed data 62 | 63 | Returns 64 | ------- 65 | arr : np.ndarray 66 | decompressed data as array 67 | 68 | """ 69 | n_arr = bytes[-8:] 70 | n = np.ndarray(1, dtype=np.int64, buffer=n_arr.data)[0] 71 | first = bytes[-16:-8] 72 | first = np.ndarray(1, dtype=np.int64, buffer=first.data)[0] 73 | b = bytes[:-16] 74 | b = np.unpackbits(b, count=n).view(bool) 75 | s = b.nonzero()[0] 76 | s = (s << 1).repeat(np.diff(s, prepend=-1)) 77 | s -= np.arange(-1, len(s) - 1) 78 | s = s.tolist() # list has faster __getitem__ 79 | ns = len(s) 80 | 81 | def gen(): 82 | idx = 0 83 | yield idx 84 | while idx < ns: 85 | idx = s[idx] 86 | yield idx 87 | 88 | offs = np.fromiter(gen(), int) 89 | sz = np.diff(offs) >> 1 90 | mx = sz.max() + 1 91 | out_fin = np.zeros(offs.size, int) 92 | out_fin[0] = first 93 | out = out_fin[1:] 94 | for i in range(mx): 95 | out[b[offs[1:] - i - 1] & (sz >= i)] += 1 << i 96 | out = np.cumsum(out_fin) 97 | return out 98 | -------------------------------------------------------------------------------- 
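A minimal round-trip sketch of the Elias codec above (not a file of the repository: it assumes numpy >= 1.17, whose unpackbits zero-pads when count exceeds the available bits, and an array of distinct indices, since a gap of zero cannot be gamma-coded):

import numpy as np

from decentralizepy.compression.Elias import Elias

codec = Elias()

# Distinct parameter indices: compress() sorts its argument in place and
# gamma-codes the gaps between consecutive entries, so pass a copy if the
# caller still needs the original order.
indices = np.array([42, 3, 1000, 17], dtype=np.int64)

packed = codec.compress(indices.copy())  # np.uint8 array of packed bits
restored = codec.decompress(packed)      # the sorted index set

assert (restored == np.sort(indices)).all()

EliasFpzip and EliasFpzipLossy above reuse this integer coding unchanged and only swap in fpzip for the float path.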
/src/decentralizepy/mappings/Manual.py: -------------------------------------------------------------------------------- 1 | from decentralizepy.mappings.Mapping import Mapping 2 | 3 | 4 | class Manual(Mapping): 5 | """ 6 | This class defines the manual mapping 7 | 8 | """ 9 | 10 | def __init__( 11 | self, n_machines, procs_per_machine, global_service_machine=0, current_machine=0 12 | ): 13 | """ 14 | Constructor 15 | 16 | Parameters 17 | ---------- 18 | n_machines : int 19 | Number of machines involved in learning 20 | procs_per_machine : list(int) 21 | A list of number of processes spawned per machine 22 | global_service_machine: int, optional 23 | Machine ID on which the server/services are hosted 24 | current_machine: int, optional 25 | Machine ID of local machine 26 | 27 | """ 28 | 29 | self.n_procs = 0 30 | for i in procs_per_machine: 31 | self.n_procs += i 32 | super().__init__(self.n_procs) 33 | self.n_machines = n_machines 34 | self.procs_per_machine = procs_per_machine 35 | self.global_service_machine = global_service_machine 36 | self.current_machine = current_machine 37 | 38 | def get_uid(self, rank: int, machine_id: int): 39 | """ 40 | Gives the global unique identifier of the node 41 | 42 | Parameters 43 | ---------- 44 | rank : int 45 | Node's rank on its machine 46 | machine_id : int 47 | node's machine in the cluster 48 | 49 | Returns 50 | ------- 51 | int 52 | the unique identifier 53 | 54 | """ 55 | if rank < 0: 56 | return rank 57 | cur_uid = 0 58 | for i in range(machine_id): 59 | cur_uid += self.procs_per_machine[i] 60 | return cur_uid + rank 61 | 62 | def get_machine_and_rank(self, uid: int): 63 | """ 64 | Gives the rank and machine_id of the node 65 | 66 | Parameters 67 | ---------- 68 | uid : int 69 | globally unique identifier of the node 70 | 71 | Returns 72 | ------- 73 | 2-tuple 74 | a tuple of rank and machine_id 75 | 76 | """ 77 | if uid < 0: 78 | return uid, self.global_service_machine 79 | 80 | machine, rank = 0, 0 81 | for procs in self.procs_per_machine: 82 | if uid < procs: 83 | rank = uid 84 | break 85 | else: 86 | machine += 1 87 | uid -= procs 88 | return rank, machine 89 | 90 | def get_local_procs_count(self): 91 | """ 92 | Gives number of processes that run on the node 93 | 94 | Returns 95 | ------- 96 | int 97 | the number of local processes 98 | 99 | """ 100 | 101 | return self.procs_per_machine[self.current_machine] 102 | -------------------------------------------------------------------------------- /src/decentralizepy/sharing/JWINS/JWINS.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from decentralizepy.sharing.JWINS.Wavelet import Wavelet 4 | 5 | 6 | class JWINS(Wavelet): 7 | """ 8 | This class implements the JWINS sharing algorithm. 
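At each sharing step, a subset of the wavelet coefficients of the model update is selected and shared: the fraction alpha is drawn uniformly at random from alpha_list (seeded with the node's uid), and the full model is sent whenever the drawn alpha exceeds metadata_cap.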
9 | 10 | """ 11 | 12 | def __init__( 13 | self, 14 | rank, 15 | machine_id, 16 | communication, 17 | mapping, 18 | graph, 19 | model, 20 | dataset, 21 | log_dir, 22 | alpha_list="[0.1, 0.2, 0.3, 0.4, 1.0]", 23 | dict_ordered=True, 24 | save_shared=False, 25 | metadata_cap=1.0, 26 | wavelet="haar", 27 | level=4, 28 | change_based_selection=True, 29 | save_accumulated="", 30 | accumulation=False, 31 | accumulate_averaging_changes=False, 32 | compress=False, 33 | compression_package=None, 34 | compression_class=None, 35 | ): 36 | """ 37 | Constructor 38 | 39 | Parameters 40 | ---------- 41 | rank : int 42 | Local rank 43 | machine_id : int 44 | Global machine id 45 | communication : decentralizepy.communication.Communication 46 | Communication module used to send and receive messages 47 | mapping : decentralizepy.mappings.Mapping 48 | Mapping (rank, machine_id) -> uid 49 | graph : decentralizepy.graphs.Graph 50 | Graph representing neighbors 51 | model : decentralizepy.models.Model 52 | Model to train 53 | dataset : decentralizepy.datasets.Dataset 54 | Dataset for sharing data. Not implemented yet! TODO 55 | log_dir : str 56 | Location to write shared_params (only writing for 2 procs per machine) 57 | dict_ordered : bool 58 | Specifies if the python dict maintains the order of insertion 59 | save_shared : bool 60 | Specifies if the indices of shared parameters should be logged 61 | metadata_cap : float 62 | Share full model when self.alpha > metadata_cap 63 | 64 | """ 65 | super().__init__( 66 | rank, 67 | machine_id, 68 | communication, 69 | mapping, 70 | graph, 71 | model, 72 | dataset, 73 | log_dir, 74 | 1.0, 75 | dict_ordered, 76 | save_shared, 77 | metadata_cap, 78 | wavelet, 79 | level, 80 | change_based_selection, 81 | save_accumulated, 82 | accumulation, 83 | accumulate_averaging_changes, 84 | compress, 85 | compression_package, 86 | compression_class, 87 | ) 88 | self.alpha_list = eval(alpha_list) 89 | random.seed(self.mapping.get_uid(self.rank, self.machine_id)) 90 | 91 | def get_data_to_send(self, degree=None): 92 | """ 93 | Perform a sharing step. Implements D-PSGD with alpha randomly chosen.
94 | 95 | """ 96 | self.alpha = random.choice(self.alpha_list) 97 | return super().get_data_to_send() 98 | -------------------------------------------------------------------------------- /eval/testingPeerSampler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Linear import Linear 11 | from decentralizepy.node.DPSGDWithPeerSampler import DPSGDWithPeerSampler 12 | from decentralizepy.node.PeerSampler import PeerSampler 13 | 14 | 15 | def read_ini(file_path): 16 | config = LocalConfig(file_path) 17 | for section in config: 18 | print("Section: ", section) 19 | for key, value in config.items(section): 20 | print((key, value)) 21 | print(dict(config.items("DATASET"))) 22 | return config 23 | 24 | 25 | if __name__ == "__main__": 26 | args = utils.get_args() 27 | 28 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 29 | 30 | log_level = { 31 | "INFO": logging.INFO, 32 | "DEBUG": logging.DEBUG, 33 | "WARNING": logging.WARNING, 34 | "ERROR": logging.ERROR, 35 | "CRITICAL": logging.CRITICAL, 36 | } 37 | 38 | config = read_ini(args.config_file) 39 | my_config = dict() 40 | for section in config: 41 | my_config[section] = dict(config.items(section)) 42 | 43 | copy(args.config_file, args.log_dir) 44 | copy(args.graph_file, args.log_dir) 45 | utils.write_args(args, args.log_dir) 46 | 47 | g = Graph() 48 | g.read_graph_from_file(args.graph_file, args.graph_type) 49 | n_machines = args.machines 50 | procs_per_machine = args.procs_per_machine[0] 51 | l = Linear(n_machines, procs_per_machine) 52 | m_id = args.machine_id 53 | 54 | sm = args.server_machine 55 | sr = args.server_rank 56 | 57 | processes = [] 58 | if sm == m_id: 59 | processes.append( 60 | mp.Process( 61 | target=PeerSampler, 62 | args=[ 63 | sr, 64 | m_id, 65 | l, 66 | g, 67 | my_config, 68 | args.iterations, 69 | args.log_dir, 70 | log_level[args.log_level], 71 | ], 72 | ) 73 | ) 74 | 75 | for r in range(0, procs_per_machine): 76 | processes.append( 77 | mp.Process( 78 | target=DPSGDWithPeerSampler, 79 | args=[ 80 | r, 81 | m_id, 82 | l, 83 | g, 84 | my_config, 85 | args.iterations, 86 | args.log_dir, 87 | args.weights_store_dir, 88 | log_level[args.log_level], 89 | args.test_after, 90 | args.train_evaluate_after, 91 | args.reset_optimizer, 92 | ], 93 | ) 94 | ) 95 | 96 | for p in processes: 97 | p.start() 98 | 99 | for p in processes: 100 | p.join() 101 | -------------------------------------------------------------------------------- /eval/testingPeerSamplerDynamic.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Linear import Linear 11 | from decentralizepy.node.DPSGDWithPeerSampler import DPSGDWithPeerSampler 12 | from decentralizepy.node.PeerSamplerDynamic import PeerSamplerDynamic 13 | 14 | 15 | def read_ini(file_path): 16 | config = LocalConfig(file_path) 17 | for section in config: 18 | print("Section: ", section) 19 | for key, value in config.items(section): 20 | print((key, value)) 21 | 
print(dict(config.items("DATASET"))) 22 | return config 23 | 24 | 25 | if __name__ == "__main__": 26 | args = utils.get_args() 27 | 28 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 29 | 30 | log_level = { 31 | "INFO": logging.INFO, 32 | "DEBUG": logging.DEBUG, 33 | "WARNING": logging.WARNING, 34 | "ERROR": logging.ERROR, 35 | "CRITICAL": logging.CRITICAL, 36 | } 37 | 38 | config = read_ini(args.config_file) 39 | my_config = dict() 40 | for section in config: 41 | my_config[section] = dict(config.items(section)) 42 | 43 | copy(args.config_file, args.log_dir) 44 | copy(args.graph_file, args.log_dir) 45 | utils.write_args(args, args.log_dir) 46 | 47 | g = Graph() 48 | g.read_graph_from_file(args.graph_file, args.graph_type) 49 | n_machines = args.machines 50 | procs_per_machine = args.procs_per_machine[0] 51 | m_id = args.machine_id 52 | 53 | sm = args.server_machine 54 | sr = args.server_rank 55 | 56 | l = Linear( 57 | n_machines, procs_per_machine, global_service_machine=sm, current_machine=m_id 58 | ) 59 | 60 | processes = [] 61 | if sm == m_id: 62 | processes.append( 63 | mp.Process( 64 | target=PeerSamplerDynamic, 65 | args=[ 66 | sr, 67 | m_id, 68 | l, 69 | g, 70 | my_config, 71 | args.iterations, 72 | args.log_dir, 73 | log_level[args.log_level], 74 | ], 75 | ) 76 | ) 77 | 78 | for r in range(0, procs_per_machine): 79 | processes.append( 80 | mp.Process( 81 | target=DPSGDWithPeerSampler, 82 | args=[ 83 | r, 84 | m_id, 85 | l, 86 | g, 87 | my_config, 88 | args.iterations, 89 | args.log_dir, 90 | args.weights_store_dir, 91 | log_level[args.log_level], 92 | args.test_after, 93 | args.train_evaluate_after, 94 | args.reset_optimizer, 95 | ], 96 | ) 97 | ) 98 | 99 | for p in processes: 100 | p.start() 101 | 102 | for p in processes: 103 | p.join() 104 | -------------------------------------------------------------------------------- /eval/testingPeerSamplerDynamicManual.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Manual import Manual 11 | from decentralizepy.node.DPSGDWithPeerSampler import DPSGDWithPeerSampler 12 | from decentralizepy.node.PeerSamplerDynamic import PeerSamplerDynamic 13 | 14 | 15 | def read_ini(file_path): 16 | config = LocalConfig(file_path) 17 | for section in config: 18 | print("Section: ", section) 19 | for key, value in config.items(section): 20 | print((key, value)) 21 | print(dict(config.items("DATASET"))) 22 | return config 23 | 24 | 25 | if __name__ == "__main__": 26 | args = utils.get_args() 27 | 28 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 29 | 30 | log_level = { 31 | "INFO": logging.INFO, 32 | "DEBUG": logging.DEBUG, 33 | "WARNING": logging.WARNING, 34 | "ERROR": logging.ERROR, 35 | "CRITICAL": logging.CRITICAL, 36 | } 37 | 38 | config = read_ini(args.config_file) 39 | my_config = dict() 40 | for section in config: 41 | my_config[section] = dict(config.items(section)) 42 | 43 | copy(args.config_file, args.log_dir) 44 | copy(args.graph_file, args.log_dir) 45 | utils.write_args(args, args.log_dir) 46 | 47 | g = Graph() 48 | g.read_graph_from_file(args.graph_file, args.graph_type) 49 | n_machines = args.machines 50 | procs_per_machine = args.procs_per_machine 51 | m_id = args.machine_id 52 | 53 | sm = 
args.server_machine 54 | sr = args.server_rank 55 | 56 | l = Manual( 57 | n_machines, procs_per_machine, global_service_machine=sm, current_machine=m_id 58 | ) 59 | 60 | processes = [] 61 | if sm == m_id: 62 | processes.append( 63 | mp.Process( 64 | target=PeerSamplerDynamic, 65 | args=[ 66 | sr, 67 | m_id, 68 | l, 69 | g, 70 | my_config, 71 | args.iterations, 72 | args.log_dir, 73 | log_level[args.log_level], 74 | ], 75 | ) 76 | ) 77 | 78 | for r in range(0, procs_per_machine[m_id]): 79 | processes.append( 80 | mp.Process( 81 | target=DPSGDWithPeerSampler, 82 | args=[ 83 | r, 84 | m_id, 85 | l, 86 | g, 87 | my_config, 88 | args.iterations, 89 | args.log_dir, 90 | args.weights_store_dir, 91 | log_level[args.log_level], 92 | args.test_after, 93 | args.train_evaluate_after, 94 | args.reset_optimizer, 95 | ], 96 | ) 97 | ) 98 | 99 | for p in processes: 100 | p.start() 101 | 102 | for p in processes: 103 | p.join() 104 | -------------------------------------------------------------------------------- /eval/testingSTC.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Linear import Linear 11 | from decentralizepy.node.STC.STCClient import STCClient 12 | from decentralizepy.node.STC.STCServer import STCServer 13 | 14 | 15 | def read_ini(file_path): 16 | config = LocalConfig(file_path) 17 | for section in config: 18 | print("Section: ", section) 19 | for key, value in config.items(section): 20 | print((key, value)) 21 | print(dict(config.items("DATASET"))) 22 | return config 23 | 24 | 25 | if __name__ == "__main__": 26 | args = utils.get_args() 27 | 28 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 29 | 30 | log_level = { 31 | "INFO": logging.INFO, 32 | "DEBUG": logging.DEBUG, 33 | "WARNING": logging.WARNING, 34 | "ERROR": logging.ERROR, 35 | "CRITICAL": logging.CRITICAL, 36 | } 37 | 38 | config = read_ini(args.config_file) 39 | my_config = dict() 40 | for section in config: 41 | my_config[section] = dict(config.items(section)) 42 | 43 | copy(args.config_file, args.log_dir) 44 | copy(args.graph_file, args.log_dir) 45 | utils.write_args(args, args.log_dir) 46 | 47 | g = Graph() 48 | g.read_graph_from_file(args.graph_file, args.graph_type) 49 | n_machines = args.machines 50 | procs_per_machine = args.procs_per_machine[0] 51 | l = Linear(n_machines, procs_per_machine) 52 | m_id = args.machine_id 53 | 54 | sm = args.server_machine 55 | sr = args.server_rank 56 | 57 | processes = [] 58 | if sm == m_id: 59 | processes.append( 60 | mp.Process( 61 | target=STCServer, 62 | args=[ 63 | sr, 64 | m_id, 65 | l, 66 | g, 67 | my_config, 68 | args.iterations, 69 | args.log_dir, 70 | args.weights_store_dir, 71 | log_level[args.log_level], 72 | args.test_after, 73 | args.train_evaluate_after, 74 | args.working_rate, 75 | ], 76 | ) 77 | ) 78 | 79 | for r in range(0, procs_per_machine): 80 | processes.append( 81 | mp.Process( 82 | target=STCClient, 83 | args=[ 84 | r, 85 | m_id, 86 | l, 87 | g, 88 | my_config, 89 | args.iterations, 90 | args.log_dir, 91 | args.weights_store_dir, 92 | log_level[args.log_level], 93 | args.test_after, 94 | args.train_evaluate_after, 95 | args.reset_optimizer, 96 | ], 97 | ) 98 | ) 99 | 100 | for p in processes: 101 | p.start() 102 | 103 | for p in processes: 
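# An illustrative invocation of this launcher (a sketch; every flag below is
# defined in decentralizepy.utils.get_args(), but the config and graph paths
# are placeholders that must exist):
#
#   python testingSTC.py -mid 0 -ms 1 -ps 8 -is 100 \
#       -cf step_configs/config_cifar_sharing.ini -gf 36_nodes.edges -gt edges \
#       -ld ./logs -wsd ./weights -ll INFO -sm 0 -sr -1 -ta 10 -tea 10 -wr 1.0
#
# The machine whose -mid matches -sm spawns the STCServer process with rank
# -sr in addition to its -ps STCClient processes.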
104 | p.join() 105 | -------------------------------------------------------------------------------- /tutorial/EpidemicLearning/testingEL_Oracle.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Linear import Linear 11 | from decentralizepy.node.EpidemicLearning.EL_Oracle_Client import EL_Oracle_Client 12 | from decentralizepy.node.EpidemicLearning.EL_Oracle_TopologyBuilder import ( 13 | EL_Oracle_TopologyBuilder, 14 | ) 15 | 16 | 17 | def read_ini(file_path): 18 | config = LocalConfig(file_path) 19 | for section in config: 20 | print("Section: ", section) 21 | for key, value in config.items(section): 22 | print((key, value)) 23 | print(dict(config.items("DATASET"))) 24 | return config 25 | 26 | 27 | if __name__ == "__main__": 28 | args = utils.get_args() 29 | 30 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 31 | 32 | log_level = { 33 | "INFO": logging.INFO, 34 | "DEBUG": logging.DEBUG, 35 | "WARNING": logging.WARNING, 36 | "ERROR": logging.ERROR, 37 | "CRITICAL": logging.CRITICAL, 38 | } 39 | 40 | config = read_ini(args.config_file) 41 | my_config = dict() 42 | for section in config: 43 | my_config[section] = dict(config.items(section)) 44 | 45 | copy(args.config_file, args.log_dir) 46 | copy(args.graph_file, args.log_dir) 47 | utils.write_args(args, args.log_dir) 48 | 49 | g = Graph() 50 | g.read_graph_from_file(args.graph_file, args.graph_type) 51 | n_machines = args.machines 52 | procs_per_machine = args.procs_per_machine[0] 53 | m_id = args.machine_id 54 | 55 | sm = args.server_machine 56 | sr = args.server_rank 57 | 58 | l = Linear( 59 | n_machines, procs_per_machine, global_service_machine=sm, current_machine=m_id 60 | ) 61 | 62 | processes = [] 63 | if sm == m_id: 64 | processes.append( 65 | mp.Process( 66 | target=EL_Oracle_TopologyBuilder, 67 | args=[ 68 | sr, 69 | m_id, 70 | l, 71 | g, 72 | my_config, 73 | args.iterations, 74 | args.log_dir, 75 | log_level[args.log_level], 76 | ], 77 | ) 78 | ) 79 | 80 | for r in range(0, procs_per_machine): 81 | processes.append( 82 | mp.Process( 83 | target=EL_Oracle_Client, 84 | args=[ 85 | r, 86 | m_id, 87 | l, 88 | g, 89 | my_config, 90 | args.iterations, 91 | args.log_dir, 92 | args.weights_store_dir, 93 | log_level[args.log_level], 94 | args.test_after, 95 | args.train_evaluate_after, 96 | args.reset_optimizer, 97 | ], 98 | ) 99 | ) 100 | 101 | for p in processes: 102 | p.start() 103 | 104 | for p in processes: 105 | p.join() 106 | -------------------------------------------------------------------------------- /eval/testingFederated.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from shutil import copy 4 | 5 | from localconfig import LocalConfig 6 | from torch import multiprocessing as mp 7 | 8 | from decentralizepy import utils 9 | from decentralizepy.graphs.Graph import Graph 10 | from decentralizepy.mappings.Linear import Linear 11 | from decentralizepy.node.DPSGDNodeFederated import DPSGDNodeFederated 12 | from decentralizepy.node.FederatedParameterServer import FederatedParameterServer 13 | 14 | 15 | def read_ini(file_path): 16 | config = LocalConfig(file_path) 17 | for section in config: 18 | print("Section: ", 
section) 19 | for key, value in config.items(section): 20 | print((key, value)) 21 | print(dict(config.items("DATASET"))) 22 | return config 23 | 24 | 25 | if __name__ == "__main__": 26 | args = utils.get_args() 27 | 28 | Path(args.log_dir).mkdir(parents=True, exist_ok=True) 29 | 30 | log_level = { 31 | "INFO": logging.INFO, 32 | "DEBUG": logging.DEBUG, 33 | "WARNING": logging.WARNING, 34 | "ERROR": logging.ERROR, 35 | "CRITICAL": logging.CRITICAL, 36 | } 37 | 38 | config = read_ini(args.config_file) 39 | my_config = dict() 40 | for section in config: 41 | my_config[section] = dict(config.items(section)) 42 | 43 | copy(args.config_file, args.log_dir) 44 | copy(args.graph_file, args.log_dir) 45 | utils.write_args(args, args.log_dir) 46 | 47 | g = Graph() 48 | g.read_graph_from_file(args.graph_file, args.graph_type) 49 | n_machines = args.machines 50 | procs_per_machine = args.procs_per_machine[0] 51 | l = Linear(n_machines, procs_per_machine) 52 | m_id = args.machine_id 53 | 54 | sm = args.server_machine 55 | sr = args.server_rank 56 | 57 | processes = [] 58 | if sm == m_id: 59 | processes.append( 60 | mp.Process( 61 | target=FederatedParameterServer, 62 | args=[ 63 | sr, 64 | m_id, 65 | l, 66 | g, 67 | my_config, 68 | args.iterations, 69 | args.log_dir, 70 | args.weights_store_dir, 71 | log_level[args.log_level], 72 | args.test_after, 73 | args.train_evaluate_after, 74 | args.working_rate, 75 | ], 76 | ) 77 | ) 78 | 79 | for r in range(0, procs_per_machine): 80 | processes.append( 81 | mp.Process( 82 | target=DPSGDNodeFederated, 83 | args=[ 84 | r, 85 | m_id, 86 | l, 87 | g, 88 | my_config, 89 | args.iterations, 90 | args.log_dir, 91 | args.weights_store_dir, 92 | log_level[args.log_level], 93 | args.test_after, 94 | args.train_evaluate_after, 95 | args.reset_optimizer, 96 | ], 97 | ) 98 | ) 99 | 100 | for p in processes: 101 | p.start() 102 | 103 | for p in processes: 104 | p.join() 105 | -------------------------------------------------------------------------------- /eval/plot_model.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | from matplotlib import pyplot as plt 8 | 9 | 10 | 11 | def plot(x, y, label, *args): 12 | plt.plot(x, y, *args, label=label) 13 | plt.legend() 14 | 15 | 16 | def reject_outliers(data, m=2.0): 17 | d = np.abs(data - np.median(data)) 18 | mdev = np.median(d) 19 | s = d / (mdev if mdev else 1.0) 20 | return data[s < m] 21 | 22 | 23 | def plot_model(path, title): 24 | model_path = os.path.join(path, "plots") 25 | Path(model_path).mkdir(parents=True, exist_ok=True) 26 | files = [f for f in os.listdir(path) if f.endswith("json")] 27 | for file in files: 28 | filepath = os.path.join(path, file) 29 | with open(filepath, "r") as inf: 30 | model_vec = json.load(inf) 31 | del model_vec["order"] 32 | del model_vec["shapes"] 33 | model_vec = np.array(model_vec[list(model_vec.keys())[0]]) 34 | num_elements = model_vec.shape[0] 35 | x_axis = np.arange(1, num_elements + 1) 36 | plt.clf() 37 | plt.title(title) 38 | plot(x_axis, model_vec, "unsorted", ".") 39 | model_vec = np.sort(model_vec) 40 | plot(x_axis, model_vec, "sorted") 41 | plt.savefig(os.path.join(model_path, file[0:-5])) 42 | 43 | 44 | def plot_ratio(path_change, path_val, title): 45 | model_path = os.path.join(path_change, "plots_ratio") 46 | Path(model_path).mkdir(parents=True, exist_ok=True) 47 | files_change = [f for f in os.listdir(path_change)
if f.endswith("json")] 48 | files_val = [f for f in os.listdir(path_val) if f.endswith("json")] 49 | for i, file in enumerate(files_change): 50 | print("Processed ", file) 51 | filepath_change = os.path.join(path_change, file) 52 | filepath_val = os.path.join(path_val, files_val[i]) 53 | with open(filepath_change, "r") as inf: 54 | model_change = json.load(inf) 55 | del model_change["order"] 56 | del model_change["shapes"] 57 | model_change = np.array(model_change[list(model_change.keys())[0]]) 58 | with open(filepath_val, "r") as inf: 59 | model_val = json.load(inf) 60 | del model_val["order"] 61 | del model_val["shapes"] 62 | model_val = np.array(model_val[list(model_val.keys())[0]]) 63 | num_elements = model_val.shape[0] 64 | x_axis = np.arange(1, num_elements + 1) 65 | plt.clf() 66 | plt.title(title) 67 | model_vec = np.divide( 68 | model_change, 69 | model_val, 70 | out=np.zeros_like(model_change), 71 | where=model_val != 0.0, 72 | ) 73 | model_vec = reject_outliers(model_vec) 74 | num_elements = model_vec.shape[0] 75 | x_axis = np.arange(1, num_elements + 1) 76 | plot(x_axis, model_vec, "unsorted", ".") 77 | model_vec = np.sort(model_vec) 78 | plot(x_axis, model_vec, "sorted") 79 | plt.savefig(os.path.join(model_path, file[0:-5])) 80 | 81 | 82 | if __name__ == "__main__": 83 | assert len(sys.argv) == 3 84 | plot_model( 85 | os.path.join(sys.argv[1], "model_change", sys.argv[2]), "Change in Weights" 86 | ) 87 | plot_model(os.path.join(sys.argv[1], "model_val", sys.argv[2]), "Model Parameters") 88 | plot_ratio( 89 | os.path.join(sys.argv[1], "model_change", sys.argv[2]), 90 | os.path.join(sys.argv[1], "model_val", sys.argv[2]), 91 | "Ratio", 92 | ) 93 | -------------------------------------------------------------------------------- /src/decentralizepy/node/PeerSamplerDynamic.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from decentralizepy.graphs.Graph import Graph 4 | from decentralizepy.graphs.Regular import Regular 5 | from decentralizepy.mappings.Mapping import Mapping 6 | from decentralizepy.node.PeerSampler import PeerSampler 7 | 8 | 9 | class PeerSamplerDynamic(PeerSampler): 10 | """ 11 | This class defines the peer sampling service 12 | 13 | """ 14 | 15 | def get_neighbors(self, node, iteration=None): 16 | if iteration is not None: 17 | if iteration > self.iteration: 18 | logging.debug( 19 | "iteration, self.iteration: {}, {}".format( 20 | iteration, self.iteration 21 | ) 22 | ) 23 | assert iteration == self.iteration + 1 24 | self.iteration = iteration 25 | self.graphs.append( 26 | Regular( 27 | self.graph.n_procs, 28 | self.graph_degree, 29 | seed=self.random_seed * 100000 + iteration, 30 | ) 31 | ) 32 | return self.graphs[iteration].neighbors(node) 33 | else: 34 | return self.graph.neighbors(node) 35 | 36 | def __init__( 37 | self, 38 | rank: int, 39 | machine_id: int, 40 | mapping: Mapping, 41 | graph: Graph, 42 | config, 43 | iterations=1, 44 | log_dir=".", 45 | log_level=logging.INFO, 46 | *args 47 | ): 48 | """ 49 | Constructor 50 | 51 | Parameters 52 | ---------- 53 | rank : int 54 | Rank of process local to the machine 55 | machine_id : int 56 | Machine ID on which the process is running 57 | mapping : decentralizepy.mappings 58 | The object containing the mapping rank <--> uid 59 | graph : decentralizepy.graphs 60 | The object containing the global graph 61 | config : dict 62 | A dictionary of configurations.
Must contain the following: 63 | [DATASET] 64 | dataset_package 65 | dataset_class 66 | model_class 67 | [OPTIMIZER_PARAMS] 68 | optimizer_package 69 | optimizer_class 70 | [TRAIN_PARAMS] 71 | training_package = decentralizepy.training.Training 72 | training_class = Training 73 | epochs_per_round = 25 74 | batch_size = 64 75 | iterations : int 76 | Number of iterations (communication steps) for which the model should be trained 77 | log_dir : str 78 | Logging directory 79 | log_level : logging.Level 80 | One of DEBUG, INFO, WARNING, ERROR, CRITICAL 81 | args : optional 82 | Other arguments 83 | 84 | """ 85 | 86 | self.iteration = -1 87 | self.graphs = [] 88 | 89 | nodeConfigs = config["NODE"] 90 | self.graph_degree = nodeConfigs["graph_degree"] 91 | 92 | self.instantiate( 93 | rank, 94 | machine_id, 95 | mapping, 96 | graph, 97 | config, 98 | iterations, 99 | log_dir, 100 | log_level, 101 | *args 102 | ) 103 | 104 | self.run() 105 | 106 | logging.info("Peer Sampler exiting") 107 | -------------------------------------------------------------------------------- /src/decentralizepy/sharing/PlainAverageSharing.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | 5 | from decentralizepy.sharing.Sharing import Sharing 6 | 7 | 8 | class PlainAverageSharing(Sharing): 9 | """ 10 | Class to do plain averaging instead of Metropolis Hastings 11 | 12 | """ 13 | 14 | def __init__( 15 | self, 16 | rank, 17 | machine_id, 18 | communication, 19 | mapping, 20 | graph, 21 | model, 22 | dataset, 23 | log_dir, 24 | compress=False, 25 | compression_package=None, 26 | compression_class=None, 27 | float_precision=None, 28 | ): 29 | """ 30 | Constructor 31 | 32 | Parameters 33 | ---------- 34 | rank : int 35 | Local rank 36 | machine_id : int 37 | Global machine id 38 | communication : decentralizepy.communication.Communication 39 | Communication module used to send and receive messages 40 | mapping : decentralizepy.mappings.Mapping 41 | Mapping (rank, machine_id) -> uid 42 | graph : decentralizepy.graphs.Graph 43 | Graph representing neighbors 44 | model : decentralizepy.models.Model 45 | Model to train 46 | dataset : decentralizepy.datasets.Dataset 47 | Dataset for sharing data. 48 | log_dir : str 49 | Location to write shared_params (only writing for 2 procs per machine) 50 | 51 | """ 52 | super().__init__( 53 | rank, 54 | machine_id, 55 | communication, 56 | mapping, 57 | graph, 58 | model, 59 | dataset, 60 | log_dir, 61 | compress, 62 | compression_package, 63 | compression_class, 64 | float_precision, 65 | ) 66 | self.received_this_round = 0 67 | 68 | def _pre_step(self): 69 | """ 70 | Called at the beginning of step. 71 | 72 | """ 73 | pass 74 | 75 | def _post_step(self): 76 | """ 77 | Called at the end of step.
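Note: the plain average computed in `_averaging` below gives every model the same weight, 1 / (len(peer_deques) + 1); for example, with three received peer models in a round, each of them and the local model contributes 0.25.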
78 | 79 | """ 80 | pass 81 | 82 | def _averaging(self, peer_deques): 83 | """ 84 | Averages the received model with the local model 85 | 86 | """ 87 | self.received_this_round = 0 88 | with torch.no_grad(): 89 | total = dict() 90 | weight = 1 / (len(peer_deques) + 1) 91 | for i, n in enumerate(peer_deques): 92 | self.received_this_round += 1 93 | data = peer_deques[n].popleft() 94 | iteration = data["iteration"] 95 | del data["iteration"] 96 | del data["CHANNEL"] 97 | logging.debug( 98 | "Averaging model from neighbor {} of iteration {}".format( 99 | n, iteration 100 | ) 101 | ) 102 | data = self.deserialized_model(data) 103 | for key, value in data.items(): 104 | if key in total: 105 | total[key] += value * weight 106 | else: 107 | total[key] = value * weight 108 | 109 | for key, value in self.model.state_dict().items(): 110 | total[key] += value * weight 111 | 112 | self.model.load_state_dict(total) 113 | self._post_step() 114 | self.communication_round += 1 115 | 116 | def get_data_to_send(self, *args, **kwargs): 117 | self._pre_step() 118 | data = self.serialized_model() 119 | data["iteration"] = self.communication_round 120 | return data 121 | -------------------------------------------------------------------------------- /generate_graph.py: -------------------------------------------------------------------------------- 1 | import getopt 2 | import sys 3 | 4 | from decentralizepy.graphs.FullyConnected import FullyConnected 5 | from decentralizepy.graphs.Regular import Regular 6 | from decentralizepy.graphs.Ring import Ring 7 | from decentralizepy.graphs.SmallWorld import SmallWorld 8 | from decentralizepy.graphs.Star import Star 9 | 10 | if __name__ == "__main__": 11 | """ 12 | Script to generate a graph file. 13 | 14 | Usage 15 | ----- 16 | python generate_graph.py -g <graph_type> -n <num_nodes> -s <seed> -d <degree> -k <k_over_2> -b <beta> -f <file_name> -a 17 | 18 | Parameters 19 | ---------- 20 | graph_type : str 21 | One of {"Regular", "FullyConnected", "Ring", "SmallWorld", "Star"} 22 | num_nodes : int 23 | Number of nodes in the graph 24 | seed : int, optional 25 | Seed for random number generator 26 | degree : int, optional 27 | Degree of the graph 28 | k_over_2 : int, optional 29 | Parameter for smallworld 30 | beta : float, optional 31 | Parameter for smallworld 32 | file_name : str, optional 33 | Name of the file to write the graph to 34 | a : flag, optional 35 | If set, the graph is written in adjacency list format, otherwise in edge list format 36 | h : flag, optional 37 | Prints this help message 38 | 39 | """ 40 | __doc__ = "Usage: python3 generate_graph.py -g <graph_type> -n <num_nodes> -s <seed> -d <degree> -k <k_over_2> -b <beta> -f <file_name> -a -h" 41 | assert len(sys.argv) >= 2, __doc__ 42 | argumentList = sys.argv[1:] 43 | 44 | options = "hg:n:s:d:k:b:f:a" 45 | 46 | long_options = [ 47 | "graph=", 48 | "nodes=", 49 | "seed=", 50 | "degree=", 51 | "kover2=", 52 | "beta=", 53 | "file=", 54 | "adjacency", 55 | "help", 56 | ] 57 | 58 | try: 59 | arguments, values = getopt.getopt(argumentList, options, long_options) 60 | 61 | graph_type = None 62 | num_nodes = None 63 | seed = None 64 | degree = None 65 | k_over_2 = None 66 | beta = None 67 | file_name = None 68 | type_adjacency = "edges" 69 | 70 | for currentArgument, currentValue in arguments: 71 | if currentArgument in ("-h", "--help"): 72 | print(__doc__) 73 | exit(0) 74 | elif currentArgument in ("-g", "--graph"): 75 | graph_type = currentValue 76 | elif currentArgument in ("-n", "--nodes"): 77 | num_nodes = int(currentValue) 78 | elif currentArgument in ("-s", "--seed"): 79 | seed = int(currentValue) 80 | elif currentArgument in ("-d", "--degree"):
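# Illustrative invocations (a sketch; the output file names are placeholders):
#
#   python generate_graph.py -g Regular -n 96 -d 4 -s 90 -f 96_regular.edges
#   python generate_graph.py -g SmallWorld -n 96 -k 2 -b 0.5 -f smallworld_96.edges
#   python generate_graph.py -g Ring -n 16 -f ring_16.adj -a
#
# Passing -a writes the adjacency-list format instead of the default
# edge-list format (see Graph.write_graph_to_file).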
81 | degree = int(currentValue) 82 | elif currentArgument in ("-k", "--kover2"): 83 | k_over_2 = int(currentValue) 84 | elif currentArgument in ("-b", "--beta"): 85 | beta = float(currentValue) 86 | elif currentArgument in ("-f", "--file"): 87 | file_name = currentValue 88 | elif currentArgument in ("-a", "--adjacency"): 89 | type_adjacency = "adjacency" 90 | 91 | if graph_type == "Regular": 92 | g = Regular(num_nodes, degree, seed) 93 | elif graph_type == "FullyConnected": 94 | g = FullyConnected(num_nodes) 95 | elif graph_type == "Ring": 96 | g = Ring(num_nodes) 97 | elif graph_type == "SmallWorld": 98 | g = SmallWorld(num_nodes, k_over_2, beta) 99 | elif graph_type == "Star": 100 | g = Star(num_nodes) 101 | else: 102 | raise ValueError("Invalid graph type: " + graph_type) 103 | 104 | if file_name is not None: 105 | g.write_graph_to_file(file_name, type=type_adjacency) 106 | else: 107 | raise ValueError("No file name. " + __doc__) 108 | except getopt.error as err: 109 | print(str(err)) 110 | sys.exit(2) 111 | -------------------------------------------------------------------------------- /src/decentralizepy/utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | import json 4 | import os 5 | 6 | 7 | def conditional_value(var, nul, default): 8 | """ 9 | Set the value to default if nul. 10 | 11 | Parameters 12 | ---------- 13 | var : any 14 | The value 15 | nul : any 16 | The null value. Assigns default if var == nul 17 | default : any 18 | The default value 19 | 20 | Returns 21 | ------- 22 | type(var) 23 | The final value 24 | 25 | """ 26 | if var != nul: 27 | return var 28 | else: 29 | return default 30 | 31 | 32 | def remove_keys(d, keys_to_remove): 33 | """ 34 | Removes given keys from the dict. Returns a new list. 35 | 36 | Parameters 37 | ---------- 38 | d : dict 39 | The initial dictionary 40 | keys_to_remove : list 41 | List of keys to remove from dict 42 | 43 | Returns 44 | ------- 45 | dict 46 | A new dictionary with the given keys removed. 47 | 48 | """ 49 | return {key: d[key] for key in d if key not in keys_to_remove} 50 | 51 | 52 | def get_args(): 53 | """ 54 | Utility to parse arguments. 
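Note that -ps/--procs_per_machine is declared with nargs="+" below, so it parses to a list of ints: launchers built on the Linear mapping read args.procs_per_machine[0], while Manual-mapping launchers such as eval/testingPeerSamplerDynamicManual.py use the whole per-machine list.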
55 | 56 | Returns 57 | ------- 58 | args 59 | Command line arguments 60 | 61 | """ 62 | parser = argparse.ArgumentParser() 63 | parser.add_argument("-mid", "--machine_id", type=int, default=0) 64 | parser.add_argument("-ps", "--procs_per_machine", type=int, default=1, nargs="+") 65 | parser.add_argument("-ms", "--machines", type=int, default=1) 66 | parser.add_argument( 67 | "-ld", 68 | "--log_dir", 69 | type=str, 70 | default="./{}".format(datetime.datetime.now().isoformat(timespec="minutes")), 71 | ) 72 | parser.add_argument( 73 | "-wsd", 74 | "--weights_store_dir", 75 | type=str, 76 | default="./{}_ws".format(datetime.datetime.now().isoformat(timespec="minutes")), 77 | ) 78 | parser.add_argument("-is", "--iterations", type=int, default=1) 79 | parser.add_argument("-cf", "--config_file", type=str, default="config.ini") 80 | parser.add_argument("-ll", "--log_level", type=str, default="INFO") 81 | parser.add_argument("-gf", "--graph_file", type=str, default="36_nodes.edges") 82 | parser.add_argument("-gt", "--graph_type", type=str, default="edges") 83 | parser.add_argument("-ta", "--test_after", type=int, default=5) 84 | parser.add_argument("-tea", "--train_evaluate_after", type=int, default=1) 85 | parser.add_argument("-ro", "--reset_optimizer", type=int, default=1) 86 | parser.add_argument("-sm", "--server_machine", type=int, default=0) 87 | parser.add_argument("-sr", "--server_rank", type=int, default=-1) 88 | parser.add_argument("-wr", "--working_rate", type=float, default=1.0) 89 | 90 | args = parser.parse_args() 91 | return args 92 | 93 | 94 | def write_args(args, path): 95 | """ 96 | Write arguments to a json file 97 | 98 | Parameters 99 | ---------- 100 | args : args 101 | Command line args 102 | path : str 103 | Location of the file to write to 104 | 105 | """ 106 | data = { 107 | "machine_id": args.machine_id, 108 | "procs_per_machine": args.procs_per_machine, 109 | "machines": args.machines, 110 | "log_dir": args.log_dir, 111 | "weights_store_dir": args.weights_store_dir, 112 | "iterations": args.iterations, 113 | "config_file": args.config_file, 114 | "log_level": args.log_level, 115 | "graph_file": args.graph_file, 116 | "graph_type": args.graph_type, 117 | "test_after": args.test_after, 118 | "train_evaluate_after": args.train_evaluate_after, 119 | "reset_optimizer": args.reset_optimizer, 120 | "working_rate": args.working_rate, 121 | } 122 | with open(os.path.join(path, "args.json"), "w") as of: 123 | json.dump(data, of) 124 | 125 | 126 | def identity(obj): 127 | """ 128 | Identity function 129 | Parameters 130 | ---------- 131 | obj 132 | Some object 133 | Returns 134 | ------- 135 | obj 136 | The same object 137 | """ 138 | return obj 139 | -------------------------------------------------------------------------------- /eval/96_regular.edges: -------------------------------------------------------------------------------- 1 | 96 2 | 0 24 3 | 0 1 4 | 0 26 5 | 0 95 6 | 1 2 7 | 1 0 8 | 1 82 9 | 1 83 10 | 2 33 11 | 2 90 12 | 2 3 13 | 2 1 14 | 3 2 15 | 3 4 16 | 3 14 17 | 3 79 18 | 4 3 19 | 4 12 20 | 4 5 21 | 4 86 22 | 5 64 23 | 5 42 24 | 5 4 25 | 5 6 26 | 6 9 27 | 6 5 28 | 6 62 29 | 6 7 30 | 7 24 31 | 7 8 32 | 7 45 33 | 7 6 34 | 8 81 35 | 8 17 36 | 8 9 37 | 8 7 38 | 9 8 39 | 9 10 40 | 9 53 41 | 9 6 42 | 10 9 43 | 10 11 44 | 10 29 45 | 10 31 46 | 11 80 47 | 11 10 48 | 11 36 49 | 11 12 50 | 12 11 51 | 12 4 52 | 12 13 53 | 12 70 54 | 13 12 55 | 13 53 56 | 13 30 57 | 13 14 58 | 14 3 59 | 14 15 60 | 14 13 61 | 14 47 62 | 15 16 63 | 15 26 64 | 15 14 65 | 16 41 66 | 16 17 67 | 16 15 
68 | 17 8 69 | 17 16 70 | 17 18 71 | 17 83 72 | 18 17 73 | 18 19 74 | 18 95 75 | 18 63 76 | 19 82 77 | 19 18 78 | 19 20 79 | 19 22 80 | 20 19 81 | 20 59 82 | 20 21 83 | 20 22 84 | 21 72 85 | 21 58 86 | 21 20 87 | 21 22 88 | 22 19 89 | 22 20 90 | 22 21 91 | 22 23 92 | 23 24 93 | 23 65 94 | 23 85 95 | 23 22 96 | 24 0 97 | 24 25 98 | 24 23 99 | 24 7 100 | 25 32 101 | 25 24 102 | 25 26 103 | 25 38 104 | 26 0 105 | 26 25 106 | 26 27 107 | 26 15 108 | 27 32 109 | 27 26 110 | 27 28 111 | 27 63 112 | 28 27 113 | 28 92 114 | 28 29 115 | 28 39 116 | 29 10 117 | 29 52 118 | 29 28 119 | 29 30 120 | 30 66 121 | 30 29 122 | 30 13 123 | 30 31 124 | 31 32 125 | 31 10 126 | 31 36 127 | 31 30 128 | 32 25 129 | 32 27 130 | 32 31 131 | 32 33 132 | 33 32 133 | 33 2 134 | 33 84 135 | 33 34 136 | 34 33 137 | 34 50 138 | 34 35 139 | 34 93 140 | 35 57 141 | 35 34 142 | 35 43 143 | 35 36 144 | 36 35 145 | 36 11 146 | 36 37 147 | 36 31 148 | 37 88 149 | 37 36 150 | 37 38 151 | 37 79 152 | 38 25 153 | 38 37 154 | 38 39 155 | 38 49 156 | 39 40 157 | 39 28 158 | 39 77 159 | 39 38 160 | 40 41 161 | 40 91 162 | 40 39 163 | 40 87 164 | 41 16 165 | 41 40 166 | 41 42 167 | 41 51 168 | 42 41 169 | 42 43 170 | 42 5 171 | 43 42 172 | 43 35 173 | 43 44 174 | 44 72 175 | 44 43 176 | 44 75 177 | 44 45 178 | 45 67 179 | 45 44 180 | 45 46 181 | 45 7 182 | 46 76 183 | 46 45 184 | 46 54 185 | 46 47 186 | 47 48 187 | 47 65 188 | 47 14 189 | 47 46 190 | 48 56 191 | 48 49 192 | 48 61 193 | 48 47 194 | 49 48 195 | 49 50 196 | 49 38 197 | 49 71 198 | 50 49 199 | 50 34 200 | 50 51 201 | 50 93 202 | 51 41 203 | 51 50 204 | 51 52 205 | 51 95 206 | 52 51 207 | 52 74 208 | 52 53 209 | 52 29 210 | 53 9 211 | 53 52 212 | 53 13 213 | 53 54 214 | 54 75 215 | 54 53 216 | 54 46 217 | 54 55 218 | 55 56 219 | 55 69 220 | 55 85 221 | 55 54 222 | 56 48 223 | 56 57 224 | 56 69 225 | 56 55 226 | 57 56 227 | 57 89 228 | 57 58 229 | 57 35 230 | 58 57 231 | 58 59 232 | 58 21 233 | 58 86 234 | 59 73 235 | 59 58 236 | 59 20 237 | 59 60 238 | 60 62 239 | 60 59 240 | 60 61 241 | 60 78 242 | 61 48 243 | 61 62 244 | 61 60 245 | 61 94 246 | 62 60 247 | 62 61 248 | 62 6 249 | 62 63 250 | 63 64 251 | 63 18 252 | 63 27 253 | 63 62 254 | 64 65 255 | 64 84 256 | 64 5 257 | 64 63 258 | 65 64 259 | 65 66 260 | 65 23 261 | 65 47 262 | 66 65 263 | 66 89 264 | 66 67 265 | 66 30 266 | 67 80 267 | 67 66 268 | 67 68 269 | 67 45 270 | 68 67 271 | 68 92 272 | 68 69 273 | 68 94 274 | 69 56 275 | 69 68 276 | 69 70 277 | 69 55 278 | 70 90 279 | 70 12 280 | 70 69 281 | 70 71 282 | 71 72 283 | 71 49 284 | 71 70 285 | 71 87 286 | 72 73 287 | 72 44 288 | 72 21 289 | 72 71 290 | 73 72 291 | 73 91 292 | 73 59 293 | 73 74 294 | 74 73 295 | 74 75 296 | 74 52 297 | 74 76 298 | 75 74 299 | 75 44 300 | 75 54 301 | 75 76 302 | 76 74 303 | 76 75 304 | 76 77 305 | 76 46 306 | 77 81 307 | 77 76 308 | 77 78 309 | 77 39 310 | 78 88 311 | 78 60 312 | 78 77 313 | 78 79 314 | 79 80 315 | 79 3 316 | 79 37 317 | 79 78 318 | 80 81 319 | 80 67 320 | 80 11 321 | 80 79 322 | 81 8 323 | 81 82 324 | 81 80 325 | 81 77 326 | 82 81 327 | 82 1 328 | 82 83 329 | 82 19 330 | 83 1 331 | 83 82 332 | 83 84 333 | 83 17 334 | 84 64 335 | 84 33 336 | 84 83 337 | 84 85 338 | 85 84 339 | 85 55 340 | 85 86 341 | 85 23 342 | 86 58 343 | 86 4 344 | 86 85 345 | 86 87 346 | 87 40 347 | 87 88 348 | 87 86 349 | 87 71 350 | 88 89 351 | 88 37 352 | 88 78 353 | 88 87 354 | 89 88 355 | 89 57 356 | 89 66 357 | 89 90 358 | 90 89 359 | 90 2 360 | 90 91 361 | 90 70 362 | 91 40 363 | 91 73 364 | 91 90 365 | 91 92 366 | 92 93 367 | 92 91 
368 | 92 68 369 | 92 28 370 | 93 50 371 | 93 34 372 | 93 94 373 | 93 92 374 | 94 93 375 | 94 68 376 | 94 61 377 | 94 95 378 | 95 0 379 | 95 18 380 | 95 51 381 | 95 94 382 | -------------------------------------------------------------------------------- /src/decentralizepy/compression/Quantization.py: -------------------------------------------------------------------------------- 1 | # Quantize to [-k, k] 2 | 3 | import pickle 4 | 5 | import numpy as np 6 | 7 | from decentralizepy.compression.Compression import Compression 8 | 9 | 10 | class Quantization(Compression): 11 | """ 12 | Compress metadata and quantize parameters 13 | 14 | """ 15 | 16 | def __init__(self, float_precision: int = 2**15 - 1, *args, **kwargs): 17 | """ 18 | Constructor 19 | 20 | Parameters 21 | ---------- 22 | float_precision : int, optional 23 | Quantization parameter 24 | """ 25 | super().__init__(*args, **kwargs) 26 | self.k = float_precision 27 | 28 | def compress_float(self, x): 29 | """ 30 | compression function for float arrays 31 | 32 | Parameters 33 | ---------- 34 | x : np.ndarray 35 | Data to compress 36 | 37 | Returns 38 | ------- 39 | bytearray 40 | encoded data as bytes 41 | 42 | """ 43 | 44 | # Compute scale factor; it must equal the normalization factor below, since decompress_float multiplies by it to denormalize 45 | scale_factor = np.max(np.abs(x)) / self.k 46 | # scale_factor = np.mean(np.abs(x)) / self.k 47 | 48 | # Normalize x to [-k, k] 49 | norm_factor = np.max(np.abs(x)) / self.k 50 | x = x / norm_factor 51 | x = x.round().astype(np.int32) 52 | 53 | # Get the maximum absolute value from the input array 54 | max_abs = np.max(np.abs(x)) 55 | 56 | # Get the nearest power of 2 greater than or equal to max_abs 57 | nearest_pow_2 = 2 ** np.ceil(np.log2(max_abs)) 58 | 59 | # Check if nearest_pow_2 is the same as max_abs 60 | if nearest_pow_2 == max_abs: 61 | nearest_pow_2 = nearest_pow_2 * 2 62 | 63 | # Calculate the number of bits required to represent the nearest power of 2 64 | num_bits = int(np.ceil(np.log2(nearest_pow_2))) + 1 65 | 66 | # Make all numbers of x positive 67 | x = x + nearest_pow_2 - 1 68 | 69 | x = np.asarray(x, dtype=np.uint32) 70 | 71 | # Create a numpy array of shape (x.shape, num_bits) and fill it with zeros 72 | bit_rep = np.zeros((x.shape[0], num_bits), dtype=np.uint8) 73 | 74 | # Iterate over x and convert each number to binary 75 | for i in range(len(x)): 76 | str_bit = np.binary_repr(x[i], width=num_bits) 77 | array_bit = np.array(list(str_bit), dtype=np.uint8) 78 | indices_with_1 = np.where(array_bit == 1)[0] 79 | bit_rep[i][indices_with_1] = 1 80 | 81 | bit_rep = bit_rep.reshape(-1) 82 | 83 | # Pack the bits into minimum number of bytes 84 | intermediate_rep = np.packbits(bit_rep, bitorder="little") 85 | padding = np.array([0], dtype=np.uint8) 86 | if bit_rep.shape[0] % 8: 87 | padding = np.array([8 - (bit_rep.shape[0] % 8)], dtype=np.uint8) 88 | num_bits = np.array([num_bits], dtype=np.uint8) 89 | to_send = np.concatenate((padding, num_bits, intermediate_rep), dtype=np.uint8) 90 | 91 | return pickle.dumps((scale_factor, to_send)) 92 | 93 | def decompress_float(self, bytes): 94 | """ 95 | decompression function for compressed float arrays 96 | 97 | Parameters 98 | ---------- 99 | bytes : bytearray 100 | compressed data 101 | 102 | Returns 103 | ------- 104 | np.ndarray 105 | decompressed data as array 106 | 107 | """ 108 | # Extract scale_factor and x from bytes 109 | scale_factor, x = pickle.loads(bytes) 110 | 111 | # Extract padding and num_bits from x 112 | padding = -x[0].item() if x[0].item() else None 113 | num_bits = x[1].item() 114 | rest_of_x =
x[2:].astype(np.uint8) 115 | 116 | # Unpack rest_of_x and reshape it 117 | received_x = np.unpackbits(rest_of_x, bitorder="little", count=padding) 118 | received_x = received_x.reshape((-1, num_bits)).astype(np.uint8) 119 | 120 | # Initialize an int32 array with the same number of rows as received_x 121 | output = np.zeros(received_x.shape[0], dtype=np.int32) 122 | 123 | # Convert each row into an integer 124 | for i in range(received_x.shape[0]): 125 | output[i] = ( 126 | int("".join(received_x[i].astype(str)), 2) - (2 ** (num_bits - 1)) + 1 127 | ) 128 | 129 | # Denormalize the output 130 | output = output * scale_factor 131 | 132 | return output.astype(np.float32) 133 | -------------------------------------------------------------------------------- /eval/plot_percentile.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import torch 8 | from matplotlib import pyplot as plt 9 | 10 | 11 | def get_stats(l): 12 | assert len(l) > 0 13 | mean_dict, stdev_dict, min_dict, max_dict = {}, {}, {}, {} 14 | for key in l[0].keys(): 15 | all_nodes = [i[key] for i in l] 16 | all_nodes = np.array(all_nodes) 17 | mean = np.mean(all_nodes) 18 | std = np.std(all_nodes) 19 | min = np.min(all_nodes) 20 | max = np.max(all_nodes) 21 | mean_dict[int(key)] = mean 22 | stdev_dict[int(key)] = std 23 | min_dict[int(key)] = min 24 | max_dict[int(key)] = max 25 | return mean_dict, stdev_dict, min_dict, max_dict 26 | 27 | 28 | def plot(means, stdevs, mins, maxs, title, label, loc): 29 | plt.title(title) 30 | plt.xlabel("communication rounds") 31 | x_axis = list(means.keys()) 32 | y_axis = list(means.values()) 33 | err = list(stdevs.values()) 34 | plt.errorbar(x_axis, y_axis, yerr=err, label=label) 35 | plt.legend(loc=loc) 36 | 37 | 38 | def plot_results(path): 39 | """ 40 | Plots the percentiles. 41 | Based on plot.py 42 | Parameters 43 | ---------- 44 | path 45 | path to the folders from which to create the percentiles plots 46 | 47 | """ 48 | folders = os.listdir(path) 49 | folders.sort() 50 | print("Reading folders from: ", path) 51 | print("Folders: ", folders) 52 | for folder in folders: 53 | folder_path = os.path.join(path, folder) 54 | if not os.path.isdir(folder_path): 55 | continue 56 | results = [] 57 | all_shared_params = [] 58 | machine_folders = os.listdir(folder_path) 59 | for machine_folder in machine_folders: 60 | mf_path = os.path.join(folder_path, machine_folder) 61 | if not os.path.isdir(mf_path): 62 | continue 63 | files = os.listdir(mf_path) 64 | shared_params = [f for f in files if f.endswith("_shared_parameters.json")] 65 | files = [f for f in files if f.endswith("_results.json")] 66 | for f in files: 67 | filepath = os.path.join(mf_path, f) 68 | with open(filepath, "r") as inf: 69 | results.append(json.load(inf)) 70 | for sp in shared_params: 71 | filepath = os.path.join(mf_path, sp) 72 | with open(filepath, "r") as spf: 73 | all_shared_params.append(np.array(json.load(spf), dtype=np.int32)) 74 | 75 | # Figure 1: percentiles of the shared parameters 76 | plt.figure(1) 77 | # Average of the shared parameters 78 | mean = np.mean(all_shared_params, axis=0) 79 | std = np.std(all_shared_params, axis=0) 80 | with open( 81 | os.path.join(path, "shared_params_avg_" + folder + ".json"), "w" 82 | ) as mf: 83 | json.dump(mean.tolist(), mf) 84 | 85 | with open( 86 | os.path.join(path, "shared_params_std_" + folder + ".json"), "w" 87 | ) as sf: 88 | json.dump(std.tolist(), sf) 89 | 90 | # copy jupyter
notebook code 91 | percentile = np.percentile(mean, np.arange(0, 100, 1)) 92 | plt.plot(np.arange(0, 100, 1), percentile, label=folder) 93 | plt.title("Shared parameters Percentiles") 94 | # plt.ylabel("Absolute frequency value") 95 | plt.xlabel("Percentiles") 96 | plt.xticks(np.arange(0, 110, 10)) 97 | plt.legend(loc="lower right") 98 | 99 | plt.figure(2) 100 | sort = torch.sort(torch.tensor(mean)).values 101 | print(sort) 102 | length = sort.shape[0] 103 | length = int(length / 20) 104 | bins = [ 105 | torch.sum(sort[length * i : length * (i + 1)]).item() for i in range(20) 106 | ] 107 | total = np.sum(bins) 108 | perc = bins / total # np.divide(bins, total) 109 | print(perc) 110 | plt.bar(np.arange(0, 97.5, 5), perc, width=5, align="edge", label=folder) 111 | 112 | plt.title("Shared parameters Percentiles") 113 | # plt.ylabel("Absolute frequency value") 114 | plt.xlabel("Percentiles") 115 | plt.legend(loc="lower right") 116 | plt.savefig(os.path.join(path, f"percentiles_histogram_{folder}.png"), dpi=300) 117 | plt.clf() 118 | plt.cla() 119 | 120 | plt.figure(1) 121 | plt.savefig(os.path.join(path, "percentiles.png"), dpi=300) 122 | 123 | 124 | if __name__ == "__main__": 125 | assert len(sys.argv) == 2 126 | plot_results(sys.argv[1]) 127 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://upload.wikimedia.org/wikipedia/commons/f/f4/Logo_EPFL.svg 2 | :alt: EPFL logo 3 | :width: 75px 4 | :align: right 5 | 6 | ============== 7 | decentralizepy 8 | ============== 9 | 10 | decentralizepy is a framework for running distributed applications (particularly ML) on top of arbitrary topologies (decentralized, federated, parameter server). 11 | It was primarily conceived for assessing scientific ideas on several aspects of distributed learning (communication efficiency, privacy, data heterogeneity etc.). 12 | 13 | ------------------------- 14 | Setting up decentralizepy 15 | ------------------------- 16 | 17 | * Fork the repository. 18 | * Clone and enter your local repository. 19 | * Check if you have ``python>=3.8``. :: 20 | 21 | python --version 22 | 23 | * (Optional) Create and activate a virtual environment. :: 24 | 25 | python3 -m venv [venv-name] 26 | source [venv-name]/bin/activate 27 | 28 | * Update pip. :: 29 | 30 | pip3 install --upgrade pip 31 | pip install --upgrade pip 32 | 33 | * On Mac M1, installing ``pyzmq`` fails with `pip`. Use `conda `_. 34 | * Install decentralizepy for development. (zsh) :: 35 | 36 | pip3 install --editable .\[dev\] 37 | 38 | * Install decentralizepy for development. (bash) :: 39 | 40 | pip3 install --editable .[dev] 41 | 42 | * Download CIFAR-10 using ``download_dataset.py``. :: 43 | 44 | python download_dataset.py 45 | 46 | * (Optional) Download other datasets from LEAF and place them in ``eval/data/``. 47 | 48 | ---------------- 49 | Running the code 50 | ---------------- 51 | 52 | * Follow the tutorial in ``tutorial/``. OR, 53 | * Generate a new graph file with the required topology using ``generate_graph.py``. :: 54 | 55 | python generate_graph.py --help 56 | 57 | * Choose and modify one of the config files in ``eval/{step,epoch}_configs``. 58 | * Modify the dataset paths and ``addresses_filepath`` in the config file. 59 | * In eval/run.sh, modify arguments as required. 60 | * Execute eval/run.sh on all the machines simultaneously. 
There is a synchronization barrier mechanism at the start so that all processes start training together. 61 | 62 | ------ 63 | Citing 64 | ------ 65 | 66 | Cite us as :: 67 | 68 | @inproceedings{decentralizepy, 69 | author = {Dhasade, Akash and Kermarrec, Anne-Marie and Pires, Rafael and Sharma, Rishi and Vujasinovic, Milos}, 70 | title = {Decentralized Learning Made Easy with DecentralizePy}, 71 | year = {2023}, 72 | isbn = {9798400700842}, 73 | publisher = {Association for Computing Machinery}, 74 | address = {New York, NY, USA}, 75 | url = {https://doi.org/10.1145/3578356.3592587}, 76 | doi = {10.1145/3578356.3592587}, 77 | booktitle = {Proceedings of the 3rd Workshop on Machine Learning and Systems}, 78 | pages = {34–41}, 79 | numpages = {8}, 80 | keywords = {peer-to-peer, distributed systems, machine learning, middleware, decentralized learning, network topology}, 81 | location = {Rome, Italy}, 82 | series = {EuroMLSys '23} 83 | } 84 | 85 | ------------------------- 86 | Built with DecentralizePy 87 | ------------------------- 88 | 89 | .. _`Epidemic Learning`: https://arxiv.org/abs/2310.01972/ 90 | 91 | `Epidemic Learning`_ 92 | -------------------- 93 | 94 | Tutorial 95 | ``tutorial/EpidemicLearning`` 96 | Source files 97 | ``src/node/EpidemicLearning/`` 98 | Cite 99 | ``Martijn de Vos, Sadegh Farhadkhani, Rachid Guerraoui, Anne-Marie Kermarrec, Rafael Pires, and Rishi Sharma. Epidemic Learning: Boosting Decentralized Learning with Randomized Communication. In Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS), 2023.`` 100 | 101 | .. _`Get More for Less in Decentralized Learning Systems`: https://ieeexplore.ieee.org/document/10272515/ 102 | 103 | `Get More for Less in Decentralized Learning Systems`_ 104 | ------------------------------------------------------ 105 | 106 | Tutorial 107 | ``tutorial/JWINS`` 108 | Source files 109 | ``src/sharing/JWINS/`` 110 | Cite 111 | ``Akash Dhasade, Anne-Marie Kermarrec, Rafael Pires, Rishi Sharma, Jeffrey Wigger, and Milos Vujasinovic. Get More for Less in Decentralized Learning Systems. In IEEE 43rd International Conference on Distributed Computing Systems (ICDCS), 2023.`` 112 | 113 | 114 | ------------ 115 | Contributing 116 | ------------ 117 | 118 | * ``isort`` and ``black`` are installed along with the package for code linting. 119 | * While in the root directory of the repository, before committing the changes, please run :: 120 | 121 | black . 122 | isort . 123 | 124 | ------- 125 | Modules 126 | ------- 127 | 128 | Following are the modules of decentralizepy: 129 | 130 | Node 131 | ---- 132 | * The Manager. Optimizations at process level. 133 | 134 | Dataset 135 | ------- 136 | * Static 137 | 138 | Training 139 | -------- 140 | * Heterogeneity. How much do I want to work? 141 | 142 | Graph 143 | ----- 144 | * Static. Who are my neighbours? Topologies. 145 | 146 | Mapping 147 | ------- 148 | * Naming. The globally unique ids of the ``processes <-> machine_id, local_rank`` 149 | 150 | Sharing 151 | ------- 152 | * Leverage Redundancy. Privacy. Optimizations in model and data sharing. 153 | 154 | Communication 155 | ------------- 156 | * IPC/Network level. Compression. Privacy. 
Reliability 157 | 158 | Model 159 | ----- 160 | * Learning Model 161 | -------------------------------------------------------------------------------- /src/decentralizepy/node/DPSGDWithPeerSampler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | import os 4 | from collections import deque 5 | 6 | import torch 7 | 8 | from decentralizepy.graphs.Graph import Graph 9 | from decentralizepy.mappings.Mapping import Mapping 10 | from decentralizepy.node.DPSGDNode import DPSGDNode 11 | 12 | 13 | class DPSGDWithPeerSampler(DPSGDNode): 14 | """ 15 | This class defines the node for DPSGD 16 | 17 | """ 18 | 19 | def receive_neighbors(self): 20 | return self.receive_channel("PEERS")[1]["NEIGHBORS"] 21 | 22 | def get_neighbors(self, node=None): 23 | logging.debug("Requesting neighbors from the peer sampler.") 24 | self.communication.send( 25 | self.peer_sampler_uid, 26 | { 27 | "REQUEST_NEIGHBORS": self.uid, 28 | "iteration": self.iteration, 29 | "CHANNEL": "SERVER_REQUEST", 30 | }, 31 | ) 32 | my_neighbors = self.receive_neighbors() 33 | logging.debug("Neighbors this round: {}".format(my_neighbors)) 34 | return my_neighbors 35 | 36 | def __init__( 37 | self, 38 | rank: int, 39 | machine_id: int, 40 | mapping: Mapping, 41 | graph: Graph, 42 | config, 43 | iterations=1, 44 | log_dir=".", 45 | weights_store_dir=".", 46 | log_level=logging.INFO, 47 | test_after=5, 48 | train_evaluate_after=1, 49 | reset_optimizer=1, 50 | peer_sampler_uid=-1, 51 | *args 52 | ): 53 | """ 54 | Constructor 55 | 56 | Parameters 57 | ---------- 58 | rank : int 59 | Rank of process local to the machine 60 | machine_id : int 61 | Machine ID on which the process is running 62 | mapping : decentralizepy.mappings 63 | The object containing the mapping rank <--> uid 64 | graph : decentralizepy.graphs 65 | The object containing the global graph 66 | config : dict 67 | A dictionary of configurations.
Must contain the following: 68 | [DATASET] 69 | dataset_package 70 | dataset_class 71 | model_class 72 | [OPTIMIZER_PARAMS] 73 | optimizer_package 74 | optimizer_class 75 | [TRAIN_PARAMS] 76 | training_package = decentralizepy.training.Training 77 | training_class = Training 78 | epochs_per_round = 25 79 | batch_size = 64 80 | iterations : int 81 | Number of iterations (communication steps) for which the model should be trained 82 | log_dir : str 83 | Logging directory 84 | weights_store_dir : str 85 | Directory in which to store model weights 86 | log_level : logging.Level 87 | One of DEBUG, INFO, WARNING, ERROR, CRITICAL 88 | test_after : int 89 | Number of iterations after which the test loss and accuracy are calculated 90 | train_evaluate_after : int 91 | Number of iterations after which the train loss is calculated 92 | reset_optimizer : int 93 | 1 if optimizer should be reset every communication round, else 0 94 | args : optional 95 | Other arguments 96 | 97 | """ 98 | 99 | total_threads = os.cpu_count() 100 | self.threads_per_proc = max( 101 | math.floor(total_threads / mapping.get_local_procs_count()), 1 102 | ) 103 | torch.set_num_threads(self.threads_per_proc) 104 | torch.set_num_interop_threads(1) 105 | self.instantiate( 106 | rank, 107 | machine_id, 108 | mapping, 109 | graph, 110 | config, 111 | iterations, 112 | log_dir, 113 | weights_store_dir, 114 | log_level, 115 | test_after, 116 | train_evaluate_after, 117 | reset_optimizer, 118 | *args 119 | ) 120 | logging.info( 121 | "Each proc uses %d threads out of %d.", self.threads_per_proc, total_threads 122 | ) 123 | 124 | self.message_queue["PEERS"] = deque() 125 | 126 | self.peer_sampler_uid = peer_sampler_uid 127 | self.connect_neighbor(self.peer_sampler_uid) 128 | self.wait_for_hello(self.peer_sampler_uid) 129 | 130 | self.run() 131 | 132 | def disconnect_neighbors(self): 133 | """ 134 | Disconnects all neighbors. 135 | 136 | Raises 137 | ------ 138 | RuntimeError 139 | If received another message while waiting for BYEs 140 | 141 | """ 142 | if not self.sent_disconnections: 143 | logging.info("Disconnecting neighbors") 144 | 145 | if self.peer_sampler_uid in self.barrier: 146 | self.communication.send( 147 | self.peer_sampler_uid, 148 | {"BYE": self.uid, "CHANNEL": "SERVER_REQUEST"}, 149 | ) 150 | self.barrier.remove(self.peer_sampler_uid) 151 | 152 | for uid in self.barrier: 153 | self.communication.send(uid, {"BYE": self.uid, "CHANNEL": "DISCONNECT"}) 154 | self.sent_disconnections = True 155 | 156 | while len(self.barrier): 157 | sender, _ = self.receive_disconnect() 158 | self.barrier.remove(sender) 159 | -------------------------------------------------------------------------------- /src/decentralizepy/datasets/Dataset.py: -------------------------------------------------------------------------------- 1 | from decentralizepy import utils 2 | from decentralizepy.mappings.Mapping import Mapping 3 | 4 | 5 | class Dataset: 6 | """ 7 | This class defines the Dataset API. 8 | All datasets must follow this API. 9 | 10 | """ 11 | 12 | def __init__( 13 | self, 14 | rank: int, 15 | machine_id: int, 16 | mapping: Mapping, 17 | random_seed: int = 1234, 18 | only_local=False, 19 | train_dir="", 20 | test_dir="", 21 | sizes="", 22 | test_batch_size="", 23 | validation_source="", 24 | validation_size="", 25 | ): 26 | """ 27 | Constructor which reads the data files, instantiates and partitions the dataset 28 | 29 | Parameters 30 | ---------- 31 | rank : int 32 | Rank of the current process (to get the partition).
33 | machine_id : int 34 | Machine ID 35 | mapping : decentralizepy.mappings.Mapping 36 | Mapping to convert rank, machine_id -> uid for data partitioning 37 | It also provides the total number of global processes 38 | random_seed : int, optional 39 | Random seed for the dataset 40 | only_local : bool, optional 41 | True if the dataset needs to be partitioned only among local procs, False otherwise 42 | train_dir : str, optional 43 | Path to the training data files. Required to instantiate the training set 44 | The training set is partitioned according to the number of global processes and sizes 45 | test_dir : str, optional 46 | Path to the testing data files. Required to instantiate the testing set 47 | sizes : list(float), optional 48 | A list of fractions specifying how much data to allot each process. Sum of fractions should be 1.0 49 | By default, each process gets an equal amount. 50 | test_batch_size : int, optional 51 | Batch size during testing. Default value is 64 52 | validation_source : str, optional 53 | Source of the validation set. Can be one of 'train' or 'test' 54 | validation_size : int, optional 55 | Size of the test set used as the validation set 56 | """ 57 | self.rank = rank 58 | self.machine_id = machine_id 59 | self.mapping = mapping 60 | self.random_seed = random_seed 61 | self.uid = self.mapping.get_uid(rank, machine_id) 62 | self.only_local = only_local 63 | self.dataset_id = self.rank if self.only_local else self.uid 64 | self.num_partitions = ( 65 | self.mapping.get_local_procs_count() 66 | if self.only_local 67 | else self.mapping.get_n_procs() 68 | ) 69 | self.train_dir = utils.conditional_value(train_dir, "", None) 70 | self.test_dir = utils.conditional_value(test_dir, "", None) 71 | self.sizes = utils.conditional_value(sizes, "", None) 72 | self.test_batch_size = utils.conditional_value(test_batch_size, "", 64) 73 | self.num_classes = None 74 | self.validation_size = utils.conditional_value(validation_size, "", None) 75 | self.validation_source = utils.conditional_value(validation_source, "", None) 76 | 77 | if self.sizes: 78 | if isinstance(self.sizes, str): 79 | self.sizes = eval(self.sizes) 80 | 81 | if train_dir: 82 | self.__training__ = True 83 | else: 84 | self.__training__ = False 85 | 86 | if test_dir: 87 | self.__testing__ = True 88 | else: 89 | self.__testing__ = False 90 | 91 | if self.validation_size and self.validation_source: 92 | self.__validating__ = True 93 | else: 94 | self.__validating__ = False 95 | 96 | self.label_distribution = None 97 | 98 | def get_label_distribution(self): 99 | # Only supported for classification 100 | if self.label_distribution is None: 101 | self.label_distribution = [0 for _ in range(self.num_classes)] 102 | tr_set = self.get_trainset() 103 | for _, ys in tr_set: 104 | for y in ys: 105 | y_val = y.item() 106 | self.label_distribution[y_val] += 1 107 | 108 | return self.label_distribution 109 | 110 | def get_trainset(self): 111 | """ 112 | Function to get the training set 113 | 114 | Returns 115 | ------- 116 | torch.utils.Dataset(decentralizepy.datasets.Data) 117 | 118 | Raises 119 | ------ 120 | RuntimeError 121 | If the training set was not initialized 122 | 123 | """ 124 | raise NotImplementedError 125 | 126 | def get_testset(self): 127 | """ 128 | Function to get the test set 129 | 130 | Returns 131 | ------- 132 | torch.utils.Dataset(decentralizepy.datasets.Data) 133 | 134 | Raises 135 | ------ 136 | RuntimeError 137 | If the test set was not initialized 138 | 139 | """ 140 | raise NotImplementedError 141 | 142 | def
get_validationset(self): 143 | """ 144 | Function to get the validation set 145 | 146 | Returns 147 | ------- 148 | torch.utils.Dataset(decentralizepy.datasets.Data) 149 | 150 | Raises 151 | ------ 152 | RuntimeError 153 | If the validation set was not initialized 154 | 155 | """ 156 | raise NotImplementedError 157 | -------------------------------------------------------------------------------- /src/decentralizepy/graphs/Graph.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | 4 | 5 | class Graph: 6 | """ 7 | This class defines the graph topology. 8 | Adapted from https://gitlab.epfl.ch/sacs/ml-rawdatasharing/dnn-recommender/-/blob/master/api.py 9 | """ 10 | 11 | def __init__(self, n_procs=None): 12 | """ 13 | Constructor 14 | 15 | Parameters 16 | ---------- 17 | n_procs : int, optional 18 | Number of processes in the graph, if already known 19 | 20 | """ 21 | if n_procs is not None: 22 | self.n_procs = n_procs 23 | self.adj_list = [set() for i in range(self.n_procs)] 24 | 25 | def get_all_nodes(self): 26 | return [i for i in range(self.n_procs)] 27 | 28 | def __insert_adj__(self, node, neighbours): 29 | """ 30 | Inserts `neighbours` into the adjacency list of `node` 31 | 32 | Parameters 33 | ---------- 34 | node : int 35 | The vertex in question 36 | neighbours : list(int) 37 | A list of neighbours of the `node` 38 | 39 | """ 40 | self.adj_list[node].update(neighbours) 41 | 42 | def __insert_edge__(self, x, y): 43 | """ 44 | Inserts the undirected edge between `x` and `y` into the graph 45 | 46 | Parameters 47 | ---------- 48 | x : int 49 | The source vertex 50 | y : int 51 | The destination vertex 52 | 53 | """ 54 | self.adj_list[x].add(y) 55 | self.adj_list[y].add(x) 56 | 57 | def read_graph_from_file(self, file, type="edges", force_connect=False): 58 | """ 59 | Reads the graph from a given file 60 | 61 | Parameters 62 | ---------- 63 | file : str 64 | path to the file 65 | type : str 66 | `edges` or `adjacency` 67 | force_connect : bool, optional 68 | Should the graph be force-connected using a ring 69 | 70 | Returns 71 | ------- 72 | int 73 | Number of processes, read from the first line of the file 74 | 75 | Raises 76 | ------ 77 | ValueError 78 | If the type is not either `edges` or `adjacency` 79 | 80 | """ 81 | 82 | with open(file, "r") as inf: 83 | self.n_procs = int(inf.readline().strip()) 84 | self.adj_list = [set() for i in range(self.n_procs)] 85 | 86 | lines = inf.readlines() 87 | if type == "edges": 88 | for line in lines: 89 | x, y = map(int, line.strip().split()) 90 | self.__insert_edge__(x, y) 91 | elif type == "adjacency": 92 | node_id = 0 93 | for line in lines: 94 | neighbours = map(int, line.strip().split()) 95 | self.__insert_adj__(node_id, neighbours) 96 | node_id += 1 97 | else: 98 | raise ValueError("type must be from {edges, adjacency}!") 99 | 100 | if force_connect: 101 | self.connect_graph() 102 | 103 | return self.n_procs 104 | 105 | def write_graph_to_file(self, file, type="edges"): 106 | """ 107 | Writes graph to file 108 | 109 | Parameters 110 | ---------- 111 | file : str 112 | File path 113 | type : str 114 | One of {"edges", "adjacency"}. Writes the corresponding format.
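For example, a 3-node ring (adjacency sets {1, 2}, {0, 2}, {0, 1}) is written in the "edges" format as the line "3" followed by one line per ordered pair: "0 1", "0 2", "1 0", "1 2", "2 0", "2 1"; in the "adjacency" format it is "3" followed by one space-separated neighbour line per node: "1 2", "0 2", "0 1".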
115 | 116 | """ 117 | with open(file, "w") as of: 118 | of.write(str(self.n_procs) + "\n") 119 | if type == "edges": 120 | for node, adj in enumerate(self.adj_list): 121 | for neighbor in adj: 122 | of.write("{} {}".format(node, neighbor) + "\n") 123 | elif type == "adjacency": 124 | for adj in self.adj_list: 125 | of.write(" ".join(map(str, adj)) + "\n") 126 | else: 127 | raise ValueError("type must be from {edges, adjacency}!") 128 | 129 | def connect_graph(self): 130 | """ 131 | Connects the graph using a Ring 132 | 133 | """ 134 | for node in range(self.n_procs): 135 | self.adj_list[node].add((node + 1) % self.n_procs) 136 | self.adj_list[node].add((node - 1) % self.n_procs) 137 | 138 | def neighbors(self, uid): 139 | """ 140 | Gives the neighbors of a node 141 | 142 | Parameters 143 | ---------- 144 | uid : int 145 | globally unique identifier of the node 146 | 147 | Returns 148 | ------- 149 | set(int) 150 | a set of neighbours 151 | 152 | """ 153 | return self.adj_list[uid] 154 | 155 | def centr(self): 156 | my_adj = {x: list(adj) for x, adj in enumerate(self.adj_list)} 157 | nxGraph = nx.Graph(my_adj) 158 | a = nx.to_numpy_matrix(nxGraph) 159 | self.averaging_weights = np.ones((self.n_procs, self.n_procs), dtype=float) 160 | centrality = nx.betweenness_centrality(nxGraph) 161 | for i in range(len(centrality)): 162 | centrality[i] += 0.01 163 | for i in range(self.averaging_weights.shape[0]): 164 | s = 0 165 | for j in range(self.averaging_weights.shape[0]): 166 | self.averaging_weights[i, j] = 1.0 / centrality[j] 167 | s += self.averaging_weights[i, j] 168 | for j in range(self.averaging_weights.shape[0]): 169 | self.averaging_weights[i, j] = self.averaging_weights[i, j] / s 170 | return self.averaging_weights 171 | -------------------------------------------------------------------------------- /eval/80_nodes.edges: -------------------------------------------------------------------------------- 1 | 80 2 | 0 1 3 | 0 3 4 | 0 60 5 | 0 47 6 | 0 79 7 | 0 21 8 | 0 53 9 | 0 28 10 | 1 0 11 | 1 2 12 | 1 70 13 | 1 20 14 | 1 61 15 | 1 30 16 | 2 33 17 | 2 1 18 | 2 3 19 | 2 4 20 | 2 68 21 | 3 0 22 | 3 2 23 | 3 4 24 | 3 14 25 | 3 19 26 | 3 20 27 | 3 25 28 | 3 59 29 | 4 2 30 | 4 3 31 | 4 5 32 | 4 78 33 | 4 16 34 | 5 4 35 | 5 6 36 | 5 7 37 | 5 10 38 | 5 76 39 | 5 79 40 | 5 24 41 | 5 60 42 | 6 17 43 | 6 26 44 | 6 5 45 | 6 7 46 | 7 5 47 | 7 6 48 | 7 8 49 | 7 54 50 | 7 61 51 | 8 7 52 | 8 9 53 | 8 16 54 | 8 19 55 | 8 57 56 | 8 63 57 | 9 68 58 | 9 8 59 | 9 10 60 | 9 77 61 | 9 54 62 | 9 24 63 | 9 27 64 | 9 30 65 | 10 69 66 | 10 5 67 | 10 9 68 | 10 11 69 | 10 45 70 | 10 47 71 | 10 55 72 | 11 10 73 | 11 19 74 | 11 12 75 | 11 71 76 | 12 37 77 | 12 74 78 | 12 11 79 | 12 13 80 | 12 63 81 | 13 65 82 | 13 71 83 | 13 39 84 | 13 76 85 | 13 45 86 | 13 14 87 | 13 12 88 | 13 51 89 | 13 53 90 | 13 54 91 | 13 63 92 | 14 32 93 | 14 64 94 | 14 3 95 | 14 70 96 | 14 13 97 | 14 15 98 | 14 51 99 | 14 23 100 | 14 60 101 | 15 69 102 | 15 40 103 | 15 44 104 | 15 14 105 | 15 16 106 | 15 19 107 | 15 53 108 | 15 22 109 | 15 27 110 | 16 4 111 | 16 70 112 | 16 8 113 | 16 77 114 | 16 15 115 | 16 17 116 | 16 60 117 | 16 31 118 | 17 32 119 | 17 6 120 | 17 40 121 | 17 16 122 | 17 18 123 | 18 32 124 | 18 46 125 | 18 17 126 | 18 19 127 | 18 20 128 | 18 24 129 | 19 3 130 | 19 8 131 | 19 11 132 | 19 75 133 | 19 78 134 | 19 15 135 | 19 18 136 | 19 20 137 | 19 21 138 | 19 55 139 | 19 58 140 | 20 1 141 | 20 65 142 | 20 3 143 | 20 70 144 | 20 18 145 | 20 19 146 | 20 21 147 | 20 22 148 | 21 0 149 | 21 38 150 | 21 46 151 | 21 19 152 | 21 20 153 | 21 22 154
| 21 24 155 | 22 15 156 | 22 51 157 | 22 20 158 | 22 21 159 | 22 23 160 | 22 63 161 | 23 36 162 | 23 14 163 | 23 79 164 | 23 22 165 | 23 24 166 | 24 5 167 | 24 38 168 | 24 37 169 | 24 9 170 | 24 18 171 | 24 21 172 | 24 23 173 | 24 25 174 | 24 61 175 | 25 66 176 | 25 3 177 | 25 58 178 | 25 40 179 | 25 24 180 | 25 26 181 | 26 6 182 | 26 53 183 | 26 25 184 | 26 27 185 | 26 29 186 | 27 71 187 | 27 9 188 | 27 15 189 | 27 48 190 | 27 52 191 | 27 54 192 | 27 26 193 | 27 28 194 | 27 62 195 | 28 0 196 | 28 40 197 | 28 46 198 | 28 56 199 | 28 27 200 | 28 29 201 | 29 48 202 | 29 26 203 | 29 28 204 | 29 30 205 | 30 65 206 | 30 1 207 | 30 34 208 | 30 68 209 | 30 9 210 | 30 29 211 | 30 31 212 | 31 16 213 | 31 32 214 | 31 43 215 | 31 30 216 | 32 33 217 | 32 38 218 | 32 41 219 | 32 44 220 | 32 14 221 | 32 17 222 | 32 18 223 | 32 57 224 | 32 31 225 | 33 32 226 | 33 2 227 | 33 34 228 | 33 69 229 | 33 54 230 | 33 63 231 | 34 38 232 | 34 33 233 | 34 35 234 | 34 30 235 | 35 34 236 | 35 36 237 | 35 38 238 | 35 39 239 | 35 46 240 | 36 35 241 | 36 37 242 | 36 42 243 | 36 46 244 | 36 54 245 | 36 23 246 | 37 36 247 | 37 38 248 | 37 12 249 | 37 76 250 | 37 24 251 | 38 32 252 | 38 34 253 | 38 35 254 | 38 37 255 | 38 39 256 | 38 21 257 | 38 24 258 | 39 40 259 | 39 35 260 | 39 13 261 | 39 38 262 | 40 39 263 | 40 41 264 | 40 15 265 | 40 17 266 | 40 55 267 | 40 25 268 | 40 28 269 | 41 32 270 | 41 42 271 | 41 40 272 | 41 79 273 | 42 64 274 | 42 36 275 | 42 41 276 | 42 43 277 | 42 50 278 | 42 53 279 | 42 55 280 | 42 58 281 | 43 64 282 | 43 68 283 | 43 42 284 | 43 44 285 | 43 31 286 | 44 32 287 | 44 43 288 | 44 45 289 | 44 15 290 | 44 51 291 | 44 62 292 | 45 72 293 | 45 10 294 | 45 44 295 | 45 13 296 | 45 46 297 | 45 50 298 | 46 35 299 | 46 36 300 | 46 76 301 | 46 45 302 | 46 77 303 | 46 47 304 | 46 18 305 | 46 50 306 | 46 21 307 | 46 28 308 | 47 0 309 | 47 10 310 | 47 74 311 | 47 46 312 | 47 48 313 | 48 49 314 | 48 27 315 | 48 29 316 | 48 47 317 | 49 64 318 | 49 48 319 | 49 50 320 | 49 52 321 | 49 54 322 | 50 64 323 | 50 42 324 | 50 76 325 | 50 45 326 | 50 46 327 | 50 49 328 | 50 51 329 | 51 67 330 | 51 72 331 | 51 44 332 | 51 13 333 | 51 14 334 | 51 50 335 | 51 52 336 | 51 22 337 | 51 55 338 | 52 76 339 | 52 49 340 | 52 51 341 | 52 53 342 | 52 27 343 | 53 0 344 | 53 68 345 | 53 42 346 | 53 13 347 | 53 15 348 | 53 52 349 | 53 54 350 | 53 55 351 | 53 26 352 | 54 33 353 | 54 36 354 | 54 7 355 | 54 9 356 | 54 13 357 | 54 49 358 | 54 53 359 | 54 55 360 | 54 57 361 | 54 27 362 | 55 40 363 | 55 10 364 | 55 42 365 | 55 51 366 | 55 19 367 | 55 53 368 | 55 54 369 | 55 56 370 | 56 57 371 | 56 28 372 | 56 55 373 | 57 32 374 | 57 68 375 | 57 8 376 | 57 54 377 | 57 56 378 | 57 58 379 | 58 42 380 | 58 19 381 | 58 25 382 | 58 59 383 | 58 57 384 | 59 64 385 | 59 58 386 | 59 3 387 | 59 60 388 | 60 0 389 | 60 5 390 | 60 74 391 | 60 14 392 | 60 16 393 | 60 59 394 | 60 61 395 | 61 1 396 | 61 68 397 | 61 7 398 | 61 76 399 | 61 24 400 | 61 60 401 | 61 62 402 | 62 70 403 | 62 44 404 | 62 27 405 | 62 61 406 | 62 63 407 | 63 64 408 | 63 33 409 | 63 66 410 | 63 68 411 | 63 8 412 | 63 74 413 | 63 12 414 | 63 13 415 | 63 78 416 | 63 22 417 | 63 62 418 | 64 65 419 | 64 70 420 | 64 42 421 | 64 43 422 | 64 14 423 | 64 49 424 | 64 50 425 | 64 59 426 | 64 63 427 | 65 64 428 | 65 66 429 | 65 13 430 | 65 20 431 | 65 30 432 | 66 65 433 | 66 25 434 | 66 67 435 | 66 63 436 | 67 66 437 | 67 51 438 | 67 68 439 | 68 2 440 | 68 67 441 | 68 69 442 | 68 9 443 | 68 73 444 | 68 43 445 | 68 53 446 | 68 57 447 | 68 61 448 | 68 30 449 | 68 63 450 | 69 33 451 | 69 68 452 
| 69 70 453 | 69 10 454 | 69 15 455 | 70 64 456 | 70 1 457 | 70 69 458 | 70 71 459 | 70 14 460 | 70 16 461 | 70 20 462 | 70 62 463 | 71 70 464 | 71 72 465 | 71 11 466 | 71 13 467 | 71 27 468 | 72 73 469 | 72 51 470 | 72 45 471 | 72 71 472 | 73 72 473 | 73 74 474 | 73 68 475 | 74 73 476 | 74 75 477 | 74 12 478 | 74 47 479 | 74 60 480 | 74 63 481 | 75 74 482 | 75 19 483 | 75 76 484 | 76 37 485 | 76 5 486 | 76 75 487 | 76 13 488 | 76 46 489 | 76 77 490 | 76 50 491 | 76 52 492 | 76 61 493 | 77 9 494 | 77 76 495 | 77 46 496 | 77 78 497 | 77 16 498 | 78 4 499 | 78 77 500 | 78 79 501 | 78 19 502 | 78 63 503 | 79 0 504 | 79 5 505 | 79 41 506 | 79 78 507 | 79 23 508 | -------------------------------------------------------------------------------- /src/decentralizepy/training/text/LLMTraining.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | 5 | from decentralizepy import utils 6 | from decentralizepy.training.Training import Training 7 | 8 | 9 | class LLMTraining(Training): 10 | """ 11 | This class implements the training module for a single node. 12 | 13 | """ 14 | 15 | def __init__( 16 | self, 17 | rank, 18 | machine_id, 19 | mapping, 20 | model, 21 | optimizer, 22 | loss=None, 23 | log_dir=".", 24 | rounds="", 25 | full_epochs="", 26 | batch_size="", 27 | shuffle="", 28 | ): 29 | """ 30 | Constructor 31 | 32 | Parameters 33 | ---------- 34 | rank : int 35 | Rank of process local to the machine 36 | machine_id : int 37 | Machine ID on which the process is running 38 | mapping : decentralizepy.mappings 39 | The object containing the mapping rank <--> uid 40 | model : torch.nn.Module 41 | Neural Network for training 42 | optimizer : torch.optim 43 | Optimizer to learn parameters 44 | loss : function 45 | Loss function 46 | log_dir : str 47 | Directory to log the model change. 48 | rounds : int, optional 49 | Number of steps/epochs per training call 50 | full_epochs : bool, optional 51 | True if 1 round = 1 epoch. False if 1 round = 1 minibatch 52 | batch_size : int, optional 53 | Number of items to learn over, in one batch 54 | shuffle : bool 55 | True if the dataset should be shuffled before training. 56 | 57 | """ 58 | super().__init__( 59 | rank, 60 | machine_id, 61 | mapping, 62 | model, 63 | optimizer, 64 | loss, 65 | log_dir, 66 | rounds, 67 | full_epochs, 68 | batch_size, 69 | shuffle, 70 | ) 71 | 72 | def eval_loss(self, dataset): 73 | """ 74 | Evaluate the loss on the training set 75 | 76 | Parameters 77 | ---------- 78 | dataset : decentralizepy.datasets.Dataset 79 | The training dataset. Should implement get_trainset(batch_size, shuffle) 80 | 81 | """ 82 | trainset = dataset.get_trainset(self.batch_size, self.shuffle) 83 | epoch_loss = 0.0 84 | count = 0 85 | with torch.no_grad(): 86 | for batch in trainset: 87 | input_ids = batch["input_ids"] 88 | attention_mask = batch["attention_mask"] 89 | labels = batch["labels"] 90 | outputs = self.model( 91 | input_ids, attention_mask=attention_mask, labels=labels 92 | ) 93 | loss = outputs[0] 94 | epoch_loss += loss.item() 95 | count += 1 96 | loss = epoch_loss / count 97 | logging.info("Loss after iteration: {}".format(loss)) 98 | return loss 99 | 100 | def trainstep(self, batch): 101 | """ 102 | One training step on a minibatch.
103 | 104 | Parameters 105 | ---------- 106 | batch : any 107 | Data item 108 | 109 | Returns 110 | ------- 111 | float 112 | Loss Value for the step 113 | 114 | """ 115 | self.optimizer.zero_grad() 116 | input_ids = batch["input_ids"] 117 | attention_mask = batch["attention_mask"] 118 | labels = batch["labels"] 119 | outputs = self.model(input_ids, attention_mask=attention_mask, labels=labels) 120 | loss = outputs[0] 121 | loss.backward() 122 | self.optimizer.step() 123 | return loss.item() 124 | 125 | def train_full(self, dataset): 126 | """ 127 | One training iteration, goes through the entire dataset 128 | 129 | Parameters 130 | ---------- 131 | dataset : decentralizepy.datasets.Dataset 132 | The training dataset. Should implement get_trainset(batch_size, shuffle) 133 | 134 | """ 135 | trainset = dataset.get_trainset(self.batch_size, self.shuffle) 136 | for epoch in range(self.rounds): 137 | epoch_loss = 0.0 138 | count = 0 139 | for batch in trainset: 140 | logging.debug( 141 | "Starting minibatch {} with num_samples: {}".format( 142 | count, len(batch["input_ids"]) 143 | ) 144 | ) 145 | epoch_loss += self.trainstep(batch) 146 | count += 1 147 | logging.debug("Epoch: {} loss: {}".format(epoch, epoch_loss / count)) 148 | 149 | def train(self, dataset): 150 | """ 151 | One training iteration 152 | 153 | Parameters 154 | ---------- 155 | dataset : decentralizepy.datasets.Dataset 156 | The training dataset. Should implement get_trainset(batch_size, shuffle) 157 | 158 | """ 159 | self.model.train() 160 | 161 | if self.full_epochs: 162 | self.train_full(dataset) 163 | else: 164 | iter_loss = 0.0 165 | count = 0 166 | trainset = dataset.get_trainset(self.batch_size, self.shuffle) 167 | while count < self.rounds: 168 | for data in trainset: 169 | iter_loss += self.trainstep(data) 170 | count += 1 171 | logging.debug("Round: {} loss: {}".format(count, iter_loss / count)) 172 | if count >= self.rounds: 173 | break 174 | -------------------------------------------------------------------------------- /eval/run_grid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Documentation 3 | # This bash file takes three inputs. The first argument (nfs_home) is the path to the nfs home directory. 4 | # The second one (python_bin) is the path to the python bin folder. 5 | # The last argument (logs_subfolder) is the path to the logs folder with respect to the nfs home directory. 6 | # 7 | # The nfs home directory should contain the code of this framework stored in $nfs_home/decentralizepy and a folder 8 | # called configs which contains the file 'ip_addr_6Machines.json' 9 | # The python bin folder needs to include all the dependencies of this project including crudini. 10 | # The results will be stored in $nfs_home/$logs_subfolder 11 | # Each of the experiments will be stored in its own folder inside the logs_subfolder. The folder of the experiment 12 | # starts with the last part of the config name, i.e., for 'config_celeba_topkacc.ini' it will start with topkacc. 13 | # The name further includes the learning rate, rounds and batchsize as well as the exact date at which the experiment 14 | # was run. 15 | # Example: ./run_grid.sh /mnt/nfs/wigger /mnt/nfs/wigger/anaconda3/envs/sacs39/bin /logs/celeba 16 | # 17 | # Additional requirements: 18 | # Each node needs a folder called 'tmp' in the user's home directory 19 | # 20 | # Note: 21 | # - The script does not change the optimizer. All configs are written to use SGD.
22 | # - The script will set '--test_after' and '--train_evaluate_after' such that it happens at the end of a global epoch. 23 | # - The '--reset_optimizer' option is set to 0, i.e., the optimizer is not reset after a communication round (only 24 | # relevant for Adam and other optimizers with internal state) 25 | # 26 | # Adapting the script to other datasets: 27 | # Change the variable 'dataset_size' to reflect the dataset's size. 28 | # 29 | # Known issues: 30 | # - If the script is started at the very end of a minute then there is a chance that two folders are created as not all 31 | # machines may start running the script at the exact same moment. 32 | 33 | nfs_home=$1 34 | python_bin=$2 35 | logs_subfolder=$3 36 | decpy_path=$nfs_home/decentralizepy/eval 37 | cd $decpy_path 38 | 39 | env_python=$python_bin/python3 40 | graph=96_regular.edges 41 | config_file=~/tmp/config.ini 42 | procs_per_machine=16 43 | machines=6 44 | global_epochs=25 45 | eval_file=testing.py 46 | log_level=INFO 47 | 48 | ip_machines=$nfs_home/configs/ip_addr_6Machines.json 49 | 50 | m=`cat $ip_machines | grep $(/sbin/ifconfig ens785 | grep 'inet ' | awk '{print $2}') | cut -d'"' -f2` 51 | export PYTHONFAULTHANDLER=1 52 | 53 | # Base configs for which the grid search is done 54 | tests=("step_configs/config_celeba_sharing.ini") 55 | # Learning rates to test 56 | lrs=( "0.001" "0.0001" "0.0001") 57 | # Batch sizes to test 58 | batchsize=("8" "16") 59 | # The number of communication rounds per global epoch to test 60 | comm_rounds_per_global_epoch=("1" "5" "10") 61 | procs=`expr $procs_per_machine \* $machines` 62 | echo procs: $procs 63 | dataset_size=63741 64 | # Calculating the number of samples that each user/proc will have on average 65 | samples_per_user=`expr $dataset_size / $procs` 66 | echo samples per user: $samples_per_user 67 | 68 | for b in "${batchsize[@]}" 69 | do 70 | echo batchsize: $b 71 | for r in "${comm_rounds_per_global_epoch[@]}" 72 | do 73 | echo communication rounds per global epoch: $r 74 | # calculating how many batches there are in a global epoch for each user/proc 75 | batches_per_epoch=$(($samples_per_user / $b)) 76 | echo batches per global epoch: $batches_per_epoch 77 | # the number of iterations in 25 global epochs 78 | iterations=$($env_python -c "from math import floor; print($batches_per_epoch * $global_epochs) if $r >= $batches_per_epoch else print($global_epochs * $r)") 79 | echo iterations: $iterations 80 | # calculating the number of batches each user/proc uses per communication step (The actual number may be a float, which we round down) 81 | batches_per_comm_round=$($env_python -c "from math import floor; x = floor($batches_per_epoch / $r); print(1 if x==0 else x)") 82 | # since the batches per communication round were rounded down we need to change the number of iterations to reflect that 83 | new_iterations=$($env_python -c "from math import floor; tmp = floor($batches_per_epoch / $r); x = 1 if tmp == 0 else tmp; y = floor((($batches_per_epoch / $r)/x)*$iterations); print($iterations if y<$iterations else y)") 84 | echo batches per communication round: $batches_per_comm_round 85 | echo corrected iterations: $new_iterations 86 | test_after=$(($new_iterations / $global_epochs)) 87 | echo test after: $test_after 88 | for lr in "${lrs[@]}" 89 | do 90 | for i in "${tests[@]}" 91 | do 92 | echo $i 93 | IFS='_' read -ra NAMES <<< $i 94 | IFS='.'
read -ra NAME <<< ${NAMES[-1]} 95 | log_dir=$nfs_home$logs_subfolder/${NAME[0]}:lr=$lr:r=$r:b=$b:$(date '+%Y-%m-%dT%H:%M')/machine$m 96 | echo results are stored in: $log_dir 97 | mkdir -p $log_dir 98 | cp $i $config_file 99 | # changing the config files to reflect the values of the current grid search state 100 | $python_bin/crudini --set $config_file COMMUNICATION addresses_filepath $ip_machines 101 | $python_bin/crudini --set $config_file OPTIMIZER_PARAMS lr $lr 102 | $python_bin/crudini --set $config_file TRAIN_PARAMS rounds $batches_per_comm_round 103 | $python_bin/crudini --set $config_file TRAIN_PARAMS batch_size $b 104 | $env_python $eval_file -ro 0 -tea $test_after -ld $log_dir -mid $m -ps $procs_per_machine -ms $machines -is $new_iterations -gf $graph -ta $test_after -cf $config_file -ll $log_level 105 | echo $i is done 106 | sleep 1 107 | echo end of sleep 108 | done 109 | done 110 | done 111 | done 112 | # 113 | 114 | -------------------------------------------------------------------------------- /src/decentralizepy/training/Training.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | 5 | from decentralizepy import utils 6 | 7 | 8 | class Training: 9 | """ 10 | This class implements the training module for a single node. 11 | 12 | """ 13 | 14 | def __init__( 15 | self, 16 | rank, 17 | machine_id, 18 | mapping, 19 | model, 20 | optimizer, 21 | loss, 22 | log_dir, 23 | rounds="", 24 | full_epochs="", 25 | batch_size="", 26 | shuffle="", 27 | ): 28 | """ 29 | Constructor 30 | 31 | Parameters 32 | ---------- 33 | rank : int 34 | Rank of process local to the machine 35 | machine_id : int 36 | Machine ID on which the process is running 37 | mapping : decentralizepy.mappings 38 | The object containing the mapping rank <--> uid 39 | model : torch.nn.Module 40 | Neural Network for training 41 | optimizer : torch.optim 42 | Optimizer to learn parameters 43 | loss : function 44 | Loss function 45 | log_dir : str 46 | Directory to log the model change. 47 | rounds : int, optional 48 | Number of steps/epochs per training call 49 | full_epochs : bool, optional 50 | True if 1 round = 1 epoch. False if 1 round = 1 minibatch 51 | batch_size : int, optional 52 | Number of items to learn over, in one batch 53 | shuffle : bool 54 | True if the dataset should be shuffled before training. 55 | 56 | """ 57 | self.model = model 58 | self.optimizer = optimizer 59 | self.loss = loss 60 | self.log_dir = log_dir 61 | self.rank = rank 62 | self.machine_id = machine_id 63 | self.mapping = mapping 64 | self.rounds = utils.conditional_value(rounds, "", int(1)) 65 | self.full_epochs = utils.conditional_value(full_epochs, "", False) 66 | self.batch_size = utils.conditional_value(batch_size, "", int(1)) 67 | self.shuffle = utils.conditional_value(shuffle, "", False) 68 | 69 | def reset_optimizer(self, optimizer): 70 | """ 71 | Replace the current optimizer with a new one 72 | 73 | Parameters 74 | ---------- 75 | optimizer : torch.optim 76 | A new optimizer 77 | 78 | """ 79 | self.optimizer = optimizer 80 | 81 | def eval_loss(self, dataset): 82 | """ 83 | Evaluate the loss on the training set 84 | 85 | Parameters 86 | ---------- 87 | dataset : decentralizepy.datasets.Dataset 88 | The training dataset.
Should implement get_trainset(batch_size, shuffle) 89 | 90 | """ 91 | trainset = dataset.get_trainset(self.batch_size, self.shuffle) 92 | epoch_loss = 0.0 93 | count = 0 94 | with torch.no_grad(): 95 | for data, target in trainset: 96 | output = self.model(data) 97 | loss_val = self.loss(output, target) 98 | epoch_loss += loss_val.item() 99 | count += 1 100 | loss = epoch_loss / count 101 | logging.info("Loss after iteration: {}".format(loss)) 102 | return loss 103 | 104 | def trainstep(self, data, target): 105 | """ 106 | One training step on a minibatch. 107 | 108 | Parameters 109 | ---------- 110 | data : any 111 | Data item 112 | target : any 113 | Label 114 | 115 | Returns 116 | ------- 117 | float 118 | Loss Value for the step 119 | 120 | """ 121 | self.model.zero_grad() 122 | output = self.model(data) 123 | loss_val = self.loss(output, target) 124 | loss_val.backward() 125 | self.optimizer.step() 126 | return loss_val.item() 127 | 128 | def train_full(self, dataset): 129 | """ 130 | One training iteration, goes through the entire dataset 131 | 132 | Parameters 133 | ---------- 134 | dataset : decentralizepy.datasets.Dataset 135 | The training dataset. 136 | 137 | """ 138 | for epoch in range(self.rounds): 139 | trainset = dataset.get_trainset(self.batch_size, self.shuffle) 140 | epoch_loss = 0.0 141 | count = 0 142 | for data, target in trainset: 143 | logging.debug( 144 | "Starting minibatch {} with num_samples: {}".format( 145 | count, len(data) 146 | ) 147 | ) 148 | logging.debug("Classes: {}".format(target)) 149 | epoch_loss += self.trainstep(data, target) 150 | count += 1 151 | logging.debug("Epoch: {} loss: {}".format(epoch, epoch_loss / count)) 152 | 153 | def train(self, dataset): 154 | """ 155 | One training iteration 156 | 157 | Parameters 158 | ---------- 159 | dataset : decentralizepy.datasets.Dataset 160 | The training dataset.
Should implement get_trainset(batch_size, shuffle) 161 | 162 | """ 163 | self.model.train() 164 | 165 | if self.full_epochs: 166 | self.train_full(dataset) 167 | else: 168 | iter_loss = 0.0 169 | count = 0 170 | trainset = dataset.get_trainset(self.batch_size, self.shuffle) 171 | while count < self.rounds: 172 | for data, target in trainset: 173 | iter_loss += self.trainstep(data, target) 174 | count += 1 175 | logging.debug("Round: {} loss: {}".format(count, iter_loss / count)) 176 | if count >= self.rounds: 177 | break 178 | -------------------------------------------------------------------------------- /eval/96_nodes_smallworld.edges: -------------------------------------------------------------------------------- 1 | 96 2 | 0 1 3 | 0 66 4 | 0 8 5 | 0 43 6 | 0 19 7 | 0 58 8 | 0 95 9 | 1 0 10 | 1 2 11 | 1 35 12 | 1 4 13 | 1 80 14 | 1 50 15 | 1 90 16 | 2 56 17 | 2 3 18 | 2 35 19 | 2 1 20 | 3 2 21 | 3 4 22 | 3 5 23 | 3 72 24 | 3 15 25 | 3 86 26 | 3 55 27 | 4 1 28 | 4 3 29 | 4 36 30 | 4 37 31 | 4 38 32 | 4 5 33 | 4 76 34 | 5 3 35 | 5 4 36 | 5 6 37 | 5 49 38 | 5 53 39 | 5 92 40 | 6 67 41 | 6 36 42 | 6 5 43 | 6 7 44 | 6 78 45 | 6 86 46 | 7 64 47 | 7 6 48 | 7 8 49 | 7 41 50 | 7 47 51 | 7 17 52 | 7 87 53 | 8 0 54 | 8 7 55 | 8 9 56 | 8 56 57 | 8 26 58 | 9 8 59 | 9 10 60 | 9 75 61 | 9 77 62 | 9 15 63 | 10 32 64 | 10 36 65 | 10 9 66 | 10 11 67 | 10 12 68 | 10 81 69 | 10 82 70 | 11 32 71 | 11 34 72 | 11 10 73 | 11 12 74 | 11 59 75 | 11 92 76 | 11 61 77 | 12 13 78 | 12 10 79 | 12 11 80 | 12 29 81 | 13 18 82 | 13 12 83 | 13 14 84 | 14 73 85 | 14 91 86 | 14 13 87 | 14 15 88 | 15 3 89 | 15 9 90 | 15 75 91 | 15 14 92 | 15 47 93 | 15 16 94 | 15 27 95 | 15 31 96 | 16 17 97 | 16 66 98 | 16 46 99 | 16 15 100 | 17 16 101 | 17 18 102 | 17 20 103 | 17 7 104 | 18 32 105 | 18 13 106 | 18 79 107 | 18 17 108 | 18 19 109 | 18 93 110 | 19 0 111 | 19 18 112 | 19 20 113 | 19 86 114 | 20 46 115 | 20 80 116 | 20 17 117 | 20 19 118 | 20 21 119 | 20 88 120 | 20 90 121 | 21 20 122 | 21 69 123 | 21 22 124 | 21 23 125 | 22 35 126 | 22 69 127 | 22 79 128 | 22 21 129 | 22 23 130 | 22 58 131 | 23 38 132 | 23 77 133 | 23 21 134 | 23 22 135 | 23 24 136 | 23 89 137 | 23 58 138 | 24 25 139 | 24 58 140 | 24 23 141 | 24 79 142 | 25 36 143 | 25 69 144 | 25 41 145 | 25 42 146 | 25 24 147 | 25 26 148 | 26 8 149 | 26 25 150 | 26 27 151 | 26 87 152 | 27 34 153 | 27 26 154 | 27 28 155 | 27 15 156 | 28 27 157 | 28 46 158 | 28 82 159 | 28 91 160 | 28 29 161 | 28 95 162 | 29 12 163 | 29 28 164 | 29 53 165 | 29 56 166 | 29 60 167 | 29 30 168 | 30 35 169 | 30 45 170 | 30 92 171 | 30 29 172 | 30 31 173 | 31 64 174 | 31 33 175 | 31 32 176 | 31 76 177 | 31 78 178 | 31 15 179 | 31 50 180 | 31 30 181 | 32 33 182 | 32 37 183 | 32 10 184 | 32 11 185 | 32 42 186 | 32 18 187 | 32 31 188 | 33 32 189 | 33 34 190 | 33 31 191 | 34 33 192 | 34 35 193 | 34 75 194 | 34 11 195 | 34 55 196 | 34 27 197 | 35 1 198 | 35 2 199 | 35 34 200 | 35 36 201 | 35 53 202 | 35 22 203 | 35 56 204 | 35 30 205 | 36 89 206 | 36 35 207 | 36 4 208 | 36 37 209 | 36 6 210 | 36 72 211 | 36 10 212 | 36 75 213 | 36 85 214 | 36 25 215 | 37 32 216 | 37 4 217 | 37 36 218 | 37 38 219 | 37 51 220 | 38 4 221 | 38 37 222 | 38 39 223 | 38 43 224 | 38 23 225 | 39 38 226 | 39 40 227 | 39 42 228 | 39 75 229 | 39 94 230 | 40 70 231 | 40 39 232 | 40 41 233 | 40 48 234 | 40 49 235 | 40 54 236 | 40 95 237 | 41 68 238 | 41 7 239 | 41 40 240 | 41 42 241 | 41 43 242 | 41 25 243 | 41 91 244 | 42 32 245 | 42 70 246 | 42 39 247 | 42 41 248 | 42 43 249 | 42 56 250 | 42 25 251 | 42 60 252 | 43 0 253 | 43 38 
254 | 43 41 255 | 43 42 256 | 43 44 257 | 44 64 258 | 44 71 259 | 44 43 260 | 44 45 261 | 44 58 262 | 45 46 263 | 45 44 264 | 45 30 265 | 46 45 266 | 46 47 267 | 46 16 268 | 46 48 269 | 46 20 270 | 46 28 271 | 47 48 272 | 47 15 273 | 47 46 274 | 47 7 275 | 48 40 276 | 48 46 277 | 48 47 278 | 48 49 279 | 48 89 280 | 48 62 281 | 48 63 282 | 49 5 283 | 49 71 284 | 49 40 285 | 49 78 286 | 49 48 287 | 49 50 288 | 50 1 289 | 50 51 290 | 50 49 291 | 50 31 292 | 51 67 293 | 51 37 294 | 51 73 295 | 51 50 296 | 51 52 297 | 52 53 298 | 52 51 299 | 52 59 300 | 52 69 301 | 53 35 302 | 53 5 303 | 53 52 304 | 53 54 305 | 53 55 306 | 53 89 307 | 53 29 308 | 53 94 309 | 54 40 310 | 54 82 311 | 54 84 312 | 54 53 313 | 54 55 314 | 55 34 315 | 55 3 316 | 55 53 317 | 55 54 318 | 55 56 319 | 56 2 320 | 56 35 321 | 56 8 322 | 56 42 323 | 56 55 324 | 56 57 325 | 56 29 326 | 57 75 327 | 57 56 328 | 57 58 329 | 57 91 330 | 58 0 331 | 58 44 332 | 58 22 333 | 58 23 334 | 58 24 335 | 58 57 336 | 58 59 337 | 59 58 338 | 59 11 339 | 59 52 340 | 59 60 341 | 60 59 342 | 60 42 343 | 60 61 344 | 60 29 345 | 61 70 346 | 61 11 347 | 61 60 348 | 61 62 349 | 62 79 350 | 62 48 351 | 62 81 352 | 62 86 353 | 62 90 354 | 62 61 355 | 62 63 356 | 63 48 357 | 63 88 358 | 63 64 359 | 63 62 360 | 64 65 361 | 64 7 362 | 64 44 363 | 64 63 364 | 64 31 365 | 65 72 366 | 65 66 367 | 65 64 368 | 66 0 369 | 66 16 370 | 66 67 371 | 66 65 372 | 67 66 373 | 67 68 374 | 67 6 375 | 67 76 376 | 67 51 377 | 68 41 378 | 68 67 379 | 68 69 380 | 68 86 381 | 69 68 382 | 69 70 383 | 69 52 384 | 69 21 385 | 69 22 386 | 69 25 387 | 69 92 388 | 70 69 389 | 70 71 390 | 70 40 391 | 70 42 392 | 70 75 393 | 70 61 394 | 71 70 395 | 71 72 396 | 71 44 397 | 71 49 398 | 71 89 399 | 71 94 400 | 72 65 401 | 72 3 402 | 72 36 403 | 72 71 404 | 72 73 405 | 73 72 406 | 73 74 407 | 73 51 408 | 73 14 409 | 74 73 410 | 74 75 411 | 74 86 412 | 75 34 413 | 75 36 414 | 75 70 415 | 75 39 416 | 75 9 417 | 75 74 418 | 75 76 419 | 75 15 420 | 75 57 421 | 76 67 422 | 76 4 423 | 76 75 424 | 76 77 425 | 76 92 426 | 76 31 427 | 77 9 428 | 77 76 429 | 77 78 430 | 77 23 431 | 78 6 432 | 78 77 433 | 78 79 434 | 78 49 435 | 78 31 436 | 79 78 437 | 79 80 438 | 79 18 439 | 79 22 440 | 79 24 441 | 79 62 442 | 80 1 443 | 80 79 444 | 80 81 445 | 80 82 446 | 80 20 447 | 80 87 448 | 81 80 449 | 81 10 450 | 81 82 451 | 81 62 452 | 82 10 453 | 82 80 454 | 82 81 455 | 82 83 456 | 82 84 457 | 82 54 458 | 82 28 459 | 82 93 460 | 83 82 461 | 83 84 462 | 83 93 463 | 84 82 464 | 84 83 465 | 84 85 466 | 84 54 467 | 84 88 468 | 84 90 469 | 85 36 470 | 85 86 471 | 85 84 472 | 86 3 473 | 86 68 474 | 86 6 475 | 86 74 476 | 86 19 477 | 86 85 478 | 86 87 479 | 86 93 480 | 86 62 481 | 87 7 482 | 87 80 483 | 87 86 484 | 87 88 485 | 87 26 486 | 88 20 487 | 88 84 488 | 88 87 489 | 88 89 490 | 88 63 491 | 89 36 492 | 89 71 493 | 89 48 494 | 89 53 495 | 89 23 496 | 89 88 497 | 89 90 498 | 90 1 499 | 90 20 500 | 90 84 501 | 90 89 502 | 90 91 503 | 90 62 504 | 91 41 505 | 91 14 506 | 91 92 507 | 91 57 508 | 91 90 509 | 91 28 510 | 92 69 511 | 92 5 512 | 92 11 513 | 92 76 514 | 92 91 515 | 92 93 516 | 92 30 517 | 93 18 518 | 93 83 519 | 93 82 520 | 93 86 521 | 93 92 522 | 93 94 523 | 94 71 524 | 94 39 525 | 94 53 526 | 94 93 527 | 94 95 528 | 95 40 529 | 95 0 530 | 95 28 531 | 95 94 532 | -------------------------------------------------------------------------------- /eval/run_xtimes_cifar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Documentation 3 | # This 
bash file takes three inputs. The first argument (nfs_home) is the path to the nfs home directory. 4 | # The second one (python_bin) is the path to the python bin folder. 5 | # The last argument (logs_subfolder) is the path to the logs folder with respect to the nfs home directory. 6 | # 7 | # The nfs home directory should contain the code of this framework stored in $nfs_home/decentralizepy and a folder 8 | # called configs which contains the file 'ip_addr_6Machines.json' 9 | # The python bin folder needs to include all the dependencies of this project including crudini. 10 | # The results will be stored in $nfs_home/$logs_subfolder 11 | # Each of the experiments will be stored in its own folder inside the logs_subfolder. The folder of the experiment 12 | # starts with the last part of the config name, i.e., for 'config_celeba_topkacc.ini' it will start with topkacc. 13 | # The name further includes the learning rate, rounds and batchsize as well as the exact date at which the experiment 14 | # was run. 15 | # Example: ./run_xtimes_cifar.sh /mnt/nfs/wigger /mnt/nfs/wigger/anaconda3/envs/sacs39/bin /logs/celeba 16 | # 17 | # Additional requirements: 18 | # Each node needs a folder called 'tmp' in the user's home directory 19 | # 20 | # Note: 21 | # - The script does not change the optimizer. All configs are written to use SGD. 22 | # - The script will set '--test_after' and '--train_evaluate_after' such that it happens at the end of a global epoch. 23 | # - The '--reset_optimizer' option is set to 0, i.e., the optimizer is not reset after a communication round (only 24 | # relevant for Adam and other optimizers with internal state) 25 | # 26 | # Adapting the script to other datasets: 27 | # Change the variable 'dataset_size' to reflect the dataset's size. 28 | # 29 | # Known issues: 30 | # - If the script is started at the very end of a minute then there is a chance that two folders are created as not all 31 | # machines may start running the script at the exact same moment.
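# For concreteness, here is how the iteration arithmetic below works out with the values
# set in this script (floor/integer division throughout): procs = 16 * 6 = 96, so
# samples_per_user = 50000 / 96 = 520 and batches_per_epoch = 520 / 8 = 65. Since
# comm_rounds_per_global_epoch = 20 < 65, iterations = 100 * 20 = 2000 and
# batches_per_comm_round = floor(65 / 20) = 3. The correction step then yields
# new_iterations = floor(((65 / 20) / 3) * 2000) = 2166 (2166 rounds * 3 batches
# ~= 65 batches * 100 epochs of data), and test_after = 2166 / 100 = 21.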
32 | 33 | nfs_home=$1 34 | python_bin=$2 35 | logs_subfolder=$3 36 | decpy_path=$nfs_home/decentralizepy/eval 37 | cd $decpy_path 38 | 39 | env_python=$python_bin/python3 40 | graph=96_regular.edges 41 | config_file=~/tmp/config.ini 42 | procs_per_machine=16 43 | machines=6 44 | global_epochs=100 45 | eval_file=testingFederated.py 46 | log_level=INFO 47 | working_rate=0.1 48 | 49 | ip_machines=$nfs_home/configs/ip_addr_6Machines.json 50 | 51 | m=`cat $ip_machines | grep $(/sbin/ifconfig ens785 | grep 'inet ' | awk '{print $2}') | cut -d'"' -f2` 52 | export PYTHONFAULTHANDLER=1 53 | 54 | # Base configs for which the experiments are run 55 | tests=("step_configs/config_cifar_sharing.ini") 56 | # Learning rate 57 | lr="0.01" 58 | # Batch size 59 | batchsize="8" 60 | # The number of communication rounds per global epoch 61 | comm_rounds_per_global_epoch="20" 62 | procs=`expr $procs_per_machine \* $machines` 63 | echo procs: $procs 64 | dataset_size=50000 65 | # Calculating the number of samples that each user/proc will have on average 66 | samples_per_user=`expr $dataset_size / $procs` 67 | echo samples per user: $samples_per_user 68 | 69 | # random_seeds for which to rerun the experiments 70 | random_seeds=("90") 71 | # random_seed = 97 72 | echo batchsize: $batchsize 73 | echo communication rounds per global epoch: $comm_rounds_per_global_epoch 74 | # calculating how many batches there are in a global epoch for each user/proc 75 | batches_per_epoch=$(($samples_per_user / $batchsize)) 76 | echo batches per global epoch: $batches_per_epoch 77 | # the number of iterations in $global_epochs global epochs 78 | iterations=$($env_python -c "from math import floor; print($batches_per_epoch * $global_epochs) if $comm_rounds_per_global_epoch >= $batches_per_epoch else print($global_epochs * $comm_rounds_per_global_epoch)") 79 | echo iterations: $iterations 80 | # calculating the number of batches each user/proc uses per communication step (The actual number may be a float, which we round down) 81 | batches_per_comm_round=$($env_python -c "from math import floor; x = floor($batches_per_epoch / $comm_rounds_per_global_epoch); print(1 if x==0 else x)") 82 | # since the batches per communication round were rounded down we need to change the number of iterations to reflect that 83 | new_iterations=$($env_python -c "from math import floor; tmp = floor($batches_per_epoch / $comm_rounds_per_global_epoch); x = 1 if tmp == 0 else tmp; y = floor((($batches_per_epoch / $comm_rounds_per_global_epoch)/x)*$iterations); print($iterations if y<$iterations else y)") 84 | echo batches per communication round: $batches_per_comm_round 85 | echo corrected iterations: $new_iterations 86 | test_after=$(($new_iterations / $global_epochs)) 87 | echo test after: $test_after 88 | for i in "${tests[@]}" 89 | do 90 | for seed in "${random_seeds[@]}" 91 | do 92 | echo $i 93 | IFS='_' read -ra NAMES <<< $i 94 | IFS='.'
read -ra NAME <<< ${NAMES[-1]} 95 | log_dir_base=$nfs_home/$logs_subfolder/${NAME[0]}:lr=$lr:r=$comm_rounds_per_global_epoch:b=$batchsize:$(date '+%Y-%m-%dT%H:%M') 96 | echo results are stored in: $log_dir_base 97 | log_dir=$log_dir_base/machine$m 98 | mkdir -p $log_dir 99 | weight_store_dir=$log_dir_base/weights 100 | mkdir -p $weight_store_dir 101 | cp $i $config_file 102 | # changing the config files to reflect the values of the current grid search state 103 | $python_bin/crudini --set $config_file COMMUNICATION addresses_filepath $ip_machines 104 | $python_bin/crudini --set $config_file OPTIMIZER_PARAMS lr $lr 105 | $python_bin/crudini --set $config_file TRAIN_PARAMS rounds $batches_per_comm_round 106 | $python_bin/crudini --set $config_file TRAIN_PARAMS batch_size $batchsize 107 | $python_bin/crudini --set $config_file DATASET random_seed $seed 108 | $env_python $eval_file -ro 0 -tea $test_after -ld $log_dir -wsd $weight_store_dir -mid $m -ps $procs_per_machine -ms $machines -is $new_iterations -gf $graph -ta $test_after -cf $config_file -ll $log_level -wr $working_rate 109 | echo $i is done 110 | sleep 200 111 | echo end of sleep 112 | done 113 | done 114 | # 115 | -------------------------------------------------------------------------------- /eval/96_nodes_random2.edges: -------------------------------------------------------------------------------- 1 | 96 2 | 0 1 3 | 0 36 4 | 0 13 5 | 0 46 6 | 0 28 7 | 0 95 8 | 1 0 9 | 1 33 10 | 1 2 11 | 1 36 12 | 1 4 13 | 1 43 14 | 1 14 15 | 1 21 16 | 1 91 17 | 1 95 18 | 2 1 19 | 2 3 20 | 2 5 21 | 2 9 22 | 2 23 23 | 2 89 24 | 3 2 25 | 3 4 26 | 3 13 27 | 3 18 28 | 3 90 29 | 4 1 30 | 4 34 31 | 4 3 32 | 4 5 33 | 4 73 34 | 4 10 35 | 4 88 36 | 4 95 37 | 5 2 38 | 5 66 39 | 5 4 40 | 5 6 41 | 5 74 42 | 5 54 43 | 5 90 44 | 6 5 45 | 6 7 46 | 6 74 47 | 6 16 48 | 6 49 49 | 6 80 50 | 6 31 51 | 7 6 52 | 7 8 53 | 7 80 54 | 7 53 55 | 7 21 56 | 7 92 57 | 8 64 58 | 8 68 59 | 8 7 60 | 8 41 61 | 8 9 62 | 8 11 63 | 8 45 64 | 8 54 65 | 8 88 66 | 9 32 67 | 9 2 68 | 9 35 69 | 9 8 70 | 9 10 71 | 9 76 72 | 9 17 73 | 9 85 74 | 9 55 75 | 10 34 76 | 10 4 77 | 10 38 78 | 10 9 79 | 10 11 80 | 11 8 81 | 11 42 82 | 11 10 83 | 11 76 84 | 11 12 85 | 12 73 86 | 12 11 87 | 12 13 88 | 12 56 89 | 12 58 90 | 12 88 91 | 13 0 92 | 13 3 93 | 13 74 94 | 13 12 95 | 13 14 96 | 13 80 97 | 13 25 98 | 14 1 99 | 14 42 100 | 14 13 101 | 14 15 102 | 14 63 103 | 15 39 104 | 15 14 105 | 15 47 106 | 15 16 107 | 15 25 108 | 16 34 109 | 16 36 110 | 16 6 111 | 16 15 112 | 16 17 113 | 17 9 114 | 17 45 115 | 17 79 116 | 17 16 117 | 17 18 118 | 17 24 119 | 17 26 120 | 17 59 121 | 18 3 122 | 18 17 123 | 18 19 124 | 18 84 125 | 18 91 126 | 19 39 127 | 19 41 128 | 19 48 129 | 19 18 130 | 19 20 131 | 19 91 132 | 20 90 133 | 20 19 134 | 20 21 135 | 20 22 136 | 20 26 137 | 21 32 138 | 21 1 139 | 21 7 140 | 21 74 141 | 21 20 142 | 21 22 143 | 21 90 144 | 21 95 145 | 22 74 146 | 22 50 147 | 22 20 148 | 22 21 149 | 22 23 150 | 23 2 151 | 23 66 152 | 23 40 153 | 23 46 154 | 23 48 155 | 23 22 156 | 23 24 157 | 23 95 158 | 24 17 159 | 24 27 160 | 24 25 161 | 24 23 162 | 25 13 163 | 25 15 164 | 25 88 165 | 25 24 166 | 25 26 167 | 25 94 168 | 26 17 169 | 26 20 170 | 26 25 171 | 26 27 172 | 26 61 173 | 27 34 174 | 27 69 175 | 27 45 176 | 27 28 177 | 27 24 178 | 27 26 179 | 27 60 180 | 28 0 181 | 28 64 182 | 28 85 183 | 28 57 184 | 28 27 185 | 28 29 186 | 29 65 187 | 29 78 188 | 29 50 189 | 29 28 190 | 29 61 191 | 29 30 192 | 30 38 193 | 30 43 194 | 30 93 195 | 30 29 196 | 30 31 197 | 31 32 198 | 31 67 199 | 31 6 200 | 
31 48 201 | 31 93 202 | 31 30 203 | 32 33 204 | 32 35 205 | 32 37 206 | 32 9 207 | 32 43 208 | 32 21 209 | 32 91 210 | 32 92 211 | 32 93 212 | 32 31 213 | 33 32 214 | 33 1 215 | 33 34 216 | 33 71 217 | 34 33 218 | 34 35 219 | 34 4 220 | 34 10 221 | 34 16 222 | 34 81 223 | 34 27 224 | 35 32 225 | 35 34 226 | 35 36 227 | 35 9 228 | 35 51 229 | 36 0 230 | 36 1 231 | 36 35 232 | 36 37 233 | 36 16 234 | 36 56 235 | 37 32 236 | 37 60 237 | 37 38 238 | 37 36 239 | 38 37 240 | 38 39 241 | 38 10 242 | 38 45 243 | 38 30 244 | 39 40 245 | 39 19 246 | 39 38 247 | 39 15 248 | 40 39 249 | 40 41 250 | 40 48 251 | 40 23 252 | 40 91 253 | 40 63 254 | 41 8 255 | 41 40 256 | 41 42 257 | 41 19 258 | 41 85 259 | 42 41 260 | 42 43 261 | 42 11 262 | 42 14 263 | 42 53 264 | 43 32 265 | 43 1 266 | 43 42 267 | 43 44 268 | 43 45 269 | 43 30 270 | 44 43 271 | 44 67 272 | 44 45 273 | 44 46 274 | 45 38 275 | 45 8 276 | 45 43 277 | 45 44 278 | 45 46 279 | 45 17 280 | 45 87 281 | 45 27 282 | 46 0 283 | 46 44 284 | 46 77 285 | 46 45 286 | 46 47 287 | 46 23 288 | 46 61 289 | 46 95 290 | 47 48 291 | 47 65 292 | 47 46 293 | 47 15 294 | 48 40 295 | 48 47 296 | 48 49 297 | 48 19 298 | 48 86 299 | 48 23 300 | 48 60 301 | 48 31 302 | 49 6 303 | 49 79 304 | 49 48 305 | 49 50 306 | 49 89 307 | 50 81 308 | 50 49 309 | 50 51 310 | 50 22 311 | 50 29 312 | 51 35 313 | 51 50 314 | 51 52 315 | 51 86 316 | 51 90 317 | 51 94 318 | 52 66 319 | 52 51 320 | 52 53 321 | 53 7 322 | 53 42 323 | 53 52 324 | 53 54 325 | 53 56 326 | 53 90 327 | 54 8 328 | 54 53 329 | 54 5 330 | 54 55 331 | 55 65 332 | 55 9 333 | 55 56 334 | 55 54 335 | 56 36 336 | 56 74 337 | 56 12 338 | 56 53 339 | 56 55 340 | 56 57 341 | 57 56 342 | 57 58 343 | 57 28 344 | 58 57 345 | 58 59 346 | 58 12 347 | 59 70 348 | 59 75 349 | 59 17 350 | 59 58 351 | 59 60 352 | 60 37 353 | 60 59 354 | 60 48 355 | 60 27 356 | 60 61 357 | 61 46 358 | 61 29 359 | 61 26 360 | 61 60 361 | 61 93 362 | 61 62 363 | 62 68 364 | 62 93 365 | 62 85 366 | 62 61 367 | 62 63 368 | 63 64 369 | 63 40 370 | 63 14 371 | 63 93 372 | 63 62 373 | 64 8 374 | 64 65 375 | 64 28 376 | 64 63 377 | 65 64 378 | 65 66 379 | 65 69 380 | 65 74 381 | 65 47 382 | 65 55 383 | 65 29 384 | 66 65 385 | 66 67 386 | 66 69 387 | 66 5 388 | 66 52 389 | 66 23 390 | 67 66 391 | 67 68 392 | 67 44 393 | 67 86 394 | 67 31 395 | 68 8 396 | 68 67 397 | 68 69 398 | 68 62 399 | 69 65 400 | 69 66 401 | 69 68 402 | 69 70 403 | 69 77 404 | 69 83 405 | 69 27 406 | 70 59 407 | 70 69 408 | 70 78 409 | 70 71 410 | 71 33 411 | 71 70 412 | 71 72 413 | 71 87 414 | 71 90 415 | 72 73 416 | 72 90 417 | 72 71 418 | 73 72 419 | 73 74 420 | 73 4 421 | 73 12 422 | 74 65 423 | 74 5 424 | 74 6 425 | 74 73 426 | 74 75 427 | 74 13 428 | 74 21 429 | 74 22 430 | 74 56 431 | 75 74 432 | 75 59 433 | 75 76 434 | 76 9 435 | 76 75 436 | 76 11 437 | 76 77 438 | 77 69 439 | 77 76 440 | 77 78 441 | 77 46 442 | 77 93 443 | 78 70 444 | 78 77 445 | 78 79 446 | 78 87 447 | 78 29 448 | 79 80 449 | 79 17 450 | 79 78 451 | 79 49 452 | 80 6 453 | 80 7 454 | 80 13 455 | 80 79 456 | 80 81 457 | 80 85 458 | 81 34 459 | 81 80 460 | 81 50 461 | 81 82 462 | 81 88 463 | 82 81 464 | 82 83 465 | 83 82 466 | 83 84 467 | 83 69 468 | 84 18 469 | 84 83 470 | 84 85 471 | 84 95 472 | 85 9 473 | 85 41 474 | 85 80 475 | 85 84 476 | 85 86 477 | 85 88 478 | 85 28 479 | 85 62 480 | 86 67 481 | 86 48 482 | 86 51 483 | 86 85 484 | 86 87 485 | 86 88 486 | 87 71 487 | 87 45 488 | 87 78 489 | 87 86 490 | 87 88 491 | 88 89 492 | 88 4 493 | 88 8 494 | 88 12 495 | 88 81 496 | 88 85 497 | 88 86 498 | 88 87 
499 | 88 25 500 | 89 88 501 | 89 49 502 | 89 2 503 | 89 90 504 | 90 3 505 | 90 5 506 | 90 71 507 | 90 72 508 | 90 51 509 | 90 20 510 | 90 21 511 | 90 53 512 | 90 89 513 | 90 91 514 | 91 32 515 | 91 1 516 | 91 40 517 | 91 18 518 | 91 19 519 | 91 90 520 | 91 92 521 | 92 32 522 | 92 91 523 | 92 93 524 | 92 7 525 | 93 32 526 | 93 77 527 | 93 63 528 | 93 30 529 | 93 94 530 | 93 92 531 | 93 61 532 | 93 62 533 | 93 31 534 | 94 25 535 | 94 51 536 | 94 93 537 | 94 95 538 | 95 0 539 | 95 1 540 | 95 4 541 | 95 46 542 | 95 84 543 | 95 21 544 | 95 23 545 | 95 94 546 | --------------------------------------------------------------------------------
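All of the .edges files above share the format produced by Graph.write(..., type="edges") in src/decentralizepy/graphs/Graph.py: the first line holds the number of nodes, and every following line holds one directed "node neighbor" pair. The snippet below is a minimal standalone sketch of a reader for this format; it is not part of the repository, and the helper name read_edges is hypothetical.

import sys

def read_edges(path):
    """Parse an .edges file into (n_procs, list of neighbor sets)."""
    with open(path) as f:
        n_procs = int(f.readline())  # first line: number of nodes
        adj_list = [set() for _ in range(n_procs)]
        for line in f:
            if not line.strip():
                continue  # tolerate trailing blank lines
            node, neighbor = map(int, line.split())  # one "node neighbor" pair per line
            adj_list[node].add(neighbor)
    return n_procs, adj_list

if __name__ == "__main__":
    # e.g. python read_edges.py eval/80_nodes.edges
    n, adj = read_edges(sys.argv[1])
    print(n, sorted(adj[0]))  # for eval/80_nodes.edges: 80 [1, 3, 21, 28, 47, 53, 60, 79]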