├── .gitignore
├── LICENSE
├── README.md
├── aggregators
│   ├── base.py
│   └── torchcomponentrepository.py
├── analyses
│   └── noise.py
├── commands.md
├── commands
│   ├── float16
│   │   ├── casas.sh
│   │   ├── energy.sh
│   │   ├── epic_sounds.sh
│   │   ├── ut_har.sh
│   │   ├── widar.sh
│   │   └── wisdm.sh
│   ├── grid_search
│   │   └── fedopt
│   │       ├── casas.sh
│   │       ├── energy.sh
│   │       ├── ut_har.sh
│   │       ├── widar.sh
│   │       ├── wisdm_p.sh
│   │       └── wisdm_w.sh
│   ├── label_error
│   │   ├── casas.sh
│   │   ├── epic_sounds.sh
│   │   ├── ut_har.sh
│   │   ├── widar.sh
│   │   └── wisdm.sh
│   └── overview
│       ├── fedavg
│       │   ├── casas.sh
│       │   ├── energy.sh
│       │   ├── epic_sounds.sh
│       │   ├── ut_har.sh
│       │   ├── widar.sh
│       │   └── wisdm.sh
│       └── fedopt
│           ├── casas.sh
│           ├── energy.sh
│           ├── ut_har.sh
│           ├── widar.sh
│           └── wisdm.sh
├── config.yml
├── confusion_matrices
│   ├── conf_casas.csv
│   ├── conf_epic_sounds.csv
│   ├── conf_ut_har.csv
│   ├── conf_widar.csv
│   ├── conf_wisdm_phone.csv
│   └── conf_wisdm_watch.csv
├── datasets
│   ├── casas
│   │   └── download.py
│   ├── ego4d
│   │   ├── process.py
│   │   ├── uids
│   │   └── utils.py
│   ├── emognition
│   │   └── download.py
│   ├── energy
│   │   └── energydata_complete.csv
│   ├── epic_sounds
│   │   ├── download.py
│   │   ├── epic-kitchens-download-scripts-master
│   │   │   ├── README.md
│   │   │   ├── data
│   │   │   │   ├── epic_100_splits.csv
│   │   │   │   ├── epic_55_splits.csv
│   │   │   │   ├── errata.csv
│   │   │   │   └── md5.csv
│   │   │   ├── download_extension_only.sh
│   │   │   ├── download_full_epic.sh
│   │   │   └── epic_downloader.py
│   │   └── epic-sounds-annotations-main
│   │       ├── EPIC_Sounds_recognition_test_timestamps.csv
│   │       ├── EPIC_Sounds_recognition_test_timestamps.pkl
│   │       ├── EPIC_Sounds_train.csv
│   │       ├── EPIC_Sounds_train.pkl
│   │       ├── EPIC_Sounds_validation.csv
│   │       ├── EPIC_Sounds_validation.pkl
│   │       ├── sound_events_not_categorised.csv
│   │       └── sound_events_not_categorised.pkl
│   ├── ut_har
│   │   ├── download.py
│   │   └── process.py
│   ├── visdrone
│   │   ├── clusterer.py
│   │   ├── download.py
│   │   └── split.csv
│   ├── widar
│   │   └── download.py
│   └── wisdm
│       ├── activity_key.txt
│       ├── activity_key_filtered.txt
│       └── download.py
├── distributed_main.py
├── environment_droplet.yml
├── figures
│   ├── datasets.png
│   ├── overview.png
│   └── pipeline.png
├── loaders
│   ├── casas.py
│   ├── cifar10.py
│   ├── clusterer.py
│   ├── ego4d.py
│   ├── emognition.py
│   ├── energy.py
│   ├── epic_sounds.py
│   ├── pack_audio.py
│   ├── spatial_transforms.py
│   ├── spec_augment.py
│   ├── ut_har.py
│   ├── utils.py
│   ├── visdrone.py
│   ├── widar.py
│   └── wisdm.py
├── main.py
├── models
│   ├── casas.py
│   ├── ego4d.py
│   ├── emognition.py
│   ├── energy.py
│   ├── epic_sounds.py
│   ├── ut_har.py
│   ├── utils.py
│   ├── widar.py
│   ├── wisdm.py
│   └── yolov8.yaml
├── partition
│   ├── centralized.py
│   ├── dirichlet.py
│   ├── label.py
│   ├── uniform.py
│   ├── user_index.py
│   └── utils.py
├── requirements.txt
├── scorers
│   ├── classification_evaluator.py
│   ├── localization_evaluator.py
│   ├── regression_evaluator.py
│   ├── ultralytics_yolo_evaluator.py
│   └── utils.py
├── strategies
│   └── base_fl.py
├── system.yml
├── trainers
│   ├── distributed_base.py
│   ├── ultralytics_distributed.py
│   └── utils.py
├── utils.py
└── validator.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | .idea/
161 | draft/
162 | *.meta
163 | /datasets/cifar10/cifar-10-batches-py/
164 | /datasets/cifar10/cifar-10-python.tar.gz
165 | /datasets/widar/federated/
166 | wandb/
167 | /datasets/*/*.tar
168 | /datasets/*/*.html
169 | /datasets/*/*.png
170 | /datasets/*/*.dt
171 | /datasets/emognition/
172 | /datasets/widar/federated/
173 | /datasets/widar/Widardata/
174 | /datasets/widar/Widardata.zip
175 | /datasets/wisdm/wisdm-dataset/
176 | /datasets/wisdm/processed.csv
177 | /logs/
178 | datasets/casas/
179 | datasets/bewie/
180 | /datasets/ego4d/ego4d_data/
181 | /datasets/visdrone/test/
182 | /datasets/visdrone/train/
183 | /datasets/visdrone/val/
184 | /datasets/visdrone/VisDrone2018-DET-test-dev/
185 | /datasets/visdrone/VisDrone2019-DET-train/
186 | /datasets/visdrone/VisDrone2019-DET-val/
187 | /datasets/*/*.zip
188 | /datasets/ego4d/negative/
189 | /datasets/ego4d/positive/
190 | /datasets/ego4d/ego4d_data_v2/
191 | /datasets/epic_sounds/EPIC-KITCHENS/
192 | /datasets/student_life/
193 | /datasets/ut_har/UT_HAR/
194 | /weights/
195 | datasets/wisdm/processed_watch.csv
196 | datasets/wisdm/processed_phone.csv
197 | *.html
198 | *.ini
199 | /datasets/emognition/_study_data.zip
200 | *.zip
201 | datasets/energy/energydata_complete.csv
202 | datasets/epic_sounds/EPIC_audio.hdf5
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # FedAIoT: A Federated Learning Benchmark for Artificial Intelligence of Things
2 |
3 | ## Table of Contents
4 | 1. [Introduction](#introduction)
5 | 2. [Requirements](#requirements)
6 | 3. [Datasets](#datasets)
7 | 4. [Usage](#usage)
8 | 5. [Citation](#citation)
9 |
10 | ## Introduction
11 | Despite the significant relevance of federated learning (FL) in the realm of IoT, most existing FL works are conducted on well-known datasets such as CIFAR-10 and CIFAR-100. These datasets, however, do not originate from authentic IoT devices and thus fail to capture the unique modalities and inherent challenges associated with real-world IoT data. This notable discrepancy underscores a strong need for an IoT-oriented FL benchmark to fill this critical gap.
12 |
13 | This repository holds the source code for [FedAIoT: A Federated Learning Benchmark for Artificial Intelligence of Things](https://arxiv.org/abs/2310.00109). FedAIoT is a benchmarking tool for evaluating FL algorithms against real IoT datasets. FedAIoT contains eight well-chosen datasets collected from a wide range of authentic IoT devices, ranging from smartwatches, smartphones, and Wi-Fi routers to drones, smart home sensors, and head-mounted devices, that have either already become an indispensable part of people’s daily lives or are driving emerging applications. These datasets encapsulate a variety of unique IoT-specific data modalities such as wireless data, drone images, and smart home sensor data (e.g., motion, energy, humidity, temperature) that have not been explored in existing FL benchmarks.
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | To help the community benchmark performance on these datasets and to ensure reproducibility, FedAIoT includes a unified end-to-end FL framework for AIoT that covers the complete FL-for-AIoT pipeline: non-independent and identically distributed (non-IID) data partitioning, IoT-specific data preprocessing, IoT-friendly models, FL hyperparameters, and an IoT-factor emulator.
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 | ## Requirements
30 |
31 | ```bash
32 | pip install -r requirements.txt
33 | ```
34 | ## Datasets
35 |
36 | FedAIoT currently includes the following eight IoT datasets:
37 |
38 |
39 |
40 |
41 |
42 | Each dataset folder contains a `download.py` script for downloading the dataset.
43 |
44 |
45 | [//]: # (## Non-IID Partition Scheme)
46 |
47 | [//]: # (The partition classes split a large dataset into a list of smaller datasets. Several Partition methods are implemented. )
48 |
49 | [//]: # (1. Centralized essentially returns the original dataset as a list of one dataset.)
50 |
51 | [//]: # (2. Dirichlet partitions the dataset into a specified number of clients with non-IID dirichlet distribution.)
52 |
53 | [//]: # ()
54 | [//]: # (Create a partition object and use that to partition any centralized dataset. Using the same partition on two )
55 |
56 | [//]: # (different data splits will result in the same distribution of data between clients. For example:)
57 |
58 | [//]: # (```python)
59 |
60 | [//]: # ( partition = DirichletPartition(num_clients=10))
61 |
62 | [//]: # ( train_partition = partition(dataset['train']))
63 |
64 | [//]: # (```)
65 |
66 | [//]: # (Here `train_partition` and `test_partition` will have `10` clients with the same relative class and sample )
67 |
68 | [//]: # (distribution.)
69 |
70 | [//]: # ()
71 | [//]: # (For more details on implementation: [See here](https://github.com/AIoT-MLSys-Lab/FedAIoT/blob/61d8147d56f7ef4ea04d43a708f4de523f9e36bc/distributed_main.py#L129-L145))
72 |
73 |
74 | [//]: # ([//]: # (## Models))
75 | [//]: # ()
76 | [//]: # ([//]: # ())
77 | [//]: # ([//]: # (The experiment supports various models and allows you to use custom models as well. See the models directory for the ))
78 | [//]: # ()
79 | [//]: # ([//]: # (individual implementations of the models for the respective datasets.))
80 | [//]: # ()
81 | [//]: # (## Training)
82 |
83 | [//]: # ()
84 | [//]: # (The experiment supports different federated learning algorithms and partition types. You can configure the experiment settings by modifying the `config.yml` file or passing the required parameters when running the script.)
85 |
86 | [//]: # ()
87 | [//]: # (The basic federated learning algorithm is implemented in the `algorithm.base_fl` module. Given an `aggregator` (See )
88 |
89 | [//]: # (aggregator module), `client_trainers` (ray actors for distributed training), `client_dataset_refs` (ray data )
90 |
91 | [//]: # (references), `client_num_per_round` (Number of clients sampled per round; < total clients), `global_model`, `round_idx`, )
92 |
93 | [//]: # (`scheduler`, `device` (cpu or gpu), it runs one round of federated learning following vanilla fed avg.)
94 |
95 | [//]: # (The following federated learning algorithms are included in the benchmark:)
96 |
97 | [//]: # ()
98 | [//]: # (- FedAvg)
99 |
100 | [//]: # (- FedAdam)
101 |
102 | [//]: # ()
103 | [//]: # ()
104 | [//]: # (Various training options and hyperparameters can be configured, such as the optimizer, learning rate, weight decay, epochs, and more.)
105 |
106 | ## Usage
107 | Before running, set the environment variables `num_gpus` and `num_trainers_per_gpu`; together they determine the total number of workers in the distributed system. To use only a subset of the GPUs available on the machine, specify them via the `CUDA_VISIBLE_DEVICES` variable.
108 |
109 | Take WISDM-W as an example. To train a centralized model on WISDM-W:
110 |
111 | ```
112 | num_gpus=1 num_trainers_per_gpu=1 CUDA_VISIBLE_DEVICES=0 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 1 --client_num_per_round 1 --partition_type central --alpha 0.1 --lr 0.01 --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 200 --batch_size 128 --analysis baseline --trainer BaseTrainer --watch_metric accuracy
113 | ```
114 |
115 | To train a federated model on WISDM-W with FedAvg and a `10%` client sampling rate under high data heterogeneity:
116 |
117 | ```
118 | num_gpus=1 num_trainers_per_gpu=1 CUDA_VISIBLE_DEVICES=0 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr 0.01 --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --watch_metric accuracy
119 | ```
120 |
121 | For the full list of parameters, run:
122 | ```
123 | python distributed_main.py main --help
124 | ```
125 |
126 | ## Citation
127 |
128 | ```
129 | @article{
130 | alam2024fedaiot,
131 | title={Fed{AI}oT: A Federated Learning Benchmark for Artificial Intelligence of Things},
132 | author={Samiul Alam and Tuo Zhang and Tiantian Feng and Hui Shen and Zhichao Cao and Dong Zhao and Jeonggil Ko and Kiran Somasundaram and Shrikanth Narayanan and Salman Avestimehr and Mi Zhang},
133 | journal={Journal of Data-centric Machine Learning Research (DMLR)},
134 | year={2024},
135 | url={https://openreview.net/forum?id=fYNw9Ukljz},
136 | }
137 | ```
138 |
--------------------------------------------------------------------------------
/aggregators/base.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | import numpy as np
4 | import torch
5 |
6 |
7 | from aggregators.torchcomponentrepository import TorchComponentRepository
8 |
9 |
10 | class FederatedAveraging:
11 | def __init__(self, global_model: torch.nn.Module,
12 | server_optimizer='sgd',
13 | server_lr=1e-2,
14 | server_momentum=0.9,
15 | eps=1e-3):
16 |
17 | self.global_model = global_model
18 | self.server_optimizer = server_optimizer
19 | self.server_lr = server_lr
20 | self.optimizer = TorchComponentRepository.get_class_by_name(self.server_optimizer, torch.optim.Optimizer)(
21 | filter(lambda p: p.requires_grad, global_model.parameters()),
22 | lr=server_lr,
23 | )
24 |
25 | def step(self,
26 |              updated_parameter_list: list[dict[str, torch.Tensor]],
27 |              weights: Union[None, list[float]],
28 | round_idx: int = 0):
29 | self.optimizer.zero_grad()
30 |
31 | params_n_plus_1 = self._average_updates(updated_parameter_list, weights)
32 | named_params = dict(self.global_model.cpu().named_parameters())
33 | state_n_plus_1 = self.global_model.cpu().state_dict()
34 | with torch.no_grad():
35 | for parameter_name, parameter_n_plus_1 in params_n_plus_1.items():
36 | if parameter_name in named_params.keys():
37 | parameter_n = named_params[parameter_name]
38 | parameter_n.grad = parameter_n.data - parameter_n_plus_1.data
39 | else:
40 | state_n_plus_1[parameter_name] = params_n_plus_1[parameter_name]
41 | self.global_model.load_state_dict(state_n_plus_1)
42 | self.optimizer.step()
43 | return self.global_model.cpu().state_dict()
44 |
45 | @staticmethod
46 | def _average_updates(update_list, weights=None):
47 | if weights is None:
48 | weights = [1 / len(update_list) for _ in range(len(update_list))]
49 | weights = np.array(weights, dtype=float)
50 | weights /= weights.sum()
51 | averaged_params = {k: v * weights[0] for k, v in update_list[0].items()}
52 | if len(update_list) > 1:
53 | for local_model_params, weight in zip(update_list[1:], weights[1:]):
54 | for k in averaged_params.keys():
55 | averaged_params[k] += local_model_params[k] * weight
56 | return averaged_params
57 |
--------------------------------------------------------------------------------
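A minimal usage sketch for `FederatedAveraging` (the two-client setup, the `SmallNet` model, and the hand-built client updates below are illustrative assumptions, not part of the repository). The aggregator treats the difference between the current global weights and the weighted average of the client weights as a pseudo-gradient, so with `server_optimizer='sgd'` and `server_lr=1` one `step` reproduces vanilla FedAvg, while `server_optimizer='adam'` yields the FedOpt/FedAdam variant used by the scripts in `commands/grid_search/fedopt`. Note that `server_momentum` and `eps` are accepted by `__init__` but not forwarded to the optimizer in this version.

```python
# Sketch: aggregate two clients' locally updated weights with FederatedAveraging.
# SmallNet and the simulated local updates are made up for illustration.
import torch

from aggregators.base import FederatedAveraging


class SmallNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)


global_model = SmallNet()
# server_lr=1 with plain SGD makes the server step equal to the averaged update (vanilla FedAvg).
aggregator = FederatedAveraging(global_model, server_optimizer='sgd', server_lr=1)

# Pretend two clients trained locally and returned their full state dicts.
client_updates = []
for _ in range(2):
    local = SmallNet()
    local.load_state_dict(global_model.state_dict())
    with torch.no_grad():
        for p in local.parameters():
            p.add_(0.01 * torch.randn_like(p))  # stand-in for local training epochs
    client_updates.append(local.state_dict())

# Weight clients by (hypothetical) local sample counts; _average_updates normalizes them.
new_global_state = aggregator.step(client_updates, weights=[600.0, 400.0], round_idx=0)
print(sorted(new_global_state.keys()))  # ['fc.bias', 'fc.weight']
```
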
/aggregators/torchcomponentrepository.py:
--------------------------------------------------------------------------------
1 | from typing import List, Union, Type
2 |
3 | import torch
4 |
5 |
6 | class TorchComponentRepository:
7 | """A utility class for working with subclasses of PyTorch components,
8 | such as torch.optim.Optimizer and torch.optim.lr_scheduler._LRScheduler."""
9 |
10 | @classmethod
11 | def get_supported_names(cls, module) -> List[str]:
12 | """Returns a list of supported component names."""
13 | return [component.__name__.lower() for component in module.__subclasses__()]
14 |
15 | @classmethod
16 | def get_class_by_name(cls, name: str, module):
17 | """Returns the component class corresponding to the given name."""
18 | component_class = next((component for component in module.__subclasses__()
19 | if component.__name__.lower() == name.lower()), None)
20 | if not component_class:
21 | raise KeyError(f"Invalid component: {name}! Available components: {cls.get_supported_names(module)}")
22 | return component_class
23 |
24 | @classmethod
25 | def get_supported_parameters(cls, component: Union[str, Type], module=None) -> List[str]:
26 | """Returns a list of __init__ function parameters for a given component and module."""
27 | component_class = cls.get_class_by_name(component, module) if isinstance(component, str) else component
28 | params = component_class.__init__.__code__.co_varnames
29 | return [param for param in params if param not in {"defaults", "self", "params"}]
30 |
31 |
32 | if __name__ == '__main__':
33 | print(TorchComponentRepository.get_supported_names(torch.optim.Optimizer))
34 | print(TorchComponentRepository.get_supported_names(torch.optim.lr_scheduler._LRScheduler))
35 | print(TorchComponentRepository.get_class_by_name("adam", torch.optim.Optimizer))
36 | print(TorchComponentRepository.get_class_by_name("linearlr", torch.optim.lr_scheduler._LRScheduler))
37 | print(TorchComponentRepository.get_supported_parameters(torch.optim.Adam))
38 | print(TorchComponentRepository.get_supported_parameters(torch.optim.lr_scheduler.StepLR))
39 | print(TorchComponentRepository.get_supported_parameters("adam", torch.optim.Optimizer))
40 |
41 |
--------------------------------------------------------------------------------
/analyses/noise.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import random
3 |
4 | import numpy as np
5 | import wandb
6 | from matplotlib import pyplot as plt
7 | from torch.utils.data import Dataset
8 | from tqdm import tqdm
9 |
10 |
11 | class NoisyDataset(Dataset):
12 | def __init__(self, data):
13 | self.data = data
14 | self.targets = copy.deepcopy(data.targets)
15 |
16 | def __getitem__(self, index):
17 | return self.data[index][0], self.targets[index]
18 |
19 | def __len__(self):
20 | return len(self.data)
21 |
22 |
23 | def inject_label_noise(client_datasets, class_num, error_ratio, error_var):
24 | """
25 | Add label noise to client datasets and log noise percentages to wandb.
26 |
27 | Args:
28 | client_datasets: a list of client datasets
29 | class_num: an integer indicating the number of classes.
30 | error_ratio: a float between 0 and 1 indicating the ratio of labels to be flipped.
31 | error_var: a float indicating the variance of the Gaussian distribution used to determine
32 | the level of label noise.
33 |
34 | Returns:
35 | A list of client datasets, and a list of noise percentages for each dataset
36 | """
37 | client_datasets_label_error = []
38 | noise_percentages = []
39 | for original_data in client_datasets:
40 | # Determine the level of label noise for this client dataset. The level is computed by normal distribution
41 | noisy_level = np.random.normal(error_ratio, error_var)
42 | noisy_level = max(noisy_level, 0)
43 |
44 | # Set the level of sparsity in the noise matrix.
45 | sparse_level = 0.4
46 |
47 | # Create a probability matrix for each label, where each element represents the probability of a label being assigned to that image.
48 | prob_matrix = np.full(class_num * class_num, 1 - noisy_level)
49 |
50 | # Set a random subset of elements in the probability matrix to zero to create sparsity.
51 | sparse_elements = np.random.choice(class_num * class_num, round(class_num * (class_num - 1) * sparse_level),
52 | replace=False)
53 | sparse_elements = sparse_elements[sparse_elements % (class_num + 1) != 0]
54 | prob_matrix[sparse_elements] = 0
55 |
56 | # Update prob_matrix
57 | prob_matrix = prob_matrix.reshape((class_num, class_num))
58 | for idx in range(len(prob_matrix)):
59 | non_zeros = np.count_nonzero(prob_matrix[idx])
60 | prob_element = 0 if non_zeros == 1 else (noisy_level) / (non_zeros - 1)
61 | prob_matrix[idx] = np.where(prob_matrix[idx] == 1 - noisy_level, prob_element, prob_matrix[idx])
62 | prob_matrix[idx, idx] = 1 - noisy_level
63 |
64 | # Add label noise to dataset and calculate noise percentage
65 | original_labels = [sample[1] for sample in original_data]
66 | new_labels = [np.random.choice(class_num, p=prob_matrix[label]) for label in original_labels]
67 | new_dataset = [[original_data[i][0], new_labels[i]] for i in range(len(original_data))]
68 |
69 | noise_percentage = np.sum(np.array(original_labels) != np.array(new_labels)) / len(original_labels) * 100
70 | noise_percentages.append(noise_percentage)
71 |
72 | client_datasets_label_error.append(new_dataset)
73 |
74 | return client_datasets_label_error, noise_percentages
75 |
76 | # this is the function we use in the paper
77 |
78 | def inject_label_noise_with_matrix(client_datasets, class_num, confusion_matrix, error_label_ratio):
79 | """
80 | Add label noise to client datasets and log noise percentages to wandb.
81 |
82 | Args:
83 | client_datasets: a list of client datasets
84 | class_num: an integer indicating the number of classes.
85 |         confusion_matrix: the confusion matrix used to sample the new labels; its size is class_num x class_num.
86 |         error_label_ratio: a float between 0 and 1 indicating the fraction of labels to corrupt per client.
87 | Returns:
88 | A list of client datasets, and a list of noise percentages for each dataset
89 | """
90 | client_datasets_label_error = []
91 | noise_percentages = []
92 |
93 | for original_data in tqdm(client_datasets, total=len(client_datasets)):
94 | new_dataset = original_data
95 | new_dataset = NoisyDataset(new_dataset)
96 | # new_dataset = [[original_data[i][0], original_data[i][1]] for i in range(len(new_dataset))]
97 | num_elements = len(original_data)
98 | num_elements_to_change = int(num_elements * error_label_ratio)
99 | # indices_to_change = random.sample(range(num_elements), num_elements_to_change)
100 | indices = random.sample(range(num_elements), num_elements)
101 | indices_to_change = []
102 | for index in indices:
103 | current_label_true = original_data[index][1]
104 | change_prob = confusion_matrix[current_label_true]
105 |             # to speed up the noise injection, we only change labels whose centralized accuracy is below 95%
106 | if np.max(change_prob) < 0.95:
107 | indices_to_change.append(index)
108 | if len(indices_to_change) == num_elements_to_change:
109 | break
110 |
111 | changed_indices = set()
112 | for index in indices_to_change:
113 | current_label = original_data[index][1]
114 | new_label = np.random.choice(class_num,
115 | p=confusion_matrix[current_label] / sum(confusion_matrix[current_label]))
116 | while new_label == current_label or index in changed_indices:
117 | new_label = np.random.choice(class_num,
118 | p=confusion_matrix[current_label] / sum(confusion_matrix[current_label]))
119 | new_dataset.targets[index] = new_label
120 | changed_indices.add(index)
121 |
122 | original_labels = [sample[1] for sample in original_data]
123 | new_labels = [sample[1] for sample in new_dataset]
124 | noise_percentage = np.sum(np.array(original_labels) != np.array(new_labels)) / len(original_labels) * 100
125 | noise_percentages.append(noise_percentage)
126 | client_datasets_label_error.append(new_dataset)
127 |
128 | return client_datasets_label_error, noise_percentages
129 |
130 |
131 | def plot_noise_percentage(original_datasets, noisy_datasets, run):
132 | """
133 | Function to calculate and plot label noise percentages for a list of datasets and upload it to wandb.
134 |
135 | Parameters:
136 | original_datasets (list): List of original PyTorch datasets.
137 | noisy_datasets (list): List of noisy PyTorch datasets.
138 | run (wandb.wandb_run.Run): The wandb run object to which the plot will be logged.
139 |
140 | Returns:
141 | None
142 | """
143 | # Compute label noise percentages
144 | label_noise_percentages = []
145 |
146 | for original_dataset, noisy_dataset in zip(original_datasets, noisy_datasets):
147 | original_labels = [label for _, label in original_dataset]
148 | noisy_labels = [label for _, label in noisy_dataset]
149 |
150 | # Compute noise percentage for this dataset
151 | noise_percentage = np.sum(np.array(original_labels) != np.array(noisy_labels)) / len(original_labels) * 100
152 | label_noise_percentages.append(noise_percentage)
153 |
154 | # Plot the label noise percentages as a histogram
155 | plt.hist(label_noise_percentages, bins=10, edgecolor='black')
156 | plt.title('Histogram of Label Noise Percentages')
157 | plt.xlabel('Label Noise Percentage')
158 | plt.ylabel('Count')
159 |
160 | # Save the plot to a file
161 | plt.savefig('label_noise_histogram.png')
162 | plt.close() # Close the plot
163 |
164 | # Log the plot to wandb
165 | run.log({"label_noise_histogram": wandb.Image('label_noise_histogram.png')})
166 |
--------------------------------------------------------------------------------
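A minimal sketch of how `inject_label_noise_with_matrix` (the variant used in the paper) can be driven. The 3-class confusion matrix and the `ToyDataset` stand-ins below are illustrative assumptions; in the full pipeline a `class_num x class_num` matrix, presumably one of the shipped `confusion_matrices/conf_*.csv` files, plays the role of `confusion_matrix`.

```python
# Sketch: corrupt ~30% of labels per client according to a confusion matrix.
# The matrix and the toy client datasets are made up for illustration.
import numpy as np
import torch

from analyses.noise import inject_label_noise_with_matrix

np.random.seed(0)
class_num = 3
# Row i holds the (unnormalized) probabilities of relabelling true class i as each class.
confusion_matrix = np.array([
    [0.80, 0.15, 0.05],
    [0.10, 0.85, 0.05],
    [0.20, 0.10, 0.70],
])


class ToyDataset(torch.utils.data.Dataset):
    """Tiny stand-in for one client's partition; exposes .targets like the real loaders."""

    def __init__(self, n):
        self.data = torch.randn(n, 8)
        self.targets = list(np.random.randint(class_num, size=n))

    def __getitem__(self, index):
        return self.data[index], self.targets[index]

    def __len__(self):
        return len(self.targets)


client_datasets = [ToyDataset(50) for _ in range(4)]
noisy_datasets, noise_pcts = inject_label_noise_with_matrix(
    client_datasets, class_num, confusion_matrix, 0.3)
print([round(p, 1) for p in noise_pcts])  # per-client percentage of flipped labels, ~30 each
```
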
/commands/float16/casas.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | client_lr=0.01
5 | for seed in {1..3}
6 | do
7 | ## 6. casas
8 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
9 | done
--------------------------------------------------------------------------------
/commands/float16/energy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | client_lr=0.1
4 | for seed in {1..3}
5 | do
6 | ## 7. energy
7 | seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric R^2
8 | done
9 |
10 |
--------------------------------------------------------------------------------
/commands/float16/epic_sounds.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 seed=1 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
2 | CUDA_VISIBLE_DEVICES=0 seed=2 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
3 | CUDA_VISIBLE_DEVICES=0 seed=3 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
4 |
--------------------------------------------------------------------------------
/commands/float16/ut_har.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | client_lr=0.0001
5 | for seed in {1..3}
6 | do
7 | ## 4. ut_har
8 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
9 | done
--------------------------------------------------------------------------------
/commands/float16/widar.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | client_lr=0.005
4 | for seed in {1..3}
5 | do
6 | ## 3. widar
7 | seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
8 | done
--------------------------------------------------------------------------------
/commands/float16/wisdm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | client_lr=0.01
4 | ## 1. wisdm phone
5 | ### NIID-0.1 SGD 10%-30%
6 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
7 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
8 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
9 |
10 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
11 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
12 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy
13 |
--------------------------------------------------------------------------------
/commands/grid_search/fedopt/casas.sh:
--------------------------------------------------------------------------------
1 | lrs=(0.01 0.001 0.0001)
2 | for client_lr in "${lrs[@]}"
3 | do
4 | for server_lr in "${lrs[@]}"
5 | do
6 | ### NIID-0.1 Adam 10%-30%
7 | num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
8 | ### NIID-0.5 Adam 10%-30%
9 | num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
10 | done
11 | done
--------------------------------------------------------------------------------
/commands/grid_search/fedopt/energy.sh:
--------------------------------------------------------------------------------
1 | lrs=(0.01 0.001 0.0001)
2 | for client_lr in "${lrs[@]}"
3 | do
4 | for server_lr in "${lrs[@]}"
5 | do
6 | ### NIID-0.1 Adam 10%-30%
7 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2
8 | ### NIID-0.5 Adam 10%-30%
9 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2
10 | done
11 | done
--------------------------------------------------------------------------------
/commands/grid_search/fedopt/ut_har.sh:
--------------------------------------------------------------------------------
1 | lrs=(0.01 0.001 0.0001)
2 | for client_lr in "${lrs[@]}"
3 | do
4 | for server_lr in "${lrs[@]}"
5 | do
6 | ### NIID-0.1 Adam 10%
7 | num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
8 | ### NIID-0.5 Adam 10%
9 | num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
10 | done
11 | done
--------------------------------------------------------------------------------
/commands/grid_search/fedopt/widar.sh:
--------------------------------------------------------------------------------
1 | lrs=(0.01 0.001 0.0001)
2 | for client_lr in "${lrs[@]}"
3 | do
4 | for server_lr in "${lrs[@]}"
5 | do
6 | ### NIID-0.1 Adam 10%-30%
7 | num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1000 --batch_size 8 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
8 | ### NIID-0.5 Adam 10%-30%
9 | num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1000 --batch_size 8 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
10 | done
11 | done
--------------------------------------------------------------------------------
/commands/grid_search/fedopt/wisdm_p.sh:
--------------------------------------------------------------------------------
1 | lrs=(0.01 0.001 0.0001)
2 | for client_lr in "${lrs[@]}"
3 | do
4 | for server_lr in "${lrs[@]}"
5 | do
6 | ### NIID-0.1 Adam 10%
7 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
8 | ### NIID-0.5 Adam 10%
9 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
10 | done
11 | done
--------------------------------------------------------------------------------
/commands/grid_search/fedopt/wisdm_w.sh:
--------------------------------------------------------------------------------
1 | lrs=(0.01 0.001 0.0001)
2 | for client_lr in "${lrs[@]}"
3 | do
4 | for server_lr in "${lrs[@]}"
5 | do
6 | ### NIID-0.1 Adam 10%
7 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
8 | ### NIID-0.5 Adam 10%
9 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
10 | done
11 | done
--------------------------------------------------------------------------------
/commands/label_error/casas.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | client_lr=0.01
5 | for seed in {1..3}
6 | do
7 | ## 6. casas
8 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
9 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
10 | done
--------------------------------------------------------------------------------
/commands/label_error/epic_sounds.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 seed=1 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
2 | CUDA_VISIBLE_DEVICES=0 seed=2 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
3 | CUDA_VISIBLE_DEVICES=1 seed=3 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
4 |
5 |
6 | CUDA_VISIBLE_DEVICES=3 seed=1 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
7 | CUDA_VISIBLE_DEVICES=3 seed=2 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
8 | CUDA_VISIBLE_DEVICES=4 seed=3 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
9 |
--------------------------------------------------------------------------------
/commands/label_error/ut_har.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | client_lr=0.001
5 | for seed in {1..3}
6 | do
7 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
8 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
9 | done
--------------------------------------------------------------------------------
/commands/label_error/widar.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | client_lr=0.001
4 | for seed in {1..3}
5 | do
6 | seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1500 --batch_size 8 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
7 | seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1500 --batch_size 8 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
8 | done
--------------------------------------------------------------------------------
/commands/label_error/wisdm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | client_lr=0.01
4 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
5 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
6 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
7 | ### NIID-0.1 SGD 10%-30%
8 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
9 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
10 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
11 |
12 |
13 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
14 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
15 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
16 | ### NIID-0.1 SGD 10%-30%
17 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
18 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
19 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy
20 |
--------------------------------------------------------------------------------
/commands/overview/fedavg/casas.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | client_lr=0.01
5 | for seed in {1..3}
6 | do
7 | ## 6. casas
8 | ### Centralized
9 | seed=$seed num_gpus=1 num_trainers_per_gpu=1 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 1 --client_num_per_round 1 --partition_type central --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 250 --batch_size 128 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
10 | ### NIID-0.1 SGD 10%-30%
11 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
12 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 18 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
13 | ### NIID-0.5 SGD 10%-30%
14 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
15 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 18 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
16 | ### NIID-0.1 Adam 10%-30%
17 | #seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr 0.1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
18 | ### NIID-0.5 Adam 10%-30%
19 | #seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr 0.1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
20 | done
--------------------------------------------------------------------------------
/commands/overview/fedavg/energy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | client_lr=0.1
4 | for seed in {1..3}
5 | do
6 | ## 7. energy
7 | ### Centralized
8 | # seed=$seed num_gpus=1 num_trainers_per_gpu=1 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 1 --client_num_per_round 1 --partition_type central --alpha 0.1 --lr 0.01 --client_optimizer sgd --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --watch_metric R^2
9 | ### NIID-0.1 SGD 10%-30%
10 | # seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2
11 | seed=$seed num_gpus=3 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 24 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2
12 | ### NIID-0.5 SGD 10%-30%
13 | # seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2
14 | # seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 24 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2
15 | done
16 |
17 |
--------------------------------------------------------------------------------
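
In the active energy line, the trainer pool appears sized to cover a whole round in one wave (num_gpus x num_trainers_per_gpu = client_num_per_round). This is an inference from the flags, not documented behavior; a sketch of the relationship:

# Inferred sizing rule (assumption): enough concurrent trainers to run
# all sampled clients of a round at once.
num_gpus = 3
num_trainers_per_gpu = 8
client_num_per_round = 24
assert num_gpus * num_trainers_per_gpu == client_num_per_round
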
/commands/overview/fedavg/epic_sounds.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 seed=1 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
2 | CUDA_VISIBLE_DEVICES=0 seed=2 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
3 | CUDA_VISIBLE_DEVICES=0 seed=3 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
4 |
--------------------------------------------------------------------------------
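
The three epic_sounds lines above differ only in the seed. A minimal Python equivalent, assuming the repository root as the working directory (all flag values are copied from the lines above):

# Sketch: the three explicit seed runs above, expressed as a loop.
# CUDA_VISIBLE_DEVICES pins the run to GPU 0; only `seed` varies.
import os
import subprocess

flags = ("--dataset_name epic_sounds --model resnet18 "
         "--client_num_in_total 300 --client_num_per_round 30 "
         "--partition_type dirichlet --alpha 0.1 --lr 0.1 "
         "--server_optimizer sgd --server_lr 1 --test_frequency 20 "
         "--comm_round 400 --batch_size 32 --analysis baseline "
         "--trainer BaseTrainer --amp --watch_metric accuracy").split()
for seed in (1, 2, 3):
    env = {**os.environ, "CUDA_VISIBLE_DEVICES": "0", "seed": str(seed),
           "num_gpus": "1", "num_trainers_per_gpu": "10"}
    subprocess.run(["python", "distributed_main.py", "main", *flags],
                   env=env, check=True)
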
/commands/overview/fedavg/ut_har.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | client_lr=0.0001
5 | for seed in {1..3}
6 | do
7 | ## 4. ut_har
8 | ### Centralized
9 | seed=$seed num_gpus=1 num_trainers_per_gpu=1 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 1 --client_num_per_round 1 --partition_type central --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 500 --batch_size 128 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
10 | ### NIID-0.1 SGD 10%-30%
11 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
12 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
13 | ### NIID-0.5 SGD 10%-30%
14 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
15 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 6 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
16 | ### NIID-0.1 Adam 10%-30%
17 | # seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr 0.01 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
18 | ### NIID-0.5 Adam 10%-30%
19 | # seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr 0.01 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
20 | done
--------------------------------------------------------------------------------
/commands/overview/fedavg/widar.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | client_lr=0.005
4 | for seed in {1..3}
5 | do
6 | ## 3. widar
7 | ### Centralized
8 | # seed=$seed num_gpus=1 num_trainers_per_gpu=1 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 1 --client_num_per_round 1 --partition_type central --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 120 --batch_size 128 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
9 | ### NIID-0.1 SGD 10%-30%
10 | # seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
11 | seed=$seed num_gpus=3 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 12 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1500 --batch_size 12 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
12 | ### NIID-0.5 SGD 10%-30%
13 | # seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
14 | # seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 12 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
15 | ### NIID-0.1 Adam 10%-30%
16 | # seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr 0.01 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
17 | ### NIID-0.5 Adam 10%-30%
18 | # seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr 0.01 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
19 | done
--------------------------------------------------------------------------------
/commands/overview/fedopt/casas.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | for seed in {1..3}
4 | do
5 | ## 6. casas
6 | ## NIID-0.1 Adam 10%-30%
7 | client_lr=0.001
8 | server_lr=0.1
9 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
10 | ## NIID-0.5 Adam 10%-30%
11 | client_lr=0.0001
12 | server_lr=0.1
13 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
14 | done
--------------------------------------------------------------------------------
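
The FedOpt scripts switch --server_optimizer to adam and tune server_lr alongside client_lr. A minimal sketch of what a server-side Adam step looks like, assuming the standard FedOpt formulation (Reddi et al., "Adaptive Federated Optimization"), where the negative averaged client delta is treated as a pseudo-gradient; this is an illustration, not the repo's aggregator code:

import torch

def server_adam_step(global_params, client_deltas, opt):
    """One FedOpt server step. global_params: list of Tensors;
    client_deltas: one list of Tensors per client (client minus global)."""
    for i, p in enumerate(global_params):
        avg_delta = torch.stack([d[i] for d in client_deltas]).mean(dim=0)
        p.grad = -avg_delta  # negative pseudo-gradient
    opt.step()
    opt.zero_grad()

params = [torch.zeros(3, requires_grad=True)]
opt = torch.optim.Adam(params, lr=0.01)  # lr plays the role of --server_lr
deltas = [[torch.ones(3)], [torch.full((3,), 0.5)]]  # two toy clients
server_adam_step(params, deltas, opt)
print(params[0])  # moved toward the averaged client update
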
/commands/overview/fedopt/energy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | server_lr=0.001
4 | client_lr=0.01
5 | for seed in {1..3}
6 | do
7 | ## 7. energy
8 | ## NIID-0.1 Adam 10%-30%
9 | seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2
10 | ## NIID-0.5 Adam 10%-30%
11 | seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2
12 | done
--------------------------------------------------------------------------------
/commands/overview/fedopt/ut_har.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | server_lr=0.0001
4 | client_lr=0.0001
5 | for seed in {1..3}
6 | do
7 | ## 4. ut_har
8 | ## NIID-0.1 Adam 10%-30%
9 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
10 | ## NIID-0.5 Adam 10%-30%
11 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
12 | done
--------------------------------------------------------------------------------
/commands/overview/fedopt/widar.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | for seed in {1..3}
5 | do
6 | ## 3. widar
7 | server_lr=0.01
8 | client_lr=0.001
9 | ## NIID-0.1 Adam 10%-30%
10 | seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr .01 --test_frequency 5 --comm_round 300 --batch_size 8 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
11 | ## NIID-0.5 Adam 10%-30%
12 | server_lr=0.01
13 | client_lr=0.01
14 | seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr 0.01 --test_frequency 5 --comm_round 300 --batch_size 8 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
15 | done
--------------------------------------------------------------------------------
/commands/overview/fedopt/wisdm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | client_lr=0.01
4 | server_lr=0.01
5 | # 1. wisdm phone
6 | ### NIID-0.1 Adam 10%
7 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
8 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
9 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
10 | ### NIID-0.5 Adam 10%
11 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
12 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
13 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
14 | # 2. wisdm watch
15 | client_lr=0.001
16 | server_lr=0.1
17 | ## NIID-0.1 Adam 10%
18 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
19 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
20 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
21 | client_lr=0.01
22 | server_lr=0.01
23 | ## NIID-0.5 Adam 10%
24 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
25 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
26 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy
27 |
--------------------------------------------------------------------------------
/config.yml:
--------------------------------------------------------------------------------
1 | [DEFAULT]
2 | model = resnet18
3 | dataset = epic_sounds
4 | data_dir = ../data/
5 | client_num_in_total = 30
6 | client_num_per_round = 6
7 | gpu_worker_num = 8
8 | batch_size = 256
9 | client_optimizer = sgd
10 | lr = 1e-2
11 | wd = 0.001
12 | epochs = 1
13 | fl_algorithm = fedaiot_distributed
14 | comm_round = 100
15 | test_frequency = 5
16 | server_optimizer = sgd
17 | server_lr = 1
18 | alpha = 0.25
19 | partition_type = central
20 | device = cuda
21 | trainer = BaseTrainer
22 | amp = false
23 | analysis = baseline
24 | class_mixup = 1.0
25 | entity = fedaiot
26 | project = ray_fl_dev_v5
27 |
--------------------------------------------------------------------------------
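
Despite the .yml extension, the file above is INI-style (a [DEFAULT] section with key = value pairs), so it is presumably parsed with Python's configparser rather than a YAML loader. A small sketch under that assumption:

# Sketch: reading the INI-style defaults (assumes configparser is the
# intended reader, given the [DEFAULT] section syntax).
import configparser

cfg = configparser.ConfigParser()
cfg.read("config.yml")
d = cfg["DEFAULT"]
print(d["model"], d.getint("comm_round"), d.getfloat("lr"), d.getboolean("amp"))
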
/confusion_matrices/conf_casas.csv:
--------------------------------------------------------------------------------
1 | "0","1","2","3","4","5","6","7","8","9","10","11"
2 | "26","1","1","2","0","0","0","0","0","1","0","0"
3 | "0","172","0","18","0","1","1","0","0","0","7","1"
4 | "0","0","97","4","4","0","1","0","0","0","2","1"
5 | "0","11","2","1297","21","3","9","6","1","4","46","10"
6 | "0","0","0","33","134","0","0","1","0","0","15","1"
7 | "0","0","0","13","3","29","3","0","0","6","0","0"
8 | "0","1","0","14","0","3","201","0","0","0","1","0"
9 | "0","3","0","8","1","0","0","32","0","0","3","6"
10 | "0","0","0","2","0","0","0","0","12","1","0","0"
11 | "0","1","0","11","1","12","0","0","0","118","0","0"
12 | "0","3","1","47","14","0","1","1","0","0","335","7"
13 | "0","3","0","16","1","0","1","4","0","1","4","86"
--------------------------------------------------------------------------------
/confusion_matrices/conf_epic_sounds.csv:
--------------------------------------------------------------------------------
1 | "0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43"
2 | "3557","711","485","356","23","42","366","20","112","85","11","41","675","45","138","0","15","15","52","0","10","16","5","25","0","1","2","6","11","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
3 | "187","2649","222","60","50","166","19","33","36","60","0","1","24","54","1","0","0","0","4","6","0","0","10","1","0","5","2","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
4 | "405","713","1798","60","121","18","199","3","73","17","0","15","21","78","10","5","25","5","9","0","0","5","5","0","0","0","0","0","1","0","0","0","0","4","0","0","0","0","0","0","0","0","0","0"
5 | "222","324","95","1596","19","0","175","8","12","21","0","5","44","22","1","5","0","5","0","0","0","0","24","7","0","6","0","9","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
6 | "86","903","360","118","1760","15","46","7","42","20","0","10","14","70","2","0","0","0","138","0","0","0","49","16","0","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0"
7 | "136","606","38","37","66","1700","24","1","48","2","0","7","67","72","10","0","0","0","13","56","2","0","7","7","0","1","1","2","1","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0"
8 | "234","291","162","339","37","0","431","5","15","36","0","15","41","21","10","0","10","0","20","0","0","0","9","15","0","8","2","8","1","0","0","0","0","0","0","0","0","0","5","0","0","0","0","0"
9 | "45","840","50","69","7","3","26","733","7","10","0","5","21","28","0","0","0","0","0","0","0","0","0","0","0","0","0","2","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
10 | "302","477","129","43","78","59","27","1","522","36","2","10","82","103","9","0","0","2","22","3","0","0","1","4","0","5","0","6","1","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0"
11 | "238","554","134","179","16","6","80","21","15","141","0","10","46","7","36","0","0","0","33","0","0","0","22","8","0","0","2","0","0","0","0","0","0","0","2","0","0","0","0","0","0","0","0","0"
12 | "478","108","122","53","5","0","116","10","12","5","26","10","91","6","20","0","0","5","15","0","0","10","0","4","0","3","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
13 | "452","182","129","139","21","7","173","1","39","11","0","40","70","25","7","0","0","5","7","0","0","0","8","8","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
14 | "720","228","141","57","11","15","30","2","112","18","10","7","670","12","85","0","0","5","5","0","0","0","15","2","0","0","1","0","2","0","0","0","0","0","2","0","0","5","0","0","0","0","0","0"
15 | "9","62","4","6","7","5","2","2","9","1","0","0","2","53","0","1","0","0","0","0","0","0","0","0","0","0","0","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
16 | "137","28","35","5","0","0","5","0","6","5","0","0","143","0","119","0","0","5","0","0","0","0","0","0","0","0","0","0","2","0","0","0","0","0","0","0","0","5","0","0","0","0","0","0"
17 | "45","75","25","20","5","0","0","30","0","15","0","0","10","0","0","355","0","0","5","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
18 | "81","21","147","0","0","0","15","0","2","5","0","0","15","13","0","0","51","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
19 | "163","39","20","29","0","5","16","0","19","10","0","0","67","13","15","2","0","12","0","0","0","0","1","0","0","0","0","0","1","0","3","0","0","0","0","0","0","0","0","0","0","0","0","0"
20 | "15","26","22","21","27","0","10","0","1","5","0","0","0","0","0","0","0","0","20","0","0","0","0","3","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
21 | "57","146","1","8","13","166","13","0","20","7","0","0","12","20","0","0","0","0","2","50","0","0","2","8","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
22 | "100","10","10","5","10","0","10","0","5","5","0","0","35","0","12","0","0","0","0","0","15","0","0","0","0","0","2","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0"
23 | "15","0","15","30","0","0","0","0","5","0","0","0","5","5","0","0","0","0","0","0","0","10","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
24 | "0","20","16","42","15","0","36","0","0","0","0","5","5","0","0","0","0","0","5","0","0","0","11","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
25 | "64","10","22","33","12","0","10","0","0","0","0","0","23","6","0","0","0","0","14","0","0","0","0","21","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
26 | "10","82","0","6","0","1","0","0","5","15","0","0","10","11","0","0","0","0","5","0","0","0","0","0","20","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
27 | "40","18","6","0","5","0","10","0","1","0","0","0","15","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
28 | "40","8","10","20","5","0","14","2","0","5","0","0","27","0","12","0","0","0","0","0","0","0","0","1","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
29 | "15","31","10","11","10","0","13","0","0","5","0","0","5","0","0","0","0","0","4","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
30 | "67","20","10","10","5","0","5","0","0","6","0","0","7","0","15","0","0","0","0","0","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","5","0","0","0","0","0","0"
31 | "12","42","0","3","9","15","3","1","25","0","0","5","0","28","1","0","0","0","3","0","0","0","0","1","0","0","2","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
32 | "39","18","0","12","1","0","6","1","9","5","0","0","35","1","17","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0"
33 | "55","13","10","28","7","0","7","0","1","0","0","0","0","3","10","0","0","5","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
34 | "15","10","5","0","3","4","10","0","0","0","0","0","0","2","5","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
35 | "0","8","1","0","1","3","1","0","2","0","0","0","0","18","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0"
36 | "21","5","5","10","1","0","10","0","6","0","0","5","21","4","11","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
37 | "0","124","23","5","5","0","0","0","3","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
38 | "11","7","5","0","0","1","0","5","0","0","0","0","0","1","0","0","0","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
39 | "25","10","5","10","0","0","0","0","0","5","0","0","0","0","40","0","0","0","0","0","0","0","0","0","0","0","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
40 | "5","0","0","10","0","0","0","0","0","0","0","0","0","0","20","0","0","0","0","0","0","0","0","0","0","0","0","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
41 | "20","5","5","5","0","0","0","0","5","0","0","0","5","0","15","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
42 | "5","11","0","0","2","1","0","0","0","0","0","0","2","17","0","0","0","0","0","0","0","0","0","1","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
43 | "0","16","4","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
44 | "0","10","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
45 | "0","7","0","0","0","0","0","5","1","0","0","0","0","2","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0"
--------------------------------------------------------------------------------
/confusion_matrices/conf_ut_har.csv:
--------------------------------------------------------------------------------
1 | "0","1","2","3","4","5","6"
2 | "63","0","0","2","0","0","1"
3 | "0","43","1","0","0","0","0"
4 | "0","0","31","0","0","0","3"
5 | "0","1","0","48","0","0","0"
6 | "0","0","0","4","117","0","0"
7 | "0","1","3","0","2","34","0"
8 | "0","0","1","0","0","3","26"
--------------------------------------------------------------------------------
/confusion_matrices/conf_widar.csv:
--------------------------------------------------------------------------------
1 | "0","1","2","3","4","5","6","7","8"
2 | "1049","65","148","55","9","32","23","18","24"
3 | "205","615","196","54","12","56","26","41","39"
4 | "46","5","245","58","1","14","0","2","4"
5 | "114","8","128","583","4","51","0","3","33"
6 | "136","11","22","14","53","7","2","3","2"
7 | "18","6","59","62","1","75","2","10","17"
8 | "54","70","21","4","0","5","68","4","24"
9 | "31","19","68","14","0","30","8","51","29"
10 | "33","24","9","18","0","14","17","8","127"
--------------------------------------------------------------------------------
/confusion_matrices/conf_wisdm_phone.csv:
--------------------------------------------------------------------------------
1 | "0","1","2","3","4","5","6","7","8","9","10","11"
2 | "232","23","38","0","0","0","0","0","0","0","0","0"
3 | "50","209","3","0","0","0","0","0","0","0","0","0"
4 | "142","0","173","0","0","0","1","0","0","0","0","11"
5 | "0","0","2","111","6","8","1","122","32","96","0","1"
6 | "22","0","0","0","221","38","7","7","11","0","0","34"
7 | "0","0","0","49","54","79","9","69","41","36","0","11"
8 | "13","5","9","2","61","9","25","74","14","40","13","43"
9 | "0","0","0","51","24","55","25","125","1","32","2","1"
10 | "1","0","24","51","32","76","37","47","43","34","2","1"
11 | "0","0","0","34","62","57","23","42","40","62","0","28"
12 | "4","0","41","115","34","3","14","27","1","85","0","47"
13 | "8","2","30","15","77","18","1","32","0","4","2","139"
--------------------------------------------------------------------------------
/confusion_matrices/conf_wisdm_watch.csv:
--------------------------------------------------------------------------------
1 | "0","1","2","3","4","5","6","7","8","9","10","11"
2 | "280","0","49","0","0","0","0","0","0","0","0","0"
3 | "1","287","0","0","0","0","0","0","0","0","4","4"
4 | "25","4","252","0","9","0","3","0","0","0","5","31"
5 | "0","0","0","171","38","54","0","25","34","23","0","0"
6 | "0","0","0","33","251","0","1","22","27","14","0","0"
7 | "0","0","0","48","2","211","0","0","1","36","0","0"
8 | "0","0","22","1","35","0","206","25","3","0","16","8"
9 | "1","0","2","12","11","6","15","116","146","9","0","22"
10 | "1","0","0","8","14","2","1","62","241","20","3","13"
11 | "0","0","0","43","11","15","0","20","0","240","0","0"
12 | "25","0","0","0","10","0","7","8","0","0","289","0"
13 | "1","0","36","0","0","0","1","0","3","0","1","292"
--------------------------------------------------------------------------------
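
The confusion-matrix CSVs above are square tables with a class-index header row. Per-class recall and overall accuracy can be recovered from them as sketched below, treating rows as true labels and columns as predictions (an assumption, since the files carry no axis labels):

# Sketch: per-class recall and overall accuracy from one of the CSVs above.
# Row = true class, column = predicted class (assumed orientation).
import numpy as np
import pandas as pd

cm = pd.read_csv("confusion_matrices/conf_ut_har.csv").to_numpy(dtype=float)
recall = np.diag(cm) / cm.sum(axis=1)      # per-class recall
accuracy = np.diag(cm).sum() / cm.sum()    # overall accuracy
print(np.round(recall, 3), round(float(accuracy), 3))
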
/datasets/emognition/download.py:
--------------------------------------------------------------------------------
1 | import os
2 | import zipfile
3 |
4 | import gdown
5 |
6 | # Google Drive file ID of the shared dataset archive
7 | FILE_ID = "1XUYMBP0p2VSJTppgE2BT87rr6MK0vagl"
8 |
9 | # Define the directory where you want to save the dataset
10 | SAVE_DIR = "./datasets/emognition"
11 |
12 |
13 | # Function to download the file from Google Drive
14 | def download_file_from_google_drive(file_id, save_dir):
15 | if not os.path.exists(save_dir):
16 | os.makedirs(save_dir)
17 |
18 | file_path = os.path.join(save_dir, "_study_data.zip")
19 | gdown.download(output=file_path, quiet=False, id=file_id)
20 |
21 | return file_path
22 |
23 |
24 | # Function to extract the dataset
25 | def extract_file(file_path, save_dir):
26 | with zipfile.ZipFile(file_path, "r") as zip_ref:
27 | zip_ref.extractall(save_dir)
28 | print(f"Extracted dataset to {save_dir}")
29 |
30 |
31 | # Main function to download and extract the Emognition study data archive
32 | def main():
33 | file_path = download_file_from_google_drive(FILE_ID, SAVE_DIR)
34 | extract_file(file_path, SAVE_DIR)
35 |
36 |
37 | if __name__ == "__main__":
38 | main()
39 |
--------------------------------------------------------------------------------
/datasets/epic_sounds/download.py:
--------------------------------------------------------------------------------
1 | import os
3 |
4 | import gdown
5 |
6 | # Google Drive file ID of the shared dataset file
7 | FILE_ID = "1BAaBIYqU6gZDyFqu9aW6spvpwpsEDZMS"
8 |
9 | # Define the directory where you want to save the dataset
10 | SAVE_DIR = "./datasets/epic_sounds"
11 |
12 |
13 | # Function to download the file from Google Drive
14 | def download_file_from_google_drive(file_id, save_dir):
15 | if not os.path.exists(save_dir):
16 | os.makedirs(save_dir)
17 |
18 | file_path = os.path.join(save_dir, "EPIC_audio.hdf5")
19 | gdown.download(output=file_path, quiet=False, id=file_id)
20 |
21 | return file_path
22 |
23 |
24 | # Main function to download the EPIC_audio.hdf5 file (no extraction needed)
25 | def main():
26 | download_file_from_google_drive(FILE_ID, SAVE_DIR)
27 |
28 |
29 | if __name__ == "__main__":
30 | main()
31 |
--------------------------------------------------------------------------------
/datasets/epic_sounds/epic-kitchens-download-scripts-master/data/epic_55_splits.csv:
--------------------------------------------------------------------------------
1 | participant_id,video_id,split
2 | P01,P01_01,train
3 | P01,P01_02,train
4 | P01,P01_03,train
5 | P01,P01_04,train
6 | P01,P01_05,train
7 | P01,P01_06,train
8 | P01,P01_07,train
9 | P01,P01_08,train
10 | P01,P01_09,train
11 | P01,P01_10,train
12 | P01,P01_16,train
13 | P01,P01_17,train
14 | P01,P01_18,train
15 | P01,P01_19,train
16 | P02,P02_01,train
17 | P02,P02_02,train
18 | P02,P02_03,train
19 | P02,P02_04,train
20 | P02,P02_05,train
21 | P02,P02_06,train
22 | P02,P02_07,train
23 | P02,P02_08,train
24 | P02,P02_09,train
25 | P02,P02_10,train
26 | P02,P02_11,train
27 | P03,P03_02,train
28 | P03,P03_03,train
29 | P03,P03_04,train
30 | P03,P03_05,train
31 | P03,P03_06,train
32 | P03,P03_07,train
33 | P03,P03_08,train
34 | P03,P03_09,train
35 | P03,P03_10,train
36 | P03,P03_11,train
37 | P03,P03_12,train
38 | P03,P03_13,train
39 | P03,P03_14,train
40 | P03,P03_15,train
41 | P03,P03_16,train
42 | P03,P03_17,train
43 | P03,P03_18,train
44 | P03,P03_19,train
45 | P03,P03_20,train
46 | P03,P03_27,train
47 | P03,P03_28,train
48 | P04,P04_01,train
49 | P04,P04_02,train
50 | P04,P04_03,train
51 | P04,P04_04,train
52 | P04,P04_05,train
53 | P04,P04_06,train
54 | P04,P04_07,train
55 | P04,P04_08,train
56 | P04,P04_09,train
57 | P04,P04_10,train
58 | P04,P04_11,train
59 | P04,P04_12,train
60 | P04,P04_13,train
61 | P04,P04_14,train
62 | P04,P04_15,train
63 | P04,P04_16,train
64 | P04,P04_17,train
65 | P04,P04_18,train
66 | P04,P04_19,train
67 | P04,P04_20,train
68 | P04,P04_21,train
69 | P04,P04_22,train
70 | P04,P04_23,train
71 | P05,P05_01,train
72 | P05,P05_02,train
73 | P05,P05_03,train
74 | P05,P05_04,train
75 | P05,P05_05,train
76 | P05,P05_06,train
77 | P05,P05_08,train
78 | P06,P06_01,train
79 | P06,P06_02,train
80 | P06,P06_03,train
81 | P06,P06_05,train
82 | P06,P06_07,train
83 | P06,P06_08,train
84 | P06,P06_09,train
85 | P07,P07_01,train
86 | P07,P07_02,train
87 | P07,P07_03,train
88 | P07,P07_04,train
89 | P07,P07_05,train
90 | P07,P07_06,train
91 | P07,P07_07,train
92 | P07,P07_08,train
93 | P07,P07_09,train
94 | P07,P07_10,train
95 | P07,P07_11,train
96 | P08,P08_01,train
97 | P08,P08_02,train
98 | P08,P08_03,train
99 | P08,P08_04,train
100 | P08,P08_05,train
101 | P08,P08_06,train
102 | P08,P08_07,train
103 | P08,P08_08,train
104 | P08,P08_11,train
105 | P08,P08_12,train
106 | P08,P08_13,train
107 | P08,P08_18,train
108 | P08,P08_19,train
109 | P08,P08_20,train
110 | P08,P08_21,train
111 | P08,P08_22,train
112 | P08,P08_23,train
113 | P08,P08_24,train
114 | P08,P08_25,train
115 | P08,P08_26,train
116 | P08,P08_27,train
117 | P08,P08_28,train
118 | P10,P10_01,train
119 | P10,P10_02,train
120 | P10,P10_04,train
121 | P12,P12_01,train
122 | P12,P12_02,train
123 | P12,P12_04,train
124 | P12,P12_05,train
125 | P12,P12_06,train
126 | P12,P12_07,train
127 | P13,P13_04,train
128 | P13,P13_05,train
129 | P13,P13_06,train
130 | P13,P13_07,train
131 | P13,P13_08,train
132 | P13,P13_09,train
133 | P13,P13_10,train
134 | P14,P14_01,train
135 | P14,P14_02,train
136 | P14,P14_03,train
137 | P14,P14_04,train
138 | P14,P14_05,train
139 | P14,P14_07,train
140 | P14,P14_09,train
141 | P15,P15_01,train
142 | P15,P15_02,train
143 | P15,P15_03,train
144 | P15,P15_07,train
145 | P15,P15_08,train
146 | P15,P15_09,train
147 | P15,P15_10,train
148 | P15,P15_11,train
149 | P15,P15_12,train
150 | P15,P15_13,train
151 | P16,P16_01,train
152 | P16,P16_02,train
153 | P16,P16_03,train
154 | P17,P17_01,train
155 | P17,P17_03,train
156 | P17,P17_04,train
157 | P19,P19_01,train
158 | P19,P19_02,train
159 | P19,P19_03,train
160 | P19,P19_04,train
161 | P20,P20_01,train
162 | P20,P20_02,train
163 | P20,P20_03,train
164 | P20,P20_04,train
165 | P21,P21_01,train
166 | P21,P21_03,train
167 | P21,P21_04,train
168 | P22,P22_05,train
169 | P22,P22_06,train
170 | P22,P22_07,train
171 | P22,P22_08,train
172 | P22,P22_09,train
173 | P22,P22_10,train
174 | P22,P22_11,train
175 | P22,P22_12,train
176 | P22,P22_13,train
177 | P22,P22_14,train
178 | P22,P22_15,train
179 | P22,P22_16,train
180 | P22,P22_17,train
181 | P23,P23_01,train
182 | P23,P23_02,train
183 | P23,P23_03,train
184 | P23,P23_04,train
185 | P24,P24_01,train
186 | P24,P24_02,train
187 | P24,P24_03,train
188 | P24,P24_04,train
189 | P24,P24_05,train
190 | P24,P24_06,train
191 | P24,P24_07,train
192 | P24,P24_08,train
193 | P25,P25_01,train
194 | P25,P25_02,train
195 | P25,P25_03,train
196 | P25,P25_04,train
197 | P25,P25_05,train
198 | P25,P25_09,train
199 | P25,P25_10,train
200 | P25,P25_11,train
201 | P25,P25_12,train
202 | P26,P26_01,train
203 | P26,P26_02,train
204 | P26,P26_03,train
205 | P26,P26_04,train
206 | P26,P26_05,train
207 | P26,P26_06,train
208 | P26,P26_07,train
209 | P26,P26_08,train
210 | P26,P26_09,train
211 | P26,P26_10,train
212 | P26,P26_11,train
213 | P26,P26_12,train
214 | P26,P26_13,train
215 | P26,P26_14,train
216 | P26,P26_15,train
217 | P26,P26_16,train
218 | P26,P26_17,train
219 | P26,P26_18,train
220 | P26,P26_19,train
221 | P26,P26_20,train
222 | P26,P26_21,train
223 | P26,P26_22,train
224 | P26,P26_23,train
225 | P26,P26_24,train
226 | P26,P26_25,train
227 | P26,P26_26,train
228 | P26,P26_27,train
229 | P26,P26_28,train
230 | P26,P26_29,train
231 | P27,P27_01,train
232 | P27,P27_02,train
233 | P27,P27_03,train
234 | P27,P27_04,train
235 | P27,P27_06,train
236 | P27,P27_07,train
237 | P28,P28_01,train
238 | P28,P28_02,train
239 | P28,P28_03,train
240 | P28,P28_04,train
241 | P28,P28_05,train
242 | P28,P28_06,train
243 | P28,P28_07,train
244 | P28,P28_08,train
245 | P28,P28_09,train
246 | P28,P28_10,train
247 | P28,P28_11,train
248 | P28,P28_12,train
249 | P28,P28_13,train
250 | P28,P28_14,train
251 | P29,P29_01,train
252 | P29,P29_02,train
253 | P29,P29_03,train
254 | P29,P29_04,train
255 | P30,P30_01,train
256 | P30,P30_02,train
257 | P30,P30_03,train
258 | P30,P30_04,train
259 | P30,P30_05,train
260 | P30,P30_06,train
261 | P30,P30_10,train
262 | P30,P30_11,train
263 | P31,P31_01,train
264 | P31,P31_02,train
265 | P31,P31_03,train
266 | P31,P31_04,train
267 | P31,P31_05,train
268 | P31,P31_06,train
269 | P31,P31_07,train
270 | P31,P31_08,train
271 | P31,P31_09,train
272 | P31,P31_13,train
273 | P31,P31_14,train
274 | P01,P01_11,test
275 | P01,P01_12,test
276 | P01,P01_13,test
277 | P01,P01_14,test
278 | P01,P01_15,test
279 | P02,P02_12,test
280 | P02,P02_13,test
281 | P02,P02_14,test
282 | P02,P02_15,test
283 | P03,P03_21,test
284 | P03,P03_22,test
285 | P03,P03_23,test
286 | P03,P03_24,test
287 | P03,P03_25,test
288 | P03,P03_26,test
289 | P04,P04_24,test
290 | P04,P04_25,test
291 | P04,P04_26,test
292 | P04,P04_27,test
293 | P04,P04_28,test
294 | P04,P04_29,test
295 | P04,P04_30,test
296 | P04,P04_31,test
297 | P04,P04_32,test
298 | P04,P04_33,test
299 | P05,P05_07,test
300 | P05,P05_09,test
301 | P06,P06_10,test
302 | P06,P06_11,test
303 | P06,P06_12,test
304 | P06,P06_13,test
305 | P06,P06_14,test
306 | P07,P07_12,test
307 | P07,P07_13,test
308 | P07,P07_14,test
309 | P07,P07_15,test
310 | P07,P07_16,test
311 | P07,P07_17,test
312 | P07,P07_18,test
313 | P08,P08_09,test
314 | P08,P08_10,test
315 | P08,P08_14,test
316 | P08,P08_15,test
317 | P08,P08_16,test
318 | P08,P08_17,test
319 | P10,P10_03,test
320 | P12,P12_03,test
321 | P12,P12_08,test
322 | P13,P13_01,test
323 | P13,P13_02,test
324 | P13,P13_03,test
325 | P14,P14_06,test
326 | P14,P14_08,test
327 | P15,P15_04,test
328 | P15,P15_05,test
329 | P15,P15_06,test
330 | P16,P16_04,test
331 | P17,P17_02,test
332 | P19,P19_05,test
333 | P19,P19_06,test
334 | P20,P20_05,test
335 | P20,P20_06,test
336 | P20,P20_07,test
337 | P21,P21_02,test
338 | P22,P22_01,test
339 | P22,P22_02,test
340 | P22,P22_03,test
341 | P22,P22_04,test
342 | P23,P23_05,test
343 | P24,P24_09,test
344 | P25,P25_06,test
345 | P25,P25_07,test
346 | P25,P25_08,test
347 | P26,P26_30,test
348 | P26,P26_31,test
349 | P26,P26_32,test
350 | P26,P26_33,test
351 | P26,P26_34,test
352 | P26,P26_35,test
353 | P26,P26_36,test
354 | P26,P26_37,test
355 | P26,P26_38,test
356 | P26,P26_39,test
357 | P26,P26_40,test
358 | P26,P26_41,test
359 | P27,P27_05,test
360 | P28,P28_15,test
361 | P28,P28_16,test
362 | P28,P28_17,test
363 | P28,P28_18,test
364 | P28,P28_19,test
365 | P28,P28_20,test
366 | P28,P28_21,test
367 | P28,P28_22,test
368 | P28,P28_23,test
369 | P28,P28_24,test
370 | P28,P28_25,test
371 | P28,P28_26,test
372 | P29,P29_05,test
373 | P29,P29_06,test
374 | P30,P30_07,test
375 | P30,P30_08,test
376 | P30,P30_09,test
377 | P31,P31_10,test
378 | P31,P31_11,test
379 | P31,P31_12,test
380 | P09,P09_01,test
381 | P09,P09_02,test
382 | P09,P09_03,test
383 | P09,P09_04,test
384 | P09,P09_05,test
385 | P09,P09_06,test
386 | P09,P09_07,test
387 | P09,P09_08,test
388 | P11,P11_01,test
389 | P11,P11_02,test
390 | P11,P11_03,test
391 | P11,P11_04,test
392 | P11,P11_05,test
393 | P11,P11_06,test
394 | P11,P11_07,test
395 | P11,P11_08,test
396 | P11,P11_09,test
397 | P11,P11_10,test
398 | P11,P11_11,test
399 | P11,P11_12,test
400 | P11,P11_13,test
401 | P11,P11_14,test
402 | P11,P11_15,test
403 | P11,P11_16,test
404 | P11,P11_17,test
405 | P11,P11_18,test
406 | P11,P11_19,test
407 | P11,P11_20,test
408 | P11,P11_21,test
409 | P11,P11_22,test
410 | P11,P11_23,test
411 | P11,P11_24,test
412 | P18,P18_01,test
413 | P18,P18_02,test
414 | P18,P18_03,test
415 | P18,P18_04,test
416 | P18,P18_05,test
417 | P18,P18_06,test
418 | P18,P18_07,test
419 | P18,P18_08,test
420 | P18,P18_09,test
421 | P18,P18_10,test
422 | P18,P18_11,test
423 | P18,P18_12,test
424 | P32,P32_01,test
425 | P32,P32_02,test
426 | P32,P32_03,test
427 | P32,P32_04,test
428 | P32,P32_05,test
429 | P32,P32_06,test
430 | P32,P32_07,test
431 | P32,P32_08,test
432 | P32,P32_09,test
433 | P32,P32_10,test
434 |
--------------------------------------------------------------------------------
/datasets/epic_sounds/epic-kitchens-download-scripts-master/data/errata.csv:
--------------------------------------------------------------------------------
1 | rdsf_path,dropbox_path
2 | P01/rgb_frames/P01_109.tar,https://www.dropbox.com/s/mh7y0goc5x945nu/P01_109.tar?dl=1
3 | P27/rgb_frames/P27_103.tar,https://www.dropbox.com/s/c1eo70v6dokr6cf/P27_103.tar?dl=1
4 | P01/flow_frames/P01_109.tar,https://www.dropbox.com/s/kdadnyf1epte0f1/P01_109.tar?dl=1
5 | P27/flow_frames/P27_103.tar,https://www.dropbox.com/s/48kiyqarqfmb2bk/P27_103.tar?dl=1
6 | hand-objects/P01/P01_109.pkl,https://www.dropbox.com/s/fjvhpd4o9l2n08y/P01_109.pkl?dl=1
7 | hand-objects/P27/P27_103.pkl,https://www.dropbox.com/s/ocvy4fskv9j8xmt/P27_103.pkl?dl=1
8 | masks/P01/P01_109.pkl,https://www.dropbox.com/s/mzhguzwsyjxbh9e/P01_109.pkl?dl=1
9 | masks/P27/P27_103.pkl,https://www.dropbox.com/s/zc15u7qlm3hqn0c/P27_103.pkl?dl=1
--------------------------------------------------------------------------------
/datasets/epic_sounds/epic-kitchens-download-scripts-master/download_extension_only.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python epic_downloader.py --extension_only
--------------------------------------------------------------------------------
/datasets/epic_sounds/epic-kitchens-download-scripts-master/download_full_epic.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python epic_downloader.py
--------------------------------------------------------------------------------
/datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_recognition_test_timestamps.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_recognition_test_timestamps.pkl
--------------------------------------------------------------------------------
/datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_train.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_train.pkl
--------------------------------------------------------------------------------
/datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_validation.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_validation.pkl
--------------------------------------------------------------------------------
/datasets/epic_sounds/epic-sounds-annotations-main/sound_events_not_categorised.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/datasets/epic_sounds/epic-sounds-annotations-main/sound_events_not_categorised.pkl
--------------------------------------------------------------------------------
/datasets/ut_har/download.py:
--------------------------------------------------------------------------------
1 | import os
2 | import zipfile
3 |
4 | import gdown
5 | from process import process
6 |
7 | # Google Drive file ID of the shared dataset archive
8 | FILE_ID = "1fEiI3nAoOsddR5qcJQXqz4ocM3aMAcwz"
9 |
10 | # Define the directory where you want to save the dataset
11 | SAVE_DIR = "./datasets/ut_har"
12 |
13 |
14 | # Function to download the file from Google Drive
15 | def download_file_from_google_drive(file_id, save_dir):
16 | if not os.path.exists(save_dir):
17 | os.makedirs(save_dir)
18 |
19 | file_path = os.path.join(save_dir, "UT_HAR.zip")
20 | gdown.download(output=file_path, quiet=False, id=file_id)
21 |
22 | return file_path
23 |
24 |
25 | # Function to extract the dataset
26 | def extract_file(file_path, save_dir):
27 | with zipfile.ZipFile(file_path, "r") as zip_ref:
28 | zip_ref.extractall(save_dir)
29 | print(f"Extracted dataset to {save_dir}")
30 |
31 |
32 | # Main function to download, extract, and process the UT_HAR.zip file
33 | def main():
34 | file_path = download_file_from_google_drive(FILE_ID, SAVE_DIR)
35 | extract_file(file_path, SAVE_DIR)
36 | process(SAVE_DIR)
37 |
38 |
39 | if __name__ == "__main__":
40 | main()
41 |
--------------------------------------------------------------------------------
/datasets/ut_har/process.py:
--------------------------------------------------------------------------------
1 | import glob
2 |
3 | import numpy as np
4 | import torch
5 |
6 |
7 | def process(root_dir='.'):
8 | data_list = glob.glob(root_dir + '/UT_HAR/data/*.csv')  # .csv extension, but the files hold binary NumPy arrays (read with np.load below)
9 | label_list = glob.glob(root_dir + '/UT_HAR/label/*.csv')
10 | print(data_list, label_list)
11 | WiFi_data = {}
12 | for data_dir in data_list:
13 | data_name = data_dir.split('/')[-1].split('.')[0]
14 | with open(data_dir, 'rb') as f:
15 | data = np.load(f)
16 | data = data.reshape(len(data), 1, 250, 90)
17 | data_norm = (data - np.min(data)) / (np.max(data) - np.min(data))
18 | WiFi_data[data_name] = torch.Tensor(data_norm)
19 | for label_dir in label_list:
20 | label_name = label_dir.split('/')[-1].split('.')[0]
21 | with open(label_dir, 'rb') as f:
22 | label = np.load(f)
23 | WiFi_data[label_name] = torch.Tensor(label)
24 | return WiFi_data
25 |
26 |
27 | if __name__ == '__main__':
28 | data = process()
29 | for k, v in data.items():
30 | print(k, v.shape)
31 |
--------------------------------------------------------------------------------
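
Note that process() normalizes with a single global min-max over the entire array, x' = (x - min) / (max - min), rather than per sample. A tiny demonstration:

# Global min-max scaling as used in process(): one min/max for the whole array.
import numpy as np

data = np.array([[1.0, 2.0], [3.0, 5.0]])
norm = (data - data.min()) / (data.max() - data.min())
print(norm)  # [[0.   0.25] [0.5  1.  ]]
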
/datasets/visdrone/clusterer.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import torch
7 | import torchvision
8 | import torchvision.models as models
9 | import torchvision.transforms as transforms
10 | from PIL import Image
11 | from pycocotools import coco
12 | from torch.utils.data import Dataset, DataLoader
13 | from tqdm import tqdm
14 |
15 | transformations = transforms.Compose([
16 | transforms.Resize(256),
17 | transforms.CenterCrop(224),
18 | transforms.ToTensor(),
19 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
20 | ])
21 | model = models.resnet50(pretrained=True)
22 |
23 |
24 | def extract_imagenet_features(img_path, transform=transformations, model=model):
25 | # `transform` and `model` default to the module-level objects defined above
26 |
27 | # Set the model to evaluation mode
28 | model.eval()
29 |
31 |
32 | # Load the image and apply the pre-processing transforms
33 | img = Image.open(img_path)
34 | img_tensor = transform(img).unsqueeze(0)
35 |
36 | # Extract features from the image using the model
37 | with torch.no_grad():
38 | features = model(img_tensor)
39 |
40 | # Flatten the features tensor
41 | flattened_features = features.flatten()
42 |
43 | return flattened_features
44 |
45 |
46 | class VisDroneDataset(Dataset):
47 | def __init__(self, data_dir, transform=None):
48 | # self.coco = coco.COCO(ann_dir)
49 | self.data_dir = data_dir
50 | self.images = [x for x in os.listdir(data_dir) if '.jpg' in x]
51 | self.transform = transform
52 | # self.images = os.listdir(os.path.join(data_dir, 'images'))
53 | # self.annotations = os.listdir(os.path.join(data_dir, 'annotations'))
54 |
55 | def __len__(self):
56 | return len(self.images)
57 |
58 | def __getitem__(self, idx):
59 | # Load the image
60 | # image = Image.open(os.path.join(self.data_dir, self.coco.loadImgs(self.images[idx])[0]['file_name']))
61 | image = extract_imagenet_features(os.path.join(self.data_dir, self.images[idx]))
62 | if self.transform:
63 | image = self.transform(image)
64 |
65 | # Load the annotations
66 | # with open(os.path.join(self.data_dir, 'annotations', self.annotations[idx]), 'r') as f:
67 | # annotations = f.readlines()
68 |
69 | # Parse the annotations
70 | # boxes = []
71 | # labels = []
72 | # for annotation in annotations:
73 | # xmin, ymin, xmax, ymax, label = annotation.strip().split(',')
74 | # boxes.append([int(xmin), int(ymin), int(xmax), int(ymax)])
75 | # labels.append(int(label))
76 |
77 | # Convert the annotations to tensors
78 | # boxes = torch.as_tensor(boxes, dtype=torch.float32)
79 | # labels = torch.as_tensor(labels, dtype=torch.int64)
80 |
81 | return image, self.images[idx] # boxes, labels
82 |
83 |
84 | # Redefine the image transformations (identical to the pipeline at the top of the file)
85 | transformations = transforms.Compose([
86 | transforms.Resize(256),
87 | transforms.CenterCrop(224),
88 | transforms.ToTensor(),
89 | transforms.Normalize(mean=[0.485, 0.456, 0.406],
90 | std=[0.229, 0.224, 0.225])
91 | ])
92 |
93 | dataset = VisDroneDataset(data_dir='train/images',
94 | transform=None)
95 |
96 | # Load a pretrained ResNet-50 model (reloading the model already created above)
97 | model = torchvision.models.resnet50(pretrained=True)
98 | model.eval()
99 |
100 | # Extract features for each image in the dataset
101 | ids = []
102 | features = []
103 | for i, (image_features, img_id) in tqdm(enumerate(dataset), total=len(dataset)):
104 | with torch.no_grad():
105 | feature = image_features.numpy()
106 | ids.append(img_id)
107 | features.append(feature)
108 |
109 | # Convert the features to a numpy array
110 | features = np.array(features)
111 |
112 | # Perform K-means clustering on the features to cluster the images into 10 clusters
113 | from sklearn.cluster import KMeans
114 |
115 | kmeans = KMeans(n_clusters=10).fit(features)
116 | df = pd.DataFrame({'image_id': ids, 'cluster': kmeans.labels_})
117 | df.to_csv('split.csv')
118 | print(df.groupby('cluster').count())
119 | clusters = kmeans.labels_
120 |
--------------------------------------------------------------------------------
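
The script above writes an image_id -> cluster assignment to split.csv. A sketch of consuming it to group images by cluster (using the clusters as a non-IID partition of VisDrone is an assumption based on the surrounding repo, not shown here):

# Sketch: grouping VisDrone images by the KMeans cluster ids in split.csv.
import pandas as pd

df = pd.read_csv("split.csv", index_col=0)
for cluster_id, group in df.groupby("cluster"):
    print(cluster_id, len(group), group["image_id"].head(2).tolist())
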
/datasets/visdrone/download.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import zipfile
4 | from pathlib import Path
5 |
6 | import PIL.Image as Image
7 | import gdown
8 | import requests
9 | from tqdm import tqdm
10 |
11 | # Define the VisDrone dataset URLs
12 | DATASET_URLS = [
13 | "https://downloads.visdrone.org/data2018/VisDrone2018-DET-train.zip",
14 | "https://downloads.visdrone.org/data2018/VisDrone2018-DET-val.zip",
15 | "https://downloads.visdrone.org/data2018/VisDrone2018-DET-test-challenge.zip"
16 | ]
17 |
18 | FILE_IDs = [
19 | ('1i8iZ-zYBgWwzX9355HIYrWM1uKeqWW0S', 'VisDrone2019-DET-train.zip', 'train'),
20 | ('1qJKZdv2jEv2c7SfEdMwWR3KOyj_mfhBN', 'VisDrone2019-DET-val.zip', 'val'),
21 | ('1nTC4cqNqT_IJ7EIH28i9YTVGNFq5WgqL', 'VisDrone2019-DET-test-dev.zip', 'test')
22 | ]
23 |
24 | FOLDER_SPLITS = [
25 | ('VisDrone2019-DET-train', 'train'),
26 | ('VisDrone2019-DET-val', 'val'),
27 | ('VisDrone2018-DET-test-dev', 'test')
28 | ]
29 |
30 | # Define the directory where you want to save the dataset
31 | SAVE_DIR = "./datasets/visdrone"
32 |
33 |
34 | def convert_visdrone_to_yolo_format() -> None:
35 | """
36 | Convert VisDrone dataset to YOLOv5 format.
37 | """
38 | visdrone_folder = Path(SAVE_DIR)
39 |
40 | for folder, split in FOLDER_SPLITS:
41 | images_folder = visdrone_folder / f"{folder}/images"
42 | annotations_folder = visdrone_folder / f"{folder}/annotations"
43 |
44 | output_images_folder = visdrone_folder / f"{split}/images"
45 | output_labels_folder = visdrone_folder / f"{split}/labels"
46 |
47 | output_images_folder.mkdir(parents=True, exist_ok=True)
48 | output_labels_folder.mkdir(parents=True, exist_ok=True)
49 |
50 | for annotation_file in tqdm(annotations_folder.glob("*.txt")):
51 | image_file = images_folder / f"{annotation_file.stem}.jpg"
52 |
53 | if image_file.exists():
54 | # Copy image file
55 | shutil.copy(image_file, output_images_folder / image_file.name)
56 | img = Image.open(image_file).convert("RGB")
57 | # Convert and save label file
58 | with open(annotation_file) as f:
59 | lines = f.readlines()
60 |
61 | with open(output_labels_folder / annotation_file.name, "w") as f:
62 | for line in lines:
63 | items = line.strip().split(",")
64 |
65 | # Calculate normalized values required by YOLOv5
66 | # class_id, x_center, y_center, width, height
67 |
68 | class_id = int(items[5])
69 | x_center = (int(items[0]) + int(items[2]) / 2) / img.width
70 | y_center = (int(items[1]) + int(items[3]) / 2) / img.height
71 | width = int(items[2]) / img.width
72 | height = int(items[3]) / img.height
73 |
74 | f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")
75 |
76 |
77 | # Function to download the dataset
78 | def download_dataset(url, save_dir):
79 | if not os.path.exists(save_dir):
80 | os.makedirs(save_dir)
81 |
82 | response = requests.get(url, stream=True)
83 | file_size = int(response.headers.get("Content-Length", 0))
84 | filename = os.path.join(save_dir, url.split("/")[-1])
85 |
86 |     with open(filename, "wb") as f:
87 |         for data in tqdm(response.iter_content(chunk_size=1024), total=file_size // 1024, unit="KB", desc=filename):
88 |             f.write(data)
89 |
90 | print(f"Downloaded {filename}")
91 |
92 | return filename
93 |
94 |
95 | def download_file_from_google_drive(file_id, save_dir, filename):
96 | if not os.path.exists(save_dir):
97 | os.makedirs(save_dir)
98 | file_path = os.path.join(save_dir, filename)
99 | gdown.download(output=file_path, quiet=False, id=file_id)
100 | return file_path
101 |
102 |
103 | # Function to extract the dataset
104 | def extract_dataset(file_path, save_dir):
105 | with zipfile.ZipFile(file_path, "r") as zip_ref:
106 | zip_ref.extractall(save_dir)
107 |
108 | print(f"Extracted dataset to {save_dir}")
109 |
110 |
111 | # Main function to download and extract the VisDrone dataset
112 | def main():
113 | s_dir = SAVE_DIR
114 | for file_id, filename, split in FILE_IDs:
115 | file_path = download_file_from_google_drive(file_id=file_id,
116 | save_dir=SAVE_DIR,
117 | filename=filename)
118 | if 'test' in file_path:
119 | s_dir = f'{SAVE_DIR}/VisDrone2018-DET-test-dev'
120 | Path(s_dir).mkdir(exist_ok=True)
121 | extract_dataset(file_path, s_dir)
122 | print(file_path)
123 | convert_visdrone_to_yolo_format()
124 |
125 |
126 | if __name__ == "__main__":
127 | main()
128 |
--------------------------------------------------------------------------------
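A worked example of the box conversion inside convert_visdrone_to_yolo_format above: VisDrone annotation rows are `left,top,width,height,score,category,...` in pixels, while YOLO expects a class id plus a center point and size normalized by the image dimensions. The annotation values and the 1360x765 image size below are purely illustrative:

# Illustrative VisDrone row: left=680, top=380, w=40, h=20, score=1, category=4
items = "680,380,40,20,1,4,0,0".split(",")
img_w, img_h = 1360, 765  # assumed image size

class_id = int(items[5])                                 # 4
x_center = (int(items[0]) + int(items[2]) / 2) / img_w   # (680 + 20) / 1360 ~= 0.5147
y_center = (int(items[1]) + int(items[3]) / 2) / img_h   # (380 + 10) / 765  ~= 0.5098
width = int(items[2]) / img_w                            # 40 / 1360
height = int(items[3]) / img_h                           # 20 / 765
print(f"{class_id} {x_center} {y_center} {width} {height}")

--------------------------------------------------------------------------------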
/datasets/widar/download.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import zipfile
4 | from pathlib import Path
5 |
6 | import gdown
7 | import numpy as np
8 | import torch
9 | from tqdm import tqdm
10 |
11 | # Define the shared Google Drive file URL
12 | FILE_ID = "14vp4D8W0X2bDLpXnpP-U_VT9PIGkVf_4"
13 |
14 | # Define the directory where you want to save the dataset
15 | SAVE_DIR = "./datasets/widar"
16 |
17 |
18 | # Function to download the file from Google Drive
19 | def download_file_from_google_drive(file_id, save_dir):
20 | if not os.path.exists(save_dir):
21 | os.makedirs(save_dir)
22 |
23 | file_path = os.path.join(save_dir, "Widardata.zip")
24 | gdown.download(output=file_path, quiet=False, id=file_id)
25 |
26 | return file_path
27 |
28 |
29 | # Function to extract the dataset
30 | def extract_file(file_path, save_dir):
31 | with zipfile.ZipFile(file_path, "r") as zip_ref:
32 | zip_ref.extractall(save_dir)
33 | print(f"Extracted dataset to {save_dir}")
34 |
35 |
36 | def process():
37 | files = glob.glob('./datasets/widar/Widardata/*/*/*.csv')
38 | data = {}
39 | for file in tqdm(files):
40 | y = int(file.split('/')[-2].split('-')[0]) - 1
41 | assert y >= 0, 'y is negative'
42 | user = int(file.split('/')[-1].split('-')[0].replace('user', ''))
43 | if user not in data.keys():
44 | data[user] = {'X': [], 'Y': []}
45 | x = np.genfromtxt(file, delimiter=',')
46 | data[user]['X'].append(x)
47 | data[user]['Y'].append(y)
48 | Path('./datasets/widar/federated').mkdir(exist_ok=True)
49 | for user in data.keys():
50 |         X = np.array(data[user]['X'])  # stack this user's samples into one array
51 |         Y = np.array(data[user]['Y'])
52 |         print(f'{user}.pkl')
53 | print(X.shape, Y.shape)
54 | torch.save((X, Y), f'./datasets/widar/federated/{user}.pkl')
55 |
56 |
57 | # Main function to download and extract the WidarData.zip file
58 | def main():
59 | file_path = download_file_from_google_drive(FILE_ID, SAVE_DIR)
60 | extract_file(file_path, SAVE_DIR)
61 | process()
62 |
63 |
64 | if __name__ == "__main__":
65 | main()
66 |
--------------------------------------------------------------------------------
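process() above leaves one `<user>.pkl` per user under datasets/widar/federated, each holding the (X, Y) tuple saved with torch.save. A minimal sketch of reading one back (user id 1 is an assumption; use any id printed during processing):

import torch

# Each pickle holds the (X, Y) tuple written by process() above.
X, Y = torch.load('./datasets/widar/federated/1.pkl')
print(X.shape, Y.shape)  # X: (num_samples, ...), Y: (num_samples,)

--------------------------------------------------------------------------------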
/datasets/wisdm/activity_key.txt:
--------------------------------------------------------------------------------
1 | walking = A
2 | jogging = B
3 | stairs = C
4 | sitting = D
5 | standing = E
6 | typing = F
7 | teeth = G
8 | soup = H
9 | chips = I
10 | pasta = J
11 | drinking = K
12 | sandwich = L
13 | kicking = M
14 | catch = O
15 | dribbling = P
16 | writing = Q
17 | clapping = R
18 | folding = S
19 |
--------------------------------------------------------------------------------
/datasets/wisdm/activity_key_filtered.txt:
--------------------------------------------------------------------------------
1 | name,code,fcode
2 | walking,A,0
3 | jogging,B,1
4 | stairs,C,2
5 | sitting,D,3
6 | standing,E,4
7 | typing,F,5
8 | teeth,G,6
9 | drinking,K,8
10 | eating,L,7
11 | writing,Q,9
12 | clapping,R,10
13 | folding,S,11
14 |
15 |
--------------------------------------------------------------------------------
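A minimal sketch of loading activity_key_filtered.txt into a code-to-label mapping (the file is a small CSV with a header row, as shown above; the path is assumed relative to the repository root):

import pandas as pd

keys = pd.read_csv('datasets/wisdm/activity_key_filtered.txt')
# Map the raw single-letter activity code to the filtered integer label.
code_to_fcode = dict(zip(keys['code'], keys['fcode']))
print(code_to_fcode['A'])  # 0 (walking)

--------------------------------------------------------------------------------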
/datasets/wisdm/download.py:
--------------------------------------------------------------------------------
1 | import os
2 | import zipfile
3 |
4 | import requests
5 |
6 | # Define the URL for the dataset
7 | WISDM_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00507/wisdm-dataset.zip"
8 |
9 | # Define the directory where you want to save the dataset
10 | SAVE_DIR = "datasets/wisdm/"
11 |
12 |
13 | # Function to download the dataset
14 | def download_wisdm_dataset(url, save_dir='./datasets/wisdm/'):
15 | if not os.path.exists(save_dir):
16 | os.makedirs(save_dir)
17 |
18 | response = requests.get(url, stream=True)
19 | file_size = int(response.headers.get("Content-Length", 0))
20 | filename = os.path.join(save_dir, url.split("/")[-1])
21 |
22 | with open(filename, "wb") as f:
23 | for data in response.iter_content(chunk_size=1024):
24 | f.write(data)
25 |
26 |     print(f"Downloaded {filename} ({file_size} bytes)")
27 |
28 | return filename
29 |
30 |
31 | # Function to extract the dataset
32 | def extract_wisdm_dataset(file_path, save_dir):
33 | with zipfile.ZipFile(file_path, "r") as zip_ref:
34 | zip_ref.extractall(save_dir)
35 |
36 | print(f"Extracted dataset to {save_dir}")
37 |
38 |
39 | # Main function to download and extract the WISDM dataset
40 | def main():
41 | file_path = download_wisdm_dataset(WISDM_URL, SAVE_DIR)
42 | extract_wisdm_dataset(file_path, SAVE_DIR)
43 |
44 |
45 | if __name__ == "__main__":
46 | main()
47 |
--------------------------------------------------------------------------------
/figures/datasets.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/figures/datasets.png
--------------------------------------------------------------------------------
/figures/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/figures/overview.png
--------------------------------------------------------------------------------
/figures/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/figures/pipeline.png
--------------------------------------------------------------------------------
/loaders/casas.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import numpy as np
4 | import torch
5 | from sklearn.model_selection import train_test_split
6 | from torch.utils.data import TensorDataset
7 |
8 |
9 | def load_dataset(datasetName='all'):
10 | X = np.load('./datasets/casas/npy/' + datasetName + '-x.npy')
11 | Y = np.load('./datasets/casas/npy/' + datasetName + '-y.npy')
12 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
13 | print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
14 | X_tensor = torch.from_numpy(X.astype(int))
15 | y_tensor = torch.from_numpy(Y.astype(int))
16 |
17 | X_tensor_train = torch.from_numpy(X_train.astype(int))
18 | y_tensor_train = torch.from_numpy(Y_train.astype(int))
19 |
20 | X_tensor_test = torch.from_numpy(X_test.astype(int))
21 | y_tensor_test = torch.from_numpy(Y_test.astype(int))
22 | # Create a PyTorch Dataset using TensorDataset
23 | dataset = TensorDataset(X_tensor, y_tensor)
24 | train_dataset = TensorDataset(X_tensor_train, y_tensor_train)
25 | test_dataset = TensorDataset(X_tensor_test, y_tensor_test)
26 | dataset.targets = y_tensor
27 | train_dataset.targets = y_tensor_train
28 | test_dataset.targets = y_tensor_test
29 | data_dict = {
30 | 'full_dataset': dataset,
31 | 'train': train_dataset,
32 | 'test': test_dataset
33 | }
34 | return data_dict
35 |
36 |
37 | if __name__ == '__main__':
38 | dt = load_dataset()
39 | print(len(dt['train']))
40 | print(dt['train'][0][0].shape)
--------------------------------------------------------------------------------
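A minimal sketch of batching the TensorDatasets returned above (assumes the CASAS .npy files have already been prepared under ./datasets/casas/npy; the batch size is arbitrary):

from torch.utils.data import DataLoader

from loaders.casas import load_dataset

data = load_dataset()
train_loader = DataLoader(data['train'], batch_size=32, shuffle=True)
xb, yb = next(iter(train_loader))
print(xb.shape, yb.shape)

--------------------------------------------------------------------------------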
/loaders/cifar10.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import List
3 |
4 | import altair as alt
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import pandas as pd
8 | import torch
9 | import torchvision
10 | from torch.utils.data import Dataset
11 | from torchvision.transforms import transforms
12 |
13 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
14 |
15 |
16 | def load_dataset():
17 | transform = transforms.Compose(
18 | [transforms.ToTensor(),
19 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
20 | batch_size = 4
21 | trainset = torchvision.datasets.CIFAR10(root='../datasets/cifar10', train=True,
22 | download=True, transform=transform)
23 | testset = torchvision.datasets.CIFAR10(root='../datasets/cifar10', train=False,
24 | download=True, transform=transform)
25 | classes = ('plane', 'car', 'bird', 'cat',
26 | 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
27 | return {'train': trainset, 'test': testset, 'label_names': classes}
28 |
29 |
30 | def compute_client_data_distribution(datasets: List[Dataset], num_classes: int):
31 | class_distribution = []
32 | data_distribution = []
33 |
34 | for i in range(len(datasets)):
35 | class_counts = torch.zeros(num_classes)
36 | for j in range(len(datasets[i].targets)):
37 | class_counts[datasets[i].targets[j]] += 1
38 | class_counts = class_counts.numpy()
39 | data_distribution.append(np.sum(class_counts))
40 | class_counts = class_counts / np.sum(class_counts)
41 | class_distribution.append(class_counts)
42 | return data_distribution, class_distribution
43 |
44 |
45 | def visualize_client_data_distribution(datasets: List[Dataset], num_clients: int, num_classes: int):
46 | data_distribution, class_distribution = compute_client_data_distribution(datasets, num_classes)
47 |
48 | # create a heatmap of the data distribution for each client
49 | fig, ax = plt.subplots()
50 | im = ax.imshow(np.array(class_distribution).T, cmap='YlGn')
51 |
52 | # add text annotations for each cell
53 | for i in range(len(class_distribution[0])):
54 | for j in range(len(class_distribution)):
55 | text = ax.text(j, i, class_distribution[j][i], ha="center", va="center", color="black")
56 |
57 | # add colorbar
58 | cbar = ax.figure.colorbar(im, ax=ax)
59 |
60 | # set tick labels and axis labels
61 | plt.xticks(fontsize=5)
62 | plt.yticks(fontsize=5)
63 | ax.set_xticks(np.arange(len(class_distribution)))
64 | ax.set_yticks(np.arange(len(class_distribution[0])))
65 | ax.set_xticklabels([f"{i}" if i % 10 == 0 else '' for i in range(len(class_distribution))])
66 | ax.set_yticklabels([f"{i}" for i in range(len(class_distribution[0]))])
67 | ax.set_xlabel("Client")
68 | ax.set_ylabel("Class")
69 | ax.set_title("Class Distribution of Clients")
70 |
71 | plt.show()
72 |
73 | fig, ax = plt.subplots()
74 | ax.bar(range(num_clients), data_distribution)
75 | ax.set_xlabel("Client")
76 | ax.set_ylabel("Data Samples")
77 | ax.set_title("Sample Distribution of Clients")
78 |     plt.savefig("sample_distribution_matplotlib.png")  # save before show(), which clears the figure
79 |     plt.show()
80 |
81 |
82 | def vis_data_distribution_altair(data_distribution, class_distribution):
83 | data = []
84 | num_clients = len(data_distribution)
85 | for i in range(len(class_distribution[0])):
86 | for j in range(len(class_distribution)):
87 | data.append({"client": j, "class": i, "value": class_distribution[j][i]})
88 |
89 | heatmap = (
90 | alt.Chart(pd.DataFrame(data))
91 | .mark_rect()
92 | .encode(
93 | x=alt.X("client:N", title="Client"),
94 | y=alt.Y("class:N", title="Class"),
95 | color=alt.Color("value:Q", scale=alt.Scale(scheme="yellowgreenblue"),
96 | legend=alt.Legend(title="Percentage of Samples")),
97 | tooltip="value:Q",
98 | )
99 | .properties(
100 | title=alt.TitleParams(
101 | "Class Distribution of Clients",
102 | fontSize=12,
103 | ),
104 | # width=200,
105 | # height=120,
106 | )
107 | )
108 |
109 | text = (
110 | alt.Chart(pd.DataFrame(data))
111 | .mark_text()
112 | .encode(
113 | x=alt.X("client:N"),
114 | y=alt.Y("class:N"),
115 | text=alt.Text("value:Q", format=".2f", ),
116 | color=alt.condition(
117 | alt.datum.value > 0.5, alt.value("black"), alt.value("white")
118 | ),
119 | )
120 | .transform_filter((alt.datum.value > 0.01))
121 | )
122 |
123 | data_bar = (
124 | alt.Chart(pd.DataFrame({"client": range(num_clients), "value": data_distribution}))
125 | .mark_bar()
126 | .encode(
127 | x=alt.X("client:N", title="Client", axis=alt.Axis(labelFontSize=8)),
128 | y=alt.Y("value:Q", title="Data Samples", axis=alt.Axis(labelFontSize=8)),
129 | tooltip="value:Q",
130 | )
131 | .properties(
132 | title=alt.TitleParams(
133 | "Sample Distribution of Clients",
134 | fontSize=12,
135 | ),
136 | # width=200,
137 | # height=120,
138 | )
139 | )
140 |
141 | return alt.vconcat(heatmap + text, data_bar)
142 |
143 |
--------------------------------------------------------------------------------
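A sketch of driving the distribution helpers above on a few client shards. This is a hypothetical IID split into 5 clients via torch.utils.data.Subset; .targets is attached manually because Subset does not expose it and compute_client_data_distribution reads it:

import numpy as np
from torch.utils.data import Subset

from loaders.cifar10 import load_dataset, compute_client_data_distribution

train = load_dataset()['train']

# Fake an IID split into 5 clients for illustration.
shards = []
for idx in np.array_split(np.random.permutation(len(train)), 5):
    shard = Subset(train, idx.tolist())
    shard.targets = [train.targets[i] for i in idx]  # the helpers read .targets
    shards.append(shard)

data_dist, class_dist = compute_client_data_distribution(shards, num_classes=10)
print(data_dist)  # samples per client; class_dist holds per-client class ratios

--------------------------------------------------------------------------------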
/loaders/clusterer.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import torch
7 | import torchvision
8 | import torchvision.models as models
9 | import torchvision.transforms as transforms
10 | from PIL import Image
11 | # from pycocotools import coco
12 | from torch.utils.data import Dataset, DataLoader
13 | from tqdm import tqdm
14 |
15 |
16 | model = models.resnet50(pretrained=True)
17 |
18 | transformations = transforms.Compose([
19 | transforms.Resize(256),
20 | transforms.CenterCrop(224),
21 | transforms.ToTensor(),
22 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
23 | ])
24 | def extract_imagenet_features(img_path, transform=transformations, model=model):
25 | # Load the pre-trained ResNet50 model
26 |
27 | # Set the model to evaluation mode
28 | model.eval()
29 |
30 | # Define the image pre-processing transforms
31 |
32 | # Load the image and apply the pre-processing transforms
33 | img = Image.open(img_path)
34 | # img = np.array(img)/255.0
35 | # img = torch.from_numpy(img).float()
36 | img_tensor = transform(img)
37 | img_tensor = img_tensor.unsqueeze(0)
38 |
39 |     # Run the model; note this yields the 1000-dim ImageNet logits, since the final fc layer is not removed
40 | with torch.no_grad():
41 | features = model(img_tensor)
42 |
43 | # Flatten the features tensor
44 | flattened_features = features.flatten()
45 |
46 | return flattened_features
47 |
48 |
49 | class VisDroneDataset(Dataset):
50 | def __init__(self, data_dir='./datasets/visdrone/yolo_format/train', transform=None):
51 | self.data_dir = data_dir
52 | self.transform = transform
53 | self.images = os.listdir(os.path.join(data_dir, 'images'))
54 |
55 | def __len__(self):
56 | return len(self.images)
57 |
58 | def __getitem__(self, idx):
59 | # Load the image
60 | image = extract_imagenet_features(img_path=os.path.join(
61 | os.path.join(self.data_dir, 'images'
62 | ),
63 | self.images[idx]), transform=self.transform)
64 | # if self.transform:
65 | # image = self.transform(image)
66 |
67 | return image, self.images[idx]
68 |
69 |
70 | # The `transformations` pipeline defined near the top of the file is reused below.
78 |
79 | dataset = VisDroneDataset(transform=transformations)
80 |
81 | # Load a pretrained ResNet-50 model
82 | model = torchvision.models.resnet50(pretrained=True)
83 | model.eval()
84 |
85 | # Extract features for each image in the dataset
86 | ids = []
87 | features = []
88 | for i, (image_features, img_id) in tqdm(enumerate(dataset), total=len(dataset)):
89 | with torch.no_grad():
90 | feature = image_features.numpy()
91 | ids.append(img_id)
92 | features.append(feature)
93 |
94 | # Convert the features to a numpy array
95 | features = np.array(features)
96 |
97 | # Perform K-means clustering on the features to cluster the images into 100 clusters
98 | from sklearn.cluster import KMeans
99 |
100 | kmeans = KMeans(n_clusters=100).fit(features)
101 | df = pd.DataFrame({'image_id': ids, 'cluster': kmeans.labels_})
102 | df.to_csv('split.csv')
103 | print(df.groupby('cluster').count())
104 | clusters = kmeans.labels_
105 |
--------------------------------------------------------------------------------
/loaders/energy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import numpy as np
3 | import pandas as pd
4 | import torch
5 | from scipy.stats import pearsonr
6 | from sklearn.base import BaseEstimator, TransformerMixin
7 | from sklearn.decomposition import PCA
9 | from sklearn.model_selection import train_test_split
10 | from sklearn.pipeline import Pipeline
11 | from sklearn.preprocessing import StandardScaler
12 | from torch.utils.data import Dataset
14 |
15 |
16 | def digitize_values(values, a, b, num_bins):
17 |     """
18 |     Assign each value to one of num_bins equal-frequency (quantile) bins.
19 |
20 |     Bin indices are returned in the original order of `values`, so they can
21 |     be used directly as per-sample labels. `a` and `b` are unused and kept
22 |     only for interface compatibility.
23 |     """
24 |     values = np.asarray(values)
25 |
26 |     # Interior quantile cut points: num_bins - 1 edges split the data into
27 |     # num_bins bins holding approximately the same number of samples.
28 |     edges = np.quantile(values, np.linspace(0, 1, num_bins + 1)[1:-1])
29 |
30 |     # Map each value to its bin index in [0, num_bins - 1].
31 |     return np.digitize(values, edges).astype(np.int32)
35 |
36 | class HandleOutliers(BaseEstimator, TransformerMixin):
37 | def __init__(self):
38 |         pass
39 |
40 | def fit(self, X, y=None):
41 |         '''
42 |         Description : Records the 90th and 10th percentiles of each feature in the dataframe,
43 |         so that outliers can later be imputed with the recorded percentile values.
44 |         Parameters:
45 |         X : Dataframe whose percentiles should be recorded.
46 |         y : Not required.
47 |         '''
48 | outlier_estimator_dict = {}
49 | for col in X.columns:
50 | upper_bound = np.percentile(X[col], 90)
51 | lower_bound = np.percentile(X[col], 10)
52 | outlier_estimator_dict[col] = {
53 | "upper_bound": upper_bound,
54 | "lower_bound": lower_bound}
55 | self.outlier_estimator_dict = outlier_estimator_dict
56 | return self
57 |
58 | def transform(self, X, y=None):
59 |         '''
60 |         Description : Replaces outliers with the recorded percentile value of the respective column.
61 |         Parameters:
62 |         X : Dataframe whose outliers should be replaced.
63 |         Returns : A Dataframe with outliers clipped to the recorded bounds.
64 |         '''
65 | for col in X.columns:
66 | col_dict = self.outlier_estimator_dict[col]
67 | X[col] = np.where(X[col] > col_dict['upper_bound'], col_dict['upper_bound'], X[col])
68 | X[col] = np.where(X[col] < col_dict['lower_bound'], col_dict['lower_bound'], X[col])
69 |
70 | self.final_column_names = X.columns
71 | return X
72 |
73 |
74 | class AddPcaFeatures(BaseEstimator, TransformerMixin):
75 | def __init__(self, number_of_pca_columns=None):
76 | '''
77 | Parameters :
78 |         number_of_pca_columns : (int) Number of PCA components to compute and append.
79 | '''
80 | self.number_of_pca_columns = number_of_pca_columns
82 |
83 | def fit(self, X, y=None):
84 | '''
85 | Description : It fits the data in the PCA algorithm
86 | Parameters:
87 | X : Dataframe which fits the PCA algorithm
88 | '''
89 |         if self.number_of_pca_columns is not None:
90 | self.pca = PCA(n_components=self.number_of_pca_columns)
91 | self.pca.fit(X)
92 | return self
93 |
94 | def transform(self, X, y=None):
95 | '''
96 | Parameters :
97 | X : Dataframe you want to reduce the dimension
98 | Returns : A Dataframe with the pca features along concatinated with the input Dataframe.
99 | '''
100 |         if self.number_of_pca_columns is not None:
101 | pca_column_names = [f'pca_{val}' for val in range(1, self.number_of_pca_columns + 1)]
102 | pca_features = self.pca.transform(X)
103 | pca_features = pd.DataFrame(pca_features, columns=pca_column_names, index=X.index)
104 | X = pd.concat([X, pca_features], axis=1)
105 |
106 | return X
107 |
108 |
109 | class AddCentralTendencyFeatures(BaseEstimator, TransformerMixin):
110 | def __init__(self, measure):
111 | '''
112 | Parameters :
113 | measure : 'mean' or 'median' depend on which features you want to add.
114 | '''
115 | self.measure = measure
117 |
118 | def fit(self, X, y=None):
119 | return self
120 |
121 | def transform(self, X, y=None):
122 | '''
123 |         Description : Adds the mean or median of the temperature and humidity columns for each observation.
124 |         Parameter : Dataframe from which to compute the aggregate features.
125 |         Returns : Input Dataframe concatenated with the calculated features.
126 | '''
127 | if self.measure.lower() == 'mean':
128 | X['avg_house_temp'] = X[[col for col in X.columns if (('t' in col) and (len(col) < 3))]].mean(axis=1)
129 | X['avg_humidity_percentage'] = X[[col for col in X.columns if (('rh_' in col) and (len(col) < 5))]].mean(
130 | axis=1)
131 |
132 | else:
133 | X['med_house_temp'] = X[[col for col in X.columns if (('t' in col) and (len(col) < 3))]].median(axis=1)
134 | X['med_humidity_percentage'] = X[[col for col in X.columns if (('rh_' in col) and (len(col) < 5))]].median(
135 | axis=1)
136 |
137 | return X
138 |
139 |
140 | class AddDateFeatures(BaseEstimator, TransformerMixin):
141 | def __init__(self):
142 |         pass
143 |
144 | def fit(self, X, y=None):
145 | return self
146 |
147 | def transform(self, X, y=None):
148 | X['day'] = X.date.dt.day
149 | X['month'] = X.date.dt.month
150 | return X.drop('date', axis=1)
151 |
152 |
153 | class RemoveCorrelatedFeatures(BaseEstimator, TransformerMixin):
154 | def __init__(self):
155 |         pass
156 |
157 | def fit(self, X, y):
158 | '''
159 |         Description : Among highly correlated feature pairs, flags the feature less correlated with the target for removal.
160 | X : Dataframe with only features
161 | y : Target Series
162 | '''
163 | col_corr = set()
164 | corr_matrix = X.corr()
165 |
166 | for i in range(len(corr_matrix.columns)):
167 | for j in range(i):
168 | if abs(corr_matrix.iloc[i, j]) > 0.85:
169 | corr_i, _ = pearsonr(y, X.iloc[:, i])
170 | corr_j, _ = pearsonr(y, X.iloc[:, j])
171 |                     # Of the correlated pair, keep whichever feature tracks the target better.
172 |                     if abs(corr_i) < abs(corr_j):
173 |                         col_corr.add(corr_matrix.columns[i])
174 |                     else:
175 |                         col_corr.add(corr_matrix.columns[j])
177 |
178 | self.correlated_columns = col_corr
179 | self.final_column_names = set(X.columns) - self.correlated_columns
180 | return self
181 |
182 | def transform(self, X, y=None):
183 | '''
184 |         Parameter : The Dataframe from which to drop the flagged correlated features.
185 |         Returns : Dataframe with the correlated features removed.
186 | '''
187 | return X.drop(self.correlated_columns, axis=1)
188 |
189 |
190 | class ApplyTransformation(BaseEstimator, TransformerMixin):
191 | def __init__(self):
192 |         pass
193 |
194 | def fit(self, X, y=None):
195 | return self
196 |
197 | def transform(self, X, y=None):
198 | X[['t9', 'rv1', 'rv2', 'windspeed']] = np.log1p(X[['t9', 'rv1', 'rv2', 'windspeed']])
199 | X['visibility'] = np.where(X['visibility'] > 40, 1, 0)
200 | return X
201 |
202 |
203 | class EnergyDataset(Dataset):
204 | def __init__(self, features, labels):
205 |         self.features = np.asarray(features)
206 |         self.labels = np.asarray(labels)  # ensure positional indexing in __getitem__ (labels may arrive as a pandas Series)
207 |         self.targets = digitize_values(self.labels, np.min(self.labels), np.max(self.labels), 10)
208 |
209 | def __len__(self):
210 | return len(self.labels)
211 |
212 | def __getitem__(self, idx):
213 | return torch.tensor(self.features[idx], dtype=torch.float), torch.tensor(self.labels[idx], dtype=torch.float)
214 |
215 |
216 | def load_dataset(split=0.2, seed=42):
217 | df = pd.read_csv('datasets/energy/energydata_complete.csv')
218 | df['date'] = pd.to_datetime(df['date'])
219 | df.set_index(df.date.copy(deep=True), inplace=True)
220 |
221 | # Preprocess the data
222 | # Split the data into features and target
223 | df.columns = [col.lower() for col in df.columns]
224 | X = df.drop('appliances', axis=1)
225 | y = df['appliances']
226 |
227 | # Split the dataset into train and test datasets
228 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=seed)
229 |
230 | # Scale the features to have zero mean and unit variance
231 | preprocessing_pipeline = Pipeline([
232 | ('transformation', ApplyTransformation()),
233 | ('remove_outliers', HandleOutliers()),
234 | ('add_central_tendency_features', AddCentralTendencyFeatures(measure='mean')),
235 | ('add_Date_Features', AddDateFeatures()),
236 | ('add_pca_features', AddPcaFeatures(number_of_pca_columns=3)),
237 | ('remove_correlated_features', RemoveCorrelatedFeatures()),
238 | ('standard_scalar', StandardScaler())
239 | ])
240 | # min_y = min(y_train)
241 | # max_y = max(y_train)
242 | # y_train = (y_train - min_y) / max_y
243 | # y_test = (y_test - min_y) / max_y
244 | y_train = np.log(y_train)
245 | y_test = np.log(y_test)
246 | X_train = preprocessing_pipeline.fit_transform(X_train, y_train)
247 | X_test = preprocessing_pipeline.transform(X_test)
248 | train_data = EnergyDataset(X_train, y_train)
249 | test_data = EnergyDataset(X_test, y_test)
250 | return {
251 | 'train': train_data,
252 | 'test': test_data,
253 | }
254 |
255 |
256 | if __name__ == '__main__':
257 | dt = load_dataset()
258 | print(len(dt['train']))
259 | print(dt['train'][0][0].shape)
--------------------------------------------------------------------------------
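EnergyDataset above discretizes the continuous regression target into 10 equal-frequency bins (.targets) so that label-based partitioners can still operate on this regression task. A toy check of the binning helper; the input values are arbitrary:

import numpy as np

from loaders.energy import digitize_values

values = np.array([5.0, 1.0, 9.0, 3.0, 7.0, 2.0])
bins = digitize_values(values, values.min(), values.max(), 3)
print(bins)  # e.g. [1 0 2 1 2 0]: one bin index per input value, ~2 samples per bin

--------------------------------------------------------------------------------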
/loaders/pack_audio.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import numpy as np
4 | import torch
5 |
6 |
7 | def temporal_sampling(spectrogram, start_idx, end_idx, num_samples):
8 |     """
9 |     Uniformly sample num_samples time steps along the time axis of the
10 |     spectrogram. Note that start_idx and end_idx are currently unused: the
11 |     sampling always spans the full time axis.
12 |     Args:
13 |         spectrogram (tensor): spectrogram tensor, dimension is
14 |             `channel` x `num time steps` x `num frequency bins`.
15 |         start_idx (int): the index of the start frame (unused).
16 |         end_idx (int): the index of the end frame (unused).
17 |         num_samples (int): number of time steps to sample.
18 |     Returns:
19 |         spectrogram (tensor): the temporally sampled spectrogram.
20 |     """
21 | index = torch.linspace(0, spectrogram.shape[1] - 1, num_samples).long()
22 | spectrogram = torch.index_select(spectrogram, 1, index)
23 | return spectrogram
24 |
25 |
26 | def get_start_end_idx(audio_size, clip_size, clip_idx, num_clips, start_sample=0):
27 | """
28 | Sample a clip of size clip_size from a video of size video_size and
29 | return the indices of the first and last frame of the clip. If clip_idx is
30 | -1, the clip is randomly sampled, otherwise uniformly split the video to
31 | num_clips clips, and select the start and end index of clip_idx-th video
32 | clip.
33 | Args:
34 | audio_size (int): number of overall frames.
35 | clip_size (int): size of the clip to sample from the frames.
36 | clip_idx (int): if clip_idx is -1, perform random jitter sampling. If
37 | clip_idx is larger than -1, uniformly split the video to num_clips
38 | clips, and select the start and end index of the clip_idx-th video
39 | clip.
40 | num_clips (int): overall number of clips to uniformly sample from the
41 | given video for testing.
42 | Returns:
43 | start_idx (int): the start frame index.
44 | end_idx (int): the end frame index.
45 | """
46 | delta = max(audio_size - clip_size, 0)
47 | if clip_idx == -1:
48 | # Random temporal sampling.
49 | start_idx = random.uniform(0, delta)
50 | else:
51 | # Uniformly sample the clip with the given index.
52 | start_idx = np.linspace(0, delta, num=num_clips)[clip_idx]
53 | end_idx = start_idx + clip_size - 1
54 | return start_sample + start_idx, start_sample + end_idx
55 |
56 |
57 | def pack_audio(audio_dataset, video_record, temporal_sample_index, sampling_rate=24000, clip_secs=1.999, n_ensemble=5):
58 | samples = audio_dataset[video_record.video_id][()]
59 | start_idx, end_idx = get_start_end_idx(
60 | video_record.num_audio_samples,
61 | int(round(sampling_rate * clip_secs)),
62 | temporal_sample_index,
63 | n_ensemble,
64 | start_sample=video_record.start_audio_sample
65 | )
66 | spectrogram = _extract_sound_feature(
67 | samples,
68 | video_record,
69 | int(start_idx),
70 | int(end_idx),
71 | clip_secs
72 | )
73 | return spectrogram
74 |
75 |
76 | def _log_specgram(
77 | audio,
78 | window_size=10,
79 | step_size=5,
80 | eps=1e-6,
81 | sampling_rate=24000
82 | ):
83 | nperseg = int(round(window_size * sampling_rate / 1e3))
84 | noverlap = int(round(step_size * sampling_rate / 1e3))
85 | from librosa import stft, filters
86 |
87 | # Mel-Spectrogram
88 | spec = stft(
89 | audio,
90 | n_fft=2048,
91 | window='hann',
92 | hop_length=noverlap,
93 | win_length=nperseg,
94 | pad_mode='constant'
95 | )
96 | mel_basis = filters.mel(
97 | sr=sampling_rate,
98 | n_fft=2048,
99 | n_mels=128,
100 | htk=True,
101 | norm=None
102 | )
103 | mel_spec = np.dot(mel_basis, np.abs(spec))
104 |
105 | # Log-Mel-Spectrogram
106 | log_mel_spec = np.log(mel_spec + eps)
107 | return log_mel_spec.T
108 |
109 |
110 | def _extract_sound_feature(samples, video_record, start_idx, end_idx, clip_duration, sampling_rate=24000):
111 | if video_record.num_audio_samples < int(round(sampling_rate * clip_duration)):
112 | samples = samples[video_record.start_audio_sample:video_record.end_audio_sample]
113 | else:
114 | samples = samples[start_idx:end_idx]
115 | spectrogram = _log_specgram(samples,
116 | window_size=10,
117 | step_size=5
118 | )
119 | if spectrogram.shape[0] < 400:
120 | num_timesteps_to_pad = 400 - spectrogram.shape[0]
121 | spectrogram = np.pad(spectrogram, ((0, num_timesteps_to_pad), (0, 0)), 'edge')
122 | return torch.tensor(spectrogram).unsqueeze(0)
123 |
--------------------------------------------------------------------------------
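The 400-frame pad at the end of _extract_sound_feature matches the clip arithmetic above: 1.999 s at 24 kHz is about 47,976 samples, and with a 5 ms hop (120 samples) the STFT produces roughly 47,976 / 120 = 400 time frames of 128 mel bins each. A quick numeric check with random audio (librosa required; values mirror the defaults above):

import numpy as np

from loaders.pack_audio import _log_specgram

audio = np.random.randn(int(round(24000 * 1.999))).astype(np.float32)
spec = _log_specgram(audio, window_size=10, step_size=5, sampling_rate=24000)
print(spec.shape)  # (400, 128): time frames x mel bins

--------------------------------------------------------------------------------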
/loaders/spatial_transforms.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | from PIL import Image
4 | from torchvision.transforms import functional as F
5 | from torchvision.transforms import transforms
6 |
7 |
8 | class Compose(transforms.Compose):
9 |
10 | def randomize_parameters(self):
11 | for t in self.transforms:
12 | t.randomize_parameters()
13 |
14 |
15 | class ToTensor(transforms.ToTensor):
16 |
17 | def randomize_parameters(self):
18 | pass
19 |
20 |
21 | class Normalize(transforms.Normalize):
22 |
23 | def randomize_parameters(self):
24 | pass
25 |
26 | #
27 | # class ScaleValue(object):
28 | #
29 | # def __init__(self, s):
30 | # self.s = s
31 | #
32 | # def __call__(self, tensor):
33 | # tensor *= self.s
34 | # return tensor
35 | #
36 | # def randomize_parameters(self):
37 | # pass
38 | #
39 | #
40 | # class Resize(transforms.Resize):
41 | #
42 | # def randomize_parameters(self):
43 | # pass
44 | #
45 | #
46 | # class Scale(transforms.Scale):
47 | #
48 | # def randomize_parameters(self):
49 | # pass
50 | #
51 | #
52 | # class CenterCrop(transforms.CenterCrop):
53 | #
54 | # def randomize_parameters(self):
55 | # pass
56 |
57 |
58 | class CornerCrop(object):
59 |
60 | def __init__(self,
61 | size,
62 | crop_position=None,
63 | crop_positions=['c', 'tl', 'tr', 'bl', 'br']):
64 | self.size = size
65 | self.crop_position = crop_position
66 | self.crop_positions = crop_positions
67 |
68 | if crop_position is None:
69 | self.randomize = True
70 | else:
71 | self.randomize = False
72 | self.randomize_parameters()
73 |
74 | def __call__(self, img):
75 | image_width = img.size[0]
76 | image_height = img.size[1]
77 |
78 | h, w = (self.size, self.size)
79 | if self.crop_position == 'c':
80 | i = int(round((image_height - h) / 2.))
81 | j = int(round((image_width - w) / 2.))
82 | elif self.crop_position == 'tl':
83 | i = 0
84 | j = 0
85 | elif self.crop_position == 'tr':
86 | i = 0
87 | j = image_width - self.size
88 | elif self.crop_position == 'bl':
89 | i = image_height - self.size
90 | j = 0
91 | elif self.crop_position == 'br':
92 | i = image_height - self.size
93 | j = image_width - self.size
94 |
95 | img = F.crop(img, i, j, h, w)
96 |
97 | return img
98 |
99 | def randomize_parameters(self):
100 | if self.randomize:
101 | self.crop_position = self.crop_positions[random.randint(
102 | 0,
103 | len(self.crop_positions) - 1)]
104 |
105 | def __repr__(self):
106 | return self.__class__.__name__ + '(size={0}, crop_position={1}, randomize={2})'.format(
107 | self.size, self.crop_position, self.randomize)
108 |
109 |
110 | class RandomHorizontalFlip(transforms.RandomHorizontalFlip):
111 |
112 | def __init__(self, p=0.5):
113 | super().__init__(p)
114 | self.randomize_parameters()
115 |
116 | def __call__(self, img):
117 | """
118 | Args:
119 | img (PIL.Image): Image to be flipped.
120 | Returns:
121 | PIL.Image: Randomly flipped image.
122 | """
123 | if self.random_p < self.p:
124 | return F.hflip(img)
125 | return img
126 |
127 | def randomize_parameters(self):
128 | self.random_p = random.random()
129 |
130 |
131 | class MultiScaleCornerCrop(object):
132 |
133 | def __init__(self,
134 | size,
135 | scales,
136 | crop_positions=['c', 'tl', 'tr', 'bl', 'br'],
137 | interpolation=Image.BILINEAR):
138 | self.size = size
139 | self.scales = scales
140 | self.interpolation = interpolation
141 | self.crop_positions = crop_positions
142 |
143 | self.randomize_parameters()
144 |
145 | def __call__(self, img):
146 | short_side = min(img.size[0], img.size[1])
147 | crop_size = int(short_side * self.scale)
148 | self.corner_crop.size = crop_size
149 |
150 | img = self.corner_crop(img)
151 | return img.resize((self.size, self.size), self.interpolation)
152 |
153 | def randomize_parameters(self):
154 | self.scale = self.scales[random.randint(0, len(self.scales) - 1)]
155 | crop_position = self.crop_positions[random.randint(
156 | 0,
157 | len(self.crop_positions) - 1)]
158 |
159 | self.corner_crop = CornerCrop(None, crop_position)
160 |
161 | def __repr__(self):
162 | return self.__class__.__name__ + '(size={0}, scales={1}, interpolation={2})'.format(
163 | self.size, self.scales, self.interpolation)
164 |
165 |
166 | class RandomResizedCrop(transforms.RandomResizedCrop):
167 |
168 | def __init__(self,
169 | size,
170 | scale=(0.08, 1.0),
171 | ratio=(3. / 4., 4. / 3.),
172 | interpolation=Image.BILINEAR):
173 | super().__init__(size, scale, ratio, interpolation)
174 | self.randomize_parameters()
175 |
176 | def __call__(self, img):
177 | if self.randomize:
178 | self.random_crop = self.get_params(img, self.scale, self.ratio)
179 | self.randomize = False
180 |
181 | i, j, h, w = self.random_crop
182 | return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
183 |
184 | def randomize_parameters(self):
185 | self.randomize = True
186 |
187 |
188 | class ColorJitter(transforms.ColorJitter):
189 |
190 | def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
191 | super().__init__(brightness, contrast, saturation, hue)
192 | self.randomize_parameters()
193 |
194 | def __call__(self, img):
195 | if self.randomize:
196 | self.transform = self.get_params(self.brightness, self.contrast,
197 | self.saturation, self.hue)
198 | self.randomize = False
199 |
200 | return self.transform(img)
201 |
202 | def randomize_parameters(self):
203 | self.randomize = True
204 |
205 |
206 | class PickFirstChannels(object):
207 |
208 | def __init__(self, n):
209 | self.n = n
210 |
211 | def __call__(self, tensor):
212 | return tensor[:self.n, :, :]
213 |
214 | def randomize_parameters(self):
215 | pass
216 |
--------------------------------------------------------------------------------
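The point of the randomize_parameters hooks above is video-style augmentation: draw the random parameters once per clip, then apply the identical transform to every frame so the clip stays temporally consistent. A minimal sketch (blank PIL images stand in for frames):

from PIL import Image

from loaders.spatial_transforms import Compose, RandomHorizontalFlip, ToTensor

transform = Compose([RandomHorizontalFlip(p=0.5), ToTensor()])

frames = [Image.new('RGB', (224, 224)) for _ in range(8)]  # dummy 8-frame clip
transform.randomize_parameters()  # fix the randomness once for the whole clip
clip = [transform(f) for f in frames]  # every frame flipped (or not) identically

--------------------------------------------------------------------------------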
/loaders/ut_har.py:
--------------------------------------------------------------------------------
1 | import glob
2 |
3 | import numpy as np
4 | import torch
5 | from torch.utils.data import Dataset
6 | from torch.utils.data.dataset import T_co
7 |
8 |
9 | class UTHarDataset(Dataset):
10 | def __init__(self, data: np.array, label: np.array):
11 | self.data = data
12 | self.targets = label
13 |
14 | def __len__(self) -> int:
15 | return len(self.data)
16 |
17 | def __getitem__(self, index) -> T_co:
18 | return self.data[index, :, :, :], int(self.targets[index])
19 |
20 |
21 | def load_dataset(root_dir='./datasets/ut_har'):
22 | data_list = glob.glob(root_dir + '/UT_HAR/data/*.csv')
23 | label_list = glob.glob(root_dir + '/UT_HAR/label/*.csv')
24 | ut_har_data = {}
25 | for data_dir in data_list:
26 | data_name = data_dir.split('/')[-1].split('.')[0]
27 | with open(data_dir, 'rb') as f:
28 | data = np.load(f)
29 | data = data.reshape(len(data), 1, 250, 90)
30 | data_norm = (data - np.min(data)) / (np.max(data) - np.min(data))
31 | ut_har_data[data_name] = torch.Tensor(data_norm)
32 | for label_dir in label_list:
33 | label_name = label_dir.split('/')[-1].split('.')[0]
34 | with open(label_dir, 'rb') as f:
35 | label = np.load(f)
36 | ut_har_data[label_name] = torch.Tensor(label)
37 | return {
38 | 'train': UTHarDataset(ut_har_data['X_train'], ut_har_data['y_train']),
39 | 'val': UTHarDataset(ut_har_data['X_val'], ut_har_data['y_val']),
40 |         'test': UTHarDataset(ut_har_data['X_test'], ut_har_data['y_test']),
41 | }
42 |
43 |
44 | if __name__ == '__main__':
45 | dataset = load_dataset()
46 | print(len(dataset['train']))
47 | print(dataset['train'][0][0].shape)
48 |
--------------------------------------------------------------------------------
/loaders/visdrone.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | from pathlib import Path
3 | from typing import List, Tuple, Dict, Any, Union
4 |
5 | import pandas as pd
6 | import torch
7 | from torch.utils.data import Dataset
8 | from tqdm import tqdm
9 | from ultralytics.yolo.data.dataset import YOLODataset
10 | from ultralytics.yolo.data.dataloaders.v5loader import LoadImagesAndLabels
11 |
12 | from loaders.utils import ParameterDict
13 |
14 | YOLO_HYPERPARAMETERS = {
15 | 'lr0': 0.01,
16 | 'lrf': 0.01,
17 | 'momentum': 0.937,
18 | 'weight_decay': 0.0005,
19 | 'warmup_epochs': 3.0,
20 | 'warmup_momentum': 0.8,
21 | 'warmup_bias_lr': 0.1,
22 | 'box': 7.5,
23 | 'cls': 0.5,
24 | 'dfl': 1.5,
25 | 'fl_gamma': 0.0,
26 | 'label_smoothing': 0.0,
27 | 'nbs': 64,
28 | 'hsv_h': 0.015,
29 | 'hsv_s': 0.7,
30 | 'hsv_v': 0.4,
31 | 'degrees': 0.0,
32 | 'translate': 0.1,
33 | 'scale': 0.5,
34 | 'shear': 0.0,
35 | 'perspective': 0.0,
36 | 'flipud': 0.0,
37 | 'fliplr': 0.5,
38 | 'mosaic': 1.0,
39 | 'mixup': 0.0,
40 | 'copy_paste': 0.0,
41 | 'mask_ratio': 0.0,
42 | 'overlap_mask': 0.0,
43 | 'conf': 0.25,
44 | 'iou': 0.45,
45 | 'max_det': 1000,
46 | 'plots': False,
47 | 'half': False, # use half precision (FP16)
48 | 'dnn': False,
49 | 'data': None,
50 | 'imgsz': 640,
51 | 'verbose': False
52 | }
53 | YOLO_HYPERPARAMETERS = ParameterDict(YOLO_HYPERPARAMETERS)
54 | NAMES = ('pedestrian', 'person', 'car', 'van', 'bus', 'truck', 'motor', 'bicycle', 'awning-tricycle', 'tricycle',
55 | 'block', 'car_group')
56 |
57 |
58 | class VisDroneDataset(Dataset):
59 | """
60 | A PyTorch Dataset class for the VisDrone dataset.
61 | """
62 |
63 | def __init__(self, root: str, hyp: Dict[str, Any], augment: bool = True):
64 | """
65 | Initialize the dataset.
66 |
67 | Args:
68 | root (str): Path to the root directory of the dataset.
69 | hyp (Dict[str, Any]): Hyperparameters dictionary.
70 | augment (bool, optional): Whether to apply data augmentation. Defaults to True.
71 | """
72 | self.root = root
73 | self.dataset = LoadImagesAndLabels(
74 | path=root,
75 | augment=augment,
76 | hyp=hyp,
77 | # rect=True
78 | )
79 |
80 | def __getitem__(self, index: int) -> Tuple[torch.Tensor, Any]:
81 | """
82 | Get an item from the dataset.
83 |
84 | Args:
85 | index (int): Index of the item.
86 |
87 | Returns:
88 | Tuple[torch.Tensor, Any]: A tuple containing the image tensor and the label.
89 | """
90 | dt = self.dataset[index]
91 | return dt[0].float() / 255.0, dt[1]
92 |
93 | def __len__(self) -> int:
94 | """
95 | Get the length of the dataset.
96 |
97 | Returns:
98 | int: The number of items in the dataset.
99 | """
100 | return len(self.dataset)
101 |
102 |
103 | def collate_fn(batch: List[Tuple[torch.Tensor, torch.Tensor, str, Tuple[int, int]]]) \
104 | -> Tuple[torch.Tensor, torch.Tensor, List[str], Tuple[Tuple[int, int], ...]]:
105 | """
106 | Custom collate function for DataLoader.
107 |
108 | Args:
109 | batch (List[Tuple[torch.Tensor, torch.Tensor, str, Tuple[int, int]]]): List of tuples, each containing an image tensor, label tensor, image path, and a tuple of image dimensions.
110 |
111 | Returns:
112 | Tuple[torch.Tensor, torch.Tensor, List[str], Tuple[Tuple[int, int], ...]]: A tuple containing stacked image tensors, concatenated label tensors, list of image paths, and a tuple of image dimensions.
113 | """
114 | im, label, path, shapes = zip(*batch) # transposed
115 | for i, lb in enumerate(label):
116 | lb[:, 0] = i # add target image index for build_targets()
117 | return torch.stack(im, 0).float(), torch.cat(label, 0), path, shapes
118 |
119 |
120 | def load_dataset(root: str = "datasets/visdrone",
121 | augment: bool = True,
122 | hyp: Dict[str, Any] = YOLO_HYPERPARAMETERS) \
123 | -> Dict[str, Union[YOLODataset, Dict[str, List[int]], Dict[str, Dict[int, List[int]]]]]:
124 | """
125 | Load the VisDrone dataset with YOLO format.
126 |
127 | Args:
128 |         root (str, optional): Path to the root directory of the dataset. Defaults to "datasets/visdrone".
129 |         augment (bool, optional): Whether to apply data augmentation. Defaults to True.
130 | hyp (Dict[str, Any], optional): Hyperparameters dictionary. Defaults to YOLO_HYPERPARAMETERS.
131 |
132 | Returns:
133 | Dict[str, Union[YOLODataset, Dict[str, List[int]], Dict[str, Dict[int, List[int]]]]]: A dictionary containing train, val, and test datasets, client_mapping, and split information.
134 | """
135 | print(f"Loading VisDrone dataset from {os.path.join(root, 'train')}...")
136 | dataset_train = YOLODataset(
137 | img_path=os.path.join(root, 'train'),
138 | hyp=hyp,
139 | augment=augment,
140 |         names=list(NAMES),
142 | )
143 |
144 | dataset_val = YOLODataset(
145 | img_path=os.path.join(root, 'val'),
146 | hyp=hyp,
147 | augment=False,
148 |         names=list(NAMES)
150 | )
151 |
152 | dataset_test = YOLODataset(
153 | img_path=os.path.join(root, 'test'),
154 | hyp=hyp,
155 | augment=False,
156 |         names=list(NAMES)
158 | )
159 |
160 | df = pd.read_csv(f'{root}/split.csv', index_col='image_id')
161 | targets = []
162 | for i, d in tqdm(enumerate(dataset_train)):
163 | p = dataset_train[i]['im_file'].split('/')[-1]
164 | c = df.loc[p]['cluster']
165 | targets.append(c)
166 | if not Path('visdrone_client_mapping.pt').exists():
167 | client_mapping = {k: [] for k in range(100)}
168 | for i, d in tqdm(enumerate(dataset_train)):
169 | p = dataset_train[i]['im_file'].split('/')[-1]
170 | c = df.loc[p]['cluster']
171 | client_mapping[c].append(i)
172 | torch.save(client_mapping, 'visdrone_client_mapping.pt')
173 | dataset_train.targets = targets
174 | client_mapping = torch.load('visdrone_client_mapping.pt')
175 | return {
176 | 'train': dataset_train,
177 | 'val': dataset_val,
178 | 'test': dataset_test,
179 | 'client_mapping': None,
180 | 'split': {'train': client_mapping}
181 | }
182 |
183 |
184 | if __name__ == "__main__":
185 | # visdrone_folder = "../datasets/visdrone"
186 | # output_folder = "../datasets/visdrone/yolo_format"
187 | #
188 | # convert_visdrone_to_yolo_format(visdrone_folder, output_folder)
189 | load_dataset('../datasets/visdrone')
190 |
--------------------------------------------------------------------------------
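collate_fn above follows the YOLOv5 convention: stack images, concatenate label rows, and rewrite each label's first column to its image index within the batch. Note that it expects the raw 4-tuples (im, label, path, shapes) yielded by LoadImagesAndLabels, so the sketch below pairs it with the wrapped .dataset rather than the 2-tuple VisDroneDataset interface; the dataset path is an assumption:

from torch.utils.data import DataLoader

from loaders.visdrone import VisDroneDataset, YOLO_HYPERPARAMETERS, collate_fn

ds = VisDroneDataset('datasets/visdrone/train', hyp=YOLO_HYPERPARAMETERS)
loader = DataLoader(ds.dataset, batch_size=4, collate_fn=collate_fn)
ims, labels, paths, shapes = next(iter(loader))
print(ims.shape, labels.shape)  # (4, 3, H, W) and (total_boxes, 6)

--------------------------------------------------------------------------------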
/loaders/widar.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | from typing import List, Tuple, Dict
4 |
5 | import numpy as np
6 | import torch
7 | from torch.utils.data import Dataset
8 |
9 | from partition.utils import train_test_split, make_split
10 |
11 |
12 | class WidarDataset(Dataset):
13 | def __init__(self, data: List[Tuple[np.ndarray, int]]):
14 | """
15 | Initialize the WidarDataset class.
16 |
17 | Args:
18 | data (List[Tuple[np.ndarray, int]]): List of tuples containing input data and corresponding labels.
19 | """
20 | self.data = data
21 | self.targets = [d[1] for d in data]
22 |
23 | def __len__(self) -> int:
24 | """
25 | Return the length of the dataset.
26 |
27 | Returns:
28 | int: Length of the dataset.
29 | """
30 | return len(self.data)
31 |
32 | def __getitem__(self, idx: int) -> Tuple[np.ndarray, int]:
33 | """
34 | Get an item from the dataset by index.
35 |
36 | Args:
37 | idx (int): Index of the desired data.
38 |
39 | Returns:
40 | Tuple[np.ndarray, int]: A tuple containing the input data and corresponding label.
41 | """
42 | return self.data[idx][0].reshape(22, 20, 20), self.data[idx][1]
43 |
44 |
45 | def map_array(my_array: np.ndarray, mapping_dict: Dict[int, int]) -> np.ndarray:
46 | """
47 | Map values in a NumPy array based on a provided mapping dictionary.
48 |
49 | Args:
50 | my_array (np.ndarray): Input NumPy array to be mapped.
51 | mapping_dict (Dict[int, int]): Dictionary containing the mapping of input values to output values.
52 |
53 | Returns:
54 | np.ndarray: Mapped NumPy array.
55 | """
56 | mapping_func = np.vectorize(lambda x: mapping_dict.get(x, x))
57 | mapped_array = mapping_func(my_array)
58 | return mapped_array
59 |
60 |
61 | def filter_data(datum: Tuple[np.ndarray, List[int]], selected_classes: List[int]) -> Tuple[np.ndarray, np.ndarray]:
62 | """
63 | Filter input data and labels based on the selected classes.
64 |
65 | Args:
66 | datum (Tuple[np.ndarray, List[int]]): Tuple containing input data and corresponding labels.
67 | selected_classes (List[int]): List of selected classes to filter.
68 |
69 | Returns:
70 | Tuple[np.ndarray, np.ndarray]: Tuple containing filtered input data and corresponding labels.
71 | """
72 | input_data = datum[0]
73 | input_labels = np.array(datum[1])
74 | replace_classes = {v: k for k, v in enumerate(selected_classes)}
75 | mask = np.isin(input_labels, selected_classes)
76 | filtered_array = input_data[mask, :, :]
77 | filtered_classes = input_labels[mask]
78 | filtered_classes = map_array(filtered_classes, replace_classes)
79 |
80 | return filtered_array, filtered_classes
81 |
82 |
83 | def split_dataset(data: List[Tuple[np.ndarray, int]],
84 | client_mapping_train: Dict[int, List[int]],
85 | client_mapping_test: Dict[int, List[int]]) \
86 | -> Tuple[WidarDataset, WidarDataset, Dict[str, Dict[int, List[int]]]]:
87 | """
88 | Split the dataset into train and test sets based on the client mappings.
89 |
90 | Args:
91 | data (List[Tuple[np.ndarray, int]]): The input dataset as a list of tuples containing input data and corresponding labels.
92 | client_mapping_train (Dict[int, List[int]]): A dictionary containing the client indices for the training set.
93 | client_mapping_test (Dict[int, List[int]]): A dictionary containing the client indices for the test set.
94 |
95 | Returns:
96 | Tuple[WidarDataset, WidarDataset, Dict[str, Dict[int, List[int]]]]: A tuple containing the train and test WidarDatasets, and a dictionary with train and test mappings.
97 | """
98 | all_train, mapping_train = make_split(client_mapping_train)
99 | all_test, mapping_test = make_split(client_mapping_test)
100 |
101 | train_data = [data[i] for i in all_train]
102 | test_data = [data[i] for i in all_test]
103 | return WidarDataset(train_data), WidarDataset(test_data), {'train': mapping_train, 'test': mapping_test}
104 |
105 |
106 | def load_dataset(split=[x for x in list(range(0, 17)) if x not in [0, 1, 2, 3, 15]],
107 | selected_classes=[0, 3, 7, 10, 12, 14, 15, 16, 19],
108 | reprocess=False):
109 | """
110 | Load and preprocess the Widar dataset.
111 |
112 | Args:
113 |         split (List[int], optional): List of client indices to include in the training set. Defaults to [x for x in list(range(0, 17)) if x not in [0, 1, 2, 3, 15]].
114 | selected_classes (List[int], optional): List of selected classes to filter. Defaults to [0, 3, 7, 10, 12, 14, 15, 16, 19].
115 | reprocess (bool, optional): Whether to reprocess the dataset or use existing preprocessed data. Defaults to False.
116 |
117 | Returns:
118 | Dict[str, Union[WidarDataset, Dict[int, List[int]]]]: Dictionary containing the full_dataset, train and test datasets, client_mapping, and split information.
119 | """
120 | path = 'datasets/widar/federated'
121 |
122 | data = os.listdir(path)
123 | dtt = []
124 | for i in data:
125 | if i.endswith('.pkl'):
126 | try:
127 | with open(f'{path}/{i}', 'rb') as f:
128 | dtt.append(torch.load(f))
129 | except pickle.UnpicklingError as e:
130 |                 print(f'Error loading {i}: {e}')
131 | data = dtt
132 | data.sort(key=lambda x: len(x[-1]))
133 | data = [filter_data(d, selected_classes) for d in data]
134 | all_users = list(range(0, len(data)))
135 | cl_idx = {}
136 | i = 0
137 | for j in all_users:
138 | d = data[j]
139 | cl_idx[j] = list(range(i, i + len(d[0])))
140 | i += len(d[0])
141 |
142 | x = [d[0] for d in data]
143 | x = np.concatenate(x, axis=0, dtype=np.float32)
144 | x = (x - .0025) / .0119
145 | y = np.concatenate([d[1] for d in data])
146 | data = [(x[i], y[i]) for i in range(len(x))]
147 | dataset = WidarDataset(data)
148 | data = [dataset[i] for i in range(len(dataset))]
149 | client_mapping_train, client_mapping_test = train_test_split(cl_idx, split)
150 | train_dataset, test_dataset, split = split_dataset(data, client_mapping_train, client_mapping_test)
151 | data_dict = {
152 | 'full_dataset': dataset,
153 | 'train': train_dataset,
154 | 'test': test_dataset,
155 | 'client_mapping': cl_idx,
156 | 'split': split
157 | }
158 | return data_dict
159 |
160 |
161 | if __name__ == '__main__':
162 | dt = load_dataset()
163 | print(len(dt['train']))
--------------------------------------------------------------------------------
/models/casas.py:
--------------------------------------------------------------------------------
1 | import torch
3 | import torch.nn as nn
4 |
5 | class LSTMModel(nn.Module):
6 |     def __init__(self, input_dim, output_dim, max_length=2000, no_activities=12):  # max_length unused; kept so all models share one constructor signature
7 | super(LSTMModel, self).__init__()
8 | self.embedding = nn.Embedding(input_dim, output_dim, padding_idx=0)
9 | self.lstm = nn.LSTM(output_dim, output_dim, batch_first=True)
10 | self.fc = nn.Linear(output_dim, no_activities)
11 | # self.fc = nn.Sequential(nn.Flatten(),
12 | # nn.Dropout(0.2),
13 | # nn.Linear(output_dim, output_dim),
14 | # nn.ReLU(),
15 | # nn.Dropout(0.2),
16 | # nn.Linear(64, no_activities))
17 | def forward(self, x):
18 |         x = self.embedding(x)
19 |         x, _ = self.lstm(x)
20 |         x = self.fc(x[:, -1, :])
24 | return x
25 | class BiLSTMModel(nn.Module):
26 | def __init__(self, input_dim=2000, output_dim=64, max_length=2000, no_activities=12):
27 | super(BiLSTMModel, self).__init__()
28 | self.embedding = nn.Embedding(input_dim, output_dim, padding_idx=0)
29 | self.lstm = nn.LSTM(output_dim, output_dim, bidirectional=True, batch_first=True)
30 | self.fc = nn.Linear(output_dim * 2, no_activities)
31 |
32 | def forward(self, x):
33 | x = self.embedding(x.type(torch.long))
34 | x, _ = self.lstm(x)
35 | x = self.fc(x[:, -1, :])
36 | return x
37 |
38 | class Ensemble2LSTMModel(nn.Module):
39 | def __init__(self, input_dim, output_dim, max_length, no_activities):
40 | super(Ensemble2LSTMModel, self).__init__()
41 | self.model1 = BiLSTMModel(input_dim, output_dim, max_length, no_activities)
42 | self.model2 = LSTMModel(input_dim, output_dim, max_length, no_activities)
43 | self.fc = nn.Linear(output_dim * 2, no_activities)
44 |
45 | def forward(self, x):
46 | x1 = self.model1(x)
47 | x2 = self.model2(x)
48 | x = torch.cat((x1, x2), dim=1)
49 | x = self.fc(x)
50 | return x
51 |
52 | class CascadeEnsembleLSTMModel(nn.Module):
53 | def __init__(self, input_dim, output_dim, max_length, no_activities):
54 | super(CascadeEnsembleLSTMModel, self).__init__()
55 | self.model1 = BiLSTMModel(input_dim, output_dim, max_length, no_activities)
56 | self.model2 = LSTMModel(input_dim, output_dim, max_length, no_activities)
57 | self.lstm = nn.LSTM(output_dim * 2, output_dim, batch_first=True)
58 | self.fc = nn.Linear(output_dim, no_activities)
59 |
60 | def forward(self, x):
61 | x1 = self.model1.embedding(x)
62 | x2 = self.model2.embedding(x)
63 | x1, _ = self.model1.lstm(x1)
64 | x2, _ = self.model2.lstm(x2)
65 | x = torch.cat((x1, x2), dim=2)
66 | x, _ = self.lstm(x)
67 | x = self.fc(x[:, -1, :])
68 | return x
69 |
70 | class CascadeEnsembleLSTMModel(nn.Module):  # NOTE: redefines the class of the same name above; this second definition is the one Python keeps
71 | def __init__(self, input_dim, output_dim, max_length, no_activities):
72 | super(CascadeEnsembleLSTMModel, self).__init__()
73 | self.embedding1 = nn.Embedding(input_dim, output_dim, padding_idx=0)
74 | self.embedding2 = nn.Embedding(input_dim, output_dim, padding_idx=0)
75 | self.lstm1 = nn.LSTM(output_dim, output_dim, bidirectional=True, batch_first=True)
76 | self.lstm2 = nn.LSTM(output_dim, output_dim, batch_first=True)
77 | self.lstm3 = nn.LSTM(output_dim * 2, output_dim, batch_first=True)
78 | self.fc = nn.Linear(output_dim, no_activities)
79 |
80 | def forward(self, x):
81 | x1 = self.embedding1(x)
82 | x2 = self.embedding2(x)
83 | x1, _ = self.lstm1(x1)
84 | x2, _ = self.lstm2(x2)
85 | x = torch.cat((x1, x2), dim=2)
86 | x, _ = self.lstm3(x)
87 | x = self.fc(x[:, -1, :])
88 | return x
89 |
90 | class CascadeLSTMModel(nn.Module):
91 | def __init__(self, input_dim, output_dim, max_length, no_activities):
92 | super(CascadeLSTMModel, self).__init__()
93 | self.embedding = nn.Embedding(input_dim, output_dim, padding_idx=0)
94 | self.lstm1 = nn.LSTM(output_dim, output_dim, bidirectional=True, batch_first=True)
95 | self.lstm2 = nn.LSTM(output_dim * 2, output_dim, batch_first=True)
96 | self.fc = nn.Linear(output_dim, no_activities)
97 |
98 | def forward(self, x):
99 | x = self.embedding(x)
100 | x, _ = self.lstm1(x)
101 | x, _ = self.lstm2(x)
102 | x = self.fc(x[:, -1, :])
103 | return x
104 |
105 |
106 |
--------------------------------------------------------------------------------
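A quick shape check for BiLSTMModel above: inputs are integer-coded sensor sequences, which are embedded, run through a bidirectional LSTM, and classified from the last timestep. The batch and sequence sizes are arbitrary:

import torch

from models.casas import BiLSTMModel

model = BiLSTMModel(input_dim=2000, output_dim=64, no_activities=12)
x = torch.randint(0, 2000, (8, 100))  # (batch, seq_len) of sensor token ids
logits = model(x)
print(logits.shape)  # torch.Size([8, 12])

--------------------------------------------------------------------------------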
/models/ego4d.py:
--------------------------------------------------------------------------------
1 | import types
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torchvision
6 |
7 |
8 | def forward_reimpl(self, x):
9 | x = self.conv1(x)
10 | x = self.bn1(x)
11 | x = self.relu(x)
12 | x = self.maxpool(x)
13 |
14 | x = self.layer1(x)
15 | x = self.layer2(x)
16 | x = self.layer3(x)
17 | x = self.layer4(x)
18 |
19 | x = self.avgpool(x)
20 | # x = torch.flatten(x, 1)
21 | # x = self.fc(x)
22 |
23 | return x.squeeze(2).squeeze(2)
24 |
25 |
26 | class CNNLSTM(nn.Module):
27 | def __init__(self, hidden_size=512, num_layers=1, state=False):
28 | super(CNNLSTM, self).__init__()
29 | self.backbone = torchvision.models.resnet18(pretrained=False)
30 | self.backbone.fc = None
31 | self.lstm = nn.LSTM(512, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)
32 | self.regressor = nn.Linear(hidden_size * 2, 1)
33 | self.state = state
34 | if self.state:
35 | self.state_classifier = nn.Linear(hidden_size * 2, 2)
36 | self.backbone.forward = types.MethodType(forward_reimpl, self.backbone)
37 |
38 | def forward(self, x):
39 | # x: (b, c, seq_len, h, w)
40 | seq_len = x.shape[2]
41 | batch_size = x.shape[0]
42 | x = x.permute((0, 2, 1, 3, 4))
43 | x = x.reshape(-1, x.shape[2], x.shape[3], x.shape[4])
44 | x = self.backbone(x)
45 |
46 | x = x.view(batch_size, seq_len, -1)
47 | x, _ = self.lstm(x) # (b, seq_len, hidden_size*2)
48 | out = self.regressor(x).squeeze(2)
49 | if self.state:
50 | state = self.state_classifier(x.mean(1))
51 | return torch.sigmoid(out), state
52 | return torch.sigmoid(out)
53 |
--------------------------------------------------------------------------------
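
A quick shape check for `CNNLSTM` (a sketch, not repository code). The small spatial size is an arbitrary assumption; it works because ResNet-18 ends in an adaptive average pool. `state=True` is set to exercise both heads:

```python
import torch

from models.ego4d import CNNLSTM

model = CNNLSTM(hidden_size=512, num_layers=1, state=True)
model.eval()
clip = torch.randn(2, 3, 8, 64, 64)  # (batch, channels, seq_len, h, w)
with torch.no_grad():
    keyframe_scores, state_logits = model(clip)
print(keyframe_scores.shape, state_logits.shape)  # torch.Size([2, 8]) torch.Size([2, 2])
```
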
/models/emognition.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | class LSTMRegressor(nn.Module):
5 | def __init__(self, input_size=5, output_size=2, hidden_size=128, num_layers=2):
6 | super(LSTMRegressor, self).__init__()
7 | self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
8 | self.fc = nn.Linear(hidden_size, output_size)
9 | self.dropout = nn.Dropout(0.3)
10 |         self.output_activation = nn.Softmax(dim=1)  # explicit dim: implicit-dim Softmax is deprecated
11 |
12 | def forward(self, x):
13 | # Input shape: (batch_size, sequence_length, num_features)
14 | x, _ = self.lstm(x) # LSTM output shape: (batch_size, sequence_length, hidden_size)
15 | x = self.fc(self.dropout(x[:, -1, :]))
16 | # Use the last LSTM output; shape: (batch_size, output_size)
17 | return self.output_activation(x)
18 |
19 |
20 | class CNN_LSTM_Regressor(nn.Module):
21 | def __init__(self, input_size=5, num_emotions=2):
22 | super(CNN_LSTM_Regressor, self).__init__()
23 |
24 | self.conv1 = nn.Conv1d(input_size, 64, kernel_size=3, padding=1)
25 | self.relu = nn.ReLU()
26 | self.max_pool = nn.MaxPool1d(kernel_size=2)
27 | self.lstm = nn.LSTM(64, 128, num_layers=1, batch_first=True)
28 | self.fc = nn.Linear(128, num_emotions)
29 |         self.output_activation = nn.Softmax(dim=1)  # explicit dim: implicit-dim Softmax is deprecated
30 |
31 | def forward(self, x):
32 | # Input shape: (batch_size, sequence_length, num_features)
33 | x = x.permute(0, 2, 1) # Change shape to (batch_size, num_features, sequence_length)
34 |
35 | # 1D Convolution
36 | x = self.conv1(x)
37 | x = self.relu(x)
38 | x = self.max_pool(x)
39 |
40 | x = x.permute(0, 2, 1) # Change shape to (batch_size, sequence_length, num_channels)
41 |
42 | # LSTM
43 | x, _ = self.lstm(x)
44 |
45 | # Fully connected layer
46 | x = self.fc(x[:, -1, :]) # Use the last LSTM output
47 |
48 | return self.output_activation(x)
49 |
--------------------------------------------------------------------------------
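
A small smoke test for `CNN_LSTM_Regressor` (a sketch; the 60-step window length is an arbitrary assumption, the model itself only fixes `input_size`):

```python
import torch

from models.emognition import CNN_LSTM_Regressor

model = CNN_LSTM_Regressor(input_size=5, num_emotions=2)
model.eval()
window = torch.randn(4, 60, 5)  # (batch, sequence_length, num_features)
with torch.no_grad():
    probs = model(window)
print(probs.shape, probs.sum(dim=1))  # torch.Size([4, 2]); each row sums to 1
```
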
/models/energy.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 |
4 | class MLP(nn.Module):
5 | def __init__(self, input_size=18, hidden_size=210, output_size=1):
6 | super(MLP, self).__init__()
7 | self.layer1 = nn.Linear(input_size, hidden_size)
8 | self.layer2 = nn.Linear(hidden_size, hidden_size * 2)
9 | self.layer3 = nn.Linear(hidden_size * 2, hidden_size * 4)
10 | self.layer4 = nn.Linear(hidden_size * 4, hidden_size * 2)
11 | self.layer5 = nn.Linear(hidden_size * 2, hidden_size)
12 | self.output_layer = nn.Linear(hidden_size, output_size)
13 | self.relu = nn.ReLU()
14 | self.dropout = nn.Dropout(0.3)
15 |
16 | def forward(self, x):
17 | out = self.relu(self.layer1(x))
18 | out = self.dropout(out)
19 | out = self.relu(self.layer2(out))
20 | out = self.dropout(out)
21 | out = self.relu(self.layer3(out))
22 | out = self.dropout(out)
23 | out = self.relu(self.layer4(out))
24 | out = self.dropout(out)
25 | out = self.relu(self.layer5(out))
26 | out = self.dropout(out)
27 | out = self.output_layer(out)
28 | return out
29 |
--------------------------------------------------------------------------------
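
The energy model is a plain feed-forward regressor; a one-line sanity check (a sketch, using the constructor defaults):

```python
import torch

from models.energy import MLP

model = MLP()  # defaults: 18 input features -> 1 regression output
features = torch.randn(16, 18)
print(model(features).shape)  # torch.Size([16, 1])
```
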
/models/utils.py:
--------------------------------------------------------------------------------
1 | import inspect
2 |
3 | from torch import nn
4 | from ultralytics.nn.tasks import DetectionModel
5 |
6 | from models import widar, wisdm, ut_har, emognition, casas, energy, ego4d, epic_sounds
7 |
8 | MODULE_MAP = {
9 | 'wisdm_phone': wisdm,
10 | 'wisdm_watch': wisdm,
11 | 'widar': widar,
12 | 'ut_har': ut_har,
13 | 'emognition': emognition,
14 | 'casas': casas,
15 | 'energy': energy,
16 | 'ego4d': ego4d,
17 | 'epic_sounds': epic_sounds
18 | }
19 |
20 |
21 | def find_subclasses_and_factory_functions(module, parent_class):
22 | results = []
23 |
24 | for _, obj in inspect.getmembers(module):
25 | # Check if it's a class and a subclass of the parent_class
26 | if inspect.isclass(obj) and issubclass(obj, parent_class) and obj != parent_class:
27 | results.append(obj)
28 | # Check if it's a function
29 | elif inspect.isfunction(obj):
30 | try:
31 | # Get the function's return type annotation
32 | return_annotation = inspect.signature(obj).return_annotation
33 |
34 | # Check if the return type annotation is a subclass of the parent_class
35 | if inspect.isclass(return_annotation) and issubclass(return_annotation,
36 | parent_class) and return_annotation != parent_class:
37 | results.append(obj)
38 | except (TypeError, ValueError, KeyError):
39 | # Ignore the function if the return type annotation is missing or not valid
40 | pass
41 |
42 | return results
43 |
44 |
45 | def find_class_by_name(class_list, target_name):
46 | return next((cls for cls in class_list if cls.__name__ == target_name), None)
47 |
48 |
49 | def load_model(model_name, trainer, dataset_name):
50 | if trainer == 'ultralytics':
51 | return DetectionModel(cfg=model_name)
52 |
53 | if dataset_name not in MODULE_MAP:
54 | raise ValueError('Dataset not supported')
55 |
56 | modules = find_subclasses_and_factory_functions(MODULE_MAP[dataset_name], nn.Module)
57 | model_cls = find_class_by_name(modules, model_name)
58 |
59 | if not model_cls:
60 | raise ValueError(f'No class found with the given name: {model_name}')
61 |
62 | return model_cls()
63 |
64 |
65 | if __name__ == '__main__':
66 | model = load_model('UT_HAR_ResNet18', 'BaseTrainer', 'ut_har')
67 | print(model)
68 |
--------------------------------------------------------------------------------
/models/wisdm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | # from torchsummary import summary
4 |
5 |
6 | class LSTM_NET(nn.Module):
7 | """Class to design a LSTM model."""
8 |
9 | def __init__(self, input_dim=6, hidden_dim=6, time_length=200):
10 | """Initialisation of the class (constructor)."""
11 | # Input:
12 |         #     input_dim; integer
13 | # hidden_dim; integer
14 | # time_length; integer
15 |
16 | super().__init__()
17 |
18 | self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True, num_layers=1)
19 | self.net = nn.Sequential(nn.Flatten(),
20 | nn.Dropout(0.2),
21 | nn.Linear(time_length * hidden_dim, 128),
22 | nn.ReLU(),
23 | nn.Dropout(0.2),
24 | nn.Linear(128, 12))
25 |
26 | def forward(self, input_data):
27 |         """Forward pass: propagate the input through the network layers."""
28 | # Input:
29 | # input_data; torch.Tensor
30 | # Output:
31 | # x; torch.Tensor
32 |
33 | x, h = self.lstm(input_data)
34 | x = self.net(x)
35 |
36 | return x
37 |
38 |
39 | class GRU_NET(nn.Module):
40 | """Class to design a GRU model."""
41 |
42 | def __init__(self, input_size, hidden_size, num_layers, output_size, time_length):
43 | """Initialisation of the class (constructor)."""
44 | # Input:
45 |         #     input_size; integer
46 |         #     hidden_size; integer
47 |         #     num_layers; integer
48 |         #     output_size; integer
49 |         #     time_length; integer
50 |
51 | super().__init__()
52 |
53 | self.input_size = input_size
54 | self.hidden_size = hidden_size
55 | self.num_layers = num_layers
56 | self.output_size = output_size
57 | self.time_length = time_length
58 |
59 | self.gru = nn.GRU(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
60 |
61 | self.net = nn.Sequential(nn.Flatten(),
62 | nn.Linear(self.time_length * self.hidden_size, self.output_size, bias=True))
63 |
64 | def forward(self, input_data):
65 |         """Forward pass: propagate the input through the network layers."""
66 |         # Input:
67 |         #     input_data; torch.Tensor
68 |         # Output:
69 |         #     x; torch.Tensor
70 |         #     (the GRU hidden state h is computed but not returned)
71 |
72 | x, h = self.gru(input_data)
73 | x = self.net(x)
74 |
75 | return x
76 |
77 |
78 | class CNN_NET_V1(nn.Module):
79 | """Class to design a CNN model."""
80 |
81 | def __init__(self, time_length=200):
82 | """Initialisation of the class (constructor)."""
83 |
84 | super().__init__()
85 |
86 | self.relu = nn.ReLU()
87 | self.dropout = nn.Dropout(0.5)
88 | self.softmax = nn.Softmax(dim=1)
89 |
90 | self.bncnn1 = nn.BatchNorm1d(64)
91 | self.bncnn2 = nn.BatchNorm1d(128)
92 | self.bncnn3 = nn.BatchNorm1d(256)
93 | self.bnbout = nn.BatchNorm1d(64)
94 |
95 | self.cnn1 = nn.Conv1d(time_length, 64, 3, padding=2)
96 | self.cnn2 = nn.Conv1d(64, 128, 3, padding=1)
97 | self.cnn3 = nn.Conv1d(128, 256, 3, padding=1)
98 |
99 | self.avgpool = nn.AvgPool1d(3)
100 |
101 | self.linbout = nn.Linear(512, 64, bias=True)
102 | self.linout = nn.Linear(64, 12, bias=True)
103 |
104 | def forward(self, input_data):
105 |         """Forward pass: propagate the input through the network layers."""
106 | # Input:
107 | # input_data; torch.Tensor
108 | # Output:
109 | # x; torch.Tensor
110 |
111 |         # Input dimension: batch_size, time_length, 6 (6 features are required by the 512-unit linbout layer)
112 |         # x = input_data.unsqueeze(2) # add one dimension
113 |         x = input_data
114 |         # Conv1d treats the time axis as input channels
115 |         x = self.cnn1(x)
116 |         x = self.bncnn1(x)
117 |         x = self.relu(x)
118 |
119 |         # Input dimension: batch_size, 64, 8 (kernel 3, padding 2: 6 + 4 - 3 + 1 = 8)
120 |         x = self.cnn2(x)
121 |         x = self.bncnn2(x)
122 |         x = self.relu(x)
123 |
124 |         # Input dimension: batch_size, 128, 8
125 |         x = self.cnn3(x)
126 |         x = self.bncnn3(x)
127 |         x = self.relu(x)
128 |
129 |         # Input dimension: batch_size, 256, 8
130 |         x = self.avgpool(x)
131 |
132 |         # Input dimension: batch_size, 256, 2; flattened to 512
133 |         x = self.linbout(torch.flatten(x, 1))
134 |         x = self.bnbout(x)
135 |         x = self.relu(x)
136 |         x = self.dropout(x)
137 |
138 |         # Input dimension: batch_size, 64
139 |         x = self.linout(x)
140 |         x = self.softmax(x)
141 |         # Output dimension: batch_size, 12
142 |
143 | return x
144 |
145 |
146 | if __name__ == "__main__":
147 | x = torch.ones((64, 100, 6)).cuda()
148 | # lstmnet = CNN_NET_V1(6, 100)
149 | lstmnet = GRU_NET(6, 4, 2, 12, 100)
150 | # summary(lstmnet, (100, 6))
151 | o = lstmnet(x)
152 | print(o.shape)
153 |
--------------------------------------------------------------------------------
/models/yolov8.yaml:
--------------------------------------------------------------------------------
1 | nc: 12 # number of classes
2 | scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
3 | # [depth, width, max_channels]
4 | n: [0.33, 0.25, 1024]
5 | s: [0.33, 0.50, 1024]
6 | m: [0.67, 0.75, 768]
7 | l: [1.00, 1.00, 512]
8 | x: [1.00, 1.25, 512]
9 |
10 | # YOLOv8.0 backbone
11 | backbone:
12 | # [from, repeats, module, args]
13 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
14 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
15 | - [-1, 3, C2f, [128, True]]
16 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
17 | - [-1, 6, C2f, [256, True]]
18 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
19 | - [-1, 6, C2f, [512, True]]
20 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
21 | - [-1, 3, C2f, [1024, True]]
22 | - [-1, 1, SPPF, [1024, 5]] # 9
23 |
24 | # YOLOv8.0-p2 head
25 | head:
26 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
27 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
28 | - [-1, 3, C2f, [512]] # 12
29 |
30 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
32 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
33 |
34 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35 | - [[-1, 2], 1, Concat, [1]] # cat backbone P2
36 | - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall)
37 |
38 | - [-1, 1, Conv, [128, 3, 2]]
39 | - [[-1, 15], 1, Concat, [1]] # cat head P3
40 | - [-1, 3, C2f, [256]] # 21 (P3/8-small)
41 |
42 | - [-1, 1, Conv, [256, 3, 2]]
43 | - [[-1, 12], 1, Concat, [1]] # cat head P4
44 | - [-1, 3, C2f, [512]] # 24 (P4/16-medium)
45 |
46 | - [-1, 1, Conv, [512, 3, 2]]
47 | - [[-1, 9], 1, Concat, [1]] # cat head P5
48 | - [-1, 3, C2f, [1024]] # 27 (P5/32-large)
49 |
50 | - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
--------------------------------------------------------------------------------
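
This config is consumed through ultralytics' `DetectionModel`, mirroring the `trainer == 'ultralytics'` branch of `load_model` in models/utils.py (a sketch; note that ultralytics infers the compound scale from the file name, so an unsuffixed `yolov8.yaml` falls back to a default scale with a warning):

```python
from ultralytics.nn.tasks import DetectionModel

model = DetectionModel(cfg='models/yolov8.yaml')  # path relative to the repo root
print(sum(p.numel() for p in model.parameters()))
```
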
/partition/centralized.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 | from torch.utils.data import Dataset
5 |
6 | from partition.utils import IndexedSubset
7 |
8 |
9 | class CentralizedPartition:
10 | def __init__(self):
11 | pass
12 |
13 | def __call__(self, dataset) -> List[Dataset]:
14 | total_num = len(dataset)
15 | idxs = list(range(total_num))
16 | dataset_ref = dataset
17 | return [
18 | IndexedSubset(
19 | dataset_ref,
20 | indices=idxs,
21 | )
22 | ]
23 |
--------------------------------------------------------------------------------
/partition/dirichlet.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from typing import List
3 |
4 | import numpy as np
5 | from torch.utils.data import Dataset
6 |
7 | from partition.utils import IndexedSubset
8 |
9 |
10 | class DirichletPartition:
11 | def __init__(
12 | self,
13 | num_clients: int,
14 | alpha: float,
15 | num_class: int = 10,
16 | minimum_data_size: int = 20,
17 | max_iter=10000
18 | ):
19 | self.num_clients = num_clients
20 | self.alpha = alpha
21 | self.num_class = num_class
22 | self.minimum_data_size = minimum_data_size
23 | self.max_iter = max_iter
24 | self.distributions = defaultdict(lambda: np.random.dirichlet(np.repeat(self.alpha, self.num_clients)))
25 |
26 | def __call__(self, dataset) -> List[Dataset]:
27 | it = 0
28 | if not isinstance(dataset.targets, np.ndarray):
29 | dataset.targets = np.array(
30 | dataset.targets, dtype=np.int64
31 | )
32 | net_dataidx_map = {}
33 | min_size = 0
34 | idx_batch = [[] for _ in range(self.num_clients)]
35 | while min_size < self.minimum_data_size and it < self.max_iter:
36 | it += 1
37 | idx_batch = [[] for _ in range(self.num_clients)]
38 | # for each class in the dataset
39 | for k in range(self.num_class):
40 | idx_k = np.where(dataset.targets == k)[0]
41 | np.random.shuffle(idx_k)
42 | proportions = self.distributions[k]
43 |                 # Balance: zero the share of clients that already hold at least the average number of samples
44 | proportions = np.array(
45 | [
46 | p * (len(idx_j) < len(dataset) / self.num_clients)
47 | for p, idx_j in zip(proportions, idx_batch)
48 | ]
49 | )
50 | proportions = proportions / proportions.sum()
51 | proportions = (np.cumsum(proportions) * len(idx_k)).astype(int)[:-1]
52 | idx_batch = [
53 | idx_j + idx.tolist()
54 | for idx_j, idx in zip(idx_batch, np.split(idx_k, proportions))
55 | ]
56 | min_size = min([len(idx_j) for idx_j in idx_batch])
57 |
58 | # Redistribution loop
59 | it = 0
60 | while min_size < self.minimum_data_size and it < self.max_iter:
61 | # Find client with minimum and maximum samples
62 | min_samples_client = min(idx_batch, key=len)
63 | max_samples_client = max(idx_batch, key=len)
64 | # Get count of samples needed to reach minimum_data_size
65 | transfer_samples_count = self.minimum_data_size - len(min_samples_client)
66 | # Transfer samples from max_samples_client to min_samples_client
67 | min_samples_client.extend(max_samples_client[-transfer_samples_count:])
68 | del max_samples_client[-transfer_samples_count:]
69 | # Recalculate min_size
70 | min_size = min([len(idx_j) for idx_j in idx_batch])
71 | it += 1
72 |
73 | for j in range(self.num_clients):
74 | np.random.shuffle(idx_batch[j])
75 | net_dataidx_map[j] = idx_batch[j]
76 | dataset_ref = dataset
77 | return [
78 | IndexedSubset(
79 | dataset_ref,
80 | indices=net_dataidx_map[i],
81 | )
82 | for i in range(self.num_clients)
83 | ]
84 |
--------------------------------------------------------------------------------
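
Usage sketch for `DirichletPartition` (not repository code; `ToyDataset` is a stand-in exposing the `targets` attribute the partitioner requires):

```python
import numpy as np
from torch.utils.data import Dataset

from partition.dirichlet import DirichletPartition

class ToyDataset(Dataset):
    def __init__(self, n=1000, num_classes=10):
        self.targets = np.random.randint(0, num_classes, size=n)
    def __getitem__(self, i):
        return np.zeros(4, dtype=np.float32), self.targets[i]
    def __len__(self):
        return len(self.targets)

partition = DirichletPartition(num_clients=5, alpha=0.1, num_class=10)
client_datasets = partition(ToyDataset())
print([len(d) for d in client_datasets])  # skewed sizes; smaller alpha -> stronger skew
```
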
/partition/label.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from partition.utils import IndexedSubset
5 |
6 |
7 | class DisjointLabelPartition:
8 | def __init__(self, num_users, num_classes=10, max_class_per_user=2):
9 | self.num_users = num_users
10 | self.num_classes = num_classes
11 | self.max_unique_class_per_user = max_class_per_user
12 | self.label_split = None
13 |
14 | def __call__(self, dataset):
15 | class_indices_dict = {i: [] for i in range(self.num_classes)}
16 | client_data_indices_dict = {i: [] for i in range(self.num_users)}
17 | label = np.array(dataset.targets)
18 | for i in range(len(label)):
19 | label_i = label[i].item()
20 | class_indices_dict[label_i].append(i)
21 |
22 | num_classes = self.num_classes
23 | shard_per_user = self.max_unique_class_per_user
24 | label_idx_split = class_indices_dict
25 |
26 | shard_per_class = int(shard_per_user * self.num_users / num_classes)
27 |
28 | for label_i in label_idx_split:
29 | label_idx = label_idx_split[label_i]
30 | num_leftover = len(label_idx) % shard_per_class
31 | leftover = label_idx[-num_leftover:] if num_leftover > 0 else []
32 | new_label_idx = np.array(label_idx[:-num_leftover]) if num_leftover > 0 else np.array(label_idx)
33 | new_label_idx = new_label_idx.reshape((shard_per_class, -1)).tolist()
34 |
35 | for i, leftover_label_idx in enumerate(leftover):
36 | new_label_idx[i] = np.concatenate([new_label_idx[i], [leftover_label_idx]])
37 |
38 | label_idx_split[label_i] = new_label_idx
39 |
40 | if self.label_split is None:
41 | label_split = list(range(num_classes)) * shard_per_class
42 | label_split = torch.tensor(label_split)[torch.randperm(len(label_split))].tolist()
43 | label_split = np.array(label_split).reshape((self.num_users, -1)).tolist()
44 |
45 | for i in range(len(label_split)):
46 | label_split[i] = np.unique(label_split[i]).tolist()
47 |
48 | self.label_split = label_split
49 |
50 | for i in range(self.num_users):
51 | for label_i in self.label_split[i]:
52 | idx = torch.arange(len(label_idx_split[label_i]))[
53 | torch.randperm(len(label_idx_split[label_i]))[0]].item()
54 | client_data_indices_dict[i].extend(label_idx_split[label_i].pop(idx))
55 | dataset_ref = dataset
56 | return [IndexedSubset(dataset_ref, v) for _, v in client_data_indices_dict.items()]
57 |
--------------------------------------------------------------------------------
/partition/uniform.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 | from torch.utils.data import Dataset
5 |
6 | from partition.utils import IndexedSubset
7 |
8 |
9 | class UniformPartition:
10 | def __init__(
11 | self,
12 | num_clients: int,
13 | num_class: int = 10,
14 | ):
15 | self.num_clients = num_clients
16 | self.num_class = num_class
17 |
18 | def __call__(self, dataset) -> List[Dataset]:
19 | total_num = len(dataset)
20 | idxs = np.random.permutation(total_num)
21 | partitioned_idxs = np.array_split(idxs, self.num_clients)
22 | net_dataidx_map = {i: partitioned_idxs[i] for i in range(self.num_clients)}
23 | dataset_ref = dataset
24 | return [
25 | IndexedSubset(
26 | dataset_ref,
27 | indices=net_dataidx_map[i],
28 | )
29 | for i in range(self.num_clients)
30 | ]
31 |
--------------------------------------------------------------------------------
/partition/user_index.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from typing import List
4 |
5 | import numpy as np
6 | from torch.utils.data import Dataset
7 |
8 | from partition.utils import IndexedSubset
9 |
10 |
11 | class UserPartition:
12 | def __init__(
13 | self, user_idxs
14 | ):
15 | self.user_idx = user_idxs
16 |
17 | def __call__(self, dataset) -> List[Dataset]:
18 | dataset_ref = dataset
19 | return [
20 | IndexedSubset(
21 | dataset_ref,
22 | indices=v,
23 | )
24 | for _, v in self.user_idx.items()
25 | ]
26 |
--------------------------------------------------------------------------------
/partition/utils.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Iterable
2 | from pathlib import Path
3 | from typing import List, Sized
4 |
5 | import altair as alt
6 | import numpy as np
7 | import pandas as pd
8 | import torch
9 | from torch.utils.data import Dataset
10 |
11 |
12 | class IndexedSubset(Dataset):
13 | def __init__(self, dataset, indices):
14 | self.indices = indices
15 | self.dataset = dataset
16 | self.targets = [dataset.targets[i] for i in indices]
17 |
18 |     def __getitem__(self, index):
19 |         try:
20 |             i = self.indices[index]
21 |             return self.dataset[i]
22 |         except (KeyError, IndexError):  # `except KeyError or IndexError` only caught KeyError
23 |             # Dump debugging context, then re-raise
24 |             print(type(self))
25 |             print("index = {}".format(index))
26 |             print(type(self.indices))
27 |             print(self.indices)
28 |             raise
29 |
30 | def __len__(self):
31 | return len(self.indices)
32 |
33 |
34 | def train_test_split(client_mapping, split):
35 | if type(split) is float and split <= 1.0:
36 | train_clients = np.random.choice(list(client_mapping.keys()), int(len(client_mapping.keys()) * split),
37 | replace=False)
38 | elif isinstance(split, Iterable) and all(isinstance(item, int) for item in split):
39 | train_clients = list(split)
40 | elif type(split) is int and split < len(client_mapping.keys()):
41 | train_clients = np.random.choice(list(client_mapping.keys()), split, replace=False)
42 | else:
43 | raise ValueError("Invalid split value: {}".format(split))
44 | test_clients = list(set(client_mapping.keys()) - set(train_clients))
45 | return {k: client_mapping[k] for k in train_clients}, {k: client_mapping[k] for k in test_clients}
46 |
47 |
48 | def make_split(client_mapping_train):
49 | indices = []
50 | mapping_train = {k: [] for k in client_mapping_train.keys()}
51 | i = 0
52 | for k, v in client_mapping_train.items():
53 | indices += v
54 | for _ in range(len(v)):
55 | mapping_train[k].append(i)
56 | i += 1
57 | return indices, mapping_train
58 |
59 |
60 | def compute_client_data_distribution(datasets: List[Sized | Dataset], num_classes: int):
61 | class_distribution = []
62 | data_distribution = []
63 |
64 | for i in range(len(datasets)):
65 | class_counts = torch.zeros(num_classes)
66 | for j in range(len(datasets[i])):
67 | class_counts[int(datasets[i].targets[j])] += 1
68 | class_counts = class_counts.numpy()
69 | data_distribution.append(np.sum(class_counts))
70 | class_counts = class_counts / np.sum(class_counts)
71 | class_distribution.append(class_counts)
72 | return data_distribution, class_distribution
73 |
74 |
75 | def get_html_plots(data_distribution, class_distribution):
76 | data = []
77 | num_clients = len(data_distribution)
78 | for i in range(len(class_distribution[0])):
79 | for j in range(len(class_distribution)):
80 | data.append({"client": j, "class": i, "value": class_distribution[j][i]})
81 |
82 | heatmap = (
83 | alt.Chart(pd.DataFrame(data))
84 | .mark_rect()
85 | .encode(
86 | x=alt.X("client:N", title="Client"),
87 | y=alt.Y("class:N", title="Class"),
88 | color=alt.Color("value:Q", scale=alt.Scale(scheme="yellowgreenblue"),
89 | legend=alt.Legend(title="Percentage of Samples")),
90 | tooltip="value:Q",
91 | )
92 | .properties(
93 | title=alt.TitleParams(
94 | "Class Distribution of Clients",
95 | fontSize=12,
96 | ),
97 | # width=200,
98 | # height=120,
99 | )
100 | )
101 |
102 | text = (
103 | alt.Chart(pd.DataFrame(data))
104 | .mark_text()
105 | .encode(
106 | x=alt.X("client:N"),
107 | y=alt.Y("class:N"),
108 | text=alt.Text("value:Q", format=".2f", ),
109 | color=alt.condition(
110 | alt.datum.value > 0.5, alt.value("black"), alt.value("white")
111 | ),
112 | )
113 | .transform_filter((alt.datum.value > 0.01))
114 | )
115 |
116 | data_bar = (
117 | alt.Chart(pd.DataFrame({"client": range(num_clients), "value": data_distribution}))
118 | .mark_bar()
119 | .encode(
120 | x=alt.X("client:N", title="Client", axis=alt.Axis(labelFontSize=8)),
121 | y=alt.Y("value:Q", title="Data Samples", axis=alt.Axis(labelFontSize=8)),
122 | tooltip="value:Q",
123 | )
124 | .properties(
125 | title=alt.TitleParams(
126 | "Sample Distribution of Clients",
127 | fontSize=12,
128 | ),
129 | # width=200,
130 | # height=120,
131 | )
132 | )
133 | Path('logs/').mkdir(exist_ok=True)
134 | (heatmap + text).save('logs/class_dist.html'), data_bar.save('logs/data_dist.html')
135 | return 'logs/class_dist.html', 'logs/data_dist.html'
136 |
--------------------------------------------------------------------------------
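
A sketch of the split helpers above (the client mapping is hypothetical; in the pipeline it comes from a dataset's per-user index metadata):

```python
from partition.utils import make_split, train_test_split

client_mapping = {0: [0, 1, 2], 1: [3, 4], 2: [5, 6, 7, 8], 3: [9]}

train_map, test_map = train_test_split(client_mapping, split=0.75)  # keep 75% of clients for training
indices, compact_map = make_split(train_map)
print(train_map, test_map)
print(indices, compact_map)  # flattened indices, re-keyed to positions 0..len-1
```
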
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=1.13.1
2 | numpy>=1.24.2
3 | pandas>=1.5.2
4 | tqdm>=4.64.1
5 | altair>=4.1.0
6 | matplotlib>=3.6.0
7 | torchvision>=0.14.1
8 | pillow>=9.4.0
9 | ultralytics==8.0.57
10 | click>=8.0.4
11 | wandb>=0.14.0
12 | torchmetrics>=0.11.2
13 | ray>=2.3.0
14 | fire>=0.5.0
15 | requests>=2.28.2
16 | gdown>=4.7.1
17 | einops~=0.6.0
18 | scikit-learn
19 | scipy~=1.10.0
20 | av
21 | h5py
22 | chardet
23 | librosa
24 | llvmlite
25 | numba
26 | decorator
--------------------------------------------------------------------------------
/scorers/classification_evaluator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | from torchmetrics import Accuracy, F1Score, ConfusionMatrix
5 | from tqdm import tqdm
6 |
7 | from scorers.utils import LossMetric
8 |
9 |
10 | def evaluate(model, test_data, device, num_classes=12, batch_size=32):
11 | model.to(device)
12 | test_dataloader = torch.utils.data.DataLoader(
13 | dataset=test_data,
14 | shuffle=False,
15 | batch_size=batch_size,
16 | pin_memory=True,
17 | num_workers=1,
18 | drop_last=True,
19 | )
20 | model.eval()
21 |
22 |     criterion = nn.CrossEntropyLoss().to(device)  # mean reduction: LossMetric already scales by batch size when accumulating
23 | metrics = {
24 | 'accuracy': Accuracy(task="multiclass", num_classes=num_classes).to(device),
25 | 'f1_score': F1Score(task="multiclass", num_classes=num_classes, average='macro').to(device),
26 | 'confusion': ConfusionMatrix(task="multiclass", num_classes=num_classes).to(device),
27 | }
28 | lbl_type = torch.LongTensor
29 | losses = {'cross_entropy_loss': LossMetric(criterion).to(device)}
30 | with torch.no_grad():
31 | label_list, pred_list = list(), list()
32 | for batch_idx, (data, labels) in enumerate(tqdm(test_dataloader)):
33 | # for data, labels, lens in test_data:
34 | labels = labels.type(lbl_type)
35 | data, labels = data.to(device), labels.to(device)
36 | output = model(data)
37 | for lm in losses.values():
38 | lm.update(output, labels)
39 | # pred = output.data.max(1, keepdim=True)[1]
40 |
41 | for mm in metrics.values():
42 | mm.update(output, labels)
43 | # pred = output.data.max(1, keepdim=True)[
44 | # 1
45 | # ] # get the index of the max log-probability
46 | # correct = pred.eq(labels.data.view_as(pred)).sum()
47 | # for idx in range(len(labels)):
48 | # label_list.append(labels.detach().cpu().numpy()[idx])
49 | # pred_list.append(pred.detach().cpu().numpy()[idx][0])
50 | #
51 | # metrics["test_correct"] += correct.item()
52 | # metrics["test_loss"] += loss * labels.size(0)
53 | # metrics["test_total"] += labels.size(0)
54 | return {k: v.compute().cpu().float() for k, v in metrics.items()} | {k: v.compute().cpu().float() for k, v in
55 | losses.items()}
56 |
--------------------------------------------------------------------------------
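
Usage sketch for the classification `evaluate` helper (not repository code; the linear model and `ToySet` stand in for a real model/loader pair):

```python
import torch
from torch import nn

from scorers.classification_evaluator import evaluate

class ToySet(torch.utils.data.Dataset):
    def __init__(self, n=256):
        self.x = torch.randn(n, 8)
        self.targets = torch.randint(0, 12, (n,))
    def __getitem__(self, i):
        return self.x[i], self.targets[i]
    def __len__(self):
        return len(self.targets)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
results = evaluate(nn.Linear(8, 12), ToySet(), device, num_classes=12, batch_size=32)
print(results['accuracy'], results['f1_score'], results['cross_entropy_loss'])
```
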
/scorers/localization_evaluator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | from torchmetrics import Accuracy, Metric
5 | from tqdm import tqdm
6 |
7 | from scorers.utils import LossMetric
8 |
9 |
10 | def keyframe_distance(preds, uid_list):
11 | distance_list = list()
12 | sec_list = list()
13 | for pred, gt in zip(preds, uid_list):
14 | clip_length = gt['json_parent_end_sec'].item() - gt['json_parent_start_sec'].item()
15 | clip_frames = gt['json_parent_end_frame'].item() - gt['json_parent_start_frame'].item() + 1
16 | fps = clip_frames / clip_length
17 |         keyframe_loc_pred = np.argmax(pred)
18 |         # map the predicted index (over 16 clip positions) back to the parent clip's frame range
19 | keyframe_loc_pred_mapped = (gt['json_parent_end_frame'].item() - gt[
20 | 'json_parent_start_frame'].item()) / 16 * keyframe_loc_pred
21 | keyframe_loc_gt = gt['pnr_frame'].item() - gt['json_parent_start_frame'].item()
22 | err_frame = abs(keyframe_loc_pred_mapped - keyframe_loc_gt)
23 | err_sec = err_frame / fps
24 | distance_list.append(err_frame.item())
25 | sec_list.append(err_sec.item())
26 | # When there is no false positive
27 | if len(distance_list) == 0:
28 | # Should we return something else here?
29 | return 0, 0
30 | return np.array(distance_list), np.array(sec_list)
31 |
32 |
33 | class KeyframeDistance(Metric):
34 | def __init__(self):
35 | super().__init__(dist_sync_on_step=False)
36 | self.add_state("distance_list", default=[], dist_reduce_fx="cat")
37 | self.add_state("sec_list", default=[], dist_reduce_fx="cat")
38 |
39 | def update(self, preds: torch.Tensor, infos: list[torch.Tensor]):
40 | distance_list = list()
41 | sec_list = list()
42 | preds = preds.cpu().numpy()
43 | preds = preds[:, :-1]
44 | # pnr_frames = infos['pnr_frame']
45 | # clip_start_secs = infos['clip_start_sec']
46 | # clip_end_secs = infos['clip_end_sec']
47 | # clip_start_frames = infos['clip_start_frame']
48 | # clip_end_frames = infos['clip_end_frame']
49 | for pred, clip_start_sec, clip_end_sec, clip_start_frame, clip_end_frame, pnr_frame in zip(preds,
50 | *infos):
51 | # print(clip_start_sec, clip_end_sec, clip_start_frame, clip_end_frame, pnr_frame)
52 | if pnr_frame.item() == -1:
53 | continue
54 |             clip_length = clip_end_sec.item() - clip_start_sec.item()
55 | clip_frames = clip_end_frame.item() - clip_start_frame.item() + 1
56 | fps = clip_frames / clip_length
57 | keyframe_loc_pred = np.argmax(pred)
58 | keyframe_loc_pred_mapped = (clip_end_frame.item() - clip_start_frame.item()) / 16 * keyframe_loc_pred
59 | keyframe_loc_gt = pnr_frame.item() - clip_start_frame.item()
60 | err_frame = abs(keyframe_loc_pred_mapped - keyframe_loc_gt)
61 | err_sec = err_frame / fps
62 | distance_list.append(err_frame.item())
63 | sec_list.append(err_sec.item())
64 | # When there is no false positive
65 | if len(distance_list) == 0:
66 | # Should we return something else here?
67 | return
68 | self.sec_list.extend(sec_list)
69 | self.distance_list.extend(distance_list)
70 |
71 |     def compute(self):
72 |         # Average frame-distance error over all keyframes collected so far.
73 |         if len(self.distance_list) == 0:
74 |             return torch.tensor(0.0)  # no valid keyframes seen yet
75 |         return torch.mean(torch.tensor(self.distance_list))
76 |
77 |
78 | def evaluate(model, test_data, device, num_classes=12, batch_size=32):
79 | model.to(device)
80 | test_dataloader = torch.utils.data.DataLoader(
81 | dataset=test_data,
82 | shuffle=True,
83 | batch_size=batch_size,
84 | pin_memory=True,
85 | num_workers=1,
86 | drop_last=True,
87 | )
88 | model.eval()
89 |
90 | criterion = nn.CrossEntropyLoss().to(device)
91 | metrics = {
92 | 'avg_multilabel_accuracy': Accuracy(task="multiclass", num_classes=num_classes, average='micro').to(device),
93 | # 'binary_accuracy': Accuracy(task="multiclass", num_classes=1).to(device),
94 | # 'f1_score': F1Score(task="multiclass", num_classes=num_classes, average='macro').to(device),
95 | 'keyframe_dist': KeyframeDistance().to(device),
96 | }
97 | lbl_type = torch.LongTensor
98 | losses = {
99 | 'cce_loss': LossMetric(criterion).to(device),
100 | }
101 |
102 | with torch.no_grad():
103 | label_list, pred_list = list(), list()
104 | for batch_idx, (data, labels, info) in enumerate(tqdm(test_dataloader)):
105 | # for data, labels, lens in test_data:
106 | labels = labels.type(lbl_type)
107 | data, labels = data.to(device), labels.to(device)
108 | output = model(data)
109 | for lm in losses.values():
110 | lm.update(output, labels)
111 | # pred = output.data.max(1, keepdim=True)[1]
112 |
113 | for name, mm in metrics.items():
114 | if name == 'keyframe_dist':
115 | mm.update(output, info)
116 | continue
117 | mm.update(output, labels)
118 | # pred = output.data.max(1, keepdim=True)[
119 | # 1
120 | # ] # get the index of the max log-probability
121 | # correct = pred.eq(labels.data.view_as(pred)).sum()
122 | # for idx in range(len(labels)):
123 | # label_list.append(labels.detach().cpu().numpy()[idx])
124 | # pred_list.append(pred.detach().cpu().numpy()[idx][0])
125 | #
126 | # metrics["test_correct"] += correct.item()
127 | # metrics["test_loss"] += loss * labels.size(0)
128 | # metrics["test_total"] += labels.size(0)
129 | return {k: v.compute().cpu().float() for k, v in metrics.items()} | {k: v.compute().cpu().float() for k, v in
130 | losses.items()}
131 |
--------------------------------------------------------------------------------
/scorers/regression_evaluator.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.metrics import r2_score
3 | from torch import nn
4 | from torchmetrics import MeanAbsoluteError, R2Score
5 | from torchmetrics import Metric
6 | from tqdm import tqdm
7 |
8 | from scorers.utils import LossMetric
9 |
10 |
11 | # class R2Score(Metric):
12 | # def __init__(self, dist_sync_on_step=False):
13 | # super().__init__(dist_sync_on_step=dist_sync_on_step)
14 | # self.preds = []
15 | # self.targets = []
16 | #
17 | # def update(self, preds: torch.Tensor, target: torch.Tensor):
18 | # self.preds += preds.reshape((-1,)).cpu().tolist()
19 | # self.targets += target.reshape((-1,)).cpu().tolist()
20 | #
21 | # def compute(self):
22 | # return torch.tensor(r2_score(self.preds, self.targets))
23 |
24 |
25 | def evaluate(model, test_data, device, num_classes=1, batch_size=32):
26 | model.to(device)
27 | test_dataloader = torch.utils.data.DataLoader(
28 | dataset=test_data,
29 | shuffle=False,
30 | batch_size=batch_size,
31 | pin_memory=True,
32 | num_workers=1,
33 | drop_last=True,
34 | )
35 | model.eval()
36 |
37 | criterion = nn.MSELoss().to(device)
38 | metrics = {
39 | 'mae': MeanAbsoluteError().to(device),
40 | 'R^2': R2Score().to(device)
41 | }
42 | losses = {'L2 Loss': LossMetric(criterion).to(device)}
43 | with torch.no_grad():
44 | label_list, pred_list = list(), list()
45 | for batch_idx, (data, labels) in enumerate(tqdm(test_dataloader)):
46 | # for data, labels, lens in test_data:
47 | # labels = labels.type(torch.float)
48 | data, labels = data.to(device), labels.to(device)
49 | output = model(data)
50 | labels = labels.reshape((-1,))
51 | output = output.reshape((-1,))
52 | for lm in losses.values():
53 | lm.update(output, labels)
54 | # pred = output.data.max(1, keepdim=True)[1]
55 | for mm in metrics.values():
56 | mm.update(output, labels)
57 | # pred = output.data.max(1, keepdim=True)[
58 | # 1
59 | # ] # get the index of the max log-probability
60 | # correct = pred.eq(labels.data.view_as(pred)).sum()
61 | # for idx in range(len(labels)):
62 | # label_list.append(labels.detach().cpu().numpy()[idx])
63 | # pred_list.append(pred.detach().cpu().numpy()[idx][0])
64 | #
65 | # metrics["test_correct"] += correct.item()
66 | # metrics["test_loss"] += loss * labels.size(0)
67 | # metrics["test_total"] += labels.size(0)
68 | return {k: v.compute().cpu().float() for k, v in metrics.items()} | {k: v.compute().cpu().float() for k, v in
69 | losses.items()}
70 |
--------------------------------------------------------------------------------
/scorers/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchmetrics import Metric
3 |
4 |
5 | class LossMetric(Metric):
6 | def __init__(self, criterion):
7 | super().__init__()
8 | self.criterion = criterion
9 | self.add_state("loss", default=torch.tensor(0, dtype=float), dist_reduce_fx="mean")
10 | self.add_state("total", default=torch.tensor(0, dtype=float), dist_reduce_fx="mean")
11 |
12 | def update(self, output: torch.Tensor, target: torch.Tensor):
13 |         batch_loss = target.size(0) * self.criterion(output, target.long()).data.item()
14 |         self.loss += batch_loss
15 | self.total += target.size(0)
16 |
17 | def compute(self):
18 | return self.loss.float() / self.total.float()
19 |
--------------------------------------------------------------------------------
/strategies/base_fl.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import ray
3 | import torch
4 | from tqdm import tqdm
5 |
6 |
7 | def distributed_fedavg(aggregator,
8 | client_trainers,
9 | client_dataset_refs,
10 | client_num_per_round,
11 | global_model,
12 | round_idx,
13 | scheduler,
14 | device,
15 | precision):
16 | # Select random clients for each round
17 | sampled_clients_idx = np.random.choice(len(client_dataset_refs), client_num_per_round, replace=False)
18 | print(f"selected clients: {sampled_clients_idx}")
19 | # Initialize lists to store updates, weights, and local metrics
20 | all_updates, all_weights, all_local_metrics = [], [], []
21 |
22 | # Iterate over the sampled clients in chunks equal to the number of client trainers
23 | for i in tqdm(range(0, len(sampled_clients_idx), len(client_trainers))):
24 | # Initialize list to store remote steps
25 | remote_steps = []
26 |
27 | # Iterate over the client trainers
28 | for j, client_trainer in enumerate(client_trainers):
29 | idx = i + j
30 | if idx >= len(sampled_clients_idx):
31 | break
32 |
33 | # Update the remote client_trainer with the latest global model and scheduler state
34 | client_trainer.update.remote(global_model.state_dict(), scheduler.state_dict())
35 |
36 | # Perform a remote training step on the client_trainer
37 | if precision != 'float32':
38 | remote_step = client_trainer.step_low_precision.remote(sampled_clients_idx[idx],
39 | client_dataset_refs[sampled_clients_idx[idx]],
40 | round_idx,
41 | precision,
42 | device=device)
43 | else:
44 | remote_step = client_trainer.step.remote(sampled_clients_idx[idx],
45 | client_dataset_refs[sampled_clients_idx[idx]],
46 | round_idx,
47 | device=device)
48 | remote_steps.append(remote_step)
49 |
50 | # Retrieve remote steps results
51 | print(f"length of steps: {len(remote_steps)}")
52 | updates, num_client_samples, local_metrics = zip(*ray.get(remote_steps))
53 |
54 | # Add the results to the overall lists
55 | for u, n, l in zip(updates, num_client_samples, local_metrics):
56 | if n > 0:
57 | all_updates.append(u)
58 | all_weights.append(n)
59 | all_local_metrics.append(l)
60 | torch.cuda.empty_cache()
61 |
62 | # Calculate the average local metrics
63 | local_metrics_avg = {key: sum(metric[key] for metric in all_local_metrics if metric[key]) / len(all_local_metrics)
64 | for key in all_local_metrics[0]}
65 |
66 | print(all_local_metrics)
67 |
68 | # Update the global model using the aggregator
69 | state_n = aggregator.step(all_updates, all_weights, round_idx)
70 | global_model.load_state_dict(state_n)
71 |
72 | # Update the scheduler
73 | scheduler.step()
74 |
75 | return local_metrics_avg, global_model, scheduler
76 |
77 |
78 | def basic_fedavg(aggregator,
79 | client_trainers,
80 | client_dataset_refs,
81 | client_num_per_round,
82 | global_model,
83 | round_idx,
84 | scheduler,
85 | device,
86 | precision):
87 | # Select random clients for each round
88 | sampled_clients_idx = np.random.choice(len(client_dataset_refs), client_num_per_round, replace=False)
89 | print(f"selected clients: {sampled_clients_idx}")
90 | # Initialize lists to store updates, weights, and local metrics
91 | all_updates, all_weights, all_local_metrics = [], [], []
92 |
93 | # Iterate over the sampled clients in chunks equal to the number of client trainers
94 | for i in tqdm(range(0, len(sampled_clients_idx), len(client_trainers))):
95 | # Initialize list to store remote steps
96 | remote_steps = []
97 |
98 | # Iterate over the client trainers
99 | for j, client_trainer in enumerate(client_trainers):
100 | idx = i + j
101 | if idx >= len(sampled_clients_idx):
102 | break
103 |
104 | # Update the remote client_trainer with the latest global model and scheduler state
105 | client_trainer.update(global_model.state_dict(), scheduler.state_dict())
106 |
107 | # Perform a remote training step on the client_trainer
108 | if precision != 'float32':
109 | remote_step = client_trainer.step_low_precision(sampled_clients_idx[idx],
110 | client_dataset_refs[sampled_clients_idx[idx]],
111 | round_idx,
112 | precision,
113 | device=device)
114 | else:
115 | remote_step = client_trainer.step(sampled_clients_idx[idx],
116 | client_dataset_refs[sampled_clients_idx[idx]],
117 | round_idx,
118 | device=device)
119 | remote_steps.append(remote_step)
120 |
121 | # Retrieve remote steps results
122 | print(f"length of steps: {len(remote_steps)}")
123 | updates, num_client_samples, local_metrics = zip(*remote_steps)
124 |
125 | # Add the results to the overall lists
126 | for u, n, l in zip(updates, num_client_samples, local_metrics):
127 | if n > 0:
128 | all_updates.append(u)
129 | all_weights.append(n)
130 | all_local_metrics.append(l)
131 | torch.cuda.empty_cache()
132 |
133 | # Calculate the average local metrics
134 | local_metrics_avg = {key: sum(metric[key] for metric in all_local_metrics if metric[key]) / len(all_local_metrics)
135 | for key in all_local_metrics[0]}
136 |
137 | print(all_local_metrics)
138 |
139 | # Update the global model using the aggregator
140 | state_n = aggregator.step(all_updates, all_weights, round_idx)
141 | global_model.load_state_dict(state_n)
142 |
143 | # Update the scheduler
144 | scheduler.step()
145 |
146 | return local_metrics_avg, global_model, scheduler
--------------------------------------------------------------------------------
/system.yml:
--------------------------------------------------------------------------------
1 | [DEFAULT]
2 | num_gpus = 2
3 | num_trainers_per_gpu = 1
4 | CUDA_VISIBLE_DEVICES=1,2
5 | seed = 1
--------------------------------------------------------------------------------
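
Despite the .yml extension, system.yml is INI-formatted and is read with configparser (see `read_system_variable` in trainers/utils.py); a minimal reading sketch:

```python
import configparser

config = configparser.ConfigParser()
config.read('system.yml')
print(config['DEFAULT'].getint('num_gpus', 1))        # -> 2
print(config['DEFAULT'].get('CUDA_VISIBLE_DEVICES'))  # -> '1,2'
```
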
/trainers/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import torch
5 | from torch.optim.lr_scheduler import LRScheduler
6 |
7 |
8 | class WarmupScheduler(LRScheduler):
9 | def __init__(self, optimizer, warmup_epochs, scheduler):
10 | self.warmup_epochs = warmup_epochs
11 | self.scheduler = scheduler
12 | super(WarmupScheduler, self).__init__(optimizer, -1)
13 | self._last_lr = [0.0] * len(optimizer.param_groups)
14 |
15 | def get_lr(self):
16 | if self.last_epoch < self.warmup_epochs:
17 | warmup_factor = self.last_epoch / self.warmup_epochs
18 | return [base_lr * warmup_factor for base_lr in self.base_lrs]
19 |
20 | return self.scheduler.get_last_lr()
21 |
22 | def step(self, epoch=None):
23 | if self.last_epoch < self.warmup_epochs:
24 | self.last_epoch += 1
25 | new_lrs = self.get_lr()
26 | for param_group, lr in zip(self.optimizer.param_groups, new_lrs):
27 | param_group['lr'] = lr
28 | self._last_lr = new_lrs
29 | else:
30 | self.scheduler.step(epoch)
31 | self._last_lr = self.scheduler.get_last_lr()
32 |
33 |
34 | def read_system_variable(system_config):
35 |     num_gpus = int(os.environ['num_gpus']) if 'num_gpus' in os.environ \
36 |         else system_config['DEFAULT'].getint('num_gpus', 1)
37 |     num_trainers_per_gpu = int(os.environ['num_trainers_per_gpu']) if 'num_trainers_per_gpu' in os.environ \
38 |         else system_config['DEFAULT'].getint('num_trainers_per_gpu', 1)
39 |     seed = int(os.environ['seed']) if 'seed' in os.environ \
40 |         else system_config['DEFAULT'].getint('seed', 1)
41 |     return num_gpus, num_trainers_per_gpu, seed
42 |
43 |
44 | def set_seed(seed: int):
45 | """
46 | Set the random seed for PyTorch and NumPy.
47 | """
48 | # Set the random seed for PyTorch
49 | torch.manual_seed(seed)
50 | torch.cuda.manual_seed_all(seed)
51 |
52 | # Set the random seed for NumPy
53 | np.random.seed(seed)
54 |
55 | # Set the deterministic flag for CuDNN (GPU)
56 | torch.backends.cudnn.deterministic = True
57 | torch.backends.cudnn.benchmark = False
58 |
--------------------------------------------------------------------------------
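
A sketch of the intended warm-up behaviour (StepLR stands in for any wrapped scheduler; the dummy parameter and learning rates are illustrative):

```python
import torch
from torch.optim.lr_scheduler import StepLR

from trainers.utils import WarmupScheduler

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.1)
scheduler = WarmupScheduler(optimizer, warmup_epochs=5,
                            scheduler=StepLR(optimizer, step_size=10, gamma=0.5))

for epoch in range(8):
    optimizer.step()
    scheduler.step()
    print(epoch, scheduler.get_last_lr())  # linear ramp for 5 epochs, then StepLR takes over
```
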
/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import warnings
3 |
4 | import numpy as np
5 | import pandas as pd
6 | import torch
7 | from torch.optim.lr_scheduler import LRScheduler
8 |
9 | import loaders.casas
10 | import loaders.cifar10
11 | import loaders.ego4d
12 | import loaders.emognition
13 | import loaders.energy
14 | import loaders.epic_sounds
15 | import loaders.spatial_transforms
16 | import loaders.ut_har
17 | import loaders.visdrone
18 | import loaders.widar
19 | import loaders.wisdm
20 | import wandb
21 | from analyses.noise import inject_label_noise_with_matrix
22 | from loaders.utils import ParameterDict
23 | from partition.centralized import CentralizedPartition
24 | from partition.dirichlet import DirichletPartition
25 | from partition.uniform import UniformPartition
26 | from partition.user_index import UserPartition
27 | from partition.utils import compute_client_data_distribution, get_html_plots
28 |
29 |
30 | def read_system_variable(system_config):
31 |     num_gpus = int(os.environ['num_gpus']) if 'num_gpus' in os.environ \
32 |         else system_config['DEFAULT'].getint('num_gpus', 1)
33 |     num_trainers_per_gpu = int(os.environ['num_trainers_per_gpu']) if 'num_trainers_per_gpu' in os.environ \
34 |         else system_config['DEFAULT'].getint('num_trainers_per_gpu', 1)
35 |     seed = int(os.environ['seed']) if 'seed' in os.environ \
36 |         else system_config['DEFAULT'].getint('seed', 1)
37 |     return num_gpus, num_trainers_per_gpu, seed
38 |
39 |
40 | class WarmupScheduler(LRScheduler):
41 | def __init__(self, optimizer, warmup_epochs, scheduler):
42 | self.warmup_epochs = warmup_epochs
43 | self.scheduler = scheduler
44 | super(WarmupScheduler, self).__init__(optimizer, -1)
45 | self._last_lr = [0.0] * len(optimizer.param_groups)
46 |
47 | def get_lr(self):
48 | if self.last_epoch < self.warmup_epochs:
49 | warmup_factor = self.last_epoch / self.warmup_epochs
50 | return [base_lr * warmup_factor for base_lr in self.base_lrs]
51 |
52 | return self.scheduler.get_last_lr()
53 |
54 | def step(self, epoch=None):
55 | if self.last_epoch < self.warmup_epochs:
56 | self.last_epoch += 1
57 | new_lrs = self.get_lr()
58 | for param_group, lr in zip(self.optimizer.param_groups, new_lrs):
59 | param_group['lr'] = lr
60 | self._last_lr = new_lrs
61 | else:
62 | self.scheduler.step(epoch)
63 | self._last_lr = self.scheduler.get_last_lr()
64 |
65 |
66 | def get_default_yolo_hyperparameters():
67 | YOLO_HYPERPARAMETERS = {
68 | 'lr0': 0.01,
69 | 'lrf': 0.01,
70 | 'momentum': 0.937,
71 | 'weight_decay': 0.0005,
72 | 'warmup_epochs': 3.0,
73 | 'warmup_momentum': 0.8,
74 | 'warmup_bias_lr': 0.1,
75 | 'box': 7.5,
76 | 'cls': 0.5,
77 | 'dfl': 1.5,
78 | 'fl_gamma': 0.0,
79 | 'label_smoothing': 0.0,
80 | 'nbs': 64,
81 | 'hsv_h': 0.015,
82 | 'hsv_s': 0.7,
83 | 'hsv_v': 0.4,
84 | 'degrees': 0.0,
85 | 'translate': 0.1,
86 | 'scale': 0.5,
87 | 'shear': 0.0,
88 | 'perspective': 0.0,
89 | 'flipud': 0.0,
90 | 'fliplr': 0.5,
91 | 'mosaic': 1.0,
92 | 'mixup': 0.0,
93 | 'copy_paste': 0.0,
94 | 'mask_ratio': 0.0,
95 | 'overlap_mask': 0.0,
96 | 'conf': 0.25,
97 | 'iou': 0.45,
98 | 'max_det': 1000,
99 | 'plots': False,
100 | 'half': False, # use half precision (FP16)
101 | 'dnn': False,
102 | 'data': None,
103 | 'imgsz': 640,
104 | 'verbose': False
105 | }
106 | YOLO_HYPERPARAMETERS = ParameterDict(YOLO_HYPERPARAMETERS)
107 | return YOLO_HYPERPARAMETERS
108 |
109 |
110 | def set_seed(seed: int):
111 | """
112 | Set the random seed for PyTorch and NumPy.
113 | """
114 | # Set the random seed for PyTorch
115 | torch.manual_seed(seed)
116 | torch.cuda.manual_seed_all(seed)
117 |
118 | # Set the random seed for NumPy
119 | np.random.seed(seed)
120 |
121 | # Set the deterministic flag for CuDNN (GPU)
122 | torch.backends.cudnn.deterministic = True
123 | torch.backends.cudnn.benchmark = False
124 |
125 |
126 | def load_dataset(dataset_name):
127 | if dataset_name == 'cifar10':
128 | dataset = loaders.cifar10.load_dataset()
129 | num_classes = 10
130 | elif dataset_name == 'wisdm_watch':
131 | dataset = loaders.wisdm.load_dataset(reprocess=False, modality='watch')
132 | num_classes = 12
133 | elif dataset_name == 'wisdm_phone':
134 | dataset = loaders.wisdm.load_dataset(reprocess=False, modality='phone')
135 | num_classes = 12
136 | elif dataset_name == 'widar':
137 | dataset = loaders.widar.load_dataset()
138 | num_classes = 9
139 | elif dataset_name == 'visdrone':
140 | dataset = loaders.visdrone.load_dataset()
141 | num_classes = 12
142 | elif dataset_name == 'ut_har':
143 | dataset = loaders.ut_har.load_dataset()
144 | num_classes = 7
145 | elif dataset_name == 'emognition':
146 | dataset = loaders.emognition.load_bracelet_data(reprocess=True)
147 | num_classes = 2
148 | elif dataset_name == 'casas':
149 | dataset = loaders.casas.load_dataset()
150 | num_classes = 12
151 | elif dataset_name == 'energy':
152 | dataset = loaders.energy.load_dataset()
153 | num_classes = 10
154 | elif dataset_name == 'epic_sounds':
155 | dataset = loaders.epic_sounds.load_dataset()
156 | num_classes = 44
157 | elif dataset_name == 'ego4d':
158 | dataset = loaders.ego4d.load_dataset(
159 | transforms=loaders.spatial_transforms.Compose(
160 | [loaders.spatial_transforms.Normalize([0.45], [0.225])]
161 | )
162 | )
163 | num_classes = 17
164 | # print(dataset['train'][1][1].shape)
165 | # print(np.unique(dataset['train'].targets), len(np.unique(dataset['train'].targets)))
166 | # raise ValueError('ego4d')
167 | else:
168 | raise ValueError(f'Dataset {dataset_name} type not supported')
169 |
170 | return dataset, num_classes
171 |
172 |
173 | def get_partition(partition_type, dataset_name, num_classes, client_num_in_total, client_num_per_round, alpha, dataset):
174 |     if partition_type == 'user' and dataset_name in {'wisdm_phone', 'wisdm_watch', 'widar', 'visdrone'}:  # names as returned by load_dataset
175 | partition = UserPartition(dataset['split']['train'])
176 | client_num_in_total = len(dataset['split']['train'].keys())
177 | elif partition_type == 'uniform':
178 | partition = UniformPartition(num_class=num_classes, num_clients=client_num_in_total)
179 | elif partition_type == 'dirichlet':
180 | if alpha is None:
181 | warnings.warn('alpha is not set, using default value 0.1')
182 | alpha = 0.1
183 | partition = DirichletPartition(num_class=num_classes, num_clients=client_num_in_total, alpha=alpha)
184 | elif partition_type == 'central':
185 | partition = CentralizedPartition()
186 | client_num_per_round = 1
187 | client_num_in_total = 1
188 | else:
189 | raise ValueError(f'Partition {partition_type} type not supported')
190 |
191 | return partition, client_num_in_total, client_num_per_round
192 |
193 |
194 | def plot_data_distributions(dataset, dataset_name, client_datasets, num_classes):
195 | if hasattr(dataset['train'], 'targets') and dataset_name != 'ego4d':
196 | data_distribution, class_distribution = compute_client_data_distribution(datasets=client_datasets,
197 | num_classes=num_classes)
198 | class_dist, sample_dist = get_html_plots(data_distribution, class_distribution)
199 | wandb.log({'class_dist': wandb.Html(class_dist, inject=False),
200 | 'sample_dist': wandb.Html(sample_dist, inject=False)},
201 | step=0)
202 | # if dataset_name == 'visdrone':
203 | # targets = [[d['cls'] for d in dt] for dt in client_datasets]
204 | # data_distribution, class_distribution = compute_client_target_distribution(targets, num_classes=12)
205 | # wandb.log({'visdrone_class_dist': wandb.Html(class_dist, inject=False),
206 | # 'sample_dist': wandb.Html(sample_dist, inject=False)},
207 | # step=0)
208 |
209 |
210 | def add_label_noise(analysis, dataset_name, client_datasets, num_classes):
211 | confusion_matrix = pd.read_csv(f'confusion_matrices/conf_{dataset_name}.csv', header=0, index_col=None)
212 | confusion_matrix = confusion_matrix.to_numpy()
213 |     confusion_matrix = confusion_matrix / confusion_matrix.sum(axis=1, keepdims=True)  # row-normalize
214 | _, error_rate, error_var = analysis.split('-')
215 | error_rate = float(error_rate)
216 | error_var = float(error_var)
217 | print(f'Adding noise ...{error_rate}')
218 | client_datasets, noise_percentages = inject_label_noise_with_matrix(client_datasets,
219 | num_classes,
220 | confusion_matrix,
221 | error_rate)
222 | print(noise_percentages)
223 | return client_datasets, noise_percentages
224 |
225 |
226 | def plot_noise_distribution(noise_percentages):
227 | table = wandb.Table(data=[[d] for d in noise_percentages], columns=['noise_ratio'])
228 | wandb.log({"noise_percentages": wandb.plot.histogram(table, "noise_ratio",
229 | title="Label Noise Distribution")
230 | }, step=0)
231 |
--------------------------------------------------------------------------------
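
A sketch of how these helpers compose into the pipeline entry points (assumes the WISDM data has already been fetched via datasets/wisdm/download.py; names follow `load_dataset`/`get_partition` above):

```python
from utils import get_partition, load_dataset

dataset, num_classes = load_dataset('wisdm_watch')
partition, n_total, n_per_round = get_partition(
    'dirichlet', 'wisdm_watch', num_classes,
    client_num_in_total=10, client_num_per_round=5, alpha=0.1, dataset=dataset)
client_datasets = partition(dataset['train'])
print(n_total, n_per_round, [len(d) for d in client_datasets])
```
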