├── .gitignore ├── LICENSE ├── README.md ├── aggregators ├── base.py └── torchcomponentrepository.py ├── analyses └── noise.py ├── commands.md ├── commands ├── float16 │ ├── casas.sh │ ├── energy.sh │ ├── epic_sounds.sh │ ├── ut_har.sh │ ├── widar.sh │ └── wisdm.sh ├── grid_search │ └── fedopt │ │ ├── casas.sh │ │ ├── energy.sh │ │ ├── ut_har.sh │ │ ├── widar.sh │ │ ├── wisdm_p.sh │ │ └── wisdm_w.sh ├── label_error │ ├── casas.sh │ ├── epic_sounds.sh │ ├── ut_har.sh │ ├── widar.sh │ └── wisdm.sh └── overview │ ├── fedavg │ ├── casas.sh │ ├── energy.sh │ ├── epic_sounds.sh │ ├── ut_har.sh │ ├── widar.sh │ └── wisdm.sh │ └── fedopt │ ├── casas.sh │ ├── energy.sh │ ├── ut_har.sh │ ├── widar.sh │ └── wisdm.sh ├── config.yml ├── confusion_matrices ├── conf_casas.csv ├── conf_epic_sounds.csv ├── conf_ut_har.csv ├── conf_widar.csv ├── conf_wisdm_phone.csv └── conf_wisdm_watch.csv ├── datasets ├── casas │ └── download.py ├── ego4d │ ├── process.py │ ├── uids │ └── utils.py ├── emognition │ └── download.py ├── energy │ └── energydata_complete.csv ├── epic_sounds │ ├── download.py │ ├── epic-kitchens-download-scripts-master │ │ ├── README.md │ │ ├── data │ │ │ ├── epic_100_splits.csv │ │ │ ├── epic_55_splits.csv │ │ │ ├── errata.csv │ │ │ └── md5.csv │ │ ├── download_extension_only.sh │ │ ├── download_full_epic.sh │ │ └── epic_downloader.py │ └── epic-sounds-annotations-main │ │ ├── EPIC_Sounds_recognition_test_timestamps.csv │ │ ├── EPIC_Sounds_recognition_test_timestamps.pkl │ │ ├── EPIC_Sounds_train.csv │ │ ├── EPIC_Sounds_train.pkl │ │ ├── EPIC_Sounds_validation.csv │ │ ├── EPIC_Sounds_validation.pkl │ │ ├── sound_events_not_categorised.csv │ │ └── sound_events_not_categorised.pkl ├── ut_har │ ├── download.py │ └── process.py ├── visdrone │ ├── clusterer.py │ ├── download.py │ └── split.csv ├── widar │ └── download.py └── wisdm │ ├── activity_key.txt │ ├── activity_key_filtered.txt │ └── download.py ├── distributed_main.py ├── environment_droplet.yml ├── figures ├── datasets.png ├── overview.png └── pipeline.png ├── loaders ├── casas.py ├── cifar10.py ├── clusterer.py ├── ego4d.py ├── emognition.py ├── energy.py ├── epic_sounds.py ├── pack_audio.py ├── spatial_transforms.py ├── spec_augment.py ├── ut_har.py ├── utils.py ├── visdrone.py ├── widar.py └── wisdm.py ├── main.py ├── models ├── casas.py ├── ego4d.py ├── emognition.py ├── energy.py ├── epic_sounds.py ├── ut_har.py ├── utils.py ├── widar.py ├── wisdm.py └── yolov8.yaml ├── partition ├── centralized.py ├── dirichlet.py ├── label.py ├── uniform.py ├── user_index.py └── utils.py ├── requirements.txt ├── scorers ├── classification_evaluator.py ├── localization_evaluator.py ├── regression_evaluator.py ├── ultralytics_yolo_evaluator.py └── utils.py ├── strategies └── base_fl.py ├── system.yml ├── trainers ├── distributed_base.py ├── ultralytics_distributed.py └── utils.py ├── utils.py └── validator.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to 
inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | .idea/ 161 | draft/ 162 | *.meta 163 | /datasets/cifar10/cifar-10-batches-py/ 164 | /datasets/cifar10/cifar-10-python.tar.gz 165 | /datasets/widar/federated/ 166 | wandb/ 167 | /datasets/*/*.tar 168 | /datasets/*/*.html 169 | /datasets/*/*.png 170 | /datasets/*/*.dt 171 | /datasets/emognition/ 172 | /datasets/widar/federated/ 173 | /datasets/widar/Widardata/ 174 | /datasets/widar/Widardata.zip 175 | /datasets/wisdm/wisdm-dataset/ 176 | /datasets/wisdm/processed.csv 177 | /logs/ 178 | datasets/casas/ 179 | datasets/bewie/ 180 | /datasets/ego4d/ego4d_data/ 181 | /datasets/visdrone/test/ 182 | /datasets/visdrone/train/ 183 | /datasets/visdrone/val/ 184 | /datasets/visdrone/VisDrone2018-DET-test-dev/ 185 | /datasets/visdrone/VisDrone2019-DET-train/ 186 | /datasets/visdrone/VisDrone2019-DET-val/ 187 | /datasets/*/*.zip 188 | /datasets/ego4d/negative/ 189 | /datasets/ego4d/positive/ 190 | /datasets/ego4d/ego4d_data_v2/ 191 | /datasets/epic_sounds/EPIC-KITCHENS/ 192 | /datasets/student_life/ 193 | /datasets/ut_har/UT_HAR/ 194 | /weights/ 195 | datasets/wisdm/processed_watch.csv 196 | datasets/wisdm/processed_phone.csv 197 | *.html 198 | *.ini 199 | /datasets/emognition/_study_data.zip 200 | *.zip 201 | datasets/energy/energydata_complete.csv 202 | datasets/epic_sounds/EPIC_audio.hdf5 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FedAIoT: A Federated Learning Benchmark for Artificial Intelligence of Things 2 | 3 | ## Table of Contents 4 | 1. [Introduction](#introduction) 5 | 2. [Requirements](#requirements) 6 | 3. [Datasets](#datasets) 7 | 4. [Usage](#usage) 8 | 5. [Citation](#citation) 9 | 10 | ## Introduction 11 | Despite the significant relevance of federated learning (FL) in the realm of IoT, most existing FL works are conducted on well-known datasets such as CIFAR-10 and CIFAR-100. These datasets, however, do not originate from authentic IoT devices and thus fail to capture the unique modalities and inherent challenges associated with real-world IoT data. This notable discrepancy underscores a strong need for an IoT-oriented FL benchmark to fill this critical gap. 12 | 13 | This repository holds the source code for [FedAIoT: A Federated Learning Benchmark for Artificial Intelligence of Things](https://arxiv.org/abs/2310.00109). FedAIoT is a benchmarking tool for evaluating FL algorithms against real IoT datasets. FedAIoT contains eight well-chosen datasets collected from a wide range of authentic IoT devices, from smartwatches, smartphones, and Wi-Fi routers to drones, smart home sensors, and head-mounted devices, that either have already become an indispensable part of people’s daily lives or are driving emerging applications. These datasets encapsulate a variety of unique IoT-specific data modalities, such as wireless data, drone images, and smart home sensor data (e.g., motion, energy, humidity, temperature), that have not been explored in existing FL benchmarks. 14 | 15 | 16 |

17 | [image: comparison] 18 |

19 | 20 | 21 | To help the community benchmark performance on these datasets and ensure reproducibility, FedAIoT includes a unified end-to-end FL framework for AIoT, which covers the complete FL-for-AIoT pipeline: from non-independent and identically distributed (non-IID) data partitioning and IoT-specific data preprocessing to IoT-friendly models, FL hyperparameters, and an IoT-factor emulator. 22 | 23 | 24 |

25 | [image: comparison] 26 |

27 | 28 | 29 | ## Requirements 30 | 31 | ```bash 32 | pip install -r requirements.txt 33 | ``` 34 | ## Datasets 35 | 36 | FedAIoT currently includes the following eight IoT datasets: 37 | 38 |

39 | [image: comparison] 40 |

41 | 42 | Each dataset folder contains the `download.py` script to download the dataset. 43 | 44 | 45 | [//]: # (## Non-IID Partition Scheme) 46 | 47 | [//]: # (The partition classes split a large dataset into a list of smaller datasets. Several partition methods are implemented. ) 48 | 49 | [//]: # (1. Centralized essentially returns the original dataset as a list of one dataset.) 50 | 51 | [//]: # (2. Dirichlet partitions the dataset into a specified number of clients with a non-IID Dirichlet distribution.) 52 | 53 | [//]: # () 54 | [//]: # (Create a partition object and use that to partition any centralized dataset. Using the same partition on two ) 55 | 56 | [//]: # (different data splits will result in the same distribution of data between clients. For example:) 57 | 58 | [//]: # (```python) 59 | 60 | [//]: # ( partition = DirichletPartition(num_clients=10)) 61 | 62 | [//]: # ( train_partition = partition(dataset['train'])) 63 | 64 | [//]: # (```) 65 | 66 | [//]: # (Here `train_partition` and `test_partition` will have `10` clients with the same relative class and sample ) 67 | 68 | [//]: # (distribution.) 69 | 70 | [//]: # () 71 | [//]: # (For more details on implementation: [See here](https://github.com/AIoT-MLSys-Lab/FedAIoT/blob/61d8147d56f7ef4ea04d43a708f4de523f9e36bc/distributed_main.py#L129-L145)) 72 | 73 | 74 | [//]: # ([//]: # (## Models)) 75 | [//]: # () 76 | [//]: # ([//]: # ()) 77 | [//]: # ([//]: # (The experiment supports various models and allows you to use custom models as well. See the models directory for the )) 78 | [//]: # () 79 | [//]: # ([//]: # (individual implementations of the models for the respective datasets.)) 80 | [//]: # () 81 | [//]: # (## Training) 82 | 83 | [//]: # () 84 | [//]: # (The experiment supports different federated learning algorithms and partition types. You can configure the experiment settings by modifying the `config.yml` file or passing the required parameters when running the script.) 85 | 86 | [//]: # () 87 | [//]: # (The basic federated learning algorithm is implemented in the `algorithm.base_fl` module. Given an `aggregator` (see ) 88 | 89 | [//]: # (aggregator module), `client_trainers` (ray actors for distributed training), `client_dataset_refs` (ray data ) 90 | 91 | [//]: # (references), `client_num_per_round` (number of clients sampled per round; < total clients), `global_model`, `round_idx`, ) 92 | 93 | [//]: # (`scheduler`, and `device` (cpu or gpu), it runs one round of federated learning following vanilla FedAvg.) 94 | 95 | [//]: # (The following federated learning algorithms are included in the benchmark:) 96 | 97 | [//]: # () 98 | [//]: # (- FedAvg) 99 | 100 | [//]: # (- FedAdam) 101 | 102 | [//]: # () 103 | [//]: # () 104 | [//]: # (Various training options and hyperparameters can be configured, such as the optimizer, learning rate, weight decay, epochs, and more.) 105 | 106 | ## Usage 107 | Before running, we need to set the environment variables `num_gpus` and `num_trainers_per_gpu`. These set the total number of workers for the distributed system. If you want to use a subset of the GPUs available on the hardware, specify the GPUs to be used via the `CUDA_VISIBLE_DEVICES` variable. 108 | 109 | Take WISDM-W as an example.
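If the dataset is not present locally, it can be fetched first with the dataset folder's `download.py` script described above (a minimal sketch, assuming the WISDM script needs no additional arguments; check the script itself for dataset-specific options):

```
python datasets/wisdm/download.py
```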
To train a centralized model on WISDM-W: 110 | 111 | ``` 112 | num_gpus=1 num_trainers_per_gpu=1 CUDA_VISIBLE_DEVICES=0 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 1 --client_num_per_round 1 --partition_type central --alpha 0.1 --lr 0.01 --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 200 --batch_size 128 --analysis baseline --trainer BaseTrainer --watch_metric accuracy 113 | ``` 114 | 115 | To train a federated model on WISDM-W with FedAvg and a `10%` client sampling rate under high data heterogeneity: 116 | 117 | ``` 118 | num_gpus=1 num_trainers_per_gpu=1 CUDA_VISIBLE_DEVICES=0 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr 0.01 --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --watch_metric accuracy 119 | ``` 120 | 121 | For the full list of parameters, run: 122 | ``` 123 | python distributed_main.py main --help 124 | ``` 125 | 126 | ## Citation 127 | 128 | ``` 129 | @article{ 130 | alam2024fedaiot, 131 | title={Fed{AI}oT: A Federated Learning Benchmark for Artificial Intelligence of Things}, 132 | author={Samiul Alam and Tuo Zhang and Tiantian Feng and Hui Shen and Zhichao Cao and Dong Zhao and Jeonggil Ko and Kiran Somasundaram and Shrikanth Narayanan and Salman Avestimehr and Mi Zhang}, 133 | journal={Journal of Data-centric Machine Learning Research (DMLR)}, 134 | year={2024}, 135 | url={https://openreview.net/forum?id=fYNw9Ukljz}, 136 | } 137 | ``` 138 | -------------------------------------------------------------------------------- /aggregators/base.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | from aggregators.torchcomponentrepository import TorchComponentRepository 8 | 9 | 10 | class FederatedAveraging: 11 | def __init__(self, global_model: torch.nn.Module, 12 | server_optimizer='sgd', 13 | server_lr=1e-2, 14 | server_momentum=0.9, 15 | eps=1e-3): 16 | 17 | self.global_model = global_model 18 | self.server_optimizer = server_optimizer 19 | self.server_lr = server_lr 20 | self.optimizer = TorchComponentRepository.get_class_by_name(self.server_optimizer, torch.optim.Optimizer)( 21 | filter(lambda p: p.requires_grad, global_model.parameters()), 22 | lr=server_lr, 23 | ) 24 | 25 | def step(self, 26 | updated_parameter_list: list[dict[str, np.ndarray]], 27 | weights: Union[None, list[float]], 28 | round_idx: int = 0): 29 | self.optimizer.zero_grad() 30 | 31 | params_n_plus_1 = self._average_updates(updated_parameter_list, weights) 32 | named_params = dict(self.global_model.cpu().named_parameters()) 33 | state_n_plus_1 = self.global_model.cpu().state_dict() 34 | with torch.no_grad(): 35 | for parameter_name, parameter_n_plus_1 in params_n_plus_1.items(): 36 | if parameter_name in named_params.keys(): 37 | parameter_n = named_params[parameter_name] 38 | parameter_n.grad = parameter_n.data - parameter_n_plus_1.data 39 | else: 40 | state_n_plus_1[parameter_name] = params_n_plus_1[parameter_name] 41 | self.global_model.load_state_dict(state_n_plus_1) 42 | self.optimizer.step() 43 | return self.global_model.cpu().state_dict() 44 | 45 | @staticmethod 46 | def _average_updates(update_list, weights=None): 47 | if weights is None: 48 | weights = [1 / len(update_list) for _ in
range(len(update_list))] 49 | weights = np.array(weights, dtype=float) 50 | weights /= weights.sum() 51 | averaged_params = {k: v * weights[0] for k, v in update_list[0].items()} 52 | if len(update_list) > 1: 53 | for local_model_params, weight in zip(update_list[1:], weights[1:]): 54 | for k in averaged_params.keys(): 55 | averaged_params[k] += local_model_params[k] * weight 56 | return averaged_params 57 | -------------------------------------------------------------------------------- /aggregators/torchcomponentrepository.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Type 2 | 3 | import torch 4 | 5 | 6 | class TorchComponentRepository: 7 | """A utility class for working with subclasses of PyTorch components, 8 | such as torch.optim.Optimizer and torch.optim.lr_scheduler._LRScheduler.""" 9 | 10 | @classmethod 11 | def get_supported_names(cls, module) -> List[str]: 12 | """Returns a list of supported component names.""" 13 | return [component.__name__.lower() for component in module.__subclasses__()] 14 | 15 | @classmethod 16 | def get_class_by_name(cls, name: str, module): 17 | """Returns the component class corresponding to the given name.""" 18 | component_class = next((component for component in module.__subclasses__() 19 | if component.__name__.lower() == name.lower()), None) 20 | if not component_class: 21 | raise KeyError(f"Invalid component: {name}! Available components: {cls.get_supported_names(module)}") 22 | return component_class 23 | 24 | @classmethod 25 | def get_supported_parameters(cls, component: Union[str, Type], module=None) -> List[str]: 26 | """Returns a list of __init__ function parameters for a given component and module.""" 27 | component_class = cls.get_class_by_name(component, module) if isinstance(component, str) else component 28 | params = component_class.__init__.__code__.co_varnames 29 | return [param for param in params if param not in {"defaults", "self", "params"}] 30 | 31 | 32 | if __name__ == '__main__': 33 | print(TorchComponentRepository.get_supported_names(torch.optim.Optimizer)) 34 | print(TorchComponentRepository.get_supported_names(torch.optim.lr_scheduler._LRScheduler)) 35 | print(TorchComponentRepository.get_class_by_name("adam", torch.optim.Optimizer)) 36 | print(TorchComponentRepository.get_class_by_name("linearlr", torch.optim.lr_scheduler._LRScheduler)) 37 | print(TorchComponentRepository.get_supported_parameters(torch.optim.Adam)) 38 | print(TorchComponentRepository.get_supported_parameters(torch.optim.lr_scheduler.StepLR)) 39 | print(TorchComponentRepository.get_supported_parameters("adam", torch.optim.Optimizer)) 40 | 41 | -------------------------------------------------------------------------------- /analyses/noise.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import random 3 | 4 | import numpy as np 5 | import wandb 6 | from matplotlib import pyplot as plt 7 | from torch.utils.data import Dataset 8 | from tqdm import tqdm 9 | 10 | 11 | class NoisyDataset(Dataset): 12 | def __init__(self, data): 13 | self.data = data 14 | self.targets = copy.deepcopy(data.targets) 15 | 16 | def __getitem__(self, index): 17 | return self.data[index][0], self.targets[index] 18 | 19 | def __len__(self): 20 | return len(self.data) 21 | 22 | 23 | def inject_label_noise(client_datasets, class_num, error_ratio, error_var): 24 | """ 25 | Add label noise to client datasets and log noise percentages to wandb. 
26 | 27 | Args: 28 | client_datasets: a list of client datasets 29 | class_num: an integer indicating the number of classes. 30 | error_ratio: a float between 0 and 1 indicating the ratio of labels to be flipped. 31 | error_var: a float indicating the standard deviation of the Gaussian distribution used to determine 32 | the level of label noise. 33 | 34 | Returns: 35 | A list of client datasets, and a list of noise percentages for each dataset 36 | """ 37 | client_datasets_label_error = [] 38 | noise_percentages = [] 39 | for original_data in client_datasets: 40 | # Determine the level of label noise for this client dataset; the level is drawn from a normal distribution. 41 | noisy_level = np.random.normal(error_ratio, error_var) 42 | noisy_level = max(noisy_level, 0) 43 | 44 | # Set the level of sparsity in the noise matrix. 45 | sparse_level = 0.4 46 | 47 | # Create a probability matrix for each label, where each element represents the probability of a label being assigned to that sample. 48 | prob_matrix = np.full(class_num * class_num, 1 - noisy_level) 49 | 50 | # Set a random subset of elements in the probability matrix to zero to create sparsity. 51 | sparse_elements = np.random.choice(class_num * class_num, round(class_num * (class_num - 1) * sparse_level), 52 | replace=False) 53 | sparse_elements = sparse_elements[sparse_elements % (class_num + 1) != 0] 54 | prob_matrix[sparse_elements] = 0 55 | 56 | # Spread noisy_level across the remaining non-zero off-diagonal entries of each row, keeping 1 - noisy_level on the diagonal. 57 | prob_matrix = prob_matrix.reshape((class_num, class_num)) 58 | for idx in range(len(prob_matrix)): 59 | non_zeros = np.count_nonzero(prob_matrix[idx]) 60 | prob_element = 0 if non_zeros == 1 else noisy_level / (non_zeros - 1) 61 | prob_matrix[idx] = np.where(prob_matrix[idx] == 1 - noisy_level, prob_element, prob_matrix[idx]) 62 | prob_matrix[idx, idx] = 1 - noisy_level 63 | 64 | # Add label noise to the dataset and calculate the noise percentage. 65 | original_labels = [sample[1] for sample in original_data] 66 | new_labels = [np.random.choice(class_num, p=prob_matrix[label]) for label in original_labels] 67 | new_dataset = [[original_data[i][0], new_labels[i]] for i in range(len(original_data))] 68 | 69 | noise_percentage = np.sum(np.array(original_labels) != np.array(new_labels)) / len(original_labels) * 100 70 | noise_percentages.append(noise_percentage) 71 | 72 | client_datasets_label_error.append(new_dataset) 73 | 74 | return client_datasets_label_error, noise_percentages 75 | 76 | # This is the function used in the paper. 77 | 78 | def inject_label_noise_with_matrix(client_datasets, class_num, confusion_matrix, error_label_ratio): 79 | """ 80 | Add label noise to client datasets and log noise percentages to wandb. 81 | 82 | Args: 83 | client_datasets: a list of client datasets 84 | class_num: an integer indicating the number of classes.
85 | confusion_matrix: the confusion matrix used for the new labelling, whose size is class_num x class_num 86 | error_label_ratio: a float between 0 and 1 indicating the fraction of labels to corrupt 87 | Returns: 88 | A list of client datasets, and a list of noise percentages for each dataset 89 | """ 90 | client_datasets_label_error = [] 91 | noise_percentages = [] 92 | 93 | for original_data in tqdm(client_datasets, total=len(client_datasets)): 94 | new_dataset = original_data 95 | new_dataset = NoisyDataset(new_dataset) 96 | # new_dataset = [[original_data[i][0], original_data[i][1]] for i in range(len(new_dataset))] 97 | num_elements = len(original_data) 98 | num_elements_to_change = int(num_elements * error_label_ratio) 99 | # indices_to_change = random.sample(range(num_elements), num_elements_to_change) 100 | indices = random.sample(range(num_elements), num_elements) 101 | indices_to_change = [] 102 | for index in indices: 103 | current_label_true = original_data[index][1] 104 | change_prob = confusion_matrix[current_label_true] 105 | # To speed up the noise injection, only change labels whose centralized accuracy is lower than 95%. 106 | if np.max(change_prob) < 0.95: 107 | indices_to_change.append(index) 108 | if len(indices_to_change) == num_elements_to_change: 109 | break 110 | 111 | changed_indices = set() 112 | for index in indices_to_change: 113 | current_label = original_data[index][1] 114 | new_label = np.random.choice(class_num, 115 | p=confusion_matrix[current_label] / sum(confusion_matrix[current_label])) 116 | while new_label == current_label or index in changed_indices: 117 | new_label = np.random.choice(class_num, 118 | p=confusion_matrix[current_label] / sum(confusion_matrix[current_label])) 119 | new_dataset.targets[index] = new_label 120 | changed_indices.add(index) 121 | 122 | original_labels = [sample[1] for sample in original_data] 123 | new_labels = [sample[1] for sample in new_dataset] 124 | noise_percentage = np.sum(np.array(original_labels) != np.array(new_labels)) / len(original_labels) * 100 125 | noise_percentages.append(noise_percentage) 126 | client_datasets_label_error.append(new_dataset) 127 | 128 | return client_datasets_label_error, noise_percentages 129 | 130 | 131 | def plot_noise_percentage(original_datasets, noisy_datasets, run): 132 | """ 133 | Function to calculate and plot label noise percentages for a list of datasets and upload the plot to wandb. 134 | 135 | Parameters: 136 | original_datasets (list): List of original PyTorch datasets. 137 | noisy_datasets (list): List of noisy PyTorch datasets. 138 | run (wandb.wandb_run.Run): The wandb run object to which the plot will be logged.
139 | 140 | Returns: 141 | None 142 | """ 143 | # Compute label noise percentages 144 | label_noise_percentages = [] 145 | 146 | for original_dataset, noisy_dataset in zip(original_datasets, noisy_datasets): 147 | original_labels = [label for _, label in original_dataset] 148 | noisy_labels = [label for _, label in noisy_dataset] 149 | 150 | # Compute noise percentage for this dataset 151 | noise_percentage = np.sum(np.array(original_labels) != np.array(noisy_labels)) / len(original_labels) * 100 152 | label_noise_percentages.append(noise_percentage) 153 | 154 | # Plot the label noise percentages as a histogram 155 | plt.hist(label_noise_percentages, bins=10, edgecolor='black') 156 | plt.title('Histogram of Label Noise Percentages') 157 | plt.xlabel('Label Noise Percentage') 158 | plt.ylabel('Count') 159 | 160 | # Save the plot to a file 161 | plt.savefig('label_noise_histogram.png') 162 | plt.close() # Close the plot 163 | 164 | # Log the plot to wandb 165 | run.log({"label_noise_histogram": wandb.Image('label_noise_histogram.png')}) 166 | -------------------------------------------------------------------------------- /commands/float16/casas.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | client_lr=0.01 5 | for seed in {1..3} 6 | do 7 | ## 6. casas 8 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 9 | done -------------------------------------------------------------------------------- /commands/float16/energy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | client_lr=0.1 4 | for seed in {1..3} 5 | do 6 | ## 7. 
energy 7 | seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric R^2 8 | done 9 | 10 | -------------------------------------------------------------------------------- /commands/float16/epic_sounds.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 seed=1 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 2 | CUDA_VISIBLE_DEVICES=0 seed=2 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 3 | CUDA_VISIBLE_DEVICES=0 seed=3 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 4 | -------------------------------------------------------------------------------- /commands/float16/ut_har.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | client_lr=0.0001 5 | for seed in {1..3} 6 | do 7 | ## 4. ut_har 8 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 9 | done -------------------------------------------------------------------------------- /commands/float16/widar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | client_lr=0.005 4 | for seed in {1..3} 5 | do 6 | ## 3. widar 7 | seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 8 | done -------------------------------------------------------------------------------- /commands/float16/wisdm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | client_lr=0.01 4 | ## 1. 
wisdm phone 5 | ### NIID-0.1 SGD 10%-30% 6 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 7 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 8 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 9 | 10 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 11 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 12 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --precision float16 --watch_metric accuracy 13 | -------------------------------------------------------------------------------- /commands/grid_search/fedopt/casas.sh: -------------------------------------------------------------------------------- 1 | lrs=(0.01 0.001 0.0001) 2 | for client_lr in "${lrs[@]}" 3 | do 4 | for server_lr in "${lrs[@]}" 5 | do 6 | ### NIID-0.1 Adam 10%-30% 7 | num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 8 | ### NIID-0.5 Adam 10%-30% 9 | num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline 
--trainer BaseTrainer --amp --watch_metric accuracy 10 | done 11 | done -------------------------------------------------------------------------------- /commands/grid_search/fedopt/energy.sh: -------------------------------------------------------------------------------- 1 | lrs=(0.01 0.001 0.0001) 2 | for client_lr in "${lrs[@]}" 3 | do 4 | for server_lr in "${lrs[@]}" 5 | do 6 | ### NIID-0.1 Adam 10%-30% 7 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2 8 | ### NIID-0.5 Adam 10%-30% 9 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2 10 | done 11 | done -------------------------------------------------------------------------------- /commands/grid_search/fedopt/ut_har.sh: -------------------------------------------------------------------------------- 1 | lrs=(0.01 0.001 0.0001) 2 | for client_lr in "${lrs[@]}" 3 | do 4 | for server_lr in "${lrs[@]}" 5 | do 6 | ### NIID-0.1 Adam 10% 7 | num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 8 | ### NIID-0.5 Adam 10% 9 | num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 10 | done 11 | done -------------------------------------------------------------------------------- /commands/grid_search/fedopt/widar.sh: -------------------------------------------------------------------------------- 1 | lrs=(0.01 0.001 0.0001) 2 | for client_lr in "${lrs[@]}" 3 | do 4 | for server_lr in "${lrs[@]}" 5 | do 6 | ### NIID-0.1 Adam 10%-30% 7 | num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1000 --batch_size 8 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 8 | ### NIID-0.5 Adam 10%-30% 9 | num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1000 --batch_size 8 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 10 | done 11 | done 
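The `--server_optimizer` value in these sweeps is resolved by name against the subclasses of `torch.optim.Optimizer`, mirroring `TorchComponentRepository.get_class_by_name` in `aggregators/torchcomponentrepository.py`. A minimal, self-contained sketch of that lookup:

```python
# Resolve an optimizer class by lowercase name, as the aggregator does.
import torch

def get_optimizer_class(name: str):
    for cls in torch.optim.Optimizer.__subclasses__():
        if cls.__name__.lower() == name.lower():
            return cls
    raise KeyError(f"Invalid optimizer: {name}")

print(get_optimizer_class("adam"))  # torch.optim.Adam
print(get_optimizer_class("sgd"))   # torch.optim.SGD
```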
-------------------------------------------------------------------------------- /commands/grid_search/fedopt/wisdm_p.sh: -------------------------------------------------------------------------------- 1 | lrs=(0.01 0.001 0.0001) 2 | for client_lr in "${lrs[@]}" 3 | do 4 | for server_lr in "${lrs[@]}" 5 | do 6 | ### NIID-0.1 Adam 10% 7 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 8 | ### NIID-0.5 Adam 10% 9 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 10 | done 11 | done -------------------------------------------------------------------------------- /commands/grid_search/fedopt/wisdm_w.sh: -------------------------------------------------------------------------------- 1 | lrs=(0.01 0.001 0.0001) 2 | for client_lr in "${lrs[@]}" 3 | do 4 | for server_lr in "${lrs[@]}" 5 | do 6 | ### NIID-0.1 Adam 10% 7 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 8 | ### NIID-0.5 Adam 10% 9 | num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 10 | done 11 | done -------------------------------------------------------------------------------- /commands/label_error/casas.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | client_lr=0.01 5 | for seed in {1..3} 6 | do 7 | ## 6. 
casas 8 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 9 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 10 | done -------------------------------------------------------------------------------- /commands/label_error/epic_sounds.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 seed=1 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 2 | CUDA_VISIBLE_DEVICES=0 seed=2 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 3 | CUDA_VISIBLE_DEVICES=1 seed=3 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 4 | 5 | 6 | CUDA_VISIBLE_DEVICES=3 seed=1 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 7 | CUDA_VISIBLE_DEVICES=3 seed=2 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 8 | CUDA_VISIBLE_DEVICES=4 seed=3 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 9 | 
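The `--analysis label_noise-R-V` tags used above plausibly encode the label-error ratio and variance consumed by the noise-injection utilities in `analyses/noise.py`; the helper below is a hypothetical illustration of that mapping, not the benchmark's actual parser:

```python
# Hypothetical: split an analysis tag such as "label_noise-0.3-0.0" into
# the (error_ratio, error_var) pair used by analyses/noise.py. The real
# parsing lives in the training entry point and may differ.
def parse_label_noise_tag(tag: str) -> tuple[float, float]:
    name, ratio, var = tag.rsplit("-", 2)
    assert name == "label_noise"
    return float(ratio), float(var)

print(parse_label_noise_tag("label_noise-0.1-0.0"))  # (0.1, 0.0)
print(parse_label_noise_tag("label_noise-0.3-0.0"))  # (0.3, 0.0)
```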
-------------------------------------------------------------------------------- /commands/label_error/ut_har.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | client_lr=0.001 5 | for seed in {1..3} 6 | do 7 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 8 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 9 | done -------------------------------------------------------------------------------- /commands/label_error/widar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | client_lr=0.001 4 | for seed in {1..3} 5 | do 6 | seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1500 --batch_size 8 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 7 | seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1500 --batch_size 8 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 8 | done -------------------------------------------------------------------------------- /commands/label_error/wisdm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | client_lr=0.01 4 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 5 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 6 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer 
BaseTrainer --amp --watch_metric accuracy 7 | ### NIID-0.1 SGD 10%-30% 8 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 9 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 10 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.1-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 11 | 12 | 13 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 14 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 15 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 16 | ### NIID-0.1 SGD 10%-30% 17 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 18 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 19 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd 
--server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis label_noise-0.3-0.0 --trainer BaseTrainer --amp --watch_metric accuracy 20 | -------------------------------------------------------------------------------- /commands/overview/fedavg/casas.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | client_lr=0.01 5 | for seed in {1..3} 6 | do 7 | ## 6. casas 8 | ### Centralized 9 | seed=$seed num_gpus=1 num_trainers_per_gpu=1 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 1 --client_num_per_round 1 --partition_type central --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 250 --batch_size 128 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 10 | ### NIID-0.1 SGD 10%-30% 11 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 12 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 18 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 13 | ### NIID-0.5 SGD 10%-30% 14 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 15 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 18 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 16 | ### NIID-0.1 Adam 10%-30% 17 | #seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr 0.1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 18 | ### NIID-0.5 Adam 10%-30% 19 | #seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr 0.1 --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 20 | done -------------------------------------------------------------------------------- /commands/overview/fedavg/energy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 
client_lr=0.1 4 | for seed in {1..3} 5 | do 6 | ## 7. energy 7 | ### Centralized 8 | # seed=$seed num_gpus=1 num_trainers_per_gpu=1 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 1 --client_num_per_round 1 --partition_type central --alpha 0.1 --lr 0.01 --client_optimizer sgd --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --watch_metric R^2 9 | ### NIID-0.1 SGD 10%-30% 10 | # seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2 11 | seed=$seed num_gpus=3 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 24 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2 12 | ### NIID-0.5 SGD 10%-30% 13 | # seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2 14 | # seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 24 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2 15 | done 16 | 17 | -------------------------------------------------------------------------------- /commands/overview/fedavg/epic_sounds.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 seed=1 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 2 | CUDA_VISIBLE_DEVICES=0 seed=2 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 3 | CUDA_VISIBLE_DEVICES=0 seed=3 num_gpus=1 num_trainers_per_gpu=10 python distributed_main.py main --dataset_name epic_sounds --model resnet18 --client_num_in_total 300 --client_num_per_round 30 --partition_type dirichlet --alpha 0.1 --lr 0.1 --server_optimizer sgd --server_lr 1 --test_frequency 20 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 4 | 
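These overview runs use vanilla FedAvg, whose server-side aggregation reduces to the weighted state-dict average implemented in `FederatedAveraging._average_updates` (`aggregators/base.py`). A toy, self-contained illustration with two fake client updates:

```python
# Weighted average of client updates, mirroring _average_updates.
import torch

updates = [{"w": torch.tensor([1.0, 2.0])},
           {"w": torch.tensor([3.0, 4.0])}]
weights = [0.25, 0.75]  # e.g., proportional to client sample counts

averaged = {k: v * weights[0] for k, v in updates[0].items()}
for update, w in zip(updates[1:], weights[1:]):
    for k in averaged:
        averaged[k] = averaged[k] + update[k] * w

print(averaged["w"])  # tensor([2.5000, 3.5000])
```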
-------------------------------------------------------------------------------- /commands/overview/fedavg/ut_har.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | client_lr=0.0001 5 | for seed in {1..3} 6 | do 7 | ## 4. ut_har 8 | ### Centralized 9 | seed=$seed num_gpus=1 num_trainers_per_gpu=1 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 1 --client_num_per_round 1 --partition_type central --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 500 --batch_size 128 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 10 | ### NIID-0.1 SGD 10%-30% 11 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 12 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 13 | ### NIID-0.5 SGD 10%-30% 14 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 15 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 6 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 16 | ### NIID-0.1 Adam 10%-30% 17 | # seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr .01 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 18 | ### NIID-0.5 Adam 10%-30% 19 | # seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr 0.01 --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 20 | done -------------------------------------------------------------------------------- /commands/overview/fedavg/widar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | client_lr=0.005 4 | for seed in {1..3} 5 | do 6 | ## 3. 
widar 7 | ### Centralized 8 | # seed=$seed num_gpus=1 num_trainers_per_gpu=1 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 1 --client_num_per_round 1 --partition_type central --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 120 --batch_size 128 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 9 | ### NIID-0.1 SGD 10%-30% 10 | # seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 11 | seed=$seed num_gpus=3 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 12 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 1500 --batch_size 12 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 12 | ### NIID-0.5 SGD 10%-30% 13 | # seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 14 | # seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 12 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer sgd --server_lr 1 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 15 | ### NIID-0.1 Adam 10%-30% 16 | # seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr .01 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 17 | ### NIID-0.5 Adam 10%-30% 18 | # seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr 0.01 --test_frequency 5 --comm_round 300 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 19 | done -------------------------------------------------------------------------------- /commands/overview/fedopt/casas.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for seed in {1..3} 4 | do 5 | ## 6. 
casas 6 | ## NIID-0.1 Adam 10%-30% 7 | client_lr=0.001 8 | server_lr=0.1 9 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 10 | ## NIID-0.5 Adam 10%-30% 11 | client_lr=0.0001 12 | server_lr=0.1 13 | seed=$seed num_gpus=1 num_trainers_per_gpu=6 python distributed_main.py main --dataset_name casas --model BiLSTMModel --client_num_in_total 60 --client_num_per_round 6 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 14 | done -------------------------------------------------------------------------------- /commands/overview/fedopt/energy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | server_lr=0.001 4 | client_lr=0.01 5 | for seed in {1..3} 6 | do 7 | ## 7. energy 8 | ## NIID-0.1 Adam 10%-30% 9 | seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2 10 | ## NIID-0.5 Adam 10%-30% 11 | seed=$seed num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name energy --model MLP --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 3000 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric R^2 12 | done -------------------------------------------------------------------------------- /commands/overview/fedopt/ut_har.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | server_lr=0.0001 4 | client_lr=0.0001 5 | for seed in {1..3} 6 | do 7 | ## 4. ut_har 8 | ## NIID-0.1 Adam 10%-30% 9 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 10 | ## NIID-0.5 Adam 10%-30% 11 | seed=$seed num_gpus=1 num_trainers_per_gpu=2 python distributed_main.py main --dataset_name ut_har --model UT_HAR_ResNet18 --client_num_in_total 20 --client_num_per_round 2 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 1200 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 12 | done -------------------------------------------------------------------------------- /commands/overview/fedopt/widar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | for seed in {1..3} 5 | do 6 | ## 3. 
widar 7 | server_lr=0.01 8 | client_lr=0.001 9 | ## NIID-0.1 Adam 10%-30% 10 | seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr .01 --test_frequency 5 --comm_round 300 --batch_size 8 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 11 | ## NIID-0.5 Adam 10%-30% 12 | server_lr=0.01 13 | client_lr=0.01 14 | seed=$seed num_gpus=1 num_trainers_per_gpu=4 python distributed_main.py main --dataset_name widar --model Widar_ResNet18 --client_num_in_total 40 --client_num_per_round 4 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr 0.01 --test_frequency 5 --comm_round 300 --batch_size 8 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 15 | done -------------------------------------------------------------------------------- /commands/overview/fedopt/wisdm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | client_lr=0.01 4 | server_lr=0.01 5 | # 1. wisdm phone 6 | ### NIID-0.1 Adam 10% 7 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 8 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 9 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 10 | ### NIID-0.5 Adam 10% 11 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 12 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 13 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_phone --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 
--comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 14 | 15 | client_lr=0.001 16 | server_lr=0.1 17 | ## NIID-0.1 Adam 10% 18 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 19 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 20 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.1 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 21 | client_lr=0.01 22 | server_lr=0.01 23 | ## NIID-0.5 Adam 10% 24 | seed=1 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 25 | seed=2 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 26 | seed=3 num_gpus=1 num_trainers_per_gpu=8 python distributed_main.py main --dataset_name wisdm_watch --model LSTM_NET --client_num_in_total 80 --client_num_per_round 8 --partition_type dirichlet --alpha 0.5 --lr $client_lr --server_optimizer adam --server_lr $server_lr --test_frequency 5 --comm_round 400 --batch_size 32 --analysis baseline --trainer BaseTrainer --amp --watch_metric accuracy 27 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | model = resnet18 3 | dataset = epic_sounds 4 | data_dir = ../data/ 5 | client_num_in_total = 30 6 | client_num_per_round = 6 7 | gpu_worker_num = 8 8 | batch_size = 256 9 | client_optimizer = sgd 10 | lr = 1e-2 11 | wd = 0.001 12 | epochs = 1 13 | fl_algorithm = fedaiot_distributed 14 | comm_round = 100 15 | test_frequency = 5 16 | server_optimizer = sgd 17 | server_lr = 1 18 | alpha = 0.25 19 | partition_type = central 20 | device=cuda 21 | trainer = BaseTrainer 22 | amp = false 23 | analysis= baseline 24 | class_mixup=1.0 25 | entity=fedaiot 26 | project=ray_fl_dev_v5 27 | -------------------------------------------------------------------------------- /confusion_matrices/conf_casas.csv: 
-------------------------------------------------------------------------------- 1 | "0","1","2","3","4","5","6","7","8","9","10","11" 2 | "26","1","1","2","0","0","0","0","0","1","0","0" 3 | "0","172","0","18","0","1","1","0","0","0","7","1" 4 | "0","0","97","4","4","0","1","0","0","0","2","1" 5 | "0","11","2","1297","21","3","9","6","1","4","46","10" 6 | "0","0","0","33","134","0","0","1","0","0","15","1" 7 | "0","0","0","13","3","29","3","0","0","6","0","0" 8 | "0","1","0","14","0","3","201","0","0","0","1","0" 9 | "0","3","0","8","1","0","0","32","0","0","3","6" 10 | "0","0","0","2","0","0","0","0","12","1","0","0" 11 | "0","1","0","11","1","12","0","0","0","118","0","0" 12 | "0","3","1","47","14","0","1","1","0","0","335","7" 13 | "0","3","0","16","1","0","1","4","0","1","4","86" -------------------------------------------------------------------------------- /confusion_matrices/conf_epic_sounds.csv: -------------------------------------------------------------------------------- 1 | "0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43" 2 | "3557","711","485","356","23","42","366","20","112","85","11","41","675","45","138","0","15","15","52","0","10","16","5","25","0","1","2","6","11","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 3 | "187","2649","222","60","50","166","19","33","36","60","0","1","24","54","1","0","0","0","4","6","0","0","10","1","0","5","2","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 4 | "405","713","1798","60","121","18","199","3","73","17","0","15","21","78","10","5","25","5","9","0","0","5","5","0","0","0","0","0","1","0","0","0","0","4","0","0","0","0","0","0","0","0","0","0" 5 | "222","324","95","1596","19","0","175","8","12","21","0","5","44","22","1","5","0","5","0","0","0","0","24","7","0","6","0","9","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 6 | "86","903","360","118","1760","15","46","7","42","20","0","10","14","70","2","0","0","0","138","0","0","0","49","16","0","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0" 7 | "136","606","38","37","66","1700","24","1","48","2","0","7","67","72","10","0","0","0","13","56","2","0","7","7","0","1","1","2","1","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0" 8 | "234","291","162","339","37","0","431","5","15","36","0","15","41","21","10","0","10","0","20","0","0","0","9","15","0","8","2","8","1","0","0","0","0","0","0","0","0","0","5","0","0","0","0","0" 9 | "45","840","50","69","7","3","26","733","7","10","0","5","21","28","0","0","0","0","0","0","0","0","0","0","0","0","0","2","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 10 | "302","477","129","43","78","59","27","1","522","36","2","10","82","103","9","0","0","2","22","3","0","0","1","4","0","5","0","6","1","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0" 11 | "238","554","134","179","16","6","80","21","15","141","0","10","46","7","36","0","0","0","33","0","0","0","22","8","0","0","2","0","0","0","0","0","0","0","2","0","0","0","0","0","0","0","0","0" 12 | "478","108","122","53","5","0","116","10","12","5","26","10","91","6","20","0","0","5","15","0","0","10","0","4","0","3","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 13 | 
"452","182","129","139","21","7","173","1","39","11","0","40","70","25","7","0","0","5","7","0","0","0","8","8","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 14 | "720","228","141","57","11","15","30","2","112","18","10","7","670","12","85","0","0","5","5","0","0","0","15","2","0","0","1","0","2","0","0","0","0","0","2","0","0","5","0","0","0","0","0","0" 15 | "9","62","4","6","7","5","2","2","9","1","0","0","2","53","0","1","0","0","0","0","0","0","0","0","0","0","0","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 16 | "137","28","35","5","0","0","5","0","6","5","0","0","143","0","119","0","0","5","0","0","0","0","0","0","0","0","0","0","2","0","0","0","0","0","0","0","0","5","0","0","0","0","0","0" 17 | "45","75","25","20","5","0","0","30","0","15","0","0","10","0","0","355","0","0","5","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 18 | "81","21","147","0","0","0","15","0","2","5","0","0","15","13","0","0","51","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 19 | "163","39","20","29","0","5","16","0","19","10","0","0","67","13","15","2","0","12","0","0","0","0","1","0","0","0","0","0","1","0","3","0","0","0","0","0","0","0","0","0","0","0","0","0" 20 | "15","26","22","21","27","0","10","0","1","5","0","0","0","0","0","0","0","0","20","0","0","0","0","3","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 21 | "57","146","1","8","13","166","13","0","20","7","0","0","12","20","0","0","0","0","2","50","0","0","2","8","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 22 | "100","10","10","5","10","0","10","0","5","5","0","0","35","0","12","0","0","0","0","0","15","0","0","0","0","0","2","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0" 23 | "15","0","15","30","0","0","0","0","5","0","0","0","5","5","0","0","0","0","0","0","0","10","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 24 | "0","20","16","42","15","0","36","0","0","0","0","5","5","0","0","0","0","0","5","0","0","0","11","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 25 | "64","10","22","33","12","0","10","0","0","0","0","0","23","6","0","0","0","0","14","0","0","0","0","21","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 26 | "10","82","0","6","0","1","0","0","5","15","0","0","10","11","0","0","0","0","5","0","0","0","0","0","20","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 27 | "40","18","6","0","5","0","10","0","1","0","0","0","15","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 28 | "40","8","10","20","5","0","14","2","0","5","0","0","27","0","12","0","0","0","0","0","0","0","0","1","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 29 | "15","31","10","11","10","0","13","0","0","5","0","0","5","0","0","0","0","0","4","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 30 | "67","20","10","10","5","0","5","0","0","6","0","0","7","0","15","0","0","0","0","0","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","5","0","0","0","0","0","0" 31 | 
"12","42","0","3","9","15","3","1","25","0","0","5","0","28","1","0","0","0","3","0","0","0","0","1","0","0","2","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 32 | "39","18","0","12","1","0","6","1","9","5","0","0","35","1","17","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0" 33 | "55","13","10","28","7","0","7","0","1","0","0","0","0","3","10","0","0","5","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 34 | "15","10","5","0","3","4","10","0","0","0","0","0","0","2","5","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 35 | "0","8","1","0","1","3","1","0","2","0","0","0","0","18","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0" 36 | "21","5","5","10","1","0","10","0","6","0","0","5","21","4","11","0","0","0","0","0","0","0","0","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 37 | "0","124","23","5","5","0","0","0","3","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 38 | "11","7","5","0","0","1","0","5","0","0","0","0","0","1","0","0","0","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 39 | "25","10","5","10","0","0","0","0","0","5","0","0","0","0","40","0","0","0","0","0","0","0","0","0","0","0","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 40 | "5","0","0","10","0","0","0","0","0","0","0","0","0","0","20","0","0","0","0","0","0","0","0","0","0","0","0","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 41 | "20","5","5","5","0","0","0","0","5","0","0","0","5","0","15","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 42 | "5","11","0","0","2","1","0","0","0","0","0","0","2","17","0","0","0","0","0","0","0","0","0","1","0","0","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 43 | "0","16","4","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 44 | "0","10","0","5","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" 45 | "0","7","0","0","0","0","0","5","1","0","0","0","0","2","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0" -------------------------------------------------------------------------------- /confusion_matrices/conf_ut_har.csv: -------------------------------------------------------------------------------- 1 | "0","1","2","3","4","5","6" 2 | "63","0","0","2","0","0","1" 3 | "0","43","1","0","0","0","0" 4 | "0","0","31","0","0","0","3" 5 | "0","1","0","48","0","0","0" 6 | "0","0","0","4","117","0","0" 7 | "0","1","3","0","2","34","0" 8 | "0","0","1","0","0","3","26" -------------------------------------------------------------------------------- /confusion_matrices/conf_widar.csv: -------------------------------------------------------------------------------- 1 | "0","1","2","3","4","5","6","7","8" 2 | "1049","65","148","55","9","32","23","18","24" 3 | "205","615","196","54","12","56","26","41","39" 
4 | "46","5","245","58","1","14","0","2","4" 5 | "114","8","128","583","4","51","0","3","33" 6 | "136","11","22","14","53","7","2","3","2" 7 | "18","6","59","62","1","75","2","10","17" 8 | "54","70","21","4","0","5","68","4","24" 9 | "31","19","68","14","0","30","8","51","29" 10 | "33","24","9","18","0","14","17","8","127" -------------------------------------------------------------------------------- /confusion_matrices/conf_wisdm_phone.csv: -------------------------------------------------------------------------------- 1 | "0","1","2","3","4","5","6","7","8","9","10","11" 2 | "232","23","38","0","0","0","0","0","0","0","0","0" 3 | "50","209","3","0","0","0","0","0","0","0","0","0" 4 | "142","0","173","0","0","0","1","0","0","0","0","11" 5 | "0","0","2","111","6","8","1","122","32","96","0","1" 6 | "22","0","0","0","221","38","7","7","11","0","0","34" 7 | "0","0","0","49","54","79","9","69","41","36","0","11" 8 | "13","5","9","2","61","9","25","74","14","40","13","43" 9 | "0","0","0","51","24","55","25","125","1","32","2","1" 10 | "1","0","24","51","32","76","37","47","43","34","2","1" 11 | "0","0","0","34","62","57","23","42","40","62","0","28" 12 | "4","0","41","115","34","3","14","27","1","85","0","47" 13 | "8","2","30","15","77","18","1","32","0","4","2","139" -------------------------------------------------------------------------------- /confusion_matrices/conf_wisdm_watch.csv: -------------------------------------------------------------------------------- 1 | "0","1","2","3","4","5","6","7","8","9","10","11" 2 | "280","0","49","0","0","0","0","0","0","0","0","0" 3 | "1","287","0","0","0","0","0","0","0","0","4","4" 4 | "25","4","252","0","9","0","3","0","0","0","5","31" 5 | "0","0","0","171","38","54","0","25","34","23","0","0" 6 | "0","0","0","33","251","0","1","22","27","14","0","0" 7 | "0","0","0","48","2","211","0","0","1","36","0","0" 8 | "0","0","22","1","35","0","206","25","3","0","16","8" 9 | "1","0","2","12","11","6","15","116","146","9","0","22" 10 | "1","0","0","8","14","2","1","62","241","20","3","13" 11 | "0","0","0","43","11","15","0","20","0","240","0","0" 12 | "25","0","0","0","10","0","7","8","0","0","289","0" 13 | "1","0","36","0","0","0","1","0","3","0","1","292" -------------------------------------------------------------------------------- /datasets/emognition/download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | import gdown 5 | 6 | # Define the shared Google Drive file URL 7 | FILE_ID = "1XUYMBP0p2VSJTppgE2BT87rr6MK0vagl" 8 | 9 | # Define the directory where you want to save the dataset 10 | SAVE_DIR = "./datasets/emognition" 11 | 12 | 13 | # Function to download the file from Google Drive 14 | def download_file_from_google_drive(file_id, save_dir): 15 | if not os.path.exists(save_dir): 16 | os.makedirs(save_dir) 17 | 18 | file_path = os.path.join(save_dir, "_study_data.zip") 19 | gdown.download(output=file_path, quiet=False, id=file_id) 20 | 21 | return file_path 22 | 23 | 24 | # Function to extract the dataset 25 | def extract_file(file_path, save_dir): 26 | with zipfile.ZipFile(file_path, "r") as zip_ref: 27 | zip_ref.extractall(save_dir) 28 | print(f"Extracted dataset to {save_dir}") 29 | 30 | 31 | # Main function to download and extract the WidarData.zip file 32 | def main(): 33 | file_path = download_file_from_google_drive(FILE_ID, SAVE_DIR) 34 | extract_file(file_path, SAVE_DIR) 35 | 36 | 37 | if __name__ == "__main__": 38 | main() 39 | 
-------------------------------------------------------------------------------- /datasets/epic_sounds/download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | import gdown 5 | 6 | # Define the shared Google Drive file URL 7 | FILE_ID = "1BAaBIYqU6gZDyFqu9aW6spvpwpsEDZMS" 8 | 9 | # Define the directory where you want to save the dataset 10 | SAVE_DIR = "./datasets/epic_sounds" 11 | 12 | 13 | # Function to download the file from Google Drive 14 | def download_file_from_google_drive(file_id, save_dir): 15 | if not os.path.exists(save_dir): 16 | os.makedirs(save_dir) 17 | 18 | file_path = os.path.join(save_dir, "EPIC_audio.hdf5") 19 | gdown.download(output=file_path, quiet=False, id=file_id) 20 | 21 | return file_path 22 | 23 | 24 | # Main function to download and extract the WidarData.zip file 25 | def main(): 26 | download_file_from_google_drive(FILE_ID, SAVE_DIR) 27 | 28 | 29 | if __name__ == "__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /datasets/epic_sounds/epic-kitchens-download-scripts-master/data/epic_55_splits.csv: -------------------------------------------------------------------------------- 1 | participant_id,video_id,split 2 | P01,P01_01,train 3 | P01,P01_02,train 4 | P01,P01_03,train 5 | P01,P01_04,train 6 | P01,P01_05,train 7 | P01,P01_06,train 8 | P01,P01_07,train 9 | P01,P01_08,train 10 | P01,P01_09,train 11 | P01,P01_10,train 12 | P01,P01_16,train 13 | P01,P01_17,train 14 | P01,P01_18,train 15 | P01,P01_19,train 16 | P02,P02_01,train 17 | P02,P02_02,train 18 | P02,P02_03,train 19 | P02,P02_04,train 20 | P02,P02_05,train 21 | P02,P02_06,train 22 | P02,P02_07,train 23 | P02,P02_08,train 24 | P02,P02_09,train 25 | P02,P02_10,train 26 | P02,P02_11,train 27 | P03,P03_02,train 28 | P03,P03_03,train 29 | P03,P03_04,train 30 | P03,P03_05,train 31 | P03,P03_06,train 32 | P03,P03_07,train 33 | P03,P03_08,train 34 | P03,P03_09,train 35 | P03,P03_10,train 36 | P03,P03_11,train 37 | P03,P03_12,train 38 | P03,P03_13,train 39 | P03,P03_14,train 40 | P03,P03_15,train 41 | P03,P03_16,train 42 | P03,P03_17,train 43 | P03,P03_18,train 44 | P03,P03_19,train 45 | P03,P03_20,train 46 | P03,P03_27,train 47 | P03,P03_28,train 48 | P04,P04_01,train 49 | P04,P04_02,train 50 | P04,P04_03,train 51 | P04,P04_04,train 52 | P04,P04_05,train 53 | P04,P04_06,train 54 | P04,P04_07,train 55 | P04,P04_08,train 56 | P04,P04_09,train 57 | P04,P04_10,train 58 | P04,P04_11,train 59 | P04,P04_12,train 60 | P04,P04_13,train 61 | P04,P04_14,train 62 | P04,P04_15,train 63 | P04,P04_16,train 64 | P04,P04_17,train 65 | P04,P04_18,train 66 | P04,P04_19,train 67 | P04,P04_20,train 68 | P04,P04_21,train 69 | P04,P04_22,train 70 | P04,P04_23,train 71 | P05,P05_01,train 72 | P05,P05_02,train 73 | P05,P05_03,train 74 | P05,P05_04,train 75 | P05,P05_05,train 76 | P05,P05_06,train 77 | P05,P05_08,train 78 | P06,P06_01,train 79 | P06,P06_02,train 80 | P06,P06_03,train 81 | P06,P06_05,train 82 | P06,P06_07,train 83 | P06,P06_08,train 84 | P06,P06_09,train 85 | P07,P07_01,train 86 | P07,P07_02,train 87 | P07,P07_03,train 88 | P07,P07_04,train 89 | P07,P07_05,train 90 | P07,P07_06,train 91 | P07,P07_07,train 92 | P07,P07_08,train 93 | P07,P07_09,train 94 | P07,P07_10,train 95 | P07,P07_11,train 96 | P08,P08_01,train 97 | P08,P08_02,train 98 | P08,P08_03,train 99 | P08,P08_04,train 100 | P08,P08_05,train 101 | P08,P08_06,train 102 | P08,P08_07,train 103 | P08,P08_08,train 104 | 
P08,P08_11,train 105 | P08,P08_12,train 106 | P08,P08_13,train 107 | P08,P08_18,train 108 | P08,P08_19,train 109 | P08,P08_20,train 110 | P08,P08_21,train 111 | P08,P08_22,train 112 | P08,P08_23,train 113 | P08,P08_24,train 114 | P08,P08_25,train 115 | P08,P08_26,train 116 | P08,P08_27,train 117 | P08,P08_28,train 118 | P10,P10_01,train 119 | P10,P10_02,train 120 | P10,P10_04,train 121 | P12,P12_01,train 122 | P12,P12_02,train 123 | P12,P12_04,train 124 | P12,P12_05,train 125 | P12,P12_06,train 126 | P12,P12_07,train 127 | P13,P13_04,train 128 | P13,P13_05,train 129 | P13,P13_06,train 130 | P13,P13_07,train 131 | P13,P13_08,train 132 | P13,P13_09,train 133 | P13,P13_10,train 134 | P14,P14_01,train 135 | P14,P14_02,train 136 | P14,P14_03,train 137 | P14,P14_04,train 138 | P14,P14_05,train 139 | P14,P14_07,train 140 | P14,P14_09,train 141 | P15,P15_01,train 142 | P15,P15_02,train 143 | P15,P15_03,train 144 | P15,P15_07,train 145 | P15,P15_08,train 146 | P15,P15_09,train 147 | P15,P15_10,train 148 | P15,P15_11,train 149 | P15,P15_12,train 150 | P15,P15_13,train 151 | P16,P16_01,train 152 | P16,P16_02,train 153 | P16,P16_03,train 154 | P17,P17_01,train 155 | P17,P17_03,train 156 | P17,P17_04,train 157 | P19,P19_01,train 158 | P19,P19_02,train 159 | P19,P19_03,train 160 | P19,P19_04,train 161 | P20,P20_01,train 162 | P20,P20_02,train 163 | P20,P20_03,train 164 | P20,P20_04,train 165 | P21,P21_01,train 166 | P21,P21_03,train 167 | P21,P21_04,train 168 | P22,P22_05,train 169 | P22,P22_06,train 170 | P22,P22_07,train 171 | P22,P22_08,train 172 | P22,P22_09,train 173 | P22,P22_10,train 174 | P22,P22_11,train 175 | P22,P22_12,train 176 | P22,P22_13,train 177 | P22,P22_14,train 178 | P22,P22_15,train 179 | P22,P22_16,train 180 | P22,P22_17,train 181 | P23,P23_01,train 182 | P23,P23_02,train 183 | P23,P23_03,train 184 | P23,P23_04,train 185 | P24,P24_01,train 186 | P24,P24_02,train 187 | P24,P24_03,train 188 | P24,P24_04,train 189 | P24,P24_05,train 190 | P24,P24_06,train 191 | P24,P24_07,train 192 | P24,P24_08,train 193 | P25,P25_01,train 194 | P25,P25_02,train 195 | P25,P25_03,train 196 | P25,P25_04,train 197 | P25,P25_05,train 198 | P25,P25_09,train 199 | P25,P25_10,train 200 | P25,P25_11,train 201 | P25,P25_12,train 202 | P26,P26_01,train 203 | P26,P26_02,train 204 | P26,P26_03,train 205 | P26,P26_04,train 206 | P26,P26_05,train 207 | P26,P26_06,train 208 | P26,P26_07,train 209 | P26,P26_08,train 210 | P26,P26_09,train 211 | P26,P26_10,train 212 | P26,P26_11,train 213 | P26,P26_12,train 214 | P26,P26_13,train 215 | P26,P26_14,train 216 | P26,P26_15,train 217 | P26,P26_16,train 218 | P26,P26_17,train 219 | P26,P26_18,train 220 | P26,P26_19,train 221 | P26,P26_20,train 222 | P26,P26_21,train 223 | P26,P26_22,train 224 | P26,P26_23,train 225 | P26,P26_24,train 226 | P26,P26_25,train 227 | P26,P26_26,train 228 | P26,P26_27,train 229 | P26,P26_28,train 230 | P26,P26_29,train 231 | P27,P27_01,train 232 | P27,P27_02,train 233 | P27,P27_03,train 234 | P27,P27_04,train 235 | P27,P27_06,train 236 | P27,P27_07,train 237 | P28,P28_01,train 238 | P28,P28_02,train 239 | P28,P28_03,train 240 | P28,P28_04,train 241 | P28,P28_05,train 242 | P28,P28_06,train 243 | P28,P28_07,train 244 | P28,P28_08,train 245 | P28,P28_09,train 246 | P28,P28_10,train 247 | P28,P28_11,train 248 | P28,P28_12,train 249 | P28,P28_13,train 250 | P28,P28_14,train 251 | P29,P29_01,train 252 | P29,P29_02,train 253 | P29,P29_03,train 254 | P29,P29_04,train 255 | P30,P30_01,train 256 | P30,P30_02,train 257 | P30,P30_03,train 258 | 
P30,P30_04,train 259 | P30,P30_05,train 260 | P30,P30_06,train 261 | P30,P30_10,train 262 | P30,P30_11,train 263 | P31,P31_01,train 264 | P31,P31_02,train 265 | P31,P31_03,train 266 | P31,P31_04,train 267 | P31,P31_05,train 268 | P31,P31_06,train 269 | P31,P31_07,train 270 | P31,P31_08,train 271 | P31,P31_09,train 272 | P31,P31_13,train 273 | P31,P31_14,train 274 | P01,P01_11,test 275 | P01,P01_12,test 276 | P01,P01_13,test 277 | P01,P01_14,test 278 | P01,P01_15,test 279 | P02,P02_12,test 280 | P02,P02_13,test 281 | P02,P02_14,test 282 | P02,P02_15,test 283 | P03,P03_21,test 284 | P03,P03_22,test 285 | P03,P03_23,test 286 | P03,P03_24,test 287 | P03,P03_25,test 288 | P03,P03_26,test 289 | P04,P04_24,test 290 | P04,P04_25,test 291 | P04,P04_26,test 292 | P04,P04_27,test 293 | P04,P04_28,test 294 | P04,P04_29,test 295 | P04,P04_30,test 296 | P04,P04_31,test 297 | P04,P04_32,test 298 | P04,P04_33,test 299 | P05,P05_07,test 300 | P05,P05_09,test 301 | P06,P06_10,test 302 | P06,P06_11,test 303 | P06,P06_12,test 304 | P06,P06_13,test 305 | P06,P06_14,test 306 | P07,P07_12,test 307 | P07,P07_13,test 308 | P07,P07_14,test 309 | P07,P07_15,test 310 | P07,P07_16,test 311 | P07,P07_17,test 312 | P07,P07_18,test 313 | P08,P08_09,test 314 | P08,P08_10,test 315 | P08,P08_14,test 316 | P08,P08_15,test 317 | P08,P08_16,test 318 | P08,P08_17,test 319 | P10,P10_03,test 320 | P12,P12_03,test 321 | P12,P12_08,test 322 | P13,P13_01,test 323 | P13,P13_02,test 324 | P13,P13_03,test 325 | P14,P14_06,test 326 | P14,P14_08,test 327 | P15,P15_04,test 328 | P15,P15_05,test 329 | P15,P15_06,test 330 | P16,P16_04,test 331 | P17,P17_02,test 332 | P19,P19_05,test 333 | P19,P19_06,test 334 | P20,P20_05,test 335 | P20,P20_06,test 336 | P20,P20_07,test 337 | P21,P21_02,test 338 | P22,P22_01,test 339 | P22,P22_02,test 340 | P22,P22_03,test 341 | P22,P22_04,test 342 | P23,P23_05,test 343 | P24,P24_09,test 344 | P25,P25_06,test 345 | P25,P25_07,test 346 | P25,P25_08,test 347 | P26,P26_30,test 348 | P26,P26_31,test 349 | P26,P26_32,test 350 | P26,P26_33,test 351 | P26,P26_34,test 352 | P26,P26_35,test 353 | P26,P26_36,test 354 | P26,P26_37,test 355 | P26,P26_38,test 356 | P26,P26_39,test 357 | P26,P26_40,test 358 | P26,P26_41,test 359 | P27,P27_05,test 360 | P28,P28_15,test 361 | P28,P28_16,test 362 | P28,P28_17,test 363 | P28,P28_18,test 364 | P28,P28_19,test 365 | P28,P28_20,test 366 | P28,P28_21,test 367 | P28,P28_22,test 368 | P28,P28_23,test 369 | P28,P28_24,test 370 | P28,P28_25,test 371 | P28,P28_26,test 372 | P29,P29_05,test 373 | P29,P29_06,test 374 | P30,P30_07,test 375 | P30,P30_08,test 376 | P30,P30_09,test 377 | P31,P31_10,test 378 | P31,P31_11,test 379 | P31,P31_12,test 380 | P09,P09_01,test 381 | P09,P09_02,test 382 | P09,P09_03,test 383 | P09,P09_04,test 384 | P09,P09_05,test 385 | P09,P09_06,test 386 | P09,P09_07,test 387 | P09,P09_08,test 388 | P11,P11_01,test 389 | P11,P11_02,test 390 | P11,P11_03,test 391 | P11,P11_04,test 392 | P11,P11_05,test 393 | P11,P11_06,test 394 | P11,P11_07,test 395 | P11,P11_08,test 396 | P11,P11_09,test 397 | P11,P11_10,test 398 | P11,P11_11,test 399 | P11,P11_12,test 400 | P11,P11_13,test 401 | P11,P11_14,test 402 | P11,P11_15,test 403 | P11,P11_16,test 404 | P11,P11_17,test 405 | P11,P11_18,test 406 | P11,P11_19,test 407 | P11,P11_20,test 408 | P11,P11_21,test 409 | P11,P11_22,test 410 | P11,P11_23,test 411 | P11,P11_24,test 412 | P18,P18_01,test 413 | P18,P18_02,test 414 | P18,P18_03,test 415 | P18,P18_04,test 416 | P18,P18_05,test 417 | P18,P18_06,test 418 | P18,P18_07,test 
419 | P18,P18_08,test 420 | P18,P18_09,test 421 | P18,P18_10,test 422 | P18,P18_11,test 423 | P18,P18_12,test 424 | P32,P32_01,test 425 | P32,P32_02,test 426 | P32,P32_03,test 427 | P32,P32_04,test 428 | P32,P32_05,test 429 | P32,P32_06,test 430 | P32,P32_07,test 431 | P32,P32_08,test 432 | P32,P32_09,test 433 | P32,P32_10,test 434 | -------------------------------------------------------------------------------- /datasets/epic_sounds/epic-kitchens-download-scripts-master/data/errata.csv: -------------------------------------------------------------------------------- 1 | rdsf_path,dropbox_path 2 | P01/rgb_frames/P01_109.tar,https://www.dropbox.com/s/mh7y0goc5x945nu/P01_109.tar?dl=1 3 | P27/rgb_frames/P27_103.tar,https://www.dropbox.com/s/c1eo70v6dokr6cf/P27_103.tar?dl=1 4 | P01/flow_frames/P01_109.tar,https://www.dropbox.com/s/kdadnyf1epte0f1/P01_109.tar?dl=1 5 | P27/flow_frames/P27_103.tar,https://www.dropbox.com/s/48kiyqarqfmb2bk/P27_103.tar?dl=1 6 | hand-objects/P01/P01_109.pkl,https://www.dropbox.com/s/fjvhpd4o9l2n08y/P01_109.pkl?dl=1 7 | hand-objects/P27/P27_103.pkl,https://www.dropbox.com/s/ocvy4fskv9j8xmt/P27_103.pkl?dl=1 8 | masks/P01/P01_109.pkl,https://www.dropbox.com/s/mzhguzwsyjxbh9e/P01_109.pkl?dl=1 9 | masks/P27/P27_103.pkl,https://www.dropbox.com/s/zc15u7qlm3hqn0c/P27_103.pkl?dl=1 -------------------------------------------------------------------------------- /datasets/epic_sounds/epic-kitchens-download-scripts-master/download_extension_only.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python epic_downloader.py --extension_only -------------------------------------------------------------------------------- /datasets/epic_sounds/epic-kitchens-download-scripts-master/download_full_epic.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python epic_downloader.py -------------------------------------------------------------------------------- /datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_recognition_test_timestamps.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_recognition_test_timestamps.pkl -------------------------------------------------------------------------------- /datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_train.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_train.pkl -------------------------------------------------------------------------------- /datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_validation.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/datasets/epic_sounds/epic-sounds-annotations-main/EPIC_Sounds_validation.pkl -------------------------------------------------------------------------------- /datasets/epic_sounds/epic-sounds-annotations-main/sound_events_not_categorised.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/datasets/epic_sounds/epic-sounds-annotations-main/sound_events_not_categorised.pkl -------------------------------------------------------------------------------- /datasets/ut_har/download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | import gdown 5 | from process import process 6 | 7 | # Define the shared Google Drive file URL 8 | FILE_ID = "1fEiI3nAoOsddR5qcJQXqz4ocM3aMAcwz" 9 | 10 | # Define the directory where you want to save the dataset 11 | SAVE_DIR = "./datasets/ut_har" 12 | 13 | 14 | # Function to download the file from Google Drive 15 | def download_file_from_google_drive(file_id, save_dir): 16 | if not os.path.exists(save_dir): 17 | os.makedirs(save_dir) 18 | 19 | file_path = os.path.join(save_dir, "UT_HAR.zip") 20 | gdown.download(output=file_path, quiet=False, id=file_id) 21 | 22 | return file_path 23 | 24 | 25 | # Function to extract the dataset 26 | def extract_file(file_path, save_dir): 27 | with zipfile.ZipFile(file_path, "r") as zip_ref: 28 | zip_ref.extractall(save_dir) 29 | print(f"Extracted dataset to {save_dir}") 30 | 31 | 32 | # Main function to download, extract, and process the UT_HAR.zip file 33 | def main(): 34 | file_path = download_file_from_google_drive(FILE_ID, SAVE_DIR) 35 | extract_file(file_path, SAVE_DIR) 36 | process(SAVE_DIR) 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /datasets/ut_har/process.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def process(root_dir='.'): 8 | data_list = glob.glob(root_dir + '/UT_HAR/data/*.csv') 9 | label_list = glob.glob(root_dir + '/UT_HAR/label/*.csv') 10 | print(data_list, label_list) 11 | WiFi_data = {} 12 | for data_dir in data_list: 13 | data_name = data_dir.split('/')[-1].split('.')[0] 14 | with open(data_dir, 'rb') as f: 15 | data = np.load(f)  # the .csv-named UT-HAR files are NumPy binaries, hence np.load rather than a CSV parser 16 | data = data.reshape(len(data), 1, 250, 90) 17 | data_norm = (data - np.min(data)) / (np.max(data) - np.min(data)) 18 | WiFi_data[data_name] = torch.Tensor(data_norm) 19 | for label_dir in label_list: 20 | label_name = label_dir.split('/')[-1].split('.')[0] 21 | with open(label_dir, 'rb') as f: 22 | label = np.load(f) 23 | WiFi_data[label_name] = torch.Tensor(label) 24 | return WiFi_data 25 | 26 | 27 | if __name__ == '__main__': 28 | data = process() 29 | for k, v in data.items(): 30 | print(k, v.shape) 31 | -------------------------------------------------------------------------------- /datasets/visdrone/clusterer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import torch 6 | 7 | import torchvision 8 | import torchvision.models as models 9 | import torchvision.transforms as transforms 10 | from PIL import Image 11 | from pycocotools import coco 12 | from torch.utils.data import Dataset, DataLoader 13 | from tqdm import tqdm 14 | 15 | transformations = transforms.Compose([ 16 | transforms.Resize(256), 17 | transforms.CenterCrop(224), 18 | transforms.ToTensor(), 19 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 20 | ]) 21 | model = models.resnet50(pretrained=True) 22 | 23 | 24 | def extract_imagenet_features(img_path, transform=transformations, 
model=model): 25 | # Load the pre-trained ResNet50 model 26 | 27 | # Set the model to evaluation mode 28 | model.eval() 29 | 30 | # Define the image pre-processing transforms 31 | 32 | # Load the image and apply the pre-processing transforms 33 | img = Image.open(img_path) 34 | img_tensor = transform(img).unsqueeze(0) 35 | 36 | # Extract features from the image using the model 37 | with torch.no_grad(): 38 | features = model(img_tensor) 39 | 40 | # Flatten the features tensor 41 | flattened_features = features.flatten() 42 | 43 | return flattened_features 44 | 45 | 46 | class VisDroneDataset(Dataset): 47 | def __init__(self, data_dir, transform=None): 48 | # self.coco = coco.COCO(ann_dir) 49 | self.data_dir = data_dir 50 | self.images = [x for x in os.listdir(data_dir) if '.jpg' in x] 51 | self.transform = transform 52 | # self.images = os.listdir(os.path.join(data_dir, 'images')) 53 | # self.annotations = os.listdir(os.path.join(data_dir, 'annotations')) 54 | 55 | def __len__(self): 56 | return len(self.images) 57 | 58 | def __getitem__(self, idx): 59 | # Load the image 60 | # image = Image.open(os.path.join(self.data_dir, self.coco.loadImgs(self.images[idx])[0]['file_name'])) 61 | image = extract_imagenet_features(os.path.join(self.data_dir, self.images[idx])) 62 | if self.transform: 63 | image = self.transform(image) 64 | 65 | # Load the annotations 66 | # with open(os.path.join(self.data_dir, 'annotations', self.annotations[idx]), 'r') as f: 67 | # annotations = f.readlines() 68 | 69 | # Parse the annotations 70 | # boxes = [] 71 | # labels = [] 72 | # for annotation in annotations: 73 | # xmin, ymin, xmax, ymax, label = annotation.strip().split(',') 74 | # boxes.append([int(xmin), int(ymin), int(xmax), int(ymax)]) 75 | # labels.append(int(label)) 76 | 77 | # Convert the annotations to tensors 78 | # boxes = torch.as_tensor(boxes, dtype=torch.float32) 79 | # labels = torch.as_tensor(labels, dtype=torch.int64) 80 | 81 | return image, self.images[idx] # boxes, labels 82 | 83 | 84 | # Define the transformations to be applied to the images 85 | transformations = transforms.Compose([ 86 | transforms.Resize(256), 87 | transforms.CenterCrop(224), 88 | transforms.ToTensor(), 89 | transforms.Normalize(mean=[0.485, 0.456, 0.406], 90 | std=[0.229, 0.224, 0.225]) 91 | ]) 92 | 93 | dataset = VisDroneDataset(data_dir='train/images', 94 | transform=None) 95 | 96 | # Load a pretrained ResNet-50 model 97 | model = torchvision.models.resnet50(pretrained=True) 98 | model.eval() 99 | 100 | # Extract features for each image in the dataset 101 | ids = [] 102 | features = [] 103 | for i, (image_features, img_id) in tqdm(enumerate(dataset), total=len(dataset)): 104 | with torch.no_grad(): 105 | feature = image_features.numpy() 106 | ids.append(img_id) 107 | features.append(feature) 108 | 109 | # Convert the features to a numpy array 110 | features = np.array(features) 111 | 112 | # Perform K-means clustering on the features to cluster the images into 100 clusters 113 | from sklearn.cluster import KMeans, DBSCAN 114 | 115 | kmeans = KMeans(n_clusters=10).fit(features, ) 116 | df = pd.DataFrame({'image_id': ids, 'cluster': kmeans.labels_}) 117 | df.to_csv('split.csv') 118 | print(df.groupby('cluster').count()) 119 | clusters = kmeans.labels_ 120 | -------------------------------------------------------------------------------- /datasets/visdrone/download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import zipfile 4 | from pathlib import 
Path 5 | 6 | import PIL.Image as Image 7 | import gdown 8 | import requests 9 | from tqdm import tqdm 10 | 11 | # Define the VisDrone dataset URLs 12 | DATASET_URLS = [ 13 | "https://downloads.visdrone.org/data2018/VisDrone2018-DET-train.zip", 14 | "https://downloads.visdrone.org/data2018/VisDrone2018-DET-val.zip", 15 | "https://downloads.visdrone.org/data2018/VisDrone2018-DET-test-challenge.zip" 16 | ] 17 | 18 | FILE_IDs = [ 19 | ('1i8iZ-zYBgWwzX9355HIYrWM1uKeqWW0S', 'VisDrone2019-DET-train.zip', 'train'), 20 | ('1qJKZdv2jEv2c7SfEdMwWR3KOyj_mfhBN', 'VisDrone2019-DET-val.zip', 'val'), 21 | ('1nTC4cqNqT_IJ7EIH28i9YTVGNFq5WgqL', 'VisDrone2019-DET-test-dev.zip', 'test') 22 | ] 23 | 24 | FOLDER_SPLITS = [ 25 | ('VisDrone2019-DET-train', 'train'), 26 | ('VisDrone2019-DET-val', 'val'), 27 | ('VisDrone2018-DET-test-dev', 'test') 28 | ] 29 | 30 | # Define the directory where you want to save the dataset 31 | SAVE_DIR = "./datasets/visdrone" 32 | 33 | 34 | def convert_visdrone_to_yolo_format() -> None: 35 | """ 36 | Convert VisDrone dataset to YOLOv5 format. 37 | """ 38 | visdrone_folder = Path(SAVE_DIR) 39 | 40 | for folder, split in FOLDER_SPLITS: 41 | images_folder = visdrone_folder / f"{folder}/images" 42 | annotations_folder = visdrone_folder / f"{folder}/annotations" 43 | 44 | output_images_folder = visdrone_folder / f"{split}/images" 45 | output_labels_folder = visdrone_folder / f"{split}/labels" 46 | 47 | output_images_folder.mkdir(parents=True, exist_ok=True) 48 | output_labels_folder.mkdir(parents=True, exist_ok=True) 49 | 50 | for annotation_file in tqdm(annotations_folder.glob("*.txt")): 51 | image_file = images_folder / f"{annotation_file.stem}.jpg" 52 | 53 | if image_file.exists(): 54 | # Copy image file 55 | shutil.copy(image_file, output_images_folder / image_file.name) 56 | img = Image.open(image_file).convert("RGB") 57 | # Convert and save label file 58 | with open(annotation_file) as f: 59 | lines = f.readlines() 60 | 61 | with open(output_labels_folder / annotation_file.name, "w") as f: 62 | for line in lines: 63 | items = line.strip().split(",") 64 | 65 | # Calculate normalized values required by YOLOv5 66 | # class_id, x_center, y_center, width, height 67 | 68 | class_id = int(items[5]) 69 | x_center = (int(items[0]) + int(items[2]) / 2) / img.width 70 | y_center = (int(items[1]) + int(items[3]) / 2) / img.height 71 | width = int(items[2]) / img.width 72 | height = int(items[3]) / img.height 73 | 74 | f.write(f"{class_id} {x_center} {y_center} {width} {height}\n") 75 | 76 | 77 | # Function to download the dataset 78 | def download_dataset(url, save_dir): 79 | if not os.path.exists(save_dir): 80 | os.makedirs(save_dir) 81 | 82 | response = requests.get(url, stream=True) 83 | file_size = int(response.headers.get("Content-Length", 0)) 84 | filename = os.path.join(save_dir, url.split("/")[-1]) 85 | 86 | with open(filename, "wb") as f: 87 | for data in response.iter_content(chunk_size=1024): 88 | f.write(data) 89 | 90 | print(f"Downloaded {filename}") 91 | 92 | return filename 93 | 94 | 95 | def download_file_from_google_drive(file_id, save_dir, filename): 96 | if not os.path.exists(save_dir): 97 | os.makedirs(save_dir) 98 | file_path = os.path.join(save_dir, filename) 99 | gdown.download(output=file_path, quiet=False, id=file_id) 100 | return file_path 101 | 102 | 103 | # Function to extract the dataset 104 | def extract_dataset(file_path, save_dir): 105 | with zipfile.ZipFile(file_path, "r") as zip_ref: 106 | zip_ref.extractall(save_dir) 107 | 108 | print(f"Extracted dataset to 
{save_dir}") 109 | 110 | 111 | # Main function to download and extract the VisDrone dataset 112 | def main(): 113 | s_dir = SAVE_DIR 114 | for file_id, filename, split in FILE_IDs: 115 | file_path = download_file_from_google_drive(file_id=file_id, 116 | save_dir=SAVE_DIR, 117 | filename=filename) 118 | if 'test' in file_path: 119 | s_dir = f'{SAVE_DIR}/VisDrone2018-DET-test-dev' 120 | Path(s_dir).mkdir(exist_ok=True) 121 | extract_dataset(file_path, s_dir) 122 | print(file_path) 123 | convert_visdrone_to_yolo_format() 124 | 125 | 126 | if __name__ == "__main__": 127 | main() 128 | -------------------------------------------------------------------------------- /datasets/widar/download.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import zipfile 4 | from pathlib import Path 5 | 6 | import gdown 7 | import numpy as np 8 | import torch 9 | from tqdm import tqdm 10 | 11 | # Define the shared Google Drive file URL 12 | FILE_ID = "14vp4D8W0X2bDLpXnpP-U_VT9PIGkVf_4" 13 | 14 | # Define the directory where you want to save the dataset 15 | SAVE_DIR = "./datasets/widar" 16 | 17 | 18 | # Function to download the file from Google Drive 19 | def download_file_from_google_drive(file_id, save_dir): 20 | if not os.path.exists(save_dir): 21 | os.makedirs(save_dir) 22 | 23 | file_path = os.path.join(save_dir, "Widardata.zip") 24 | gdown.download(output=file_path, quiet=False, id=file_id) 25 | 26 | return file_path 27 | 28 | 29 | # Function to extract the dataset 30 | def extract_file(file_path, save_dir): 31 | with zipfile.ZipFile(file_path, "r") as zip_ref: 32 | zip_ref.extractall(save_dir) 33 | print(f"Extracted dataset to {save_dir}") 34 | 35 | 36 | def process(): 37 | files = glob.glob('./datasets/widar/Widardata/*/*/*.csv') 38 | data = {} 39 | for file in tqdm(files): 40 | y = int(file.split('/')[-2].split('-')[0]) - 1 41 | assert y >= 0, 'y is negative' 42 | user = int(file.split('/')[-1].split('-')[0].replace('user', '')) 43 | if user not in data.keys(): 44 | data[user] = {'X': [], 'Y': []} 45 | x = np.genfromtxt(file, delimiter=',') 46 | data[user]['X'].append(x) 47 | data[user]['Y'].append(y) 48 | Path('./datasets/widar/federated').mkdir(exist_ok=True) 49 | for user in data.keys(): 50 | X = np.concatenate(np.expand_dims(np.array(data[user]['X']), 0)) 51 | Y = np.array(data[user]['Y']) 52 | print(f'{user}_data.pkl') 53 | print(X.shape, Y.shape) 54 | torch.save((X, Y), f'./datasets/widar/federated/{user}.pkl') 55 | 56 | 57 | # Main function to download and extract the WidarData.zip file 58 | def main(): 59 | file_path = download_file_from_google_drive(FILE_ID, SAVE_DIR) 60 | extract_file(file_path, SAVE_DIR) 61 | process() 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /datasets/wisdm/activity_key.txt: -------------------------------------------------------------------------------- 1 | walking = A 2 | jogging = B 3 | stairs = C 4 | sitting = D 5 | standing = E 6 | typing = F 7 | teeth = G 8 | soup = H 9 | chips = I 10 | pasta = J 11 | drinking = K 12 | sandwich = L 13 | kicking = M 14 | catch = O 15 | dribbling = P 16 | writing = Q 17 | clapping = R 18 | folding = S 19 | -------------------------------------------------------------------------------- /datasets/wisdm/activity_key_filtered.txt: -------------------------------------------------------------------------------- 1 | name,code,fcode 2 | walking,A,0 3 | jogging,B,1 4 | stairs,C,2 
5 | sitting,D,3 6 | standing,E,4 7 | typing,F,5 8 | teeth,G,6 9 | drinking,K,8 10 | eating,L,7 11 | writing,Q,9 12 | clapping,R,10 13 | folding,S,11 14 | 15 | -------------------------------------------------------------------------------- /datasets/wisdm/download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | import requests 5 | 6 | # Define the URL for the dataset 7 | WISDM_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00507/wisdm-dataset.zip" 8 | 9 | # Define the directory where you want to save the dataset 10 | SAVE_DIR = "datasets/wisdm/" 11 | 12 | 13 | # Function to download the dataset 14 | def download_wisdm_dataset(url, save_dir='./datasets/wisdm/'): 15 | if not os.path.exists(save_dir): 16 | os.makedirs(save_dir) 17 | 18 | response = requests.get(url, stream=True) 19 | file_size = int(response.headers.get("Content-Length", 0)) 20 | filename = os.path.join(save_dir, url.split("/")[-1]) 21 | 22 | with open(filename, "wb") as f: 23 | for data in response.iter_content(chunk_size=1024): 24 | f.write(data) 25 | 26 | print(f"Downloaded {filename}") 27 | 28 | return filename 29 | 30 | 31 | # Function to extract the dataset 32 | def extract_wisdm_dataset(file_path, save_dir): 33 | with zipfile.ZipFile(file_path, "r") as zip_ref: 34 | zip_ref.extractall(save_dir) 35 | 36 | print(f"Extracted dataset to {save_dir}") 37 | 38 | 39 | # Main function to download and extract the WISDM dataset 40 | def main(): 41 | file_path = download_wisdm_dataset(WISDM_URL, SAVE_DIR) 42 | extract_wisdm_dataset(file_path, SAVE_DIR) 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | -------------------------------------------------------------------------------- /figures/datasets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/figures/datasets.png -------------------------------------------------------------------------------- /figures/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/figures/overview.png -------------------------------------------------------------------------------- /figures/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/FedAIoT/be10b0f8533f99c934061b879ba5ec486b59a874/figures/pipeline.png -------------------------------------------------------------------------------- /loaders/casas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | import torch 5 | from sklearn.model_selection import train_test_split 6 | from torch.utils.data import TensorDataset 7 | 8 | 9 | def load_dataset(datasetName='all'): 10 | X = np.load('./datasets/casas/npy/' + datasetName + '-x.npy') 11 | Y = np.load('./datasets/casas/npy/' + datasetName + '-y.npy') 12 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42) 13 | print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape) 14 | X_tensor = torch.from_numpy(X.astype(int)) 15 | y_tensor = torch.from_numpy(Y.astype(int)) 16 | 17 | X_tensor_train = torch.from_numpy(X_train.astype(int)) 18 | y_tensor_train = torch.from_numpy(Y_train.astype(int)) 19 | 20 | X_tensor_test = 
torch.from_numpy(X_test.astype(int)) 21 | y_tensor_test = torch.from_numpy(Y_test.astype(int)) 22 | # Create a PyTorch Dataset using TensorDataset 23 | dataset = TensorDataset(X_tensor, y_tensor) 24 | train_dataset = TensorDataset(X_tensor_train, y_tensor_train) 25 | test_dataset = TensorDataset(X_tensor_test, y_tensor_test) 26 | dataset.targets = y_tensor 27 | train_dataset.targets = y_tensor_train 28 | test_dataset.targets = y_tensor_test 29 | data_dict = { 30 | 'full_dataset': dataset, 31 | 'train': train_dataset, 32 | 'test': test_dataset 33 | } 34 | return data_dict 35 | 36 | 37 | if __name__ == '__main__': 38 | dt = load_dataset() 39 | print(len(dt['train'])) 40 | print(dt['train'][0][0].shape) -------------------------------------------------------------------------------- /loaders/cifar10.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | import altair as alt 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import pandas as pd 8 | import torch 9 | import torchvision 10 | from torch.utils.data import Dataset 11 | from torchvision.transforms import transforms 12 | 13 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" 14 | 15 | 16 | def load_dataset(): 17 | transform = transforms.Compose( 18 | [transforms.ToTensor(), 19 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 20 | batch_size = 4 21 | trainset = torchvision.datasets.CIFAR10(root='../datasets/cifar10', train=True, 22 | download=True, transform=transform) 23 | testset = torchvision.datasets.CIFAR10(root='../datasets/cifar10', train=False, 24 | download=True, transform=transform) 25 | classes = ('plane', 'car', 'bird', 'cat', 26 | 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') 27 | return {'train': trainset, 'test': testset, 'label_names': classes} 28 | 29 | 30 | def compute_client_data_distribution(datasets: List[Dataset], num_classes: int): 31 | class_distribution = [] 32 | data_distribution = [] 33 | 34 | for i in range(len(datasets)): 35 | class_counts = torch.zeros(num_classes) 36 | for j in range(len(datasets[i].targets)): 37 | class_counts[datasets[i].targets[j]] += 1 38 | class_counts = class_counts.numpy() 39 | data_distribution.append(np.sum(class_counts)) 40 | class_counts = class_counts / np.sum(class_counts) 41 | class_distribution.append(class_counts) 42 | return data_distribution, class_distribution 43 | 44 | 45 | def visualize_client_data_distribution(datasets: List[Dataset], num_clients: int, num_classes: int): 46 | data_distribution, class_distribution = compute_client_data_distribution(datasets, num_classes) 47 | 48 | # create a heatmap of the data distribution for each client 49 | fig, ax = plt.subplots() 50 | im = ax.imshow(np.array(class_distribution).T, cmap='YlGn') 51 | 52 | # add text annotations for each cell 53 | for i in range(len(class_distribution[0])): 54 | for j in range(len(class_distribution)): 55 | text = ax.text(j, i, class_distribution[j][i], ha="center", va="center", color="black") 56 | 57 | # add colorbar 58 | cbar = ax.figure.colorbar(im, ax=ax) 59 | 60 | # set tick labels and axis labels 61 | plt.xticks(fontsize=5) 62 | plt.yticks(fontsize=5) 63 | ax.set_xticks(np.arange(len(class_distribution))) 64 | ax.set_yticks(np.arange(len(class_distribution[0]))) 65 | ax.set_xticklabels([f"{i}" if i % 10 == 0 else '' for i in range(len(class_distribution))]) 66 | ax.set_yticklabels([f"{i}" for i in range(len(class_distribution[0]))]) 67 | ax.set_xlabel("Client") 68 | ax.set_ylabel("Class") 69 | 
ax.set_title("Class Distribution of Clients") 70 | 71 | plt.show() 72 | 73 | fig, ax = plt.subplots() 74 | ax.bar(range(num_clients), data_distribution) 75 | ax.set_xlabel("Client") 76 | ax.set_ylabel("Data Samples") 77 | ax.set_title("Sample Distribution of Clients") 78 | plt.show() 79 | plt.savefig("sample_distribution_matplotlib.png") 80 | 81 | 82 | def vis_data_distribution_altair(data_distribution, class_distribution): 83 | data = [] 84 | num_clients = len(data_distribution) 85 | for i in range(len(class_distribution[0])): 86 | for j in range(len(class_distribution)): 87 | data.append({"client": j, "class": i, "value": class_distribution[j][i]}) 88 | 89 | heatmap = ( 90 | alt.Chart(pd.DataFrame(data)) 91 | .mark_rect() 92 | .encode( 93 | x=alt.X("client:N", title="Client"), 94 | y=alt.Y("class:N", title="Class"), 95 | color=alt.Color("value:Q", scale=alt.Scale(scheme="yellowgreenblue"), 96 | legend=alt.Legend(title="Percentage of Samples")), 97 | tooltip="value:Q", 98 | ) 99 | .properties( 100 | title=alt.TitleParams( 101 | "Class Distribution of Clients", 102 | fontSize=12, 103 | ), 104 | # width=200, 105 | # height=120, 106 | ) 107 | ) 108 | 109 | text = ( 110 | alt.Chart(pd.DataFrame(data)) 111 | .mark_text() 112 | .encode( 113 | x=alt.X("client:N"), 114 | y=alt.Y("class:N"), 115 | text=alt.Text("value:Q", format=".2f", ), 116 | color=alt.condition( 117 | alt.datum.value > 0.5, alt.value("black"), alt.value("white") 118 | ), 119 | ) 120 | .transform_filter((alt.datum.value > 0.01)) 121 | ) 122 | 123 | data_bar = ( 124 | alt.Chart(pd.DataFrame({"client": range(num_clients), "value": data_distribution})) 125 | .mark_bar() 126 | .encode( 127 | x=alt.X("client:N", title="Client", axis=alt.Axis(labelFontSize=8)), 128 | y=alt.Y("value:Q", title="Data Samples", axis=alt.Axis(labelFontSize=8)), 129 | tooltip="value:Q", 130 | ) 131 | .properties( 132 | title=alt.TitleParams( 133 | "Sample Distribution of Clients", 134 | fontSize=12, 135 | ), 136 | # width=200, 137 | # height=120, 138 | ) 139 | ) 140 | 141 | return alt.vconcat(heatmap + text, data_bar) 142 | 143 | -------------------------------------------------------------------------------- /loaders/clusterer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import torch 6 | import torch 7 | import torchvision 8 | import torchvision.models as models 9 | import torchvision.transforms as transforms 10 | from PIL import Image 11 | # from pycocotools import coco 12 | from torch.utils.data import Dataset, DataLoader 13 | from tqdm import tqdm 14 | 15 | 16 | model = models.resnet50(pretrained=True) 17 | 18 | transformations = transforms.Compose([ 19 | transforms.Resize(256), 20 | transforms.CenterCrop(224), 21 | transforms.ToTensor(), 22 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 23 | ]) 24 | def extract_imagenet_features(img_path, transform=transformations, model=model): 25 | # Load the pre-trained ResNet50 model 26 | 27 | # Set the model to evaluation mode 28 | model.eval() 29 | 30 | # Define the image pre-processing transforms 31 | 32 | # Load the image and apply the pre-processing transforms 33 | img = Image.open(img_path) 34 | # img = np.array(img)/255.0 35 | # img = torch.from_numpy(img).float() 36 | img_tensor = transform(img) 37 | img_tensor = img_tensor.unsqueeze(0) 38 | 39 | # Extract features from the image using the model 40 | with torch.no_grad(): 41 | features = model(img_tensor) 42 | 43 | # 
Flatten the features tensor 44 | flattened_features = features.flatten() 45 |  46 | return flattened_features 47 |  48 |  49 | class VisDroneDataset(Dataset): 50 | def __init__(self, data_dir='./datasets/visdrone/yolo_format/train', transform=None): 51 | self.data_dir = data_dir 52 | self.transform = transform 53 | self.images = os.listdir(os.path.join(data_dir, 'images')) 54 |  55 | def __len__(self): 56 | return len(self.images) 57 |  58 | def __getitem__(self, idx): 59 | # Load the image 60 | image = extract_imagenet_features(img_path=os.path.join( 61 | os.path.join(self.data_dir, 'images' 62 | ), 63 | self.images[idx]), transform=self.transform) 64 | # if self.transform: 65 | # image = self.transform(image) 66 |  67 | return image, self.images[idx] 68 |  69 |  70 | # Define the transformations to be applied to the images 71 | transformations = transforms.Compose([ 72 | transforms.Resize(256), 73 | transforms.CenterCrop(224), 74 | transforms.ToTensor(), 75 | transforms.Normalize(mean=[0.485, 0.456, 0.406], 76 | std=[0.229, 0.224, 0.225]) 77 | ]) 78 |  79 | dataset = VisDroneDataset(transform=transformations) 80 |  81 | # Load a pretrained ResNet-50 model 82 | model = torchvision.models.resnet50(pretrained=True) 83 | model.eval() 84 |  85 | # Extract features for each image in the dataset 86 | ids = [] 87 | features = [] 88 | for i, (image_features, img_id) in tqdm(enumerate(dataset), total=len(dataset)): 89 | with torch.no_grad(): 90 | feature = image_features.numpy() 91 | ids.append(img_id) 92 | features.append(feature) 93 |  94 | # Convert the features to a numpy array 95 | features = np.array(features) 96 |  97 | # Perform K-means clustering on the features to cluster the images into 100 clusters 98 | from sklearn.cluster import KMeans, DBSCAN 99 |  100 | kmeans = KMeans(n_clusters=100).fit(features) 101 | df = pd.DataFrame({'image_id': ids, 'cluster': kmeans.labels_}) 102 | df.to_csv('split.csv') 103 | print(df.groupby('cluster').count()) 104 | clusters = kmeans.labels_ 105 | -------------------------------------------------------------------------------- /loaders/energy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import numpy as np 3 | import pandas as pd 4 | import torch 5 | from scipy.stats import pearsonr 6 | from sklearn.base import BaseEstimator, TransformerMixin 7 | from sklearn.decomposition import PCA 8 | from sklearn.metrics import r2_score 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.pipeline import Pipeline 11 | from sklearn.preprocessing import StandardScaler 12 | from torch.utils.data import Dataset, DataLoader 13 | from torchmetrics import Metric, R2Score 14 |  15 |  16 | def digitize_values(values, a, b, num_bins):  # a and b are accepted but unused 17 | # Sort a copy of the values; keeping `values` in its original order lets the returned labels stay aligned with the input samples 18 | sorted_values = np.sort(values) 19 |  20 | # Determine the indices that will divide the sorted values into num_bins equal parts 21 | indices = np.linspace(0, len(values), num_bins + 1, dtype=int) 22 |  23 | # Create bins using these indices 24 | bins = [sorted_values[indices[i]:indices[i + 1]] for i in range(num_bins)] 25 |  26 | # Now 'bins' is a list of arrays, where each array is a bin 27 | # containing approximately the same number of samples.
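# Illustrative example: with values = [3, 1, 4, 1, 5, 9] and num_bins = 3,
# sorted_values is [1, 1, 3, 4, 5, 9], indices is [0, 2, 4, 6], and the bins
# are [1, 1], [3, 4] and [5, 9]; the labels assigned below, aligned with the
# original order of `values`, are [1, 0, 1, 0, 2, 2].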
28 | 29 | # If you want to assign each original value to a bin index: 30 | digitized_values = np.zeros_like(values, dtype=np.int32) 31 | for i, b in enumerate(bins): 32 | digitized_values[np.isin(values, b)] = i 33 | 34 | return digitized_values 35 | 36 | class HandleOutliers(BaseEstimator, TransformerMixin): 37 | def __init__(self): 38 | return None 39 | 40 | def fit(self, X, y=None): 41 | ''' 42 | Description : It notes the 90 and 10 percentile of each features in the dataframe. 43 | So that we can impute the outliers with the value of noted percentile. 44 | Parameters: 45 | X : Dataframe which you want to note percentile. 46 | y : It is not required. 47 | ''' 48 | outlier_estimator_dict = {} 49 | for col in X.columns: 50 | upper_bound = np.percentile(X[col], 90) 51 | lower_bound = np.percentile(X[col], 10) 52 | outlier_estimator_dict[col] = { 53 | "upper_bound": upper_bound, 54 | "lower_bound": lower_bound} 55 | self.outlier_estimator_dict = outlier_estimator_dict 56 | return self 57 | 58 | def transform(self, X, y=None): 59 | ''' 60 | Description : It replaces the outliers with the noted percentile value of respective column 61 | Parameters: 62 | X : Dataframe you want to replace outliers. 63 | Returns : A Dataframe with removed outliers. 64 | ''' 65 | for col in X.columns: 66 | col_dict = self.outlier_estimator_dict[col] 67 | X[col] = np.where(X[col] > col_dict['upper_bound'], col_dict['upper_bound'], X[col]) 68 | X[col] = np.where(X[col] < col_dict['lower_bound'], col_dict['lower_bound'], X[col]) 69 | 70 | self.final_column_names = X.columns 71 | return X 72 | 73 | 74 | class AddPcaFeatures(BaseEstimator, TransformerMixin): 75 | def __init__(self, number_of_pca_columns=None): 76 | ''' 77 | Parameters : 78 | number_of_pca_columns :(Int) Number of final dimension you want. 79 | ''' 80 | self.number_of_pca_columns = number_of_pca_columns 81 | return None 82 | 83 | def fit(self, X, y=None): 84 | ''' 85 | Description : It fits the data in the PCA algorithm 86 | Parameters: 87 | X : Dataframe which fits the PCA algorithm 88 | ''' 89 | if self.number_of_pca_columns != None: 90 | self.pca = PCA(n_components=self.number_of_pca_columns) 91 | self.pca.fit(X) 92 | return self 93 | 94 | def transform(self, X, y=None): 95 | ''' 96 | Parameters : 97 | X : Dataframe you want to reduce the dimension 98 | Returns : A Dataframe with the pca features along concatinated with the input Dataframe. 99 | ''' 100 | if self.number_of_pca_columns != None: 101 | pca_column_names = [f'pca_{val}' for val in range(1, self.number_of_pca_columns + 1)] 102 | pca_features = self.pca.transform(X) 103 | pca_features = pd.DataFrame(pca_features, columns=pca_column_names, index=X.index) 104 | X = pd.concat([X, pca_features], axis=1) 105 | 106 | return X 107 | 108 | 109 | class AddCentralTendencyFeatures(BaseEstimator, TransformerMixin): 110 | def __init__(self, measure): 111 | ''' 112 | Parameters : 113 | measure : 'mean' or 'median' depend on which features you want to add. 114 | ''' 115 | self.measure = measure 116 | return None 117 | 118 | def fit(self, X, y=None): 119 | return self 120 | 121 | def transform(self, X, y=None): 122 | ''' 123 | Description : Adds either mean or median columns of a temperature and humidity column for each observation. 124 | Parameter : Dataframe which you want to calculate 125 | Returns : Input Dataframe concatinated with the calculated features. 
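        Example (illustrative, using the energy dataset's column names):
        with measure='mean', the temperature columns t1..t9 are averaged
        row-wise into 'avg_house_temp' and the humidity columns rh_1..rh_9
        into 'avg_humidity_percentage'.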
126 | ''' 127 | if self.measure.lower() == 'mean': 128 | X['avg_house_temp'] = X[[col for col in X.columns if (('t' in col) and (len(col) < 3))]].mean(axis=1) 129 | X['avg_humidity_percentage'] = X[[col for col in X.columns if (('rh_' in col) and (len(col) < 5))]].mean( 130 | axis=1) 131 |  132 | else: 133 | X['med_house_temp'] = X[[col for col in X.columns if (('t' in col) and (len(col) < 3))]].median(axis=1) 134 | X['med_humidity_percentage'] = X[[col for col in X.columns if (('rh_' in col) and (len(col) < 5))]].median( 135 | axis=1) 136 |  137 | return X 138 |  139 |  140 | class AddDateFeatures(BaseEstimator, TransformerMixin): 141 | def __init__(self): 142 | return None 143 |  144 | def fit(self, X, y=None): 145 | return self 146 |  147 | def transform(self, X, y=None): 148 | X['day'] = X.date.dt.day 149 | X['month'] = X.date.dt.month 150 | return X.drop('date', axis=1) 151 |  152 |  153 | class RemoveCorrelatedFeatures(BaseEstimator, TransformerMixin): 154 | def __init__(self): 155 | return None 156 |  157 | def fit(self, X, y): 158 | ''' 159 | Description : For each pair of highly correlated features, mark the one with the weaker correlation to the target for removal 160 | X : Dataframe with only features 161 | y : Target Series 162 | ''' 163 | col_corr = set() 164 | corr_matrix = X.corr() 165 |  166 | for i in range(len(corr_matrix.columns)): 167 | for j in range(i): 168 | if abs(corr_matrix.iloc[i, j]) > 0.85: 169 | corr_i, _ = pearsonr(y, X.iloc[:, i]) 170 | corr_j, _ = pearsonr(y, X.iloc[:, j]) 171 | if abs(corr_i) < abs(corr_j): 172 | colname = corr_matrix.columns[i] 173 | col_corr.add(colname) 174 | else: 175 | colname = corr_matrix.columns[j]  # column j has the weaker target correlation here 176 | col_corr.add(colname) 177 |  178 | self.correlated_columns = col_corr 179 | self.final_column_names = set(X.columns) - self.correlated_columns 180 | return self 181 |  182 | def transform(self, X, y=None): 183 | ''' 184 | Parameter : The DataFrame from which to remove the correlated features 185 | Returns : DataFrame with the correlated features removed.
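        Note : a pair of features is considered redundant when its absolute
        pairwise correlation computed in fit() exceeds 0.85; of each such
        pair, the feature with the weaker Pearson correlation to the target
        is the one dropped here.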
186 | ''' 187 | return X.drop(self.correlated_columns, axis=1) 188 |  189 |  190 | class ApplyTransformation(BaseEstimator, TransformerMixin): 191 | def __init__(self): 192 | return None 193 |  194 | def fit(self, X, y=None): 195 | return self 196 |  197 | def transform(self, X, y=None): 198 | X[['t9', 'rv1', 'rv2', 'windspeed']] = np.log1p(X[['t9', 'rv1', 'rv2', 'windspeed']]) 199 | X['visibility'] = np.where(X['visibility'] > 40, 1, 0) 200 | return X 201 |  202 |  203 | class EnergyDataset(Dataset): 204 | def __init__(self, features, labels): 205 | self.features = np.asarray(features)  # accept DataFrames as well as arrays 206 | self.labels = np.asarray(labels)  # positional indexing below needs an array, not a date-indexed Series 207 | self.targets = digitize_values(self.labels, np.min(self.labels), np.max(self.labels), 10) 208 |  209 | def __len__(self): 210 | return len(self.labels) 211 |  212 | def __getitem__(self, idx): 213 | return torch.tensor(self.features[idx], dtype=torch.float), torch.tensor(self.labels[idx], dtype=torch.float) 214 |  215 |  216 | def load_dataset(split=0.2, seed=42): 217 | df = pd.read_csv('datasets/energy/energydata_complete.csv') 218 | df['date'] = pd.to_datetime(df['date']) 219 | df.set_index(df.date.copy(deep=True), inplace=True) 220 |  221 | # Preprocess the data 222 | # Split the data into features and target 223 | df.columns = [col.lower() for col in df.columns] 224 | X = df.drop('appliances', axis=1) 225 | y = df['appliances'] 226 |  227 | # Split the dataset into train and test datasets 228 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=seed) 229 |  230 | # Scale the features to have zero mean and unit variance 231 | preprocessing_pipeline = Pipeline([ 232 | ('transformation', ApplyTransformation()), 233 | ('remove_outliers', HandleOutliers()), 234 | ('add_central_tendency_features', AddCentralTendencyFeatures(measure='mean')), 235 | ('add_Date_Features', AddDateFeatures()), 236 | ('add_pca_features', AddPcaFeatures(number_of_pca_columns=3)), 237 | ('remove_correlated_features', RemoveCorrelatedFeatures()), 238 | ('standard_scalar', StandardScaler()) 239 | ]) 240 | # min_y = min(y_train) 241 | # max_y = max(y_train) 242 | # y_train = (y_train - min_y) / max_y 243 | # y_test = (y_test - min_y) / max_y 244 | y_train = np.log(y_train) 245 | y_test = np.log(y_test) 246 | X_train = preprocessing_pipeline.fit_transform(X_train, y_train) 247 | X_test = preprocessing_pipeline.transform(X_test) 248 | train_data = EnergyDataset(X_train, y_train) 249 | test_data = EnergyDataset(X_test, y_test) 250 | return { 251 | 'train': train_data, 252 | 'test': test_data, 253 | } 254 |  255 |  256 | if __name__ == '__main__': 257 | dt = load_dataset() 258 | print(len(dt['train'])) 259 | print(dt['train'][0][0].shape) -------------------------------------------------------------------------------- /loaders/pack_audio.py: -------------------------------------------------------------------------------- 1 | import random 2 |  3 | import numpy as np 4 | import torch 5 |  6 |  7 | def temporal_sampling(spectrogram, start_idx, end_idx, num_samples): 8 | """ 9 | Sample num_samples frames from the spectrogram at equal intervals 10 | (start_idx and end_idx are accepted but currently unused). 11 | Args: 12 | spectrogram (tensor): a tensor of audio features, dimension is 13 | `channel` x `num frames` x `num features`. 14 | start_idx (int): the index of the start frame (currently unused). 15 | end_idx (int): the index of the end frame (currently unused). 16 | num_samples (int): number of frames to sample. 17 | Returns: 18 | spectrogram (tensor): a tensor of temporally sampled audio features, dimension is 19 | `channel` x `num clip frames` x `num features`.
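        Example (illustrative): a spectrogram with 1000 time steps and
        num_samples=400 is gathered at torch.linspace(0, 999, 400).long(),
        i.e. roughly every 2.5th frame.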
20 | """ 21 | index = torch.linspace(0, spectrogram.shape[1] - 1, num_samples).long() 22 | spectrogram = torch.index_select(spectrogram, 1, index) 23 | return spectrogram 24 | 25 | 26 | def get_start_end_idx(audio_size, clip_size, clip_idx, num_clips, start_sample=0): 27 | """ 28 | Sample a clip of size clip_size from a video of size video_size and 29 | return the indices of the first and last frame of the clip. If clip_idx is 30 | -1, the clip is randomly sampled, otherwise uniformly split the video to 31 | num_clips clips, and select the start and end index of clip_idx-th video 32 | clip. 33 | Args: 34 | audio_size (int): number of overall frames. 35 | clip_size (int): size of the clip to sample from the frames. 36 | clip_idx (int): if clip_idx is -1, perform random jitter sampling. If 37 | clip_idx is larger than -1, uniformly split the video to num_clips 38 | clips, and select the start and end index of the clip_idx-th video 39 | clip. 40 | num_clips (int): overall number of clips to uniformly sample from the 41 | given video for testing. 42 | Returns: 43 | start_idx (int): the start frame index. 44 | end_idx (int): the end frame index. 45 | """ 46 | delta = max(audio_size - clip_size, 0) 47 | if clip_idx == -1: 48 | # Random temporal sampling. 49 | start_idx = random.uniform(0, delta) 50 | else: 51 | # Uniformly sample the clip with the given index. 52 | start_idx = np.linspace(0, delta, num=num_clips)[clip_idx] 53 | end_idx = start_idx + clip_size - 1 54 | return start_sample + start_idx, start_sample + end_idx 55 | 56 | 57 | def pack_audio(audio_dataset, video_record, temporal_sample_index, sampling_rate=24000, clip_secs=1.999, n_ensemble=5): 58 | samples = audio_dataset[video_record.video_id][()] 59 | start_idx, end_idx = get_start_end_idx( 60 | video_record.num_audio_samples, 61 | int(round(sampling_rate * clip_secs)), 62 | temporal_sample_index, 63 | n_ensemble, 64 | start_sample=video_record.start_audio_sample 65 | ) 66 | spectrogram = _extract_sound_feature( 67 | samples, 68 | video_record, 69 | int(start_idx), 70 | int(end_idx), 71 | clip_secs 72 | ) 73 | return spectrogram 74 | 75 | 76 | def _log_specgram( 77 | audio, 78 | window_size=10, 79 | step_size=5, 80 | eps=1e-6, 81 | sampling_rate=24000 82 | ): 83 | nperseg = int(round(window_size * sampling_rate / 1e3)) 84 | noverlap = int(round(step_size * sampling_rate / 1e3)) 85 | from librosa import stft, filters 86 | 87 | # Mel-Spectrogram 88 | spec = stft( 89 | audio, 90 | n_fft=2048, 91 | window='hann', 92 | hop_length=noverlap, 93 | win_length=nperseg, 94 | pad_mode='constant' 95 | ) 96 | mel_basis = filters.mel( 97 | sr=sampling_rate, 98 | n_fft=2048, 99 | n_mels=128, 100 | htk=True, 101 | norm=None 102 | ) 103 | mel_spec = np.dot(mel_basis, np.abs(spec)) 104 | 105 | # Log-Mel-Spectrogram 106 | log_mel_spec = np.log(mel_spec + eps) 107 | return log_mel_spec.T 108 | 109 | 110 | def _extract_sound_feature(samples, video_record, start_idx, end_idx, clip_duration, sampling_rate=24000): 111 | if video_record.num_audio_samples < int(round(sampling_rate * clip_duration)): 112 | samples = samples[video_record.start_audio_sample:video_record.end_audio_sample] 113 | else: 114 | samples = samples[start_idx:end_idx] 115 | spectrogram = _log_specgram(samples, 116 | window_size=10, 117 | step_size=5 118 | ) 119 | if spectrogram.shape[0] < 400: 120 | num_timesteps_to_pad = 400 - spectrogram.shape[0] 121 | spectrogram = np.pad(spectrogram, ((0, num_timesteps_to_pad), (0, 0)), 'edge') 122 | return torch.tensor(spectrogram).unsqueeze(0) 123 
| -------------------------------------------------------------------------------- /loaders/spatial_transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from PIL import Image 4 | from torchvision.transforms import functional as F 5 | from torchvision.transforms import transforms 6 | 7 | 8 | class Compose(transforms.Compose): 9 | 10 | def randomize_parameters(self): 11 | for t in self.transforms: 12 | t.randomize_parameters() 13 | 14 | 15 | class ToTensor(transforms.ToTensor): 16 | 17 | def randomize_parameters(self): 18 | pass 19 | 20 | 21 | class Normalize(transforms.Normalize): 22 | 23 | def randomize_parameters(self): 24 | pass 25 | 26 | # 27 | # class ScaleValue(object): 28 | # 29 | # def __init__(self, s): 30 | # self.s = s 31 | # 32 | # def __call__(self, tensor): 33 | # tensor *= self.s 34 | # return tensor 35 | # 36 | # def randomize_parameters(self): 37 | # pass 38 | # 39 | # 40 | # class Resize(transforms.Resize): 41 | # 42 | # def randomize_parameters(self): 43 | # pass 44 | # 45 | # 46 | # class Scale(transforms.Scale): 47 | # 48 | # def randomize_parameters(self): 49 | # pass 50 | # 51 | # 52 | # class CenterCrop(transforms.CenterCrop): 53 | # 54 | # def randomize_parameters(self): 55 | # pass 56 | 57 | 58 | class CornerCrop(object): 59 | 60 | def __init__(self, 61 | size, 62 | crop_position=None, 63 | crop_positions=['c', 'tl', 'tr', 'bl', 'br']): 64 | self.size = size 65 | self.crop_position = crop_position 66 | self.crop_positions = crop_positions 67 | 68 | if crop_position is None: 69 | self.randomize = True 70 | else: 71 | self.randomize = False 72 | self.randomize_parameters() 73 | 74 | def __call__(self, img): 75 | image_width = img.size[0] 76 | image_height = img.size[1] 77 | 78 | h, w = (self.size, self.size) 79 | if self.crop_position == 'c': 80 | i = int(round((image_height - h) / 2.)) 81 | j = int(round((image_width - w) / 2.)) 82 | elif self.crop_position == 'tl': 83 | i = 0 84 | j = 0 85 | elif self.crop_position == 'tr': 86 | i = 0 87 | j = image_width - self.size 88 | elif self.crop_position == 'bl': 89 | i = image_height - self.size 90 | j = 0 91 | elif self.crop_position == 'br': 92 | i = image_height - self.size 93 | j = image_width - self.size 94 | 95 | img = F.crop(img, i, j, h, w) 96 | 97 | return img 98 | 99 | def randomize_parameters(self): 100 | if self.randomize: 101 | self.crop_position = self.crop_positions[random.randint( 102 | 0, 103 | len(self.crop_positions) - 1)] 104 | 105 | def __repr__(self): 106 | return self.__class__.__name__ + '(size={0}, crop_position={1}, randomize={2})'.format( 107 | self.size, self.crop_position, self.randomize) 108 | 109 | 110 | class RandomHorizontalFlip(transforms.RandomHorizontalFlip): 111 | 112 | def __init__(self, p=0.5): 113 | super().__init__(p) 114 | self.randomize_parameters() 115 | 116 | def __call__(self, img): 117 | """ 118 | Args: 119 | img (PIL.Image): Image to be flipped. 120 | Returns: 121 | PIL.Image: Randomly flipped image. 
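        Note: the random draw happens once in randomize_parameters() rather
        than on every call, presumably so that all frames of a clip can
        receive the same flip decision.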
122 | """ 123 | if self.random_p < self.p: 124 | return F.hflip(img) 125 | return img 126 | 127 | def randomize_parameters(self): 128 | self.random_p = random.random() 129 | 130 | 131 | class MultiScaleCornerCrop(object): 132 | 133 | def __init__(self, 134 | size, 135 | scales, 136 | crop_positions=['c', 'tl', 'tr', 'bl', 'br'], 137 | interpolation=Image.BILINEAR): 138 | self.size = size 139 | self.scales = scales 140 | self.interpolation = interpolation 141 | self.crop_positions = crop_positions 142 | 143 | self.randomize_parameters() 144 | 145 | def __call__(self, img): 146 | short_side = min(img.size[0], img.size[1]) 147 | crop_size = int(short_side * self.scale) 148 | self.corner_crop.size = crop_size 149 | 150 | img = self.corner_crop(img) 151 | return img.resize((self.size, self.size), self.interpolation) 152 | 153 | def randomize_parameters(self): 154 | self.scale = self.scales[random.randint(0, len(self.scales) - 1)] 155 | crop_position = self.crop_positions[random.randint( 156 | 0, 157 | len(self.crop_positions) - 1)] 158 | 159 | self.corner_crop = CornerCrop(None, crop_position) 160 | 161 | def __repr__(self): 162 | return self.__class__.__name__ + '(size={0}, scales={1}, interpolation={2})'.format( 163 | self.size, self.scales, self.interpolation) 164 | 165 | 166 | class RandomResizedCrop(transforms.RandomResizedCrop): 167 | 168 | def __init__(self, 169 | size, 170 | scale=(0.08, 1.0), 171 | ratio=(3. / 4., 4. / 3.), 172 | interpolation=Image.BILINEAR): 173 | super().__init__(size, scale, ratio, interpolation) 174 | self.randomize_parameters() 175 | 176 | def __call__(self, img): 177 | if self.randomize: 178 | self.random_crop = self.get_params(img, self.scale, self.ratio) 179 | self.randomize = False 180 | 181 | i, j, h, w = self.random_crop 182 | return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) 183 | 184 | def randomize_parameters(self): 185 | self.randomize = True 186 | 187 | 188 | class ColorJitter(transforms.ColorJitter): 189 | 190 | def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): 191 | super().__init__(brightness, contrast, saturation, hue) 192 | self.randomize_parameters() 193 | 194 | def __call__(self, img): 195 | if self.randomize: 196 | self.transform = self.get_params(self.brightness, self.contrast, 197 | self.saturation, self.hue) 198 | self.randomize = False 199 | 200 | return self.transform(img) 201 | 202 | def randomize_parameters(self): 203 | self.randomize = True 204 | 205 | 206 | class PickFirstChannels(object): 207 | 208 | def __init__(self, n): 209 | self.n = n 210 | 211 | def __call__(self, tensor): 212 | return tensor[:self.n, :, :] 213 | 214 | def randomize_parameters(self): 215 | pass 216 | -------------------------------------------------------------------------------- /loaders/ut_har.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 3 | import numpy as np 4 | import torch 5 | from torch.utils.data import Dataset 6 | from torch.utils.data.dataset import T_co 7 | 8 | 9 | class UTHarDataset(Dataset): 10 | def __init__(self, data: np.array, label: np.array): 11 | self.data = data 12 | self.targets = label 13 | 14 | def __len__(self) -> int: 15 | return len(self.data) 16 | 17 | def __getitem__(self, index) -> T_co: 18 | return self.data[index, :, :, :], int(self.targets[index]) 19 | 20 | 21 | def load_dataset(root_dir='./datasets/ut_har'): 22 | data_list = glob.glob(root_dir + '/UT_HAR/data/*.csv') 23 | label_list = glob.glob(root_dir + '/UT_HAR/label/*.csv') 24 | 
ut_har_data = {} 25 | for data_dir in data_list: 26 | data_name = data_dir.split('/')[-1].split('.')[0] 27 | with open(data_dir, 'rb') as f: 28 | data = np.load(f) 29 | data = data.reshape(len(data), 1, 250, 90) 30 | data_norm = (data - np.min(data)) / (np.max(data) - np.min(data)) 31 | ut_har_data[data_name] = torch.Tensor(data_norm) 32 | for label_dir in label_list: 33 | label_name = label_dir.split('/')[-1].split('.')[0] 34 | with open(label_dir, 'rb') as f: 35 | label = np.load(f) 36 | ut_har_data[label_name] = torch.Tensor(label) 37 | return { 38 | 'train': UTHarDataset(ut_har_data['X_train'], ut_har_data['y_train']), 39 | 'val': UTHarDataset(ut_har_data['X_val'], ut_har_data['y_val']), 40 | 'test': UTHarDataset(ut_har_data['X_test'], ut_har_data['y_test']), 41 | } 42 |  43 |  44 | if __name__ == '__main__': 45 | dataset = load_dataset() 46 | print(len(dataset['train'])) 47 | print(dataset['train'][0][0].shape) 48 | -------------------------------------------------------------------------------- /loaders/visdrone.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from pathlib import Path 3 | from typing import List, Tuple, Dict, Any, Union 4 |  5 | import pandas as pd 6 | import torch 7 | from torch.utils.data import Dataset 8 | from tqdm import tqdm 9 | from ultralytics.yolo.data.dataset import YOLODataset 10 | from ultralytics.yolo.data.dataloaders.v5loader import LoadImagesAndLabels 11 |  12 | from loaders.utils import ParameterDict 13 |  14 | YOLO_HYPERPARAMETERS = { 15 | 'lr0': 0.01, 16 | 'lrf': 0.01, 17 | 'momentum': 0.937, 18 | 'weight_decay': 0.0005, 19 | 'warmup_epochs': 3.0, 20 | 'warmup_momentum': 0.8, 21 | 'warmup_bias_lr': 0.1, 22 | 'box': 7.5, 23 | 'cls': 0.5, 24 | 'dfl': 1.5, 25 | 'fl_gamma': 0.0, 26 | 'label_smoothing': 0.0, 27 | 'nbs': 64, 28 | 'hsv_h': 0.015, 29 | 'hsv_s': 0.7, 30 | 'hsv_v': 0.4, 31 | 'degrees': 0.0, 32 | 'translate': 0.1, 33 | 'scale': 0.5, 34 | 'shear': 0.0, 35 | 'perspective': 0.0, 36 | 'flipud': 0.0, 37 | 'fliplr': 0.5, 38 | 'mosaic': 1.0, 39 | 'mixup': 0.0, 40 | 'copy_paste': 0.0, 41 | 'mask_ratio': 0.0, 42 | 'overlap_mask': 0.0, 43 | 'conf': 0.25, 44 | 'iou': 0.45, 45 | 'max_det': 1000, 46 | 'plots': False, 47 | 'half': False, # use half precision (FP16) 48 | 'dnn': False, 49 | 'data': None, 50 | 'imgsz': 640, 51 | 'verbose': False 52 | } 53 | YOLO_HYPERPARAMETERS = ParameterDict(YOLO_HYPERPARAMETERS) 54 | NAMES = ('pedestrian', 'person', 'car', 'van', 'bus', 'truck', 'motor', 'bicycle', 'awning-tricycle', 'tricycle', 55 | 'block', 'car_group') 56 |  57 |  58 | class VisDroneDataset(Dataset): 59 | """ 60 | A PyTorch Dataset class for the VisDrone dataset. 61 | """ 62 |  63 | def __init__(self, root: str, hyp: Dict[str, Any], augment: bool = True): 64 | """ 65 | Initialize the dataset. 66 |  67 | Args: 68 | root (str): Path to the root directory of the dataset. 69 | hyp (Dict[str, Any]): Hyperparameters dictionary. 70 | augment (bool, optional): Whether to apply data augmentation. Defaults to True. 71 | """ 72 | self.root = root 73 | self.dataset = LoadImagesAndLabels( 74 | path=root, 75 | augment=augment, 76 | hyp=hyp, 77 | # rect=True 78 | ) 79 |  80 | def __getitem__(self, index: int) -> Tuple[torch.Tensor, Any]: 81 | """ 82 | Get an item from the dataset. 83 |  84 | Args: 85 | index (int): Index of the item. 86 |  87 | Returns: 88 | Tuple[torch.Tensor, Any]: A tuple containing the image tensor and the label.
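            The image tensor is converted from uint8 in [0, 255] to float in [0, 1].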
89 | """ 90 | dt = self.dataset[index] 91 | return dt[0].float() / 255.0, dt[1] 92 | 93 | def __len__(self) -> int: 94 | """ 95 | Get the length of the dataset. 96 | 97 | Returns: 98 | int: The number of items in the dataset. 99 | """ 100 | return len(self.dataset) 101 | 102 | 103 | def collate_fn(batch: List[Tuple[torch.Tensor, torch.Tensor, str, Tuple[int, int]]]) \ 104 | -> Tuple[torch.Tensor, torch.Tensor, List[str], Tuple[Tuple[int, int], ...]]: 105 | """ 106 | Custom collate function for DataLoader. 107 | 108 | Args: 109 | batch (List[Tuple[torch.Tensor, torch.Tensor, str, Tuple[int, int]]]): List of tuples, each containing an image tensor, label tensor, image path, and a tuple of image dimensions. 110 | 111 | Returns: 112 | Tuple[torch.Tensor, torch.Tensor, List[str], Tuple[Tuple[int, int], ...]]: A tuple containing stacked image tensors, concatenated label tensors, list of image paths, and a tuple of image dimensions. 113 | """ 114 | im, label, path, shapes = zip(*batch) # transposed 115 | for i, lb in enumerate(label): 116 | lb[:, 0] = i # add target image index for build_targets() 117 | return torch.stack(im, 0).float(), torch.cat(label, 0), path, shapes 118 | 119 | 120 | def load_dataset(root: str = "datasets/visdrone", 121 | augment: bool = True, 122 | hyp: Dict[str, Any] = YOLO_HYPERPARAMETERS) \ 123 | -> Dict[str, Union[YOLODataset, Dict[str, List[int]], Dict[str, Dict[int, List[int]]]]]: 124 | """ 125 | Load the VisDrone dataset with YOLO format. 126 | 127 | Args: 128 | root (str, optional): Path to the root directory of the dataset. Defaults to "datasets/visdrone/yolo_format". 129 | augment (bool, optional): Whether to apply data augmentation. Defaults to False. 130 | hyp (Dict[str, Any], optional): Hyperparameters dictionary. Defaults to YOLO_HYPERPARAMETERS. 131 | 132 | Returns: 133 | Dict[str, Union[YOLODataset, Dict[str, List[int]], Dict[str, Dict[int, List[int]]]]]: A dictionary containing train, val, and test datasets, client_mapping, and split information. 
134 | """ 135 | print(f"Loading VisDrone dataset from {os.path.join(root, 'train')}...") 136 | dataset_train = YOLODataset( 137 | img_path=os.path.join(root, 'train'), 138 | hyp=hyp, 139 | augment=augment, 140 | names=['pedestrian', 'person', 'car', 'van', 'bus', 'truck', 'motor', 'bicycle', 'awning-tricycle', 'tricycle', 141 | 'block', 'car_group'], 142 | ) 143 | 144 | dataset_val = YOLODataset( 145 | img_path=os.path.join(root, 'val'), 146 | hyp=hyp, 147 | augment=False, 148 | names=['pedestrian', 'person', 'car', 'van', 'bus', 'truck', 'motor', 'bicycle', 'awning-tricycle', 'tricycle', 149 | 'block', 'car_group'] 150 | ) 151 | 152 | dataset_test = YOLODataset( 153 | img_path=os.path.join(root, 'test'), 154 | hyp=hyp, 155 | augment=False, 156 | names=['pedestrian', 'person', 'car', 'van', 'bus', 'truck', 'motor', 'bicycle', 'awning-tricycle', 'tricycle', 157 | 'block', 'car_group'] 158 | ) 159 | 160 | df = pd.read_csv(f'{root}/split.csv', index_col='image_id') 161 | targets = [] 162 | for i, d in tqdm(enumerate(dataset_train)): 163 | p = dataset_train[i]['im_file'].split('/')[-1] 164 | c = df.loc[p]['cluster'] 165 | targets.append(c) 166 | if not Path('visdrone_client_mapping.pt').exists(): 167 | client_mapping = {k: [] for k in range(100)} 168 | for i, d in tqdm(enumerate(dataset_train)): 169 | p = dataset_train[i]['im_file'].split('/')[-1] 170 | c = df.loc[p]['cluster'] 171 | client_mapping[c].append(i) 172 | torch.save(client_mapping, 'visdrone_client_mapping.pt') 173 | dataset_train.targets = targets 174 | client_mapping = torch.load('visdrone_client_mapping.pt') 175 | return { 176 | 'train': dataset_train, 177 | 'val': dataset_val, 178 | 'test': dataset_test, 179 | 'client_mapping': None, 180 | 'split': {'train': client_mapping} 181 | } 182 | 183 | 184 | if __name__ == "__main__": 185 | # visdrone_folder = "../datasets/visdrone" 186 | # output_folder = "../datasets/visdrone/yolo_format" 187 | # 188 | # convert_visdrone_to_yolo_format(visdrone_folder, output_folder) 189 | load_dataset('../datasets/visdrone') 190 | -------------------------------------------------------------------------------- /loaders/widar.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from typing import List, Tuple, Dict 4 | 5 | import numpy as np 6 | import torch 7 | from torch.utils.data import Dataset 8 | 9 | from partition.utils import train_test_split, make_split 10 | 11 | 12 | class WidarDataset(Dataset): 13 | def __init__(self, data: List[Tuple[np.ndarray, int]]): 14 | """ 15 | Initialize the WidarDataset class. 16 | 17 | Args: 18 | data (List[Tuple[np.ndarray, int]]): List of tuples containing input data and corresponding labels. 19 | """ 20 | self.data = data 21 | self.targets = [d[1] for d in data] 22 | 23 | def __len__(self) -> int: 24 | """ 25 | Return the length of the dataset. 26 | 27 | Returns: 28 | int: Length of the dataset. 29 | """ 30 | return len(self.data) 31 | 32 | def __getitem__(self, idx: int) -> Tuple[np.ndarray, int]: 33 | """ 34 | Get an item from the dataset by index. 35 | 36 | Args: 37 | idx (int): Index of the desired data. 38 | 39 | Returns: 40 | Tuple[np.ndarray, int]: A tuple containing the input data and corresponding label. 41 | """ 42 | return self.data[idx][0].reshape(22, 20, 20), self.data[idx][1] 43 | 44 | 45 | def map_array(my_array: np.ndarray, mapping_dict: Dict[int, int]) -> np.ndarray: 46 | """ 47 | Map values in a NumPy array based on a provided mapping dictionary. 
48 |  49 | Args: 50 | my_array (np.ndarray): Input NumPy array to be mapped. 51 | mapping_dict (Dict[int, int]): Dictionary containing the mapping of input values to output values. 52 |  53 | Returns: 54 | np.ndarray: Mapped NumPy array. 55 | """ 56 | mapping_func = np.vectorize(lambda x: mapping_dict.get(x, x)) 57 | mapped_array = mapping_func(my_array) 58 | return mapped_array 59 |  60 |  61 | def filter_data(datum: Tuple[np.ndarray, List[int]], selected_classes: List[int]) -> Tuple[np.ndarray, np.ndarray]: 62 | """ 63 | Filter input data and labels based on the selected classes. 64 |  65 | Args: 66 | datum (Tuple[np.ndarray, List[int]]): Tuple containing input data and corresponding labels. 67 | selected_classes (List[int]): List of selected classes to filter. 68 |  69 | Returns: 70 | Tuple[np.ndarray, np.ndarray]: Tuple containing filtered input data and corresponding labels. 71 | """ 72 | input_data = datum[0] 73 | input_labels = np.array(datum[1]) 74 | replace_classes = {v: k for k, v in enumerate(selected_classes)} 75 | mask = np.isin(input_labels, selected_classes) 76 | filtered_array = input_data[mask, :, :] 77 | filtered_classes = input_labels[mask] 78 | filtered_classes = map_array(filtered_classes, replace_classes) 79 |  80 | return filtered_array, filtered_classes 81 |  82 |  83 | def split_dataset(data: List[Tuple[np.ndarray, int]], 84 | client_mapping_train: Dict[int, List[int]], 85 | client_mapping_test: Dict[int, List[int]]) \ 86 | -> Tuple[WidarDataset, WidarDataset, Dict[str, Dict[int, List[int]]]]: 87 | """ 88 | Split the dataset into train and test sets based on the client mappings. 89 |  90 | Args: 91 | data (List[Tuple[np.ndarray, int]]): The input dataset as a list of tuples containing input data and corresponding labels. 92 | client_mapping_train (Dict[int, List[int]]): A dictionary containing the client indices for the training set. 93 | client_mapping_test (Dict[int, List[int]]): A dictionary containing the client indices for the test set. 94 |  95 | Returns: 96 | Tuple[WidarDataset, WidarDataset, Dict[str, Dict[int, List[int]]]]: A tuple containing the train and test WidarDatasets, and a dictionary with train and test mappings. 97 | """ 98 | all_train, mapping_train = make_split(client_mapping_train) 99 | all_test, mapping_test = make_split(client_mapping_test) 100 |  101 | train_data = [data[i] for i in all_train] 102 | test_data = [data[i] for i in all_test] 103 | return WidarDataset(train_data), WidarDataset(test_data), {'train': mapping_train, 'test': mapping_test} 104 |  105 |  106 | def load_dataset(split=[x for x in list(range(0, 17)) if x not in [0, 1, 2, 3, 15]], 107 | selected_classes=[0, 3, 7, 10, 12, 14, 15, 16, 19], 108 | reprocess=False): 109 | """ 110 | Load and preprocess the Widar dataset. 111 |  112 | Args: 113 | split (List[int], optional): List of client indices to include in the training set. Defaults to [x for x in list(range(0, 17)) if x not in [0, 1, 2, 3, 15]]. 114 | selected_classes (List[int], optional): List of selected classes to filter. Defaults to [0, 3, 7, 10, 12, 14, 15, 16, 19]. 115 | reprocess (bool, optional): Whether to reprocess the dataset or use existing preprocessed data (currently unused). Defaults to False. 116 |  117 | Returns: 118 | Dict[str, Union[WidarDataset, Dict[int, List[int]]]]: Dictionary containing the full_dataset, train and test datasets, client_mapping, and split information.
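    Note: each `{user}.pkl` file under datasets/widar/federated holds one
    user's (X, Y) arrays as written by datasets/widar/download.py; `split` is
    passed to partition.utils.train_test_split to divide these users into
    train and test clients.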
119 | """ 120 | path = 'datasets/widar/federated' 121 | 122 | data = os.listdir(path) 123 | dtt = [] 124 | for i in data: 125 | if i.endswith('.pkl'): 126 | try: 127 | with open(f'{path}/{i}', 'rb') as f: 128 | dtt.append(torch.load(f)) 129 | except pickle.UnpicklingError as e: 130 | print(f'Error loading {i}') 131 | data = dtt 132 | data.sort(key=lambda x: len(x[-1])) 133 | data = [filter_data(d, selected_classes) for d in data] 134 | all_users = list(range(0, len(data))) 135 | cl_idx = {} 136 | i = 0 137 | for j in all_users: 138 | d = data[j] 139 | cl_idx[j] = list(range(i, i + len(d[0]))) 140 | i += len(d[0]) 141 | 142 | x = [d[0] for d in data] 143 | x = np.concatenate(x, axis=0, dtype=np.float32) 144 | x = (x - .0025) / .0119 145 | y = np.concatenate([d[1] for d in data]) 146 | data = [(x[i], y[i]) for i in range(len(x))] 147 | dataset = WidarDataset(data) 148 | data = [dataset[i] for i in range(len(dataset))] 149 | client_mapping_train, client_mapping_test = train_test_split(cl_idx, split) 150 | train_dataset, test_dataset, split = split_dataset(data, client_mapping_train, client_mapping_test) 151 | data_dict = { 152 | 'full_dataset': dataset, 153 | 'train': train_dataset, 154 | 'test': test_dataset, 155 | 'client_mapping': cl_idx, 156 | 'split': split 157 | } 158 | return data_dict 159 | 160 | 161 | if __name__ == '__main__': 162 | dt = load_dataset() 163 | print(len(dt['train'])) -------------------------------------------------------------------------------- /models/casas.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import loaders.casas 3 | import torch.nn as nn 4 | 5 | class LSTMModel(nn.Module): 6 | def __init__(self, input_dim, output_dim, no_activities): 7 | super(LSTMModel, self).__init__() 8 | self.embedding = nn.Embedding(input_dim, output_dim, padding_idx=0) 9 | self.lstm = nn.LSTM(output_dim, output_dim, batch_first=True) 10 | self.fc = nn.Linear(output_dim, no_activities) 11 | # self.fc = nn.Sequential(nn.Flatten(), 12 | # nn.Dropout(0.2), 13 | # nn.Linear(output_dim, output_dim), 14 | # nn.ReLU(), 15 | # nn.Dropout(0.2), 16 | # nn.Linear(64, no_activities)) 17 | def forward(self, x): 18 | print(x.shape) 19 | x = self.embedding(x) 20 | print(x.shape) 21 | x, _ = self.lstm(x) 22 | print(x.shape) 23 | x = self.fc(x[:, -1, :]) 24 | return x 25 | class BiLSTMModel(nn.Module): 26 | def __init__(self, input_dim=2000, output_dim=64, max_length=2000, no_activities=12): 27 | super(BiLSTMModel, self).__init__() 28 | self.embedding = nn.Embedding(input_dim, output_dim, padding_idx=0) 29 | self.lstm = nn.LSTM(output_dim, output_dim, bidirectional=True, batch_first=True) 30 | self.fc = nn.Linear(output_dim * 2, no_activities) 31 | 32 | def forward(self, x): 33 | x = self.embedding(x.type(torch.long)) 34 | x, _ = self.lstm(x) 35 | x = self.fc(x[:, -1, :]) 36 | return x 37 | 38 | class Ensemble2LSTMModel(nn.Module): 39 | def __init__(self, input_dim, output_dim, max_length, no_activities): 40 | super(Ensemble2LSTMModel, self).__init__() 41 | self.model1 = BiLSTMModel(input_dim, output_dim, max_length, no_activities) 42 | self.model2 = LSTMModel(input_dim, output_dim, max_length, no_activities) 43 | self.fc = nn.Linear(output_dim * 2, no_activities) 44 | 45 | def forward(self, x): 46 | x1 = self.model1(x) 47 | x2 = self.model2(x) 48 | x = torch.cat((x1, x2), dim=1) 49 | x = self.fc(x) 50 | return x 51 | 52 | class CascadeEnsembleLSTMModel(nn.Module): 53 | def __init__(self, input_dim, output_dim, max_length, no_activities): 
54 | super(CascadeEnsembleLSTMModel, self).__init__() 55 | self.model1 = BiLSTMModel(input_dim, output_dim, max_length, no_activities) 56 | self.model2 = LSTMModel(input_dim, output_dim, max_length, no_activities) 57 | self.lstm = nn.LSTM(output_dim * 2, output_dim, batch_first=True) 58 | self.fc = nn.Linear(output_dim, no_activities) 59 | 60 | def forward(self, x): 61 | x1 = self.model1.embedding(x) 62 | x2 = self.model2.embedding(x) 63 | x1, _ = self.model1.lstm(x1) 64 | x2, _ = self.model2.lstm(x2) 65 | x = torch.cat((x1, x2), dim=2) 66 | x, _ = self.lstm(x) 67 | x = self.fc(x[:, -1, :]) 68 | return x 69 | 70 | class CascadeEnsembleLSTMModel(nn.Module): 71 | def __init__(self, input_dim, output_dim, max_length, no_activities): 72 | super(CascadeEnsembleLSTMModel, self).__init__() 73 | self.embedding1 = nn.Embedding(input_dim, output_dim, padding_idx=0) 74 | self.embedding2 = nn.Embedding(input_dim, output_dim, padding_idx=0) 75 | self.lstm1 = nn.LSTM(output_dim, output_dim, bidirectional=True, batch_first=True) 76 | self.lstm2 = nn.LSTM(output_dim, output_dim, batch_first=True) 77 | self.lstm3 = nn.LSTM(output_dim * 2, output_dim, batch_first=True) 78 | self.fc = nn.Linear(output_dim, no_activities) 79 | 80 | def forward(self, x): 81 | x1 = self.embedding1(x) 82 | x2 = self.embedding2(x) 83 | x1, _ = self.lstm1(x1) 84 | x2, _ = self.lstm2(x2) 85 | x = torch.cat((x1, x2), dim=2) 86 | x, _ = self.lstm3(x) 87 | x = self.fc(x[:, -1, :]) 88 | return x 89 | 90 | class CascadeLSTMModel(nn.Module): 91 | def __init__(self, input_dim, output_dim, max_length, no_activities): 92 | super(CascadeLSTMModel, self).__init__() 93 | self.embedding = nn.Embedding(input_dim, output_dim, padding_idx=0) 94 | self.lstm1 = nn.LSTM(output_dim, output_dim, bidirectional=True, batch_first=True) 95 | self.lstm2 = nn.LSTM(output_dim * 2, output_dim, batch_first=True) 96 | self.fc = nn.Linear(output_dim, no_activities) 97 | 98 | def forward(self, x): 99 | x = self.embedding(x) 100 | x, _ = self.lstm1(x) 101 | x, _ = self.lstm2(x) 102 | x = self.fc(x[:, -1, :]) 103 | return x 104 | 105 | 106 | -------------------------------------------------------------------------------- /models/ego4d.py: -------------------------------------------------------------------------------- 1 | import types 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torchvision 6 | 7 | 8 | def forward_reimpl(self, x): 9 | x = self.conv1(x) 10 | x = self.bn1(x) 11 | x = self.relu(x) 12 | x = self.maxpool(x) 13 | 14 | x = self.layer1(x) 15 | x = self.layer2(x) 16 | x = self.layer3(x) 17 | x = self.layer4(x) 18 | 19 | x = self.avgpool(x) 20 | # x = torch.flatten(x, 1) 21 | # x = self.fc(x) 22 | 23 | return x.squeeze(2).squeeze(2) 24 | 25 | 26 | class CNNLSTM(nn.Module): 27 | def __init__(self, hidden_size=512, num_layers=1, state=False): 28 | super(CNNLSTM, self).__init__() 29 | self.backbone = torchvision.models.resnet18(pretrained=False) 30 | self.backbone.fc = None 31 | self.lstm = nn.LSTM(512, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True) 32 | self.regressor = nn.Linear(hidden_size * 2, 1) 33 | self.state = state 34 | if self.state: 35 | self.state_classifier = nn.Linear(hidden_size * 2, 2) 36 | self.backbone.forward = types.MethodType(forward_reimpl, self.backbone) 37 | 38 | def forward(self, x): 39 | # x: (b, c, seq_len, h, w) 40 | seq_len = x.shape[2] 41 | batch_size = x.shape[0] 42 | x = x.permute((0, 2, 1, 3, 4)) 43 | x = x.reshape(-1, x.shape[2], x.shape[3], x.shape[4]) 44 | x = self.backbone(x) 45 | 46 | 
x = x.view(batch_size, seq_len, -1) 47 | x, _ = self.lstm(x) # (b, seq_len, hidden_size*2) 48 | out = self.regressor(x).squeeze(2) 49 | if self.state: 50 | state = self.state_classifier(x.mean(1)) 51 | return torch.sigmoid(out), state 52 | return torch.sigmoid(out) 53 | -------------------------------------------------------------------------------- /models/emognition.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class LSTMRegressor(nn.Module): 5 | def __init__(self, input_size=5, output_size=2, hidden_size=128, num_layers=2): 6 | super(LSTMRegressor, self).__init__() 7 | self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) 8 | self.fc = nn.Linear(hidden_size, output_size) 9 | self.dropout = nn.Dropout(0.3) 10 | self.output_activation = nn.Softmax() 11 | 12 | def forward(self, x): 13 | # Input shape: (batch_size, sequence_length, num_features) 14 | x, _ = self.lstm(x) # LSTM output shape: (batch_size, sequence_length, hidden_size) 15 | x = self.fc(self.dropout(x[:, -1, :])) 16 | # Use the last LSTM output; shape: (batch_size, output_size) 17 | return self.output_activation(x) 18 | 19 | 20 | class CNN_LSTM_Regressor(nn.Module): 21 | def __init__(self, input_size=5, num_emotions=2): 22 | super(CNN_LSTM_Regressor, self).__init__() 23 | 24 | self.conv1 = nn.Conv1d(input_size, 64, kernel_size=3, padding=1) 25 | self.relu = nn.ReLU() 26 | self.max_pool = nn.MaxPool1d(kernel_size=2) 27 | self.lstm = nn.LSTM(64, 128, num_layers=1, batch_first=True) 28 | self.fc = nn.Linear(128, num_emotions) 29 | self.output_activation = nn.Softmax() 30 | 31 | def forward(self, x): 32 | # Input shape: (batch_size, sequence_length, num_features) 33 | x = x.permute(0, 2, 1) # Change shape to (batch_size, num_features, sequence_length) 34 | 35 | # 1D Convolution 36 | x = self.conv1(x) 37 | x = self.relu(x) 38 | x = self.max_pool(x) 39 | 40 | x = x.permute(0, 2, 1) # Change shape to (batch_size, sequence_length, num_channels) 41 | 42 | # LSTM 43 | x, _ = self.lstm(x) 44 | 45 | # Fully connected layer 46 | x = self.fc(x[:, -1, :]) # Use the last LSTM output 47 | 48 | return self.output_activation(x) 49 | -------------------------------------------------------------------------------- /models/energy.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class MLP(nn.Module): 5 | def __init__(self, input_size=18, hidden_size=210, output_size=1): 6 | super(MLP, self).__init__() 7 | self.layer1 = nn.Linear(input_size, hidden_size) 8 | self.layer2 = nn.Linear(hidden_size, hidden_size * 2) 9 | self.layer3 = nn.Linear(hidden_size * 2, hidden_size * 4) 10 | self.layer4 = nn.Linear(hidden_size * 4, hidden_size * 2) 11 | self.layer5 = nn.Linear(hidden_size * 2, hidden_size) 12 | self.output_layer = nn.Linear(hidden_size, output_size) 13 | self.relu = nn.ReLU() 14 | self.dropout = nn.Dropout(0.3) 15 | 16 | def forward(self, x): 17 | out = self.relu(self.layer1(x)) 18 | out = self.dropout(out) 19 | out = self.relu(self.layer2(out)) 20 | out = self.dropout(out) 21 | out = self.relu(self.layer3(out)) 22 | out = self.dropout(out) 23 | out = self.relu(self.layer4(out)) 24 | out = self.dropout(out) 25 | out = self.relu(self.layer5(out)) 26 | out = self.dropout(out) 27 | out = self.output_layer(out) 28 | return out 29 | -------------------------------------------------------------------------------- /models/utils.py: 
-------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | from torch import nn 4 | from ultralytics.nn.tasks import DetectionModel 5 | 6 | from models import widar, wisdm, ut_har, emognition, casas, energy, ego4d, epic_sounds 7 | 8 | MODULE_MAP = { 9 | 'wisdm_phone': wisdm, 10 | 'wisdm_watch': wisdm, 11 | 'widar': widar, 12 | 'ut_har': ut_har, 13 | 'emognition': emognition, 14 | 'casas': casas, 15 | 'energy': energy, 16 | 'ego4d': ego4d, 17 | 'epic_sounds': epic_sounds 18 | } 19 | 20 | 21 | def find_subclasses_and_factory_functions(module, parent_class): 22 | results = [] 23 | 24 | for _, obj in inspect.getmembers(module): 25 | # Check if it's a class and a subclass of the parent_class 26 | if inspect.isclass(obj) and issubclass(obj, parent_class) and obj != parent_class: 27 | results.append(obj) 28 | # Check if it's a function 29 | elif inspect.isfunction(obj): 30 | try: 31 | # Get the function's return type annotation 32 | return_annotation = inspect.signature(obj).return_annotation 33 | 34 | # Check if the return type annotation is a subclass of the parent_class 35 | if inspect.isclass(return_annotation) and issubclass(return_annotation, 36 | parent_class) and return_annotation != parent_class: 37 | results.append(obj) 38 | except (TypeError, ValueError, KeyError): 39 | # Ignore the function if the return type annotation is missing or not valid 40 | pass 41 | 42 | return results 43 | 44 | 45 | def find_class_by_name(class_list, target_name): 46 | return next((cls for cls in class_list if cls.__name__ == target_name), None) 47 | 48 | 49 | def load_model(model_name, trainer, dataset_name): 50 | if trainer == 'ultralytics': 51 | return DetectionModel(cfg=model_name) 52 | 53 | if dataset_name not in MODULE_MAP: 54 | raise ValueError('Dataset not supported') 55 | 56 | modules = find_subclasses_and_factory_functions(MODULE_MAP[dataset_name], nn.Module) 57 | model_cls = find_class_by_name(modules, model_name) 58 | 59 | if not model_cls: 60 | raise ValueError(f'No class found with the given name: {model_name}') 61 | 62 | return model_cls() 63 | 64 | 65 | if __name__ == '__main__': 66 | model = load_model('UT_HAR_ResNet18', 'BaseTrainer', 'ut_har') 67 | print(model) 68 | -------------------------------------------------------------------------------- /models/wisdm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # from torchsummary import summary 4 | 5 | 6 | class LSTM_NET(nn.Module): 7 | """Class to design a LSTM model.""" 8 | 9 | def __init__(self, input_dim=6, hidden_dim=6, time_length=200): 10 | """Initialisation of the class (constructor).""" 11 | # Input: 12 | # input_dim, integer 13 | # hidden_dim; integer 14 | # time_length; integer 15 | 16 | super().__init__() 17 | 18 | self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True, num_layers=1) 19 | self.net = nn.Sequential(nn.Flatten(), 20 | nn.Dropout(0.2), 21 | nn.Linear(time_length * hidden_dim, 128), 22 | nn.ReLU(), 23 | nn.Dropout(0.2), 24 | nn.Linear(128, 12)) 25 | 26 | def forward(self, input_data): 27 | """The layers are stacked to transport the data through the neural network for the forward part.""" 28 | # Input: 29 | # input_data; torch.Tensor 30 | # Output: 31 | # x; torch.Tensor 32 | 33 | x, h = self.lstm(input_data) 34 | x = self.net(x) 35 | 36 | return x 37 | 38 | 39 | class GRU_NET(nn.Module): 40 | """Class to design a GRU model.""" 41 | 42 | def __init__(self, input_size, hidden_size, 
43 |         """Initialisation of the class (constructor)."""
44 |         # Input:
45 |         #   input_size; integer
46 |         #   hidden_size; integer
47 |         #   num_layers; integer
48 |         #   output_size; integer
49 |         #   time_length; integer
50 | 
51 |         super().__init__()
52 | 
53 |         self.input_size = input_size
54 |         self.hidden_size = hidden_size
55 |         self.num_layers = num_layers
56 |         self.output_size = output_size
57 |         self.time_length = time_length
58 | 
59 |         self.gru = nn.GRU(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
60 | 
61 |         self.net = nn.Sequential(nn.Flatten(),
62 |                                  nn.Linear(self.time_length * self.hidden_size, self.output_size, bias=True))
63 | 
64 |     def forward(self, input_data):
65 |         """Forward pass: run the data through the stacked layers."""
66 |         # Input:
67 |         #   input_data; torch.Tensor
68 |         # Output:
69 |         #   x; torch.Tensor
70 |         # (the GRU hidden state h is discarded)
71 | 
72 |         x, h = self.gru(input_data)
73 |         x = self.net(x)
74 | 
75 |         return x
76 | 
77 | 
78 | class CNN_NET_V1(nn.Module):
79 |     """Class to design a CNN model."""
80 | 
81 |     def __init__(self, time_length=200):
82 |         """Initialisation of the class (constructor)."""
83 | 
84 |         super().__init__()
85 | 
86 |         self.relu = nn.ReLU()
87 |         self.dropout = nn.Dropout(0.5)
88 |         self.softmax = nn.Softmax(dim=1)
89 | 
90 |         self.bncnn1 = nn.BatchNorm1d(64)
91 |         self.bncnn2 = nn.BatchNorm1d(128)
92 |         self.bncnn3 = nn.BatchNorm1d(256)
93 |         self.bnbout = nn.BatchNorm1d(64)
94 | 
95 |         self.cnn1 = nn.Conv1d(time_length, 64, 3, padding=2)
96 |         self.cnn2 = nn.Conv1d(64, 128, 3, padding=1)
97 |         self.cnn3 = nn.Conv1d(128, 256, 3, padding=1)
98 | 
99 |         self.avgpool = nn.AvgPool1d(3)
100 | 
101 |         self.linbout = nn.Linear(512, 64, bias=True)
102 |         self.linout = nn.Linear(64, 12, bias=True)
103 | 
104 |     def forward(self, input_data):
105 |         """Forward pass: run the data through the stacked layers."""
106 |         # Input:
107 |         #   input_data; torch.Tensor
108 |         # Output:
109 |         #   x; torch.Tensor
110 | 
111 |         # Shapes below assume the default (batch_size, 200, 6) input; the 200 time steps act as Conv1d channels
112 | 
113 |         x = input_data
114 |         # Shape: (batch_size, 200, 6)
115 |         x = self.cnn1(x)
116 |         x = self.bncnn1(x)
117 |         x = self.relu(x)
118 | 
119 |         # Shape after cnn1: (batch_size, 64, 8)
120 |         x = self.cnn2(x)
121 |         x = self.bncnn2(x)
122 |         x = self.relu(x)
123 | 
124 |         # Shape after cnn2: (batch_size, 128, 8)
125 |         x = self.cnn3(x)
126 |         x = self.bncnn3(x)
127 |         x = self.relu(x)
128 | 
129 |         # Shape after cnn3: (batch_size, 256, 8)
130 |         x = self.avgpool(x)
131 | 
132 |         # Shape after avgpool: (batch_size, 256, 2), which flattens to 512
133 |         x = self.linbout(torch.flatten(x, 1))
134 |         x = self.bnbout(x)
135 |         x = self.relu(x)
136 |         x = self.dropout(x)
137 | 
138 |         # Shape: (batch_size, 64)
139 |         x = self.linout(x)
140 |         x = self.softmax(x)
141 |         # Output shape: (batch_size, 12), softmax probabilities
142 | 
143 |         return x
144 | 
145 | 
146 | if __name__ == "__main__":
147 |     x = torch.ones((64, 100, 6)).cuda()
148 |     # lstmnet = CNN_NET_V1(6, 100)
149 |     lstmnet = GRU_NET(6, 4, 2, 12, 100).cuda()  # move the model to the same device as the input
150 |     # summary(lstmnet, (100, 6))
151 |     o = lstmnet(x)
152 |     print(o.shape)
153 | 
--------------------------------------------------------------------------------
/models/yolov8.yaml:
--------------------------------------------------------------------------------
1 | nc: 12  # number of classes
2 | scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
3 |   # [depth, width, max_channels]
4 |   n: [0.33, 0.25, 1024]
5 |   s: [0.33, 0.50, 1024]
6 |   m: [0.67, 0.75, 768]
7 |   l: [1.00, 1.00, 512]
8 |   x: [1.00, 1.25, 512]
9 | 
10 | # YOLOv8.0 backbone
11 | backbone:
12 |   # [from, repeats, module, args]
13 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
14 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
15 |   - [-1, 3, C2f, [128, True]]
16 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
17 |   - [-1, 6, C2f, [256, True]]
18 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
19 |   - [-1, 6, C2f, [512, True]]
20 |   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
21 |   - [-1, 3, C2f, [1024, True]]
22 |   - [-1, 1, SPPF, [1024, 5]]  # 9
23 | 
24 | # YOLOv8.0-p2 head
25 | head:
26 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
27 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
28 |   - [-1, 3, C2f, [512]]  # 12
29 | 
30 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
32 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
33 | 
34 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35 |   - [[-1, 2], 1, Concat, [1]]  # cat backbone P2
36 |   - [-1, 3, C2f, [128]]  # 18 (P2/4-xsmall)
37 | 
38 |   - [-1, 1, Conv, [128, 3, 2]]
39 |   - [[-1, 15], 1, Concat, [1]]  # cat head P3
40 |   - [-1, 3, C2f, [256]]  # 21 (P3/8-small)
41 | 
42 |   - [-1, 1, Conv, [256, 3, 2]]
43 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
44 |   - [-1, 3, C2f, [512]]  # 24 (P4/16-medium)
45 | 
46 |   - [-1, 1, Conv, [512, 3, 2]]
47 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
48 |   - [-1, 3, C2f, [1024]]  # 27 (P5/32-large)
49 | 
50 |   - [[18, 21, 24, 27], 1, Detect, [nc]]  # Detect(P2, P3, P4, P5)
--------------------------------------------------------------------------------
/partition/centralized.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | 
3 | import numpy as np
4 | from torch.utils.data import Dataset
5 | 
6 | from partition.utils import IndexedSubset
7 | 
8 | 
9 | class CentralizedPartition:
10 |     def __init__(self):
11 |         pass
12 | 
13 |     def __call__(self, dataset) -> List[Dataset]:
14 |         total_num = len(dataset)
15 |         idxs = list(range(total_num))
16 |         dataset_ref = dataset
17 |         return [
18 |             IndexedSubset(
19 |                 dataset_ref,
20 |                 indices=idxs,
21 |             )
22 |         ]
23 | 
--------------------------------------------------------------------------------
/partition/dirichlet.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from typing import List
3 | 
4 | import numpy as np
5 | from torch.utils.data import Dataset
6 | 
7 | from partition.utils import IndexedSubset
8 | 
9 | 
10 | class DirichletPartition:
11 |     def __init__(
12 |             self,
13 |             num_clients: int,
14 |             alpha: float,
15 |             num_class: int = 10,
16 |             minimum_data_size: int = 20,
17 |             max_iter=10000
18 |     ):
19 |         self.num_clients = num_clients
20 |         self.alpha = alpha
21 |         self.num_class = num_class
22 |         self.minimum_data_size = minimum_data_size
23 |         self.max_iter = max_iter
24 |         self.distributions = defaultdict(lambda: np.random.dirichlet(np.repeat(self.alpha, self.num_clients)))
25 | 
26 |     def __call__(self, dataset) -> List[Dataset]:
27 |         it = 0
28 |         if not isinstance(dataset.targets, np.ndarray):
29 |             dataset.targets = np.array(
30 |                 dataset.targets, dtype=np.int64
31 |             )
32 |         net_dataidx_map = {}
33 |         min_size = 0
34 |         idx_batch = [[] for _ in range(self.num_clients)]
35 |         while min_size < self.minimum_data_size and it < self.max_iter:
36 |             it += 1
37 |             idx_batch = [[] for _ in range(self.num_clients)]
38 |             # for each class in the dataset
39 |             for k in range(self.num_class):
40 |                 idx_k = np.where(dataset.targets == k)[0]
41 |                 np.random.shuffle(idx_k)
42 |                 proportions = self.distributions[k]
43 |                 # Balance: zero out clients that already hold at least an average share
44 |                 proportions = np.array(
45 |                     [
46 |                         p * (len(idx_j) < len(dataset) / self.num_clients)
47 |                         for p, idx_j in zip(proportions, idx_batch)
48 |                     ]
49 |                 )
50 |                 proportions = proportions / proportions.sum()
51 |                 proportions = (np.cumsum(proportions) * len(idx_k)).astype(int)[:-1]
52 |                 idx_batch = [
53 |                     idx_j + idx.tolist()
54 |                     for idx_j, idx in zip(idx_batch, np.split(idx_k, proportions))
55 |                 ]
56 |                 min_size = min([len(idx_j) for idx_j in idx_batch])
57 | 
58 |         # Redistribution loop
59 |         it = 0
60 |         while min_size < self.minimum_data_size and it < self.max_iter:
61 |             # Find client with minimum and maximum samples
62 |             min_samples_client = min(idx_batch, key=len)
63 |             max_samples_client = max(idx_batch, key=len)
64 |             # Get count of samples needed to reach minimum_data_size
65 |             transfer_samples_count = self.minimum_data_size - len(min_samples_client)
66 |             # Transfer samples from max_samples_client to min_samples_client
67 |             min_samples_client.extend(max_samples_client[-transfer_samples_count:])
68 |             del max_samples_client[-transfer_samples_count:]
69 |             # Recalculate min_size
70 |             min_size = min([len(idx_j) for idx_j in idx_batch])
71 |             it += 1
72 | 
73 |         for j in range(self.num_clients):
74 |             np.random.shuffle(idx_batch[j])
75 |             net_dataidx_map[j] = idx_batch[j]
76 |         dataset_ref = dataset
77 |         return [
78 |             IndexedSubset(
79 |                 dataset_ref,
80 |                 indices=net_dataidx_map[i],
81 |             )
82 |             for i in range(self.num_clients)
83 |         ]
84 | 
--------------------------------------------------------------------------------
/partition/label.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | from partition.utils import IndexedSubset
5 | 
6 | 
7 | class DisjointLabelPartition:
8 |     def __init__(self, num_users, num_classes=10, max_class_per_user=2):
9 |         self.num_users = num_users
10 |         self.num_classes = num_classes
11 |         self.max_unique_class_per_user = max_class_per_user
12 |         self.label_split = None
13 | 
14 |     def __call__(self, dataset):
15 |         class_indices_dict = {i: [] for i in range(self.num_classes)}
16 |         client_data_indices_dict = {i: [] for i in range(self.num_users)}
17 |         label = np.array(dataset.targets)
18 |         for i in range(len(label)):
19 |             label_i = label[i].item()
20 |             class_indices_dict[label_i].append(i)
21 | 
22 |         num_classes = self.num_classes
23 |         shard_per_user = self.max_unique_class_per_user
24 |         label_idx_split = class_indices_dict
25 | 
26 |         shard_per_class = int(shard_per_user * self.num_users / num_classes)
27 | 
28 |         for label_i in label_idx_split:
29 |             label_idx = label_idx_split[label_i]
30 |             num_leftover = len(label_idx) % shard_per_class
31 |             leftover = label_idx[-num_leftover:] if num_leftover > 0 else []
32 |             new_label_idx = np.array(label_idx[:-num_leftover]) if num_leftover > 0 else np.array(label_idx)
33 |             new_label_idx = new_label_idx.reshape((shard_per_class, -1)).tolist()
34 | 
35 |             for i, leftover_label_idx in enumerate(leftover):
36 |                 new_label_idx[i] = np.concatenate([new_label_idx[i], [leftover_label_idx]])
37 | 
38 |             label_idx_split[label_i] = new_label_idx
39 | 
40 |         if self.label_split is None:
41 |             label_split = list(range(num_classes)) * shard_per_class
42 |             label_split = torch.tensor(label_split)[torch.randperm(len(label_split))].tolist()
43 |             label_split = np.array(label_split).reshape((self.num_users, -1)).tolist()
44 | 
45 |             for i in range(len(label_split)):
46 |                 label_split[i] = np.unique(label_split[i]).tolist()
47 | 
48 |             self.label_split = label_split
49 | 
50 |         for i in range(self.num_users):
51 |             for label_i in self.label_split[i]:
52 |                 idx = torch.arange(len(label_idx_split[label_i]))[
53 |                     torch.randperm(len(label_idx_split[label_i]))[0]].item()
54 |                 client_data_indices_dict[i].extend(label_idx_split[label_i].pop(idx))
55 |         dataset_ref = dataset
56 |         return [IndexedSubset(dataset_ref, v) for _, v in client_data_indices_dict.items()]
57 | 
--------------------------------------------------------------------------------
/partition/uniform.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | 
3 | import numpy as np
4 | from torch.utils.data import Dataset
5 | 
6 | from partition.utils import IndexedSubset
7 | 
8 | 
9 | class UniformPartition:
10 |     def __init__(
11 |             self,
12 |             num_clients: int,
13 |             num_class: int = 10,
14 |     ):
15 |         self.num_clients = num_clients
16 |         self.num_class = num_class
17 | 
18 |     def __call__(self, dataset) -> List[Dataset]:
19 |         total_num = len(dataset)
20 |         idxs = np.random.permutation(total_num)
21 |         partitioned_idxs = np.array_split(idxs, self.num_clients)
22 |         net_dataidx_map = {i: partitioned_idxs[i] for i in range(self.num_clients)}
23 |         dataset_ref = dataset
24 |         return [
25 |             IndexedSubset(
26 |                 dataset_ref,
27 |                 indices=net_dataidx_map[i],
28 |             )
29 |             for i in range(self.num_clients)
30 |         ]
31 | 
--------------------------------------------------------------------------------
/partition/user_index.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | from typing import List
4 | 
5 | import numpy as np
6 | from torch.utils.data import Dataset
7 | 
8 | from partition.utils import IndexedSubset
9 | 
10 | 
11 | class UserPartition:
12 |     def __init__(
13 |             self, user_idxs
14 |     ):
15 |         self.user_idx = user_idxs
16 | 
17 |     def __call__(self, dataset) -> List[Dataset]:
18 |         dataset_ref = dataset
19 |         return [
20 |             IndexedSubset(
21 |                 dataset_ref,
22 |                 indices=v,
23 |             )
24 |             for _, v in self.user_idx.items()
25 |         ]
26 | 
--------------------------------------------------------------------------------
/partition/utils.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Iterable
2 | from pathlib import Path
3 | from typing import List, Sized
4 | 
5 | import altair as alt
6 | import numpy as np
7 | import pandas as pd
8 | import torch
9 | from torch.utils.data import Dataset
10 | 
11 | 
12 | class IndexedSubset(Dataset):
13 |     def __init__(self, dataset, indices):
14 |         self.indices = indices
15 |         self.dataset = dataset
16 |         self.targets = [dataset.targets[i] for i in indices]
17 | 
18 |     def __getitem__(self, index):
19 |         try:
20 |             i = self.indices[index]
21 |             return self.dataset[i]
22 |         except (KeyError, IndexError):
23 |             # Debugging aid: report the failing lookup, then re-raise
24 |             print(type(self))
25 |             print("index = {}".format(index))
26 |             print(type(self.indices))
27 |             print(self.indices)
28 |             raise
29 | 
30 |     def __len__(self):
31 |         return len(self.indices)
32 | 
33 | 
34 | def train_test_split(client_mapping, split):
35 |     if type(split) is float and split <= 1.0:
36 |         train_clients = np.random.choice(list(client_mapping.keys()), int(len(client_mapping.keys()) * split),
37 |                                          replace=False)
38 |     elif isinstance(split, Iterable) and all(isinstance(item, int) for item in split):
39 |         train_clients = list(split)
40 |     elif type(split) is int and split < len(client_mapping.keys()):
41 |         train_clients = np.random.choice(list(client_mapping.keys()), split, replace=False)
42 |     else:
43 |         raise ValueError("Invalid split value: {}".format(split))
44 |     test_clients = list(set(client_mapping.keys()) - set(train_clients))
45 |     return {k: client_mapping[k] for k in train_clients}, {k: client_mapping[k] for k in test_clients}
46 | 
47 | 
48 | def make_split(client_mapping_train):
49 |     indices = []
50 |     mapping_train = {k: [] for k in client_mapping_train.keys()}
51 |     i = 0
52 |     for k, v in client_mapping_train.items():
53 |         indices += v
54 |         for _ in range(len(v)):
55 |             mapping_train[k].append(i)
56 |             i += 1
57 |     return indices, mapping_train
58 | 
59 | 
60 | def compute_client_data_distribution(datasets: List[Sized | Dataset], num_classes: int):
61 |     class_distribution = []
62 |     data_distribution = []
63 | 
64 |     for i in range(len(datasets)):
65 |         class_counts = torch.zeros(num_classes)
66 |         for j in range(len(datasets[i])):
67 |             class_counts[int(datasets[i].targets[j])] += 1
68 |         class_counts = class_counts.numpy()
69 |         data_distribution.append(np.sum(class_counts))
70 |         class_counts = class_counts / np.sum(class_counts)
71 |         class_distribution.append(class_counts)
72 |     return data_distribution, class_distribution
73 | 
74 | 
75 | def get_html_plots(data_distribution, class_distribution):
76 |     data = []
77 |     num_clients = len(data_distribution)
78 |     for i in range(len(class_distribution[0])):
79 |         for j in range(len(class_distribution)):
80 |             data.append({"client": j, "class": i, "value": class_distribution[j][i]})
81 | 
82 |     heatmap = (
83 |         alt.Chart(pd.DataFrame(data))
84 |         .mark_rect()
85 |         .encode(
86 |             x=alt.X("client:N", title="Client"),
87 |             y=alt.Y("class:N", title="Class"),
88 |             color=alt.Color("value:Q", scale=alt.Scale(scheme="yellowgreenblue"),
89 |                             legend=alt.Legend(title="Percentage of Samples")),
90 |             tooltip="value:Q",
91 |         )
92 |         .properties(
93 |             title=alt.TitleParams(
94 |                 "Class Distribution of Clients",
95 |                 fontSize=12,
96 |             ),
97 |             # width=200,
98 |             # height=120,
99 |         )
100 |     )
101 | 
102 |     text = (
103 |         alt.Chart(pd.DataFrame(data))
104 |         .mark_text()
105 |         .encode(
106 |             x=alt.X("client:N"),
107 |             y=alt.Y("class:N"),
108 |             text=alt.Text("value:Q", format=".2f", ),
109 |             color=alt.condition(
110 |                 alt.datum.value > 0.5, alt.value("black"), alt.value("white")
111 |             ),
112 |         )
113 |         .transform_filter((alt.datum.value > 0.01))
114 |     )
115 | 
116 |     data_bar = (
117 |         alt.Chart(pd.DataFrame({"client": range(num_clients), "value": data_distribution}))
118 |         .mark_bar()
119 |         .encode(
120 |             x=alt.X("client:N", title="Client", axis=alt.Axis(labelFontSize=8)),
121 |             y=alt.Y("value:Q", title="Data Samples", axis=alt.Axis(labelFontSize=8)),
122 |             tooltip="value:Q",
123 |         )
124 |         .properties(
125 |             title=alt.TitleParams(
126 |                 "Sample Distribution of Clients",
127 |                 fontSize=12,
128 |             ),
129 |             # width=200,
130 |             # height=120,
131 |         )
132 |     )
133 |     Path('logs/').mkdir(exist_ok=True)
134 |     (heatmap + text).save('logs/class_dist.html'), data_bar.save('logs/data_dist.html')
135 |     return 'logs/class_dist.html', 'logs/data_dist.html'
136 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=1.13.1
2 | numpy>=1.24.2
3 | pandas>=1.5.2
4 | tqdm>=4.64.1
5 | altair>=4.1.0
6 | matplotlib>=3.6.0
7 | torchvision>=0.14.1
8 | pillow>=9.4.0
9 | ultralytics==8.0.57
10 | click>=8.0.4
11 | wandb>=0.14.0
12 | torchmetrics>=0.11.2
13 | ray>=2.3.0
14 | fire>=0.5.0
15 | requests>=2.28.2
16 | gdown>=4.7.1
17 | einops~=0.6.0
18 | scikit-learn
19 | scipy~=1.10.0
20 | av
21 | h5py
22 | chardet
23 | librosa
24 | llvmlite
25 | numba
26 | decorator
--------------------------------------------------------------------------------
/scorers/classification_evaluator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | from torchmetrics import Accuracy, F1Score, ConfusionMatrix
5 | from tqdm import tqdm
6 | 
7 | from scorers.utils import LossMetric
8 | 
9 | 
10 | def evaluate(model, test_data, device, num_classes=12, batch_size=32):
11 |     model.to(device)
12 |     test_dataloader = torch.utils.data.DataLoader(
13 |         dataset=test_data,
14 |         shuffle=False,
15 |         batch_size=batch_size,
16 |         pin_memory=True,
17 |         num_workers=1,
18 |         drop_last=True,
19 |     )
20 |     model.eval()
21 | 
22 |     criterion = nn.CrossEntropyLoss().to(device)  # mean reduction: LossMetric rescales by batch size itself
23 |     metrics = {
24 |         'accuracy': Accuracy(task="multiclass", num_classes=num_classes).to(device),
25 |         'f1_score': F1Score(task="multiclass", num_classes=num_classes, average='macro').to(device),
26 |         'confusion': ConfusionMatrix(task="multiclass", num_classes=num_classes).to(device),
27 |     }
28 |     lbl_type = torch.LongTensor
29 |     losses = {'cross_entropy_loss': LossMetric(criterion).to(device)}
30 |     with torch.no_grad():
31 |         label_list, pred_list = list(), list()
32 |         for batch_idx, (data, labels) in enumerate(tqdm(test_dataloader)):
33 |             # for data, labels, lens in test_data:
34 |             labels = labels.type(lbl_type)
35 |             data, labels = data.to(device), labels.to(device)
36 |             output = model(data)
37 |             for lm in losses.values():
38 |                 lm.update(output, labels)
39 |             # pred = output.data.max(1, keepdim=True)[1]
40 | 
41 |             for mm in metrics.values():
42 |                 mm.update(output, labels)
43 |             # pred = output.data.max(1, keepdim=True)[
44 |             #     1
45 |             # ]  # get the index of the max log-probability
46 |             # correct = pred.eq(labels.data.view_as(pred)).sum()
47 |             # for idx in range(len(labels)):
48 |             #     label_list.append(labels.detach().cpu().numpy()[idx])
49 |             #     pred_list.append(pred.detach().cpu().numpy()[idx][0])
50 |             #
51 |             # metrics["test_correct"] += correct.item()
52 |             # metrics["test_loss"] += loss * labels.size(0)
53 |             # metrics["test_total"] += labels.size(0)
54 |     return {k: v.compute().cpu().float() for k, v in metrics.items()} | {k: v.compute().cpu().float() for k, v in
55 |                                                                          losses.items()}
56 | 
--------------------------------------------------------------------------------
/scorers/localization_evaluator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | from torchmetrics import Accuracy, Metric
5 | from tqdm import tqdm
6 | 
7 | from scorers.utils import LossMetric
8 | 
9 | 
10 | def keyframe_distance(preds, uid_list):
11 |     distance_list = list()
12 |     sec_list = list()
13 |     for pred, gt in zip(preds, uid_list):
14 |         clip_length = gt['json_parent_end_sec'].item() - gt['json_parent_start_sec'].item()
15 |         clip_frames = gt['json_parent_end_frame'].item() - gt['json_parent_start_frame'].item() + 1
16 |         fps = clip_frames / clip_length
17 |         keyframe_loc_pred = np.argmax(pred)
18 |         # map the predicted index back to the clip's frame range
19 |         keyframe_loc_pred_mapped = (gt['json_parent_end_frame'].item() - gt[
20 |             'json_parent_start_frame'].item()) / 16 * keyframe_loc_pred
21 |         keyframe_loc_gt = gt['pnr_frame'].item() - gt['json_parent_start_frame'].item()
22 |         err_frame = abs(keyframe_loc_pred_mapped - keyframe_loc_gt)
23 |         err_sec = err_frame / fps
24 |         distance_list.append(err_frame.item())
25 |         sec_list.append(err_sec.item())
26 |     # When there is no false positive
27 |     if len(distance_list) == 0:
28 |         # Should we return something else here?
29 |         return 0, 0
30 |     return np.array(distance_list), np.array(sec_list)
31 | 
32 | 
33 | class KeyframeDistance(Metric):
34 |     def __init__(self):
35 |         super().__init__(dist_sync_on_step=False)
36 |         self.add_state("distance_list", default=[], dist_reduce_fx="cat")
37 |         self.add_state("sec_list", default=[], dist_reduce_fx="cat")
38 | 
39 |     def update(self, preds: torch.Tensor, infos: list[torch.Tensor]):
40 |         distance_list = list()
41 |         sec_list = list()
42 |         preds = preds.cpu().numpy()
43 |         preds = preds[:, :-1]
44 |         # pnr_frames = infos['pnr_frame']
45 |         # clip_start_secs = infos['clip_start_sec']
46 |         # clip_end_secs = infos['clip_end_sec']
47 |         # clip_start_frames = infos['clip_start_frame']
48 |         # clip_end_frames = infos['clip_end_frame']
49 |         for pred, clip_start_sec, clip_end_sec, clip_start_frame, clip_end_frame, pnr_frame in zip(preds,
50 |                                                                                                    *infos):
51 |             # print(clip_start_sec, clip_end_sec, clip_start_frame, clip_end_frame, pnr_frame)
52 |             if pnr_frame.item() == -1:
53 |                 continue
54 |             clip_length = clip_end_sec.item() - clip_start_sec.item()  # end minus start, so fps comes out positive
55 |             clip_frames = clip_end_frame.item() - clip_start_frame.item() + 1
56 |             fps = clip_frames / clip_length
57 |             keyframe_loc_pred = np.argmax(pred)
58 |             keyframe_loc_pred_mapped = (clip_end_frame.item() - clip_start_frame.item()) / 16 * keyframe_loc_pred
59 |             keyframe_loc_gt = pnr_frame.item() - clip_start_frame.item()
60 |             err_frame = abs(keyframe_loc_pred_mapped - keyframe_loc_gt)
61 |             err_sec = err_frame / fps
62 |             distance_list.append(err_frame.item())
63 |             sec_list.append(err_sec.item())
64 |         # When there is no false positive
65 |         if len(distance_list) == 0:
66 |             # Should we return something else here?
67 |             return
68 |         self.sec_list.extend(sec_list)
69 |         self.distance_list.extend(distance_list)
70 | 
71 |     def compute(self):
72 |         # Perform any final computations here.
73 |         # This might just be converting your lists of distances and seconds to tensors.
74 |         # Make sure to handle the case where the lists are empty.
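        # Worked example for the mapping in update() above (illustrative numbers):
        # a clip spanning frames 1000-1480 covers 480 frames, so with the 16
        # sampled positions that the division by 16 assumes, a predicted index
        # of 4 maps to (480 / 16) * 4 = 120 frames after clip start. If the
        # annotated PNR frame sits at offset 150, err_frame = 30 and, at
        # roughly 30 fps, err_sec is about 1.0 seconds.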
75 |         return torch.mean(torch.tensor(self.distance_list)) if len(self.distance_list) > 0 else torch.tensor(0.0)
76 | 
77 | 
78 | def evaluate(model, test_data, device, num_classes=12, batch_size=32):
79 |     model.to(device)
80 |     test_dataloader = torch.utils.data.DataLoader(
81 |         dataset=test_data,
82 |         shuffle=False,
83 |         batch_size=batch_size,
84 |         pin_memory=True,
85 |         num_workers=1,
86 |         drop_last=True,
87 |     )
88 |     model.eval()
89 | 
90 |     criterion = nn.CrossEntropyLoss().to(device)
91 |     metrics = {
92 |         'avg_multilabel_accuracy': Accuracy(task="multiclass", num_classes=num_classes, average='micro').to(device),
93 |         # 'binary_accuracy': Accuracy(task="multiclass", num_classes=1).to(device),
94 |         # 'f1_score': F1Score(task="multiclass", num_classes=num_classes, average='macro').to(device),
95 |         'keyframe_dist': KeyframeDistance().to(device),
96 |     }
97 |     lbl_type = torch.LongTensor
98 |     losses = {
99 |         'cce_loss': LossMetric(criterion).to(device),
100 |     }
101 | 
102 |     with torch.no_grad():
103 |         label_list, pred_list = list(), list()
104 |         for batch_idx, (data, labels, info) in enumerate(tqdm(test_dataloader)):
105 |             # for data, labels, lens in test_data:
106 |             labels = labels.type(lbl_type)
107 |             data, labels = data.to(device), labels.to(device)
108 |             output = model(data)
109 |             for lm in losses.values():
110 |                 lm.update(output, labels)
111 |             # pred = output.data.max(1, keepdim=True)[1]
112 | 
113 |             for name, mm in metrics.items():
114 |                 if name == 'keyframe_dist':
115 |                     mm.update(output, info)
116 |                     continue
117 |                 mm.update(output, labels)
118 |             # pred = output.data.max(1, keepdim=True)[
119 |             #     1
120 |             # ]  # get the index of the max log-probability
121 |             # correct = pred.eq(labels.data.view_as(pred)).sum()
122 |             # for idx in range(len(labels)):
123 |             #     label_list.append(labels.detach().cpu().numpy()[idx])
124 |             #     pred_list.append(pred.detach().cpu().numpy()[idx][0])
125 |             #
126 |             # metrics["test_correct"] += correct.item()
127 |             # metrics["test_loss"] += loss * labels.size(0)
128 |             # metrics["test_total"] += labels.size(0)
129 |     return {k: v.compute().cpu().float() for k, v in metrics.items()} | {k: v.compute().cpu().float() for k, v in
130 |                                                                          losses.items()}
131 | 
--------------------------------------------------------------------------------
/scorers/regression_evaluator.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.metrics import r2_score
3 | from torch import nn
4 | from torchmetrics import MeanAbsoluteError, R2Score
5 | from torchmetrics import Metric
6 | from tqdm import tqdm
7 | 
8 | from scorers.utils import LossMetric
9 | 
10 | 
11 | # class R2Score(Metric):
12 | #     def __init__(self, dist_sync_on_step=False):
13 | #         super().__init__(dist_sync_on_step=dist_sync_on_step)
14 | #         self.preds = []
15 | #         self.targets = []
16 | #
17 | #     def update(self, preds: torch.Tensor, target: torch.Tensor):
18 | #         self.preds += preds.reshape((-1,)).cpu().tolist()
19 | #         self.targets += target.reshape((-1,)).cpu().tolist()
20 | #
21 | #     def compute(self):
22 | #         return torch.tensor(r2_score(self.preds, self.targets))
23 | 
24 | 
25 | def evaluate(model, test_data, device, num_classes=1, batch_size=32):
26 |     model.to(device)
27 |     test_dataloader = torch.utils.data.DataLoader(
28 |         dataset=test_data,
29 |         shuffle=False,
30 |         batch_size=batch_size,
31 |         pin_memory=True,
32 |         num_workers=1,
33 |         drop_last=True,
34 |     )
35 |     model.eval()
36 | 
37 |     criterion = nn.MSELoss().to(device)
38 |     metrics = {
39 |         'mae': MeanAbsoluteError().to(device),
40 |         'R^2': R2Score().to(device)
41 |     }
42 |     losses = {'L2 Loss': LossMetric(criterion).to(device)}
43 |     with torch.no_grad():
44 |         label_list, pred_list = list(), list()
45 |         for batch_idx, (data, labels) in enumerate(tqdm(test_dataloader)):
46 |             # for data, labels, lens in test_data:
47 |             # labels = labels.type(torch.float)
48 |             data, labels = data.to(device), labels.to(device)
49 |             output = model(data)
50 |             labels = labels.reshape((-1,))
51 |             output = output.reshape((-1,))
52 |             for lm in losses.values():
53 |                 lm.update(output, labels)
54 |             # pred = output.data.max(1, keepdim=True)[1]
55 |             for mm in metrics.values():
56 |                 mm.update(output, labels)
57 |             # pred = output.data.max(1, keepdim=True)[
58 |             #     1
59 |             # ]  # get the index of the max log-probability
60 |             # correct = pred.eq(labels.data.view_as(pred)).sum()
61 |             # for idx in range(len(labels)):
62 |             #     label_list.append(labels.detach().cpu().numpy()[idx])
63 |             #     pred_list.append(pred.detach().cpu().numpy()[idx][0])
64 |             #
65 |             # metrics["test_correct"] += correct.item()
66 |             # metrics["test_loss"] += loss * labels.size(0)
67 |             # metrics["test_total"] += labels.size(0)
68 |     return {k: v.compute().cpu().float() for k, v in metrics.items()} | {k: v.compute().cpu().float() for k, v in
69 |                                                                          losses.items()}
70 | 
--------------------------------------------------------------------------------
/scorers/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchmetrics import Metric
3 | 
4 | 
5 | class LossMetric(Metric):
6 |     def __init__(self, criterion):
7 |         super().__init__()
8 |         self.criterion = criterion
9 |         self.add_state("loss", default=torch.tensor(0, dtype=float), dist_reduce_fx="mean")
10 |         self.add_state("total", default=torch.tensor(0, dtype=float), dist_reduce_fx="mean")
11 | 
12 |     def update(self, output: torch.Tensor, target: torch.Tensor):
13 |         # Rescale the mean-reduced batch loss to a sum so compute() yields a dataset-wide
14 |         # average; callers prepare the target dtype their criterion expects.
15 |         l = target.size(0) * self.criterion(output, target).item()
16 |         self.loss += l
17 |         self.total += target.size(0)
18 | 
19 |     def compute(self):
20 |         return self.loss.float() / self.total.float()
--------------------------------------------------------------------------------
/strategies/base_fl.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import ray
3 | import torch
4 | from tqdm import tqdm
5 | 
6 | 
7 | def distributed_fedavg(aggregator,
8 |                        client_trainers,
9 |                        client_dataset_refs,
10 |                        client_num_per_round,
11 |                        global_model,
12 |                        round_idx,
13 |                        scheduler,
14 |                        device,
15 |                        precision):
16 |     # Select random clients for each round
17 |     sampled_clients_idx = np.random.choice(len(client_dataset_refs), client_num_per_round, replace=False)
18 |     print(f"selected clients: {sampled_clients_idx}")
19 |     # Initialize lists to store updates, weights, and local metrics
20 |     all_updates, all_weights, all_local_metrics = [], [], []
21 | 
22 |     # Iterate over the sampled clients in chunks equal to the number of client trainers
23 |     for i in tqdm(range(0, len(sampled_clients_idx), len(client_trainers))):
24 |         # Initialize list to store remote steps
25 |         remote_steps = []
26 | 
27 |         # Iterate over the client trainers
28 |         for j, client_trainer in enumerate(client_trainers):
29 |             idx = i + j
30 |             if idx >= len(sampled_clients_idx):
31 |                 break
32 | 
33 |             # Update the remote client_trainer with the latest global model and scheduler state
34 |             client_trainer.update.remote(global_model.state_dict(), scheduler.state_dict())
35 | 
36 |             # Perform a remote training step on the client_trainer
37 |             if precision != 'float32':
38 |                 remote_step = client_trainer.step_low_precision.remote(sampled_clients_idx[idx],
39 |                                                                        client_dataset_refs[sampled_clients_idx[idx]],
40 |                                                                        round_idx,
41 |                                                                        precision,
42 |                                                                        device=device)
43 |             else:
44 |                 remote_step = client_trainer.step.remote(sampled_clients_idx[idx],
45 |                                                          client_dataset_refs[sampled_clients_idx[idx]],
46 |                                                          round_idx,
47 |                                                          device=device)
48 |             remote_steps.append(remote_step)
49 | 
50 |         # Retrieve remote steps results
51 |         print(f"length of steps: {len(remote_steps)}")
52 |         updates, num_client_samples, local_metrics = zip(*ray.get(remote_steps))
53 | 
54 |         # Add the results to the overall lists
55 |         for u, n, l in zip(updates, num_client_samples, local_metrics):
56 |             if n > 0:
57 |                 all_updates.append(u)
58 |                 all_weights.append(n)
59 |                 all_local_metrics.append(l)
60 |         torch.cuda.empty_cache()
61 | 
62 |     # Calculate the average local metrics, skipping entries a client did not report
63 |     local_metrics_avg = {key: sum(metric[key] for metric in all_local_metrics if metric[key] is not None) / len(all_local_metrics)
64 |                          for key in all_local_metrics[0]}
65 | 
66 |     print(all_local_metrics)
67 | 
68 |     # Update the global model using the aggregator
69 |     state_n = aggregator.step(all_updates, all_weights, round_idx)
70 |     global_model.load_state_dict(state_n)
71 | 
72 |     # Update the scheduler
73 |     scheduler.step()
74 | 
75 |     return local_metrics_avg, global_model, scheduler
76 | 
77 | 
78 | def basic_fedavg(aggregator,
79 |                  client_trainers,
80 |                  client_dataset_refs,
81 |                  client_num_per_round,
82 |                  global_model,
83 |                  round_idx,
84 |                  scheduler,
85 |                  device,
86 |                  precision):
87 |     # Select random clients for each round
88 |     sampled_clients_idx = np.random.choice(len(client_dataset_refs), client_num_per_round, replace=False)
89 |     print(f"selected clients: {sampled_clients_idx}")
90 |     # Initialize lists to store updates, weights, and local metrics
91 |     all_updates, all_weights, all_local_metrics = [], [], []
92 | 
93 |     # Iterate over the sampled clients in chunks equal to the number of client trainers
94 |     for i in tqdm(range(0, len(sampled_clients_idx), len(client_trainers))):
95 |         # Initialize list to store remote steps
96 |         remote_steps = []
97 | 
98 |         # Iterate over the client trainers
99 |         for j, client_trainer in enumerate(client_trainers):
100 |             idx = i + j
101 |             if idx >= len(sampled_clients_idx):
102 |                 break
103 | 
104 |             # Update the client_trainer with the latest global model and scheduler state
105 |             client_trainer.update(global_model.state_dict(), scheduler.state_dict())
106 | 
107 |             # Perform a training step on the client_trainer (synchronously, without Ray)
108 |             if precision != 'float32':
109 |                 remote_step = client_trainer.step_low_precision(sampled_clients_idx[idx],
110 |                                                                 client_dataset_refs[sampled_clients_idx[idx]],
111 |                                                                 round_idx,
112 |                                                                 precision,
113 |                                                                 device=device)
114 |             else:
115 |                 remote_step = client_trainer.step(sampled_clients_idx[idx],
116 |                                                   client_dataset_refs[sampled_clients_idx[idx]],
117 |                                                   round_idx,
118 |                                                   device=device)
119 |             remote_steps.append(remote_step)
120 | 
121 |         # Retrieve steps results
122 |         print(f"length of steps: {len(remote_steps)}")
123 |         updates, num_client_samples, local_metrics = zip(*remote_steps)
124 | 
125 |         # Add the results to the overall lists
126 |         for u, n, l in zip(updates, num_client_samples, local_metrics):
127 |             if n > 0:
128 |                 all_updates.append(u)
129 |                 all_weights.append(n)
130 |                 all_local_metrics.append(l)
131 |         torch.cuda.empty_cache()
132 | 
133 |     # Calculate the average local metrics, skipping entries a client did not report
134 |     local_metrics_avg = {key: sum(metric[key] for metric in all_local_metrics if metric[key] is not None) / len(all_local_metrics)
135 |                          for key in all_local_metrics[0]}
136 | 
137 |     print(all_local_metrics)
138 | 
139 |     # Update the global model using the aggregator
140 |     state_n = aggregator.step(all_updates, all_weights, round_idx)
141 |     global_model.load_state_dict(state_n)
142 | 
143 |     # Update the scheduler
144 |     scheduler.step()
145 | 
146 |     return local_metrics_avg, global_model, scheduler
--------------------------------------------------------------------------------
/system.yml:
--------------------------------------------------------------------------------
1 | [DEFAULT]
2 | num_gpus = 2
3 | num_trainers_per_gpu = 1
4 | CUDA_VISIBLE_DEVICES=1,2
5 | seed = 1
--------------------------------------------------------------------------------
/trainers/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import numpy as np
4 | import torch
5 | from torch.optim.lr_scheduler import LRScheduler
6 | 
7 | 
8 | class WarmupScheduler(LRScheduler):
9 |     def __init__(self, optimizer, warmup_epochs, scheduler):
10 |         self.warmup_epochs = warmup_epochs
11 |         self.scheduler = scheduler
12 |         super(WarmupScheduler, self).__init__(optimizer, -1)
13 |         self._last_lr = [0.0] * len(optimizer.param_groups)
14 | 
15 |     def get_lr(self):
16 |         if self.last_epoch < self.warmup_epochs:
17 |             warmup_factor = self.last_epoch / self.warmup_epochs
18 |             return [base_lr * warmup_factor for base_lr in self.base_lrs]
19 | 
20 |         return self.scheduler.get_last_lr()
21 | 
22 |     def step(self, epoch=None):
23 |         if self.last_epoch < self.warmup_epochs:
24 |             self.last_epoch += 1
25 |             new_lrs = self.get_lr()
26 |             for param_group, lr in zip(self.optimizer.param_groups, new_lrs):
27 |                 param_group['lr'] = lr
28 |             self._last_lr = new_lrs
29 |         else:
30 |             self.scheduler.step(epoch)
31 |             self._last_lr = self.scheduler.get_last_lr()
32 | 
33 | 
34 | def read_system_variable(system_config):
35 |     num_gpus = int(os.environ['num_gpus']) if 'num_gpus' in os.environ \
36 |         else system_config['DEFAULT'].getint('num_gpus', 1)
37 |     num_trainers_per_gpu = int(os.environ['num_trainers_per_gpu']) if 'num_trainers_per_gpu' in os.environ \
38 |         else system_config['DEFAULT'].getint('num_trainers_per_gpu', 1)
39 |     seed = int(os.environ['seed']) if 'seed' in os.environ \
40 |         else system_config['DEFAULT'].getint('seed', 1)
41 |     return num_gpus, num_trainers_per_gpu, seed
42 | 
43 | 
44 | def set_seed(seed: int):
45 |     """
46 |     Set the random seed for PyTorch and NumPy.
47 |     """
48 |     # Set the random seed for PyTorch
49 |     torch.manual_seed(seed)
50 |     torch.cuda.manual_seed_all(seed)
51 | 
52 |     # Set the random seed for NumPy
53 |     np.random.seed(seed)
54 | 
55 |     # Set the deterministic flag for CuDNN (GPU)
56 |     torch.backends.cudnn.deterministic = True
57 |     torch.backends.cudnn.benchmark = False
58 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import warnings
3 | 
4 | import numpy as np
5 | import pandas as pd
6 | import torch
7 | from torch.optim.lr_scheduler import LRScheduler
8 | 
9 | import loaders.casas
10 | import loaders.cifar10
11 | import loaders.ego4d
12 | import loaders.emognition
13 | import loaders.energy
14 | import loaders.epic_sounds
15 | import loaders.spatial_transforms
16 | import loaders.ut_har
17 | import loaders.visdrone
18 | import loaders.widar
19 | import loaders.wisdm
20 | import wandb
21 | from analyses.noise import inject_label_noise_with_matrix
22 | from loaders.utils import ParameterDict
23 | from partition.centralized import CentralizedPartition
24 | from partition.dirichlet import DirichletPartition
25 | from partition.uniform import UniformPartition
26 | from partition.user_index import UserPartition
27 | from partition.utils import compute_client_data_distribution, get_html_plots
28 | 
29 | 
30 | def read_system_variable(system_config):
31 |     num_gpus = int(os.environ['num_gpus']) if 'num_gpus' in os.environ \
32 |         else system_config['DEFAULT'].getint('num_gpus', 1)
33 |     num_trainers_per_gpu = int(os.environ['num_trainers_per_gpu']) if 'num_trainers_per_gpu' in os.environ \
34 |         else system_config['DEFAULT'].getint('num_trainers_per_gpu', 1)
35 |     seed = int(os.environ['seed']) if 'seed' in os.environ \
36 |         else system_config['DEFAULT'].getint('seed', 1)
37 |     return num_gpus, num_trainers_per_gpu, seed
38 | 
39 | 
40 | class WarmupScheduler(LRScheduler):
41 |     def __init__(self, optimizer, warmup_epochs, scheduler):
42 |         self.warmup_epochs = warmup_epochs
43 |         self.scheduler = scheduler
44 |         super(WarmupScheduler, self).__init__(optimizer, -1)
45 |         self._last_lr = [0.0] * len(optimizer.param_groups)
46 | 
47 |     def get_lr(self):
48 |         if self.last_epoch < self.warmup_epochs:
49 |             warmup_factor = self.last_epoch / self.warmup_epochs
50 |             return [base_lr * warmup_factor for base_lr in self.base_lrs]
51 | 
52 |         return self.scheduler.get_last_lr()
53 | 
54 |     def step(self, epoch=None):
55 |         if self.last_epoch < self.warmup_epochs:
56 |             self.last_epoch += 1
57 |             new_lrs = self.get_lr()
58 |             for param_group, lr in zip(self.optimizer.param_groups, new_lrs):
59 |                 param_group['lr'] = lr
60 |             self._last_lr = new_lrs
61 |         else:
62 |             self.scheduler.step(epoch)
63 |             self._last_lr = self.scheduler.get_last_lr()
64 | 
65 | 
66 | def get_default_yolo_hyperparameters():
67 |     YOLO_HYPERPARAMETERS = {
68 |         'lr0': 0.01,
69 |         'lrf': 0.01,
70 |         'momentum': 0.937,
71 |         'weight_decay': 0.0005,
72 |         'warmup_epochs': 3.0,
73 |         'warmup_momentum': 0.8,
74 |         'warmup_bias_lr': 0.1,
75 |         'box': 7.5,
76 |         'cls': 0.5,
77 |         'dfl': 1.5,
78 |         'fl_gamma': 0.0,
79 |         'label_smoothing': 0.0,
80 |         'nbs': 64,
81 |         'hsv_h': 0.015,
82 |         'hsv_s': 0.7,
83 |         'hsv_v': 0.4,
84 |         'degrees': 0.0,
85 |         'translate': 0.1,
86 |         'scale': 0.5,
87 |         'shear': 0.0,
88 |         'perspective': 0.0,
89 |         'flipud': 0.0,
90 |         'fliplr': 0.5,
91 |         'mosaic': 1.0,
92 |         'mixup': 0.0,
93 |         'copy_paste': 0.0,
94 |         'mask_ratio': 0.0,
95 |         'overlap_mask': 0.0,
96 |         'conf': 0.25,
97 |         'iou': 0.45,
98 |         'max_det': 1000,
99 |         'plots': False,
100 |         'half': False,  # use half precision (FP16)
101 |         'dnn': False,
102 |         'data': None,
103 |         'imgsz': 640,
104 |         'verbose': False
105 |     }
106 |     YOLO_HYPERPARAMETERS = ParameterDict(YOLO_HYPERPARAMETERS)
107 |     return YOLO_HYPERPARAMETERS
108 | 
109 | 
110 | def set_seed(seed: int):
111 |     """
112 |     Set the random seed for PyTorch and NumPy.
113 |     """
114 |     # Set the random seed for PyTorch
115 |     torch.manual_seed(seed)
116 |     torch.cuda.manual_seed_all(seed)
117 | 
118 |     # Set the random seed for NumPy
119 |     np.random.seed(seed)
120 | 
121 |     # Set the deterministic flag for CuDNN (GPU)
122 |     torch.backends.cudnn.deterministic = True
123 |     torch.backends.cudnn.benchmark = False
124 | 
125 | 
126 | def load_dataset(dataset_name):
127 |     if dataset_name == 'cifar10':
128 |         dataset = loaders.cifar10.load_dataset()
129 |         num_classes = 10
130 |     elif dataset_name == 'wisdm_watch':
131 |         dataset = loaders.wisdm.load_dataset(reprocess=False, modality='watch')
132 |         num_classes = 12
133 |     elif dataset_name == 'wisdm_phone':
134 |         dataset = loaders.wisdm.load_dataset(reprocess=False, modality='phone')
135 |         num_classes = 12
136 |     elif dataset_name == 'widar':
137 |         dataset = loaders.widar.load_dataset()
138 |         num_classes = 9
139 |     elif dataset_name == 'visdrone':
140 |         dataset = loaders.visdrone.load_dataset()
141 |         num_classes = 12
142 |     elif dataset_name == 'ut_har':
143 |         dataset = loaders.ut_har.load_dataset()
144 |         num_classes = 7
145 |     elif dataset_name == 'emognition':
146 |         dataset = loaders.emognition.load_bracelet_data(reprocess=True)
147 |         num_classes = 2
148 |     elif dataset_name == 'casas':
149 |         dataset = loaders.casas.load_dataset()
150 |         num_classes = 12
151 |     elif dataset_name == 'energy':
152 |         dataset = loaders.energy.load_dataset()
153 |         num_classes = 10
154 |     elif dataset_name == 'epic_sounds':
155 |         dataset = loaders.epic_sounds.load_dataset()
156 |         num_classes = 44
157 |     elif dataset_name == 'ego4d':
158 |         dataset = loaders.ego4d.load_dataset(
159 |             transforms=loaders.spatial_transforms.Compose(
160 |                 [loaders.spatial_transforms.Normalize([0.45], [0.225])]
161 |             )
162 |         )
163 |         num_classes = 17
164 |         # print(dataset['train'][1][1].shape)
165 |         # print(np.unique(dataset['train'].targets), len(np.unique(dataset['train'].targets)))
166 |         # raise ValueError('ego4d')
167 |     else:
168 |         raise ValueError(f'Dataset {dataset_name} type not supported')
169 | 
170 |     return dataset, num_classes
171 | 
172 | 
173 | def get_partition(partition_type, dataset_name, num_classes, client_num_in_total, client_num_per_round, alpha, dataset):
174 |     if partition_type == 'user' and dataset_name in {'wisdm_phone', 'wisdm_watch', 'widar', 'visdrone'}:  # names as produced by load_dataset
175 |         partition = UserPartition(dataset['split']['train'])
176 |         client_num_in_total = len(dataset['split']['train'].keys())
177 |     elif partition_type == 'uniform':
178 |         partition = UniformPartition(num_class=num_classes, num_clients=client_num_in_total)
179 |     elif partition_type == 'dirichlet':
180 |         if alpha is None:
181 |             warnings.warn('alpha is not set, using default value 0.1')
182 |             alpha = 0.1
183 |         partition = DirichletPartition(num_class=num_classes, num_clients=client_num_in_total, alpha=alpha)
184 |     elif partition_type == 'central':
185 |         partition = CentralizedPartition()
186 |         client_num_per_round = 1
187 |         client_num_in_total = 1
188 |     else:
189 |         raise ValueError(f'Partition {partition_type} type not supported')
190 | 
191 |     return partition, client_num_in_total, client_num_per_round
192 | 
193 | 
194 | def plot_data_distributions(dataset, dataset_name, client_datasets, num_classes):
195 |     if hasattr(dataset['train'], 'targets') and dataset_name != 'ego4d':
196 |         data_distribution, class_distribution = compute_client_data_distribution(datasets=client_datasets,
197 |                                                                                  num_classes=num_classes)
198 |         class_dist, sample_dist = get_html_plots(data_distribution, class_distribution)
199 |         wandb.log({'class_dist': wandb.Html(class_dist, inject=False),
200 |                    'sample_dist': wandb.Html(sample_dist, inject=False)},
201 |                   step=0)
202 |     # if dataset_name == 'visdrone':
203 |     #     targets = [[d['cls'] for d in dt] for dt in client_datasets]
204 |     #     data_distribution, class_distribution = compute_client_target_distribution(targets, num_classes=12)
205 |     #     wandb.log({'visdrone_class_dist': wandb.Html(class_dist, inject=False),
206 |     #                'sample_dist': wandb.Html(sample_dist, inject=False)},
207 |     #               step=0)
208 | 
209 | 
210 | def add_label_noise(analysis, dataset_name, client_datasets, num_classes):
211 |     confusion_matrix = pd.read_csv(f'confusion_matrices/conf_{dataset_name}.csv', header=0, index_col=None)
212 |     confusion_matrix = confusion_matrix.to_numpy()
213 |     confusion_matrix = confusion_matrix / confusion_matrix.sum(axis=1, keepdims=True)  # row-normalize: each true class sums to 1
214 |     _, error_rate, error_var = analysis.split('-')
215 |     error_rate = float(error_rate)
216 |     error_var = float(error_var)
217 |     print(f'Adding noise ...{error_rate}')
218 |     client_datasets, noise_percentages = inject_label_noise_with_matrix(client_datasets,
219 |                                                                         num_classes,
220 |                                                                         confusion_matrix,
221 |                                                                         error_rate)
222 |     print(noise_percentages)
223 |     return client_datasets, noise_percentages
224 | 
225 | 
226 | def plot_noise_distribution(noise_percentages):
227 |     table = wandb.Table(data=[[d] for d in noise_percentages], columns=['noise_ratio'])
228 |     wandb.log({"noise_percentages": wandb.plot.histogram(table, "noise_ratio",
229 |                                                          title="Label Noise Distribution")
230 |                }, step=0)
231 | 
--------------------------------------------------------------------------------
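Taken together, the helpers above compose into a short centralized-evaluation loop. A minimal end-to-end sketch (a sketch only: it assumes the WISDM watch data has already been downloaded, and that the loader returns a dict with 'train' and 'test' splits, as plot_data_distributions' use of dataset['train'] suggests):

    import torch
    from models.utils import load_model
    from partition.uniform import UniformPartition
    from scorers.classification_evaluator import evaluate
    from utils import load_dataset, set_seed

    set_seed(1)
    dataset, num_classes = load_dataset('wisdm_watch')        # 12 activity classes
    partition = UniformPartition(num_clients=10, num_class=num_classes)
    client_datasets = partition(dataset['train'])             # list of IndexedSubset shards
    model = load_model('LSTM_NET', trainer='BaseTrainer', dataset_name='wisdm_watch')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    metrics = evaluate(model, dataset['test'], device, num_classes=num_classes)
    print(metrics['accuracy'], metrics['f1_score'])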