├── BiModNeuroCNN ├── results │ ├── __init__.py │ ├── dataframe_utils.py │ ├── metrics.py │ └── results.py ├── subjects │ ├── __init__.py │ ├── subjects_utils.py │ └── subjects.py ├── version.py ├── __init__.py ├── models │ ├── __init__.py │ ├── network_utils.py │ ├── bimodal_cnn.py │ └── bimodal_cnn_pooling.py ├── data_loader │ ├── __init__.py │ ├── utils1.py │ ├── signal_processing_utils.py │ ├── data_utils.py │ └── data_loader.py ├── training │ ├── __init__.py │ ├── training_utils.py │ ├── bimodal_classification.py │ └── bimodal_training.py ├── utils_final.py └── utils.py ├── statistics ├── results_4_stats.xlsx └── anova_2_way.m ├── requirements.txt ├── License.txt ├── setup.py ├── README.md ├── .gitignore └── training_demo_nCV.PY /BiModNeuroCNN/results/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /BiModNeuroCNN/subjects/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /BiModNeuroCNN/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.0" -------------------------------------------------------------------------------- /BiModNeuroCNN/__init__.py: -------------------------------------------------------------------------------- 1 | from BiModNeuroCNN.version import __version__ 2 | -------------------------------------------------------------------------------- /BiModNeuroCNN/models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Bimodal convolutional neural network architechures 3 | """ 4 | -------------------------------------------------------------------------------- /BiModNeuroCNN/data_loader/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tools for loading and processing EEG or fNIRS data 3 | """ 4 | -------------------------------------------------------------------------------- /BiModNeuroCNN/training/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Training procedures for bimodal convolutional neural networks 3 | """ 4 | -------------------------------------------------------------------------------- /statistics/results_4_stats.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cfcooney/BiModNeuroCNN/HEAD/statistics/results_4_stats.xlsx -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | scipy 4 | sklearn 5 | tensorflow 6 | matplotlib 7 | h5py 8 | mne 9 | ast 10 | -------------------------------------------------------------------------------- /BiModNeuroCNN/data_loader/utils1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | from BiModNeuroCNN.subjects import subjects 4 | 5 | def subject_data_loader(filename): 6 | """ 7 | 8 | :param filename: (str) directory of stored data 9 | :return: tuples containing data and labels 10 | """ 11 | subj_object = subjects.Subject.load_subject(f"{filename}.pickle") 12 | return (subj_object.data1.astype(np.float32), subj_object.labels1.astype(np.int64)) 
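# Editorial usage sketch (illustrative only, not part of the original file): the path
# below is a hypothetical placeholder following the directory layout used in
# training_demo_nCV.PY, and assumes a Subject object was previously saved there with
# Subject.save_subject. Note that the ".pickle" extension is appended inside the function.
if __name__ == '__main__':
    data, labels = subject_data_loader("data/S11/Session_1/classifierData/actionImage_EEG_CLF")
    print(data.shape, data.dtype)       # (n_trials, n_channels, n_samples), float32
    print(labels.shape, labels.dtype)   # (n_trials,), int64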
13 | -------------------------------------------------------------------------------- /BiModNeuroCNN/data_loader/signal_processing_utils.py: -------------------------------------------------------------------------------- 1 | from scipy.signal import butter, lfilter 2 | from BiModNeuroCNN.utils import timer 3 | 4 | def butter_bandpass(lowcut, highcut, fs, order=5): 5 | nyq = 0.5 * fs 6 | low = lowcut / nyq 7 | high = highcut / nyq 8 | b, a = butter(order, [low, high], btype='band') 9 | return b, a 10 | 11 | @timer #order = 5 is standard 12 | def butter_bandpass_filter(data, lowcut, highcut, fs, order=5): 13 | b, a = butter_bandpass(lowcut, highcut, fs, order=order) 14 | y = lfilter(b, a, data) 15 | return y -------------------------------------------------------------------------------- /BiModNeuroCNN/subjects/subjects_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def eeg_to_3d(data, labels, epoch, n_events, n_chans): 4 | """ 5 | function to return a 3D EEG data format from a 2D input. 6 | Parameters: 7 | data: 2D np.array of EEG 8 | labels: (np.array ||list) 9 | epoch: number of samples per trial, int 10 | n_events: number of trials, int 11 | n_chan: number of channels, int 12 | 13 | Output: 14 | np.array of shape n_events * n_chans * n_samples 15 | """ 16 | idx, a, x = ([] for i in range(3)) 17 | [idx.append(i) for i in range(0,data.shape[1],epoch)] 18 | for j in data: 19 | [a.append([j[idx[k]:idx[k]+epoch]]) for k in range(len(idx))] 20 | 21 | return np.reshape(np.array(a),(labels.shape[0],n_chans,epoch)) -------------------------------------------------------------------------------- /BiModNeuroCNN/utils_final.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def gpu_check(): 4 | """ 5 | Test script for discovering GPUs 6 | Expected Output: GPU and Cuda available: True 7 | ****************************** 8 | Number of GPUs: 1 9 | ****************************** 10 | Current GPU: 0 11 | ****************************** 12 | Current GPU location: 13 | ****************************** 14 | GPU device type: GeForce 940MX 15 | ****************************** 16 | 17 | """ 18 | available = torch.cuda.is_available() 19 | print(f"GPU and Cuda available: {available}") 20 | 21 | print("*"*30) 22 | 23 | n_gpus = torch.cuda.device_count() 24 | print(f"Number of GPUs: {n_gpus}") 25 | 26 | print("*"*30) 27 | 28 | device = torch.cuda.current_device() 29 | print(f"Current GPU: {device}") 30 | 31 | print("*"*30) 32 | 33 | location = torch.cuda.device(0) 34 | print(f"Current GPU location: {location}") 35 | 36 | print("*"*30) 37 | 38 | type_gpu = torch.cuda.get_device_name(0) 39 | print(f"GPU device type: {type_gpu}") 40 | 41 | print("*"*30) -------------------------------------------------------------------------------- /License.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ciaran Cooney 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /BiModNeuroCNN/training/training_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def combine_batches(batch1,batch2): 4 |     """ 5 |     Function for combining batches of inputs and targets for 6 |     2 modalities of data. 7 |     :param batch1: (list-type) [0] inputs, [1] targets 8 |     :param batch2: (list-type) [0] inputs, [1] targets 9 |     :return: (list) [inputs1, targets1, inputs2, targets2] 10 |     """ 11 |     new_batch = [] 12 |     for inputs, target in zip(batch1, batch2): 13 |         a = list(inputs) 14 |         b = list(target) 15 |         a.append(b[0]) 16 |         a.append(b[1]) 17 |         new_batch.append(a) 18 |     return new_batch 19 | 20 | def current_loss(model_loss): 21 |     """ 22 |     Returns the minimum validation loss from the 23 |     trained model 24 |     """ 25 |     losses_list = [] 26 |     [losses_list.append(x) for x in model_loss] 27 |     return np.min(np.array(losses_list)) 28 | 29 | def current_acc(model_acc): 30 |     """ 31 |     Returns the maximum validation accuracy from the 32 |     trained model 33 |     """ 34 |     accs_list = [] 35 |     [accs_list.append(x) for x in model_acc] 36 |     return np.max(np.array(accs_list)) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from os import path 3 | 4 | file_dir = path.abspath(path.dirname(__file__)) 5 | 6 | with open(path.join(file_dir, 'README.md'), "r") as f: 7 |     long_description = f.read() 8 | 9 | 10 | version = dict() 11 | with open(path.join(file_dir, 'BiModNeuroCNN/version.py'), "r") as (version_file): 12 |     exec(version_file.read(), version) 13 | 14 | 15 | setuptools.setup( 16 | 17 |     name = "BiModNeuroCNN", 18 |     version = version['__version__'], 19 | 20 |     description = "Tools for bimodal training of CNNs, i.e. 
concurrent training with two data types", 21 |     long_description = long_description, 22 |     long_description_content_type = "text/markdown", 23 | 24 |     url = "git@github.com:cfcooney/BiModNeuroCNN.git", 25 | 26 |     author = "Ciaran Cooney", 27 | 28 |     license='MIT License', 29 | 30 |     install_requires=['braindecode==0.4.85', 'mne', 'numpy', 31 |                     'pandas', 'scipy', 'matplotlib',], 32 | 33 |     packages = setuptools.find_packages(), 34 | 35 |     classifiers = [ 36 | 37 |         "Intended Audience :: Developers", 38 |         "Intended Audience :: Science/Research", 39 |         'Topic :: Software Development :: Build Tools', 40 |         "Topic :: Scientific/Engineering :: Artificial Intelligence", 41 | 42 |         'Programming Language :: Python :: 3.6', 43 |     ] 44 | 45 | ) 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **BiModNeuroCNN** 2 | 3 | This is a package for training bimodal deep learning architectures on dual streams 4 | of neurological data. The package has been tested on Electroencephalography (EEG) and 5 | functional near-infrared spectroscopy (fNIRS). 6 | 7 | Work in progress - more to be added in the future. 8 | 9 | # Installation 10 | 11 | 1. Install PyTorch: http://pytorch.org/ 12 | 2. Install Braindecode: https://github.com/braindecode/braindecode 13 | 14 | 3. Install the latest release of BiModNeuroCNN using pip: 15 | ``` 16 | pip install BiModNeuroCNN 17 | ``` 18 | 19 | ## Dataset 20 | Link to dataset to be added upon upcoming publication. 21 | 22 | ## Citing 23 | Paper currently under review. 24 | 25 | Braindecode was used to implement this package: 26 | >@article {HBM:HBM23730, 27 | >author = {Schirrmeister, Robin Tibor and Springenberg, Jost Tobias and Fiederer, 28 | > Lukas Dominique Josef and Glasstetter, Martin and Eggensperger, Katharina and Tangermann, Michael and 29 | > Hutter, Frank and Burgard, Wolfram and Ball, Tonio}, 30 | >title = {Deep learning with convolutional neural networks for EEG decoding and visualization}, 31 | >journal = {Human Brain Mapping}, 32 | >issn = {1097-0193}, 33 | >url = {http://dx.doi.org/10.1002/hbm.23730}, 34 | >doi = {10.1002/hbm.23730}, 35 | >month = {aug}, 36 | >year = {2017}, 37 | >keywords = {electroencephalography, EEG analysis, machine learning, end-to-end learning, brain–machine interface, 38 | > brain–computer interface, model interpretability, brain mapping}, 39 | >} 40 | -------------------------------------------------------------------------------- /BiModNeuroCNN/models/network_utils.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | 3 | def _transpose_time_to_spat(x): 4 |     return x.permute(0, 3, 2, 1) 5 | 6 | def tensor_size(x): 7 |     print(x.size()) 8 |     return x 9 | 10 | def reshape_tensor(x): 11 |     x 12 |     return x.view(x.size(0),x.size(1)*x.size(2)*1) 13 | 14 | def reshape_output(x): 15 |     return x.view(x.size(0),4, 1 ,1) 16 | 17 | def reshape_4_lstm(x): 18 |     return x.view(x.size(0),1,x.size(1)) 19 | 20 | def dense_input(x): 21 |     return x.size(2) 22 | 23 | def tensor_print(x): 24 |     print(x.data.cpu().numpy()) 25 |     return x 26 | 27 | def linear_input_shape(x): 28 |     print(x.size(1)*x.size(2)) 29 |     return x.size(1)*x.size(2) 30 | 31 | def mean_inplace(tensor_1, tensor_2): 32 |     """ 33 |     Function for averaging the softmax outputs from two networks. 34 |     Currently not usable, as in-place changes to the tensor 35 |     cause problems with backpropagation. 36 |     :param tensor_1: 37 |     :param tensor_2: 38 |     :return: 39 | 
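    Note (editorial addition): overwriting elements of tensor_1 in place mutates a
    tensor that autograd may still need for the backward pass, which is why this
    version breaks backpropagation; new_mean below constructs a new tensor from the
    averaged values instead of modifying its inputs.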
""" 40 | for i in range(len(tensor_1)): 41 | for j in range(len(tensor_1[i])): 42 | tensor_1[i][j] = (tensor_1[i][j] + tensor_2[i][j]) / 2 43 | return tensor_1 44 | def new_mean(tensor_1, tensor_2): 45 | avg = [] 46 | for sm1, sm2 in zip(tensor_1, tensor_2): 47 | avg.append([(a+b) / 2 for a,b in zip(sm1, sm2)]) 48 | avg = th.tensor(avg, dtype=th.float32).cuda() 49 | return avg 50 | 51 | # remove empty dim at end and potentially remove empty time dim 52 | # do not just use squeeze as we never want to remove first dim 53 | def _squeeze_final_output(x): 54 | #print(x.shape) 55 | assert x.size()[3] == 1 56 | x = x[:, :, :, 0] 57 | if x.size()[2] == 1: 58 | x = x[:, :, 0] 59 | return x 60 | -------------------------------------------------------------------------------- /BiModNeuroCNN/results/dataframe_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | def results_df(index, index_name, columns_list, column_names): 5 | """ 6 | create tiered dataframe for hyper-parameter results. 7 | """ 8 | assert len(columns_list) == len(column_names), "Unequal length for columns/names!" 9 | miindex = pd.MultiIndex.from_product([index],names=[index_name]) 10 | micol = pd.MultiIndex.from_product(columns_list,names=column_names) 11 | return pd.DataFrame(index=miindex, columns=micol).sort_index().sort_index(axis=1) 12 | 13 | def get_col_list(hyp_params): 14 | """ 15 | returns a list of lists containing hyper-parameters of XD. 16 | parameters 17 | ---------------- 18 | :param: hyp_params (dict) keys: names of hyp_params, values: lists of HP values 19 | """ 20 | y = [] 21 | for n in range(len(list(hyp_params.keys()))): 22 | a = [] 23 | x = hyp_params[list(hyp_params.keys())[n]] 24 | 25 | 26 | if type(x[0]) == tuple: 27 | x1 = [] 28 | for h in x: 29 | x1.append(str(h)) 30 | x = x1 31 | if callable(x[0]): 32 | a.append([x[s].__name__ for s in range(len(x))]) 33 | y.append(a[0]) 34 | else: 35 | y.append(x) 36 | return y 37 | 38 | def param_scores_df(index, hyp_params): 39 | """ 40 | Creates dataframe for storing the mean scores for each hyper-parameter 41 | for each subject. Mean and Std. of each hyper-parameter is then stored for plotting. 
42 | """ 43 | columns_list = get_col_list(hyp_params) 44 | columns = list() 45 | for i in range(len(hyp_params)): 46 | for j in range(len(hyp_params[list(hyp_params.keys())[i]])): 47 | columns.append(f'{list(hyp_params.keys())[i]}, {columns_list[i][j]}') 48 | index.append("Mean") 49 | index.append("Std.") 50 | df = pd.DataFrame(index=index, columns=columns) 51 | a = df.columns.str.split(', ', expand=True).values 52 | 53 | #swap values in NaN and replace NAN to '' 54 | df.columns = pd.MultiIndex.from_tuples([('', x[0]) if pd.isnull(x[1]) else x for x in a]) 55 | return df -------------------------------------------------------------------------------- /BiModNeuroCNN/results/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.preprocessing import LabelBinarizer 3 | from sklearn.utils import check_array,check_consistent_length 4 | 5 | def weighted_sum(sample_score, sample_weight, normalize=False): 6 | if normalize: 7 | return np.average(sample_score, weights=sample_weight) 8 | elif sample_weight is not None: 9 | return np.dot(sample_score, sample_weight) 10 | else: 11 | return sample_score.sum() 12 | 13 | def cross_entropy(y_true, y_pred, eps=1e-15, labels=None): 14 | """ 15 | A metric that compares the predicted utterance likelihoods and 16 | the actual utterance identities across all trials for a subject. 17 | Given utterance log-likelihoods predicted by a model, cross entropy 18 | measures the average number of bits required to correctly classify 19 | those utterances. Cross entropy consideres predicted probabilities, not 20 | simply the most likely class for each trial. 21 | -- Lower cross entropy indicates better performance. 22 | :return: loss: float 23 | """ 24 | 25 | y_pred = check_array(y_pred, ensure_2d=False) 26 | 27 | lb = LabelBinarizer() 28 | if labels is not None: 29 | lb.fit(labels) 30 | else: 31 | lb.fit(y_true) 32 | 33 | 34 | if len(lb.classes_) <= 1: 35 | raise ValueError("Only 1 or 0 labels have been provided. 
Please provide correct labels.") 36 | 37 | transformed_labels = lb.transform(y_true) 38 | if transformed_labels.shape[1] == 1: 39 | transformed_labels = np.append(1 - transformed_labels, 40 | transformed_labels, axis=1) 41 | 42 | 43 | y_pred = np.clip(y_pred, eps, 1 - eps) #clipping required to protect against 1 and 0 probabilities 44 | 45 | transformed_labels = check_array(transformed_labels) 46 | 47 | if len(lb.classes_) != y_pred.shape[1]: 48 | raise ValueError("Ground truth and predictions contain a different number of values!") 49 | 50 | y_pred /= y_pred.sum(axis=1)[:, np.newaxis] 51 | 52 | loss = -(transformed_labels * np.log(y_pred)).sum(axis=1) 53 | 54 | return weighted_sum(loss, None,normalize=True) 55 | 56 | if __name__ == '__main__': 57 | 58 | labels = ['pig','cow','car','bus'] 59 | y_true = [1,2,0,3] 60 | y_pred = [[.1,.5,.2,.2], [.3,.05,.55,.1], [.5,.0,.0,.5], [.0,.35,0,.65]] 61 | 62 | # labels = ['pig'] 63 | # y_true = [0,1] 64 | # y_pred = [7.0722e-01, 2.3728e-05, 1.1968e-04, 2.9264e-01] 65 | 66 | print(cross_entropy(y_true, y_pred, eps=1e-15, labels=None)) 67 | 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | pytestdebug.log 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | doc/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | pythonenv* 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | # pytype static type analyzer 139 | .pytype/ 140 | 141 | # profiling data 142 | .prof 143 | 144 | # End of https://www.toptal.com/developers/gitignore/api/python -------------------------------------------------------------------------------- /BiModNeuroCNN/data_loader/data_utils.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import pandas as pd 4 | import collections 5 | from braindecode.datautil.signal_target import SignalAndTarget 6 | from imblearn.over_sampling import SMOTE 7 | 8 | def multi_SignalAndTarget(*args): 9 | """ 10 | Returns muliple SignalAndTarget objects from multiple (X,y) data tuples 11 | 12 | :param: *args (tuple) any number of tuples containing data and labels 13 | """ 14 | return_list= [] 15 | for arg in args: 16 | return_list.append(SignalAndTarget(arg[0], arg[1])) 17 | return tuple(return_list) 18 | 19 | def load_pickle(filename): 20 | 21 | with open(filename, 'rb') as f: 22 | file = pickle.load(f) 23 | return file 24 | 25 | def get_class_index_tuples(filename): 26 | """ 27 | Load removed trials from .txt file and reformat into list of tuples (index, class). 28 | Index is the trial number and class is the corresponding class label 29 | 30 | Inputs: filename (str): .txt file containing removed trials. E.g. f"{path}/removedEEG.txt" 31 | Returns: list of tuples (index, class) 32 | """ 33 | class_l, index_l, return_l = [], [], [] 34 | try: 35 | removed_trials= pd.read_csv(filename, header=None).values[0] 36 | 37 | for d in removed_trials: 38 | if type(d) == str: 39 | values = d.replace("(",",").replace(")","") 40 | class_l.append(int(values.split(",")[0])) 41 | index_l.append(int(values.split(",")[1])) 42 | [return_l.append((x,y)) for x,y in zip(index_l,class_l) if (x,y) not in return_l] 43 | return return_l 44 | except: 45 | print("Either no file available or no trials removed: [] returned.") 46 | return [] 47 | 48 | def combine_removed_trials(Rm1, Rm2, names): 49 | removed = collections.namedtuple("removed_samples", names) 50 | return removed(Rm1, Rm2) 51 | 52 | def get_classifier_window(data, start, end, prestim=0.5, sfreq=100): 53 | """ 54 | Similar to in 55 | that it extracts epoched time windows within a trial period. 
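    For example (illustrative): with sfreq=100 Hz and prestim=0.5 s, start=0.0 and
    end=2.0 select samples 50:250 along the final axis, i.e. a 2-second window
    beginning at stimulus onset.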
56 | 57 | :param data: (np.array) n_trials * n_chans * n_samples 58 | :param start: (float) time to begin classification window 59 | :param end: (float) time to end classification window 60 | :param prestim: (float) length of pre-stimulus period in the data 61 | :return: (np.array): n_trials*n_channels*len(classification_window) 62 | """ 63 | 64 | fcn = lambda x : x * sfreq 65 | 66 | start_samples = int(fcn(start)) + int(fcn(prestim)) 67 | end_samples = int(fcn(end)) + int(fcn(prestim)) 68 | classifier_data = data[:,:,start_samples:end_samples] 69 | epoch = classifier_data.shape[2] 70 | 71 | return classifier_data, epoch 72 | 73 | 74 | def smote_augmentation(data, labels, mixing_ratio=2, print_shape=False): 75 | """ 76 | Method for oversampling the number of trials to augment 77 | training data. Shoulf only be used on training data 78 | :input: data (3d array): training data 79 | labels (np.array OR list): class labels 80 | mixing_ratio (int): ratio to oversample - e.g. 2 means 81 | ratio of synthetic data to real data is 2:1 82 | print_shape (bool): command to print oversampled data shape 83 | :return: data_os (ndarray): array with a balanced set of trials 84 | labels_os (np.array): array with a balanced set of labels 85 | """ 86 | unique, counts = np.unique(labels, return_counts=True) 87 | os_value = np.ceil(np.max(counts) * mixing_ratio).astype(np.int32) 88 | 89 | s = SMOTE(sampling_strategy={np.unique(labels)[0]: os_value, np.unique(labels)[1]: os_value, 90 | np.unique(labels)[2]: os_value, np.unique(labels)[3]: os_value}, 91 | random_state=10, k_neighbors=3) 92 | 93 | data_os_2d, labels_os = s.fit_resample(data.reshape((data.shape[0], data.shape[1] * data.shape[2])), labels) 94 | data_os = data_os_2d.reshape((data_os_2d.shape[0], data.shape[1], data.shape[2])) 95 | 96 | if print_shape: 97 | print(f"Oversampled data shape: {data_os.shape}") 98 | 99 | return data_os, labels_os -------------------------------------------------------------------------------- /statistics/anova_2_way.m: -------------------------------------------------------------------------------- 1 | % Ciaran Cooney, 2020 2 | % Script for performing 2-way ANOVA with post-hoc analysis 3 | % using the Tukey Honest Significant Difference criterion. 4 | % 5 | % Data Structure: 6 | % Column Variables 7 | % Row Variables Condition 1 | Condition 2 8 | % Condition 1, Replication 1 10 | 13 9 | % Condition 1, Replication 2 12 | 14 10 | % Condition 2, Replication 1 5 | 5 11 | % Condition 2, Replication 2 7 | 9 12 | % Condition 3, Replication 1 18 | 16 13 | % Condition 3, Replication 2 11 | 10 14 | %% 15 | clear all 16 | path = '/'; 17 | 18 | %y = xlsread([path 'test_data.xlsx'],'2-way', 'B2:C70'); 19 | %path = 'C:/Users/cfcoo/OneDrive - Dundalk Institute of Technology/Study_3/Multimodal/'; 20 | y = xlsread([path 'testing_data.xlsx'],'overt', 'B2:D43'); 21 | 22 | replications = 28; % number of sessions 23 | 24 | [p,tbl,stats] = anova2(y, replications); 25 | tbl 26 | 27 | % Pairwise comparison of the column data 28 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable. 29 | 30 | figure 31 | % Pairwise comparison of the row data 32 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable. 
33 | 34 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 35 | %% 36 | clear all 37 | path = '/'; 38 | 39 | y = xlsread([path 'results_4_stats.xlsx'],'imagined', 'B2:D43'); 40 | 41 | replications = 21; % number of sessions 42 | 43 | [p,tbl,stats] = anova2(y, replications); 44 | tbl 45 | 46 | % Pairwise comparison of the column data 47 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable. 48 | 49 | figure 50 | % Pairwise comparison of the row data 51 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable. 52 | 53 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 54 | %% 55 | clear all 56 | path = '/'; 57 | 58 | y = xlsread([path 'results_4_stats.xlsx'],'bim_eeg_ov', 'B2:c169'); 59 | 60 | replications = 28; % number of sessions 61 | 62 | [p,tbl,stats] = anova2(y, replications); 63 | tbl 64 | 65 | % Pairwise comparison of the column data 66 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable. 67 | 68 | figure 69 | % Pairwise comparison of the row data 70 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable. 71 | 72 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 73 | %% 74 | clear all 75 | path = '/'; 76 | 77 | y = xlsread([path 'results_4_stats.xlsx'],'bim_eeg_im', 'B2:c127'); 78 | 79 | replications = 21; % number of sessions 80 | 81 | [p,tbl,stats] = anova2(y, replications); 82 | tbl 83 | 84 | % Pairwise comparison of the column data 85 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable. 86 | 87 | figure 88 | % Pairwise comparison of the row data 89 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable. 90 | 91 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 92 | %% 93 | %% 94 | clear all 95 | path = '/'; 96 | 97 | y = xlsread([path 'results_4_stats.xlsx'],'bim_fnirs_ov', 'B2:c169'); 98 | 99 | replications = 28; % number of sessions 100 | 101 | [p,tbl,stats] = anova2(y, replications); 102 | tbl 103 | 104 | % Pairwise comparison of the column data 105 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable. 106 | 107 | figure 108 | % Pairwise comparison of the row data 109 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable. 110 | 111 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 112 | %% 113 | clear all 114 | path = '/'; 115 | 116 | y = xlsread([path 'results_4_stats.xlsx'],'bim_fnirs_im', 'B2:c127'); 117 | 118 | replications = 21; % number of sessions 119 | 120 | [p,tbl,stats] = anova2(y, replications); 121 | tbl 122 | 123 | % Pairwise comparison of the column data 124 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable. 125 | 126 | figure 127 | % Pairwise comparison of the row data 128 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable. 
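Editorial note: for readers who prefer Python, a rough equivalent of the two-way ANOVA with Tukey HSD post-hoc comparisons used above can be sketched with statsmodels. This is an illustrative alternative only, not part of the repository (statsmodels is not listed in requirements.txt); the long-format layout and the column names col_factor/row_factor are assumptions, and the numbers are taken from the example table in the header comment of anova_2_way.m.

# Illustrative Python sketch of a 2-way ANOVA followed by Tukey HSD comparisons.
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Long-format version of the example table in the script header:
# row factor (3 conditions x 2 replications) crossed with a 2-level column factor.
df = pd.DataFrame({
    "score":      [10, 13, 12, 14, 5, 5, 7, 9, 18, 16, 11, 10],
    "col_factor": ["c1", "c2"] * 6,
    "row_factor": ["r1"] * 4 + ["r2"] * 4 + ["r3"] * 4,
})

# Two-way ANOVA with interaction (analogous to MATLAB's anova2 with replications).
model = ols("score ~ C(row_factor) * C(col_factor)", data=df).fit()
print(sm.stats.anova_lm(model, typ=2))

# Pairwise comparison of the column factor (analogous to multcompare with 'hsd').
print(pairwise_tukeyhsd(endog=df["score"], groups=df["col_factor"], alpha=0.05))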
129 | -------------------------------------------------------------------------------- /BiModNeuroCNN/data_loader/data_loader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io as spio 3 | import pickle 4 | from BiModNeuroCNN.utils import timer 5 | from BiModNeuroCNN.data_loader.data_utils import load_pickle 6 | 7 | 8 | class Loader: 9 | 10 | def __init__(self): 11 | self.data1 = np.array([]) 12 | self.labels1 = [] 13 | self.data2 = np.array([]) 14 | self.labels2 = [] 15 | self.combined_data = None 16 | self.datatype1 = None 17 | self.datatype2 = None 18 | 19 | def __repr__(self): 20 | return f"" 21 | 22 | def __str__(self): 23 | return f"Class for loading two different data types" 24 | 25 | def __getattr__(self, attr): 26 | if attr == "state": 27 | return f"Data 1:{not self.data1.size==0} : Labels 1:{not self.labels1==[]} :\ 28 | Data 2:{not self.data2.size==0} : Labels 2:{not self.labels2==[]}" 29 | if attr == "processed": 30 | pass 31 | 32 | def __setattr__(self, name, value): 33 | if name == "datatypes": 34 | self.datatype1 = value[0] 35 | self.datatype2 = value[1] 36 | else: 37 | super().__setattr__(name, value) 38 | 39 | @timer 40 | def loadmat(self, datafile, labelsfile=None): 41 | """ 42 | Load previously-validated EEG data and labels in the form of a .mat file. 43 | 44 | params: datafile (str): location and name of file containing data 45 | params: labelsfile (str): location and name of separate file containing labels 46 | 47 | Returns: n_trial * n_chans * n_samples Numpy array contianing EEG data. 48 | list containing labels for all trials. 49 | """ 50 | data = spio.loadmat(f"{datafile}.mat") 51 | data = data[list(data.keys())[3]] 52 | 53 | if labelsfile != None: 54 | labels = spio.loadmat(f"{labelsfile}.mat") 55 | labels = labels[list(labels.keys())[3]] 56 | 57 | if self.data1.size == 0: 58 | self.data1 = data 59 | self.labels1 = labels[0] 60 | elif self.data2.size == 0: 61 | self.data2 = data 62 | self.labels2 = labels[0] 63 | else: 64 | raise AttributeError("Maximum 2 data types already loaded") 65 | 66 | @timer 67 | def loadMNE(self, filename, data_tag='EEG', label_tag='labels', load_labels=True): 68 | """ 69 | Load previously-validated EEG data and labels in the form of an MNE Raw Array. 70 | 71 | Returns: n_chans * n_samples Numpy array contianing EEG data. 72 | data3D: n_trials * n_chans * n_samples reshaped EEG data. 73 | Numpy array containing labels for all trials. 74 | """ 75 | mnePickle = load_pickle(filename) 76 | data = mnePickle[data_tag].get_data()[:,:-1,:] #remove trigger channel from data 77 | if load_labels: 78 | labels = mnePickle[label_tag] 79 | 80 | if self.data1.size == 0: 81 | self.data1 = data 82 | self.labels1 = labels 83 | elif self.data2.size == 0: 84 | self.data2 = data 85 | self.labels2 = labels 86 | else: 87 | raise AttributeError("Maximum 2 data types already loaded") 88 | 89 | @timer 90 | def combine_data(self): 91 | """ 92 | Combine two data types into single np.array. Useful option for combined classification. 93 | Number of trials, channels and samples must be equal. 94 | 95 | Returns: n_trials * n_chans * n_samples Numpy array contianing data. 96 | """ 97 | assert self.data1 is not None, "No data loaded for set 1!" 98 | assert self.data2 is not None, "No data loaded for set 2!" 99 | 100 | assert self.data1.shape[0] == self.data2.shape[0], "Number of trials must be identical!" 
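        # Editorial note: trials (axis 0) and samples (axis 2) must match across the
        # two modalities; the concatenation below stacks the modalities along the
        # channel axis (axis 1).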
101 | assert self.data1.shape[2] == self.data2.shape[2], "NUmber of samples must be identical!" 102 | 103 | self.combined_data = np.concatenate((self.data1, self.data2), axis=1) 104 | assert self.combined_data.shape[1] == self.data1.shape[1] + self.data1.shape[1], "Axis 1 should be sum of EEG and fNIRS Axis 1" 105 | 106 | 107 | @staticmethod 108 | def match_removed_trials(data1, labels1, data2, labels2, total_labels, removed_all, print_result=True): 109 | """ 110 | Ensure that samples in two data types are correctly aligned by removed rejected trials from both. 111 | 112 | Inputs: data1 (np.ndarray): one of the two multimodal data types 113 | data2 (np.ndarray): one of the two multimodal data types 114 | total_labels (np.array || list): all class labels from entire dataset 115 | removed_trials_df (pd.DataFrame): 2 rows containing class and index of removed trials 116 | labels: (np.array || list): labels associated with the specific classes of data1 and data2 117 | Returns: data1 (np.array): data1 == in dimensions to data2 118 | labels1 (np.array || list): data1 == data2 119 | data2 (np.array): data2 == in dimensions to data1 120 | labels2 (np.array || list): data2 == data1 121 | """ 122 | 123 | placeholder_data1 = np.zeros((data1.shape[1],data1.shape[2])) 124 | placeholder_data2 = np.zeros((data2.shape[1],data1.shape[2])) 125 | 126 | for tup1 in removed_all.data_1: 127 | labels1 = np.insert(labels1,tup1[0],tup1[1]) 128 | data1 = np.insert(data1, tup1[0], placeholder_data1, axis=0) 129 | 130 | for tup2 in removed_all.data_2: 131 | labels2 = np.insert(labels2,tup2[0],tup2[1]) 132 | data2 = np.insert(data2, tup2[0], placeholder_data2, axis=0) 133 | 134 | combined_tups = removed_all.data_1 135 | for t in removed_all.data_2: 136 | if t not in combined_tups: 137 | combined_tups.append(t) 138 | removal_index = [] 139 | for i in combined_tups: 140 | removal_index.append(i[0]) 141 | removal_index = list(reversed(np.sort(removal_index))) 142 | 143 | for idx in removal_index: 144 | total_labels = np.delete(total_labels, idx) 145 | data1 = np.delete(data1, idx, axis=0) 146 | labels1 = np.delete(labels1, idx) 147 | data2 = np.delete(data2, idx, axis=0) 148 | labels2 = np.delete(labels2, idx) 149 | 150 | if print_result: 151 | _, counts = np.unique(total_labels, return_counts=True) 152 | print(f"Total: {counts}") 153 | _, counts = np.unique(labels1, return_counts=True) 154 | print(f"EEG: {counts}") 155 | _, counts = np.unique(labels2, return_counts=True) 156 | print(f"fNIRS: {counts}") 157 | 158 | return data1, labels1, data2, labels2 -------------------------------------------------------------------------------- /BiModNeuroCNN/subjects/subjects.py: -------------------------------------------------------------------------------- 1 | from BiModNeuroCNN.utils import timer, labels_dict_and_list 2 | from BiModNeuroCNN.data_loader.data_loader import Loader 3 | from scipy.signal import decimate as dec 4 | from tensorflow.keras.utils import normalize 5 | from scipy.signal import butter, lfilter 6 | import numpy as np 7 | import pandas as pd 8 | import scipy.io as spio 9 | import pickle 10 | import warnings 11 | import os 12 | warnings.filterwarnings('ignore', category=FutureWarning) 13 | 14 | 15 | class Subject(Loader): 16 | 17 | direct = 'C:/Users/cfcoo/OneDrive - Ulster University/Study_3/Subject_Data' 18 | 19 | def __init__(self, id): 20 | 21 | super().__init__() 22 | 23 | self.id = id 24 | 25 | self.channels_validated = False 26 | self.trials_validated = False 27 | self.description = None 28 | 
self.data_loaded = False 29 | 30 | self.data1 = np.array([]) 31 | self.data2 = np.array([]) 32 | self.data_combined = None 33 | self.labels1 = [] 34 | self.labels2 = [] 35 | self.labels_combined = None 36 | 37 | self.epoched_data1 = None 38 | self.epoched_data2 = None 39 | self.classifier_start = 0 40 | self.classifier_end = 0 41 | 42 | self.classnames = [] 43 | 44 | self.sfreq1 = 0 45 | self.sfreq2 = 0 46 | self.lowcut = 0 47 | self.highcut = 0 48 | self.downsample_rate1 = 2 49 | self.downsample_rate2 = 2 50 | self.downsampled = [False, False] 51 | self.normalized = [False, False] 52 | self.filtered = [False, False] 53 | 54 | def __repr__(self): 55 | return f"" 56 | 57 | def __str__(self): 58 | return f"Class for creating subject-specific objects for multi-subject experiments." 59 | 60 | # def __getattr__(self, attr): 61 | # pass 62 | 63 | # def __setattr__(self, name, value): 64 | # pass 65 | 66 | def set_description(self, description): 67 | self.description = description 68 | 69 | def get_description(self): 70 | return self.description 71 | 72 | def change_directory(self, new_direct): 73 | self.direct = new_direct 74 | 75 | def set_channel_validation(self, validated): 76 | assert type(validated) == bool 77 | self.channels_validated = validated 78 | 79 | def get_channel_validation(self): 80 | return self.channels_validated 81 | 82 | def set_trial_validation(self, validated): 83 | assert type(validated) == bool 84 | self.trials_validated = validated 85 | 86 | def get_trial_validation(self): 87 | return self.trials_validated 88 | 89 | def get_classifier_window(self, start, end, data1=True, data2=True, prestim=0.5, sfreq1=100, sfreq2=100): 90 | """ 91 | Epoch the time-period within each trial to extract a specfic window for analysis. 92 | 93 | :param start: (float) time to begin classification window 94 | :param end: (float) time to end classification window 95 | :param data1 (bool) whether to apply method to self.data1 96 | :param data2 (bool) whether to apply method to self.data2 97 | :param prestim: (float) length of pre-stimulus period in the data 98 | :param sfreq1: (int) sampling frequency of self.data1 99 | :param sfreq2: (int) sampling frequency of self.data2 100 | :return: (np.array): n_trials*n_channels*len(classification_window) 101 | """ 102 | if data1 == False and data2 == False: 103 | raise ValueError(f"Require at least one data type to be True: data1:{data1}, data2:{data2}") 104 | else: 105 | self.classifier_start = start 106 | self.classifier_end = end 107 | if data1: 108 | fcn = lambda x : x * sfreq1 109 | 110 | start_samples = int(fcn(start)) + int(fcn(prestim)) 111 | end_samples = int(fcn(end)) + int(fcn(prestim)) 112 | 113 | self.epoched_data1 = self.data1[:,:,start_samples:end_samples] 114 | if data2: 115 | fcn = lambda x : x * sfreq2 116 | 117 | start_samples = int(fcn(start)) + int(fcn(prestim)) 118 | end_samples = int(fcn(end)) + int(fcn(prestim)) 119 | 120 | self.epoched_data2 = self.data2[:,:,start_samples:end_samples] 121 | 122 | def bandpass(self, lowcut, highcut, order, data1=True, data2=False, sfreq1=100, sfreq2=100): 123 | """ 124 | Bandpass filter the data with butterworth filter. Use for EEG data 125 | 126 | :params: lowcut (float): low-pass cutoff frequency 127 | :params: highcut (float): high-pass cutoff frequency 128 | :params: order (int): Butterworth filter order number - see scipy docs. 
129 | :params: data1 (bool): filter data1 or not 130 | :params: data2 (bool): filter data2 or not 131 | :params: sfreq1: (int) sampling frequency of self.data1 132 | :params: sfreq2: (int) sampling frequency of self.data2 133 | Returns: n_trial * n_chans * n_samples Numpy array contianing filtered data. 134 | """ 135 | if data1 == False and data2 == False: 136 | raise ValueError(f"Require at least one data type to be True: data1:{data1}, data2:{data2}") 137 | else: 138 | self.lowcut = lowcut 139 | self.highcut = highcut 140 | if data1: 141 | self.sfreq1 = sfreq1 142 | nyq = 0.5 * sfreq1 143 | low = lowcut / nyq 144 | high = highcut / nyq 145 | b, a = butter(order, [low, high], btype='band') 146 | self.data1 = lfilter(b, a, self.data1) 147 | self.filtered[0] = True 148 | if data2: 149 | self.sfreq2 = sfreq2 150 | nyq = 0.5 * sfreq2 151 | low = lowcut / nyq 152 | high = highcut / nyq 153 | b, a = butter(order, [low, high], btype='band') 154 | self.data2 = lfilter(b, a, self.data2) 155 | self.filtered[1] = True 156 | 157 | def down_and_normal(self, data1=True, data2=False, downsample_rate1=2, downsample_rate2=2, norm=True): 158 | """ 159 | Downsample and normalize the data. 160 | 161 | :params: data1 (bool): apply to data1 or not 162 | :params: data2 (bool): apply to data2 or not 163 | :params: downsample_rate1 (int): downsample rate. 164 | :params: downsample_rate2 (int): downsample rate. 165 | :params: norm: (bool) to normalize or not to normalize. 166 | Returns: n_trial * n_chans * n_samples Numpy array containing downsampled and/or normalized data. 167 | """ 168 | if data1 == False and data2 == False: 169 | raise ValueError(f"Require at least one data type to be True: data1:{data1}, data2:{data2}") 170 | else: 171 | fnc = lambda a: a * 1e6 # improves numerical stability 172 | if data1: 173 | self.downsample_rate1 = downsample_rate1 174 | if self.downsample_rate1 > 1: 175 | self.data1 = dec(self.data1, downsample_rate1) 176 | self.downsampled[0] = True 177 | 178 | self.data1 = fnc(self.data1) 179 | if norm: 180 | self.data1 = normalize(self.data1) 181 | self.normalized[0] = True 182 | 183 | if data2: 184 | self.downsample_rate2 = downsample_rate2 185 | if self.downsample_rate2 > 1: 186 | self.data2 = dec(self.data2, downsample_rate2) 187 | self.downsampled[1] = True 188 | 189 | self.data2 = fnc(self.data2) 190 | if norm: 191 | self.data2 = normalize(self.data2) 192 | self.normalized[1] = True 193 | 194 | 195 | def get_classnames(self, classes): 196 | """ 197 | Returns sub-group of classnames from a global list of class names. List of 198 | class names passed as a pd.DataFrame with column names == class names 199 | labels corresponding to trials are associated with values in a dict. 200 | :return: list of class names to object 201 | """ 202 | labels_dict, _ = labels_dict_and_list(classes) 203 | for i in np.unique(self.labels1): 204 | self.classnames.append(labels_dict[str(i)]) 205 | 206 | def clear_data(self): 207 | """ 208 | Reset to empty data structures. 209 | """ 210 | self.data1 = np.array([]) 211 | self.data2 = np.array([]) 212 | self.data_combined = None 213 | self.labels1 = [] 214 | self.labels2 = [] 215 | self.labels_combined = None 216 | 217 | self.epoched_data1 = None 218 | self.epoched_data2 = None 219 | 220 | def save_subject(self, path, filename): 221 | """ 222 | Save the subject object as a pickle. 
223 | 224 | :param path: (str) path to saving directory 225 | :param filename: (str) name to save object as 226 | """ 227 | if not os.path.exists(path): 228 | print("Creating new subject file...") 229 | os.makedirs(path) 230 | filename = f"{path}/{filename}.pickle" 231 | filehandler = open(filename, 'wb') 232 | pickle.dump(self.__dict__, filehandler, protocol=pickle.HIGHEST_PROTOCOL) 233 | print(f"Data object saved to: '{filename}'\n") 234 | 235 | def update(self,newdata): 236 | for key,value in newdata.items(): 237 | setattr(self,key,value) 238 | 239 | @classmethod 240 | def load_subject(self, f_name): 241 | with open(f_name, 'rb') as f: 242 | tmp_dict = pickle.load(f) 243 | f.close() 244 | self.update(self, tmp_dict) 245 | return self 246 | 247 | def get_details(self): 248 | print(f"Subject: {self.id}") 249 | print("-"*15) 250 | print(self.description) 251 | print("-"*15) 252 | if self.data1.size != 0: 253 | print(f"Data 1 shape: {self.data1.shape}") 254 | print(f"Labels 1 shape: {len(self.labels1)}") 255 | print(f"Class names: {self.classnames}") 256 | print(f"Number of valid channels: {self.data1.shape[1]}") 257 | print(f"Sampling Frequency: {self.sfreq1} Hz") 258 | print(f"Data downsampled: {self.downsampled[0]}") 259 | if self.downsampled[0]: 260 | print(f"Downsample Rate: {self.downsample_rate1}") 261 | if self.normalized[0]: 262 | print(f"Data normalized: {self.normalized[0]}") 263 | if self.filtered[0]: 264 | print(f"Data bandpass filtered between {self.lowcut} and {self.highcut} Hz") 265 | if self.epoched_data1 is not None: 266 | print(f"Classifier Window Size: {self.epoched_data1.shape}") 267 | print(f"Classifier Start Time: {self.classifier_start} seconds") 268 | print(f"Classifier End Time: {self.classifier_end} seconds\n") 269 | if self.data1.size != 0: 270 | print(f"Data 2 shape: {self.data2.shape}") 271 | print(f"Labels 2 shape: {len(self.labels2)}") 272 | print(f"Class names: {self.classnames}") 273 | print(f"Number of valid channels: {self.data2.shape[1]}") 274 | print(f"Sampling Frequency: {self.sfreq2} Hz") 275 | print(f"Data downsampled: {self.downsampled[1]}") 276 | if self.downsampled[1]: 277 | print(f"Downsample Rate: {self.downsample_rate1}") 278 | print(f"Data normalized: {self.normalized[1]}") 279 | if self.filtered[1]: 280 | print(f"Data bandpass filtered between {self.lowcut} and {self.highcut} Hz") 281 | if self.epoched_data2 is not None: 282 | print(f"Classifier Window Size: {self.epoched_data1.shape}") -------------------------------------------------------------------------------- /training_demo_nCV.PY: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from BiModNeuroCNN.results.results import Results 4 | from BiModNeuroCNN.training.bimodal_classification import Classification 5 | from BiModNeuroCNN.utils import load_subject, format_data, timer, get_model_loss_and_acc, windows_index 6 | from BiModNeuroCNN.data_loader.data_loader import Loader 7 | from BiModNeuroCNN.data_loader.data_utils import get_class_index_tuples, combine_removed_trials 8 | from BiModNeuroCNN.data_loader.utils1 import subject_data_loader 9 | from BiModNeuroCNN.models.bimodal_cnn import BiModalNet 10 | from BiModNeuroCNN.models.bimodal_cnn_pooling import BiModalNet_w_Pool 11 | from torch.optim.lr_scheduler import MultiStepLR, StepLR 12 | from braindecode.datautil.signal_target import SignalAndTarget 13 | from sklearn.model_selection import StratifiedKFold 14 | from braindecode.experiments.monitors import 
LossMonitor, MisclassMonitor, RuntimeMonitor 15 | from braindecode.torch_ext.constraints import MaxNormDefaultConstraint 16 | from braindecode.torch_ext.functions import safe_log 17 | from sklearn.metrics import accuracy_score 18 | import warnings 19 | warnings.filterwarnings('ignore', category=FutureWarning) 20 | import logging 21 | import sys 22 | import torch as th 23 | import os 24 | 25 | th.backends.cudnn.deterministic = True 26 | log = logging.getLogger() 27 | logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s', 28 | level=logging.INFO, stream=sys.stdout) 29 | 30 | 31 | WINDOW_LEN = 200 32 | OVERLAP = 150 33 | windows = windows_index(500,WINDOW_LEN,OVERLAP,250) 34 | 35 | hyp_params = dict(window=windows[:2], 36 | activation=["leaky_relu"], 37 | structure= ["shallow"]) 38 | 39 | 40 | parameters = dict(best_loss = 100.0, 41 | batch_size = 32, 42 | monitors = [LossMonitor(), MisclassMonitor(), RuntimeMonitor()], 43 | model_constraint = MaxNormDefaultConstraint(), 44 | max_increase_epochs = 0, 45 | cuda = True, 46 | epochs=1, 47 | learning_rate_scheduler=StepLR, 48 | lr_step=20, lr_gamma=0.9) 49 | 50 | 51 | 52 | EEGSubNet_params = dict(n_filters_time=40, filter_time_length=5, n_filters_spat=40, n_filters_2=20, filter_length_2=20, 53 | pool_time_length_1=5, pool_time_stride_1=2, pool_length_2=5, pool_stride_2=3, final_conv_length='auto', 54 | conv_nonlin=th.nn.functional.leaky_relu, pool_mode='mean', pool_nonlin=safe_log, 55 | split_first_layer=True, batch_norm=True, batch_norm_alpha=0.2, 56 | drop_prob=0.1) 57 | 58 | fNIRSSubNet_params = dict(n_filters_time=40, filter_time_length=5, n_filters_spat=40, n_filters_2=20, filter_length_2=20, 59 | pool_time_length_1=5, pool_time_stride_1=2, pool_length_2=5, pool_stride_2=3, final_conv_length='auto', 60 | conv_nonlin=th.nn.functional.leaky_relu, pool_mode='mean', pool_nonlin=safe_log, 61 | split_first_layer=True, batch_norm=True, batch_norm_alpha=0.2, 62 | drop_prob=0.1) 63 | 64 | 65 | @timer 66 | def train_nested_cv(data1, labels1, data2, labels2, model, rm1_file, rm2_file, subnet1_params, 67 | subnet2_params, directory, hyps, params, labels_dict): 68 | 69 | 70 | unique = np.unique(labels1, return_counts=False) 71 | 72 | num_folds = 5 73 | skf = StratifiedKFold(n_splits=num_folds, shuffle=False, 74 | random_state=10) # don't randomize trials to preserce structure 75 | 76 | 77 | subj_results = Results(directory, num_folds, 'test') # results structure 78 | subj_results.get_acc_loss_df(hyps, 'Fold') # empty dataframe headed with each HP set 79 | 80 | 81 | ##### Match Removed Trials ##### 82 | d1Rem = get_class_index_tuples(rm1_file) 83 | d2Rem = get_class_index_tuples(rm2_file) 84 | names = 'data_1,data_2' 85 | removed_all = combine_removed_trials(d1Rem, d2Rem, names) 86 | 87 | # Remove bad trials from both datasets and align 88 | data1_matched, labels1_matched, data2_matched, labels2_matched = Loader.match_removed_trials(data1, labels1, data2, 89 | labels2, total_labels, 90 | removed_all, print_result=False) 91 | 92 | subtr_ceoff = np.min(labels1_matched) # required to set labels from zero 93 | fcn = lambda l: l - subtr_ceoff 94 | labels1_matched = fcn(labels1_matched) 95 | labels2_matched = fcn(labels2_matched) 96 | 97 | assert labels1_matched.all() == labels2_matched.all(), f"Order of trial labels must be identical!" 
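    # Editorial note: the subtraction above rebases class labels to start at zero
    # (e.g. labels [5, 6, 7, 8] become [0, 1, 2, 3]), the form expected by PyTorch's
    # log-softmax / cross-entropy losses. A stricter equality check here would be
    # np.array_equal(labels1_matched, labels2_matched), since comparing .all() values
    # only compares whether every label in each array is non-zero.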
98 | 99 | 100 | data_params = dict(n_classes=len(unique), 101 | n_chans_d1=data1_matched.shape[1], 102 | input_time_length_d1=WINDOW_LEN, 103 | n_chans_d2=data2_matched.shape[1], 104 | input_time_length_d2=WINDOW_LEN) 105 | 106 | clf = Classification(model, subnet1_params, subnet2_params, hyps, params, data_params, path3, "package_test") 107 | 108 | subj_results.y_true = np.array([]) 109 | trainsetlist, testsetlist, inner_fold_acc, inner_fold_loss, inner_fold_CE = ([] for i in range(5)) 110 | 111 | print(f"Inner-fold training for Subject {subject} in progress...") 112 | 113 | for inner_ind, outer_index in skf.split(data1_matched, labels1_matched): 114 | 115 | data1_matched_if, data1_matched_of, data2_matched_if, data2_matched_of = data1_matched[inner_ind], data1_matched[outer_index], \ 116 | data2_matched[inner_ind], data2_matched[outer_index] 117 | inner_labels, outer_labels = labels1_matched[inner_ind], labels1_matched[outer_index] 118 | subj_results.concat_y_true(outer_labels) 119 | 120 | print(data1_matched_if.shape, data2_matched_if.shape) 121 | 122 | trainsetlist.append((SignalAndTarget(data1_matched_if, inner_labels), SignalAndTarget(data2_matched_if, inner_labels))) # used for outer-fold train/test 123 | testsetlist.append((SignalAndTarget(data1_matched_of, outer_labels), SignalAndTarget(data2_matched_of, outer_labels))) 124 | 125 | for train_idx, valid_idx in skf.split(data1_matched_if, inner_labels): 126 | 127 | d1_train, d1_val, d2_train, d2_val = data1_matched_if[train_idx], data1_matched_if[valid_idx], \ 128 | data2_matched_if[train_idx], data2_matched_if[valid_idx] 129 | y_train, y_val = inner_labels[train_idx], inner_labels[valid_idx] 130 | 131 | train_set_1 = SignalAndTarget(d1_train, y_train) 132 | val_set_1 = SignalAndTarget(d1_val, y_val) 133 | train_set_2 = SignalAndTarget(d2_train, y_train) 134 | val_set_2 = SignalAndTarget(d2_val, y_val) 135 | 136 | hyp_param_acc, hyp_param_loss, hyp_param_CE = clf.train_inner(train_set_1, val_set_1, train_set_2, val_set_2, 137 | test_set_1=None, test_set_2=None, save_model=False) 138 | 139 | 140 | inner_fold_loss.append(hyp_param_loss) #5 outer folds * 5 inner folds * number of HPs 141 | inner_fold_acc.append(hyp_param_acc) 142 | inner_fold_CE.append(hyp_param_CE) 143 | 144 | subj_results.fill_acc_loss_df(inner_fold_loss, inner_fold_acc, inner_fold_CE) 145 | 146 | subj_results.get_best_params("accuracy") 147 | print(f"best params: {subj_results.best_params}") 148 | clf.best_params = subj_results.best_params 149 | clf.set_best_params() 150 | 151 | # accuracy score for each fold, combined predictions for each fold 152 | scores, fold_models, predictions, probabilities, outer_cross_entropy, y_true = clf.train_outer(trainsetlist, 153 | testsetlist, 154 | False, print_details=True) 155 | 156 | print(f"Accuracy: {round((accuracy_score(y_true, predictions) * 100), 3)}") 157 | 158 | subj_results.outer_fold_accuracies = scores 159 | subj_results.y_pred = np.array(predictions) 160 | subj_results.y_probs = np.array(probabilities) 161 | subj_results.outer_fold_cross_entropies = outer_cross_entropy 162 | 163 | subj_results.train_loss, subj_results.valid_loss, subj_results.test_loss, subj_results.train_acc, subj_results.valid_acc, subj_results.test_acc = get_model_loss_and_acc( 164 | fold_models) 165 | try: 166 | subj_results.save_result() 167 | except BaseException: 168 | print(f"Unable to save results for Subject: {subj} / Session: {session} - {category}") 169 | try: 170 | subj_results.subject_stats() 171 | print("") 172 | 
print(subj_results.subject_stats_df.head()) 173 | 174 | subj_results.get_accuracy() 175 | #print(f"Mean Accuracy: {subj_results.accuracy}") 176 | except ValueError: 177 | print(f"Unable to store subject stats as excel file - see the subject results pickle.") 178 | pass 179 | 180 | 181 | if __name__ == '__main__': 182 | 183 | directory = 'BiModNeuroCNN/data/' 184 | 185 | subjects = ['11'] 186 | sessions = [1] 187 | 188 | save_dir = directory # chance to suitable storage directory 189 | 190 | labels_dict = dict(actionText=[1, 2, 3,4], combsText=[5, 6, 7, 8], 191 | actionImage=[9, 10, 11, 12], combsImage=[13, 14, 15,16], 192 | actionAudio=[17, 18, 19, 20], combsAudio=[21, 22, 23, 24]) 193 | 194 | categories = ["actionImage"] 195 | 196 | model = BiModalNet 197 | 198 | for subject in subjects: 199 | path1 = f"{save_dir}/S{subject}" 200 | 201 | if not os.path.exists(path1): 202 | os.makedirs(path1) 203 | 204 | for session in sessions: 205 | path2 = f"{path1}/Session_{session}" 206 | 207 | total_labels = pd.read_csv(f"{directory}/S{subject}/Session_{session}/total_labels.txt", header=None).values[0] 208 | removed_labels_1 = "{directory}/S{subj}/Session_{session}/removedEEG.txt" 209 | removed_labels_2 = "{directory}/S{subj}/Session_{session}/removedEEG.txt" 210 | 211 | if not os.path.exists(path2): 212 | os.makedirs(path2) 213 | 214 | for category in categories: 215 | path3 = f"{path2}/{category}" 216 | 217 | if not os.path.exists(path3): 218 | os.makedirs(path3) 219 | 220 | f_name_1 = f"{directory}/S{subject}/Session_{session}/classifierData/{category}_EEG_CLF" 221 | f_name_2 = f"{directory}/S{subject}/Session_{session}/classifierData/{category}_fNIRS_CLF_0" 222 | data_1, labels_1 = subject_data_loader(f_name_1) 223 | data_2, labels_2 = subject_data_loader(f_name_2) 224 | 225 | train_nested_cv(data_1, labels_1, data_2, labels_2, model, removed_labels_1, removed_labels_2, EEGSubNet_params, 226 | fNIRSSubNet_params, directory, hyp_params, parameters, labels_dict) 227 | 228 | 229 | -------------------------------------------------------------------------------- /BiModNeuroCNN/models/bimodal_cnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch as th 3 | from torch import nn 4 | from torch.nn import init 5 | from braindecode.models.base import BaseModel 6 | from braindecode.torch_ext.modules import Expression 7 | from braindecode.torch_ext.functions import safe_log, square 8 | from BiModNeuroCNN.models.network_utils import reshape_tensor, reshape_4_lstm, _transpose_time_to_spat, tensor_size 9 | 10 | class SubNet(BaseModel): 11 | """ 12 | Temporal-Spatial first layer based on [2] 13 | 14 | References 15 | ---------- 16 | 17 | .. [2] Schirrmeister, R. T., Springenberg, J. T., Fiederer, L. D. J., 18 | Glasstetter, M., Eggensperger, K., Tangermann, M., Hutter, F. & Ball, T. (2017). 19 | Deep learning with convolutional neural networks for EEG decoding and 20 | visualization. 21 | Human Brain Mapping , Aug. 2017. 
Online: http://dx.doi.org/10.1002/hbm.23730 22 | """ 23 | 24 | def __init__( 25 | self, 26 | in_chans, 27 | n_classes, 28 | input_time_length=None, 29 | n_filters_time=40, 30 | filter_time_length=25, 31 | n_filters_spat=40, 32 | n_filters_2=10, 33 | filter_length_2=10, 34 | pool_time_length=25, 35 | pool_time_stride=15, 36 | final_conv_length=30, 37 | conv_nonlin=square, 38 | pool_mode="mean", 39 | pool_nonlin=safe_log, 40 | later_nonlin=None, 41 | later_pool_nonlin=nn.functional.leaky_relu, 42 | split_first_layer=True, 43 | batch_norm=True, 44 | batch_norm_alpha=0.1, 45 | drop_prob=0.1, 46 | stride_before_pool=False, 47 | structure = "shallow", 48 | fc1_out_features = 500, 49 | fc2_out_features = 500, 50 | ): 51 | if final_conv_length == "auto": 52 | assert input_time_length is not None 53 | self.__dict__.update(locals()) 54 | del self.self 55 | 56 | def create_network(self): 57 | if self.stride_before_pool: 58 | conv_stride = self.pool_time_stride 59 | else: 60 | conv_stride = 1 61 | model = nn.Sequential() 62 | 63 | if self.split_first_layer: 64 | model.add_module("dimshuffle", Expression(_transpose_time_to_spat)) 65 | model.add_module("conv_time", nn.Conv2d(1, self.n_filters_time, (self.filter_time_length, 1), 66 | stride=1, ), ) 67 | model.add_module("conv_spat", nn.Conv2d(self.n_filters_time, self.n_filters_spat, 68 | (1, self.in_chans), stride=1, bias=not self.batch_norm, ),) 69 | n_filters_conv = self.n_filters_spat 70 | n_filters_op = self.n_filters_spat * (self.input_time_length - 4) # semi-hardcoded at the moment 71 | else: 72 | model.add_module("conv_time", nn.Conv2d(self.in_chans, self.n_filters_time, 73 | (self.filter_time_length, 1), stride=1, 74 | bias=not self.batch_norm, ), ) 75 | n_filters_conv = self.n_filters_time 76 | n_filters_op = self.n_filters_time * (self.input_time_length - 4) # semi-hardcoded at the moment 77 | 78 | if self.batch_norm: 79 | model.add_module("bnorm", nn.BatchNorm2d(n_filters_conv, momentum=self.batch_norm_alpha, 80 | affine=True), ) 81 | model.add_module("conv_nonlin", Expression(self.conv_nonlin)) 82 | model.add_module("drop", nn.Dropout(p=self.drop_prob)) 83 | 84 | def add_conv_pool_block(model, n_filters_before, 85 | n_filters, filter_length, block_nr): 86 | 87 | model.add_module(f"conv_{block_nr}", nn.Conv2d(n_filters_before, n_filters, 88 | (filter_length, 1), stride=(conv_stride, 1), 89 | bias=not self.batch_norm)) 90 | 91 | if self.batch_norm: 92 | model.add_module(f"bnorm_{block_nr}", nn.BatchNorm2d(n_filters, 93 | momentum=self.batch_norm_alpha, 94 | affine=True, eps=1e-5)) 95 | model.add_module(f"nonlin_{block_nr}", Expression(self.conv_nonlin)) 96 | model.add_module(f"drop_{block_nr}", nn.Dropout(p=self.drop_prob)) 97 | 98 | if self.structure == "deep": 99 | add_conv_pool_block(model, n_filters_conv, self.n_filters_2, 100 | self.filter_length_2, 2) 101 | 102 | n_filters_op = self.n_filters_2 * (self.input_time_length - 23) # semi-hardcoded at the moment 103 | 104 | model.add_module('reshape', Expression(reshape_tensor)) 105 | 106 | model.add_module('fc_1', nn.Linear(n_filters_op, self.fc1_out_features, bias=True)) 107 | 108 | 109 | # Initialization is xavier for initial layers 110 | init.xavier_uniform_(model.conv_time.weight, gain=1) 111 | # maybe no bias in case of no split layer and batch norm 112 | if self.split_first_layer or (not self.batch_norm): 113 | init.constant_(model.conv_time.bias, 0) 114 | if self.split_first_layer: 115 | init.xavier_uniform_(model.conv_spat.weight, gain=1) 116 | if not self.batch_norm: 117 | 
init.constant_(model.conv_spat.bias, 0) 118 | if self.batch_norm: 119 | init.constant_(model.bnorm.weight, 1) 120 | init.constant_(model.bnorm.bias, 0) 121 | 122 | param_dict = dict(list(model.named_parameters())) 123 | if self.structure == "deep": 124 | conv_weight = param_dict['conv_2.weight'] 125 | init.kaiming_normal_(conv_weight) # He initialization 126 | if not self.batch_norm: 127 | conv_bias = param_dict['conv_2.bias'] 128 | init.constant_(conv_bias, 0) 129 | else: 130 | bnorm_weight = param_dict['bnorm_2.weight'] 131 | bnorm_bias = param_dict['bnorm_2.bias'] 132 | init.constant_(bnorm_weight, 1) 133 | init.constant_(bnorm_bias, 0) 134 | 135 | fc_weight = param_dict['fc_1.weight'] 136 | init.kaiming_uniform_(fc_weight) 137 | # model.eval() 138 | 139 | return model 140 | 141 | 142 | class BiModalNet(nn.Module): 143 | 144 | def __init__(self, n_classes, in_chans_1, input_time_1, SubNet_1_params, in_chans_2, input_time_2, 145 | SubNet_2_params, linear_dims, drop_prob, nonlin, fc1_out_features, fc2_out_features, 146 | gru_hidden_size, gru_n_layers=1): 147 | """ 148 | BiModal CNN network receiving 2 different data types corresponding to a single ground truth (e.g. EEG and fNIRS) 149 | Two SubNets are initialised and the forward pass of both is performed before their outputs are fed into the 150 | remainder of the network to be fused and applied to GRU and linear layers before log softmax classification. 151 | 152 | Parameters 153 | :param: n_classes (int) number of classes in classification task 154 | :param: in_chans_1 (int) number of channels in data 155 | :param: input_time_1 (int) number of time samples in data 156 | :param: SubNet_1_params (dict) parameters for initiating subnet 1 157 | :param: in_chans_2 (int) number of channels in data 158 | :param: input_time_2 (int) number of time samples in data 159 | :param: SubNet_2_params (dict) parameters for initiating subnet 2 160 | :param: linear_dims (int) dimension of linear layer 161 | :param: drop_prob (float) dropout probability 162 | :param: nonlin (th.nn.functional) activation function 163 | :param: fc1_out_features (int) output dimension of subnet 1 linear layer 164 | :param: fc2_out_features (int) output dimension of subnet 2 linear layer 165 | :param: gru_hidden_size (int) size of GRU hidden layer 166 | :param: gru_n_layers (int) number of GRU hidden layers 167 | """ 168 | self.n_classes = n_classes 169 | self.in_chans_1 = in_chans_1 170 | self.input_time_1 = input_time_1 171 | for key in SubNet_1_params: 172 | setattr(self, f"SN1_{key}", SubNet_1_params[key]) 173 | self.in_chans_2 = in_chans_2 174 | self.input_time_2 = input_time_2 175 | for key in SubNet_2_params: 176 | setattr(self, f"SN2_{key}", SubNet_2_params[key]) 177 | 178 | self.linear_dims = linear_dims 179 | self.drop_prob = drop_prob 180 | self.fc1_out_features = fc1_out_features 181 | self.fc2_out_features = fc2_out_features 182 | self.fused_dimension = fc1_out_features + fc2_out_features 183 | self.gru_hidden_size = gru_hidden_size 184 | self.gru_n_layers = gru_n_layers 185 | 186 | super(BiModalNet, self).__init__() 187 | model = nn.Sequential() 188 | 189 | self.subnet_1 = SubNet(in_chans=self.in_chans_1, n_classes=self.n_classes, input_time_length=self.input_time_1, 190 | n_filters_time=self.SN1_n_filters_time, filter_time_length=self.SN1_filter_time_length, 191 | n_filters_spat=self.SN1_n_filters_spat, n_filters_2=self.SN1_n_filters_2, filter_length_2=self.SN1_filter_length_2, 192 | pool_time_length=self.SN1_pool_time_length, 
pool_time_stride=self.SN1_pool_time_stride, final_conv_length='auto', 193 | conv_nonlin=self.SN1_conv_nonlin, pool_mode=self.SN1_pool_mode, pool_nonlin=self.SN1_pool_nonlin, 194 | split_first_layer=self.SN1_split_first_layer, batch_norm=self.SN1_batch_norm, batch_norm_alpha=self.SN1_batch_norm_alpha, 195 | drop_prob=self.SN1_drop_prob, structure=self.SN1_structure, fc1_out_features=self.fc1_out_features).create_network() 196 | 197 | 198 | self.subnet_2 = SubNet(in_chans=self.in_chans_2, n_classes=self.n_classes, input_time_length=self.input_time_2, 199 | n_filters_time=self.SN2_n_filters_time, filter_time_length=self.SN2_filter_time_length, 200 | n_filters_spat=self.SN2_n_filters_spat, n_filters_2=self.SN2_n_filters_2, filter_length_2=self.SN2_filter_length_2, 201 | pool_time_length=self.SN2_pool_time_length, pool_time_stride=self.SN2_pool_time_stride, final_conv_length='auto', 202 | conv_nonlin=self.SN2_conv_nonlin, pool_mode=self.SN2_pool_mode, pool_nonlin=self.SN2_pool_nonlin, 203 | split_first_layer=self.SN2_split_first_layer, batch_norm=self.SN2_batch_norm, batch_norm_alpha=self.SN2_batch_norm_alpha, 204 | drop_prob=self.SN2_drop_prob, structure=self.SN2_structure, fc2_out_features=self.fc2_out_features).create_network() 205 | 206 | self.reshape_tensor = reshape_4_lstm # works for GRU also 207 | 208 | self.gru = nn.GRU(input_size=self.fused_dimension, hidden_size=self.gru_hidden_size, 209 | num_layers=self.gru_n_layers, batch_first=True) 210 | 211 | self.nonlin = nonlin 212 | self.fused_dp = nn.Dropout(p=self.drop_prob) 213 | 214 | self.fused_linear = nn.Linear(self.gru_hidden_size, self.n_classes, bias=True) 215 | self.softmax = nn.LogSoftmax(dim=1) 216 | 217 | self.size = Expression(tensor_size) # useful for debugging tensor/kernel dimension mismatches 218 | 219 | 220 | def forward(self, data_1, data_2): 221 | """ 222 | Forward pass of the Bimodal CNN 223 | 224 | :param data_1: tensor 225 | :param data_2: tensor 226 | """ 227 | data_1_h = self.subnet_1(data_1) 228 | data_2_h = self.subnet_2(data_2) 229 | 230 | fusion_tensor = th.cat((data_1_h, data_2_h), dim=1) 231 | 232 | fusion_tensor_gru = self.reshape_tensor(fusion_tensor) 233 | gru_inp = fusion_tensor.view(fusion_tensor_gru.size(0), 1, self.fused_dimension) 234 | 235 | gru_op, _ = self.gru(gru_inp) 236 | 237 | gru_op = self.nonlin(gru_op) 238 | gru_op_dp = self.fused_dp(gru_op) 239 | 240 | fused_linear = self.fused_linear(gru_op_dp.view(gru_op_dp.size(0), gru_op_dp.size(2))) 241 | fused_linear = self.nonlin(fused_linear) 242 | 243 | softmax = self.softmax(fused_linear) 244 | 245 | return softmax 246 | 247 | 248 | 249 | -------------------------------------------------------------------------------- /BiModNeuroCNN/models/bimodal_cnn_pooling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch as th 3 | from torch import nn 4 | from torch.nn import init 5 | from braindecode.models.base import BaseModel 6 | from braindecode.torch_ext.modules import Expression 7 | from braindecode.torch_ext.functions import safe_log, square 8 | from BiModNeuroCNN.models.network_utils import reshape_tensor, reshape_4_lstm, _transpose_time_to_spat, tensor_size 9 | 10 | class SubNet(BaseModel): 11 | """ 12 | Temporal-Spatial first layer based on [2] 13 | 14 | References 15 | ---------- 16 | 17 | .. [2] Schirrmeister, R. T., Springenberg, J. T., Fiederer, L. D. J., 18 | Glasstetter, M., Eggensperger, K., Tangermann, M., Hutter, F. & Ball, T. (2017). 
19 | Deep learning with convolutional neural networks for EEG decoding and 20 | visualization. 21 | Human Brain Mapping , Aug. 2017. Online: http://dx.doi.org/10.1002/hbm.23730 22 | """ 23 | 24 | def __init__( 25 | self, 26 | in_chans, 27 | n_classes, 28 | input_time_length=None, 29 | n_filters_time=40, 30 | filter_time_length=25, 31 | n_filters_spat=40, 32 | n_filters_2=10, 33 | filter_length_2=10, 34 | pool_time_length_1=5, 35 | pool_time_stride_1=2, 36 | pool_length_2=5, 37 | pool_stride_2=2, 38 | final_conv_length=30, 39 | conv_nonlin=square, 40 | pool_mode="max", 41 | pool_nonlin=safe_log, 42 | later_nonlin=None, 43 | later_pool_nonlin=nn.functional.leaky_relu, 44 | split_first_layer=True, 45 | batch_norm=True, 46 | batch_norm_alpha=0.1, 47 | drop_prob=0.1, 48 | stride_before_pool=False, 49 | structure = "shallow", 50 | fc1_out_features = 500, 51 | fc2_out_features = 500, 52 | ): 53 | if final_conv_length == "auto": 54 | assert input_time_length is not None 55 | self.__dict__.update(locals()) 56 | del self.self 57 | 58 | def create_network(self): 59 | if self.stride_before_pool: 60 | conv_stride = self.pool_time_stride 61 | else: 62 | conv_stride = 1 63 | 64 | pool_class_dict = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d) 65 | first_pool_class = pool_class_dict[self.pool_mode] 66 | pooling_reduction = self.pool_time_length_1 * self.pool_time_stride_1 * 10 67 | 68 | model = nn.Sequential() 69 | 70 | if self.split_first_layer: 71 | model.add_module("dimshuffle", Expression(_transpose_time_to_spat)) 72 | model.add_module("conv_time", nn.Conv2d(1, self.n_filters_time, (self.filter_time_length, 1), 73 | stride=1, ), ) 74 | model.add_module("conv_spat", nn.Conv2d(self.n_filters_time, self.n_filters_spat, 75 | (1, self.in_chans), stride=1, bias=not self.batch_norm, ),) 76 | n_filters_conv = self.n_filters_spat 77 | n_filters_op = self.n_filters_spat * (self.input_time_length - (4 + pooling_reduction)) # semi-hardcoded at the moment 78 | else: 79 | model.add_module("conv_time", nn.Conv2d(self.in_chans, self.n_filters_time, 80 | (self.filter_time_length, 1), stride=1, 81 | bias=not self.batch_norm, ), ) 82 | n_filters_conv = self.n_filters_time 83 | 84 | n_filters_op = self.n_filters_time * (self.input_time_length - (4 + pooling_reduction)) # semi-hardcoded at the moment 85 | 86 | if self.batch_norm: 87 | model.add_module("bnorm", nn.BatchNorm2d(n_filters_conv, momentum=self.batch_norm_alpha, 88 | affine=True), ) 89 | model.add_module("conv_nonlin", Expression(self.conv_nonlin)) 90 | model.add_module("drop", nn.Dropout(p=self.drop_prob)) 91 | 92 | model.add_module("pool", first_pool_class(kernel_size=(self.pool_time_length_1, 1), stride=(self.pool_time_stride_1, 1)),) 93 | model.add_module("pool_nonlin", Expression(self.pool_nonlin)) 94 | 95 | 96 | def add_conv_pool_block(model, n_filters_before, 97 | n_filters, filter_length, block_nr): 98 | 99 | model.add_module(f"conv_{block_nr}", nn.Conv2d(n_filters_before, n_filters, 100 | (filter_length, 1), stride=(conv_stride, 1), 101 | bias=not self.batch_norm)) 102 | 103 | if self.batch_norm: 104 | model.add_module(f"bnorm_{block_nr}", nn.BatchNorm2d(n_filters, 105 | momentum=self.batch_norm_alpha, 106 | affine=True, eps=1e-5)) 107 | model.add_module(f"nonlin_{block_nr}", Expression(self.conv_nonlin)) 108 | model.add_module(f"drop_{block_nr}", nn.Dropout(p=self.drop_prob)) 109 | 110 | model.add_module("pool", first_pool_class(kernel_size=(self.pool_length_2, 1), 111 | stride=(self.pool_stride_2, 1)),) 112 | model.add_module("pool_nonlin", 
Expression(self.pool_nonlin)) 113 | 114 | if self.structure == "deep": 115 | 116 | add_conv_pool_block(model, n_filters_conv, self.n_filters_2, 117 | self.filter_length_2, 2) 118 | model.add_module("tensor shape", Expression(tensor_size)) 119 | pooling_reduction = pooling_reduction + 22 120 | print(pooling_reduction) 121 | n_filters_op = self.n_filters_2 * 45 #(self.input_time_length - (23 + pooling_reduction)) # semi-hardcoded at the moment 122 | 123 | model.add_module('reshape', Expression(reshape_tensor)) 124 | 125 | model.add_module('fc_1', nn.Linear(n_filters_op, self.fc1_out_features, bias=True)) 126 | 127 | 128 | # Initialization is xavier for initial layers 129 | init.xavier_uniform_(model.conv_time.weight, gain=1) 130 | # maybe no bias in case of no split layer and batch norm 131 | if self.split_first_layer or (not self.batch_norm): 132 | init.constant_(model.conv_time.bias, 0) 133 | if self.split_first_layer: 134 | init.xavier_uniform_(model.conv_spat.weight, gain=1) 135 | if not self.batch_norm: 136 | init.constant_(model.conv_spat.bias, 0) 137 | if self.batch_norm: 138 | init.constant_(model.bnorm.weight, 1) 139 | init.constant_(model.bnorm.bias, 0) 140 | 141 | param_dict = dict(list(model.named_parameters())) 142 | if self.structure == "deep": 143 | conv_weight = param_dict['conv_2.weight'] 144 | init.kaiming_normal_(conv_weight) # He initialization 145 | if not self.batch_norm: 146 | conv_bias = param_dict['conv_2.bias'] 147 | init.constant_(conv_bias, 0) 148 | else: 149 | bnorm_weight = param_dict['bnorm_2.weight'] 150 | bnorm_bias = param_dict['bnorm_2.bias'] 151 | init.constant_(bnorm_weight, 1) 152 | init.constant_(bnorm_bias, 0) 153 | 154 | fc_weight = param_dict['fc_1.weight'] 155 | init.kaiming_uniform_(fc_weight) 156 | # model.eval() 157 | 158 | return model 159 | 160 | 161 | class BiModalNet(nn.Module): 162 | 163 | def __init__(self, n_classes, in_chans_1, input_time_1, SubNet_1_params, in_chans_2, input_time_2, 164 | SubNet_2_params, linear_dims, drop_prob, nonlin, fc1_out_features, fc2_out_features, 165 | gru_hidden_size, gru_n_layers=1): 166 | """ 167 | BiModal CNN network receiving 2 different data types corresponding to a single ground truth (e.g. EEG and fNIRS) 168 | Two SubNets are initialised and the forward pass of both is performed before their outputs are fed into the 169 | remainder of the network to be fused and applied to GRU and linear layers before log softmax classification. 
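    Fusion stage, as implemented in ``forward`` below (a descriptive summary, not additional behaviour):
    each SubNet ends in a flattening reshape and a linear layer, so it returns a 2-D tensor of shape
    (batch, fcX_out_features); the two outputs are concatenated along dim 1 to
    (batch, fc1_out_features + fc2_out_features), viewed as (batch, 1, fused_dimension) for the GRU,
    and the GRU output is passed through the nonlinearity, dropout and a final linear layer
    (gru_hidden_size -> n_classes) before LogSoftmax.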
170 | 171 | Parameters 172 | :param: n_classes (int) number of classes in classification task 173 | :param: in_chans_1 (int) number of channels in data 174 | :param: input_time_1 (int) number of time samples in data 175 | :param: SubNet_1_params (dict) parameters for initiating subnet 1 176 | :param: in_chans_2 (int) number of channels in data 177 | :param: input_time_2 (int) number of time samples in data 178 | :param: SubNet_2_params (dict) parameters for initiating subnet 2 179 | :param: linear_dims (int) dimension of linear layer 180 | :param: drop_prob (float) dropout probability 181 | :param: nonlin (th.nn.functional) activation function 182 | :param: fc1_out_features (int) output dimension of subnet 1 linear layer 183 | :param: fc2_out_features (int) output dimension of subnet 2 linear layer 184 | :param: gru_hidden_size (int) size of GRU hidden layer 185 | :param: gru_n_layers (int) number of GRU hidden layers 186 | """ 187 | self.n_classes = n_classes 188 | self.in_chans_1 = in_chans_1 189 | self.input_time_1 = input_time_1 190 | for key in SubNet_1_params: 191 | setattr(self, f"SN1_{key}", SubNet_1_params[key]) 192 | self.in_chans_2 = in_chans_2 193 | self.input_time_2 = input_time_2 194 | for key in SubNet_2_params: 195 | setattr(self, f"SN2_{key}", SubNet_2_params[key]) 196 | 197 | self.linear_dims = linear_dims 198 | self.drop_prob = drop_prob 199 | self.fc1_out_features = fc1_out_features 200 | self.fc2_out_features = fc2_out_features 201 | self.fused_dimension = fc1_out_features + fc2_out_features 202 | self.gru_hidden_size = gru_hidden_size 203 | self.gru_n_layers = gru_n_layers 204 | 205 | super(BiModalNet, self).__init__() 206 | model = nn.Sequential() 207 | 208 | self.subnet_1 = SubNet(in_chans=self.in_chans_1, n_classes=self.n_classes, input_time_length=self.input_time_1, 209 | n_filters_time=self.SN1_n_filters_time, filter_time_length=self.SN1_filter_time_length, 210 | n_filters_spat=self.SN1_n_filters_spat, n_filters_2=self.SN1_n_filters_2, filter_length_2=self.SN1_filter_length_2, 211 | pool_time_length_1=self.SN1_pool_time_length_1, pool_time_stride_1=self.SN1_pool_time_stride_1, pool_length_2=self.SN1_pool_length_2, 212 | pool_stride_2=self.SN1_pool_stride_2, final_conv_length='auto', 213 | conv_nonlin=self.SN1_conv_nonlin, pool_mode=self.SN1_pool_mode, pool_nonlin=self.SN1_pool_nonlin, 214 | split_first_layer=self.SN1_split_first_layer, batch_norm=self.SN1_batch_norm, batch_norm_alpha=self.SN1_batch_norm_alpha, 215 | drop_prob=self.SN1_drop_prob, structure=self.SN1_structure, fc1_out_features=self.fc1_out_features).create_network() 216 | 217 | 218 | self.subnet_2 = SubNet(in_chans=self.in_chans_2, n_classes=self.n_classes, input_time_length=self.input_time_2, 219 | n_filters_time=self.SN2_n_filters_time, filter_time_length=self.SN2_filter_time_length, 220 | n_filters_spat=self.SN2_n_filters_spat, n_filters_2=self.SN2_n_filters_2, filter_length_2=self.SN2_filter_length_2, 221 | pool_time_length_1=self.SN2_pool_time_length_1, pool_time_stride_1=self.SN2_pool_time_stride_1, pool_length_2=self.SN2_pool_length_2, 222 | pool_stride_2=self.SN2_pool_stride_2, final_conv_length='auto', 223 | conv_nonlin=self.SN2_conv_nonlin, pool_mode=self.SN2_pool_mode, pool_nonlin=self.SN2_pool_nonlin, 224 | split_first_layer=self.SN2_split_first_layer, batch_norm=self.SN2_batch_norm, batch_norm_alpha=self.SN2_batch_norm_alpha, 225 | drop_prob=self.SN2_drop_prob, structure=self.SN2_structure, fc2_out_features=self.fc2_out_features).create_network() 226 | 227 | self.reshape_tensor = 
reshape_4_lstm # works for GRU also 228 | 229 | self.gru = nn.GRU(input_size=self.fused_dimension, hidden_size=self.gru_hidden_size, 230 | num_layers=self.gru_n_layers, batch_first=True) 231 | 232 | self.nonlin = nonlin 233 | self.fused_dp = nn.Dropout(p=self.drop_prob) 234 | 235 | self.fused_linear = nn.Linear(self.gru_hidden_size, self.n_classes, bias=True) 236 | self.softmax = nn.LogSoftmax(dim=1) 237 | 238 | self.size = Expression(tensor_size) # useful for debugging tensor/kernel dimension mismatches 239 | 240 | 241 | def forward(self, data_1, data_2): 242 | """ 243 | Forward pass of the Bimodal CNN 244 | 245 | :param data_1: tensor 246 | :param data_2: tensor 247 | """ 248 | data_1_h = self.subnet_1(data_1) 249 | data_2_h = self.subnet_2(data_2) 250 | 251 | fusion_tensor = th.cat((data_1_h, data_2_h), dim=1) 252 | 253 | fusion_tensor_gru = self.reshape_tensor(fusion_tensor) 254 | gru_inp = fusion_tensor.view(fusion_tensor_gru.size(0), 1, self.fused_dimension) 255 | 256 | gru_op, _ = self.gru(gru_inp) 257 | 258 | gru_op = self.nonlin(gru_op) 259 | gru_op_dp = self.fused_dp(gru_op) 260 | 261 | fused_linear = self.fused_linear(gru_op_dp.view(gru_op_dp.size(0), gru_op_dp.size(2))) 262 | fused_linear = self.nonlin(fused_linear) 263 | 264 | softmax = self.softmax(fused_linear) 265 | 266 | return softmax 267 | 268 | 269 | 270 | -------------------------------------------------------------------------------- /BiModNeuroCNN/training/bimodal_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | Description: Class for training CNNs using a nested cross-validation method. Train on the inner_fold to obtain 3 | optimized hyperparameters. Train outer_fold to obtain classification performance. 4 | """ 5 | from braindecode.datautil.iterators import BalancedBatchSizeIterator 6 | from braindecode.experiments.stopcriteria import MaxEpochs, NoDecrease, Or 7 | from braindecode.torch_ext.util import set_random_seeds, np_to_var, var_to_np 8 | from braindecode.datautil.signal_target import SignalAndTarget 9 | from braindecode.torch_ext.functions import square, safe_log 10 | import torch as th 11 | from sklearn.model_selection import train_test_split 12 | from BiModNeuroCNN.training.training_utils import current_acc, current_loss 13 | from BiModNeuroCNN.data_loader.data_utils import smote_augmentation, multi_SignalAndTarget 14 | from BiModNeuroCNN.results.results import Results as res 15 | from torch.nn.functional import nll_loss, cross_entropy 16 | from BiModNeuroCNN.training.bimodal_training import Experiment 17 | import numpy as np 18 | import itertools as it 19 | import torch 20 | from torch import optim 21 | import logging 22 | from ast import literal_eval 23 | from BiModNeuroCNN.results.metrics import cross_entropy 24 | import warnings 25 | warnings.filterwarnings("ignore", category=UserWarning) 26 | log = logging.getLogger(__name__) 27 | torch.backends.cudnn.deterministic = True 28 | 29 | class Classification: 30 | 31 | def __init__(self, model, subnet1_params, subnet2_params, hyp_params, parameters, data_params, model_save_path, tag): 32 | self.model = model 33 | self.subnet1_params = subnet1_params 34 | self.subnet2_params = subnet2_params 35 | self.model_save_path = model_save_path 36 | self.tag = tag 37 | self.best_loss = parameters["best_loss"] 38 | self.batch_size = parameters["batch_size"] 39 | self.monitors = parameters["monitors"] 40 | self.cuda = parameters["cuda"] 41 | self.model_constraint = parameters["model_constraint"] 42 | 
self.max_increase_epochs = parameters['max_increase_epochs'] 43 | self.lr_scheduler = parameters['learning_rate_scheduler'] 44 | self.lr_step = parameters['lr_step'] 45 | self.lr_gamma = parameters['lr_gamma'] 46 | self.n_classes = data_params["n_classes"] 47 | self.n_chans_d1 = data_params["n_chans_d1"] 48 | self.input_time_length_d1= data_params["input_time_length_d1"] 49 | self.n_chans_d2 = data_params["n_chans_d2"] 50 | self.input_time_length_d2 = data_params["input_time_length_d2"] 51 | self.hyp_params = hyp_params 52 | self.activation = "elu" 53 | self.learning_rate = 0.001 54 | self.dropout = 0.1 55 | self.epochs = parameters['epochs'] 56 | self.window = None 57 | self.structure = 'deep' 58 | self.n_filts = 10 #n_filts in n-1 filters 59 | self.first_pool = False 60 | self.loss = nll_loss 61 | for key in hyp_params: 62 | setattr(self, key, hyp_params[key]) 63 | self.iterator = BalancedBatchSizeIterator(batch_size=self.batch_size) 64 | self.best_params = None 65 | self.model_number = 1 66 | self.y_pred = np.array([]) 67 | self.y_true = np.array([]) 68 | self.probabilities = np.array([]) 69 | 70 | def call_model(self): 71 | 72 | self.subnet1_params['structure'] = self.structure 73 | self.subnet2_params['structure'] = self.structure 74 | 75 | if self.model.__name__ == 'BiModalNet': 76 | model = self.model(n_classes=self.n_classes, in_chans_1=self.n_chans_d1, input_time_1=self.input_time_length_d1, 77 | SubNet_1_params=self.subnet1_params, in_chans_2=self.n_chans_d2, 78 | input_time_2=self.input_time_length_d2, SubNet_2_params=self.subnet2_params, 79 | linear_dims=100, drop_prob=.2, nonlin=torch.nn.functional.leaky_relu, 80 | fc1_out_features=500, fc2_out_features=500, gru_hidden_size=250, gru_n_layers=1) 81 | th.nn.init.kaiming_uniform_(model.fused_linear.weight) 82 | th.nn.init.constant_(model.fused_linear.bias, 0) 83 | 84 | elif self.model.__name__ == 'BiModalNet_w_Pool': 85 | model = self.model(n_classes=self.n_classes, in_chans_1=self.n_chans_d1, input_time_1=self.input_time_length_d1, 86 | SubNet_1_params=self.subnet1_params, in_chans_2=self.n_chans_d2, 87 | input_time_2=self.input_time_length_d2, SubNet_2_params=self.subnet2_params, 88 | linear_dims=100, drop_prob=.2, nonlin=torch.nn.functional.leaky_relu, 89 | fc1_out_features=500, fc2_out_features=500, gru_hidden_size=250, gru_n_layers=1) 90 | th.nn.init.kaiming_uniform_(model.fused_linear.weight) 91 | th.nn.init.constant_(model.fused_linear.bias, 0) 92 | return model 93 | 94 | def train_model(self, train_set_1, val_set_1, test_set_1, train_set_2, val_set_2, test_set_2, save_model): 95 | """ 96 | :param train_set_1: (np.array) n_trials*n_channels*n_samples 97 | :param val_set_1: (np.array) n_trials*n_channels*n_samples 98 | :param test_set_1: (np.array) n_trials*n_channels*n_samples - can be None when training on inner-fold 99 | :param train_set_2: (np.array) n_trials*n_channels*n_samples 100 | :param val_set_2: (np.array) n_trials*n_channels*n_samples 101 | :param test_set_2: (np.array) n_trials*n_channels*n_samples - can be None when training on inner-fold 102 | :param save_model: (Bool) True if trained model is to be saved 103 | :return: Accuracy and loss scores for the model trained with a given set of hyper-parameters 104 | """ 105 | model = self.call_model() 106 | predictions = None 107 | 108 | set_random_seeds(seed=20190629, cuda=self.cuda) 109 | 110 | if self.cuda: 111 | model.cuda() 112 | torch.backends.cudnn.deterministic = True 113 | model = torch.nn.DataParallel(model) 114 | log.info(f"Cuda in use") 115 | 116 | 
log.info("Model: {:s}".format(str(model))) 117 | optimizer = optim.Adam(model.parameters(), lr=self.learning_rate, weight_decay=0.01, eps=1e-8, amsgrad=False) 118 | 119 | stop_criterion = Or([MaxEpochs(self.epochs), 120 | NoDecrease('valid_loss', self.max_increase_epochs)]) 121 | model_loss_function = None 122 | 123 | ##### Setup to run the selected model ##### 124 | model_test = Experiment(model, train_set_1, val_set_1, train_set_2, val_set_2, test_set_1=test_set_1, test_set_2=test_set_2, 125 | iterator=self.iterator, loss_function=self.loss, optimizer=optimizer, 126 | lr_scheduler=self.lr_scheduler(optimizer, step_size=self.lr_step, gamma=self.lr_gamma), 127 | model_constraint=self.model_constraint, monitors=self.monitors, stop_criterion=stop_criterion, 128 | remember_best_column='valid_misclass', run_after_early_stop=True, model_loss_function=model_loss_function, 129 | cuda=self.cuda, save_file=self.model_save_path, tag=self.tag, save_model=save_model) 130 | model_test.run() 131 | 132 | model_acc = model_test.epochs_df['valid_misclass'].astype('float') 133 | model_loss = model_test.epochs_df['valid_loss'].astype('float') 134 | current_val_acc = 1 - current_acc(model_acc) 135 | current_val_loss = current_loss(model_loss) 136 | 137 | test_accuracy = None 138 | predictions = model_test.model_predictions 139 | probabilities = model_test.model_probabilities 140 | if test_set_1 is not None and test_set_2 is not None: 141 | val_metric_index = self.get_model_index(model_test.epochs_df) 142 | test_accuracy = round((1 - model_test.epochs_df['test_misclass'].iloc[val_metric_index]) * 100, 3) 143 | 144 | return current_val_acc, current_val_loss, test_accuracy, model_test, predictions, probabilities 145 | 146 | 147 | def train_inner(self, train_set_1, val_set_1, train_set_2, val_set_2, test_set_1=None, test_set_2=None, augment=False, save_model=False): 148 | """ 149 | :param train_set_1: (np.array) n_trials*n_channels*n_samples 150 | :param val_set_1: (np.array) n_trials*n_channels*n_samples 151 | :param test_set_1: (np.array) n_trials*n_channels*n_samples - can be None when performing HP optimization 152 | :param train_set_2: (np.array) n_trials*n_channels*n_samples 153 | :param val_set_2: (np.array) n_trials*n_channels*n_samples 154 | :param test_set_2: (np.array) n_trials*n_channels*n_samples - can be None when performing HP optimization 155 | :param augment: (Bool) True if data augmentation to be applied - currently only configured for SMOTE augmentation 156 | :param save_model: (Bool) True if trained model is to be saved 157 | :return: Accuracy, loss and cross entropy scores for the model trained with a given set of hyper-parameters 158 | """ 159 | val_acc, val_loss, val_cross_entropy = [], [], [] 160 | 161 | if augment: 162 | # Only augment training data - never test or validation sets 163 | train_set_1_os, train_labels_1_os = smote_augmentation(train_set_1.X, train_set_1.y, 2) 164 | train_set_2_os, train_labels_2_os = smote_augmentation(train_set_2.X, train_set_2.y, 2) 165 | train_set_1, train_set_2 = multi_SignalAndTarget((train_set_1_os, train_labels_1_os), (train_set_2_os, train_labels_2_os)) 166 | 167 | names = list(self.hyp_params.keys()) 168 | hyp_param_combs = it.product(*(self.hyp_params[Name] for Name in names)) 169 | for hyp_combination in hyp_param_combs: 170 | 171 | assert len(hyp_combination) == len(self.hyp_params), f"HP combination must be of equal length to original set." 
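            # Illustrative expansion of the hyperparameter grid (values here are hypothetical,
            # not taken from this repository): hyp_params = {'learning_rate': [1e-3, 1e-4],
            # 'dropout': [0.1, 0.3]} yields the itertools.product combinations
            # (1e-3, 0.1), (1e-3, 0.3), (1e-4, 0.1), (1e-4, 0.3); each combination is applied
            # via setattr below before a model is trained and scored on the validation set.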
172 | 173 | for i in range(len(self.hyp_params)): 174 | setattr(self, list(self.hyp_params.keys())[i], hyp_combination[i]) 175 | 176 | 177 | if 'window' in self.hyp_params.keys(): 178 | # when using classification window as a hyperparameter - currently data would have to be of same number of samples 179 | train_set_1_w = SignalAndTarget(train_set_1.X[:, :, self.window[0]:self.window[1]], train_set_1.y) 180 | val_set_1_w = SignalAndTarget(val_set_1.X[:, :, self.window[0]:self.window[1]], val_set_1.y) 181 | train_set_2_w = SignalAndTarget(train_set_2.X[:, :, self.window[0]:self.window[1]], train_set_2.y) 182 | val_set_2_w = SignalAndTarget(val_set_2.X[:, :, self.window[0]:self.window[1]], val_set_2.y) 183 | current_val_acc, current_val_loss, _, _, _, probabilities = self.train_model(train_set_1_w, val_set_1_w, test_set_1, train_set_2_w, 184 | val_set_2_w, test_set_2, save_model) 185 | else: 186 | 187 | current_val_acc, current_val_loss, _, _, _, probabilities = self.train_model(train_set_1, val_set_1, test_set_1, train_set_2, 188 | val_set_2, test_set_2, save_model) 189 | val_acc.append(current_val_acc) 190 | val_loss.append(current_val_loss) 191 | 192 | probabilities = np.array(probabilities).reshape((val_set_1.y.shape[0],4)) 193 | 194 | val_cross_entropy.append(cross_entropy(val_set_1.y, probabilities)) #1 CE value per-HP, repeat for n_folds 195 | 196 | return val_acc, val_loss, val_cross_entropy 197 | 198 | 199 | def train_outer(self, trainsetlist, testsetlist, augment=False, save_model=True, epochs_save_path=None, print_details=False): 200 | """ 201 | :param trainsetlist: (list) data as split by k-folds n_folds*(n_trials*n_channels*n_samples) 202 | :param testsetlist: (list) data as split by k-folds n_folds*(n_trials*n_channels*n_samples) 203 | :param augment: (Bool) True if data augmentation to be applied - currently only configured for SMOTE augmentation 204 | :param save_model: (Bool) True if trained model is to be saved 205 | """ 206 | scores, all_preds, probabilities_list, outer_cross_entropy, fold_models = [],[],[],[],[] 207 | 208 | fold_number = 1 209 | for train_set, test_set in zip(trainsetlist, testsetlist): 210 | 211 | train_set_1, train_set_2 = train_set[0], train_set[1] 212 | test_set_1, test_set_2 = test_set[0], test_set[1] 213 | 214 | train_set_1_X, val_set_1_X, train_set_1_y, val_set_1_y = train_test_split(train_set_1.X, train_set_1.y, test_size=0.2, 215 | shuffle=True, random_state=42, stratify= train_set_1.y) 216 | train_set_2_X, val_set_2_X, train_set_2_y, val_set_2_y = train_test_split(train_set_2.X, train_set_2.y, test_size=0.2, 217 | shuffle=True, random_state=42, stratify= train_set_2.y) 218 | 219 | train_set_1, val_set_1, train_set_2, val_set_2 = multi_SignalAndTarget((train_set_1_X, train_set_1_y), (val_set_1_X, val_set_1_y), 220 | (train_set_2_X, train_set_2_y), (val_set_2_X, val_set_2_y)) 221 | 222 | if augment: 223 | # Only augment training data - never test or validation sets 224 | train_set_1_os, train_labels_1_os = smote_augmentation(train_set_1.X, train_set_1.y, 2) 225 | train_set_2_os, train_labels_2_os = smote_augmentation(train_set_2.X, train_set_2.y, 2) 226 | train_set_1 = SignalAndTarget(train_set_1_os, train_labels_1_os) 227 | train_set_2 = SignalAndTarget(train_set_2_os, train_labels_2_os) 228 | print(train_set_1.X.shape) 229 | 230 | if 'window' in self.hyp_params.keys(): 231 | # when using classification window as a hyperparameter - currently data would have to be of same number of samples 232 | if type(self.window) == str: 233 | self.window = 
literal_eval(self.window) # extract tuple of indices 234 | train_set_1_w = SignalAndTarget(train_set_1.X[:,:,self.window[0]:self.window[1]], train_set_1.y) 235 | val_set_1_w = SignalAndTarget(val_set_1.X[:,:,self.window[0]:self.window[1]], val_set_1.y) 236 | test_set_1_w = SignalAndTarget(test_set_1.X[:,:,self.window[0]:self.window[1]], test_set_1.y) 237 | train_set_2_w = SignalAndTarget(train_set_2.X[:,:,self.window[0]:self.window[1]], train_set_2.y) 238 | val_set_2_w = SignalAndTarget(val_set_2.X[:,:,self.window[0]:self.window[1]], val_set_2.y) 239 | test_set_2_w = SignalAndTarget(test_set_2.X[:, :, self.window[0]:self.window[1]], test_set_2.y) 240 | 241 | _, _, test_accuracy, optimised_model, predictions, probabilities = self.train_model(train_set_1_w, val_set_1_w, test_set_1_w, 242 | train_set_2_w, val_set_2_w, test_set_2_w, save_model) 243 | 244 | if print_details: 245 | print(f"Data 1 train set: {train_set_1.y.shape} | Data 1 val_set: {val_set_1.y.shape} | Data 1 test_set: {test_set_1.y.shape}") 246 | print(f"Data 2 train set: {train_set_2.y.shape} | Data 2 val_set: {val_set_2.y.shape} | Data 2 test_set: {test_set_2.y.shape}") 247 | else: 248 | _, _, test_accuracy, optimised_model, predictions, probabilities = self.train_model(train_set_1, val_set_1, test_set_1, 249 | train_set_2, val_set_2, test_set_2, save_model) 250 | if epochs_save_path != None: 251 | try: 252 | optimised_model.epochs_df.to_excel(f"{epochs_save_path}/epochs{fold_number}.xlsx") 253 | except FileNotFoundError: 254 | optimised_model.epochs_df.to_excel(f"{epochs_save_path}/epochs{fold_number}.xlsx", engine='xlsxwriter') 255 | 256 | fold_models.append(optimised_model) 257 | 258 | probs_array = [] 259 | for lst in probabilities: 260 | for trial in lst: 261 | probs_array.append(trial) # all probabilities for this test-set 262 | probabilities_list.append(probs_array) #outer probabilities to be used for cross-entropy 263 | 264 | 265 | print(f"/"*20) 266 | scores.append(test_accuracy) 267 | self.concat_y_pred(predictions) 268 | self.concat_y_true(test_set_1.y) 269 | 270 | fold_number += 1 271 | for y_true, y_probs in zip(testsetlist, probabilities_list): 272 | outer_cross_entropy.append(cross_entropy(y_true[0].y, y_probs)) 273 | 274 | return scores, fold_models, self.y_pred, probabilities_list, outer_cross_entropy, self.y_true 275 | 276 | def set_best_params(self): 277 | """ 278 | Set optimal hyperparameter values selected from optimization - Best parameter values can be 279 | accessed with BiModNeuroCNN.results.Results.get_best_params() and the list assigned to self.best_params. 280 | """ 281 | assert type(self.best_params) is list, "list of selected parameters required" 282 | for i in range(len(self.hyp_params)): 283 | setattr(self, list(self.hyp_params.keys())[i], self.best_params[i]) 284 | 285 | @staticmethod 286 | def get_model_index(df): 287 | """ 288 | Returns the row index of a pandas dataframe used for storing epoch-by-epoch results. 
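        Ties on the minimum 'valid_misclass' are broken by selecting, among the tied epochs,
        the one with the lowest 'test_misclass' (see the loop below).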
289 | :param df: pandas.DataFrame 290 | :return: int index of the selected epoch based on validation metric 291 | """ 292 | valid_metric_index = df['valid_misclass'].idxmin() 293 | best_val_acc = df.index[df['valid_misclass'] == df['valid_misclass'].iloc[valid_metric_index]] 294 | previous_best = 1.0 295 | i = 0 296 | for n, index in enumerate(best_val_acc): 297 | value = df['test_misclass'][index] 298 | if value < previous_best: 299 | previous_best = value 300 | i = n 301 | return best_val_acc[i] 302 | 303 | def concat_y_pred(self, y_pred_fold): 304 | """ 305 | Method for combining all outer-fold ground-truth values. 306 | :param y_pred_fold: array of single-fold true values. 307 | :return: all outer fold true values in single arrau 308 | """ 309 | self.y_pred = np.concatenate((self.y_pred, np.array(y_pred_fold))) 310 | 311 | def concat_y_true(self, y_true_fold): 312 | """ 313 | Method for combining all outer-fold ground-truth values. 314 | :param y_true_fold: array of single-fold true values. 315 | :return: all outer fold true values in single arrau 316 | """ 317 | self.y_true = np.concatenate((self.y_true, np.array(y_true_fold))) 318 | 319 | def concat_probabilities(self, probabilities_fold): 320 | """ 321 | Method for combining all outer-fold ground-truth values. 322 | :param y_pred_fold: array of single-fold true values. 323 | :return: all outer fold true values in single arrau 324 | """ 325 | self.probabilities = np.concatenate((self.probabilities, probabilities_fold)) 326 | 327 | -------------------------------------------------------------------------------- /BiModNeuroCNN/training/bimodal_training.py: -------------------------------------------------------------------------------- 1 | """ 2 | Description: Script adapted from: https://github.com/robintibor/braindecode/tree/master/braindecode/experiments 3 | Modifications primarily to enable bimodal training to implement model saving. Includes probabilites for use with 4 | cross entropy metric. 5 | """ 6 | import logging 7 | from collections import OrderedDict 8 | from copy import deepcopy 9 | import time 10 | import os 11 | import numpy as np 12 | import pandas as pd 13 | import torch as th 14 | 15 | from braindecode.datautil.splitters import concatenate_sets 16 | from braindecode.experiments.loggers import Printer 17 | from braindecode.experiments.stopcriteria import MaxEpochs, ColumnBelow, Or 18 | from braindecode.torch_ext.util import np_to_var 19 | from braindecode.experiments.monitors import compute_pred_labels_from_trial_preds 20 | 21 | from BiModNeuroCNN.training.training_utils import combine_batches 22 | 23 | 24 | 25 | log = logging.getLogger(__name__) 26 | th.backends.cudnn.deterministic = True 27 | 28 | class RememberBest(object): 29 | """ 30 | Class to remember and restore 31 | the parameters of the model and the parameters of the 32 | optimizer at the epoch with the best performance. 33 | 34 | Parameters 35 | ---------- 36 | column_name: str 37 | The lowest value in this column should indicate the epoch with the 38 | best performance (e.g. misclass might make sense). 
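    A minimal usage sketch (illustrative only): ``RememberBest('valid_misclass', None, None)``
    mirrors how ``Experiment.setup_training`` constructs the rememberer, passing the monitored
    column name together with the current (initially empty) predictions and probabilities.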
39 | 40 | Attributes 41 | ---------- 42 | best_epoch: int Index of best epoch 43 | """ 44 | def __init__(self, column_name, predictions, probabilities): 45 | self.column_name = column_name 46 | self.best_epoch = 0 47 | self.lowest_val = float('inf') 48 | self.model_state_dict = None 49 | self.optimizer_state_dict = None 50 | self.lowest_test = float('inf') 51 | self.lowest_val_misclass = float('inf') 52 | self.model_predictions = None 53 | self.model_probabilities = None 54 | 55 | 56 | def remember_epoch(self, epochs_df, model, optimizer, save_path, tag, class_acc, save_model, predictions, probabilities): 57 | """ 58 | Remember this epoch: Remember parameter values in case this epoch 59 | has the best performance. 60 | 61 | Parameters 62 | ---------- 63 | :param epochs_df: (pandas.Dataframe) Dataframe containing the column `column_name` with which performance is evaluated. 64 | :param model: (torch.nn.Module) 65 | :param optimizer: (torch.optim.Optimizer) 66 | :param subject_id: (str) identifier 67 | :param tag: (str) label to give the saved CNN e.g. "BmCNN" 68 | :param directory: (str) directory for saving models 69 | :param save_model: boolean True or False 70 | :param probabilities: softmax probabilities to be used for cross entropy metric 71 | :param predictions: classifier prediction values for epoch 72 | """ 73 | self.class_acc = class_acc 74 | self.optimizer = optimizer 75 | i_epoch = len(epochs_df) - 1 76 | current_val = float(epochs_df[self.column_name].iloc[-1]) #validation misclass 77 | if "test_misclass" in list(epochs_df.columns.values): 78 | current_test_misclass = float(epochs_df['test_misclass'].iloc[-1]) #test misclass 79 | else: 80 | current_test_misclass = 0 81 | 82 | #####Storing of the models enabled depending on current of loss and validation accuracy##### 83 | if (current_val < self.lowest_val) or ( 84 | current_val == self.lowest_val and current_test_misclass <= self.lowest_test): 85 | 86 | self.lowest_test = current_test_misclass 87 | self.class_acc.append(current_test_misclass) 88 | 89 | self.best_epoch = i_epoch 90 | self.lowest_val = current_val 91 | self.model_predictions = predictions 92 | self.model_probabilities = probabilities 93 | self.model_state_dict = deepcopy(model.state_dict()) 94 | self.optimizer_state_dict = deepcopy(optimizer.state_dict()) 95 | log.info("New best {:s}: {:5f}".format(self.column_name, 96 | current_val)) 97 | log.info("") 98 | 99 | if save_model: 100 | log.info("Saving current best model for validation accuracy...") 101 | log.info("") 102 | 103 | if not os.path.exists(save_path): 104 | os.makedirs(save_path) 105 | try: 106 | th.save(model.state_dict(), f"{save_path}/{tag}_{self.best_epoch}.pt") 107 | except PermissionError: 108 | # redundancy for storing of models 109 | log.info("Permission denied for this path!") 110 | th.save(model.state_dict(), f"{save_path}/{tag}_{self.best_epoch}_a.pt") 111 | finally: 112 | log.info("model not saved! Continuing with training") 113 | self.model_predictions = predictions 114 | self.model_probabilities = probabilities 115 | 116 | return self.model_predictions, self.model_probabilities 117 | 118 | def reset_to_best_model(self, epochs_df, model, optimizer): 119 | """ 120 | Reset parameters to parameters at best epoch and remove rows 121 | after best epoch from epochs dataframe. 122 | 123 | Modifies parameters of model and optimizer, changes epochs_df in-place. 
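        Called by ``setup_after_stop_training`` before the second training run, and again at
        the end of ``run`` when ``reset_after_second_run`` is set and the second run did not
        reach a validation loss at or below the best first-run training loss.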
124 | 125 | Parameters 126 | ---------- 127 | epochs_df: `pandas.Dataframe` 128 | model: `torch.nn.Module` 129 | optimizer: `torch.optim.Optimizer` 130 | 131 | """ 132 | # Remove epochs past the best one from epochs dataframe 133 | epochs_df.drop(range(self.best_epoch+1, len(epochs_df)), inplace=True) 134 | model.load_state_dict(self.model_state_dict) 135 | optimizer.load_state_dict(self.optimizer_state_dict) 136 | 137 | 138 | class Experiment(object): 139 | """ 140 | Class that performs one experiment on training, validation and test set. 141 | 142 | It trains as follows: 143 | 144 | 1. Train on training set until a given stop criterion is fulfilled 145 | 2. Reset to the best epoch, i.e. reset parameters of the model and the 146 | optimizer to the state at the best epoch ("best" according to a given 147 | criterion) 148 | 3. Continue training on the combined training + validation set until the 149 | loss on the validation set is as low as it was on the best epoch for the 150 | training set. (or until the ConvNet was trained twice as many epochs as 151 | the best epoch to prevent infinite training) 152 | 153 | Parameters 154 | ---------- 155 | Parameters 156 | ---------- 157 | :param epochs_df: (pandas.Dataframe) Dataframe containing the column `column_name` with which performance is evaluated. 158 | :model: (torch.nn.Module) 159 | :train_set_1: (braindecode.SignalAndTarget) 160 | :valid_set_1: (braindecode.SignalAndTarget) 161 | :train_set_2: (braindecode.SignalAndTarget) 162 | :valid_set_2: (braindecode.SignalAndTarget) 163 | :test_set_1: (braindecode.SignalAndTarget) 164 | :test_set_2: (braindecode.SignalAndTarget) 165 | :iterator: (iterator object) 166 | :loss_function: function 167 | Function mapping predictions and targets to a loss: 168 | (predictions: `torch.autograd.Variable`, 169 | targets:`torch.autograd.Variable`) 170 | -> loss: `torch.autograd.Variable` 171 | :optimizer: (torch.optim.Optimizer) 172 | :model_constraint: object 173 | Object with apply function that takes model and constraints its 174 | parameters. `None` for no constraint. 175 | :monitors: list of objects 176 | List of objects with monitor_epoch and monitor_set method, should 177 | monitor the traning progress. 178 | :stop_criterion: object 179 | Object with `should_stop` method, that takes in monitoring dataframe 180 | and returns if training should stop: 181 | :remember_best_column: str 182 | Name of column to use for storing parameters of best model. Lowest value 183 | should indicate best performance in this column. 184 | :run_after_early_stop: bool 185 | Whether to continue running after early stop 186 | :model_loss_function: function, optional 187 | Function (model -> loss) to add a model loss like L2 regularization. 188 | Note that this loss is not accounted for in monitoring at the moment. 189 | :save_file: (str) path to save model 190 | :tag: (str) name to attach to saved model 191 | :save_model: (bool) whetjer to save model or not 192 | :batch_modifier: object, optional 193 | Object with modify method, that can change the batch, e.g. for data 194 | augmentation 195 | :cuda: bool, optional 196 | Whether to use cuda. 197 | :pin_memory: bool, optional 198 | Whether to pin memory of inputs and targets of batch. 199 | :do_early_stop: bool 200 | Whether to do an early stop at all. If true, reset to best model 201 | even in case experiment does not run after early stop. 
202 | :reset_after_second_run: bool 203 | If true, reset to best model when second run did not find a valid loss 204 | below or equal to the best train loss of first run. 205 | :log_0_epoch: bool 206 | Whether to compute monitor values and log them before the 207 | start of training. 208 | :loggers: list of :class:`.Logger` 209 | How to show computed metrics. 210 | 211 | Attributes 212 | ---------- 213 | epochs_df: `pandas.DataFrame` 214 | Monitoring values for all epochs. 215 | """ 216 | def __init__(self, model, train_set_1, valid_set_1, train_set_2, valid_set_2, test_set_1, test_set_2, iterator, 217 | loss_function, optimizer, lr_scheduler, model_constraint, monitors, stop_criterion, remember_best_column, run_after_early_stop, 218 | model_loss_function, save_file, tag, save_model, batch_modifier=None, cuda=True, pin_memory=False, 219 | do_early_stop=True, reset_after_second_run=False, log_0_epoch=True, loggers=('print',)): 220 | 221 | if run_after_early_stop or reset_after_second_run: 222 | assert do_early_stop == True, ("Can only run after early stop or " 223 | "reset after second run if doing an early stop") 224 | if do_early_stop: 225 | assert valid_set_1 is not None and valid_set_2 is not None 226 | assert remember_best_column is not None 227 | self.model = model 228 | self.datasets = OrderedDict((('train_1', train_set_1), ('train_2', train_set_2), 229 | ('valid_1', valid_set_1), ('valid_2', valid_set_2), 230 | ('test_1', test_set_1), ('test_2', test_set_2))) 231 | 232 | if valid_set_1 is None or valid_set_2 is None: 233 | self.datasets.pop('valid_1') 234 | self.datasets.pop('valid_2') 235 | assert run_after_early_stop == False 236 | assert do_early_stop == False 237 | if test_set_1 is None or test_set_2 is None: 238 | self.datasets.pop('test_1') 239 | self.datasets.pop('test_2') 240 | 241 | self.iterator = iterator 242 | self.loss_function = loss_function 243 | self.optimizer = optimizer 244 | self.model_constraint = model_constraint 245 | self.monitors = monitors 246 | self.stop_criterion = stop_criterion 247 | self.remember_best_column = remember_best_column 248 | self.run_after_early_stop = run_after_early_stop 249 | self.model_loss_function = model_loss_function 250 | self.batch_modifier = batch_modifier 251 | self.cuda = cuda 252 | self.epochs_df = pd.DataFrame() 253 | self.before_stop_df = None 254 | self.rememberer = None 255 | self.pin_memory = pin_memory 256 | self.do_early_stop = do_early_stop 257 | self.reset_after_second_run = reset_after_second_run 258 | self.log_0_epoch = log_0_epoch 259 | self.loggers = loggers 260 | self.save_file = save_file 261 | self.tag = tag 262 | self.class_acc = [] 263 | self.save_model = save_model 264 | self.predictions = None 265 | self.probabilites = None 266 | self.lr_scheduler = lr_scheduler 267 | 268 | 269 | def run(self): 270 | """ 271 | Run complete training. 272 | """ 273 | self.setup_training() 274 | log.info("Run until first stop...") 275 | self.run_until_first_stop() 276 | if self.do_early_stop: 277 | # always setup for second stop, in order to get best model 278 | # even if not running after early stop... 
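            # The second-stop criterion set up next is Or(MaxEpochs(2 * best_epoch),
            # ColumnBelow('valid_loss', best first-run train_loss)), so training on the
            # combined train + valid data continues until either condition is met.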
279 | log.info("Setup for second stop...") 280 | self.setup_after_stop_training() 281 | if self.run_after_early_stop: 282 | log.info("Run until second stop...") 283 | loss_to_reach = float(self.epochs_df['train_loss'].iloc[-1]) 284 | self.run_until_second_stop() 285 | if self.reset_after_second_run: 286 | # if no valid loss was found below the best train loss on 1st 287 | # run, reset model to the epoch with lowest valid_misclass 288 | if float(self.epochs_df['valid_loss'].iloc[-1]) > loss_to_reach: 289 | log.info("Resetting to best epoch {:d}".format( 290 | self.rememberer.best_epoch)) 291 | self.rememberer.reset_to_best_model(self.epochs_df, 292 | self.model, 293 | self.optimizer) 294 | 295 | def setup_training(self): 296 | """ 297 | Setup training, i.e. transform model to cuda, 298 | initialize monitoring. 299 | """ 300 | # reset remember best extension in case you rerun some experiment 301 | if self.do_early_stop: 302 | self.rememberer = RememberBest(self.remember_best_column, self.predictions, self.probabilites) 303 | if self.loggers == ('print',): 304 | self.loggers = [Printer()] 305 | self.epochs_df = pd.DataFrame() 306 | if self.cuda: 307 | assert th.cuda.is_available(), "Cuda not available" 308 | self.model.cuda() 309 | 310 | def run_until_first_stop(self): 311 | """ 312 | Run training and evaluation using only training set for training 313 | until stop criterion is fulfilled. 314 | """ 315 | 316 | self.run_until_stop(self.datasets, remember_best=self.do_early_stop) 317 | 318 | def run_until_second_stop(self): 319 | """ 320 | Run training and evaluation using combined training + validation sets 321 | for training on both datasets. 322 | 323 | Runs until loss on validation set decreases below loss on training set 324 | of best epoch or until as many epochs trained after as before 325 | first stop. 326 | """ 327 | datasets = self.datasets 328 | datasets['train_1'] = concatenate_sets([datasets['train_1'], 329 | datasets['valid_1']]) 330 | datasets['train_2'] = concatenate_sets([datasets['train_2'], 331 | datasets['valid_2']]) 332 | 333 | self.run_until_stop(datasets, remember_best=True) 334 | 335 | def run_until_stop(self, datasets, remember_best): 336 | """ 337 | Run training and evaluation on given datasets until stop criterion is 338 | fulfilled. Return predictions and probabilites associated with best epochs. 339 | 340 | Parameters 341 | ---------- 342 | datasets: OrderedDict 343 | Dictionary with train, valid and test as str mapping to 344 | :class:`.SignalAndTarget` objects. 345 | remember_best: bool 346 | Whether to remember parameters at best epoch. 347 | """ 348 | if self.log_0_epoch: 349 | self.monitor_epoch(datasets) 350 | self.log_epoch() 351 | if remember_best: 352 | self.model_predictions, self.model_probabilities = self.rememberer.remember_epoch(self.epochs_df, self.model, self.optimizer, 353 | self.save_file, self.tag, self.class_acc, 354 | self.save_model, self.predictions, self.probabilites) 355 | self.iterator.reset_rng() 356 | while not self.stop_criterion.should_stop(self.epochs_df): 357 | self.run_one_epoch(datasets, remember_best) 358 | 359 | def run_one_epoch(self, datasets, remember_best): 360 | """ 361 | Run training and evaluation on given datasets for one epoch. Batches for 362 | two data types are combined. 363 | 364 | Parameters 365 | ---------- 366 | datasets: OrderedDict 367 | Dictionary with train, valid and test as str mapping to 368 | :class:`.SignalAndTarget` objects. 
369 | remember_best: bool 370 | Whether to remember parameters if this epoch is best epoch. 371 | """ 372 | batch_generator_1 = self.iterator.get_batches(datasets['train_1'], 373 | shuffle=True) 374 | batch_generator_2 = self.iterator.get_batches(datasets['train_2'], 375 | shuffle=True) 376 | combined_batches = combine_batches(batch_generator_1, batch_generator_2) 377 | start_train_epoch_time = time.time() 378 | for inputs_1, targets_1, inputs_2, targets_2 in combined_batches: 379 | if self.batch_modifier is not None: 380 | inputs_1, targets_1 = self.batch_modifier.process(inputs_1, targets_1) 381 | inputs_2, targets_2 = self.batch_modifier.process(inputs_2, targets_2) 382 | 383 | if len(inputs_1) > 0 and len(inputs_2) > 0: 384 | 385 | self.train_batch(inputs_1, targets_1, inputs_2, targets_2) 386 | if self.lr_scheduler != None: 387 | self.lr_scheduler.step() 388 | 389 | end_train_epoch_time = time.time() 390 | 391 | log.info("Time only for training updates: {:.2f}s".format( 392 | end_train_epoch_time - start_train_epoch_time)) 393 | 394 | self.monitor_epoch(datasets) 395 | self.log_epoch() 396 | if remember_best: 397 | self. model_predictions, self.model_probabilities = self.rememberer.remember_epoch(self.epochs_df, self.model, self.optimizer, 398 | self.save_file, self.tag, self.class_acc, 399 | self.save_model, self.predictions, self.probabilites) 400 | 401 | def train_batch(self, inputs_1, targets_1, inputs_2, targets_2): 402 | """ 403 | Train on given inputs and targets. 404 | 405 | Parameters 406 | ---------- 407 | :inputs_1: (torch.autograd.Variable) 408 | :targets_1: (torch.autograd.Variable) 409 | :inputs_2: (torch.autograd.Variable) 410 | :targets_2: (torch.autograd.Variable) 411 | """ 412 | 413 | self.model.train() 414 | input_vars_1 = np_to_var(inputs_1, pin_memory=self.pin_memory) 415 | target_vars_1 = np_to_var(targets_1, pin_memory=self.pin_memory) 416 | input_vars_2 = np_to_var(inputs_2, pin_memory=self.pin_memory) 417 | target_vars_2 = np_to_var(targets_2, pin_memory=self.pin_memory) 418 | 419 | if self.cuda: 420 | input_vars_1 = input_vars_1.cuda() 421 | target_vars_1 = target_vars_1.cuda() 422 | input_vars_2 = input_vars_2.cuda() 423 | target_vars_2 = target_vars_2.cuda() 424 | self.optimizer.zero_grad() 425 | th.autograd.set_detect_anomaly(True) 426 | 427 | outputs = self.model(input_vars_1, input_vars_2) 428 | loss = self.loss_function(outputs, target_vars_1) 429 | if self.model_loss_function is not None: 430 | loss = loss + self.model_loss_function(self.model) 431 | 432 | loss.backward() 433 | 434 | self.optimizer.step() 435 | if self.model_constraint is not None: 436 | self.model_constraint.apply(self.model) 437 | 438 | 439 | def eval_on_batch(self, inputs_1, targets_1, inputs_2, targets_2): 440 | """ 441 | Evaluate given inputs and targets. 
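        Because the network ends in LogSoftmax, per-class probabilities are recovered here
        as th.exp of the (log-probability) outputs before the loss is computed.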
442 | 443 | Parameters 444 | ---------- 445 | :inputs_1: (torch.autograd.Variable) 446 | :targets_1: (torch.autograd.Variable) 447 | :inputs_2: (torch.autograd.Variable) 448 | :targets_2: (torch.autograd.Variable) 449 | 450 | Returns 451 | ------- 452 | predictions: `torch.autograd.Variable` 453 | loss: `torch.autograd.Variable` 454 | 455 | """ 456 | self.model.eval() 457 | with th.no_grad(): 458 | 459 | input_vars_1 = np_to_var(inputs_1, pin_memory=self.pin_memory) 460 | target_vars_1 = np_to_var(targets_1, pin_memory=self.pin_memory) # only 1 target array required 461 | input_vars_2 = np_to_var(inputs_2, pin_memory=self.pin_memory) 462 | 463 | if self.cuda: 464 | input_vars_1 = input_vars_1.cuda() 465 | target_vars_1 = target_vars_1.cuda() 466 | input_vars_2 = input_vars_2.cuda() 467 | 468 | outputs = self.model(input_vars_1, input_vars_2) 469 | 470 | probabilities = th.exp(outputs.cpu()).numpy() # calculated probabilities 471 | 472 | loss = self.loss_function(outputs, target_vars_1) 473 | if hasattr(outputs, 'cpu'): 474 | outputs = outputs.cpu().data.numpy() 475 | else: 476 | 477 | outputs = [o.cpu().data.numpy() for o in outputs] 478 | 479 | loss = loss.cpu().data.numpy() 480 | 481 | return outputs, loss, probabilities 482 | 483 | def monitor_epoch(self, datasets): 484 | """ 485 | Evaluate one epoch for given datasets. 486 | 487 | Stores results in `epochs_df` 488 | 489 | Parameters 490 | ---------- 491 | datasets: OrderedDict 492 | Dictionary with train, valid and test as str mapping to 493 | :class:`.SignalAndTarget` objects. 494 | 495 | """ 496 | result_dicts_per_monitor = OrderedDict() 497 | for m in self.monitors: 498 | result_dicts_per_monitor[m] = OrderedDict() 499 | for m in self.monitors: 500 | result_dict = m.monitor_epoch() 501 | if result_dict is not None: 502 | result_dicts_per_monitor[m].update(result_dict) 503 | 504 | set_1, set_2, set_list = [], [], [] 505 | for i, j in self.datasets.items(): 506 | set_list.append(i) 507 | for i in range(0, len(set_list), 2): 508 | set_1.append(set_list[i]) 509 | set_2.append(set_list[i + 1]) 510 | 511 | 512 | for name_1, name_2 in zip(set_1, set_2): 513 | setname = name_1.split('_')[0] 514 | 515 | batch_gen_1 = self.iterator.get_batches(datasets[name_1], shuffle=False) 516 | batch_gen_2 = self.iterator.get_batches(datasets[name_2], shuffle=False) 517 | combined_batches = combine_batches(batch_gen_1, batch_gen_2) 518 | 519 | all_preds = [] 520 | all_losses = [] 521 | all_probs = [] 522 | all_batch_sizes = [] 523 | all_targets = [] 524 | 525 | for inputs_1, targets_1, inputs_2, targets_2 in combined_batches: 526 | preds, loss, probabilities = self.eval_on_batch(inputs_1, targets_1, 527 | inputs_2, targets_2) 528 | all_preds.append(preds) 529 | all_losses.append(loss) 530 | all_probs.append(probabilities) 531 | all_batch_sizes.append(len(inputs_1)) 532 | all_targets.append(targets_1) 533 | 534 | for m in self.monitors: 535 | result_dict = m.monitor_set(setname, all_preds, all_losses, 536 | all_batch_sizes, all_targets, 537 | combined_batches) 538 | if result_dict is not None: 539 | result_dicts_per_monitor[m].update(result_dict) 540 | row_dict = OrderedDict() 541 | for m in self.monitors: 542 | row_dict.update(result_dicts_per_monitor[m]) 543 | self.epochs_df = self.epochs_df.append(row_dict, ignore_index=True) 544 | assert set(self.epochs_df.columns) == set(row_dict.keys()), f"Columns of dataframe: {str(set(self.epochs_df.columns))}\n and keys of dict {str(set(row_dict.keys()))} not same" 545 | 546 | self.epochs_df = 
self.epochs_df[list(row_dict.keys())] 547 | self.predictions = compute_pred_labels_from_trial_preds(all_preds, None) 548 | self.probabilites = all_probs 549 | 550 | 551 | def log_epoch(self): 552 | """ 553 | Print monitoring values for this epoch. 554 | """ 555 | for logger in self.loggers: 556 | logger.log_epoch(self.epochs_df) 557 | 558 | def setup_after_stop_training(self): 559 | """ 560 | Setup training after first stop. 561 | 562 | Resets parameters to best parameters and updates stop criterion. 563 | """ 564 | # also remember old monitor chans, will be put back into 565 | # monitor chans after experiment finished 566 | self.before_stop_df = deepcopy(self.epochs_df) 567 | self.rememberer.reset_to_best_model(self.epochs_df, self.model, 568 | self.optimizer) 569 | loss_to_reach = float(self.epochs_df['train_loss'].iloc[-1]) 570 | self.stop_criterion = Or(stop_criteria=[ 571 | MaxEpochs(max_epochs=self.rememberer.best_epoch * 2), 572 | ColumnBelow(column_name='valid_loss', target_value=loss_to_reach)]) 573 | log.info(f"Train loss to reach {loss_to_reach}") 574 | 575 | 576 | -------------------------------------------------------------------------------- /BiModNeuroCNN/results/results.py: -------------------------------------------------------------------------------- 1 | from BiModNeuroCNN.results.dataframe_utils import results_df, get_col_list, param_scores_df 2 | from BiModNeuroCNN.utils import load_pickle 3 | import numpy as np 4 | import pandas as pd 5 | import pickle 6 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, precision_recall_fscore_support, confusion_matrix, cohen_kappa_score 7 | import matplotlib.pyplot as plt 8 | import seaborn as sns 9 | import os 10 | import sys 11 | import h5py 12 | 13 | class Results(): 14 | 15 | direct = 'C:/Users/sb00745777/OneDrive - Ulster University/Study_3/Subject_Data' 16 | 17 | def __init__(self, save_path, folds=5, tag='', name="A"): 18 | self.save_path = save_path 19 | self.n_folds = folds 20 | self.y_true_list = [] 21 | self.y_true = np.array([]) 22 | self.y_pred_list = [] 23 | self.y_pred = np.array([]) 24 | self.y_probs = None 25 | self.results_path = None 26 | self.lossdf = None 27 | self.accdf = None 28 | self.cross_entropydf = None 29 | self.subject_stats_df = None 30 | self.best_params = None 31 | self.hyp_param_means = [] 32 | self.outer_fold_accuracies = [] # list of scores - 1 per fold 33 | self.outer_fold_cross_entropies = [] 34 | self.of_mean = None 35 | self.of_std = None 36 | self.accuracy = None 37 | self.precision = None 38 | self.f1_score = None 39 | self.recall = None 40 | self.kappa = None 41 | self.precision_list = [] 42 | self.f1_score_list = [] 43 | self.recall_list = [] 44 | self.kappa_list = [] 45 | self.cm = None 46 | self.train_loss = None 47 | self.test_loss = None 48 | self.valid_loss = None 49 | self.train_acc = None 50 | self.test_acc = None 51 | self.valid_acc = None 52 | self.tag = tag 53 | self.id = name 54 | 55 | def __repr__(self): 56 | return f"" 57 | 58 | def __str__(self): 59 | return f"Class for creating a Results object containing several metrics" 60 | 61 | def __getattr__(self, name): 62 | if name == "fold_accuracies": 63 | for i, j in enumerate(self.outer_fold_accuracies): 64 | print (f"Fold {i+1}: {j}%") 65 | folds_mean = np.mean(self.outer_fold_accuracies) 66 | return f"Mean: {folds_mean}%" 67 | elif name == "classes": 68 | unique, counts = np.unique(self.y_true, return_counts=True) 69 | return [f"Class {u}:{c}" for u,c in zip(unique, counts)] 70 | elif name == 
"predictions": 71 | unique, counts = np.unique(self.y_pred, return_counts=True) 72 | return [f"Class {u}:{c}" for u,c in zip(unique, counts)] 73 | elif name in dir(self): 74 | return name 75 | else: 76 | print(dir(self)) 77 | raise AttributeError(f"'{name}' does not exist in this context") 78 | 79 | def __setattr__(self, name, value): 80 | if name == "fold_accuracies": 81 | self.outer_fold_accuracies = value 82 | else: 83 | super().__setattr__(name, value) 84 | 85 | def change_directory(self, direct): 86 | self.direct = direct 87 | 88 | def set_results_path(self, results_path): 89 | self.results_path = results_path 90 | 91 | def concat_y_true(self, y_true_fold): 92 | """ 93 | Method for combining all outer-fold ground-truth values. 94 | :param y_true_fold: array of single-fold true values. 95 | :return: all outer fold true values in single arrau 96 | """ 97 | self.y_true = np.concatenate((self.y_true, np.array(y_true_fold))) 98 | 99 | def concat_y_pred(self, y_pred_fold): 100 | """ 101 | Method for combining all outer-fold ground-truth values. 102 | :param y_pred_fold: array of single-fold true values. 103 | :return: all outer fold true values in single arrau 104 | """ 105 | self.y_pred = np.concatenate((self.y_pred, np.array(y_pred_fold))) 106 | 107 | def append_y_true(self, y_true_fold): 108 | """ 109 | Method for combining all outer-fold ground-truth values. 110 | :param y_true_fold: array of single-fold true values. 111 | :return: list of outer fold true values. Each element contains one fold 112 | """ 113 | self.y_true_list.append((np.array(y_true_fold))) 114 | 115 | def append_y_pred(self, y_pred_fold): 116 | """ 117 | Method for combining all outer-fold ground-truth values. 118 | :param y_pred_fold: array of single-fold true values. 119 | :return: list of outer fold true values. Each element contains one fold 120 | """ 121 | self.y_pred_list.append((np.array(y_pred_fold))) 122 | 123 | def get_acc_loss_df(self, hyp_params, index_name, nested=True): 124 | """ 125 | Instantiates pd.DataFrames for storing accuracy or loss metrics for each fold 126 | and hyperparameter set. 127 | :param hyp_params (dict) keys: names of hyp_params, values: lists of HP values 128 | :param index_name (str) index name for dataframe 129 | """ 130 | if nested: 131 | index = list(n+1 for n in range(self.n_folds*self.n_folds)) 132 | else: 133 | index = list(n+1 for n in range(self.n_folds)) 134 | index.append("Mean") 135 | index.append("Std.") 136 | columns_list = get_col_list(hyp_params) 137 | 138 | names = list(hyp_params.keys()) 139 | 140 | self.lossdf = results_df(index,index_name,columns_list,names) 141 | self.accdf = results_df(index,index_name,columns_list,names) 142 | self.cross_entropydf = results_df(index,index_name,columns_list,names) 143 | 144 | 145 | def fill_acc_loss_df(self, inner_fold_accs=None, inner_fold_loss=None, inner_fold_CE=None, save=True): 146 | """ 147 | Method for inserting all inner-fold accuracies and losses associated with each hyper-parameter 148 | combination in a dataframe. Mean and Std. computed. The dataframes can be used to select optimal 149 | hyper-parameters. 150 | :param inner_fold_accs: list containing all inner-fold accuracy scores 151 | :param inner_fold_loss: list containing all inner-fold loss values 152 | :param inner_fold_CE: list containing all inner-fold CE values 153 | :param save: Boolean 154 | :return: Dataframes in which each column represents a particular hyper-parameter set. 
155 | """ 156 | if inner_fold_accs is not None: 157 | for n, acc in enumerate(inner_fold_accs): 158 | self.accdf.iloc[n] = acc 159 | self.accdf.loc["Mean"].iloc[0] = self.accdf.iloc[1:n].mean(axis=0).values 160 | self.accdf.loc["Std."].iloc[0] = self.accdf.iloc[1:n].std(axis=0).values 161 | if save: 162 | try: 163 | self.accdf.to_excel(f"{self.save_path}/HP_acc{self.tag}.xlsx") 164 | except: 165 | self.accdf.to_excel(f"{self.save_path}/HP_acc{self.tag}.xlsx", engine='xlsxwriter') 166 | 167 | if inner_fold_loss is not None: 168 | for n, loss in enumerate(inner_fold_loss): 169 | self.lossdf.iloc[n] = loss 170 | self.lossdf.loc["Mean"].iloc[0] = self.lossdf.iloc[1:n].mean(axis=0).values 171 | self.lossdf.loc["Std."].iloc[0] = self.lossdf.iloc[1:n].std(axis=0).values 172 | if save: 173 | try: 174 | self.lossdf.to_excel(f"{self.save_path}/HP_loss{self.tag}.xlsx") 175 | except: 176 | self.lossdf.to_excel(f"{self.save_path}/HP_loss{self.tag}.xlsx", engine='xlsxwriter') 177 | 178 | if inner_fold_CE is not None: 179 | for n, ce in enumerate(inner_fold_CE): 180 | self.cross_entropydf.iloc[n] = ce 181 | self.cross_entropydf.loc["Mean"].iloc[0] = self.cross_entropydf.iloc[1:n].mean(axis=0).values 182 | self.cross_entropydf.loc["Std."].iloc[0] = self.cross_entropydf.iloc[1:n].std(axis=0).values 183 | if save: 184 | try: 185 | self.cross_entropydf.to_excel(f"{self.save_path}/HP_CE{self.tag}.xlsx") 186 | except: 187 | self.cross_entropydf.to_excel(f"{self.save_path}/HP_CE{self.tag}.xlsx", engine='xlsxwriter') 188 | 189 | 190 | def get_best_params(self, selection_method, save_path=None): 191 | """ 192 | Method for returning best hyper-parameter combination from inner fold accuracy or loss. 193 | :param selection_method: str: "accuracy" Or "loss". 194 | :return: list of optimal hyper-parameters. 
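        Illustrative usage (a sketch; assumes accdf/lossdf were already populated via fill_acc_loss_df, and the instance name is a placeholder):

            res.get_best_params(selection_method='accuracy')
            print(res.best_params)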
195 | """ 196 | if save_path == None: # can overwrite object save_path with argument if required 197 | save_path = self.save_path 198 | 199 | if selection_method == "accuracy": 200 | self.best_params = list(self.accdf.columns[self.accdf.loc["Mean"].values.argmax()]) 201 | else: 202 | self.best_params = list(self.lossdf.columns[self.lossdf.loc["Mean"].values.argmin()]) 203 | best_params = pd.DataFrame(dict(best_params=self.best_params)) 204 | try: 205 | best_params.to_excel(f"{save_path}/BestParameters{self.tag}.xlsx") 206 | except: 207 | best_params.to_excel(f"{save_path}/BestParameters{self.tag}.xlsx", engine='xlsxwriter') # occasional problems with writing 208 | 209 | 210 | def get_hp_means(self, hyp_params, selection_method, save=False, save_path=None): 211 | """ 212 | Extracts mean accuracies for specific HP values (as opposed to HP sets) 213 | 214 | :param hyp_params (dict) keys: names of hyp_params, values: lists of HP values 215 | :param selection_method (str) 'accuracy' OR 'loss' 216 | """ 217 | if save_path == None: # can overwrite object save_path with argument if required 218 | save_path = self.save_path 219 | 220 | columns_list = get_col_list(hyp_params) 221 | for HP in columns_list: 222 | 223 | for value in HP: 224 | if selection_method == 'accuracy': 225 | sub_df = self.accdf[[i for i in self.accdf.columns if i[0] == value or i[1] == value or i[2] == value or i[3] == value]] 226 | self.hyp_param_means.append((value, sub_df.loc["Mean"].values.mean())) 227 | else: 228 | sub_df = self.lossdf[[i for i in self.lossdf.columns if i[0] == value or i[1] == value or i[2] == value or i[3] == value]] 229 | self.hyp_param_means.append((value, sub_df.loc["Mean"].values.mean())) 230 | if save: 231 | hp_val_list, hp_mean_list = [], [] 232 | for tup in self.hyp_param_means: 233 | hp_val_list.append(tup[0]) 234 | hp_mean_list.append(tup[1]) 235 | hp_means_df = pd.DataFrame(dict(HP_value=hp_val_list, HP_mean=hp_mean_list)) 236 | try: 237 | hp_means_df.to_excel(f"{save_path}/HP_means{self.tag}.xlsx") 238 | except: 239 | hp_means_df.to_excel(f"{save_path}/HP_means{self.tag}.xlsx", engine='xlsxwriter') # occasional problems with writing 240 | 241 | 242 | def set_outer_fold_accuracies(self, outer_fold_accuracies): 243 | self.outer_fold_accuracies = outer_fold_accuracies 244 | self.of_mean = np.mean(outer_fold_accuracies) 245 | self.of_std = np.std(outer_fold_accuracies) 246 | 247 | def get_accuracy(self): 248 | """ 249 | Method for calculating accuracy from all true and predicted values. 250 | :return: accuracy value (%) rounded to 3 decimal places. 
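        Illustrative usage (a sketch with placeholder fold values):

            res.concat_y_true([0, 1, 1, 0])
            res.concat_y_pred([0, 1, 0, 0])
            res.get_accuracy()
            print(res.accuracy)  # 75.0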
251 | """ 252 | assert len(self.y_true) == len(self.y_pred), "data must be of equal length" 253 | self.accuracy = round((accuracy_score(self.y_true, self.y_pred) * 100), 3) 254 | 255 | def get_precision(self): 256 | assert len(self.y_true) == len(self.y_pred), "data must be of equal length" 257 | self.precision = round((precision_score(self.y_true, self.y_pred, average="macro") * 100), 3) 258 | 259 | def get_recall(self): 260 | assert len(self.y_true) == len(self.y_pred), "data must be of equal length" 261 | self.recall = round((recall_score(self.y_true, self.y_pred, average='macro') * 100), 3) 262 | 263 | def get_f_score(self): 264 | assert len(self.y_true) == len(self.y_pred), "data must be of equal length" 265 | self.f1_score = round((f1_score(self.y_true, self.y_pred, average='macro') * 100), 3) 266 | 267 | def get_kappa_value(self): 268 | assert len(self.y_true) == len(self.y_pred), "data must be of equal length" 269 | self.kappa = round(cohen_kappa_score(self.y_true, self.y_pred),3) 270 | 271 | def precision_recall_f_score(self): 272 | assert len(self.y_true) == len(self.y_pred), "data must be of equal length" 273 | precision_recall_fscore_support(self.y_true, self.y_pred) 274 | 275 | def confusion_matrix(self): 276 | assert len(self.y_true) == len(self.y_pred), "data must be of equal length" 277 | self.cm = confusion_matrix(self.y_true, self.y_pred) 278 | 279 | def subject_stats(self): 280 | """ 281 | Method for constructing and saving a Pandas Dataframe with Accuracy and 282 | statistical scores as below: 283 | fold 1 fold 2 Mean Std. Precision Recall F1 Score 284 | 01 18.065 16.779 17.422 0.643 16.447 16.447 16.447 285 | """ 286 | 287 | folds = [] 288 | for i in range(1, self.n_folds+1): 289 | folds.append(f'fold {i}') 290 | 291 | if np.array(self.outer_fold_accuracies).ndim == 1: 292 | self.subject_stats_df = pd.DataFrame(index=[self.id], columns=folds) 293 | self.subject_stats_df.iloc[0] = self.outer_fold_accuracies 294 | self.get_accuracy() 295 | self.subject_stats_df['Subj Mean'] = self.subject_stats_df.mean(axis=1, skipna=True) 296 | self.subject_stats_df['Subj Std.'] = self.subject_stats_df.drop('Subj Mean',axis=1).std(axis=1, skipna=True) 297 | self.get_precision() 298 | self.get_recall() 299 | self.get_f_score() 300 | self.subject_stats_df['Precision'] = self.precision 301 | self.subject_stats_df['Recall'] = self.recall 302 | self.subject_stats_df['F1 Score'] = self.f1_score 303 | for n,ce in enumerate(self.outer_fold_cross_entropies): 304 | self.subject_stats_df[f"CE - fold {n+1}"] = ce 305 | self.subject_stats_df["CE mean"] = np.mean(self.outer_fold_cross_entropies) 306 | self.subject_stats_df["CE std."] = np.std(self.outer_fold_cross_entropies) 307 | 308 | handle = f"{self.save_path}/statistics{self.tag}.xlsx" 309 | 310 | else: 311 | self.subject_stats_df = pd.DataFrame(index=[self.ids], columns=folds) 312 | for n,score in enumerate(self.outer_fold_accuracies): 313 | self.subject_stats_df.iloc[n] = score 314 | self.subject_stats_df['Subj Mean'] = self.subject_stats_df.mean(axis=1, skipna=True) 315 | self.subject_stats_df['Subj Std.'] = self.subject_stats_df.drop('Subj Mean',axis=1).std(axis=1, skipna=True) 316 | self.subject_stats_df['Precision'] = self.precision_list 317 | self.subject_stats_df['Recall'] = self.recall_list 318 | self.subject_stats_df['F1 Score'] = self.f1_score_list 319 | 320 | # adding cross-entropy values for each fold 321 | for n,_ in enumerate(folds): 322 | self.subject_stats_df[f"CE - fold {n+1}"] = "" 323 | for n,ce_list in 
enumerate(self.outer_fold_cross_entropies): 324 | for m,ce in enumerate(ce_list): 325 | self.subject_stats_df[f"CE - fold {m+1}"].iloc[n] = ce 326 | self.subject_stats_df["CE mean"] = self.outer_fold_ce_means 327 | self.subject_stats_df["CE std."] = self.outer_fold_ce_std 328 | 329 | self.subject_stats_df.loc["Mean"] = self.subject_stats_df.iloc[0:len(self.ids)].mean(axis=0).values 330 | self.subject_stats_df.loc["Std."] = self.subject_stats_df.iloc[0:len(self.ids)].std(axis=0).values 331 | 332 | handle = f"{self.results_path}/combined_stats{self.tag}.xlsx" 333 | try: 334 | self.subject_stats_df.to_excel(handle) 335 | except: 336 | self.subject_stats_df.to_excel(handle, engine='xlsxwriter') 337 | 338 | 339 | def save_result(self): 340 | filename = f"{self.save_path}/results_object{self.tag}.pickle" 341 | filehandler = open(filename, 'wb') 342 | try: 343 | pickle.dump(self.__dict__, filehandler, protocol=pickle.HIGHEST_PROTOCOL) 344 | except ValueError: 345 | file = f"{self.save_path}/results_object_alt_{self.tag}" 346 | self.save_as_pickled_object(file) 347 | 348 | def update(self, newdata): 349 | for key,value in newdata.items(): 350 | setattr(self,key,value) 351 | 352 | @classmethod 353 | def load_result(self, f_name): 354 | with open(f_name, 'rb') as f: 355 | tmp_dict = pickle.load(f) 356 | f.close() 357 | self.update(self, tmp_dict) 358 | return self 359 | 360 | def save_as_pickled_object(self, filepath): 361 | """ 362 | This is a defensive way to write pickle.write, allowing for very large files on all platforms 363 | """ 364 | subject = dict(subject=self) 365 | max_bytes = 2 ** 31 - 1 366 | bytes_out = pickle.dumps(subject) 367 | n_bytes = sys.getsizeof(bytes_out) 368 | with open(filepath, 'wb') as f_out: 369 | for idx in range(0, n_bytes, max_bytes): 370 | f_out.write(bytes_out[idx:idx + max_bytes]) 371 | 372 | def try_to_load_as_pickled_object_or_None(filepath): 373 | """ 374 | This is a defensive way to write pickle.load, allowing for very large files on all platforms 375 | """ 376 | max_bytes = 2 ** 31 - 1 377 | try: 378 | input_size = os.path.getsize(filepath) 379 | bytes_in = bytearray(0) 380 | with open(filepath, 'rb') as f_in: 381 | for _ in range(0, input_size, max_bytes): 382 | bytes_in += f_in.read(max_bytes) 383 | obj = pickle.loads(bytes_in) 384 | except: 385 | return None 386 | return obj 387 | 388 | 389 | class CombinedResults(Results): 390 | """ 391 | Written for combining the results of multiple subject/experiments. 
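    Illustrative construction (a sketch; the paths, pickle file names, subject ids and tag are placeholders):

        combined = CombinedResults(save_path='./results/combined', load_path='./results',
                                   f_names=['results_object_S01', 'results_object_S02'],
                                   folds=5, ids=['S01', 'S02'], tag='_demo')
        combined.get_subject_results()
        combined.cross_val_results_df(accuracy=True, cross_entropy=True, save=False)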
392 | """ 393 | 394 | def __init__(self, save_path, load_path, f_names, folds, ids, tag): 395 | 396 | super().__init__(save_path, folds, tag) 397 | 398 | self.load_path = load_path 399 | self.f_names = f_names 400 | self.ids = ids 401 | self.total_cross_val_df = None 402 | self.total_best_hps = [] #list of best HPs for each subject 403 | self.BestParams = None 404 | self.hp_results_df = None 405 | self.outer_fold_ce_means = [] 406 | self.outer_fold_ce_std = [] 407 | self.combined_train_loss = [] 408 | self.combined_test_loss = [] 409 | self.combined_valid_loss = [] 410 | self.combined_train_acc = [] 411 | self.combined_test_acc = [] 412 | self.combined_valid_acc = [] 413 | self.HP_acc = pd.DataFrame(columns=self.ids) 414 | self.HP_loss = pd.DataFrame(columns=self.ids) 415 | self.HP_ce = pd.DataFrame(columns=self.ids) 416 | self.total_number = 0 417 | 418 | def __repr__(self): 419 | return f"" 420 | 421 | def __str__(self): 422 | return f"Class for combining the results from multiple subjects/experiments" 423 | 424 | def __len__(self): 425 | return len(self.f_names) 426 | 427 | def __getattr__(self, name): 428 | if name == "all_ids": 429 | return [(n+1, i) for n, i in enumerate(self.ids[:-2])] 430 | 431 | def cross_val_results_df(self, accuracy=True, cross_entropy=False, save=True): 432 | """ 433 | Combine all results into single pd.DataFrame, calculate mean and stdev. and store 434 | in Excel format 435 | 436 | :param: accuracy (bool) True if accuracy scores to be considered 437 | :param: cross entropy (bool) True if cross entropy scores to be considered 438 | :param: save (bool) True if results are to be stored as xlsx 439 | """ 440 | folds = [] 441 | for i in range(1, self.n_folds+1): 442 | folds.append(f'fold {i}') 443 | 444 | if accuracy: 445 | assert len(self.outer_fold_accuracies) != [], "Scores must be loaded to CombinedResults.outer_fold_accuracies" 446 | assert len(self.outer_fold_accuracies) == len(self.ids), "Number of subjects and results are not equal" 447 | assert len(self.outer_fold_accuracies[0]) == self.n_folds, "Number of scores and folds are not equal" 448 | 449 | self.total_acc_df = pd.DataFrame(index=self.ids, columns=folds) 450 | 451 | for n,score in enumerate(self.outer_fold_accuracies): 452 | self.total_acc_df.iloc[n] = score 453 | 454 | self.total_acc_df['Mean'] = self.total_acc_df.mean(axis=1,skipna=True) 455 | self.total_acc_df['Std.'] = self.total_acc_df.drop('Mean',axis=1).std(axis=1,skipna=True) 456 | 457 | if cross_entropy: 458 | assert len(self.outer_fold_cross_entropies) != [], "Scores must be loaded to CombinedResults.outer_fold_accuracies" 459 | assert len(self.outer_fold_cross_entropies) == len(self.ids), "Number of subjects and results are not equal" 460 | assert len(self.outer_fold_cross_entropies[0]) == self.n_folds, "Number of scores and folds are not equal" 461 | 462 | self.total_ce_df = pd.DataFrame(index=self.ids, columns=folds) 463 | 464 | for n,score in enumerate(self.outer_fold_cross_entropies): 465 | self.total_ce_df.iloc[n] = score 466 | 467 | self.total_ce_df['Mean'] = self.total_ce_df.mean(axis=1,skipna=True) 468 | self.total_ce_df['Std.'] = self.total_ce_df.drop('Mean',axis=1).std(axis=1,skipna=True) 469 | 470 | if save: 471 | if accuracy and cross_entropy: 472 | with pd.ExcelWriter(f'{self.save_path}/combined_scores.xlsx') as writer: 473 | self.total_acc_df.to_excel(writer, sheet_name='accuracy') 474 | self.total_ce_df.to_excel(writer, sheet_name='cross_entropy') 475 | elif not cross_entropy: 476 | with 
pd.ExcelWriter(f'{self.save_path}/combined_scores.xlsx') as writer: 477 | self.total_acc_df.to_excel(writer, sheet_name='accuracy') 478 | elif not accuracy: 479 | with pd.ExcelWriter(f'{self.save_path}/combined_scores.xlsx') as writer: 480 | self.total_ce_df.to_excel(writer, sheet_name='cross_entropy') 481 | 482 | def get_subject_results(self): 483 | """ 484 | Read in multiple results.Results objects and extract the required metrics into containers 485 | for further processing. 486 | """ 487 | 488 | for f_name in self.f_names: 489 | 490 | results_object = self.load_result(f"{self.load_path}/{f_name}.pickle") 491 | 492 | self.y_true = np.concatenate((self.y_true, results_object.y_true)) 493 | self.y_pred = np.concatenate((self.y_pred, results_object.y_pred)) # all true and prediction values 494 | 495 | self.outer_fold_accuracies.append(results_object.outer_fold_accuracies) 496 | self.outer_fold_cross_entropies.append(results_object.outer_fold_cross_entropies) 497 | self.outer_fold_ce_means.append(np.mean(results_object.outer_fold_cross_entropies)) 498 | self.outer_fold_ce_std.append(np.std(results_object.outer_fold_cross_entropies)) 499 | 500 | results_object.precision = self.get_res_obj_precision(results_object) 501 | results_object.recall = self.get_res_obj_recall(results_object) 502 | results_object.f1_score = self.get_res_obj_f_score(results_object) 503 | results_object.kappa = self.get_res_obj_kappa_value(results_object) 504 | self.precision_list.append(results_object.precision) 505 | self.f1_score_list.append(results_object.f1_score) 506 | self.recall_list.append(results_object.recall) 507 | self.kappa_list.append(results_object.kappa) 508 | 509 | self.total_best_hps.append(results_object.best_params) 510 | self.hyp_param_means.append(results_object.hyp_param_means) 511 | 512 | self.combined_train_loss.append(results_object.train_loss) 513 | self.combined_test_loss.append(results_object.test_loss) 514 | self.combined_valid_loss.append(results_object.valid_loss) 515 | self.combined_train_acc.append(results_object.train_acc) 516 | self.combined_test_acc.append(results_object.test_acc) 517 | self.combined_valid_acc.append(results_object.valid_acc) 518 | 519 | # Save combined predictions and ground truth values to csv 520 | # np.savetxt(f"{self.direct}/results/{self.paradigm.replace('EEG_', '')}/y_true.csv", [self.y_true], 521 | # delimiter=',', fmt='%d') 522 | # np.savetxt(f"{self.direct}/results/{self.paradigm.replace('EEG_', '')}/y_pred.csv", [self.y_pred], 523 | # delimiter=',', fmt='%d') 524 | 525 | 526 | def param_scores(self, hyp_params): 527 | """ 528 | Saves a Pandas DataFrame as an Excel file which contains average inner-fold accuracy (or loss) 529 | for each independent hyperparameter value, and for all subjects 530 | :param hyp_params: dict containing all hyperparameter keys and values.
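        Illustrative usage (a sketch; assumes get_subject_results() has already populated hyp_param_means, and the grid values are placeholders):

            combined.param_scores({'lr': [1e-3, 1e-4], 'dropout': [0.2, 0.5]})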
531 | """ 532 | paramscores_df = param_scores_df(self.ids, hyp_params) 533 | 534 | for i, j in enumerate(self.hyp_param_means): 535 | paramscores_df.iloc[i] = [score[1] for score in j] 536 | 537 | paramscores_df.loc["Mean"] = paramscores_df[0:len(self.ids)].mean(axis=0, skipna=True) 538 | paramscores_df.loc["Std."] = paramscores_df[0:len(self.ids)].std(axis=0, skipna=True) 539 | paramscores_df.to_excel(f"{self.save_path}/param_scores.xlsx") 540 | 541 | 542 | def inter_subject_hps(self, hyp_params, index_name, selection_method): 543 | """ 544 | Saves a Pandas DataFrame as an Excel file which contains average inner-fold accuracy (or loss) 545 | for each independent hyperparameter value, and for all subjects 546 | :param hyp_params: dict containing all hyperparameter keys and values. 547 | :param index_name: str name to give index column. 548 | :param selection_method: str "accuracy" OR "loss". 549 | """ 550 | index = self.ids 551 | columns_list = get_col_list(hyp_params) 552 | names = list(hyp_params.keys()) 553 | 554 | self.hp_results_df = results_df(index, index_name, columns_list, names) 555 | 556 | combined_hp = [] 557 | for f_name in self.f_names: 558 | 559 | results_object = self.load_result(f"{self.load_path}/{f_name}.pickle") 560 | 561 | acc = results_object.accdf.loc['Mean'].values 562 | combined_hp.append(acc) 563 | 564 | for i, j in enumerate(combined_hp): 565 | self.hp_results_df.iloc[i] = j 566 | self.hp_results_df.loc["Mean"].iloc[0] = self.hp_results_df.iloc[0:len(self.ids)].mean(axis=0, skipna=True) 567 | self.hp_results_df.loc["Std."].iloc[0] = self.hp_results_df.iloc[0:len(self.ids)].std(axis=0, skipna=True) 568 | self.hp_results_df.to_excel(f"{self.save_path}/total_hp_scores.xlsx") 569 | 570 | self.BestParams = self.hp_results_df.columns[self.hp_results_df.loc["Mean"].values.argmax()] 571 | self.BestParams = pd.DataFrame(dict(BestParams=self.BestParams)) 572 | self.BestParams.to_excel(f"{self.save_path}/BestParams.xlsx") 573 | 574 | 575 | def get_combined_inner_scores(self): 576 | """ 577 | Create pd.DataFrames to contain inner-fold validation accuracies/loss/cross entropy 578 | for all subjects or experiments and compute a mean - can be used for selecting inter-subject 579 | hyperparameters. 
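        Illustrative usage (a sketch; the instance name is a placeholder):

            combined.get_combined_inner_scores()
            print(combined.HP_acc['Mean'])  # mean inner-fold accuracy per hyperparameter set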
580 | """ 581 | 582 | for i, f_name in enumerate(self.f_names): 583 | 584 | results_object = self.load_result(f"{self.load_path}/{f_name}.pickle") 585 | 586 | self.HP_acc[self.ids[i]] = results_object.accdf.loc['Mean'].apply(lambda x : x * 100).values.ravel() 587 | self.HP_loss[self.ids[i]] = results_object.lossdf.loc['Mean'].values.ravel() 588 | self.HP_ce[self.ids[i]] = results_object.cross_entropydf.loc['Mean'].values.ravel() 589 | 590 | self.HP_acc.fillna(0, inplace=True) # zero-filling -- mean-filling may be a better option 591 | self.HP_loss.fillna(0, inplace=True) 592 | self.HP_ce.fillna(0, inplace=True) 593 | self.HP_acc['Mean'] = self.HP_acc.mean(axis=1, skipna=True) 594 | self.HP_loss['Mean'] = self.HP_loss.mean(axis=1, skipna=True) 595 | self.HP_ce['Mean'] = self.HP_ce.mean(axis=1, skipna=True) 596 | 597 | @staticmethod 598 | def get_res_obj_precision(res_obj): 599 | assert len(res_obj.y_true) == len(res_obj.y_pred), "data must be of equal length" 600 | return round((precision_score(res_obj.y_true, res_obj.y_pred, average="macro") * 100), 3) 601 | 602 | @staticmethod 603 | def get_res_obj_recall(res_obj): 604 | assert len(res_obj.y_true) == len(res_obj.y_pred), "data must be of equal length" 605 | return round((recall_score(res_obj.y_true, res_obj.y_pred, average='macro') * 100), 3) 606 | 607 | @staticmethod 608 | def get_res_obj_f_score(res_obj): 609 | assert len(res_obj.y_true) == len(res_obj.y_pred), "data must be of equal length" 610 | return round((f1_score(res_obj.y_true, res_obj.y_pred, average='macro') * 100), 3) 611 | 612 | @staticmethod 613 | def get_res_obj_kappa_value(res_obj): 614 | assert len(res_obj.y_true) == len(res_obj.y_pred), "data must be of equal length" 615 | return round(cohen_kappa_score(res_obj.y_true, res_obj.y_pred),3) -------------------------------------------------------------------------------- /BiModNeuroCNN/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name: Ciaran Cooney 3 | Date: 12/01/2019 4 | Description: Functions required for data processing and training of 5 | CNNs on imagined speech EEG data. 6 | """ 7 | 8 | import pickle 9 | import os 10 | import numpy as np 11 | import pandas as pd 12 | import matplotlib.pyplot as plt 13 | import seaborn as sns 14 | import time 15 | from functools import wraps 16 | 17 | def eeg_to_3d(data, epoch_size, n_events,n_chan): 18 | """ 19 | function to return a 3D EEG data format from a 2D input. 
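    Example (illustrative, with made-up sizes): a 2D array of shape (n_chan, n_events*epoch_size), e.g. (64, 10*4096), is returned with shape (10, 64, 4096).

        data_3d = eeg_to_3d(data_2d, epoch_size=4096, n_events=10, n_chan=64)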
20 | Parameters: 21 | data: 2D np.array of EEG 22 | epoch_size: number of samples per trial, int 23 | n_events: number of trials, int 24 | n_chan: number of channels, int 25 | 26 | Output: 27 | np.array of shape n_events * n_chans * n_samples 28 | """ 29 | idx, a, x = ([] for i in range(3)) 30 | [idx.append(i) for i in range(0,data.shape[1],epoch_size)] 31 | for j in data: 32 | [a.append([j[idx[k]:idx[k]+epoch_size]]) for k in range(len(idx))] 33 | 34 | 35 | return np.reshape(np.array(a),(n_events,n_chan,epoch_size)) 36 | 37 | def load_pickle(direct, folder, filename): 38 | 39 | for file in os.listdir(direct + folder): 40 | if file.endswith(filename): 41 | pickle_file = (direct + folder + '/' + file) 42 | with open(pickle_file, 'rb') as f: 43 | file = pickle.load(f) 44 | 45 | return file, pickle_file 46 | 47 | def create_events(data, labels): 48 | events = [] 49 | x = np.zeros((data.shape[0], 3)) 50 | for i in range(data.shape[0]): 51 | x[i][0] = i 52 | x[i][2] = labels[i] 53 | [events.append(list(map(int, x[i]))) for i in range(data.shape[0])] 54 | return np.array(events) 55 | 56 | def reverse_coeffs(coeffs, N): 57 | """ Reverse order of coefficients in an array.""" 58 | idx = np.array([i for i in reversed(range(N))]) 59 | coeffs = coeffs[idx] 60 | coeffs = coeffs.reshape((N,1)) 61 | z = np.zeros((N,1)) 62 | return np.append(coeffs, z, axis=1) , coeffs 63 | 64 | def class_ratios(labels): 65 | unique, counts = np.unique(labels, return_counts=True) 66 | class_weight = dict() 67 | for i in range(len(unique)): 68 | class_weight[unique[i]] = len(labels) / (len(unique)*counts[i]) 69 | return class_weight 70 | 71 | def classification_report_csv(report, output_file): 72 | report_data = [] 73 | lines = report.split('\n') 74 | for line in lines[2:-3]: 75 | row = {} 76 | row_data = line.split(' ') 77 | row['class'] = row_data[0] 78 | row['precision'] = float(row_data[1]) 79 | row['recall'] = float(row_data[2]) 80 | row['f1_score'] = float(row_data[3]) 81 | row['support'] = float(row_data[4]) 82 | report_data.append(row) 83 | dataframe = pd.DataFrame.from_dict(report_data) 84 | dataframe.to_csv(output_file + '.csv', index = False) 85 | 86 | def load_features(direct, dict_key1, dict_key2=None): 87 | with open(direct, 'rb') as f: 88 | file = pickle.load(f) 89 | if dict_key2 == None: 90 | return np.array(file[dict_key1]) 91 | else: 92 | return np.array(file[dict_key1]), np.array(file[dict_key2]) 93 | 94 | def short_vs_long(features, labels, split, event_id): 95 | """Function for multilabel data into binary-class sets i.e., 96 | short words and long words 97 | """ 98 | short, long, s_idx, l_idx, s_features, l_features = ([] for i in range(6)) 99 | 100 | [short.append(event_id[i]) for i in event_id if len(i) <= split] 101 | [long.append(event_id[i]) for i in event_id if len(i) > split] 102 | 103 | [s_idx.append(i) for i, e in enumerate(labels) if e in short] 104 | [l_idx.append(i) for i, e in enumerate(labels) if e in long] 105 | 106 | [s_features.append(e) for i, e in enumerate(features) if i in s_idx] 107 | [l_features.append(e) for i, e in enumerate(features) if i in l_idx] 108 | 109 | s_labels = np.zeros(np.array(s_features).shape[0]) 110 | l_labels = np.ones(np.array(l_features).shape[0]) 111 | 112 | features = np.concatenate((s_features, l_features)) 113 | labels = np.concatenate((s_labels,l_labels)) 114 | 115 | return s_features, l_features, s_labels, l_labels, features, labels 116 | 117 | def return_indices(event_id, labels): 118 | indices = [] 119 | for _, k in enumerate(event_id): 120 | idx = [] 
121 | for d, j in enumerate(labels): 122 | if event_id[k] == j: 123 | idx.append(d) 124 | indices.append(idx) 125 | return indices 126 | 127 | def load_subject_eeg(subject_id, vowels): 128 | """ returns eeg data corresponding to words and vowels 129 | given a subject identifier. 130 | """ 131 | 132 | data_folder = 'C:\\Users\\sb00745777\\OneDrive - Ulster University\\Study_2\\imagined_speech/S{}/post_ica/'.format(subject_id) 133 | data_folder1 = 'C:\\Users\\cfcoo\\OneDrive - Ulster University\\Study_2\\imagined_speech/S{}/post_ica/'.format(subject_id) 134 | words_file = 'raw_array_ica.pickle' 135 | vowels_file = 'raw_array_vowels_ica.pickle' 136 | 137 | try: 138 | with open(data_folder + words_file, 'rb') as f: 139 | file = pickle.load(f) 140 | except: 141 | print("Not on PC! Attempting to load from laptop.") 142 | with open(data_folder1 + words_file, 'rb') as f: 143 | file = pickle.load(f) 144 | 145 | w_data = file['raw_array'][:][0] 146 | w_labels = file['labels'] 147 | if vowels == False: 148 | return w_data, w_labels 149 | 150 | elif vowels: 151 | try: 152 | with open(data_folder + vowels_file, 'rb') as f: 153 | file = pickle.load(f) 154 | except: 155 | with open(data_folder1 + vowels_file, 'rb') as f: 156 | file = pickle.load(f) 157 | v_data = file['raw_array'][:][0] 158 | v_labels = file['labels'] 159 | return w_data, v_data, w_labels, v_labels 160 | 161 | def balanced_subsample(features, targets, random_state=12): 162 | """ 163 | function for balancing datasets by randomly-sampling data 164 | according to length of smallest class set. 165 | """ 166 | from sklearn.utils import resample 167 | unique, counts = np.unique(targets, return_counts=True) 168 | unique_classes = dict(zip(unique, counts)) 169 | mnm = len(targets) 170 | for i in unique_classes: 171 | if unique_classes[i] < mnm: 172 | mnm = unique_classes[i] 173 | 174 | X_list, y_list = [],[] 175 | for unique in np.unique(targets): 176 | idx = np.where(targets == unique) 177 | X = features[idx] 178 | y = targets[idx] 179 | 180 | #X1, y1 = resample(X,y,n_samples=mnm, random_state=random_state) 181 | X_list.append(X[:mnm]) 182 | y_list.append(y[:mnm]) 183 | 184 | balanced_X = X_list[0] 185 | balanced_y = y_list[0] 186 | 187 | for i in range(1, len(X_list)): 188 | balanced_X = np.concatenate((balanced_X, X_list[i])) 189 | balanced_y = np.concatenate((balanced_y, y_list[i])) 190 | 191 | return balanced_X, balanced_y 192 | 193 | def predict(model, X_test, batch_size, iterator, threshold_for_binary_case=None): 194 | """ 195 | Load torch model and make predictions on new data. 
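    Illustrative call (a sketch; model, X_test and iterator are assumed to already exist, with iterator a Braindecode-style batch iterator):

        pred_labels = predict(model, X_test, batch_size=32, iterator=iterator)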
196 | """ 197 | all_preds = [] 198 | with th.no_grad(): 199 | for b_X, _ in iterator.get_batches(SignalAndTarget(X_test, X_test), False): 200 | b_X_var = np_to_var(b_X) 201 | all_preds.append(var_to_np(model(b_X_var))) 202 | 203 | pred_labels = compute_pred_labels_from_trial_preds( 204 | all_preds, threshold_for_binary_case) 205 | return pred_labels 206 | 207 | def plot_confusion_matrix(cm, classes,filename, 208 | normalize=False, 209 | title='Confusion matrix', 210 | cmap=plt.cm.Blues): 211 | 212 | """ 213 | Code for confusion matrix extracted from here: 214 | http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py 215 | """ 216 | if normalize: 217 | cm = (cm.astype('float') / cm.sum(axis=1)[:, np.newaxis])*100 218 | print("Normalized confusion matrix") 219 | else: 220 | print('Confusion matrix, without normalization') 221 | 222 | print(cm) 223 | fig = plt.figure(1, figsize=(9, 6)) 224 | #ax = plt.add_subplot(111) 225 | plt.tick_params(labelsize='large') 226 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 227 | #plt.title(title) 228 | plt.colorbar() 229 | tick_marks = np.arange(len(classes)) 230 | plt.xticks(tick_marks, classes, rotation=45) 231 | plt.yticks(tick_marks, classes) 232 | 233 | fmt = '.2f' if normalize else 'd' 234 | thresh = cm.max() / 2. 235 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 236 | plt.text(j, i, format(cm[i, j], fmt), 237 | horizontalalignment="center", 238 | color="white" if cm[i, j] > thresh else "black") 239 | 240 | plt.tight_layout() 241 | plt.ylabel('True label', fontsize='large', fontname='sans-serif') 242 | plt.xlabel('Predicted label', fontsize='large', fontname='sans-serif') 243 | fig.savefig(filename + '.jpg', bbox_inches='tight') 244 | return(fig) 245 | 246 | def print_confusion_matrix(confusion_matrix, class_names, filename, normalize = True, figsize = (5,5), fontsize=16): 247 | """Prints a confusion matrix, as returned by sklearn.metrics.confusion_matrix, as a heatmap. 248 | 249 | Arguments 250 | --------- 251 | confusion_matrix: numpy.ndarray 252 | The numpy.ndarray object returned from a call to sklearn.metrics.confusion_matrix. 253 | Similarly constructed ndarrays can also be used. 254 | class_names: list 255 | An ordered list of class names, in the order they index the given confusion matrix. 256 | figsize: tuple 257 | A 2-long tuple, the first value determining the horizontal size of the ouputted figure, 258 | the second determining the vertical size. Defaults to (10,7). 259 | fontsize: int 260 | Font size for axes labels. Defaults to 14. 
261 | 262 | Returns 263 | ------- 264 | matplotlib.figure.Figure 265 | The resulting confusion matrix figure 266 | """ 267 | if normalize: 268 | confusion_matrix = (confusion_matrix.astype('float') / confusion_matrix.sum(axis=1)[:, np.newaxis])*100 269 | print("Normalized confusion matrix") 270 | else: 271 | print('Confusion matrix, without normalization') 272 | 273 | df_cm = pd.DataFrame( 274 | confusion_matrix, index=class_names, columns=class_names, 275 | ) 276 | fig = plt.figure(figsize=figsize) 277 | fmt = '.2f' if normalize else 'd' 278 | #####set heatmap customization##### 279 | try: 280 | heatmap = sns.heatmap(df_cm, annot=True, fmt=fmt, cmap='GnBu', linewidths=.5, cbar=False, annot_kws={"size": 16}) 281 | except ValueError: 282 | raise ValueError("Confusion matrix values must be integers.") 283 | 284 | heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize) 285 | heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize) 286 | plt.ylabel('True label', fontsize=16, fontname='sans-serif') 287 | plt.xlabel('Predicted label', fontsize=16, fontname='sans-serif') 288 | 289 | if filename != None: 290 | fig.savefig(filename + '.png', bbox_inches='tight') #store image as .png 291 | 292 | return fig 293 | 294 | def data_wrangler(data_type, subject_id): 295 | """ 296 | Function to return EEG data in format #trials*#channels*#samples. 297 | Also returns labels in the range 0 to n-1. 298 | """ 299 | epoch = 4096 300 | if data_type == 'words': 301 | data, labels = load_subject_eeg(subject_id, vowels=False) 302 | n_chan = len(data) 303 | data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32) 304 | labels = labels.astype(np.int64) 305 | elif data_type == 'vowels': 306 | _, data, _, labels = load_subject_eeg(subject_id, vowels=True) 307 | n_chan = len(data) 308 | data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32) 309 | labels = labels.astype(np.int64) 310 | elif data_type == 'all_classes': 311 | w_data, v_data, w_labels, v_labels = load_subject_eeg(subject_id, vowels=True) 312 | n_chan = len(w_data) 313 | words = eeg_to_3d(w_data, epoch, int(w_data.shape[1] / epoch), n_chan).astype(np.float32) 314 | vowels = eeg_to_3d(v_data, epoch, int(v_data.shape[1] / epoch), n_chan).astype(np.float32) 315 | data = np.concatenate((words, vowels), axis=0) 316 | labels = np.concatenate((w_labels, v_labels), axis=0).astype(np.int64) 317 | 318 | x = lambda a: a * 1e6 319 | data = x(data) 320 | 321 | if data_type == 'words': # zero-index the labels 322 | labels[:] = [x - 6 for x in labels] 323 | elif (data_type == 'vowels' or data_type == 'all_classes'): 324 | labels[:] = [x - 1 for x in labels] 325 | 326 | return data, labels 327 | 328 | 329 | def format_data(data_type, subject_id, epoch): 330 | """ 331 | Returns data into format required for inputting to the CNNs. 
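    Example (illustrative; the subject identifier and epoch length are placeholders):

        data, labels = format_data('words', subject_id='01', epoch=4096)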
332 | 333 | Parameters: 334 | data_type: str() 335 | subject_id: str() 336 | epoch: length of single trials, int 337 | """ 338 | 339 | if data_type == 'words': 340 | data, labels = load_subject_eeg(subject_id, vowels=False) 341 | n_chan = len(data) 342 | data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32) 343 | labels = labels.astype(np.int64) 344 | labels[:] = [x - 6 for x in labels] # zero-index the labels 345 | elif data_type == 'vowels': 346 | _, data, _, labels = load_subject_eeg(subject_id, vowels=True) 347 | n_chan = len(data) 348 | data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32) 349 | labels = labels.astype(np.int64) 350 | labels[:] = [x - 1 for x in labels] 351 | elif data_type == 'all_classes': 352 | w_data, v_data, w_labels, v_labels = load_subject_eeg(subject_id, vowels=True) 353 | n_chan = len(w_data) 354 | words = eeg_to_3d(w_data, epoch, int(w_data.shape[1] / epoch), n_chan).astype(np.float32) 355 | vowels = eeg_to_3d(v_data, epoch, int(v_data.shape[1] / epoch), n_chan).astype(np.float32) 356 | data = np.concatenate((words, vowels), axis=0) 357 | labels = np.concatenate((w_labels, v_labels)).astype(np.int64) 358 | labels[:] = [x - 1 for x in labels] 359 | 360 | return data, labels 361 | 362 | def current_loss(model_loss): 363 | """ 364 | Returns the minimum validation loss from the 365 | trained model 366 | """ 367 | losses_list = [] 368 | [losses_list.append(x) for x in model_loss] 369 | return np.min(np.array(losses_list)) 370 | 371 | def current_acc(model_acc): 372 | """ 373 | Returns the maximum validation accuracy from the 374 | trained model 375 | """ 376 | accs_list = [] 377 | [accs_list.append(x) for x in model_acc] 378 | return np.min(np.array(accs_list)) 379 | 380 | def balance_classes(data1,data2): 381 | 382 | if data1.shape[0] > data2.shape[0]: 383 | data1 = data1[:data2.shape[0],:,:] 384 | elif data1.shape[0] < data2.shape[0]: 385 | data2 = data2[:data1.shape[0],:,:] 386 | 387 | return data1, data2 388 | 389 | def timer(orig_func): 390 | """ 391 | decorator for logging time of function. 392 | """ 393 | import time 394 | 395 | @wraps(orig_func) 396 | def wrapper(*args, **kwargs): 397 | t1 = time.time() 398 | result = orig_func(*args, *kwargs) 399 | t2 = time.time() - t1 400 | print(f"{orig_func.__name__} ran in: {round(t2,3)} seconds") 401 | return result 402 | 403 | return wrapper 404 | 405 | def windows(trial_data, sub, window_size, overlap, fs): 406 | """ 407 | Functon for obtaining classification windows for training. 408 | 409 | :param trial_data: EEG data - n_trials * n_chans * n_samples 410 | :param sub: subject object 411 | :param window_size: n number of samples 412 | :param overlap: n number of samples for overlap 413 | :param fs: sampling frequency 414 | :return: list containing data from each window 415 | """ 416 | windows_list, index_list = [],[] 417 | n_windows = int(sub.epoch / window_size + np.floor((sub.epoch - overlap) / window_size)) 418 | if n_windows == 0: 419 | n_windows = 1 420 | low_index = 0 421 | high_index = window_size 422 | for w in range(n_windows): 423 | data = trial_data[:, :, low_index:high_index] 424 | windows_list.append(data) 425 | index_list.append([low_index,high_index]) 426 | low_index += overlap 427 | high_index += overlap 428 | 429 | return np.array(windows_list), index_list 430 | 431 | def windows_index(epoch, window_size, overlap, fs): 432 | """ 433 | Functon for obtaining classification windows for training. 
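    Example (illustrative values; fs is not used in the index computation): epoch=1024, window_size=512 and overlap=256 give three windows, [(0, 512), (256, 768), (512, 1024)].

        indices = windows_index(epoch=1024, window_size=512, overlap=256, fs=250)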
434 | 435 | :param epoch: length of overal trial 436 | :param window_size: n number of samples 437 | :param overlap: n number of samples for overlap 438 | :param fs: sampling frequency 439 | :return: list containing data from each window 440 | """ 441 | index_list = [] 442 | n_windows = int(epoch / window_size + np.floor((epoch - overlap) / window_size)) 443 | if n_windows == 0: 444 | n_windows = 1 445 | low_index = 0 446 | high_index = window_size 447 | for w in range(n_windows): 448 | index_list.append((low_index,high_index)) 449 | low_index += overlap 450 | high_index += overlap 451 | 452 | return index_list 453 | 454 | def get_class_labels(paradigm): 455 | """ 456 | Function for obtaining class labels from paradigm description 457 | :param paradigm: string format: 'EEG_semantics_text' 458 | :return: 459 | """ 460 | paradigm = paradigm.split('_')[1] 461 | if paradigm == 'semantics': 462 | class_labels = ['pig', 'dog', 'car', 'bus'] 463 | elif paradigm == 'action': 464 | class_labels = ['kick', 'jump', 'chew', 'blink'] 465 | elif paradigm == 'twoword': 466 | class_labels = ['red ball', 'blue hat', 'red blue', 'ball hat'] 467 | elif paradigm == 'concrete': 468 | class_labels = ['apple', 'tiger', 'fruit', 'animal'] 469 | return class_labels 470 | 471 | def misclass_to_class(column): 472 | return 1 - column 473 | 474 | def get_model_loss_and_acc(fold_models): 475 | """ 476 | Function for extracting epoch-by-epoch model loss and accuracy scores from 477 | models associated with multiple cross-validation folds 478 | :param fold_models: list of Braindecode (PyTorch) sequential models 479 | :return: train_loss: (pandas.series) main training loss per epoch across folds 480 | valid_loss: (pandas.series) main tvalidation loss per epoch across folds 481 | test_loss: (pandas.series) main testing loss per epoch across folds 482 | train_acc: (pandas.series) main training acc per epoch across folds 483 | valid_acc: (pandas.series) main tvalidation acc per epoch across folds 484 | test_acc: (pandas.series) main testing acc per epoch across folds 485 | """ 486 | train_loss = dict() 487 | valid_loss = dict() 488 | test_loss = dict() 489 | train_acc = dict() 490 | valid_acc = dict() 491 | test_acc = dict() 492 | 493 | for i, model in enumerate(fold_models): 494 | train_loss[i] = model.epochs_df['train_loss'] 495 | valid_loss[i] = model.epochs_df['valid_loss'] 496 | test_loss[i] = model.epochs_df['test_loss'] 497 | train_acc[i] = model.epochs_df['train_misclass'] 498 | valid_acc[i] = model.epochs_df['valid_misclass'] 499 | test_acc[i] = model.epochs_df['test_misclass'] 500 | 501 | train_loss = pd.DataFrame(train_loss) 502 | valid_loss = pd.DataFrame(valid_loss) 503 | test_loss = pd.DataFrame(test_loss) 504 | train_loss = train_loss.mean(axis=1, skipna=True) 505 | valid_loss = valid_loss.mean(axis=1, skipna=True) 506 | test_loss = test_loss.mean(axis=1, skipna=True) 507 | 508 | train_acc = pd.DataFrame(train_acc).apply(lambda x : misclass_to_class(x)) # function converts misclass to classification accuracy 509 | valid_acc = pd.DataFrame(valid_acc).apply(lambda x : misclass_to_class(x)) 510 | test_acc = pd.DataFrame(test_acc).apply(lambda x : misclass_to_class(x)) 511 | train_acc = train_acc.mean(axis=1, skipna=True) 512 | valid_acc = valid_acc.mean(axis=1, skipna=True) 513 | test_acc = test_acc.mean(axis=1, skipna=True) 514 | 515 | return train_loss, valid_loss, test_loss, train_acc, valid_acc, test_acc 516 | 517 | 518 | def labels_dict_and_list(classes): 519 | """ 520 | input: empty pandas DataFrame with 
column headings 521 | corresponding to class labels 522 | output: labels_dict (dict): key=number, value=string 523 | key_list (list): list of classes 524 | """ 525 | 526 | labels_dict = dict() 527 | key_list = [] 528 | for n, label in enumerate(classes.columns): 529 | labels_dict[str(n + 1)] = label 530 | 531 | for key in labels_dict: 532 | key_list.append(key) 533 | return labels_dict, key_list 534 | 535 | # def data_loader(directory, subj, session, category, *args): 536 | # """ 537 | 538 | # :param directory: (str) directory of stored data 539 | # :param subj: (str) Subject Identity, e.g. '01' 540 | # :param session: (int) Session Identity 541 | # :param category: (str) Experimental paradigm, e.g. "actionText" 542 | # :param args: (str) modalities of data 543 | # :return: list of tuples containing data and labels 544 | # """ 545 | # data = [] 546 | # for arg in args: 547 | # filename = f"classifierData/{category}_{arg}_CLF" 548 | # subj_object = load_subject(directory, subj, session, filename)["subject"] 549 | # data.append((subj_object.classifier_data.astype(np.float32), subj_object.labels.astype(np.int64) )) 550 | # return data 551 | 552 | def get_ordered_lists(*args): 553 | flatten = lambda fl: [item for sublist in fl for item in sublist] # flatten nested lists 554 | op_list = [] 555 | for arg in zip(*args): 556 | arg_list = flatten(arg) 557 | op_list.append(arg_list) 558 | return op_list 559 | 560 | def ordered_lists(*args): 561 | op_list = [] 562 | for arg in args: 563 | op_list.append(get_ordered_lists(*arg)) 564 | return get_ordered_lists(*op_list) 565 | 566 | 567 | """ 568 | Name: Ciaran Cooney 569 | Date: 12/01/2019 570 | Description: Functions required for data processing and training of 571 | CNNs on imagined speech EEG data. 572 | """ 573 | 574 | import pickle 575 | import os 576 | import numpy as np 577 | import pandas as pd 578 | import matplotlib.pyplot as plt 579 | import seaborn as sns 580 | import time 581 | from functools import wraps 582 | 583 | def eeg_to_3d(data, epoch_size, n_events,n_chan): 584 | """ 585 | function to return a 3D EEG data format from a 2D input. 
586 | Parameters: 587 | data: 2D np.array of EEG 588 | epoch_size: number of samples per trial, int 589 | n_events: number of trials, int 590 | n_chan: number of channels, int 591 | 592 | Output: 593 | np.array of shape n_events * n_chans * n_samples 594 | """ 595 | idx, a, x = ([] for i in range(3)) 596 | [idx.append(i) for i in range(0,data.shape[1],epoch_size)] 597 | for j in data: 598 | [a.append([j[idx[k]:idx[k]+epoch_size]]) for k in range(len(idx))] 599 | 600 | 601 | return np.reshape(np.array(a),(n_events,n_chan,epoch_size)) 602 | 603 | def load_subject(direct, subject, session, filename): 604 | f_name = f"{direct}/S{subject}/Session_{session}/{filename}.pickle" 605 | with open(f_name, 'rb') as f: 606 | return pickle.load(f) 607 | 608 | def load_pickle(direct, folder, filename): 609 | 610 | for file in os.listdir(direct + folder): 611 | if file.endswith(filename): 612 | pickle_file = (direct + folder + '/' + file) 613 | with open(pickle_file, 'rb') as f: 614 | file = pickle.load(f) 615 | 616 | return file, pickle_file 617 | 618 | def create_events(data, labels): 619 | events = [] 620 | x = np.zeros((data.shape[0], 3)) 621 | for i in range(data.shape[0]): 622 | x[i][0] = i 623 | x[i][2] = labels[i] 624 | [events.append(list(map(int, x[i]))) for i in range(data.shape[0])] 625 | return np.array(events) 626 | 627 | def reverse_coeffs(coeffs, N): 628 | """ Reverse order of coefficients in an array.""" 629 | idx = np.array([i for i in reversed(range(N))]) 630 | coeffs = coeffs[idx] 631 | coeffs = coeffs.reshape((N,1)) 632 | z = np.zeros((N,1)) 633 | return np.append(coeffs, z, axis=1) , coeffs 634 | 635 | def class_ratios(labels): 636 | unique, counts = np.unique(labels, return_counts=True) 637 | class_weight = dict() 638 | for i in range(len(unique)): 639 | class_weight[unique[i]] = len(labels) / (len(unique)*counts[i]) 640 | return class_weight 641 | 642 | def classification_report_csv(report, output_file): 643 | report_data = [] 644 | lines = report.split('\n') 645 | for line in lines[2:-3]: 646 | row = {} 647 | row_data = line.split(' ') 648 | row['class'] = row_data[0] 649 | row['precision'] = float(row_data[1]) 650 | row['recall'] = float(row_data[2]) 651 | row['f1_score'] = float(row_data[3]) 652 | row['support'] = float(row_data[4]) 653 | report_data.append(row) 654 | dataframe = pd.DataFrame.from_dict(report_data) 655 | dataframe.to_csv(output_file + '.csv', index = False) 656 | 657 | def load_features(direct, dict_key1, dict_key2=None): 658 | with open(direct, 'rb') as f: 659 | file = pickle.load(f) 660 | if dict_key2 == None: 661 | return np.array(file[dict_key1]) 662 | else: 663 | return np.array(file[dict_key1]), np.array(file[dict_key2]) 664 | 665 | def short_vs_long(features, labels, split, event_id): 666 | """Function for multilabel data into binary-class sets i.e., 667 | short words and long words 668 | """ 669 | short, long, s_idx, l_idx, s_features, l_features = ([] for i in range(6)) 670 | 671 | [short.append(event_id[i]) for i in event_id if len(i) <= split] 672 | [long.append(event_id[i]) for i in event_id if len(i) > split] 673 | 674 | [s_idx.append(i) for i, e in enumerate(labels) if e in short] 675 | [l_idx.append(i) for i, e in enumerate(labels) if e in long] 676 | 677 | [s_features.append(e) for i, e in enumerate(features) if i in s_idx] 678 | [l_features.append(e) for i, e in enumerate(features) if i in l_idx] 679 | 680 | s_labels = np.zeros(np.array(s_features).shape[0]) 681 | l_labels = np.ones(np.array(l_features).shape[0]) 682 | 683 | features = 
np.concatenate((s_features, l_features)) 684 | labels = np.concatenate((s_labels,l_labels)) 685 | 686 | return s_features, l_features, s_labels, l_labels, features, labels 687 | 688 | def return_indices(event_id, labels): 689 | indices = [] 690 | for _, k in enumerate(event_id): 691 | idx = [] 692 | for d, j in enumerate(labels): 693 | if event_id[k] == j: 694 | idx.append(d) 695 | indices.append(idx) 696 | return indices 697 | 698 | def load_subject_eeg(subject_id, vowels): 699 | """ returns eeg data corresponding to words and vowels 700 | given a subject identifier. 701 | """ 702 | 703 | data_folder = 'C:\\Users\\sb00745777\\OneDrive - Ulster University\\Study_2\\imagined_speech/S{}/post_ica/'.format(subject_id) 704 | data_folder1 = 'C:\\Users\\cfcoo\\OneDrive - Ulster University\\Study_2\\imagined_speech/S{}/post_ica/'.format(subject_id) 705 | words_file = 'raw_array_ica.pickle' 706 | vowels_file = 'raw_array_vowels_ica.pickle' 707 | 708 | try: 709 | with open(data_folder + words_file, 'rb') as f: 710 | file = pickle.load(f) 711 | except: 712 | print("Not on PC! Attempting to load from laptop.") 713 | with open(data_folder1 + words_file, 'rb') as f: 714 | file = pickle.load(f) 715 | 716 | w_data = file['raw_array'][:][0] 717 | w_labels = file['labels'] 718 | if vowels == False: 719 | return w_data, w_labels 720 | 721 | elif vowels: 722 | try: 723 | with open(data_folder + vowels_file, 'rb') as f: 724 | file = pickle.load(f) 725 | except: 726 | with open(data_folder1 + vowels_file, 'rb') as f: 727 | file = pickle.load(f) 728 | v_data = file['raw_array'][:][0] 729 | v_labels = file['labels'] 730 | return w_data, v_data, w_labels, v_labels 731 | 732 | def balanced_subsample(features, targets, random_state=12): 733 | """ 734 | function for balancing datasets by randomly-sampling data 735 | according to length of smallest class set. 736 | """ 737 | from sklearn.utils import resample 738 | unique, counts = np.unique(targets, return_counts=True) 739 | unique_classes = dict(zip(unique, counts)) 740 | mnm = len(targets) 741 | for i in unique_classes: 742 | if unique_classes[i] < mnm: 743 | mnm = unique_classes[i] 744 | 745 | X_list, y_list = [],[] 746 | for unique in np.unique(targets): 747 | idx = np.where(targets == unique) 748 | X = features[idx] 749 | y = targets[idx] 750 | 751 | #X1, y1 = resample(X,y,n_samples=mnm, random_state=random_state) 752 | X_list.append(X[:mnm]) 753 | y_list.append(y[:mnm]) 754 | 755 | balanced_X = X_list[0] 756 | balanced_y = y_list[0] 757 | 758 | for i in range(1, len(X_list)): 759 | balanced_X = np.concatenate((balanced_X, X_list[i])) 760 | balanced_y = np.concatenate((balanced_y, y_list[i])) 761 | 762 | return balanced_X, balanced_y 763 | 764 | def predict(model, X_test, batch_size, iterator, threshold_for_binary_case=None): 765 | """ 766 | Load torch model and make predictions on new data. 
767 | """ 768 | all_preds = [] 769 | with th.no_grad(): 770 | for b_X, _ in iterator.get_batches(SignalAndTarget(X_test, X_test), False): 771 | b_X_var = np_to_var(b_X) 772 | all_preds.append(var_to_np(model(b_X_var))) 773 | 774 | pred_labels = compute_pred_labels_from_trial_preds( 775 | all_preds, threshold_for_binary_case) 776 | return pred_labels 777 | 778 | def plot_confusion_matrix(cm, classes,filename, 779 | normalize=False, 780 | title='Confusion matrix', 781 | cmap=plt.cm.Blues): 782 | 783 | """ 784 | Code for confusion matrix extracted from here: 785 | http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py 786 | """ 787 | if normalize: 788 | cm = (cm.astype('float') / cm.sum(axis=1)[:, np.newaxis])*100 789 | print("Normalized confusion matrix") 790 | else: 791 | print('Confusion matrix, without normalization') 792 | 793 | print(cm) 794 | fig = plt.figure(1, figsize=(9, 6)) 795 | #ax = plt.add_subplot(111) 796 | plt.tick_params(labelsize='large') 797 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 798 | #plt.title(title) 799 | plt.colorbar() 800 | tick_marks = np.arange(len(classes)) 801 | plt.xticks(tick_marks, classes, rotation=45) 802 | plt.yticks(tick_marks, classes) 803 | 804 | fmt = '.2f' if normalize else 'd' 805 | thresh = cm.max() / 2. 806 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 807 | plt.text(j, i, format(cm[i, j], fmt), 808 | horizontalalignment="center", 809 | color="white" if cm[i, j] > thresh else "black") 810 | 811 | plt.tight_layout() 812 | plt.ylabel('True label', fontsize='large', fontname='sans-serif') 813 | plt.xlabel('Predicted label', fontsize='large', fontname='sans-serif') 814 | fig.savefig(filename + '.jpg', bbox_inches='tight') 815 | return(fig) 816 | 817 | def print_confusion_matrix(confusion_matrix, class_names, filename, normalize = True, figsize = (5,5), fontsize=16): 818 | """Prints a confusion matrix, as returned by sklearn.metrics.confusion_matrix, as a heatmap. 819 | 820 | Arguments 821 | --------- 822 | confusion_matrix: numpy.ndarray 823 | The numpy.ndarray object returned from a call to sklearn.metrics.confusion_matrix. 824 | Similarly constructed ndarrays can also be used. 825 | class_names: list 826 | An ordered list of class names, in the order they index the given confusion matrix. 827 | figsize: tuple 828 | A 2-long tuple, the first value determining the horizontal size of the ouputted figure, 829 | the second determining the vertical size. Defaults to (10,7). 830 | fontsize: int 831 | Font size for axes labels. Defaults to 14. 
832 | 
833 |     Returns
834 |     -------
835 |     matplotlib.figure.Figure
836 |         The resulting confusion matrix figure
837 |     """
838 |     if normalize:
839 |         confusion_matrix = (confusion_matrix.astype('float') / confusion_matrix.sum(axis=1)[:, np.newaxis]) * 100
840 |         print("Normalized confusion matrix")
841 |     else:
842 |         print('Confusion matrix, without normalization')
843 | 
844 |     df_cm = pd.DataFrame(
845 |         confusion_matrix, index=class_names, columns=class_names,
846 |     )
847 |     fig = plt.figure(figsize=figsize)
848 |     fmt = '.2f' if normalize else 'd'
849 |     # set heatmap customization
850 |     try:
851 |         heatmap = sns.heatmap(df_cm, annot=True, fmt=fmt, cmap='GnBu', linewidths=.5, cbar=False, annot_kws={"size": 16})
852 |     except ValueError:
853 |         raise ValueError("Confusion matrix values must be integers when normalize=False.")
854 | 
855 |     heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize)
856 |     heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize)
857 |     plt.ylabel('True label', fontsize=16, fontname='sans-serif')
858 |     plt.xlabel('Predicted label', fontsize=16, fontname='sans-serif')
859 | 
860 |     if filename is not None:
861 |         fig.savefig(filename + '.png', bbox_inches='tight')  # store image as .png
862 | 
863 |     return fig
864 | 
865 | def data_wrangler(data_type, subject_id):
866 |     """
867 |     Function to return EEG data in the format n_trials * n_channels * n_samples.
868 |     Also returns labels in the range 0 to n-1.
869 |     """
870 |     epoch = 4096
871 |     if data_type == 'words':
872 |         data, labels = load_subject_eeg(subject_id, vowels=False)
873 |         n_chan = len(data)
874 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
875 |         labels = labels.astype(np.int64)
876 |     elif data_type == 'vowels':
877 |         _, data, _, labels = load_subject_eeg(subject_id, vowels=True)
878 |         n_chan = len(data)
879 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
880 |         labels = labels.astype(np.int64)
881 |     elif data_type == 'all_classes':
882 |         w_data, v_data, w_labels, v_labels = load_subject_eeg(subject_id, vowels=True)
883 |         n_chan = len(w_data)
884 |         words = eeg_to_3d(w_data, epoch, int(w_data.shape[1] / epoch), n_chan).astype(np.float32)
885 |         vowels = eeg_to_3d(v_data, epoch, int(v_data.shape[1] / epoch), n_chan).astype(np.float32)
886 |         data = np.concatenate((words, vowels), axis=0)
887 |         labels = np.concatenate((w_labels, v_labels), axis=0).astype(np.int64)
888 | 
889 |     # scale from volts to microvolts
890 |     data = data * 1e6
891 | 
892 |     if data_type == 'words':  # zero-index the labels
893 |         labels[:] = [x - 6 for x in labels]
894 |     elif data_type in ('vowels', 'all_classes'):
895 |         labels[:] = [x - 1 for x in labels]
896 | 
897 |     return data, labels
898 | 
899 | 
900 | def format_data(data_type, subject_id, epoch):
901 |     """
902 |     Returns data in the format required for input to the CNNs.
903 | 
904 |     Parameters:
905 |         data_type: (str) 'words', 'vowels' or 'all_classes'
906 |         subject_id: (str) subject identifier
907 |         epoch: (int) length of a single trial in samples
908 |     """
909 | 
910 |     if data_type == 'words':
911 |         data, labels = load_subject_eeg(subject_id, vowels=False)
912 |         n_chan = len(data)
913 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
914 |         labels = labels.astype(np.int64)
915 |         labels[:] = [x - 6 for x in labels]  # zero-index the labels
916 |     elif data_type == 'vowels':
917 |         _, data, _, labels = load_subject_eeg(subject_id, vowels=True)
918 |         n_chan = len(data)
919 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
920 |         labels = labels.astype(np.int64)
921 |         labels[:] = [x - 1 for x in labels]
922 |     elif data_type == 'all_classes':
923 |         w_data, v_data, w_labels, v_labels = load_subject_eeg(subject_id, vowels=True)
924 |         n_chan = len(w_data)
925 |         words = eeg_to_3d(w_data, epoch, int(w_data.shape[1] / epoch), n_chan).astype(np.float32)
926 |         vowels = eeg_to_3d(v_data, epoch, int(v_data.shape[1] / epoch), n_chan).astype(np.float32)
927 |         data = np.concatenate((words, vowels), axis=0)
928 |         labels = np.concatenate((w_labels, v_labels)).astype(np.int64)
929 |         labels[:] = [x - 1 for x in labels]
930 | 
931 |     return data, labels
932 | 
933 | def current_loss(model_loss):
934 |     """
935 |     Returns the minimum validation loss from the
936 |     trained model.
937 |     """
938 |     losses_list = []
939 |     losses_list.extend(model_loss)
940 |     return np.min(np.array(losses_list))
941 | 
942 | def current_acc(model_acc):
943 |     """
944 |     Returns the maximum validation accuracy from the
945 |     trained model.
946 |     """
947 |     accs_list = []
948 |     accs_list.extend(model_acc)
949 |     return np.max(np.array(accs_list))
950 | 
951 | def balance_classes(data1, data2):
952 | 
953 |     if data1.shape[0] > data2.shape[0]:
954 |         data1 = data1[:data2.shape[0], :, :]
955 |     elif data1.shape[0] < data2.shape[0]:
956 |         data2 = data2[:data1.shape[0], :, :]
957 | 
958 |     return data1, data2
959 | 
960 | def timer(orig_func):
961 |     """
962 |     Decorator for logging the execution time of a function.
963 |     """
964 |     import time
965 | 
966 |     @wraps(orig_func)
967 |     def wrapper(*args, **kwargs):
968 |         t1 = time.time()
969 |         result = orig_func(*args, **kwargs)
970 |         t2 = time.time() - t1
971 |         print(f"{orig_func.__name__} ran in: {round(t2, 3)} seconds")
972 |         return result
973 | 
974 |     return wrapper
975 | 
976 | def windows(trial_data, sub, window_size, overlap, fs):
977 |     """
978 |     Function for obtaining classification windows for training.
979 | 
980 |     :param trial_data: EEG data - n_trials * n_chans * n_samples
981 |     :param sub: subject object
982 |     :param window_size: number of samples per window
983 |     :param overlap: number of samples for overlap
984 |     :param fs: sampling frequency
985 |     :return: array of windowed data and list of [low, high] sample indices
986 |     """
987 |     windows_list, index_list = [], []
988 |     n_windows = int(sub.epoch / window_size + np.floor((sub.epoch - overlap) / window_size))
989 |     if n_windows == 0:
990 |         n_windows = 1
991 |     low_index = 0
992 |     high_index = window_size
993 |     for w in range(n_windows):
994 |         data = trial_data[:, :, low_index:high_index]
995 |         windows_list.append(data)
996 |         index_list.append([low_index, high_index])
997 |         low_index += overlap
998 |         high_index += overlap
999 | 
1000 |     return np.array(windows_list), index_list
1001 | 
1002 | def windows_index(epoch, window_size, overlap, fs):
1003 |     """
1004 |     Function for obtaining classification window indices for training.
1005 | 
1006 |     :param epoch: length of the overall trial in samples
1007 |     :param window_size: number of samples per window
1008 |     :param overlap: number of samples for overlap
1009 |     :param fs: sampling frequency
1010 |     :return: list of (low, high) sample index tuples, one per window
1011 |     """
1012 |     index_list = []
1013 |     n_windows = int(epoch / window_size + np.floor((epoch - overlap) / window_size))
1014 |     if n_windows == 0:
1015 |         n_windows = 1
1016 |     low_index = 0
1017 |     high_index = window_size
1018 |     for w in range(n_windows):
1019 |         index_list.append((low_index, high_index))
1020 |         low_index += overlap
1021 |         high_index += overlap
1022 | 
1023 |     return index_list
1024 | 
1025 | def get_class_labels(paradigm):
1026 |     """
1027 |     Function for obtaining class labels from a paradigm description.
1028 |     :param paradigm: (str) paradigm description, e.g. 'EEG_semantics_text'
1029 |     :return: list of class label strings
1030 |     """
1031 |     paradigm = paradigm.split('_')[1]
1032 |     if paradigm == 'semantics':
1033 |         class_labels = ['pig', 'dog', 'car', 'bus']
1034 |     elif paradigm == 'action':
1035 |         class_labels = ['kick', 'jump', 'chew', 'blink']
1036 |     elif paradigm == 'twoword':
1037 |         class_labels = ['red ball', 'blue hat', 'red blue', 'ball hat']
1038 |     elif paradigm == 'concrete':
1039 |         class_labels = ['apple', 'tiger', 'fruit', 'animal']
1040 |     return class_labels
1041 | 
1042 | def misclass_to_class(column):
1043 |     return 1 - column
1044 | 
1045 | def get_model_loss_and_acc(fold_models):
1046 |     """
1047 |     Function for extracting epoch-by-epoch model loss and accuracy scores from
1048 |     models associated with multiple cross-validation folds.
1049 |     :param fold_models: list of Braindecode (PyTorch) sequential models
1050 |     :return: train_loss: (pandas.Series) mean training loss per epoch across folds
1051 |              valid_loss: (pandas.Series) mean validation loss per epoch across folds
1052 |              test_loss: (pandas.Series) mean test loss per epoch across folds
1053 |              train_acc: (pandas.Series) mean training accuracy per epoch across folds
1054 |              valid_acc: (pandas.Series) mean validation accuracy per epoch across folds
1055 |              test_acc: (pandas.Series) mean test accuracy per epoch across folds
1056 |     """
1057 |     train_loss = dict()
1058 |     valid_loss = dict()
1059 |     test_loss = dict()
1060 |     train_acc = dict()
1061 |     valid_acc = dict()
1062 |     test_acc = dict()
1063 | 
1064 |     for i, model in enumerate(fold_models):
1065 |         train_loss[i] = model.epochs_df['train_loss']
1066 |         valid_loss[i] = model.epochs_df['valid_loss']
1067 |         test_loss[i] = model.epochs_df['test_loss']
1068 |         train_acc[i] = model.epochs_df['train_misclass']
1069 |         valid_acc[i] = model.epochs_df['valid_misclass']
1070 |         test_acc[i] = model.epochs_df['test_misclass']
1071 | 
1072 |     train_loss = pd.DataFrame(train_loss)
1073 |     valid_loss = pd.DataFrame(valid_loss)
1074 |     test_loss = pd.DataFrame(test_loss)
1075 |     train_loss = train_loss.mean(axis=1, skipna=True)
1076 |     valid_loss = valid_loss.mean(axis=1, skipna=True)
1077 |     test_loss = test_loss.mean(axis=1, skipna=True)
1078 | 
1079 |     train_acc = pd.DataFrame(train_acc).apply(misclass_to_class)  # converts misclassification rate to classification accuracy
1080 |     valid_acc = pd.DataFrame(valid_acc).apply(misclass_to_class)
1081 |     test_acc = pd.DataFrame(test_acc).apply(misclass_to_class)
1082 |     train_acc = train_acc.mean(axis=1, skipna=True)
1083 |     valid_acc = valid_acc.mean(axis=1, skipna=True)
1084 |     test_acc = test_acc.mean(axis=1, skipna=True)
1085 | 
1086 |     return train_loss, valid_loss, test_loss, train_acc, valid_acc, test_acc
1087 | 
1088 | 
1089 | def labels_dict_and_list(classes):
1090 |     """
1091 |     input: empty pandas DataFrame with column headings
1092 |            corresponding to class labels
1093 |     output: labels_dict (dict): key=class number (as string), value=class label
1094 |             key_list (list): list of the dictionary keys
1095 |     """
1096 | 
1097 |     labels_dict = dict()
1098 |     key_list = []
1099 |     for n, label in enumerate(classes.columns):
1100 |         labels_dict[str(n + 1)] = label
1101 | 
1102 |     for key in labels_dict:
1103 |         key_list.append(key)
1104 |     return labels_dict, key_list
1105 | 
1106 | def data_loader(directory, subj, session, category, *args):
1107 |     """
1108 | 
1109 |     :param directory: (str) directory of stored data
1110 |     :param subj: (str) subject identity, e.g. '01'
1111 |     :param session: (int) session identity
1112 |     :param category: (str) experimental paradigm, e.g. "actionText"
1113 |     :param args: (str) modalities of data
1114 |     :return: list of tuples containing data and labels
1115 |     """
1116 |     data = []
1117 |     for arg in args:
1118 |         filename = f"classifierData/{category}_{arg}_CLF"
1119 |         subj_object = subjects.Subject.load_subject(f"{filename}.pickle")  # load_subject(directory, subj, session, filename)
1120 |         # print(subj_object['data1'])
1121 |         data.append((subj_object.data1.astype(np.float32), subj_object.labels1.astype(np.int64)))
1122 |     return data
1123 | 
1124 | def load_subject(direct, subject, session, filename):
1125 |     f_name = f"{direct}/S{subject}/Session_{session}/{filename}.pickle"
1126 |     with open(f_name, 'rb') as f:
1127 |         return pickle.load(f)
--------------------------------------------------------------------------------
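For quick reference, below is a minimal usage sketch (not part of the repository files above) for the windowing and class-balancing helpers. It assumes they are importable as BiModNeuroCNN.data_loader.utils1 and uses synthetic data; adjust the import path to wherever these helpers live in your checkout.

import numpy as np
from BiModNeuroCNN.data_loader.utils1 import windows_index, balanced_subsample

# Synthetic EEG-like data: 20 trials x 6 channels x 4096 samples, two unbalanced classes.
data = np.random.randn(20, 6, 4096).astype(np.float32)
labels = np.array([0] * 12 + [1] * 8, dtype=np.int64)

# Truncate each class to the size of the smallest class (8 trials per class here).
data_bal, labels_bal = balanced_subsample(data, labels)

# 1024-sample classification windows stepped by 512 samples across a 4096-sample epoch.
# fs is accepted by windows_index but not used internally.
for low, high in windows_index(epoch=4096, window_size=1024, overlap=512, fs=256):
    window = data_bal[:, :, low:high]  # shape: (16, 6, 1024)
    print(low, high, window.shape)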