├── BiModNeuroCNN
    ├── results
    │   ├── __init__.py
    │   ├── dataframe_utils.py
    │   ├── metrics.py
    │   └── results.py
    ├── subjects
    │   ├── __init__.py
    │   ├── subjects_utils.py
    │   └── subjects.py
    ├── version.py
    ├── __init__.py
    ├── models
    │   ├── __init__.py
    │   ├── network_utils.py
    │   ├── bimodal_cnn.py
    │   └── bimodal_cnn_pooling.py
    ├── data_loader
    │   ├── __init__.py
    │   ├── utils1.py
    │   ├── signal_processing_utils.py
    │   ├── data_utils.py
    │   └── data_loader.py
    ├── training
    │   ├── __init__.py
    │   ├── training_utils.py
    │   ├── bimodal_classification.py
    │   └── bimodal_training.py
    ├── utils_final.py
    └── utils.py
├── statistics
    ├── results_4_stats.xlsx
    └── anova_2_way.m
├── requirements.txt
├── License.txt
├── setup.py
├── README.md
├── .gitignore
└── training_demo_nCV.PY


/BiModNeuroCNN/results/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/subjects/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.0"


--------------------------------------------------------------------------------
/BiModNeuroCNN/__init__.py:
--------------------------------------------------------------------------------
1 | from BiModNeuroCNN.version import __version__
2 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/models/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Bimodal convolutional neural network architechures
3 | """
4 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/data_loader/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Tools for loading and processing EEG or fNIRS data
3 | """
4 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/training/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Training procedures for bimodal convolutional neural networks
3 | """
4 | 


--------------------------------------------------------------------------------
/statistics/results_4_stats.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfcooney/BiModNeuroCNN/HEAD/statistics/results_4_stats.xlsx


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy
 2 | pandas
 3 | scipy
 4 | sklearn
 5 | tensorflow
 6 | matplotlib
 7 | h5py
 8 | mne
 9 | ast
10 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/data_loader/utils1.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pickle
 3 | from BiModNeuroCNN.subjects import subjects
 4 | 
 5 | def subject_data_loader(filename):
 6 |     """
 7 | 
 8 |     :param filename: (str) directory of stored data
 9 |     :return: tuples containing data and labels
10 |     """  
11 |     subj_object = subjects.Subject.load_subject(f"{filename}.pickle") 
12 |     return (subj_object.data1.astype(np.float32), subj_object.labels1.astype(np.int64))
13 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/data_loader/signal_processing_utils.py:
--------------------------------------------------------------------------------
 1 | from scipy.signal import butter, lfilter
 2 | from BiModNeuroCNN.utils import timer
 3 | 
 4 | def butter_bandpass(lowcut, highcut, fs, order=5):
 5 |     nyq = 0.5 * fs
 6 |     low = lowcut / nyq
 7 |     high = highcut / nyq
 8 |     b, a = butter(order, [low, high], btype='band')
 9 |     return b, a
10 | 
11 | @timer #order = 5 is standard
12 | def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
13 |     b, a = butter_bandpass(lowcut, highcut, fs, order=order)
14 |     y = lfilter(b, a, data)
15 |     return y


--------------------------------------------------------------------------------
/BiModNeuroCNN/subjects/subjects_utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def eeg_to_3d(data, labels, epoch, n_events, n_chans):
 4 |     """
 5 |     function to return a 3D EEG data format from a 2D input.
 6 |     Parameters:
 7 |       data: 2D np.array of EEG
 8 |       labels: (np.array ||list) 
 9 |       epoch: number of samples per trial, int
10 |       n_events: number of trials, int
11 |       n_chan: number of channels, int
12 |         
13 |     Output:
14 |       np.array of shape n_events * n_chans * n_samples
15 |     """
16 |     idx, a, x = ([] for i in range(3))
17 |     [idx.append(i) for i in range(0,data.shape[1],epoch)]
18 |     for j in data:
19 |         [a.append([j[idx[k]:idx[k]+epoch]]) for k in range(len(idx))]
20 |         
21 |     return np.reshape(np.array(a),(labels.shape[0],n_chans,epoch))


--------------------------------------------------------------------------------
/BiModNeuroCNN/utils_final.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | def gpu_check():
 4 | 	"""
 5 | 	Test script for discovering GPUs
 6 | 	Expected Output: GPU and Cuda available: True
 7 | 					******************************
 8 | 					Number of GPUs: 1
 9 | 					******************************
10 | 					Current GPU: 0
11 | 					******************************
12 | 					Current GPU location: <torch.cuda.device object at 0x000002B67ABF6940>
13 | 					******************************
14 | 					GPU device type: GeForce 940MX
15 | 					******************************
16 | 
17 | 	"""
18 | 	available = torch.cuda.is_available()
19 | 	print(f"GPU and Cuda available: {available}")
20 | 
21 | 	print("*"*30)
22 | 
23 | 	n_gpus = torch.cuda.device_count()
24 | 	print(f"Number of GPUs: {n_gpus}")
25 | 
26 | 	print("*"*30)
27 | 
28 | 	device = torch.cuda.current_device()
29 | 	print(f"Current GPU: {device}")
30 | 
31 | 	print("*"*30)
32 | 
33 | 	location = torch.cuda.device(0)
34 | 	print(f"Current GPU location: {location}")
35 | 
36 | 	print("*"*30)
37 | 
38 | 	type_gpu = torch.cuda.get_device_name(0)
39 | 	print(f"GPU device type: {type_gpu}")
40 | 
41 | 	print("*"*30)


--------------------------------------------------------------------------------
/License.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Ciaran Cooney
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/training/training_utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def combine_batches(batch1,batch2):
 4 |     """
 5 |     Function for combining batches of inputs and targets for
 6 |     2 modalities of data.
 7 |     :param batch1: (list-type) [0] inputs, [1] targets
 8 |     :param batch2: (list-type) [0] inputs, [1] targets
 9 |     :return: (list) [inputs1, targets1, inputs2, targets2]
10 |     """
11 |     new_batch = []
12 |     for inputs, target in zip(batch1, batch2):
13 |         a = list(inputs)
14 |         b = list(target)
15 |         a.append(b[0])
16 |         a.append(b[1])
17 |         new_batch.append(a)
18 |     return new_batch
19 | 
20 | def current_loss(model_loss):
21 |     """
22 |     Returns the minimum validation loss from the 
23 |     trained model
24 |     """
25 |     losses_list = []
26 |     [losses_list.append(x) for x in model_loss]
27 |     return np.min(np.array(losses_list))
28 | 
29 | def current_acc(model_acc):
30 |     """
31 |     Returns the maximum validation accuracy from the 
32 |     trained model
33 |     """
34 |     accs_list = []
35 |     [accs_list.append(x) for x in model_acc]
36 |     return np.min(np.array(accs_list))


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | from os import path
 3 | 
 4 | file_dir = path.abspath(path.dirname(__file__))
 5 | 
 6 | with open(path.join(file_dir, 'README.md'), "r") as f:
 7 | 	long_description = f.read()
 8 | 
 9 | 
10 | version = dict()
11 | with open(path.join(file_dir, 'BiModNeuroCNN/version.py'), "r") as (version_file):
12 | 	exec(version_file.read(), version)
13 | 
14 | 
15 | setuptools.setup(
16 | 
17 | 	name = "BiModNeuroCNN",
18 | 	version = version['__version__'],
19 | 
20 | 	description = "Tools for bimodal training of CNNs, i.e. concurrent training with two data types",
21 | 	long_description = long_description,
22 | 	long_description_content_type = "text/markdown",
23 | 
24 | 	url = "git@github.com:cfcooney/BiModNeuroCNN.git",
25 | 
26 | 	author = "Ciaran Cooney",
27 | 
28 | 	license='MIT License',
29 | 
30 | 	install_requires=['braindecode==0.4.85', 'mne', 'numpy',
31 | 	                  'pandas', 'scipy', 'matplotlib',],
32 | 
33 | 	packages = setuptools.find_packages(),
34 | 
35 | 	classifiers = [
36 | 	
37 | 		"Intended Audience :: Developers",
38 |         "Intended Audience :: Science/Research",
39 |         'Topic :: Software Development :: Build Tools',
40 |         "Topic :: Scientific/Engineering :: Artificial Intelligence",
41 | 
42 | 		'Programming Language :: Python :: 3.6',
43 | 	]
44 | 
45 | 	)
46 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | **BiModNeuroCNN**
 2 | 
 3 | This is a package for training bimodal deep learning archtectures on dual streams 
 4 | of neurological data. Package tested on Electroencephalography (EEG) and 
 5 | function near-infrared stpectroscopy (fNIRS).
 6 | 
 7 | Work in progress - more to be added in future.
 8 | 
 9 | # Installation
10 | 
11 | 1. Install PyTorch: http://pytorch.org/
12 | 2. Install Braindecode: https://github.com/braindecode/braindecode
13 | 
14 | 3. Install latest release of BiModNeuroCNN using pip:
15 | ```
16 | pip install BiModNeuroCNN
17 | ```
18 | 
19 | ## Dataset
20 | Link to dataset to be added upon upcoming publication.
21 | 
22 | ## Citing
23 | Paper currently under review.
24 | 
25 | Braindecode was used to implement this package:
26 | >@article {HBM:HBM23730,
27 | >author = {Schirrmeister, Robin Tibor and Springenberg, Jost Tobias and Fiederer,
28 | >  Lukas Dominique Josef and Glasstetter, Martin and Eggensperger, Katharina and Tangermann, Michael and
29 | >  Hutter, Frank and Burgard, Wolfram and Ball, Tonio},
30 | >title = {Deep learning with convolutional neural networks for EEG decoding and visualization},
31 | >journal = {Human Brain Mapping},
32 | >issn = {1097-0193},
33 | >url = {http://dx.doi.org/10.1002/hbm.23730},
34 | >doi = {10.1002/hbm.23730},
35 | >month = {aug},
36 | >year = {2017},
37 | >keywords = {electroencephalography, EEG analysis, machine learning, end-to-end learning, brain–machine interface,
38 | >  brain–computer interface, model interpretability, brain mapping},
39 | >}
40 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/models/network_utils.py:
--------------------------------------------------------------------------------
 1 | import torch as th
 2 | 
 3 | def _transpose_time_to_spat(x):
 4 |     return x.permute(0, 3, 2, 1)
 5 | 
 6 | def tensor_size(x):
 7 |     print(x.size())
 8 |     return x
 9 | 
10 | def reshape_tensor(x):
11 |     x
12 |     return x.view(x.size(0),x.size(1)*x.size(2)*1)
13 | 
14 | def reshape_output(x):
15 |     return x.view(x.size(0),4, 1 ,1)
16 | 
17 | def reshape_4_lstm(x):
18 |     return x.view(x.size(0),1,x.size(1))
19 | 
20 | def dense_input(x):
21 |     return x.size(2)
22 | 
23 | def tensor_print(x):
24 |     print(x.data.cpu().numpy())
25 |     return x
26 | 
27 | def linear_input_shape(x):
28 |     print(x.size(1)*x.size(2))
29 |     return x.size(1)*x.size(2)
30 | 
31 | def mean_inplace(tensor_1, tensor_2):
32 |     """
33 |     function for meaning softmax outputs from two networks,
34 |     Cuurently not able to use as inplace changes to the tensor
35 |     cause problems with backpropagation
36 |     :param tensor_1:
37 |     :param tensor_2:
38 |     :return:
39 |     """
40 |     for i in range(len(tensor_1)):
41 |         for j in range(len(tensor_1[i])):
42 |             tensor_1[i][j] = (tensor_1[i][j] + tensor_2[i][j]) / 2
43 |     return tensor_1
44 | def new_mean(tensor_1, tensor_2):
45 |     avg = []
46 |     for sm1, sm2 in zip(tensor_1, tensor_2):
47 |         avg.append([(a+b) / 2 for a,b in zip(sm1, sm2)])
48 |     avg = th.tensor(avg, dtype=th.float32).cuda()
49 |     return avg
50 | 
51 | # remove empty dim at end and potentially remove empty time dim
52 | # do not just use squeeze as we never want to remove first dim
53 | def _squeeze_final_output(x):
54 |     #print(x.shape)
55 |     assert x.size()[3] == 1
56 |     x = x[:, :, :, 0]
57 |     if x.size()[2] == 1:
58 |         x = x[:, :, 0]
59 |     return x
60 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/results/dataframe_utils.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | import numpy as np 
 3 | 
 4 | def results_df(index, index_name, columns_list, column_names):
 5 |     """
 6 |     create tiered dataframe for hyper-parameter results.
 7 |     """
 8 |     assert len(columns_list) == len(column_names), "Unequal length for columns/names!"
 9 |     miindex = pd.MultiIndex.from_product([index],names=[index_name])
10 |     micol = pd.MultiIndex.from_product(columns_list,names=column_names)
11 |     return pd.DataFrame(index=miindex, columns=micol).sort_index().sort_index(axis=1)
12 | 
13 | def get_col_list(hyp_params):
14 |     """
15 |     returns a list of lists containing hyper-parameters of XD.
16 |     parameters
17 |     ----------------
18 |     :param: hyp_params (dict) keys: names of hyp_params, values: lists of HP values
19 |     """
20 |     y = []
21 |     for n in range(len(list(hyp_params.keys()))):
22 |         a = []
23 |         x = hyp_params[list(hyp_params.keys())[n]]
24 |         
25 |         
26 |         if type(x[0]) == tuple:
27 |             x1 = []
28 |             for h in x:
29 |                 x1.append(str(h))
30 |             x = x1
31 |         if callable(x[0]):
32 |             a.append([x[s].__name__ for s in range(len(x))])
33 |             y.append(a[0])
34 |         else:
35 |             y.append(x)
36 |     return y
37 | 
38 | def param_scores_df(index, hyp_params):
39 |     """
40 |     Creates dataframe for storing the mean scores for each hyper-parameter
41 |     for each subject. Mean and Std. of each hyper-parameter is then stored for plotting.
42 |     """
43 |     columns_list = get_col_list(hyp_params)
44 |     columns = list()
45 |     for i in range(len(hyp_params)):
46 |         for j in range(len(hyp_params[list(hyp_params.keys())[i]])):
47 |             columns.append(f'{list(hyp_params.keys())[i]}, {columns_list[i][j]}')
48 |     index.append("Mean")
49 |     index.append("Std.")
50 |     df = pd.DataFrame(index=index, columns=columns)
51 |     a = df.columns.str.split(', ', expand=True).values
52 | 
53 |     #swap values in NaN and replace NAN to ''
54 |     df.columns = pd.MultiIndex.from_tuples([('', x[0]) if pd.isnull(x[1]) else x for x in a])
55 |     return df


--------------------------------------------------------------------------------
/BiModNeuroCNN/results/metrics.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.preprocessing import LabelBinarizer
 3 | from sklearn.utils import check_array,check_consistent_length
 4 | 
 5 | def weighted_sum(sample_score, sample_weight, normalize=False):
 6 |     if normalize:
 7 |         return np.average(sample_score, weights=sample_weight)
 8 |     elif sample_weight is not None:
 9 |         return np.dot(sample_score, sample_weight)
10 |     else:
11 |         return sample_score.sum()
12 | 
13 | def cross_entropy(y_true, y_pred, eps=1e-15, labels=None):
14 |     """
15 |     A metric that compares the predicted utterance likelihoods and
16 |     the actual utterance identities across all trials for a subject.
17 |     Given utterance log-likelihoods predicted by a model, cross entropy
18 |     measures the average number of bits required to correctly classify
19 |     those utterances. Cross entropy consideres predicted probabilities, not
20 |     simply the most likely class for each trial.
21 |     -- Lower cross entropy indicates better performance.
22 |     :return: loss: float
23 |     """
24 | 
25 |     y_pred = check_array(y_pred, ensure_2d=False)
26 | 
27 |     lb = LabelBinarizer()
28 |     if labels is not None:
29 |         lb.fit(labels)
30 |     else:
31 |         lb.fit(y_true)
32 | 
33 | 
34 |     if len(lb.classes_) <= 1:
35 |         raise ValueError("Only 1 or 0 labels have been provided. Please provide correct labels.")
36 | 
37 |     transformed_labels = lb.transform(y_true)
38 |     if transformed_labels.shape[1] == 1:
39 |         transformed_labels = np.append(1 - transformed_labels,
40 |                                        transformed_labels, axis=1)
41 | 
42 | 
43 |     y_pred = np.clip(y_pred, eps, 1 - eps) #clipping required to protect against 1 and 0 probabilities
44 | 
45 |     transformed_labels = check_array(transformed_labels)
46 | 
47 |     if len(lb.classes_) != y_pred.shape[1]:
48 |         raise ValueError("Ground truth and predictions contain a different number of values!")
49 | 
50 |     y_pred /= y_pred.sum(axis=1)[:, np.newaxis]
51 | 
52 |     loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)
53 | 
54 |     return  weighted_sum(loss, None,normalize=True)
55 | 
56 | if __name__ == '__main__':
57 |    
58 |     labels = ['pig','cow','car','bus']
59 |     y_true = [1,2,0,3]
60 |     y_pred = [[.1,.5,.2,.2], [.3,.05,.55,.1], [.5,.0,.0,.5], [.0,.35,0,.65]]
61 | 
62 |     # labels = ['pig']
63 |     # y_true = [0,1]
64 |     # y_pred = [7.0722e-01, 2.3728e-05, 1.1968e-04, 2.9264e-01]
65 | 
66 |     print(cross_entropy(y_true, y_pred, eps=1e-15, labels=None))
67 | 
68 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by https://www.toptal.com/developers/gitignore/api/python
  2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
  3 | 
  4 | ### Python ###
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | pip-wheel-metadata/
 28 | share/python-wheels/
 29 | *.egg-info/
 30 | .installed.cfg
 31 | *.egg
 32 | MANIFEST
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .nox/
 48 | .coverage
 49 | .coverage.*
 50 | .cache
 51 | nosetests.xml
 52 | coverage.xml
 53 | *.cover
 54 | *.py,cover
 55 | .hypothesis/
 56 | .pytest_cache/
 57 | pytestdebug.log
 58 | 
 59 | # Translations
 60 | *.mo
 61 | *.pot
 62 | 
 63 | # Django stuff:
 64 | *.log
 65 | local_settings.py
 66 | db.sqlite3
 67 | db.sqlite3-journal
 68 | 
 69 | # Flask stuff:
 70 | instance/
 71 | .webassets-cache
 72 | 
 73 | # Scrapy stuff:
 74 | .scrapy
 75 | 
 76 | # Sphinx documentation
 77 | docs/_build/
 78 | doc/_build/
 79 | 
 80 | # PyBuilder
 81 | target/
 82 | 
 83 | # Jupyter Notebook
 84 | .ipynb_checkpoints
 85 | 
 86 | # IPython
 87 | profile_default/
 88 | ipython_config.py
 89 | 
 90 | # pyenv
 91 | .python-version
 92 | 
 93 | # pipenv
 94 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 95 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 96 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 97 | #   install all needed dependencies.
 98 | #Pipfile.lock
 99 | 
100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
101 | __pypackages__/
102 | 
103 | # Celery stuff
104 | celerybeat-schedule
105 | celerybeat.pid
106 | 
107 | # SageMath parsed files
108 | *.sage.py
109 | 
110 | # Environments
111 | .env
112 | .venv
113 | env/
114 | venv/
115 | ENV/
116 | env.bak/
117 | venv.bak/
118 | pythonenv*
119 | 
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 | 
124 | # Rope project settings
125 | .ropeproject
126 | 
127 | # mkdocs documentation
128 | /site
129 | 
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 | 
135 | # Pyre type checker
136 | .pyre/
137 | 
138 | # pytype static type analyzer
139 | .pytype/
140 | 
141 | # profiling data
142 | .prof
143 | 
144 | # End of https://www.toptal.com/developers/gitignore/api/python


--------------------------------------------------------------------------------
/BiModNeuroCNN/data_loader/data_utils.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import numpy as np
 3 | import pandas as pd
 4 | import collections
 5 | from braindecode.datautil.signal_target import SignalAndTarget
 6 | from imblearn.over_sampling import SMOTE
 7 | 
 8 | def multi_SignalAndTarget(*args):
 9 |         """
10 |         Returns muliple SignalAndTarget objects from multiple (X,y) data tuples
11 | 
12 |         :param: *args (tuple) any number of tuples containing data and labels
13 |         """
14 |         return_list= []
15 |         for arg in args:
16 |             return_list.append(SignalAndTarget(arg[0], arg[1]))
17 |         return tuple(return_list)
18 | 
19 | def load_pickle(filename):
20 |    
21 |     with open(filename, 'rb') as f:
22 |         file = pickle.load(f)
23 |     return file
24 | 
25 | def get_class_index_tuples(filename):
26 | 	"""
27 | 	Load removed trials from .txt file and reformat into list of tuples (index, class).
28 | 	Index is the trial number and class is the corresponding class label
29 | 
30 | 	Inputs: filename (str): .txt file containing removed trials. E.g. f"{path}/removedEEG.txt"
31 | 	Returns: list of tuples (index, class)
32 | 	"""
33 | 	class_l, index_l, return_l = [], [], []
34 | 	try:
35 | 		removed_trials= pd.read_csv(filename, header=None).values[0]
36 | 
37 | 		for d in removed_trials:
38 | 			if type(d) == str:
39 | 				values = d.replace("(",",").replace(")","")
40 | 				class_l.append(int(values.split(",")[0]))
41 | 				index_l.append(int(values.split(",")[1]))
42 | 		[return_l.append((x,y)) for x,y in zip(index_l,class_l) if (x,y) not in return_l]
43 | 		return return_l
44 | 	except:
45 | 		print("Either no file available or no trials removed: [] returned.")
46 | 		return []
47 | 
48 | def combine_removed_trials(Rm1, Rm2, names):
49 | 	removed = collections.namedtuple("removed_samples", names)
50 | 	return removed(Rm1, Rm2)
51 | 
52 | def get_classifier_window(data, start, end, prestim=0.5, sfreq=100):
53 | 	"""
54 | 	Similar to <BiModNeuroCNN.subjects.subjects.Subject.get_classifier_window> in 
55 | 	that it extracts epoched time windows within a trial period.
56 | 	
57 | 	:param data: (np.array) n_trials * n_chans * n_samples
58 | 	:param start: (float) time to begin classification window
59 | 	:param end: (float) time to end classification window
60 | 	:param prestim: (float) length of pre-stimulus period in the data
61 | 	:return: (np.array): n_trials*n_channels*len(classification_window)
62 | 	"""
63 | 	
64 | 	fcn = lambda x : x * sfreq
65 | 	
66 | 	start_samples = int(fcn(start)) + int(fcn(prestim))
67 | 	end_samples = int(fcn(end)) + int(fcn(prestim))
68 | 	classifier_data = data[:,:,start_samples:end_samples]
69 | 	epoch = classifier_data.shape[2]
70 | 
71 | 	return classifier_data, epoch
72 | 
73 | 
74 | def smote_augmentation(data, labels, mixing_ratio=2, print_shape=False):
75 |     """
76 |     Method for oversampling the number of trials to augment
77 |     training data. Shoulf only be used on training data
78 |     :input: data (3d array): training data
79 |             labels (np.array OR list): class labels
80 |             mixing_ratio (int): ratio to oversample - e.g. 2 means
81 |             ratio of synthetic data to real data is 2:1
82 |             print_shape (bool): command to print oversampled data shape
83 |     :return: data_os (ndarray): array with a balanced set of trials
84 |              labels_os (np.array): array with a balanced set of labels
85 |     """
86 |     unique, counts = np.unique(labels, return_counts=True)
87 |     os_value = np.ceil(np.max(counts) * mixing_ratio).astype(np.int32)
88 | 
89 |     s = SMOTE(sampling_strategy={np.unique(labels)[0]: os_value, np.unique(labels)[1]: os_value,
90 |                                  np.unique(labels)[2]: os_value, np.unique(labels)[3]: os_value},
91 |               random_state=10, k_neighbors=3)
92 | 
93 |     data_os_2d, labels_os = s.fit_resample(data.reshape((data.shape[0], data.shape[1] * data.shape[2])), labels)
94 |     data_os = data_os_2d.reshape((data_os_2d.shape[0], data.shape[1], data.shape[2]))
95 | 
96 |     if print_shape:
97 |         print(f"Oversampled data shape: {data_os.shape}")
98 | 
99 |     return data_os, labels_os


--------------------------------------------------------------------------------
/statistics/anova_2_way.m:
--------------------------------------------------------------------------------
  1 | % Ciaran Cooney, 2020
  2 | % Script for performing 2-way ANOVA with post-hoc analysis 
  3 | % using the Tukey Honest Significant Difference criterion.
  4 | %
  5 | % Data Structure:
  6 | %                                  Column Variables
  7 | %      Row Variables          Condition 1 | Condition 2
  8 | % Condition 1, Replication 1      10      |      13
  9 | % Condition 1, Replication 2      12      |      14
 10 | % Condition 2, Replication 1       5      |       5
 11 | % Condition 2, Replication 2       7      |       9
 12 | % Condition 3, Replication 1      18      |      16
 13 | % Condition 3, Replication 2      11      |      10
 14 | %%
 15 | clear all
 16 | path = '/';
 17 | 
 18 | %y = xlsread([path 'test_data.xlsx'],'2-way', 'B2:C70');
 19 | %path = 'C:/Users/cfcoo/OneDrive - Dundalk Institute of Technology/Study_3/Multimodal/';
 20 | y = xlsread([path 'testing_data.xlsx'],'overt', 'B2:D43');
 21 | 
 22 | replications = 28; % number of sessions
 23 | 
 24 | [p,tbl,stats] = anova2(y, replications);
 25 | tbl
 26 | 
 27 | % Pairwise comparison of the column data
 28 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable.
 29 | 
 30 | figure
 31 | % Pairwise comparison of the row data
 32 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable.
 33 | 
 34 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 35 | %%
 36 | clear all
 37 | path = '/';
 38 | 
 39 | y = xlsread([path 'results_4_stats.xlsx'],'imagined', 'B2:D43');
 40 | 
 41 | replications = 21; % number of sessions
 42 | 
 43 | [p,tbl,stats] = anova2(y, replications);
 44 | tbl
 45 | 
 46 | % Pairwise comparison of the column data
 47 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable.
 48 | 
 49 | figure
 50 | % Pairwise comparison of the row data
 51 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable.
 52 | 
 53 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 54 | %%
 55 | clear all
 56 | path = '/';
 57 | 
 58 | y = xlsread([path 'results_4_stats.xlsx'],'bim_eeg_ov', 'B2:c169');
 59 | 
 60 | replications = 28; % number of sessions
 61 | 
 62 | [p,tbl,stats] = anova2(y, replications);
 63 | tbl
 64 | 
 65 | % Pairwise comparison of the column data
 66 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable.
 67 | 
 68 | figure
 69 | % Pairwise comparison of the row data
 70 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable.
 71 | 
 72 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 73 | %%
 74 | clear all
 75 | path = '/';
 76 | 
 77 | y = xlsread([path 'results_4_stats.xlsx'],'bim_eeg_im', 'B2:c127');
 78 | 
 79 | replications = 21; % number of sessions
 80 | 
 81 | [p,tbl,stats] = anova2(y, replications);
 82 | tbl
 83 | 
 84 | % Pairwise comparison of the column data
 85 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable.
 86 | 
 87 | figure
 88 | % Pairwise comparison of the row data
 89 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable.
 90 | 
 91 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 92 | %%
 93 | %%
 94 | clear all
 95 | path = '/';
 96 | 
 97 | y = xlsread([path 'results_4_stats.xlsx'],'bim_fnirs_ov', 'B2:c169');
 98 | 
 99 | replications = 28; % number of sessions
100 | 
101 | [p,tbl,stats] = anova2(y, replications);
102 | tbl
103 | 
104 | % Pairwise comparison of the column data
105 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable.
106 | 
107 | figure
108 | % Pairwise comparison of the row data
109 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable.
110 | 
111 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
112 | %%
113 | clear all
114 | path = '/';
115 | 
116 | y = xlsread([path 'results_4_stats.xlsx'],'bim_fnirs_im', 'B2:c127');
117 | 
118 | replications = 21; % number of sessions
119 | 
120 | [p,tbl,stats] = anova2(y, replications);
121 | tbl
122 | 
123 | % Pairwise comparison of the column data
124 | [c, m, h, nms] = multcompare(stats,'alpha',.05,'ctype','hsd'); %p-values returned in 'c' variable.
125 | 
126 | figure
127 | % Pairwise comparison of the row data
128 | [cR, mR, hR, nmsR] = multcompare(stats,'alpha',.05,'Estimate','row','ctype','hsd'); %p-values returned in 'c' variable.
129 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/data_loader/data_loader.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import scipy.io as spio
  3 | import pickle
  4 | from BiModNeuroCNN.utils import timer
  5 | from BiModNeuroCNN.data_loader.data_utils import load_pickle
  6 | 
  7 | 
  8 | class Loader:
  9 | 
 10 | 	def __init__(self):
 11 | 		self.data1 = np.array([])
 12 | 		self.labels1 = []
 13 | 		self.data2 = np.array([])
 14 | 		self.labels2 = []
 15 | 		self.combined_data = None
 16 | 		self.datatype1 = None
 17 | 		self.datatype2 = None
 18 | 
 19 | 	def __repr__(self):
 20 | 		return f"<class 'BiModNeuroCNN.data_loader.Loader'>"
 21 | 
 22 | 	def __str__(self):
 23 | 		return f"Class for loading two different data types"
 24 | 
 25 | 	def __getattr__(self, attr):
 26 | 		if attr == "state":
 27 | 			return f"Data 1:{not self.data1.size==0} : Labels 1:{not self.labels1==[]} :\
 28 | 			Data 2:{not self.data2.size==0} : Labels 2:{not self.labels2==[]}"
 29 | 		if attr == "processed":
 30 | 			pass
 31 | 
 32 | 	def __setattr__(self, name, value):
 33 | 		if name == "datatypes":
 34 | 			self.datatype1 = value[0]
 35 | 			self.datatype2 = value[1]
 36 | 		else:
 37 | 			super().__setattr__(name, value)
 38 | 
 39 | 	@timer
 40 | 	def loadmat(self, datafile, labelsfile=None):
 41 | 		"""
 42 | 	    Load previously-validated EEG data and labels in the form of a .mat file.
 43 | 
 44 | 	    params: datafile (str): location and name of file containing data
 45 | 	    params: labelsfile (str): location and name of separate file containing labels
 46 | 
 47 | 	    Returns: n_trial * n_chans * n_samples Numpy array contianing EEG data.
 48 | 	             list containing labels for all trials.
 49 | 	    """
 50 | 		data = spio.loadmat(f"{datafile}.mat")
 51 | 		data = data[list(data.keys())[3]]
 52 | 		
 53 | 		if labelsfile != None:  
 54 | 			labels = spio.loadmat(f"{labelsfile}.mat")
 55 | 			labels = labels[list(labels.keys())[3]] 
 56 | 
 57 | 		if self.data1.size == 0:
 58 | 			self.data1 = data
 59 | 			self.labels1 = labels[0]
 60 | 		elif self.data2.size == 0:
 61 | 			self.data2 = data
 62 | 			self.labels2 = labels[0]
 63 | 		else:
 64 | 			raise AttributeError("Maximum 2 data types already loaded")
 65 | 
 66 | 	@timer
 67 | 	def loadMNE(self, filename, data_tag='EEG', label_tag='labels', load_labels=True):
 68 | 		"""
 69 |         Load previously-validated EEG data and labels in the form of an MNE Raw Array.
 70 | 
 71 |         Returns: n_chans * n_samples Numpy array contianing EEG data.
 72 |                  data3D: n_trials * n_chans * n_samples reshaped EEG data.
 73 |                  Numpy array containing labels for all trials.
 74 |         """
 75 | 		mnePickle = load_pickle(filename)
 76 | 		data = mnePickle[data_tag].get_data()[:,:-1,:] #remove trigger channel from data
 77 | 		if load_labels:
 78 | 			labels = mnePickle[label_tag]
 79 | 
 80 | 		if self.data1.size == 0:
 81 | 			self.data1 = data
 82 | 			self.labels1 = labels
 83 | 		elif self.data2.size == 0:
 84 | 			self.data2 = data
 85 | 			self.labels2 = labels
 86 | 		else:
 87 | 			raise AttributeError("Maximum 2 data types already loaded")
 88 | 
 89 | 	@timer
 90 | 	def combine_data(self):
 91 | 		"""
 92 |         Combine two data types into single np.array. Useful option for combined classification.
 93 | 		Number of trials, channels and samples must be equal.
 94 | 
 95 |         Returns: n_trials * n_chans * n_samples Numpy array contianing data.
 96 |         """
 97 | 		assert self.data1 is not None, "No data loaded for set 1!"
 98 | 		assert self.data2 is not None, "No data loaded for set 2!"
 99 | 
100 | 		assert self.data1.shape[0] == self.data2.shape[0], "Number of trials must be identical!"
101 | 		assert self.data1.shape[2] == self.data2.shape[2], "NUmber of samples must be identical!"
102 | 
103 | 		self.combined_data = np.concatenate((self.data1, self.data2), axis=1)
104 | 		assert self.combined_data.shape[1] == self.data1.shape[1] + self.data1.shape[1], "Axis 1 should be sum of EEG and fNIRS Axis 1"
105 | 
106 | 	
107 | 	@staticmethod	
108 | 	def match_removed_trials(data1, labels1, data2, labels2, total_labels, removed_all, print_result=True):
109 | 		"""
110 | 		Ensure that samples in two data types are correctly aligned by removed rejected trials from both.
111 | 
112 | 	    Inputs: data1 (np.ndarray): one of the two multimodal data types
113 | 	            data2 (np.ndarray): one of the two multimodal data types
114 | 	            total_labels (np.array || list): all class labels from entire dataset
115 | 	            removed_trials_df (pd.DataFrame): 2 rows containing class and index of removed trials
116 | 	            labels: (np.array || list): labels associated with the specific classes of data1 and data2
117 |         Returns: data1 (np.array): data1 == in dimensions to data2
118 |                  labels1 (np.array || list): data1 == data2
119 |                  data2 (np.array): data2 == in dimensions to data1
120 |                  labels2 (np.array || list): data2 == data1
121 | 	    """
122 | 
123 | 		placeholder_data1 = np.zeros((data1.shape[1],data1.shape[2]))
124 | 		placeholder_data2 = np.zeros((data2.shape[1],data1.shape[2]))
125 | 
126 | 		for tup1 in removed_all.data_1:
127 | 			labels1 = np.insert(labels1,tup1[0],tup1[1])
128 | 			data1 = np.insert(data1, tup1[0], placeholder_data1, axis=0)
129 | 
130 | 		for tup2 in removed_all.data_2:
131 | 			labels2 = np.insert(labels2,tup2[0],tup2[1])
132 | 			data2 = np.insert(data2, tup2[0], placeholder_data2, axis=0)
133 | 		
134 | 		combined_tups = removed_all.data_1
135 | 		for t in removed_all.data_2:
136 | 		    if t not in combined_tups:
137 | 		        combined_tups.append(t)
138 | 		removal_index = []
139 | 		for i in combined_tups:
140 | 		    removal_index.append(i[0])
141 | 		removal_index = list(reversed(np.sort(removal_index)))
142 | 
143 | 		for idx in removal_index:
144 | 		    total_labels = np.delete(total_labels, idx)
145 | 		    data1 = np.delete(data1, idx, axis=0)
146 | 		    labels1 = np.delete(labels1, idx)
147 | 		    data2 = np.delete(data2, idx, axis=0)
148 | 		    labels2 = np.delete(labels2, idx)
149 | 
150 | 		if print_result:
151 | 			_, counts = np.unique(total_labels, return_counts=True)
152 | 			print(f"Total: {counts}")
153 | 			_, counts = np.unique(labels1, return_counts=True)
154 | 			print(f"EEG: {counts}")
155 | 			_, counts = np.unique(labels2, return_counts=True)
156 | 			print(f"fNIRS: {counts}")
157 | 
158 | 		return data1, labels1, data2, labels2


--------------------------------------------------------------------------------
/BiModNeuroCNN/subjects/subjects.py:
--------------------------------------------------------------------------------
  1 | from BiModNeuroCNN.utils import timer, labels_dict_and_list
  2 | from BiModNeuroCNN.data_loader.data_loader import Loader
  3 | from scipy.signal import decimate as dec
  4 | from tensorflow.keras.utils import normalize
  5 | from scipy.signal import butter, lfilter
  6 | import numpy as np
  7 | import pandas as pd
  8 | import scipy.io as spio
  9 | import pickle
 10 | import warnings
 11 | import os
 12 | warnings.filterwarnings('ignore', category=FutureWarning)
 13 | 
 14 | 
 15 | class Subject(Loader):
 16 | 	
 17 | 	direct = 'C:/Users/cfcoo/OneDrive - Ulster University/Study_3/Subject_Data'
 18 | 
 19 | 	def __init__(self, id):
 20 | 
 21 | 		super().__init__()
 22 | 
 23 | 		self.id = id
 24 | 		
 25 | 		self.channels_validated = False
 26 | 		self.trials_validated = False
 27 | 		self.description = None
 28 | 		self.data_loaded = False
 29 | 
 30 | 		self.data1 = np.array([])
 31 | 		self.data2 = np.array([])
 32 | 		self.data_combined = None
 33 | 		self.labels1 = []
 34 | 		self.labels2 = []
 35 | 		self.labels_combined = None
 36 | 
 37 | 		self.epoched_data1 = None
 38 | 		self.epoched_data2 = None
 39 | 		self.classifier_start = 0
 40 | 		self.classifier_end = 0
 41 | 
 42 | 		self.classnames = []
 43 | 
 44 | 		self.sfreq1 = 0
 45 | 		self.sfreq2 = 0
 46 | 		self.lowcut = 0
 47 | 		self.highcut = 0
 48 | 		self.downsample_rate1 = 2
 49 | 		self.downsample_rate2 = 2
 50 | 		self.downsampled = [False, False]
 51 | 		self.normalized = [False, False]
 52 | 		self.filtered = [False, False]
 53 | 
 54 | 	def __repr__(self):
 55 | 		return f"<class 'BiModNeuroCNN.subjects.Subject'>"
 56 | 
 57 | 	def __str__(self):
 58 | 		return f"Class for creating subject-specific objects for multi-subject experiments."
 59 | 
 60 | 	# def __getattr__(self, attr):
 61 | 	# 	pass
 62 | 
 63 | 	# def __setattr__(self, name, value):
 64 | 	# 	pass
 65 | 
 66 | 	def set_description(self, description):
 67 | 		self.description = description 
 68 | 
 69 | 	def get_description(self):
 70 | 		return self.description
 71 | 
 72 | 	def change_directory(self, new_direct):
 73 | 		self.direct = new_direct
 74 | 
 75 | 	def set_channel_validation(self, validated):
 76 | 		assert type(validated) == bool
 77 | 		self.channels_validated = validated 
 78 | 
 79 | 	def get_channel_validation(self):
 80 | 		return self.channels_validated
 81 | 
 82 | 	def set_trial_validation(self, validated):
 83 | 		assert type(validated) == bool
 84 | 		self.trials_validated = validated 
 85 | 
 86 | 	def get_trial_validation(self):
 87 | 		return self.trials_validated
 88 | 
 89 | 	def get_classifier_window(self, start, end, data1=True, data2=True, prestim=0.5, sfreq1=100, sfreq2=100):
 90 | 		"""
 91 | 		Epoch the time-period within each trial to extract a specfic window for analysis.
 92 | 
 93 | 		:param start: (float) time to begin classification window
 94 | 		:param end: (float) time to end classification window
 95 | 		:param data1 (bool) whether to apply method to self.data1
 96 | 		:param data2 (bool) whether to apply method to self.data2
 97 | 		:param prestim: (float) length of pre-stimulus period in the data
 98 | 		:param sfreq1: (int) sampling frequency of self.data1
 99 | 		:param sfreq2: (int) sampling frequency of self.data2
100 | 		:return: (np.array): n_trials*n_channels*len(classification_window)
101 | 		"""
102 | 		if data1 == False and data2 == False:
103 | 			raise ValueError(f"Require at least one data type to be True: data1:{data1}, data2:{data2}")
104 | 		else:
105 | 			self.classifier_start = start
106 | 			self.classifier_end = end
107 | 			if data1:
108 | 				fcn = lambda x : x * sfreq1
109 | 			
110 | 				start_samples = int(fcn(start)) + int(fcn(prestim))
111 | 				end_samples = int(fcn(end)) + int(fcn(prestim))
112 | 
113 | 				self.epoched_data1 = self.data1[:,:,start_samples:end_samples]
114 | 			if data2:
115 | 				fcn = lambda x : x * sfreq2
116 | 
117 | 				start_samples = int(fcn(start)) + int(fcn(prestim))
118 | 				end_samples = int(fcn(end)) + int(fcn(prestim))
119 | 				
120 | 				self.epoched_data2 = self.data2[:,:,start_samples:end_samples]
121 | 
122 | 	def bandpass(self, lowcut, highcut, order, data1=True, data2=False, sfreq1=100, sfreq2=100):
123 | 		"""
124 | 		Bandpass filter the data with butterworth filter. Use for EEG data
125 | 
126 | 		:params: lowcut (float): low-pass cutoff frequency
127 | 	    :params: highcut (float): high-pass cutoff frequency
128 | 		:params: order (int): Butterworth filter order number - see scipy docs.
129 | 		:params: data1 (bool): filter data1 or not
130 | 		:params: data2 (bool): filter data2 or not
131 | 		:params: sfreq1: (int) sampling frequency of self.data1
132 | 		:params: sfreq2: (int) sampling frequency of self.data2
133 | 	    Returns: n_trial * n_chans * n_samples Numpy array contianing filtered data.
134 | 		"""
135 | 		if data1 == False and data2 == False:
136 | 			raise ValueError(f"Require at least one data type to be True: data1:{data1}, data2:{data2}")
137 | 		else:
138 | 			self.lowcut = lowcut
139 | 			self.highcut = highcut
140 | 			if data1:
141 | 				self.sfreq1 = sfreq1
142 | 				nyq = 0.5 * sfreq1
143 | 				low = lowcut / nyq
144 | 				high = highcut / nyq
145 | 				b, a = butter(order, [low, high], btype='band')
146 | 				self.data1 = lfilter(b, a, self.data1)
147 | 				self.filtered[0] = True
148 | 			if data2:
149 | 				self.sfreq2 = sfreq2
150 | 				nyq = 0.5 * sfreq2
151 | 				low = lowcut / nyq
152 | 				high = highcut / nyq
153 | 				b, a = butter(order, [low, high], btype='band')
154 | 				self.data2 = lfilter(b, a, self.data2)
155 | 				self.filtered[1] = True
156 | 	
157 | 	def down_and_normal(self, data1=True, data2=False, downsample_rate1=2, downsample_rate2=2, norm=True):
158 | 		"""
159 | 		Downsample and normalize the data.
160 | 
161 | 		:params: data1 (bool): apply to data1 or not
162 | 		:params: data2 (bool): apply to data2 or not
163 | 		:params: downsample_rate1 (int): downsample rate.
164 | 		:params: downsample_rate2 (int): downsample rate.
165 | 		:params: norm: (bool) to normalize or not to normalize.
166 | 	    Returns: n_trial * n_chans * n_samples Numpy array containing downsampled and/or normalized data.
167 | 		"""
168 | 		if data1 == False and data2 == False:
169 | 			raise ValueError(f"Require at least one data type to be True: data1:{data1}, data2:{data2}")
170 | 		else:
171 | 			fnc = lambda a: a * 1e6 # improves numerical stability
172 | 			if data1:
173 | 				self.downsample_rate1 = downsample_rate1
174 | 				if self.downsample_rate1 > 1:
175 | 					self.data1 = dec(self.data1, downsample_rate1) 
176 | 					self.downsampled[0] = True
177 | 				
178 | 				self.data1 = fnc(self.data1)
179 | 				if norm:
180 | 					self.data1 = normalize(self.data1)
181 | 					self.normalized[0] = True
182 | 
183 | 			if data2:
184 | 				self.downsample_rate2 = downsample_rate2
185 | 				if self.downsample_rate2 > 1:
186 | 					self.data2 = dec(self.data2, downsample_rate2) 
187 | 					self.downsampled[1] = True
188 | 				
189 | 				self.data2 = fnc(self.data2)
190 | 				if norm:
191 | 					self.data2 = normalize(self.data2)
192 | 					self.normalized[1] = True
193 | 		
194 | 
195 | 	def get_classnames(self, classes):
196 | 		"""
197 | 		Returns sub-group of classnames from a global list of class names. List of
198 | 		class names passed as a pd.DataFrame with column names == class names
199 | 		labels corresponding to trials are associated with values in a dict.
200 | 		:return: list of class names to object
201 | 		"""
202 | 		labels_dict, _ = labels_dict_and_list(classes)
203 | 		for i in np.unique(self.labels1):
204 | 			self.classnames.append(labels_dict[str(i)])
205 | 
206 | 	def clear_data(self):
207 | 		"""
208 | 		Reset to empty data structures.
209 | 		"""
210 | 		self.data1 = np.array([])
211 | 		self.data2 = np.array([])
212 | 		self.data_combined = None
213 | 		self.labels1 = []
214 | 		self.labels2 = []
215 | 		self.labels_combined = None
216 | 
217 | 		self.epoched_data1 = None
218 | 		self.epoched_data2 = None
219 | 
220 | 	def save_subject(self, path, filename):
221 | 		"""
222 | 		Save the subject object as a pickle.
223 | 
224 | 		:param path: (str) path to saving directory
225 | 		:param filename: (str) name to save object as
226 | 		"""
227 | 		if not os.path.exists(path):
228 | 			print("Creating new subject file...")
229 | 			os.makedirs(path)
230 | 		filename = f"{path}/{filename}.pickle"
231 | 		filehandler = open(filename, 'wb')
232 | 		pickle.dump(self.__dict__, filehandler, protocol=pickle.HIGHEST_PROTOCOL)
233 | 		print(f"Data object saved to: '{filename}'\n")
234 | 
235 | 	def update(self,newdata):
236 | 	    for key,value in newdata.items():
237 | 	        setattr(self,key,value)
238 | 
239 | 	@classmethod
240 | 	def load_subject(self, f_name):
241 | 		 with open(f_name, 'rb') as f:
242 | 		 	tmp_dict = pickle.load(f)
243 | 	 		f.close()
244 | 	 		self.update(self, tmp_dict)
245 |  			return self
246 | 
247 | 	def get_details(self):
248 | 		print(f"Subject: {self.id}")
249 | 		print("-"*15)
250 | 		print(self.description)
251 | 		print("-"*15)
252 | 		if self.data1.size != 0:
253 | 			print(f"Data 1 shape: {self.data1.shape}")
254 | 			print(f"Labels 1 shape: {len(self.labels1)}")
255 | 			print(f"Class names: {self.classnames}")
256 | 			print(f"Number of  valid channels: {self.data1.shape[1]}")
257 | 			print(f"Sampling Frequency: {self.sfreq1} Hz")
258 | 			print(f"Data downsampled: {self.downsampled[0]}")
259 | 			if self.downsampled[0]:
260 | 				print(f"Downsample Rate: {self.downsample_rate1}")
261 | 			if self.normalized[0]:
262 | 				print(f"Data normalized: {self.normalized[0]}")
263 | 			if self.filtered[0]:
264 | 				print(f"Data bandpass filtered between {self.lowcut} and {self.highcut} Hz")
265 | 			if self.epoched_data1 is not None:
266 | 				print(f"Classifier Window Size: {self.epoched_data1.shape}")
267 | 				print(f"Classifier Start Time: {self.classifier_start} seconds")
268 | 				print(f"Classifier End Time: {self.classifier_end} seconds\n")
269 | 		if self.data1.size != 0:
270 | 			print(f"Data 2 shape: {self.data2.shape}")
271 | 			print(f"Labels 2 shape: {len(self.labels2)}")
272 | 			print(f"Class names: {self.classnames}")
273 | 			print(f"Number of  valid channels: {self.data2.shape[1]}")
274 | 			print(f"Sampling Frequency: {self.sfreq2} Hz")
275 | 			print(f"Data downsampled: {self.downsampled[1]}")
276 | 			if self.downsampled[1]:
277 | 				print(f"Downsample Rate: {self.downsample_rate1}")
278 | 			print(f"Data normalized: {self.normalized[1]}")
279 | 			if self.filtered[1]:
280 | 				print(f"Data bandpass filtered between {self.lowcut} and {self.highcut} Hz")
281 | 			if self.epoched_data2 is not None:
282 | 				print(f"Classifier Window Size: {self.epoched_data1.shape}")


--------------------------------------------------------------------------------
/training_demo_nCV.PY:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | from BiModNeuroCNN.results.results import Results
  4 | from BiModNeuroCNN.training.bimodal_classification import Classification
  5 | from BiModNeuroCNN.utils import load_subject, format_data, timer, get_model_loss_and_acc, windows_index
  6 | from BiModNeuroCNN.data_loader.data_loader import Loader
  7 | from BiModNeuroCNN.data_loader.data_utils import get_class_index_tuples, combine_removed_trials
  8 | from BiModNeuroCNN.data_loader.utils1 import subject_data_loader 
  9 | from BiModNeuroCNN.models.bimodal_cnn import BiModalNet
 10 | from BiModNeuroCNN.models.bimodal_cnn_pooling import BiModalNet_w_Pool
 11 | from torch.optim.lr_scheduler import MultiStepLR, StepLR
 12 | from braindecode.datautil.signal_target import SignalAndTarget
 13 | from sklearn.model_selection import StratifiedKFold
 14 | from braindecode.experiments.monitors import LossMonitor, MisclassMonitor, RuntimeMonitor
 15 | from braindecode.torch_ext.constraints import MaxNormDefaultConstraint
 16 | from braindecode.torch_ext.functions import safe_log
 17 | from sklearn.metrics import accuracy_score
 18 | import warnings
 19 | warnings.filterwarnings('ignore', category=FutureWarning)
 20 | import logging
 21 | import sys
 22 | import torch as th
 23 | import os
 24 | 
 25 | th.backends.cudnn.deterministic = True
 26 | log = logging.getLogger()
 27 | logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s',
 28 |                      level=logging.INFO, stream=sys.stdout)
 29 | 
 30 | 
 31 | WINDOW_LEN = 200
 32 | OVERLAP = 150
 33 | windows = windows_index(500,WINDOW_LEN,OVERLAP,250)
 34 | 
 35 | hyp_params = dict(window=windows[:2],
 36 | 				  activation=["leaky_relu"],
 37 |                   structure= ["shallow"])
 38 | 
 39 | 
 40 | parameters = dict(best_loss = 100.0,
 41 |                   batch_size = 32,
 42 |                   monitors = [LossMonitor(), MisclassMonitor(), RuntimeMonitor()],
 43 |                   model_constraint = MaxNormDefaultConstraint(),
 44 |                   max_increase_epochs = 0,
 45 |                   cuda = True,
 46 |                   epochs=1,
 47 |                   learning_rate_scheduler=StepLR,
 48 |                   lr_step=20, lr_gamma=0.9)
 49 | 
 50 | 
 51 | 
 52 | EEGSubNet_params = dict(n_filters_time=40, filter_time_length=5, n_filters_spat=40, n_filters_2=20, filter_length_2=20,
 53 |                         pool_time_length_1=5, pool_time_stride_1=2, pool_length_2=5, pool_stride_2=3, final_conv_length='auto',
 54 |                         conv_nonlin=th.nn.functional.leaky_relu, pool_mode='mean', pool_nonlin=safe_log,
 55 |                         split_first_layer=True, batch_norm=True, batch_norm_alpha=0.2,
 56 |                         drop_prob=0.1)
 57 | 
 58 | fNIRSSubNet_params = dict(n_filters_time=40, filter_time_length=5, n_filters_spat=40, n_filters_2=20, filter_length_2=20,
 59 |                         pool_time_length_1=5, pool_time_stride_1=2, pool_length_2=5, pool_stride_2=3, final_conv_length='auto',
 60 |                         conv_nonlin=th.nn.functional.leaky_relu, pool_mode='mean', pool_nonlin=safe_log,
 61 |                         split_first_layer=True, batch_norm=True, batch_norm_alpha=0.2,
 62 |                         drop_prob=0.1)
 63 | 
 64 | 
 65 | @timer
 66 | def train_nested_cv(data1, labels1, data2, labels2, model, rm1_file, rm2_file, subnet1_params,
 67 | 					subnet2_params, directory, hyps, params, labels_dict):
 68 | 
 69 | 
 70 | 	unique = np.unique(labels1, return_counts=False)
 71 | 
 72 | 	num_folds = 5
 73 | 	skf = StratifiedKFold(n_splits=num_folds, shuffle=False,
 74 | 	                      random_state=10)  # don't randomize trials to preserce structure
 75 | 
 76 | 
 77 | 	subj_results = Results(directory, num_folds, 'test') # results structure
 78 | 	subj_results.get_acc_loss_df(hyps, 'Fold')  # empty dataframe headed with each HP set
 79 | 
 80 | 
 81 | 	##### Match Removed Trials #####
 82 | 	d1Rem = get_class_index_tuples(rm1_file)
 83 | 	d2Rem = get_class_index_tuples(rm2_file)
 84 | 	names = 'data_1,data_2'
 85 | 	removed_all = combine_removed_trials(d1Rem, d2Rem, names)
 86 | 
 87 | 	# Remove bad trials from both datasets and align 
 88 | 	data1_matched, labels1_matched, data2_matched, labels2_matched = Loader.match_removed_trials(data1, labels1, data2,
 89 | 	                                                                                             labels2, total_labels,
 90 | 	                                                                                             removed_all, print_result=False)
 91 | 
 92 | 	subtr_ceoff = np.min(labels1_matched) # required to set labels from zero
 93 | 	fcn = lambda l: l - subtr_ceoff
 94 | 	labels1_matched = fcn(labels1_matched)
 95 | 	labels2_matched = fcn(labels2_matched)
 96 | 	
 97 | 	assert labels1_matched.all() == labels2_matched.all(), f"Order of trial labels must be identical!"
 98 | 
 99 | 
100 | 	data_params = dict(n_classes=len(unique),
101 | 	                   n_chans_d1=data1_matched.shape[1],
102 | 	                   input_time_length_d1=WINDOW_LEN,
103 | 	                   n_chans_d2=data2_matched.shape[1],
104 | 	                   input_time_length_d2=WINDOW_LEN)
105 | 
106 | 	clf = Classification(model, subnet1_params, subnet2_params, hyps, params, data_params, path3, "package_test")
107 | 
108 | 	subj_results.y_true = np.array([])
109 | 	trainsetlist, testsetlist, inner_fold_acc, inner_fold_loss, inner_fold_CE = ([] for i in range(5))
110 | 
111 | 	print(f"Inner-fold training for Subject {subject} in progress...")
112 | 
113 | 	for inner_ind, outer_index in skf.split(data1_matched, labels1_matched):
114 | 
115 | 	    data1_matched_if, data1_matched_of, data2_matched_if, data2_matched_of = data1_matched[inner_ind], data1_matched[outer_index], \
116 | 	                                                                             data2_matched[inner_ind], data2_matched[outer_index]
117 | 	    inner_labels, outer_labels = labels1_matched[inner_ind], labels1_matched[outer_index]
118 | 	    subj_results.concat_y_true(outer_labels)
119 | 
120 | 	    print(data1_matched_if.shape, data2_matched_if.shape)
121 | 
122 | 	    trainsetlist.append((SignalAndTarget(data1_matched_if, inner_labels), SignalAndTarget(data2_matched_if, inner_labels)))  # used for outer-fold train/test
123 | 	    testsetlist.append((SignalAndTarget(data1_matched_of, outer_labels), SignalAndTarget(data2_matched_of, outer_labels)))
124 | 
125 | 	    for train_idx, valid_idx in skf.split(data1_matched_if, inner_labels):
126 | 
127 | 	        d1_train, d1_val, d2_train, d2_val = data1_matched_if[train_idx], data1_matched_if[valid_idx], \
128 | 	        									 data2_matched_if[train_idx], data2_matched_if[valid_idx]
129 | 	        y_train, y_val = inner_labels[train_idx], inner_labels[valid_idx]
130 | 
131 | 	        train_set_1 = SignalAndTarget(d1_train, y_train)
132 | 	        val_set_1 = SignalAndTarget(d1_val, y_val)
133 | 	        train_set_2 = SignalAndTarget(d2_train, y_train)
134 | 	        val_set_2 = SignalAndTarget(d2_val, y_val)
135 | 
136 | 	        hyp_param_acc, hyp_param_loss, hyp_param_CE = clf.train_inner(train_set_1, val_set_1, train_set_2, val_set_2,
137 | 	                                                                      test_set_1=None, test_set_2=None, save_model=False)
138 | 
139 | 	        
140 | 	        inner_fold_loss.append(hyp_param_loss) #5 outer folds * 5 inner folds * number of HPs
141 | 	        inner_fold_acc.append(hyp_param_acc)
142 | 	        inner_fold_CE.append(hyp_param_CE)
143 | 
144 | 	subj_results.fill_acc_loss_df(inner_fold_loss,  inner_fold_acc, inner_fold_CE)
145 | 
146 | 	subj_results.get_best_params("accuracy")
147 | 	print(f"best params: {subj_results.best_params}")
148 | 	clf.best_params = subj_results.best_params 
149 | 	clf.set_best_params()
150 | 
151 | 	# accuracy score for each fold, combined predictions for each fold
152 | 	scores, fold_models, predictions, probabilities, outer_cross_entropy, y_true = clf.train_outer(trainsetlist,
153 | 	                                                                                               testsetlist,
154 | 	                                                                                               False, print_details=True)
155 | 
156 | 	print(f"Accuracy: {round((accuracy_score(y_true, predictions) * 100), 3)}")
157 | 
158 | 	subj_results.outer_fold_accuracies = scores
159 | 	subj_results.y_pred = np.array(predictions)
160 | 	subj_results.y_probs = np.array(probabilities)
161 | 	subj_results.outer_fold_cross_entropies = outer_cross_entropy
162 | 
163 | 	subj_results.train_loss, subj_results.valid_loss, subj_results.test_loss, subj_results.train_acc, subj_results.valid_acc, subj_results.test_acc = get_model_loss_and_acc(
164 | 	    fold_models)
165 | 	try:
166 | 	    subj_results.save_result()
167 | 	except BaseException:
168 | 	    print(f"Unable to save results for Subject: {subj} / Session: {session} - {category}")
169 | 	try:
170 | 	    subj_results.subject_stats()
171 | 	    print("")
172 | 	    print(subj_results.subject_stats_df.head())
173 | 
174 | 	    subj_results.get_accuracy()
175 | 	    #print(f"Mean Accuracy: {subj_results.accuracy}")
176 | 	except ValueError:
177 | 	    print(f"Unable to store subject stats as excel file - see the subject results pickle.")
178 | 	    pass  
179 | 
180 | 
181 | if __name__ == '__main__':
182 | 
183 | 	directory = 'BiModNeuroCNN/data/'
184 | 
185 | 	subjects = ['11']
186 | 	sessions = [1]
187 | 
188 | 	save_dir = directory # chance to suitable storage directory
189 | 
190 | 	labels_dict = dict(actionText=[1, 2, 3,4], combsText=[5, 6, 7, 8],
191 | 	                   actionImage=[9, 10, 11, 12], combsImage=[13, 14, 15,16],
192 | 	                   actionAudio=[17, 18, 19, 20], combsAudio=[21, 22, 23, 24])
193 | 
194 | 	categories = ["actionImage"]
195 | 
196 | 	model = BiModalNet
197 | 
198 | 	for subject in subjects:
199 | 		path1 = f"{save_dir}/S{subject}"
200 | 
201 | 		if not os.path.exists(path1):
202 | 		    os.makedirs(path1)
203 | 
204 | 		for session in sessions:
205 | 		    path2 = f"{path1}/Session_{session}"
206 | 
207 | 		    total_labels = pd.read_csv(f"{directory}/S{subject}/Session_{session}/total_labels.txt", header=None).values[0]
208 | 		    removed_labels_1 = "{directory}/S{subj}/Session_{session}/removedEEG.txt"
209 | 		    removed_labels_2 = "{directory}/S{subj}/Session_{session}/removedEEG.txt"
210 | 
211 | 		    if not os.path.exists(path2):
212 | 		        os.makedirs(path2)
213 | 
214 | 		    for category in categories:
215 | 		        path3 = f"{path2}/{category}"
216 | 
217 | 		        if not os.path.exists(path3):
218 | 		            os.makedirs(path3)
219 | 
220 | 		        f_name_1 = f"{directory}/S{subject}/Session_{session}/classifierData/{category}_EEG_CLF"
221 | 		        f_name_2 = f"{directory}/S{subject}/Session_{session}/classifierData/{category}_fNIRS_CLF_0"
222 | 		        data_1, labels_1 = subject_data_loader(f_name_1)
223 | 		        data_2, labels_2 = subject_data_loader(f_name_2)
224 | 		       
225 | 		        train_nested_cv(data_1, labels_1, data_2, labels_2, model, removed_labels_1, removed_labels_2, EEGSubNet_params,
226 | 		        				fNIRSSubNet_params, directory, hyp_params, parameters, labels_dict)
227 | 
228 | 
229 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/models/bimodal_cnn.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch as th
  3 | from torch import nn
  4 | from torch.nn import init
  5 | from braindecode.models.base import BaseModel
  6 | from braindecode.torch_ext.modules import Expression
  7 | from braindecode.torch_ext.functions import safe_log, square
  8 | from BiModNeuroCNN.models.network_utils import reshape_tensor, reshape_4_lstm, _transpose_time_to_spat, tensor_size
  9 | 
 10 | class SubNet(BaseModel):
 11 |     """
 12 |     Temporal-Spatial first layer based on [2]
 13 | 
 14 |     References
 15 |     ----------
 16 | 
 17 |     .. [2] Schirrmeister, R. T., Springenberg, J. T., Fiederer, L. D. J.,
 18 |        Glasstetter, M., Eggensperger, K., Tangermann, M., Hutter, F. & Ball, T. (2017).
 19 |        Deep learning with convolutional neural networks for EEG decoding and
 20 |        visualization.
 21 |        Human Brain Mapping , Aug. 2017. Online: http://dx.doi.org/10.1002/hbm.23730
 22 |     """
 23 | 
 24 |     def __init__(
 25 |             self,
 26 |             in_chans,
 27 |             n_classes,
 28 |             input_time_length=None,
 29 |             n_filters_time=40,
 30 |             filter_time_length=25,
 31 |             n_filters_spat=40,
 32 |             n_filters_2=10,
 33 |             filter_length_2=10,
 34 |             pool_time_length=25,
 35 |             pool_time_stride=15,
 36 |             final_conv_length=30,
 37 |             conv_nonlin=square,
 38 |             pool_mode="mean",
 39 |             pool_nonlin=safe_log,
 40 |             later_nonlin=None,
 41 |             later_pool_nonlin=nn.functional.leaky_relu,
 42 |             split_first_layer=True,
 43 |             batch_norm=True,
 44 |             batch_norm_alpha=0.1,
 45 |             drop_prob=0.1,
 46 |             stride_before_pool=False,
 47 |             structure = "shallow",
 48 |             fc1_out_features = 500,
 49 |             fc2_out_features = 500,
 50 |     ):
 51 |         if final_conv_length == "auto":
 52 |             assert input_time_length is not None
 53 |         self.__dict__.update(locals())
 54 |         del self.self
 55 | 
 56 |     def create_network(self):
 57 |         if self.stride_before_pool:
 58 |             conv_stride = self.pool_time_stride
 59 |         else:
 60 |             conv_stride = 1
 61 |         model = nn.Sequential()
 62 | 
 63 |         if self.split_first_layer:
 64 |             model.add_module("dimshuffle", Expression(_transpose_time_to_spat))
 65 |             model.add_module("conv_time", nn.Conv2d(1, self.n_filters_time, (self.filter_time_length, 1),
 66 |                                                     stride=1, ), )
 67 |             model.add_module("conv_spat", nn.Conv2d(self.n_filters_time, self.n_filters_spat,
 68 |                                                     (1, self.in_chans), stride=1, bias=not self.batch_norm, ),)
 69 |             n_filters_conv = self.n_filters_spat
 70 |             n_filters_op = self.n_filters_spat * (self.input_time_length - 4) # semi-hardcoded at the moment
 71 |         else:
 72 |             model.add_module("conv_time", nn.Conv2d(self.in_chans, self.n_filters_time,
 73 |                                                     (self.filter_time_length, 1), stride=1,
 74 |                                                     bias=not self.batch_norm, ), )
 75 |             n_filters_conv = self.n_filters_time
 76 |             n_filters_op = self.n_filters_time * (self.input_time_length - 4) # semi-hardcoded at the moment
 77 | 
 78 |         if self.batch_norm:
 79 |             model.add_module("bnorm", nn.BatchNorm2d(n_filters_conv, momentum=self.batch_norm_alpha,
 80 |                                                      affine=True), )
 81 |         model.add_module("conv_nonlin", Expression(self.conv_nonlin))
 82 |         model.add_module("drop", nn.Dropout(p=self.drop_prob))
 83 | 
 84 |         def add_conv_pool_block(model, n_filters_before,
 85 |                                 n_filters, filter_length, block_nr):
 86 | 
 87 |             model.add_module(f"conv_{block_nr}", nn.Conv2d(n_filters_before, n_filters,
 88 |                                                            (filter_length, 1), stride=(conv_stride, 1),
 89 |                                                            bias=not self.batch_norm))
 90 |             
 91 |             if self.batch_norm:
 92 |                 model.add_module(f"bnorm_{block_nr}", nn.BatchNorm2d(n_filters,
 93 |                                                                      momentum=self.batch_norm_alpha,
 94 |                                                                      affine=True, eps=1e-5))
 95 |             model.add_module(f"nonlin_{block_nr}", Expression(self.conv_nonlin))
 96 |             model.add_module(f"drop_{block_nr}", nn.Dropout(p=self.drop_prob))
 97 | 
 98 |         if self.structure == "deep":
 99 |             add_conv_pool_block(model, n_filters_conv, self.n_filters_2,
100 |                                 self.filter_length_2, 2)
101 |             
102 |             n_filters_op = self.n_filters_2 * (self.input_time_length - 23) # semi-hardcoded at the moment
103 | 
104 |         model.add_module('reshape', Expression(reshape_tensor))
105 |         
106 |         model.add_module('fc_1', nn.Linear(n_filters_op, self.fc1_out_features, bias=True))
107 |         
108 | 
109 |         # Initialization is xavier for initial layers
110 |         init.xavier_uniform_(model.conv_time.weight, gain=1)
111 |         # maybe no bias in case of no split layer and batch norm
112 |         if self.split_first_layer or (not self.batch_norm):
113 |             init.constant_(model.conv_time.bias, 0)
114 |         if self.split_first_layer:
115 |             init.xavier_uniform_(model.conv_spat.weight, gain=1)
116 |             if not self.batch_norm:
117 |                 init.constant_(model.conv_spat.bias, 0)
118 |         if self.batch_norm:
119 |             init.constant_(model.bnorm.weight, 1)
120 |             init.constant_(model.bnorm.bias, 0)
121 | 
122 |         param_dict = dict(list(model.named_parameters()))
123 |         if self.structure == "deep":
124 |             conv_weight = param_dict['conv_2.weight']
125 |             init.kaiming_normal_(conv_weight)  # He initialization
126 |             if not self.batch_norm:
127 |                 conv_bias = param_dict['conv_2.bias']
128 |                 init.constant_(conv_bias, 0)
129 |             else:
130 |                 bnorm_weight = param_dict['bnorm_2.weight']
131 |                 bnorm_bias = param_dict['bnorm_2.bias']
132 |                 init.constant_(bnorm_weight, 1)
133 |                 init.constant_(bnorm_bias, 0)
134 | 
135 |         fc_weight = param_dict['fc_1.weight']
136 |         init.kaiming_uniform_(fc_weight)
137 |         # model.eval()
138 | 
139 |         return model
140 | 
141 | 
142 | class BiModalNet(nn.Module):
143 | 
144 |     def __init__(self, n_classes, in_chans_1, input_time_1, SubNet_1_params, in_chans_2, input_time_2,
145 |                  SubNet_2_params, linear_dims, drop_prob, nonlin, fc1_out_features, fc2_out_features,
146 |                  gru_hidden_size, gru_n_layers=1):
147 |         """
148 |         BiModal CNN network receiving 2 different data types corresponding to a single ground truth (e.g. EEG and fNIRS)
149 |         Two SubNets are initialised and the forward pass of both is performed before their outputs are fed into the 
150 |         remainder of the network to be fused and applied to GRU and linear layers before log softmax classification.
151 | 
152 |         Parameters
153 |         :param: n_classes (int) number of classes in classification task
154 |         :param: in_chans_1 (int) number of channels in data
155 |         :param: input_time_1 (int) number of time samples in data
156 |         :param: SubNet_1_params (dict) parameters for initiating subnet 1
157 |         :param: in_chans_2 (int) number of channels in data
158 |         :param: input_time_2 (int) number of time samples in data
159 |         :param: SubNet_2_params (dict) parameters for initiating subnet 2
160 |         :param: linear_dims (int) dimension of linear layer
161 |         :param: drop_prob (float) dropout probability
162 |         :param: nonlin (th.nn.functional) activation function
163 |         :param: fc1_out_features (int) output dimension of subnet 1 linear layer
164 |         :param: fc2_out_features (int) output dimension of subnet 2 linear layer
165 |         :param: gru_hidden_size (int) size of GRU hidden layer
166 |         :param: gru_n_layers (int) number of GRU hidden layers
167 |         """
168 |         self.n_classes = n_classes
169 |         self.in_chans_1 = in_chans_1
170 |         self.input_time_1 = input_time_1
171 |         for key in SubNet_1_params:
172 |             setattr(self, f"SN1_{key}", SubNet_1_params[key])
173 |         self.in_chans_2 = in_chans_2
174 |         self.input_time_2 = input_time_2
175 |         for key in SubNet_2_params:
176 |             setattr(self, f"SN2_{key}", SubNet_2_params[key])
177 | 
178 |         self.linear_dims = linear_dims
179 |         self.drop_prob = drop_prob
180 |         self.fc1_out_features = fc1_out_features
181 |         self.fc2_out_features = fc2_out_features
182 |         self.fused_dimension = fc1_out_features + fc2_out_features
183 |         self.gru_hidden_size = gru_hidden_size
184 |         self.gru_n_layers = gru_n_layers
185 | 
186 |         super(BiModalNet, self).__init__()
187 |         model = nn.Sequential()
188 | 
189 |         self.subnet_1 = SubNet(in_chans=self.in_chans_1, n_classes=self.n_classes, input_time_length=self.input_time_1,
190 |                                 n_filters_time=self.SN1_n_filters_time, filter_time_length=self.SN1_filter_time_length,
191 |                                 n_filters_spat=self.SN1_n_filters_spat, n_filters_2=self.SN1_n_filters_2, filter_length_2=self.SN1_filter_length_2,
192 |                                 pool_time_length=self.SN1_pool_time_length, pool_time_stride=self.SN1_pool_time_stride, final_conv_length='auto',
193 |                                 conv_nonlin=self.SN1_conv_nonlin, pool_mode=self.SN1_pool_mode, pool_nonlin=self.SN1_pool_nonlin,
194 |                                 split_first_layer=self.SN1_split_first_layer, batch_norm=self.SN1_batch_norm, batch_norm_alpha=self.SN1_batch_norm_alpha,
195 |                                 drop_prob=self.SN1_drop_prob, structure=self.SN1_structure, fc1_out_features=self.fc1_out_features).create_network() 
196 | 
197 | 
198 |         self.subnet_2 = SubNet(in_chans=self.in_chans_2, n_classes=self.n_classes, input_time_length=self.input_time_2,
199 |                                 n_filters_time=self.SN2_n_filters_time, filter_time_length=self.SN2_filter_time_length,
200 |                                 n_filters_spat=self.SN2_n_filters_spat, n_filters_2=self.SN2_n_filters_2, filter_length_2=self.SN2_filter_length_2,
201 |                                 pool_time_length=self.SN2_pool_time_length, pool_time_stride=self.SN2_pool_time_stride, final_conv_length='auto',
202 |                                 conv_nonlin=self.SN2_conv_nonlin, pool_mode=self.SN2_pool_mode, pool_nonlin=self.SN2_pool_nonlin,
203 |                                 split_first_layer=self.SN2_split_first_layer, batch_norm=self.SN2_batch_norm, batch_norm_alpha=self.SN2_batch_norm_alpha,
204 |                                 drop_prob=self.SN2_drop_prob, structure=self.SN2_structure, fc2_out_features=self.fc2_out_features).create_network()
205 | 
206 |         self.reshape_tensor = reshape_4_lstm # works for GRU also
207 |         
208 |         self.gru = nn.GRU(input_size=self.fused_dimension, hidden_size=self.gru_hidden_size,
209 |                           num_layers=self.gru_n_layers, batch_first=True)
210 |  
211 |         self.nonlin  = nonlin
212 |         self.fused_dp = nn.Dropout(p=self.drop_prob)
213 | 
214 |         self.fused_linear = nn.Linear(self.gru_hidden_size, self.n_classes, bias=True)
215 |         self.softmax = nn.LogSoftmax(dim=1)
216 | 
217 |         self.size = Expression(tensor_size) # useful for debugging tensor/kernel dimension mismatches
218 |         
219 | 
220 |     def forward(self, data_1, data_2):
221 |         """
222 |         Forward pass of the Bimodal CNN
223 |         
224 |         :param data_1: tensor
225 |         :param data_2: tensor
226 |         """
227 |         data_1_h = self.subnet_1(data_1)
228 |         data_2_h = self.subnet_2(data_2)
229 | 
230 |         fusion_tensor = th.cat((data_1_h, data_2_h), dim=1)
231 |         
232 |         fusion_tensor_gru = self.reshape_tensor(fusion_tensor)
233 |         gru_inp = fusion_tensor.view(fusion_tensor_gru.size(0), 1, self.fused_dimension)
234 | 
235 |         gru_op, _ = self.gru(gru_inp)
236 | 
237 |         gru_op = self.nonlin(gru_op)
238 |         gru_op_dp = self.fused_dp(gru_op)
239 | 
240 |         fused_linear = self.fused_linear(gru_op_dp.view(gru_op_dp.size(0), gru_op_dp.size(2)))
241 |         fused_linear = self.nonlin(fused_linear)
242 | 
243 |         softmax = self.softmax(fused_linear)
244 | 
245 |         return softmax
246 | 
247 | 
248 | 
249 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/models/bimodal_cnn_pooling.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch as th
  3 | from torch import nn
  4 | from torch.nn import init
  5 | from braindecode.models.base import BaseModel
  6 | from braindecode.torch_ext.modules import Expression
  7 | from braindecode.torch_ext.functions import safe_log, square
  8 | from BiModNeuroCNN.models.network_utils import reshape_tensor, reshape_4_lstm, _transpose_time_to_spat, tensor_size
  9 | 
 10 | class SubNet(BaseModel):
 11 |     """
 12 |     Temporal-Spatial first layer based on [2]
 13 | 
 14 |     References
 15 |     ----------
 16 | 
 17 |     .. [2] Schirrmeister, R. T., Springenberg, J. T., Fiederer, L. D. J.,
 18 |        Glasstetter, M., Eggensperger, K., Tangermann, M., Hutter, F. & Ball, T. (2017).
 19 |        Deep learning with convolutional neural networks for EEG decoding and
 20 |        visualization.
 21 |        Human Brain Mapping , Aug. 2017. Online: http://dx.doi.org/10.1002/hbm.23730
 22 |     """
 23 | 
 24 |     def __init__(
 25 |             self,
 26 |             in_chans,
 27 |             n_classes,
 28 |             input_time_length=None,
 29 |             n_filters_time=40,
 30 |             filter_time_length=25,
 31 |             n_filters_spat=40,
 32 |             n_filters_2=10,
 33 |             filter_length_2=10,
 34 |             pool_time_length_1=5,
 35 |             pool_time_stride_1=2,
 36 |             pool_length_2=5,
 37 |             pool_stride_2=2,
 38 |             final_conv_length=30,
 39 |             conv_nonlin=square,
 40 |             pool_mode="max",
 41 |             pool_nonlin=safe_log,
 42 |             later_nonlin=None,
 43 |             later_pool_nonlin=nn.functional.leaky_relu,
 44 |             split_first_layer=True,
 45 |             batch_norm=True,
 46 |             batch_norm_alpha=0.1,
 47 |             drop_prob=0.1,
 48 |             stride_before_pool=False,
 49 |             structure = "shallow",
 50 |             fc1_out_features = 500,
 51 |             fc2_out_features = 500,
 52 |     ):
 53 |         if final_conv_length == "auto":
 54 |             assert input_time_length is not None
 55 |         self.__dict__.update(locals())
 56 |         del self.self
 57 | 
 58 |     def create_network(self):
 59 |         if self.stride_before_pool:
 60 |             conv_stride = self.pool_time_stride
 61 |         else:
 62 |             conv_stride = 1
 63 | 
 64 |         pool_class_dict = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)
 65 |         first_pool_class = pool_class_dict[self.pool_mode]
 66 |         pooling_reduction = self.pool_time_length_1 * self.pool_time_stride_1 * 10
 67 | 
 68 |         model = nn.Sequential()
 69 | 
 70 |         if self.split_first_layer:
 71 |             model.add_module("dimshuffle", Expression(_transpose_time_to_spat))
 72 |             model.add_module("conv_time", nn.Conv2d(1, self.n_filters_time, (self.filter_time_length, 1),
 73 |                                                     stride=1, ), )
 74 |             model.add_module("conv_spat", nn.Conv2d(self.n_filters_time, self.n_filters_spat,
 75 |                                                     (1, self.in_chans), stride=1, bias=not self.batch_norm, ),)
 76 |             n_filters_conv = self.n_filters_spat
 77 |             n_filters_op = self.n_filters_spat * (self.input_time_length - (4 + pooling_reduction)) # semi-hardcoded at the moment
 78 |         else:
 79 |             model.add_module("conv_time", nn.Conv2d(self.in_chans, self.n_filters_time,
 80 |                                                     (self.filter_time_length, 1), stride=1,
 81 |                                                     bias=not self.batch_norm, ), )
 82 |             n_filters_conv = self.n_filters_time
 83 |             
 84 |             n_filters_op = self.n_filters_time * (self.input_time_length - (4 + pooling_reduction)) # semi-hardcoded at the moment
 85 | 
 86 |         if self.batch_norm:
 87 |             model.add_module("bnorm", nn.BatchNorm2d(n_filters_conv, momentum=self.batch_norm_alpha,
 88 |                                                      affine=True), )
 89 |         model.add_module("conv_nonlin", Expression(self.conv_nonlin))
 90 |         model.add_module("drop", nn.Dropout(p=self.drop_prob))
 91 | 
 92 |         model.add_module("pool", first_pool_class(kernel_size=(self.pool_time_length_1, 1), stride=(self.pool_time_stride_1, 1)),)
 93 |         model.add_module("pool_nonlin", Expression(self.pool_nonlin))
 94 | 
 95 | 
 96 |         def add_conv_pool_block(model, n_filters_before,
 97 |                                 n_filters, filter_length, block_nr):
 98 | 
 99 |             model.add_module(f"conv_{block_nr}", nn.Conv2d(n_filters_before, n_filters,
100 |                                                            (filter_length, 1), stride=(conv_stride, 1),
101 |                                                            bias=not self.batch_norm))
102 |             
103 |             if self.batch_norm:
104 |                 model.add_module(f"bnorm_{block_nr}", nn.BatchNorm2d(n_filters,
105 |                                                                      momentum=self.batch_norm_alpha,
106 |                                                                      affine=True, eps=1e-5))
107 |             model.add_module(f"nonlin_{block_nr}", Expression(self.conv_nonlin))
108 |             model.add_module(f"drop_{block_nr}", nn.Dropout(p=self.drop_prob))
109 | 
110 |             model.add_module("pool", first_pool_class(kernel_size=(self.pool_length_2, 1),
111 |                             stride=(self.pool_stride_2, 1)),)
112 |             model.add_module("pool_nonlin", Expression(self.pool_nonlin))
113 | 
114 |         if self.structure == "deep":
115 | 
116 |             add_conv_pool_block(model, n_filters_conv, self.n_filters_2,
117 |                                 self.filter_length_2, 2)
118 |             model.add_module("tensor shape", Expression(tensor_size))
119 |             pooling_reduction = pooling_reduction + 22
120 |             print(pooling_reduction)
121 |             n_filters_op = self.n_filters_2 * 45 #(self.input_time_length - (23 + pooling_reduction)) # semi-hardcoded at the moment
122 | 
123 |         model.add_module('reshape', Expression(reshape_tensor))
124 |         
125 |         model.add_module('fc_1', nn.Linear(n_filters_op, self.fc1_out_features, bias=True))
126 |         
127 | 
128 |         # Initialization is xavier for initial layers
129 |         init.xavier_uniform_(model.conv_time.weight, gain=1)
130 |         # maybe no bias in case of no split layer and batch norm
131 |         if self.split_first_layer or (not self.batch_norm):
132 |             init.constant_(model.conv_time.bias, 0)
133 |         if self.split_first_layer:
134 |             init.xavier_uniform_(model.conv_spat.weight, gain=1)
135 |             if not self.batch_norm:
136 |                 init.constant_(model.conv_spat.bias, 0)
137 |         if self.batch_norm:
138 |             init.constant_(model.bnorm.weight, 1)
139 |             init.constant_(model.bnorm.bias, 0)
140 | 
141 |         param_dict = dict(list(model.named_parameters()))
142 |         if self.structure == "deep":
143 |             conv_weight = param_dict['conv_2.weight']
144 |             init.kaiming_normal_(conv_weight)  # He initialization
145 |             if not self.batch_norm:
146 |                 conv_bias = param_dict['conv_2.bias']
147 |                 init.constant_(conv_bias, 0)
148 |             else:
149 |                 bnorm_weight = param_dict['bnorm_2.weight']
150 |                 bnorm_bias = param_dict['bnorm_2.bias']
151 |                 init.constant_(bnorm_weight, 1)
152 |                 init.constant_(bnorm_bias, 0)
153 | 
154 |         fc_weight = param_dict['fc_1.weight']
155 |         init.kaiming_uniform_(fc_weight)
156 |         # model.eval()
157 | 
158 |         return model
159 | 
160 | 
161 | class BiModalNet(nn.Module):
162 | 
163 |     def __init__(self, n_classes, in_chans_1, input_time_1, SubNet_1_params, in_chans_2, input_time_2,
164 |                  SubNet_2_params, linear_dims, drop_prob, nonlin, fc1_out_features, fc2_out_features,
165 |                  gru_hidden_size, gru_n_layers=1):
166 |         """
167 |         BiModal CNN network receiving 2 different data types corresponding to a single ground truth (e.g. EEG and fNIRS)
168 |         Two SubNets are initialised and the forward pass of both is performed before their outputs are fed into the 
169 |         remainder of the network to be fused and applied to GRU and linear layers before log softmax classification.
170 | 
171 |         Parameters
172 |         :param: n_classes (int) number of classes in classification task
173 |         :param: in_chans_1 (int) number of channels in data
174 |         :param: input_time_1 (int) number of time samples in data
175 |         :param: SubNet_1_params (dict) parameters for initiating subnet 1
176 |         :param: in_chans_2 (int) number of channels in data
177 |         :param: input_time_2 (int) number of time samples in data
178 |         :param: SubNet_2_params (dict) parameters for initiating subnet 2
179 |         :param: linear_dims (int) dimension of linear layer
180 |         :param: drop_prob (float) dropout probability
181 |         :param: nonlin (th.nn.functional) activation function
182 |         :param: fc1_out_features (int) output dimension of subnet 1 linear layer
183 |         :param: fc2_out_features (int) output dimension of subnet 2 linear layer
184 |         :param: gru_hidden_size (int) size of GRU hidden layer
185 |         :param: gru_n_layers (int) number of GRU hidden layers
186 |         """
187 |         self.n_classes = n_classes
188 |         self.in_chans_1 = in_chans_1
189 |         self.input_time_1 = input_time_1
190 |         for key in SubNet_1_params:
191 |             setattr(self, f"SN1_{key}", SubNet_1_params[key])
192 |         self.in_chans_2 = in_chans_2
193 |         self.input_time_2 = input_time_2
194 |         for key in SubNet_2_params:
195 |             setattr(self, f"SN2_{key}", SubNet_2_params[key])
196 | 
197 |         self.linear_dims = linear_dims
198 |         self.drop_prob = drop_prob
199 |         self.fc1_out_features = fc1_out_features
200 |         self.fc2_out_features = fc2_out_features
201 |         self.fused_dimension = fc1_out_features + fc2_out_features
202 |         self.gru_hidden_size = gru_hidden_size
203 |         self.gru_n_layers = gru_n_layers
204 | 
205 |         super(BiModalNet, self).__init__()
206 |         model = nn.Sequential()
207 | 
208 |         self.subnet_1 = SubNet(in_chans=self.in_chans_1, n_classes=self.n_classes, input_time_length=self.input_time_1,
209 |                                 n_filters_time=self.SN1_n_filters_time, filter_time_length=self.SN1_filter_time_length,
210 |                                 n_filters_spat=self.SN1_n_filters_spat, n_filters_2=self.SN1_n_filters_2, filter_length_2=self.SN1_filter_length_2,
211 |                                 pool_time_length_1=self.SN1_pool_time_length_1, pool_time_stride_1=self.SN1_pool_time_stride_1, pool_length_2=self.SN1_pool_length_2,
212 |                                 pool_stride_2=self.SN1_pool_stride_2, final_conv_length='auto',
213 |                                 conv_nonlin=self.SN1_conv_nonlin, pool_mode=self.SN1_pool_mode, pool_nonlin=self.SN1_pool_nonlin,
214 |                                 split_first_layer=self.SN1_split_first_layer, batch_norm=self.SN1_batch_norm, batch_norm_alpha=self.SN1_batch_norm_alpha,
215 |                                 drop_prob=self.SN1_drop_prob, structure=self.SN1_structure, fc1_out_features=self.fc1_out_features).create_network() 
216 | 
217 |  
218 |         self.subnet_2 = SubNet(in_chans=self.in_chans_2, n_classes=self.n_classes, input_time_length=self.input_time_2,
219 |                                 n_filters_time=self.SN2_n_filters_time, filter_time_length=self.SN2_filter_time_length,
220 |                                 n_filters_spat=self.SN2_n_filters_spat, n_filters_2=self.SN2_n_filters_2, filter_length_2=self.SN2_filter_length_2,
221 |                                 pool_time_length_1=self.SN2_pool_time_length_1, pool_time_stride_1=self.SN2_pool_time_stride_1, pool_length_2=self.SN2_pool_length_2,
222 |                                 pool_stride_2=self.SN2_pool_stride_2, final_conv_length='auto',
223 |                                 conv_nonlin=self.SN2_conv_nonlin, pool_mode=self.SN2_pool_mode, pool_nonlin=self.SN2_pool_nonlin,
224 |                                 split_first_layer=self.SN2_split_first_layer, batch_norm=self.SN2_batch_norm, batch_norm_alpha=self.SN2_batch_norm_alpha,
225 |                                 drop_prob=self.SN2_drop_prob, structure=self.SN2_structure, fc2_out_features=self.fc2_out_features).create_network()
226 | 
227 |         self.reshape_tensor = reshape_4_lstm # works for GRU also
228 |         
229 |         self.gru = nn.GRU(input_size=self.fused_dimension, hidden_size=self.gru_hidden_size,
230 |                           num_layers=self.gru_n_layers, batch_first=True)
231 |  
232 |         self.nonlin  = nonlin
233 |         self.fused_dp = nn.Dropout(p=self.drop_prob)
234 | 
235 |         self.fused_linear = nn.Linear(self.gru_hidden_size, self.n_classes, bias=True)
236 |         self.softmax = nn.LogSoftmax(dim=1)
237 | 
238 |         self.size = Expression(tensor_size) # useful for debugging tensor/kernel dimension mismatches
239 |         
240 | 
241 |     def forward(self, data_1, data_2):
242 |         """
243 |         Forward pass of the Bimodal CNN
244 |         
245 |         :param data_1: tensor
246 |         :param data_2: tensor
247 |         """
248 |         data_1_h = self.subnet_1(data_1)
249 |         data_2_h = self.subnet_2(data_2)
250 | 
251 |         fusion_tensor = th.cat((data_1_h, data_2_h), dim=1)
252 |         
253 |         fusion_tensor_gru = self.reshape_tensor(fusion_tensor)
254 |         gru_inp = fusion_tensor.view(fusion_tensor_gru.size(0), 1, self.fused_dimension)
255 | 
256 |         gru_op, _ = self.gru(gru_inp)
257 | 
258 |         gru_op = self.nonlin(gru_op)
259 |         gru_op_dp = self.fused_dp(gru_op)
260 | 
261 |         fused_linear = self.fused_linear(gru_op_dp.view(gru_op_dp.size(0), gru_op_dp.size(2)))
262 |         fused_linear = self.nonlin(fused_linear)
263 | 
264 |         softmax = self.softmax(fused_linear)
265 | 
266 |         return softmax
267 | 
268 | 
269 | 
270 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/training/bimodal_classification.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Description: Class for training CNNs using a nested cross-validation method. Train on the inner_fold to obtain
  3 | optimized hyperparameters. Train outer_fold to obtain classification performance.
  4 | """
  5 | from braindecode.datautil.iterators import BalancedBatchSizeIterator
  6 | from braindecode.experiments.stopcriteria import MaxEpochs, NoDecrease, Or
  7 | from braindecode.torch_ext.util import set_random_seeds, np_to_var, var_to_np
  8 | from braindecode.datautil.signal_target import SignalAndTarget
  9 | from braindecode.torch_ext.functions import square, safe_log
 10 | import torch as th
 11 | from sklearn.model_selection import train_test_split
 12 | from BiModNeuroCNN.training.training_utils import current_acc, current_loss
 13 | from BiModNeuroCNN.data_loader.data_utils import smote_augmentation, multi_SignalAndTarget
 14 | from BiModNeuroCNN.results.results import Results as res
 15 | from torch.nn.functional import nll_loss, cross_entropy
 16 | from BiModNeuroCNN.training.bimodal_training import Experiment
 17 | import numpy as np
 18 | import itertools as it
 19 | import torch
 20 | from torch import optim
 21 | import logging
 22 | from ast import literal_eval
 23 | from BiModNeuroCNN.results.metrics import cross_entropy
 24 | import warnings
 25 | warnings.filterwarnings("ignore", category=UserWarning)
 26 | log = logging.getLogger(__name__)
 27 | torch.backends.cudnn.deterministic = True
 28 | 
 29 | class Classification:
 30 | 
 31 |     def __init__(self, model, subnet1_params, subnet2_params, hyp_params, parameters, data_params, model_save_path, tag):
 32 |         self.model = model
 33 |         self.subnet1_params = subnet1_params
 34 |         self.subnet2_params = subnet2_params
 35 |         self.model_save_path = model_save_path
 36 |         self.tag = tag
 37 |         self.best_loss = parameters["best_loss"]
 38 |         self.batch_size = parameters["batch_size"]
 39 |         self.monitors = parameters["monitors"]
 40 |         self.cuda = parameters["cuda"]
 41 |         self.model_constraint = parameters["model_constraint"]
 42 |         self.max_increase_epochs = parameters['max_increase_epochs']
 43 |         self.lr_scheduler = parameters['learning_rate_scheduler']
 44 |         self.lr_step = parameters['lr_step']
 45 |         self.lr_gamma = parameters['lr_gamma']
 46 |         self.n_classes = data_params["n_classes"]
 47 |         self.n_chans_d1 = data_params["n_chans_d1"]
 48 |         self.input_time_length_d1= data_params["input_time_length_d1"]
 49 |         self.n_chans_d2 = data_params["n_chans_d2"]
 50 |         self.input_time_length_d2 = data_params["input_time_length_d2"]
 51 |         self.hyp_params = hyp_params
 52 |         self.activation = "elu"
 53 |         self.learning_rate = 0.001
 54 |         self.dropout = 0.1
 55 |         self.epochs = parameters['epochs']
 56 |         self.window = None
 57 |         self.structure = 'deep'
 58 |         self.n_filts = 10 #n_filts in n-1 filters
 59 |         self.first_pool = False
 60 |         self.loss = nll_loss
 61 |         for key in hyp_params:
 62 |             setattr(self, key, hyp_params[key])
 63 |         self.iterator = BalancedBatchSizeIterator(batch_size=self.batch_size)
 64 |         self.best_params = None
 65 |         self.model_number = 1
 66 |         self.y_pred = np.array([])
 67 |         self.y_true = np.array([])
 68 |         self.probabilities = np.array([])
 69 | 
 70 |     def call_model(self):
 71 | 
 72 |         self.subnet1_params['structure'] = self.structure
 73 |         self.subnet2_params['structure'] = self.structure
 74 | 
 75 |         if self.model.__name__ == 'BiModalNet':
 76 |             model = self.model(n_classes=self.n_classes, in_chans_1=self.n_chans_d1, input_time_1=self.input_time_length_d1,
 77 |                                SubNet_1_params=self.subnet1_params, in_chans_2=self.n_chans_d2,
 78 |                                input_time_2=self.input_time_length_d2, SubNet_2_params=self.subnet2_params,
 79 |                                linear_dims=100, drop_prob=.2, nonlin=torch.nn.functional.leaky_relu,
 80 |                                fc1_out_features=500, fc2_out_features=500, gru_hidden_size=250, gru_n_layers=1)
 81 |             th.nn.init.kaiming_uniform_(model.fused_linear.weight)
 82 |             th.nn.init.constant_(model.fused_linear.bias, 0)
 83 | 
 84 |         elif self.model.__name__ == 'BiModalNet_w_Pool':
 85 |             model = self.model(n_classes=self.n_classes, in_chans_1=self.n_chans_d1, input_time_1=self.input_time_length_d1,
 86 |                                SubNet_1_params=self.subnet1_params, in_chans_2=self.n_chans_d2,
 87 |                                input_time_2=self.input_time_length_d2, SubNet_2_params=self.subnet2_params,
 88 |                                linear_dims=100, drop_prob=.2, nonlin=torch.nn.functional.leaky_relu,
 89 |                                fc1_out_features=500, fc2_out_features=500, gru_hidden_size=250, gru_n_layers=1)
 90 |             th.nn.init.kaiming_uniform_(model.fused_linear.weight)
 91 |             th.nn.init.constant_(model.fused_linear.bias, 0)
 92 |         return model
 93 |         
 94 |     def train_model(self, train_set_1, val_set_1, test_set_1, train_set_2, val_set_2, test_set_2, save_model):
 95 |         """
 96 |         :param train_set_1: (np.array) n_trials*n_channels*n_samples
 97 |         :param val_set_1: (np.array) n_trials*n_channels*n_samples
 98 |         :param test_set_1: (np.array) n_trials*n_channels*n_samples - can be None when training on inner-fold
 99 |         :param train_set_2: (np.array) n_trials*n_channels*n_samples
100 |         :param val_set_2: (np.array) n_trials*n_channels*n_samples
101 |         :param test_set_2:  (np.array) n_trials*n_channels*n_samples - can be None when training on inner-fold
102 |         :param save_model: (Bool) True if trained model is to be saved
103 |         :return: Accuracy and loss scores for the model trained with a given set of hyper-parameters
104 |         """
105 |         model = self.call_model()
106 |         predictions = None
107 | 
108 |         set_random_seeds(seed=20190629, cuda=self.cuda)
109 | 
110 |         if self.cuda:
111 |             model.cuda()
112 |             torch.backends.cudnn.deterministic = True
113 |             model = torch.nn.DataParallel(model)
114 |             log.info(f"Cuda in use")
115 | 
116 |         log.info("%s model: ".format(str(model)))
117 |         optimizer = optim.Adam(model.parameters(), lr=self.learning_rate, weight_decay=0.01, eps=1e-8, amsgrad=False)
118 | 
119 |         stop_criterion = Or([MaxEpochs(self.epochs),
120 |                              NoDecrease('valid_loss', self.max_increase_epochs)])
121 |         model_loss_function = None
122 | 
123 |         #####Setup to run the selected model#####
124 |         model_test = Experiment(model, train_set_1, val_set_1, train_set_2, val_set_2, test_set_1=test_set_1, test_set_2=test_set_2,
125 |                                 iterator=self.iterator, loss_function=self.loss, optimizer=optimizer,
126 |                                 lr_scheduler=self.lr_scheduler(optimizer, step_size=self.lr_step, gamma=self.lr_gamma),
127 |                                 model_constraint=self.model_constraint, monitors=self.monitors, stop_criterion=stop_criterion,
128 |                                 remember_best_column='valid_misclass', run_after_early_stop=True, model_loss_function=model_loss_function,
129 |                                 cuda=self.cuda, save_file=self.model_save_path, tag=self.tag, save_model=save_model)
130 |         model_test.run()
131 | 
132 |         model_acc = model_test.epochs_df['valid_misclass'].astype('float')
133 |         model_loss = model_test.epochs_df['valid_loss'].astype('float')
134 |         current_val_acc = 1 - current_acc(model_acc)
135 |         current_val_loss = current_loss(model_loss)
136 | 
137 |         test_accuracy = None
138 |         if train_set_1 is not None and test_set_2 is not None:
139 |             val_metric_index = self.get_model_index(model_test.epochs_df)
140 |             test_accuracy = round((1 - model_test.epochs_df['test_misclass'].iloc[val_metric_index]) * 100, 3)
141 |             predictions = model_test.model_predictions
142 |         probabilities = model_test.model_probabilities
143 | 
144 |         return current_val_acc, current_val_loss, test_accuracy, model_test, predictions, probabilities
145 | 
146 |     
147 |     def train_inner(self, train_set_1, val_set_1, train_set_2, val_set_2, test_set_1=None, test_set_2=None, augment=False, save_model=False):
148 |         """
149 |         :param train_set_1: (np.array) n_trials*n_channels*n_samples
150 |         :param val_set_1: (np.array) n_trials*n_channels*n_samples
151 |         :param test_set_1: (np.array) n_trials*n_channels*n_samples - can be None when performing HP optimization
152 |         :param train_set_2: (np.array) n_trials*n_channels*n_samples
153 |         :param val_set_2: (np.array) n_trials*n_channels*n_samples
154 |         :param test_set_2:  (np.array) n_trials*n_channels*n_samples - can be None when performing HP optimization
155 |         :param augment: (Bool) True if data augmentation to be applied - currently only configured for SMOTE augmentation
156 |         :param save_model: (Bool) True if trained model is to be saved
157 |         :return: Accuracy, loss and cross entropy scores for the model trained with a given set of hyper-parameters
158 |         """
159 |         val_acc, val_loss, val_cross_entropy = [], [], []
160 |         
161 |         if augment:
162 |             # Only augment training data - never test or validation sets
163 |             train_set_1_os, train_labels_1_os = smote_augmentation(train_set_1.X, train_set_1.y, 2)
164 |             train_set_2_os, train_labels_2_os = smote_augmentation(train_set_1.X, train_set_1.y, 2)
165 |             train_set_1, train_set_2 = multi_SignalAndTarget((train_set_1_os, train_labels_1_os), (train_set_2_os, train_labels_2_os))
166 | 
167 |         names = list(self.hyp_params.keys())
168 |         hyp_param_combs = it.product(*(self.hyp_params[Name] for Name in names))
169 |         for hyp_combination in hyp_param_combs:
170 | 
171 |             assert len(hyp_combination) == len(self.hyp_params), f"HP combination must be of equal length to original set."
172 | 
173 |             for i in range(len(self.hyp_params)):
174 |                 setattr(self, list(self.hyp_params.keys())[i], hyp_combination[i])
175 | 
176 | 
177 |             if 'window' in self.hyp_params.keys():
178 |                 # when using classification window as a hyperparameter - currently data would have to be of same number of samples
179 |                 train_set_1_w = SignalAndTarget(train_set_1.X[:, :, self.window[0]:self.window[1]], train_set_1.y)
180 |                 val_set_1_w = SignalAndTarget(val_set_1.X[:, :, self.window[0]:self.window[1]], val_set_1.y)
181 |                 train_set_2_w = SignalAndTarget(train_set_2.X[:, :, self.window[0]:self.window[1]], train_set_2.y)
182 |                 val_set_2_w = SignalAndTarget(val_set_2.X[:, :, self.window[0]:self.window[1]], val_set_2.y)
183 |                 current_val_acc, current_val_loss, _, _, _, probabilities = self.train_model(train_set_1_w, val_set_1_w, test_set_1, train_set_2_w,
184 |                                                                                              val_set_2_w, test_set_2, save_model)
185 |             else:
186 | 
187 |                 current_val_acc, current_val_loss, _, _, _, probabilities = self.train_model(train_set_1, val_set_1, test_set_1, train_set_2,
188 |                                                                                          val_set_2, test_set_2, save_model)
189 |             val_acc.append(current_val_acc)
190 |             val_loss.append(current_val_loss)
191 | 
192 |             probabilities = np.array(probabilities).reshape((val_set_1.y.shape[0],4))
193 | 
194 |             val_cross_entropy.append(cross_entropy(val_set_1.y, probabilities)) #1 CE value per-HP, repeat for n_folds
195 | 
196 |         return val_acc, val_loss, val_cross_entropy
197 | 
198 | 
199 |     def train_outer(self, trainsetlist, testsetlist, augment=False, save_model=True, epochs_save_path=None, print_details=False):
200 |         """
201 |         :param trainsetlist: (list) data as split by k-folds n_folds*(n_trials*n_channels*n_samples)
202 |         :param testsetlist: (list) data as split by k-folds n_folds*(n_trials*n_channels*n_samples)
203 |         :param augment: (Bool) True if data augmentation to be applied - currently only configured for SMOTE augmentation
204 |         :param save_model: (Bool) True if trained model is to be saved
205 |         """
206 |         scores, all_preds, probabilities_list, outer_cross_entropy, fold_models = [],[],[],[],[]
207 |         
208 |         fold_number = 1
209 |         for train_set, test_set in zip(trainsetlist, testsetlist):
210 | 
211 |             train_set_1, train_set_2 = train_set[0], train_set[1]
212 |             test_set_1, test_set_2   = test_set[0], test_set[1]
213 | 
214 |             train_set_1_X, val_set_1_X, train_set_1_y, val_set_1_y = train_test_split(train_set_1.X, train_set_1.y, test_size=0.2,
215 |                                                                                       shuffle=True, random_state=42, stratify= train_set_1.y)
216 |             train_set_2_X, val_set_2_X, train_set_2_y, val_set_2_y = train_test_split(train_set_2.X, train_set_2.y, test_size=0.2,
217 |                                                                                       shuffle=True, random_state=42, stratify= train_set_2.y)
218 | 
219 |             train_set_1, val_set_1, train_set_2, val_set_2 = multi_SignalAndTarget((train_set_1_X, train_set_1_y), (val_set_1_X, val_set_1_y),
220 |                                                                                    (train_set_2_X, train_set_2_y), (val_set_2_X, val_set_2_y))
221 | 
222 |             if augment:
223 |                 # Only augment training data - never test or validation sets
224 |                 train_set_1_os, train_labels_1_os = smote_augmentation(train_set_1.X, train_set_1.y, 2)
225 |                 train_set_2_os, train_labels_2_os = smote_augmentation(train_set_2.X, train_set_2.y, 2)
226 |                 train_set_1 = SignalAndTarget(train_set_1_os, train_labels_1_os)
227 |                 train_set_2 = SignalAndTarget(train_set_2_os, train_labels_2_os)
228 |             print(train_set_1.X.shape)
229 | 
230 |             if 'window' in self.hyp_params.keys():
231 |                 # when using classification window as a hyperparameter - currently data would have to be of same number of samples
232 |                 if type(self.window) == str:
233 |                     self.window = literal_eval(self.window)  # extract tuple of indices
234 |                 train_set_1_w = SignalAndTarget(train_set_1.X[:,:,self.window[0]:self.window[1]], train_set_1.y)
235 |                 val_set_1_w = SignalAndTarget(val_set_1.X[:,:,self.window[0]:self.window[1]], val_set_1.y)
236 |                 test_set_1_w = SignalAndTarget(test_set_1.X[:,:,self.window[0]:self.window[1]], test_set_1.y)
237 |                 train_set_2_w = SignalAndTarget(train_set_2.X[:,:,self.window[0]:self.window[1]], train_set_2.y)
238 |                 val_set_2_w = SignalAndTarget(val_set_2.X[:,:,self.window[0]:self.window[1]], val_set_2.y)
239 |                 test_set_2_w = SignalAndTarget(test_set_2.X[:, :, self.window[0]:self.window[1]], test_set_2.y)
240 | 
241 |                 _, _, test_accuracy, optimised_model, predictions, probabilities = self.train_model(train_set_1_w, val_set_1_w, test_set_1_w,
242 |                                                                                                     train_set_2_w, val_set_2_w, test_set_2_w, save_model)
243 | 
244 |                 if print_details:
245 |                     print(f"Data 1 train set: {train_set_1.y.shape} | Data 1 val_set: {val_set_1.y.shape} | Data 1 test_set: {test_set_1.y.shape}")
246 |                     print(f"Data 2 train set: {train_set_2.y.shape} | Data 2 val_set: {val_set_2.y.shape} | Data 2 test_set: {test_set_2.y.shape}")
247 |             else:
248 |                 _, _, test_accuracy, optimised_model, predictions, probabilities = self.train_model(train_set_1, val_set_1, test_set_1,
249 |                                                                                                     train_set_2, val_set_2, test_set_2, save_model)
250 |             if epochs_save_path != None:
251 |                 try:
252 |                     optimised_model.epochs_df.to_excel(f"{epochs_save_path}/epochs{fold_number}.xlsx")
253 |                 except FileNotFoundError:
254 |                     optimised_model.epochs_df.to_excel(f"{epochs_save_path}/epochs{fold_number}.xlsx", engine='xlsxwriter')
255 |             
256 |             fold_models.append(optimised_model)
257 |             
258 |             probs_array = []
259 |             for lst in probabilities:
260 |                 for trial in lst:
261 |                     probs_array.append(trial) # all probabilities for this test-set
262 |             probabilities_list.append(probs_array) #outer probabilities to be used for cross-entropy
263 | 
264 | 
265 |             print(f"/"*20)
266 |             scores.append(test_accuracy)
267 |             self.concat_y_pred(predictions)
268 |             self.concat_y_true(test_set_1.y)
269 |             
270 |             fold_number += 1
271 |         for y_true, y_probs in zip(testsetlist, probabilities_list):
272 |             outer_cross_entropy.append(cross_entropy(y_true[0].y, y_probs))
273 | 
274 |         return scores, fold_models, self.y_pred, probabilities_list, outer_cross_entropy, self.y_true
275 | 
276 |     def set_best_params(self):
277 |         """
278 |         Set optimal hyperparameter values selected from optimization - Best parameter values can be
279 |         accessed with BiModNeuroCNN.results.Results.get_best_params() and the list assigned to self.best_params.
280 |         """
281 |         assert type(self.best_params) is list, "list of selected parameters required"
282 |         for i in range(len(self.hyp_params)):
283 |             setattr(self, list(self.hyp_params.keys())[i], self.best_params[i])
284 | 
285 |     @staticmethod
286 |     def get_model_index(df):
287 |         """
288 |         Returns the row index of a pandas dataframe used for storing epoch-by-epoch results.
289 |         :param df: pandas.DataFrame
290 |         :return: int index of the selected epoch based on validation metric
291 |         """
292 |         valid_metric_index = df['valid_misclass'].idxmin()
293 |         best_val_acc = df.index[df['valid_misclass'] == df['valid_misclass'].iloc[valid_metric_index]]
294 |         previous_best = 1.0
295 |         i = 0
296 |         for n, index in enumerate(best_val_acc):
297 |             value = df['test_misclass'][index]
298 |             if value < previous_best:
299 |                 previous_best = value
300 |                 i = n
301 |         return best_val_acc[i]
302 | 
303 |     def concat_y_pred(self, y_pred_fold):
304 |         """
305 |         Method for combining all outer-fold ground-truth values.
306 |         :param y_pred_fold: array of single-fold true values.
307 |         :return: all outer fold true values in single arrau
308 |         """
309 |         self.y_pred = np.concatenate((self.y_pred, np.array(y_pred_fold)))
310 | 
311 |     def concat_y_true(self, y_true_fold):
312 |         """
313 |         Method for combining all outer-fold ground-truth values.
314 |         :param y_true_fold: array of single-fold true values.
315 |         :return: all outer fold true values in single arrau
316 |         """
317 |         self.y_true = np.concatenate((self.y_true, np.array(y_true_fold)))
318 | 
319 |     def concat_probabilities(self, probabilities_fold):
320 |         """
321 |         Method for combining all outer-fold ground-truth values.
322 |         :param y_pred_fold: array of single-fold true values.
323 |         :return: all outer fold true values in single arrau
324 |         """
325 |         self.probabilities = np.concatenate((self.probabilities, probabilities_fold))
326 | 
327 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/training/bimodal_training.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Description: Script adapted from: https://github.com/robintibor/braindecode/tree/master/braindecode/experiments
  3 | Modifications primarily to enable bimodal training to implement model saving. Includes probabilites for use with
  4 | cross entropy metric.
  5 | """
  6 | import logging
  7 | from collections import OrderedDict
  8 | from copy import deepcopy
  9 | import time
 10 | import os
 11 | import numpy as np
 12 | import pandas as pd
 13 | import torch as th
 14 | 
 15 | from braindecode.datautil.splitters import concatenate_sets
 16 | from braindecode.experiments.loggers import Printer
 17 | from braindecode.experiments.stopcriteria import MaxEpochs, ColumnBelow, Or
 18 | from braindecode.torch_ext.util import np_to_var
 19 | from braindecode.experiments.monitors import compute_pred_labels_from_trial_preds
 20 | 
 21 | from BiModNeuroCNN.training.training_utils import combine_batches
 22 | 
 23 | 
 24 | 
 25 | log = logging.getLogger(__name__)
 26 | th.backends.cudnn.deterministic = True
 27 | 
 28 | class RememberBest(object):
 29 |     """
 30 |     Class to remember and restore 
 31 |     the parameters of the model and the parameters of the
 32 |     optimizer at the epoch with the best performance.
 33 | 
 34 |     Parameters
 35 |     ----------
 36 |     column_name: str
 37 |         The lowest value in this column should indicate the epoch with the
 38 |         best performance (e.g. misclass might make sense).
 39 |         
 40 |     Attributes
 41 |     ----------
 42 |     best_epoch: int Index of best epoch
 43 |     """
 44 |     def __init__(self, column_name, predictions, probabilities):
 45 |         self.column_name = column_name
 46 |         self.best_epoch = 0
 47 |         self.lowest_val = float('inf')
 48 |         self.model_state_dict = None
 49 |         self.optimizer_state_dict = None
 50 |         self.lowest_test = float('inf')
 51 |         self.lowest_val_misclass = float('inf')
 52 |         self.model_predictions = None
 53 |         self.model_probabilities = None
 54 | 
 55 | 
 56 |     def remember_epoch(self, epochs_df, model, optimizer, save_path, tag, class_acc, save_model, predictions, probabilities):
 57 |         """
 58 |         Remember this epoch: Remember parameter values in case this epoch
 59 |         has the best performance.
 60 |         
 61 |         Parameters
 62 |         ----------
 63 |         :param epochs_df: (pandas.Dataframe) Dataframe containing the column `column_name` with which performance is evaluated.
 64 |         :param model: (torch.nn.Module)
 65 |         :param optimizer: (torch.optim.Optimizer)
 66 |         :param subject_id: (str) identifier
 67 |         :param tag: (str) label to give the saved CNN e.g. "BmCNN"
 68 |         :param directory: (str) directory for saving models
 69 |         :param save_model: boolean True or False
 70 |         :param probabilities: softmax probabilities to be used for cross entropy metric
 71 |         :param predictions:  classifier prediction values for epoch
 72 |         """
 73 |         self.class_acc = class_acc
 74 |         self.optimizer = optimizer
 75 |         i_epoch = len(epochs_df) - 1
 76 |         current_val = float(epochs_df[self.column_name].iloc[-1]) #validation misclass
 77 |         if "test_misclass" in list(epochs_df.columns.values):
 78 |             current_test_misclass = float(epochs_df['test_misclass'].iloc[-1]) #test misclass
 79 |         else:
 80 |             current_test_misclass = 0
 81 | 
 82 |         #####Storing of the models enabled depending on current of loss and validation accuracy#####
 83 |         if (current_val < self.lowest_val) or (
 84 |                 current_val == self.lowest_val and current_test_misclass <= self.lowest_test):
 85 |             
 86 |             self.lowest_test = current_test_misclass
 87 |             self.class_acc.append(current_test_misclass)
 88 | 
 89 |             self.best_epoch = i_epoch
 90 |             self.lowest_val = current_val
 91 |             self.model_predictions = predictions
 92 |             self.model_probabilities = probabilities
 93 |             self.model_state_dict = deepcopy(model.state_dict())
 94 |             self.optimizer_state_dict = deepcopy(optimizer.state_dict())
 95 |             log.info("New best {:s}: {:5f}".format(self.column_name,
 96 |                                                    current_val))
 97 |             log.info("")
 98 | 
 99 |             if save_model:
100 |                 log.info("Saving current best model for validation accuracy...")
101 |                 log.info("")
102 | 
103 |                 if not os.path.exists(save_path):
104 |                     os.makedirs(save_path)
105 |                 try:
106 |                     th.save(model.state_dict(), f"{save_path}/{tag}_{self.best_epoch}.pt")
107 |                 except PermissionError:
108 |                     # redundancy for storing of models
109 |                     log.info("Permission denied for this path!")
110 |                     th.save(model.state_dict(), f"{save_path}/{tag}_{self.best_epoch}_a.pt")
111 |                 finally:
112 |                     log.info("model not saved! Continuing with training")
113 |                 self.model_predictions = predictions
114 |                 self.model_probabilities = probabilities
115 |                 
116 |         return self.model_predictions, self.model_probabilities
117 | 
118 |     def reset_to_best_model(self, epochs_df, model, optimizer):
119 |         """
120 |         Reset parameters to parameters at best epoch and remove rows 
121 |         after best epoch from epochs dataframe.
122 |         
123 |         Modifies parameters of model and optimizer, changes epochs_df in-place.
124 |         
125 |         Parameters
126 |         ----------
127 |         epochs_df: `pandas.Dataframe`
128 |         model: `torch.nn.Module`
129 |         optimizer: `torch.optim.Optimizer`
130 | 
131 |         """
132 |         # Remove epochs past the best one from epochs dataframe
133 |         epochs_df.drop(range(self.best_epoch+1, len(epochs_df)), inplace=True)
134 |         model.load_state_dict(self.model_state_dict)
135 |         optimizer.load_state_dict(self.optimizer_state_dict)
136 | 
137 | 
138 | class Experiment(object):
139 |     """
140 |     Class that performs one experiment on training, validation and test set.
141 | 
142 |     It trains as follows:
143 |     
144 |     1. Train on training set until a given stop criterion is fulfilled
145 |     2. Reset to the best epoch, i.e. reset parameters of the model and the 
146 |        optimizer to the state at the best epoch ("best" according to a given
147 |        criterion)
148 |     3. Continue training on the combined training + validation set until the
149 |        loss on the validation set is as low as it was on the best epoch for the
150 |        training set. (or until the ConvNet was trained twice as many epochs as
151 |        the best epoch to prevent infinite training)
152 | 
153 |     Parameters
154 |     ----------
155 |     Parameters
156 |     ----------
157 |     :param epochs_df: (pandas.Dataframe) Dataframe containing the column `column_name` with which performance is evaluated.
158 |     :model: (torch.nn.Module)
159 |     :train_set_1: (braindecode.SignalAndTarget)
160 |     :valid_set_1: (braindecode.SignalAndTarget)
161 |     :train_set_2: (braindecode.SignalAndTarget)
162 |     :valid_set_2: (braindecode.SignalAndTarget)
163 |     :test_set_1: (braindecode.SignalAndTarget)
164 |     :test_set_2: (braindecode.SignalAndTarget)
165 |     :iterator: (iterator object)
166 |     :loss_function: function 
167 |         Function mapping predictions and targets to a loss: 
168 |         (predictions: `torch.autograd.Variable`, 
169 |         targets:`torch.autograd.Variable`)
170 |         -> loss: `torch.autograd.Variable`
171 |     :optimizer: (torch.optim.Optimizer)
172 |     :model_constraint: object
173 |         Object with apply function that takes model and constraints its 
174 |         parameters. `None` for no constraint.
175 |     :monitors: list of objects
176 |         List of objects with monitor_epoch and monitor_set method, should
177 |         monitor the traning progress.
178 |     :stop_criterion: object
179 |         Object with `should_stop` method, that takes in monitoring dataframe
180 |         and returns if training should stop:
181 |     :remember_best_column: str
182 |         Name of column to use for storing parameters of best model. Lowest value
183 |         should indicate best performance in this column.
184 |     :run_after_early_stop: bool
185 |         Whether to continue running after early stop
186 |     :model_loss_function: function, optional
187 |         Function (model -> loss) to add a model loss like L2 regularization.
188 |         Note that this loss is not accounted for in monitoring at the moment.
189 |     :save_file: (str) path to save model
190 |     :tag: (str) name to attach to saved model
191 |     :save_model: (bool) whetjer to save model or not
192 |     :batch_modifier: object, optional
193 |         Object with modify method, that can change the batch, e.g. for data
194 |         augmentation
195 |     :cuda: bool, optional
196 |         Whether to use cuda.
197 |     :pin_memory: bool, optional
198 |         Whether to pin memory of inputs and targets of batch.
199 |     :do_early_stop: bool
200 |         Whether to do an early stop at all. If true, reset to best model
201 |         even in case experiment does not run after early stop.
202 |     :reset_after_second_run: bool
203 |         If true, reset to best model when second run did not find a valid loss
204 |         below or equal to the best train loss of first run.
205 |     :log_0_epoch: bool
206 |         Whether to compute monitor values and log them before the
207 |         start of training.
208 |     :loggers: list of :class:`.Logger`
209 |         How to show computed metrics.
210 |         
211 |     Attributes
212 |     ----------
213 |     epochs_df: `pandas.DataFrame`
214 |         Monitoring values for all epochs.
215 |     """
216 |     def __init__(self, model, train_set_1, valid_set_1, train_set_2, valid_set_2, test_set_1, test_set_2, iterator,
217 |                  loss_function, optimizer, lr_scheduler, model_constraint, monitors, stop_criterion, remember_best_column, run_after_early_stop,
218 |                  model_loss_function, save_file, tag, save_model, batch_modifier=None, cuda=True, pin_memory=False,
219 |                  do_early_stop=True, reset_after_second_run=False, log_0_epoch=True, loggers=('print',)):
220 | 
221 |         if run_after_early_stop or reset_after_second_run:
222 |             assert do_early_stop == True, ("Can only run after early stop or "
223 |             "reset after second run if doing an early stop")
224 |         if do_early_stop:
225 |             assert valid_set_1 is not None and valid_set_2 is not None 
226 |             assert remember_best_column is not None
227 |         self.model = model
228 |         self.datasets = OrderedDict((('train_1', train_set_1), ('train_2', train_set_2),
229 |                                      ('valid_1', valid_set_1), ('valid_2', valid_set_2),
230 |                                      ('test_1', test_set_1), ('test_2', test_set_2))) 
231 | 
232 |         if valid_set_1 is None or valid_set_2 is None:
233 |             self.datasets.pop('valid_1')
234 |             self.datasets.pop('valid_2')
235 |             assert run_after_early_stop == False
236 |             assert do_early_stop == False
237 |         if test_set_1 is None or test_set_2 is None:
238 |             self.datasets.pop('test_1')
239 |             self.datasets.pop('test_2')
240 | 
241 |         self.iterator = iterator
242 |         self.loss_function = loss_function
243 |         self.optimizer = optimizer
244 |         self.model_constraint = model_constraint
245 |         self.monitors = monitors
246 |         self.stop_criterion = stop_criterion
247 |         self.remember_best_column = remember_best_column
248 |         self.run_after_early_stop = run_after_early_stop
249 |         self.model_loss_function = model_loss_function
250 |         self.batch_modifier = batch_modifier
251 |         self.cuda = cuda
252 |         self.epochs_df = pd.DataFrame()
253 |         self.before_stop_df = None
254 |         self.rememberer = None
255 |         self.pin_memory = pin_memory
256 |         self.do_early_stop = do_early_stop
257 |         self.reset_after_second_run = reset_after_second_run
258 |         self.log_0_epoch = log_0_epoch
259 |         self.loggers = loggers
260 |         self.save_file = save_file
261 |         self.tag = tag
262 |         self.class_acc = [] 
263 |         self.save_model = save_model
264 |         self.predictions = None
265 |         self.probabilites = None
266 |         self.lr_scheduler = lr_scheduler
267 | 
268 | 
269 |     def run(self):
270 |         """
271 |         Run complete training.
272 |         """
273 |         self.setup_training()
274 |         log.info("Run until first stop...")
275 |         self.run_until_first_stop()
276 |         if self.do_early_stop:
277 |             # always setup for second stop, in order to get best model
278 |             # even if not running after early stop...
279 |             log.info("Setup for second stop...")
280 |             self.setup_after_stop_training()
281 |         if self.run_after_early_stop:
282 |             log.info("Run until second stop...")
283 |             loss_to_reach = float(self.epochs_df['train_loss'].iloc[-1])
284 |             self.run_until_second_stop()
285 |             if self.reset_after_second_run:
286 |                 # if no valid loss was found below the best train loss on 1st
287 |                 # run, reset model to the epoch with lowest valid_misclass
288 |                 if float(self.epochs_df['valid_loss'].iloc[-1]) > loss_to_reach:
289 |                     log.info("Resetting to best epoch {:d}".format(
290 |                         self.rememberer.best_epoch))
291 |                     self.rememberer.reset_to_best_model(self.epochs_df,
292 |                                                         self.model,
293 |                                                         self.optimizer)
294 | 
295 |     def setup_training(self):
296 |         """
297 |         Setup training, i.e. transform model to cuda,
298 |         initialize monitoring.
299 |         """
300 |         # reset remember best extension in case you rerun some experiment
301 |         if self.do_early_stop:
302 |             self.rememberer = RememberBest(self.remember_best_column, self.predictions, self.probabilites)
303 |         if self.loggers == ('print',):
304 |             self.loggers = [Printer()]
305 |         self.epochs_df = pd.DataFrame()
306 |         if self.cuda:
307 |             assert th.cuda.is_available(), "Cuda not available"
308 |             self.model.cuda()
309 | 
310 |     def run_until_first_stop(self):
311 |         """
312 |         Run training and evaluation using only training set for training
313 |         until stop criterion is fulfilled.
314 |         """
315 | 
316 |         self.run_until_stop(self.datasets, remember_best=self.do_early_stop)
317 | 
318 |     def run_until_second_stop(self):
319 |         """
320 |         Run training and evaluation using combined training + validation sets 
321 |         for training on both datasets. 
322 |         
323 |         Runs until loss on validation set decreases below loss on training set 
324 |         of best epoch or  until as many epochs trained after as before 
325 |         first stop.
326 |         """
327 |         datasets = self.datasets
328 |         datasets['train_1'] = concatenate_sets([datasets['train_1'],
329 |                                                 datasets['valid_1']]) 
330 |         datasets['train_2'] = concatenate_sets([datasets['train_2'],
331 |                                                 datasets['valid_2']]) 
332 | 
333 |         self.run_until_stop(datasets, remember_best=True)
334 | 
335 |     def run_until_stop(self, datasets, remember_best):
336 |         """
337 |         Run training and evaluation on given datasets until stop criterion is
338 |         fulfilled. Return predictions and probabilites associated with best epochs.
339 |         
340 |         Parameters
341 |         ----------
342 |         datasets: OrderedDict
343 |             Dictionary with train, valid and test as str mapping to
344 |             :class:`.SignalAndTarget` objects.
345 |         remember_best: bool
346 |             Whether to remember parameters at best epoch.
347 |         """
348 |         if self.log_0_epoch:
349 |             self.monitor_epoch(datasets)
350 |             self.log_epoch()
351 |             if remember_best:
352 |                 self.model_predictions, self.model_probabilities = self.rememberer.remember_epoch(self.epochs_df, self.model, self.optimizer,
353 |                                                                                                   self.save_file, self.tag, self.class_acc,
354 |                                                                                                   self.save_model, self.predictions, self.probabilites) 
355 |         self.iterator.reset_rng()
356 |         while not self.stop_criterion.should_stop(self.epochs_df):
357 |             self.run_one_epoch(datasets, remember_best)
358 | 
359 |     def run_one_epoch(self, datasets, remember_best):
360 |         """
361 |         Run training and evaluation on given datasets for one epoch. Batches for 
362 |         two data types are combined.
363 |         
364 |         Parameters
365 |         ----------
366 |         datasets: OrderedDict
367 |             Dictionary with train, valid and test as str mapping to
368 |             :class:`.SignalAndTarget` objects.
369 |         remember_best: bool
370 |             Whether to remember parameters if this epoch is best epoch.
371 |         """
372 |         batch_generator_1 = self.iterator.get_batches(datasets['train_1'],
373 |                                                     shuffle=True) 
374 |         batch_generator_2 = self.iterator.get_batches(datasets['train_2'],
375 |                                                     shuffle=True) 
376 |         combined_batches = combine_batches(batch_generator_1, batch_generator_2)
377 |         start_train_epoch_time = time.time()
378 |         for inputs_1, targets_1, inputs_2, targets_2 in combined_batches:
379 |             if self.batch_modifier is not None:
380 |                 inputs_1, targets_1 = self.batch_modifier.process(inputs_1, targets_1) 
381 |                 inputs_2, targets_2 = self.batch_modifier.process(inputs_2, targets_2) 
382 | 
383 |             if len(inputs_1) > 0 and len(inputs_2) > 0:
384 | 
385 |                 self.train_batch(inputs_1, targets_1, inputs_2, targets_2)
386 |                 if self.lr_scheduler != None:
387 |                     self.lr_scheduler.step()
388 | 
389 |         end_train_epoch_time = time.time()
390 | 
391 |         log.info("Time only for training updates: {:.2f}s".format(
392 |             end_train_epoch_time - start_train_epoch_time))
393 | 
394 |         self.monitor_epoch(datasets)
395 |         self.log_epoch()
396 |         if remember_best:
397 |             self. model_predictions, self.model_probabilities = self.rememberer.remember_epoch(self.epochs_df, self.model, self.optimizer,
398 |                                                                                                self.save_file, self.tag, self.class_acc,
399 |                                                                                                self.save_model, self.predictions, self.probabilites) 
400 | 
401 |     def train_batch(self, inputs_1, targets_1, inputs_2, targets_2):
402 |         """
403 |         Train on given inputs and targets.
404 |         
405 |         Parameters
406 |         ----------
407 |         :inputs_1: (torch.autograd.Variable)
408 |         :targets_1: (torch.autograd.Variable)
409 |         :inputs_2: (torch.autograd.Variable)
410 |         :targets_2: (torch.autograd.Variable)
411 |         """
412 |         
413 |         self.model.train()
414 |         input_vars_1 = np_to_var(inputs_1, pin_memory=self.pin_memory)
415 |         target_vars_1 = np_to_var(targets_1, pin_memory=self.pin_memory)
416 |         input_vars_2 = np_to_var(inputs_2, pin_memory=self.pin_memory) 
417 |         target_vars_2 = np_to_var(targets_2, pin_memory=self.pin_memory) 
418 | 
419 |         if self.cuda:
420 |             input_vars_1 = input_vars_1.cuda()
421 |             target_vars_1 = target_vars_1.cuda()
422 |             input_vars_2 = input_vars_2.cuda() 
423 |             target_vars_2 = target_vars_2.cuda() 
424 |         self.optimizer.zero_grad()
425 |         th.autograd.set_detect_anomaly(True)
426 | 
427 |         outputs = self.model(input_vars_1, input_vars_2) 
428 |         loss = self.loss_function(outputs, target_vars_1) 
429 |         if self.model_loss_function is not None:
430 |             loss = loss + self.model_loss_function(self.model)
431 |         
432 |         loss.backward()
433 |        
434 |         self.optimizer.step()
435 |         if self.model_constraint is not None:
436 |             self.model_constraint.apply(self.model)
437 | 
438 | 
439 |     def eval_on_batch(self, inputs_1, targets_1, inputs_2, targets_2):
440 |         """
441 |         Evaluate given inputs and targets.
442 |         
443 |         Parameters
444 |         ----------
445 |         :inputs_1: (torch.autograd.Variable)
446 |         :targets_1: (torch.autograd.Variable)
447 |         :inputs_2: (torch.autograd.Variable)
448 |         :targets_2: (torch.autograd.Variable)
449 | 
450 |         Returns
451 |         -------
452 |         predictions: `torch.autograd.Variable`
453 |         loss: `torch.autograd.Variable`
454 | 
455 |         """
456 |         self.model.eval()
457 |         with th.no_grad():
458 | 
459 |             input_vars_1 = np_to_var(inputs_1, pin_memory=self.pin_memory)
460 |             target_vars_1 = np_to_var(targets_1, pin_memory=self.pin_memory) # only 1 target array required
461 |             input_vars_2 = np_to_var(inputs_2, pin_memory=self.pin_memory) 
462 | 
463 |             if self.cuda:
464 |                 input_vars_1 = input_vars_1.cuda()
465 |                 target_vars_1 = target_vars_1.cuda()
466 |                 input_vars_2 = input_vars_2.cuda()
467 | 
468 |             outputs = self.model(input_vars_1, input_vars_2)
469 | 
470 |             probabilities = th.exp(outputs.cpu()).numpy() # calculated probabilities
471 | 
472 |             loss = self.loss_function(outputs, target_vars_1)
473 |             if hasattr(outputs, 'cpu'):
474 |                 outputs = outputs.cpu().data.numpy()
475 |             else:
476 | 
477 |                 outputs = [o.cpu().data.numpy() for o in outputs]
478 |                 
479 |             loss = loss.cpu().data.numpy()
480 | 
481 |         return outputs, loss, probabilities
482 | 
483 |     def monitor_epoch(self, datasets):
484 |         """
485 |         Evaluate one epoch for given datasets.
486 |         
487 |         Stores results in `epochs_df`
488 |         
489 |         Parameters
490 |         ----------
491 |         datasets: OrderedDict
492 |             Dictionary with train, valid and test as str mapping to
493 |             :class:`.SignalAndTarget` objects.
494 | 
495 |         """
496 |         result_dicts_per_monitor = OrderedDict()
497 |         for m in self.monitors:
498 |             result_dicts_per_monitor[m] = OrderedDict()
499 |         for m in self.monitors:
500 |             result_dict = m.monitor_epoch()
501 |             if result_dict is not None:
502 |                 result_dicts_per_monitor[m].update(result_dict)
503 | 
504 |         set_1, set_2, set_list = [], [], []
505 |         for i, j in self.datasets.items():
506 |             set_list.append(i)
507 |         for i in range(0, len(set_list), 2):
508 |             set_1.append(set_list[i])
509 |             set_2.append(set_list[i + 1])
510 | 
511 | 
512 |         for name_1, name_2 in zip(set_1, set_2):
513 |             setname = name_1.split('_')[0]
514 | 
515 |             batch_gen_1 = self.iterator.get_batches(datasets[name_1], shuffle=False) 
516 |             batch_gen_2 = self.iterator.get_batches(datasets[name_2], shuffle=False)  
517 |             combined_batches = combine_batches(batch_gen_1, batch_gen_2)  
518 | 
519 |             all_preds = []
520 |             all_losses = []
521 |             all_probs = []
522 |             all_batch_sizes = []
523 |             all_targets = []
524 | 
525 |             for inputs_1, targets_1, inputs_2, targets_2 in combined_batches:
526 |                 preds, loss, probabilities = self.eval_on_batch(inputs_1, targets_1,
527 |                                                                 inputs_2, targets_2)  
528 |                 all_preds.append(preds)
529 |                 all_losses.append(loss)
530 |                 all_probs.append(probabilities)
531 |                 all_batch_sizes.append(len(inputs_1))
532 |                 all_targets.append(targets_1)
533 | 
534 |             for m in self.monitors:
535 |                 result_dict = m.monitor_set(setname, all_preds, all_losses,
536 |                                             all_batch_sizes, all_targets,
537 |                                             combined_batches) 
538 |                 if result_dict is not None:
539 |                     result_dicts_per_monitor[m].update(result_dict)
540 |         row_dict = OrderedDict()
541 |         for m in self.monitors:
542 |             row_dict.update(result_dicts_per_monitor[m])
543 |         self.epochs_df = self.epochs_df.append(row_dict, ignore_index=True)
544 |         assert set(self.epochs_df.columns) == set(row_dict.keys()), f"Columns of dataframe: {str(set(self.epochs_df.columns))}\n and keys of dict {str(set(row_dict.keys()))} not same"
545 |         
546 |         self.epochs_df = self.epochs_df[list(row_dict.keys())]
547 |         self.predictions = compute_pred_labels_from_trial_preds(all_preds, None)
548 |         self.probabilites = all_probs
549 | 
550 | 
551 |     def log_epoch(self):
552 |         """
553 |         Print monitoring values for this epoch.
554 |         """
555 |         for logger in self.loggers:
556 |             logger.log_epoch(self.epochs_df)
557 | 
558 |     def setup_after_stop_training(self):
559 |         """
560 |         Setup training after first stop. 
561 | 
562 |         Resets parameters to best parameters and updates stop criterion.
563 |         """
564 |         # also remember old monitor chans, will be put back into
565 |         # monitor chans after experiment finished
566 |         self.before_stop_df = deepcopy(self.epochs_df)
567 |         self.rememberer.reset_to_best_model(self.epochs_df, self.model,
568 |                                             self.optimizer)
569 |         loss_to_reach = float(self.epochs_df['train_loss'].iloc[-1])
570 |         self.stop_criterion = Or(stop_criteria=[
571 |             MaxEpochs(max_epochs=self.rememberer.best_epoch * 2),
572 |             ColumnBelow(column_name='valid_loss', target_value=loss_to_reach)])
573 |         log.info(f"Train loss to reach {loss_to_reach}")
574 | 
575 | 
576 | 


--------------------------------------------------------------------------------
/BiModNeuroCNN/results/results.py:
--------------------------------------------------------------------------------
  1 | from BiModNeuroCNN.results.dataframe_utils import results_df, get_col_list, param_scores_df
  2 | from BiModNeuroCNN.utils import load_pickle
  3 | import numpy as np 
  4 | import pandas as pd
  5 | import pickle
  6 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, precision_recall_fscore_support, confusion_matrix, cohen_kappa_score
  7 | import matplotlib.pyplot as plt
  8 | import seaborn as sns
  9 | import os
 10 | import sys
 11 | import h5py
 12 | 
 13 | class Results():
 14 | 
 15 | 	direct = 'C:/Users/sb00745777/OneDrive - Ulster University/Study_3/Subject_Data'
 16 | 
 17 | 	def __init__(self, save_path, folds=5, tag='', name="A"):
 18 | 		self.save_path = save_path
 19 | 		self.n_folds = folds
 20 | 		self.y_true_list = []
 21 | 		self.y_true = np.array([])
 22 | 		self.y_pred_list = []
 23 | 		self.y_pred = np.array([])
 24 | 		self.y_probs = None
 25 | 		self.results_path = None
 26 | 		self.lossdf = None
 27 | 		self.accdf = None
 28 | 		self.cross_entropydf = None
 29 | 		self.subject_stats_df = None
 30 | 		self.best_params = None
 31 | 		self.hyp_param_means = []
 32 | 		self.outer_fold_accuracies = []  # list of scores - 1 per fold
 33 | 		self.outer_fold_cross_entropies = []
 34 | 		self.of_mean = None
 35 | 		self.of_std = None
 36 | 		self.accuracy = None
 37 | 		self.precision = None
 38 | 		self.f1_score = None
 39 | 		self.recall = None
 40 | 		self.kappa = None
 41 | 		self.precision_list = []
 42 | 		self.f1_score_list = []
 43 | 		self.recall_list = []
 44 | 		self.kappa_list = []
 45 | 		self.cm = None
 46 | 		self.train_loss = None
 47 | 		self.test_loss = None
 48 | 		self.valid_loss = None
 49 | 		self.train_acc = None
 50 | 		self.test_acc = None
 51 | 		self.valid_acc = None
 52 | 		self.tag = tag
 53 | 		self.id = name
 54 | 
 55 | 	def __repr__(self):
 56 | 		return f"<class 'BiModNeuroCNN.results.results.Results'>"
 57 | 
 58 | 	def __str__(self):
 59 | 		return f"Class for creating a Results object containing several metrics"
 60 | 
 61 | 	def __getattr__(self, name):
 62 | 		if name == "fold_accuracies":
 63 | 			for i, j in enumerate(self.outer_fold_accuracies):
 64 | 				print (f"Fold {i+1}: {j}%")
 65 | 			folds_mean = np.mean(self.outer_fold_accuracies)
 66 | 			return f"Mean: {folds_mean}%"
 67 | 		elif name == "classes":
 68 | 			unique, counts = np.unique(self.y_true, return_counts=True)
 69 | 			return [f"Class {u}:{c}" for u,c in zip(unique, counts)]
 70 | 		elif name == "predictions":
 71 | 			unique, counts = np.unique(self.y_pred, return_counts=True)
 72 | 			return [f"Class {u}:{c}" for u,c in zip(unique, counts)]
 73 | 		elif name in dir(self):
 74 | 			return name
 75 | 		else:
 76 | 			print(dir(self))
 77 | 			raise AttributeError(f"'{name}' does not exist in this context")
 78 | 
 79 | 	def __setattr__(self, name, value):
 80 | 		if name == "fold_accuracies":
 81 | 			self.outer_fold_accuracies = value
 82 | 		else:
 83 | 			super().__setattr__(name, value)
 84 | 
 85 | 	def change_directory(self, direct):
 86 | 		self.direct = direct
 87 | 
 88 | 	def set_results_path(self, results_path):
 89 | 		self.results_path = results_path
 90 | 
 91 | 	def concat_y_true(self, y_true_fold):
 92 | 		"""
 93 | 		Method for combining all outer-fold ground-truth values.
 94 | 		:param y_true_fold: array of single-fold true values.
 95 | 		:return: all outer fold true values in single arrau
 96 | 		"""
 97 | 		self.y_true = np.concatenate((self.y_true, np.array(y_true_fold)))
 98 | 
 99 | 	def concat_y_pred(self, y_pred_fold):
100 | 		"""
101 | 		Method for combining all outer-fold ground-truth values.
102 | 		:param y_pred_fold: array of single-fold true values.
103 | 		:return: all outer fold true values in single arrau
104 | 		"""
105 | 		self.y_pred = np.concatenate((self.y_pred, np.array(y_pred_fold)))
106 | 
107 | 	def append_y_true(self, y_true_fold):
108 | 		"""
109 | 		Method for combining all outer-fold ground-truth values.
110 | 		:param y_true_fold: array of single-fold true values.
111 | 		:return: list of outer fold true values. Each element contains one fold
112 | 		"""
113 | 		self.y_true_list.append((np.array(y_true_fold)))
114 | 
115 | 	def append_y_pred(self, y_pred_fold):
116 | 		"""
117 | 		Method for combining all outer-fold ground-truth values.
118 | 		:param y_pred_fold: array of single-fold true values.
119 | 		:return: list of outer fold true values. Each element contains one fold
120 | 		"""
121 | 		self.y_pred_list.append((np.array(y_pred_fold)))
122 | 
123 | 	def get_acc_loss_df(self, hyp_params, index_name, nested=True):
124 | 		"""
125 | 		Instantiates pd.DataFrames for storing accuracy or loss metrics for each fold
126 | 		and hyperparameter set.
127 | 		:param hyp_params (dict) keys: names of hyp_params, values: lists of HP values 
128 | 		:param index_name (str) index name for dataframe
129 | 		"""
130 | 		if nested:
131 | 			index = list(n+1 for n in range(self.n_folds*self.n_folds))
132 | 		else:
133 | 			index = list(n+1 for n in range(self.n_folds))
134 | 		index.append("Mean")
135 | 		index.append("Std.")
136 | 		columns_list = get_col_list(hyp_params)
137 | 
138 | 		names = list(hyp_params.keys())
139 | 
140 | 		self.lossdf = results_df(index,index_name,columns_list,names)
141 | 		self.accdf  = results_df(index,index_name,columns_list,names)
142 | 		self.cross_entropydf = results_df(index,index_name,columns_list,names)
143 | 
144 | 
145 | 	def fill_acc_loss_df(self, inner_fold_accs=None, inner_fold_loss=None, inner_fold_CE=None, save=True):
146 | 		"""
147 | 		Method for inserting all inner-fold accuracies and losses associated with each hyper-parameter
148 | 		combination in a dataframe. Mean and Std. computed. The dataframes can be used to select optimal
149 | 		hyper-parameters.
150 | 		:param inner_fold_accs: list containing all inner-fold accuracy scores
151 | 		:param inner_fold_loss: list containing all inner-fold loss values
152 | 		:param inner_fold_CE: list containing all inner-fold CE values
153 | 		:param save: Boolean
154 | 		:return: Dataframes in which each column represents a particular hyper-parameter set.
155 | 		"""
156 | 		if inner_fold_accs is not None:
157 | 			for n, acc in enumerate(inner_fold_accs):
158 | 				self.accdf.iloc[n] = acc
159 | 			self.accdf.loc["Mean"].iloc[0] = self.accdf.iloc[1:n].mean(axis=0).values
160 | 			self.accdf.loc["Std."].iloc[0] = self.accdf.iloc[1:n].std(axis=0).values
161 | 			if save:
162 | 				try:
163 | 					self.accdf.to_excel(f"{self.save_path}/HP_acc{self.tag}.xlsx")
164 | 				except:
165 | 					self.accdf.to_excel(f"{self.save_path}/HP_acc{self.tag}.xlsx", engine='xlsxwriter')
166 | 
167 | 		if inner_fold_loss is not None:
168 | 			for n, loss in enumerate(inner_fold_loss):
169 | 				self.lossdf.iloc[n] = loss
170 | 			self.lossdf.loc["Mean"].iloc[0] = self.lossdf.iloc[1:n].mean(axis=0).values
171 | 			self.lossdf.loc["Std."].iloc[0] = self.lossdf.iloc[1:n].std(axis=0).values
172 | 			if save:
173 | 				try:
174 | 					self.lossdf.to_excel(f"{self.save_path}/HP_loss{self.tag}.xlsx")
175 | 				except:
176 | 					self.lossdf.to_excel(f"{self.save_path}/HP_loss{self.tag}.xlsx", engine='xlsxwriter')
177 | 
178 | 		if inner_fold_CE is not None:
179 | 			for n, ce in enumerate(inner_fold_CE):
180 | 				self.cross_entropydf.iloc[n] = ce
181 | 			self.cross_entropydf.loc["Mean"].iloc[0] = self.cross_entropydf.iloc[1:n].mean(axis=0).values
182 | 			self.cross_entropydf.loc["Std."].iloc[0] = self.cross_entropydf.iloc[1:n].std(axis=0).values
183 | 			if save:
184 | 				try:
185 | 					self.cross_entropydf.to_excel(f"{self.save_path}/HP_CE{self.tag}.xlsx")
186 | 				except:
187 | 					self.cross_entropydf.to_excel(f"{self.save_path}/HP_CE{self.tag}.xlsx", engine='xlsxwriter')
188 | 
189 | 
190 | 	def get_best_params(self, selection_method, save_path=None):
191 | 		"""
192 | 		Method for returning best hyper-parameter combination from inner fold accuracy or loss.
193 | 		:param selection_method: str: "accuracy" Or "loss".
194 | 		:return: list of optimal hyper-parameters.
195 | 		"""
196 | 		if save_path == None: # can overwrite object save_path with argument if required
197 | 			save_path = self.save_path
198 | 
199 | 		if selection_method == "accuracy":
200 | 			self.best_params = list(self.accdf.columns[self.accdf.loc["Mean"].values.argmax()])
201 | 		else:
202 | 			self.best_params = list(self.lossdf.columns[self.lossdf.loc["Mean"].values.argmin()])
203 | 		best_params = pd.DataFrame(dict(best_params=self.best_params))
204 | 		try:
205 | 			best_params.to_excel(f"{save_path}/BestParameters{self.tag}.xlsx")
206 | 		except:
207 | 			best_params.to_excel(f"{save_path}/BestParameters{self.tag}.xlsx", engine='xlsxwriter') # occasional problems with writing
208 | 
209 | 	
210 | 	def get_hp_means(self, hyp_params, selection_method, save=False, save_path=None):
211 | 		"""
212 | 		Extracts mean accuracies for specific HP values (as opposed to HP sets)
213 | 
214 | 		:param hyp_params (dict) keys: names of hyp_params, values: lists of HP values 
215 | 		:param selection_method (str) 'accuracy' OR 'loss'
216 | 		"""
217 | 		if save_path == None: # can overwrite object save_path with argument if required
218 | 			save_path = self.save_path
219 | 
220 | 		columns_list = get_col_list(hyp_params)
221 | 		for HP in columns_list:
222 | 			
223 | 			for value in HP:
224 | 				if selection_method == 'accuracy':
225 | 					sub_df = self.accdf[[i for i in self.accdf.columns if i[0] == value or i[1] == value or i[2] == value or i[3] == value]] 
226 | 					self.hyp_param_means.append((value, sub_df.loc["Mean"].values.mean()))
227 | 				else:
228 | 					sub_df = self.lossdf[[i for i in self.lossdf.columns if i[0] == value or i[1] == value or i[2] == value or i[3] == value]] 
229 | 					self.hyp_param_means.append((value, sub_df.loc["Mean"].values.mean()))
230 | 		if save:
231 | 			hp_val_list, hp_mean_list = [], []
232 | 			for tup in self.hyp_param_means:
233 | 				hp_val_list.append(tup[0])
234 | 				hp_mean_list.append(tup[1])
235 | 			hp_means_df = pd.DataFrame(dict(HP_value=hp_val_list, HP_mean=hp_mean_list))
236 | 			try:
237 | 				hp_means_df.to_excel(f"{save_path}/HP_means{self.tag}.xlsx")
238 | 			except:
239 | 				hp_means_df.to_excel(f"{save_path}/HP_means{self.tag}.xlsx", engine='xlsxwriter') # occasional problems with writing
240 | 
241 | 
242 | 	def set_outer_fold_accuracies(self, outer_fold_accuracies):
243 | 		self.outer_fold_accuracies = outer_fold_accuracies
244 | 		self.of_mean = np.mean(outer_fold_accuracies)
245 | 		self.of_std = np.std(outer_fold_accuracies)
246 | 
247 | 	def get_accuracy(self):
248 | 		"""
249 | 		Method for calculating accuracy from all true and predicted values.
250 | 		:return: accuracy value (%) rounded to 3 decimal places.
251 | 		"""
252 | 		assert len(self.y_true) == len(self.y_pred), "data must be of equal length"
253 | 		self.accuracy = round((accuracy_score(self.y_true, self.y_pred) * 100), 3)
254 | 
255 | 	def get_precision(self):
256 | 		assert len(self.y_true) == len(self.y_pred), "data must be of equal length"
257 | 		self.precision = round((precision_score(self.y_true, self.y_pred, average="macro") * 100), 3)
258 | 
259 | 	def get_recall(self):
260 | 		assert len(self.y_true) == len(self.y_pred), "data must be of equal length"
261 | 		self.recall = round((recall_score(self.y_true, self.y_pred, average='macro') * 100), 3)
262 | 
263 | 	def get_f_score(self):
264 | 		assert len(self.y_true) == len(self.y_pred), "data must be of equal length"
265 | 		self.f1_score = round((f1_score(self.y_true, self.y_pred, average='macro') * 100), 3)
266 | 
267 | 	def get_kappa_value(self):
268 | 		assert len(self.y_true) == len(self.y_pred), "data must be of equal length"
269 | 		self.kappa = round(cohen_kappa_score(self.y_true, self.y_pred),3)
270 | 
271 | 	def precision_recall_f_score(self):
272 | 		assert len(self.y_true) == len(self.y_pred), "data must be of equal length"
273 | 		precision_recall_fscore_support(self.y_true, self.y_pred)
274 | 
275 | 	def confusion_matrix(self):
276 | 		assert len(self.y_true) == len(self.y_pred), "data must be of equal length"
277 | 		self.cm = confusion_matrix(self.y_true, self.y_pred)
278 | 
279 | 	def subject_stats(self):
280 | 		"""
281 | 		Method for constructing and saving a Pandas Dataframe with Accuracy and
282 | 		statistical scores as below:
283 | 			fold 1  fold 2    Mean   Std.  Precision  Recall  F1 Score
284 | 		01  18.065  16.779  17.422  0.643     16.447  16.447    16.447
285 | 		"""
286 | 
287 | 		folds = []
288 | 		for i in range(1, self.n_folds+1):
289 | 			folds.append(f'fold {i}')
290 | 
291 | 		if np.array(self.outer_fold_accuracies).ndim == 1:
292 | 			self.subject_stats_df = pd.DataFrame(index=[self.id], columns=folds)
293 | 			self.subject_stats_df.iloc[0] = self.outer_fold_accuracies
294 | 			self.get_accuracy()
295 | 			self.subject_stats_df['Subj Mean'] = self.subject_stats_df.mean(axis=1, skipna=True)
296 | 			self.subject_stats_df['Subj Std.'] = self.subject_stats_df.drop('Subj Mean',axis=1).std(axis=1, skipna=True)
297 | 			self.get_precision()
298 | 			self.get_recall()
299 | 			self.get_f_score()
300 | 			self.subject_stats_df['Precision'] = self.precision
301 | 			self.subject_stats_df['Recall'] = self.recall
302 | 			self.subject_stats_df['F1 Score'] = self.f1_score
303 | 			for n,ce in enumerate(self.outer_fold_cross_entropies):
304 | 				self.subject_stats_df[f"CE - fold {n+1}"] = ce
305 | 			self.subject_stats_df["CE mean"] = np.mean(self.outer_fold_cross_entropies)
306 | 			self.subject_stats_df["CE std."] = np.std(self.outer_fold_cross_entropies)
307 | 
308 | 			handle = f"{self.save_path}/statistics{self.tag}.xlsx"
309 | 			
310 | 		else:
311 | 			self.subject_stats_df = pd.DataFrame(index=[self.ids], columns=folds)
312 | 			for n,score in enumerate(self.outer_fold_accuracies):
313 | 				self.subject_stats_df.iloc[n] = score
314 | 			self.subject_stats_df['Subj Mean'] = self.subject_stats_df.mean(axis=1, skipna=True)
315 | 			self.subject_stats_df['Subj Std.'] = self.subject_stats_df.drop('Subj Mean',axis=1).std(axis=1, skipna=True)
316 | 			self.subject_stats_df['Precision'] = self.precision_list
317 | 			self.subject_stats_df['Recall'] = self.recall_list
318 | 			self.subject_stats_df['F1 Score'] = self.f1_score_list
319 | 
320 | 			# adding cross-entropy values for each fold
321 | 			for n,_ in enumerate(folds):
322 | 				self.subject_stats_df[f"CE - fold {n+1}"] = ""
323 | 			for n,ce_list in enumerate(self.outer_fold_cross_entropies):
324 | 				for m,ce in enumerate(ce_list):
325 | 					self.subject_stats_df[f"CE - fold {m+1}"].iloc[n] = ce
326 | 			self.subject_stats_df["CE mean"] = self.outer_fold_ce_means
327 | 			self.subject_stats_df["CE std."] = self.outer_fold_ce_std
328 | 
329 | 			self.subject_stats_df.loc["Mean"] = self.subject_stats_df.iloc[0:len(self.ids)].mean(axis=0).values
330 | 			self.subject_stats_df.loc["Std."] = self.subject_stats_df.iloc[0:len(self.ids)].std(axis=0).values
331 | 
332 | 			handle = f"{self.results_path}/combined_stats{self.tag}.xlsx"
333 | 		try:
334 | 			self.subject_stats_df.to_excel(handle)
335 | 		except:
336 | 			self.subject_stats_df.to_excel(handle, engine='xlsxwriter')
337 | 
338 | 
339 | 	def save_result(self):
340 | 		filename = f"{self.save_path}/results_object{self.tag}.pickle"
341 | 		filehandler = open(filename, 'wb')
342 | 		try:
343 | 			pickle.dump(self.__dict__, filehandler, protocol=pickle.HIGHEST_PROTOCOL)
344 | 		except ValueError:
345 | 			file = f"{self.save_path}/results_object_alt_{self.tag}"
346 | 			self.save_as_pickled_object(file)
347 | 	
348 | 	def update(self, newdata):
349 | 	    for key,value in newdata.items():
350 | 	    	setattr(self,key,value)
351 | 
352 | 	@classmethod
353 | 	def load_result(self, f_name):
354 | 		 with open(f_name, 'rb') as f:
355 | 		 	tmp_dict = pickle.load(f)
356 | 	 		f.close()
357 | 	 		self.update(self, tmp_dict)
358 |  			return self
359 | 
360 | 	def save_as_pickled_object(self, filepath):
361 | 		"""
362 |         This is a defensive way to write pickle.write, allowing for very large files on all platforms
363 |         """
364 | 		subject = dict(subject=self)
365 | 		max_bytes = 2 ** 31 - 1
366 | 		bytes_out = pickle.dumps(subject)
367 | 		n_bytes = sys.getsizeof(bytes_out)
368 | 		with open(filepath, 'wb') as f_out:
369 | 			for idx in range(0, n_bytes, max_bytes):
370 | 				f_out.write(bytes_out[idx:idx + max_bytes])
371 | 
372 | 	def try_to_load_as_pickled_object_or_None(filepath):
373 | 		"""
374 |         This is a defensive way to write pickle.load, allowing for very large files on all platforms
375 |         """
376 | 		max_bytes = 2 ** 31 - 1
377 | 		try:
378 | 			input_size = os.path.getsize(filepath)
379 | 			bytes_in = bytearray(0)
380 | 			with open(filepath, 'rb') as f_in:
381 | 				for _ in range(0, input_size, max_bytes):
382 | 					bytes_in += f_in.read(max_bytes)
383 | 			obj = pickle.loads(bytes_in)
384 | 		except:
385 | 			return None
386 | 		return obj
387 | 
388 | 
389 | class CombinedResults(Results):
390 | 	"""
391 | 	Written for combining the results of multiple subject/experiments.
392 | 	"""
393 | 
394 | 	def __init__(self, save_path, load_path, f_names, folds, ids, tag):
395 | 
396 | 		super().__init__(save_path, folds, tag)
397 | 
398 | 		self.load_path = load_path
399 | 		self.f_names = f_names
400 | 		self.ids = ids
401 | 		self.total_cross_val_df = None
402 | 		self.total_best_hps = [] #list of best HPs for each subject
403 | 		self.BestParams = None
404 | 		self.hp_results_df = None
405 | 		self.outer_fold_ce_means = []
406 | 		self.outer_fold_ce_std = []
407 | 		self.combined_train_loss = []
408 | 		self.combined_test_loss = []
409 | 		self.combined_valid_loss = []
410 | 		self.combined_train_acc = []
411 | 		self.combined_test_acc = []
412 | 		self.combined_valid_acc = []
413 | 		self.HP_acc = pd.DataFrame(columns=self.ids)
414 | 		self.HP_loss = pd.DataFrame(columns=self.ids)
415 | 		self.HP_ce = pd.DataFrame(columns=self.ids)
416 | 		self.total_number = 0
417 | 
418 | 	def __repr__(self):
419 | 		return f"<class 'BiModNeuroCNN.results.results.CombinedResults'>"
420 | 
421 | 	def __str__(self):
422 | 		return f"Class for combining the results from multiple subjects/experiments"
423 | 
424 | 	def __len__(self):
425 | 		return len(self.f_names)
426 | 
427 | 	def __getattr__(self, name):
428 | 		if name == "all_ids":
429 | 			return [(n+1, i) for n, i in enumerate(self.ids[:-2])]
430 | 
431 | 	def cross_val_results_df(self, accuracy=True, cross_entropy=False, save=True):
432 | 		"""
433 | 		Combine all results into single pd.DataFrame, calculate mean and stdev. and store
434 | 		in Excel format
435 | 		
436 | 		:param: accuracy (bool) True if accuracy scores to be considered
437 | 		:param: cross entropy (bool) True if cross entropy scores to be considered
438 | 		:param: save (bool) True if results are to be stored as xlsx
439 | 		"""
440 | 		folds = []
441 | 		for i in range(1, self.n_folds+1):
442 | 			folds.append(f'fold {i}')
443 | 
444 | 		if accuracy:
445 | 			assert len(self.outer_fold_accuracies) != [], "Scores must be loaded to CombinedResults.outer_fold_accuracies" 
446 | 			assert len(self.outer_fold_accuracies) == len(self.ids), "Number of subjects and results are not equal"
447 | 			assert len(self.outer_fold_accuracies[0]) == self.n_folds, "Number of scores and folds are not equal"
448 | 
449 | 			self.total_acc_df = pd.DataFrame(index=self.ids, columns=folds)
450 | 
451 | 			for n,score in enumerate(self.outer_fold_accuracies):
452 | 				self.total_acc_df.iloc[n] = score
453 | 
454 | 			self.total_acc_df['Mean'] = self.total_acc_df.mean(axis=1,skipna=True)
455 | 			self.total_acc_df['Std.'] = self.total_acc_df.drop('Mean',axis=1).std(axis=1,skipna=True)
456 | 
457 | 		if cross_entropy:
458 | 			assert len(self.outer_fold_cross_entropies) != [], "Scores must be loaded to CombinedResults.outer_fold_accuracies" 
459 | 			assert len(self.outer_fold_cross_entropies) == len(self.ids), "Number of subjects and results are not equal"
460 | 			assert len(self.outer_fold_cross_entropies[0]) == self.n_folds, "Number of scores and folds are not equal"
461 | 
462 | 			self.total_ce_df = pd.DataFrame(index=self.ids, columns=folds)
463 | 
464 | 			for n,score in enumerate(self.outer_fold_cross_entropies):
465 | 				self.total_ce_df.iloc[n] = score
466 | 
467 | 			self.total_ce_df['Mean'] = self.total_ce_df.mean(axis=1,skipna=True)
468 | 			self.total_ce_df['Std.'] = self.total_ce_df.drop('Mean',axis=1).std(axis=1,skipna=True)
469 | 	
470 | 		if save:
471 | 			if accuracy and cross_entropy:
472 | 				with pd.ExcelWriter(f'{self.save_path}/combined_scores.xlsx') as writer:
473 | 					self.total_acc_df.to_excel(writer, sheet_name='accuracy')
474 | 					self.total_ce_df.to_excel(writer, sheet_name='cross_entropy')
475 | 			elif not cross_entropy:
476 | 				with pd.ExcelWriter(f'{self.save_path}/combined_scores.xlsx') as writer:
477 | 					self.total_acc_df.to_excel(writer, sheet_name='accuracy')
478 | 			elif not accuracy:
479 | 				with pd.ExcelWriter(f'{self.save_path}/combined_scores.xlsx') as writer:
480 | 					self.total_ce_df.to_excel(writer, sheet_name='cross_entropy')
481 | 
482 | 	def get_subject_results(self):
483 | 		"""
484 | 		Read in multiple results.Results objects and extract the required metrics into containers
485 | 		for further processing.
486 | 		"""
487 | 
488 | 		for f_name in self.f_names:
489 | 
490 | 			results_object = self.load_result(f"{self.load_path}/{f_name}.pickle")
491 | 
492 | 			self.y_true = np.concatenate((self.y_true, results_object.y_true))
493 | 			self.y_pred = np.concatenate((self.y_pred, results_object.y_pred)) # all true and prediction values
494 | 
495 | 			self.outer_fold_accuracies.append(results_object.outer_fold_accuracies)
496 | 			self.outer_fold_cross_entropies.append(results_object.outer_fold_cross_entropies)
497 | 			self.outer_fold_ce_means.append(np.mean(results_object.outer_fold_cross_entropies))
498 | 			self.outer_fold_ce_std.append(np.std(results_object.outer_fold_cross_entropies))
499 | 
500 | 			results_object.precision = self.get_res_obj_precision(results_object)
501 | 			results_object.f_score = self.get_res_obj_recall(results_object)
502 | 			results_object.recall = self.get_res_obj_f_score(results_object)
503 | 			results_object.kappa = self.get_res_obj_kappa_value(results_object)
504 | 			self.precision_list.append(results_object.precision)
505 | 			self.f1_score_list.append(results_object.f1_score)
506 | 			self.recall_list.append(results_object.recall)
507 | 			self.kappa_list.append(results_object.kappa)
508 | 
509 | 			self.total_best_hps.append(results_object.best_params)
510 | 			self.hyp_param_means.append(results_object.hyp_param_means)
511 | 
512 | 			self.combined_train_loss.append(results_object.train_loss)
513 | 			self.combined_test_loss.append(results_object.test_loss)
514 | 			self.combined_valid_loss.append(results_object.valid_loss)
515 | 			self.combined_train_acc.append(results_object.train_acc)
516 | 			self.combined_test_acc.append(results_object.test_acc)
517 | 			self.combined_valid_acc.append(results_object.valid_acc)
518 | 	
519 | 		# Save combined predictions and ground truth values to csv
520 | 		# np.savetxt(f"{self.direct}/results/{self.paradigm.replace('EEG_', '')}/y_true.csv", [self.y_true],
521 | 		# 		   delimiter=',', fmt='%d')
522 | 		# np.savetxt(f"{self.direct}/results/{self.paradigm.replace('EEG_', '')}/y_pred.csv", [self.y_pred],
523 | 		# 		   delimiter=',', fmt='%d')
524 | 
525 | 	
526 | 	def param_scores(self, hyp_params):
527 | 		"""
528 | 		Saves a Pandas DataFrame as an Excel file which contains average inner-fold accuracy (or loss)
529 | 		for each independent hyperparameter value, and for all subjects
530 | 		:param hyp_params: dict containing all hyperparameter keys and values.
531 | 		"""
532 | 		paramscores_df = param_scores_df(self.ids, hyp_params)
533 | 	
534 | 		for i, j in enumerate(self.hyp_param_means):
535 | 			paramscores_df.iloc[i] = [score[1] for score in j]
536 | 
537 | 		paramscores_df.loc["Mean"] = paramscores_df[0:len(self.ids)].mean(axis=0, skipna=True)
538 | 		paramscores_df.loc["Std."] = paramscores_df[0:len(self.ids)].std(axis=0, skipna=True)
539 | 		paramscores_df.to_excel(f"{self.save_path}/param_scores.xlsx")
540 | 
541 | 	
542 | 	def inter_subject_hps(self, hyp_params, index_name, selection_method):
543 | 		"""
544 | 		Saves a Pandas DataFrame as an Excel file which contains average inner-fold accuracy (or loss)
545 | 		for each independent hyperparameter value, and for all subjects
546 | 		:param hyp_params: dict containing all hyperparameter keys and values.
547 | 		:param index_name: str name to give index column.
548 | 		:param selection_method: str "accuracy" OR "loss".
549 | 		"""
550 | 		index = self.ids
551 | 		columns_list = get_col_list(hyp_params)
552 | 		names = list(hyp_params.keys())
553 | 		
554 | 		self.hp_results_df = results_df(index, index_name, columns_list, names)
555 | 		
556 | 		combined_hp = []
557 | 		for f_name in self.f_names:
558 | 
559 | 			results_object = self.load_result(f"{self.load_path}/{f_name}.pickle")
560 | 		
561 | 			acc = results_object.accdf.loc['Mean'].values
562 | 			combined_hp.append(acc)
563 | 		
564 | 		for i, j in enumerate(combined_hp):
565 | 			self.hp_results_df.iloc[i] = j
566 | 		self.hp_results_df.loc["Mean"].iloc[0] = self.hp_results_df.iloc[0:len(self.ids)].mean(axis=0, skipna=True)
567 | 		self.hp_results_df.loc["Std."].iloc[0] = self.hp_results_df.iloc[0:len(self.ids)].std(axis=0, skipna=True)
568 | 		self.hp_results_df.to_excel(f"{self.save_path}/total_hp_scores.xlsx")
569 | 
570 | 		self.BestParams = self.hp_results_df.columns[self.hp_results_df.loc["Mean"].values.argmax()]
571 | 		self.BestParams = pd.DataFrame(dict(BestParams=self.BestParams))
572 | 		self.BestParams.to_excel(f"{self.save_path}/BestParams.xlsx")
573 | 
574 | 	
575 | 	def get_combined_inner_scores(self):
576 | 		"""
577 | 		Create pd.DataFrames to contain inner-fold validation accuracies/loss/cross entropy
578 | 		for all subjects or experiments and compute a mean - can be used for selecting inter-subject
579 | 		hyperparameters. 
580 | 		"""
581 | 
582 | 		for i, f_name in enumerate(self.f_names):
583 | 
584 | 			results_object = self.load_result(f"{self.load_path}/{f_name}.pickle")
585 | 
586 | 			self.HP_acc[self.ids[i]]  = results_object.accdf.loc['Mean'].apply(lambda x : x * 100).values.ravel()
587 | 			self.HP_loss[self.ids[i]] = results_object.lossdf.loc['Mean'].values.ravel()
588 | 			self.HP_ce[self.ids[i]]   = results_object.cross_entropydf.loc['Mean'].values.ravel()
589 | 
590 | 		self.HP_acc.fillna(0, inplace=True) # zero-filling -- mean-filling may be a better option
591 | 		self.HP_loss.fillna(0, inplace=True)
592 | 		self.HP_ce.fillna(0, inplace=True)
593 | 		self.HP_acc['Mean'] = self.HP_acc.mean(axis=1, skipna=True)
594 | 		self.HP_loss['Mean'] = self.HP_loss.mean(axis=1, skipna=True)
595 | 		self.HP_ce['Mean'] = self.HP_ce.mean(axis=1, skipna=True)
596 | 
597 | 	@staticmethod
598 | 	def get_res_obj_precision(res_obj):
599 | 		assert len(res_obj.y_true) == len(res_obj.y_pred), "data must be of equal length"
600 | 		return round((precision_score(res_obj.y_true, res_obj.y_pred, average="macro") * 100), 3)
601 | 
602 | 	@staticmethod
603 | 	def get_res_obj_recall(res_obj):
604 | 		assert len(res_obj.y_true) == len(res_obj.y_pred), "data must be of equal length"
605 | 		return round((recall_score(res_obj.y_true, res_obj.y_pred, average='macro') * 100), 3)
606 | 
607 | 	@staticmethod
608 | 	def get_res_obj_f_score(res_obj):
609 | 		assert len(res_obj.y_true) == len(res_obj.y_pred), "data must be of equal length"
610 | 		return round((f1_score(res_obj.y_true, res_obj.y_pred, average='macro') * 100), 3)
611 | 
612 | 	@staticmethod
613 | 	def get_res_obj_kappa_value(res_obj):
614 | 		assert len(res_obj.y_true) == len(res_obj.y_pred), "data must be of equal length"
615 | 		return round(cohen_kappa_score(res_obj.y_true, res_obj.y_pred),3)


--------------------------------------------------------------------------------
/BiModNeuroCNN/utils.py:
--------------------------------------------------------------------------------
   1 | """
   2 | Name: Ciaran Cooney
   3 | Date: 12/01/2019
   4 | Description: Functions required for data processing and training of 
   5 | CNNs on imagined speech EEG data.
   6 | """
   7 | 
   8 | import pickle
   9 | import os
  10 | import numpy as np 
  11 | import pandas as pd
  12 | import matplotlib.pyplot as plt
  13 | import seaborn as sns
  14 | import time
  15 | from functools import wraps
  16 | 
  17 | def eeg_to_3d(data, epoch_size, n_events,n_chan):
  18 |     """
  19 |     function to return a 3D EEG data format from a 2D input.
  20 |     Parameters:
  21 |       data: 2D np.array of EEG
  22 |       epoch_size: number of samples per trial, int
  23 |       n_events: number of trials, int
  24 |       n_chan: number of channels, int
  25 |         
  26 |     Output:
  27 |       np.array of shape n_events * n_chans * n_samples
  28 |     """
  29 |     idx, a, x = ([] for i in range(3))
  30 |     [idx.append(i) for i in range(0,data.shape[1],epoch_size)]
  31 |     for j in data:
  32 |         [a.append([j[idx[k]:idx[k]+epoch_size]]) for k in range(len(idx))]
  33 |    
  34 |     
  35 |     return np.reshape(np.array(a),(n_events,n_chan,epoch_size))
  36 | 
  37 | def load_pickle(direct, folder, filename):
  38 |     
  39 |     for file in os.listdir(direct + folder):
  40 |         if file.endswith(filename):
  41 |             pickle_file = (direct + folder + '/' + file)
  42 |             with open(pickle_file, 'rb') as f:
  43 |                 file = pickle.load(f)
  44 | 
  45 |             return file, pickle_file
  46 | 
  47 | def create_events(data, labels):
  48 |     events = []
  49 |     x = np.zeros((data.shape[0], 3))
  50 |     for i in range(data.shape[0]):
  51 |         x[i][0] = i 
  52 |         x[i][2] = labels[i]
  53 |     [events.append(list(map(int, x[i]))) for i in range(data.shape[0])]
  54 |     return np.array(events)
  55 | 
  56 | def reverse_coeffs(coeffs, N):
  57 |     """ Reverse order of coefficients in an array."""
  58 |     idx = np.array([i for i in reversed(range(N))])
  59 |     coeffs = coeffs[idx]
  60 |     coeffs = coeffs.reshape((N,1))
  61 |     z = np.zeros((N,1))
  62 |     return np.append(coeffs, z, axis=1) , coeffs
  63 | 
  64 | def class_ratios(labels):
  65 |     unique, counts = np.unique(labels, return_counts=True)
  66 |     class_weight = dict()
  67 |     for i in range(len(unique)):
  68 |        class_weight[unique[i]] = len(labels) / (len(unique)*counts[i])
  69 |     return class_weight
  70 | 
  71 | def classification_report_csv(report, output_file):
  72 |     report_data = []
  73 |     lines = report.split('\n')
  74 |     for line in lines[2:-3]:
  75 |         row = {}
  76 |         row_data = line.split('      ')
  77 |         row['class'] = row_data[0]
  78 |         row['precision'] = float(row_data[1])
  79 |         row['recall'] = float(row_data[2])
  80 |         row['f1_score'] = float(row_data[3])
  81 |         row['support'] = float(row_data[4])
  82 |         report_data.append(row)
  83 |     dataframe = pd.DataFrame.from_dict(report_data)
  84 |     dataframe.to_csv(output_file + '.csv', index = False)
  85 | 
  86 | def load_features(direct, dict_key1, dict_key2=None):
  87 |     with open(direct, 'rb') as f:
  88 |         file = pickle.load(f)
  89 |     if dict_key2 == None:
  90 |         return np.array(file[dict_key1])
  91 |     else:
  92 |         return np.array(file[dict_key1]), np.array(file[dict_key2])
  93 | 
  94 | def short_vs_long(features, labels, split, event_id):
  95 |     """Function for multilabel data into binary-class sets i.e.,
  96 |        short words and long words
  97 |     """
  98 |     short, long, s_idx, l_idx, s_features, l_features = ([] for i in range(6))
  99 |     
 100 |     [short.append(event_id[i]) for i in event_id if len(i) <= split]
 101 |     [long.append(event_id[i]) for i in event_id if len(i) > split]
 102 |     
 103 |     [s_idx.append(i) for i, e in enumerate(labels) if e in short]
 104 |     [l_idx.append(i) for i, e in enumerate(labels) if e in long]
 105 |     
 106 |     [s_features.append(e) for i, e in enumerate(features) if i in s_idx]
 107 |     [l_features.append(e) for i, e in enumerate(features) if i in l_idx]
 108 |     
 109 |     s_labels = np.zeros(np.array(s_features).shape[0])
 110 |     l_labels = np.ones(np.array(l_features).shape[0])
 111 | 
 112 |     features = np.concatenate((s_features, l_features))
 113 |     labels = np.concatenate((s_labels,l_labels))
 114 |     
 115 |     return s_features, l_features, s_labels, l_labels, features, labels 
 116 | 
 117 | def return_indices(event_id, labels):
 118 |     indices = []
 119 |     for _, k in enumerate(event_id):
 120 |         idx = []
 121 |         for d, j in enumerate(labels):
 122 |             if event_id[k] == j:
 123 |                 idx.append(d)
 124 |         indices.append(idx)
 125 |     return indices
 126 | 
 127 | def load_subject_eeg(subject_id, vowels):
 128 |     """ returns eeg data corresponding to words and vowels 
 129 |         given a subject identifier.
 130 |     """
 131 | 
 132 |     data_folder = 'C:\\Users\\sb00745777\\OneDrive - Ulster University\\Study_2\\imagined_speech/S{}/post_ica/'.format(subject_id)
 133 |     data_folder1 = 'C:\\Users\\cfcoo\\OneDrive - Ulster University\\Study_2\\imagined_speech/S{}/post_ica/'.format(subject_id)
 134 |     words_file = 'raw_array_ica.pickle'
 135 |     vowels_file = 'raw_array_vowels_ica.pickle'
 136 |     
 137 |     try:
 138 |         with open(data_folder + words_file, 'rb') as f:
 139 |             file = pickle.load(f)
 140 |     except:
 141 |         print("Not on PC! Attempting to load from laptop.")
 142 |         with open(data_folder1 + words_file, 'rb') as f:
 143 |             file = pickle.load(f)
 144 |             
 145 |     w_data = file['raw_array'][:][0]
 146 |     w_labels = file['labels']
 147 |     if vowels == False:
 148 |         return w_data, w_labels
 149 | 
 150 |     elif vowels:
 151 |         try:
 152 |             with open(data_folder + vowels_file, 'rb') as f:
 153 |                 file = pickle.load(f)
 154 |         except:
 155 |             with open(data_folder1 + vowels_file, 'rb') as f:
 156 |                 file = pickle.load(f)
 157 |         v_data = file['raw_array'][:][0]
 158 |         v_labels = file['labels']
 159 |     return w_data, v_data, w_labels, v_labels
 160 | 
 161 | def balanced_subsample(features, targets, random_state=12):
 162 |     """
 163 |     function for balancing datasets by randomly-sampling data
 164 |     according to length of smallest class set.
 165 |     """
 166 |     from sklearn.utils import resample
 167 |     unique, counts = np.unique(targets, return_counts=True)
 168 |     unique_classes = dict(zip(unique, counts))
 169 |     mnm = len(targets)
 170 |     for i in unique_classes:
 171 |         if unique_classes[i] < mnm:
 172 |             mnm = unique_classes[i]
 173 | 
 174 |     X_list, y_list = [],[]
 175 |     for unique in np.unique(targets):
 176 |         idx = np.where(targets == unique)
 177 |         X = features[idx]
 178 |         y = targets[idx]
 179 |         
 180 |         #X1, y1 = resample(X,y,n_samples=mnm, random_state=random_state)
 181 |         X_list.append(X[:mnm])
 182 |         y_list.append(y[:mnm])
 183 |     
 184 |     balanced_X = X_list[0]
 185 |     balanced_y = y_list[0]
 186 |     
 187 |     for i in range(1, len(X_list)):
 188 |         balanced_X = np.concatenate((balanced_X, X_list[i]))
 189 |         balanced_y = np.concatenate((balanced_y, y_list[i]))
 190 | 
 191 |     return balanced_X, balanced_y
 192 | 
 193 | def predict(model, X_test, batch_size, iterator, threshold_for_binary_case=None):
 194 |     """
 195 |     Load torch model and make predictions on new data.
 196 |     """
 197 |     all_preds = []
 198 |     with th.no_grad():
 199 |         for b_X, _ in iterator.get_batches(SignalAndTarget(X_test, X_test), False):
 200 |             b_X_var = np_to_var(b_X)
 201 |             all_preds.append(var_to_np(model(b_X_var)))
 202 | 
 203 |         pred_labels = compute_pred_labels_from_trial_preds(
 204 |                     all_preds, threshold_for_binary_case)
 205 |     return pred_labels
 206 | 
 207 | def plot_confusion_matrix(cm, classes,filename,
 208 |                           normalize=False,
 209 |                           title='Confusion matrix',
 210 |                           cmap=plt.cm.Blues):
 211 | 
 212 |     """
 213 |     Code for confusion matrix extracted from here:
 214 |     http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py
 215 |     """
 216 |     if normalize:
 217 |         cm = (cm.astype('float') / cm.sum(axis=1)[:, np.newaxis])*100
 218 |         print("Normalized confusion matrix")
 219 |     else:
 220 |         print('Confusion matrix, without normalization')
 221 | 
 222 |     print(cm)
 223 |     fig = plt.figure(1, figsize=(9, 6))
 224 |     #ax = plt.add_subplot(111)
 225 |     plt.tick_params(labelsize='large')
 226 |     plt.imshow(cm, interpolation='nearest', cmap=cmap)
 227 |     #plt.title(title)
 228 |     plt.colorbar()
 229 |     tick_marks = np.arange(len(classes))
 230 |     plt.xticks(tick_marks, classes, rotation=45)
 231 |     plt.yticks(tick_marks, classes)
 232 | 
 233 |     fmt = '.2f' if normalize else 'd'
 234 |     thresh = cm.max() / 2.
 235 |     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
 236 |         plt.text(j, i, format(cm[i, j], fmt),
 237 |                  horizontalalignment="center",
 238 |                  color="white" if cm[i, j] > thresh else "black")
 239 | 
 240 |     plt.tight_layout()
 241 |     plt.ylabel('True label', fontsize='large', fontname='sans-serif')
 242 |     plt.xlabel('Predicted label', fontsize='large', fontname='sans-serif')
 243 |     fig.savefig(filename + '.jpg', bbox_inches='tight')
 244 |     return(fig)
 245 | 
 246 | def print_confusion_matrix(confusion_matrix, class_names, filename, normalize = True, figsize = (5,5), fontsize=16):
 247 |     """Prints a confusion matrix, as returned by sklearn.metrics.confusion_matrix, as a heatmap.
 248 |     
 249 |     Arguments
 250 |     ---------
 251 |     confusion_matrix: numpy.ndarray
 252 |         The numpy.ndarray object returned from a call to sklearn.metrics.confusion_matrix. 
 253 |         Similarly constructed ndarrays can also be used.
 254 |     class_names: list
 255 |         An ordered list of class names, in the order they index the given confusion matrix.
 256 |     figsize: tuple
 257 |         A 2-long tuple, the first value determining the horizontal size of the ouputted figure,
 258 |         the second determining the vertical size. Defaults to (10,7).
 259 |     fontsize: int
 260 |         Font size for axes labels. Defaults to 14.
 261 |         
 262 |     Returns
 263 |     -------
 264 |     matplotlib.figure.Figure
 265 |         The resulting confusion matrix figure
 266 |     """
 267 |     if normalize:
 268 |         confusion_matrix = (confusion_matrix.astype('float') / confusion_matrix.sum(axis=1)[:, np.newaxis])*100
 269 |         print("Normalized confusion matrix")
 270 |     else:
 271 |         print('Confusion matrix, without normalization')
 272 |     
 273 |     df_cm = pd.DataFrame(
 274 |         confusion_matrix, index=class_names, columns=class_names, 
 275 |     )
 276 |     fig = plt.figure(figsize=figsize)
 277 |     fmt = '.2f' if normalize else 'd'
 278 |     #####set heatmap customization#####
 279 |     try:
 280 |         heatmap = sns.heatmap(df_cm, annot=True, fmt=fmt, cmap='GnBu', linewidths=.5, cbar=False, annot_kws={"size": 16})
 281 |     except ValueError:
 282 |         raise ValueError("Confusion matrix values must be integers.")
 283 |         
 284 |     heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize)
 285 |     heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize)
 286 |     plt.ylabel('True label', fontsize=16, fontname='sans-serif')
 287 |     plt.xlabel('Predicted label', fontsize=16, fontname='sans-serif')
 288 |     
 289 |     if filename != None:
 290 |         fig.savefig(filename + '.png', bbox_inches='tight') #store image as .png
 291 |     
 292 |     return fig
 293 | 
 294 | def data_wrangler(data_type, subject_id):
 295 |     """
 296 |     Function to return EEG data in format #trials*#channels*#samples.
 297 |     Also returns labels in the range 0 to n-1.
 298 |     """
 299 |     epoch = 4096
 300 |     if data_type == 'words':
 301 |         data, labels = load_subject_eeg(subject_id, vowels=False)
 302 |         n_chan = len(data)
 303 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
 304 |         labels = labels.astype(np.int64)
 305 |     elif data_type == 'vowels':
 306 |         _, data, _, labels = load_subject_eeg(subject_id, vowels=True)
 307 |         n_chan = len(data)
 308 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
 309 |         labels = labels.astype(np.int64)
 310 |     elif data_type == 'all_classes':
 311 |         w_data, v_data, w_labels, v_labels = load_subject_eeg(subject_id, vowels=True)
 312 |         n_chan = len(w_data)
 313 |         words = eeg_to_3d(w_data, epoch, int(w_data.shape[1] / epoch), n_chan).astype(np.float32)
 314 |         vowels = eeg_to_3d(v_data, epoch, int(v_data.shape[1] / epoch), n_chan).astype(np.float32)
 315 |         data = np.concatenate((words, vowels), axis=0)
 316 |         labels = np.concatenate((w_labels, v_labels), axis=0).astype(np.int64)
 317 |     
 318 |     x = lambda a: a * 1e6
 319 |     data = x(data)
 320 |     
 321 |     if data_type == 'words': # zero-index the labels
 322 |         labels[:] = [x - 6 for x in labels]
 323 |     elif (data_type == 'vowels' or data_type == 'all_classes'):
 324 |         labels[:] = [x - 1 for x in labels]
 325 | 
 326 |     return data, labels
 327 | 
 328 | 
 329 | def format_data(data_type, subject_id, epoch):
 330 |     """
 331 |     Returns data into format required for inputting to the CNNs.
 332 | 
 333 |     Parameters:
 334 |         data_type: str()
 335 |         subject_id: str()
 336 |         epoch: length of single trials, int
 337 |     """
 338 | 
 339 |     if data_type == 'words':
 340 |         data, labels = load_subject_eeg(subject_id, vowels=False)
 341 |         n_chan = len(data)
 342 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
 343 |         labels = labels.astype(np.int64)
 344 |         labels[:] = [x - 6 for x in labels] # zero-index the labels
 345 |     elif data_type == 'vowels':
 346 |         _, data, _, labels = load_subject_eeg(subject_id, vowels=True)
 347 |         n_chan = len(data)
 348 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
 349 |         labels = labels.astype(np.int64)
 350 |         labels[:] = [x - 1 for x in labels]
 351 |     elif data_type == 'all_classes':
 352 |         w_data, v_data, w_labels, v_labels = load_subject_eeg(subject_id, vowels=True)
 353 |         n_chan = len(w_data)
 354 |         words = eeg_to_3d(w_data, epoch, int(w_data.shape[1] / epoch), n_chan).astype(np.float32)
 355 |         vowels = eeg_to_3d(v_data, epoch, int(v_data.shape[1] / epoch), n_chan).astype(np.float32)
 356 |         data = np.concatenate((words, vowels), axis=0)
 357 |         labels = np.concatenate((w_labels, v_labels)).astype(np.int64)
 358 |         labels[:] = [x - 1 for x in labels]
 359 | 
 360 |     return data, labels
 361 | 
 362 | def current_loss(model_loss):
 363 |     """
 364 |     Returns the minimum validation loss from the 
 365 |     trained model
 366 |     """
 367 |     losses_list = []
 368 |     [losses_list.append(x) for x in model_loss]
 369 |     return np.min(np.array(losses_list))
 370 | 
 371 | def current_acc(model_acc):
 372 |     """
 373 |     Returns the maximum validation accuracy from the 
 374 |     trained model
 375 |     """
 376 |     accs_list = []
 377 |     [accs_list.append(x) for x in model_acc]
 378 |     return np.min(np.array(accs_list))
 379 | 
 380 | def balance_classes(data1,data2):
 381 | 
 382 |     if data1.shape[0] > data2.shape[0]:
 383 |         data1 = data1[:data2.shape[0],:,:]
 384 |     elif data1.shape[0] < data2.shape[0]:
 385 |         data2 = data2[:data1.shape[0],:,:]
 386 |         
 387 |     return data1, data2
 388 | 
 389 | def timer(orig_func):
 390 |     """
 391 |     decorator for logging time of function.
 392 |     """
 393 |     import time
 394 |     
 395 |     @wraps(orig_func)
 396 |     def wrapper(*args, **kwargs):
 397 |         t1 = time.time()
 398 |         result = orig_func(*args, *kwargs)
 399 |         t2 = time.time() - t1
 400 |         print(f"{orig_func.__name__} ran in: {round(t2,3)} seconds")
 401 |         return result
 402 |     
 403 |     return wrapper
 404 | 
 405 | def windows(trial_data, sub, window_size, overlap, fs):
 406 |     """
 407 |     Functon for obtaining classification windows for training.
 408 | 
 409 |     :param trial_data: EEG data - n_trials * n_chans * n_samples
 410 |     :param sub: subject object
 411 |     :param window_size: n number of samples
 412 |     :param overlap: n number of samples for overlap
 413 |     :param fs: sampling frequency
 414 |     :return: list containing data from each window
 415 |     """
 416 |     windows_list, index_list = [],[]
 417 |     n_windows = int(sub.epoch / window_size + np.floor((sub.epoch - overlap) / window_size))
 418 |     if n_windows == 0:
 419 |         n_windows = 1
 420 |     low_index = 0
 421 |     high_index = window_size
 422 |     for w in range(n_windows):
 423 |         data = trial_data[:, :, low_index:high_index]
 424 |         windows_list.append(data)
 425 |         index_list.append([low_index,high_index])
 426 |         low_index += overlap
 427 |         high_index += overlap
 428 | 
 429 |     return np.array(windows_list), index_list
 430 | 
 431 | def windows_index(epoch, window_size, overlap, fs):
 432 |     """
 433 |     Functon for obtaining classification windows for training.
 434 | 
 435 |     :param epoch: length of overal trial
 436 |     :param window_size: n number of samples
 437 |     :param overlap: n number of samples for overlap
 438 |     :param fs: sampling frequency
 439 |     :return: list containing data from each window
 440 |     """
 441 |     index_list = []
 442 |     n_windows = int(epoch / window_size + np.floor((epoch - overlap) / window_size))
 443 |     if n_windows == 0:
 444 |         n_windows = 1
 445 |     low_index = 0
 446 |     high_index = window_size
 447 |     for w in range(n_windows):
 448 |         index_list.append((low_index,high_index))
 449 |         low_index += overlap
 450 |         high_index += overlap
 451 | 
 452 |     return index_list
 453 | 
 454 | def get_class_labels(paradigm):
 455 |     """
 456 |     Function for obtaining class labels from paradigm description
 457 |     :param paradigm: string format: 'EEG_semantics_text'
 458 |     :return:
 459 |     """
 460 |     paradigm = paradigm.split('_')[1]
 461 |     if paradigm == 'semantics':
 462 |         class_labels = ['pig', 'dog', 'car', 'bus']
 463 |     elif paradigm == 'action':
 464 |         class_labels = ['kick', 'jump', 'chew', 'blink']
 465 |     elif paradigm == 'twoword':
 466 |         class_labels = ['red ball', 'blue hat', 'red blue', 'ball hat']
 467 |     elif paradigm == 'concrete':
 468 |         class_labels = ['apple', 'tiger', 'fruit', 'animal']
 469 |     return class_labels
 470 | 
 471 | def misclass_to_class(column):
 472 |     return 1 - column
 473 | 
 474 | def get_model_loss_and_acc(fold_models):
 475 |      """
 476 |      Function for extracting epoch-by-epoch model loss and accuracy scores from
 477 |      models associated with multiple cross-validation folds
 478 |      :param fold_models: list of Braindecode (PyTorch) sequential models
 479 |      :return: train_loss: (pandas.series) main training loss per epoch across folds
 480 |               valid_loss: (pandas.series) main tvalidation loss per epoch across folds
 481 |               test_loss: (pandas.series) main testing loss per epoch across folds
 482 |               train_acc: (pandas.series) main training acc per epoch across folds
 483 |               valid_acc: (pandas.series) main tvalidation acc per epoch across folds
 484 |               test_acc: (pandas.series) main testing acc per epoch across folds
 485 |      """
 486 |      train_loss = dict()
 487 |      valid_loss = dict()
 488 |      test_loss = dict()
 489 |      train_acc = dict()
 490 |      valid_acc = dict()
 491 |      test_acc = dict()
 492 | 
 493 |      for i, model in enumerate(fold_models):
 494 |         train_loss[i] = model.epochs_df['train_loss']
 495 |         valid_loss[i] = model.epochs_df['valid_loss']
 496 |         test_loss[i] = model.epochs_df['test_loss']
 497 |         train_acc[i] =  model.epochs_df['train_misclass']
 498 |         valid_acc[i] = model.epochs_df['valid_misclass']
 499 |         test_acc[i] = model.epochs_df['test_misclass']
 500 | 
 501 |      train_loss = pd.DataFrame(train_loss)
 502 |      valid_loss = pd.DataFrame(valid_loss)
 503 |      test_loss = pd.DataFrame(test_loss)
 504 |      train_loss = train_loss.mean(axis=1, skipna=True)
 505 |      valid_loss = valid_loss.mean(axis=1, skipna=True)
 506 |      test_loss = test_loss.mean(axis=1, skipna=True)
 507 | 
 508 |      train_acc = pd.DataFrame(train_acc).apply(lambda x : misclass_to_class(x)) # function converts misclass to classification accuracy
 509 |      valid_acc = pd.DataFrame(valid_acc).apply(lambda x : misclass_to_class(x))
 510 |      test_acc = pd.DataFrame(test_acc).apply(lambda x : misclass_to_class(x))
 511 |      train_acc = train_acc.mean(axis=1, skipna=True)
 512 |      valid_acc = valid_acc.mean(axis=1, skipna=True)
 513 |      test_acc = test_acc.mean(axis=1, skipna=True)
 514 | 
 515 |      return train_loss, valid_loss, test_loss, train_acc, valid_acc, test_acc
 516 | 
 517 | 
 518 | def labels_dict_and_list(classes):
 519 |     """
 520 |     input: empty pandas DataFrame with column headings
 521 |            corresponding to class labels
 522 |     output: labels_dict (dict): key=number, value=string
 523 |             key_list (list): list of classes
 524 |     """
 525 | 
 526 |     labels_dict = dict()
 527 |     key_list = []
 528 |     for n, label in enumerate(classes.columns):
 529 |         labels_dict[str(n + 1)] = label
 530 | 
 531 |     for key in labels_dict:
 532 |         key_list.append(key)
 533 |     return labels_dict, key_list
 534 | 
 535 | # def data_loader(directory, subj, session, category, *args):
 536 | #     """
 537 | 
 538 | #     :param directory: (str) directory of stored data
 539 | #     :param subj: (str) Subject Identity, e.g. '01'
 540 | #     :param session: (int) Session Identity
 541 | #     :param category: (str) Experimental paradigm, e.g. "actionText"
 542 | #     :param args: (str) modalities of data
 543 | #     :return: list of tuples containing data and labels
 544 | #     """
 545 | #     data = []
 546 | #     for arg in args:
 547 | #         filename = f"classifierData/{category}_{arg}_CLF"
 548 | #         subj_object = load_subject(directory, subj, session, filename)["subject"]
 549 | #         data.append((subj_object.classifier_data.astype(np.float32), subj_object.labels.astype(np.int64) ))
 550 | #     return data
 551 | 
 552 | def get_ordered_lists(*args):
 553 |     flatten = lambda fl: [item for sublist in fl for item in sublist]  # flatten nested lists
 554 |     op_list = []
 555 |     for arg in zip(*args):
 556 |         arg_list = flatten(arg)
 557 |         op_list.append(arg_list)
 558 |     return op_list
 559 | 
 560 | def ordered_lists(*args):
 561 |     op_list = []
 562 |     for arg in args:
 563 |         op_list.append(get_ordered_lists(*arg))
 564 |     return get_ordered_lists(*op_list)
 565 | 
 566 | 
 567 | """
 568 | Name: Ciaran Cooney
 569 | Date: 12/01/2019
 570 | Description: Functions required for data processing and training of 
 571 | CNNs on imagined speech EEG data.
 572 | """
 573 | 
 574 | import pickle
 575 | import os
 576 | import numpy as np 
 577 | import pandas as pd
 578 | import matplotlib.pyplot as plt
 579 | import seaborn as sns
 580 | import time 
 581 | from functools import wraps
 582 | 
 583 | def eeg_to_3d(data, epoch_size, n_events,n_chan):
 584 |     """
 585 |     function to return a 3D EEG data format from a 2D input.
 586 |     Parameters:
 587 |       data: 2D np.array of EEG
 588 |       epoch_size: number of samples per trial, int
 589 |       n_events: number of trials, int
 590 |       n_chan: number of channels, int
 591 |         
 592 |     Output:
 593 |       np.array of shape n_events * n_chans * n_samples
 594 |     """
 595 |     idx, a, x = ([] for i in range(3))
 596 |     [idx.append(i) for i in range(0,data.shape[1],epoch_size)]
 597 |     for j in data:
 598 |         [a.append([j[idx[k]:idx[k]+epoch_size]]) for k in range(len(idx))]
 599 |    
 600 |     
 601 |     return np.reshape(np.array(a),(n_events,n_chan,epoch_size))
 602 | 
 603 | def load_subject(direct, subject, session, filename):
 604 |     f_name = f"{direct}/S{subject}/Session_{session}/{filename}.pickle"
 605 |     with open(f_name, 'rb') as f:
 606 |         return pickle.load(f)
 607 | 
 608 | def load_pickle(direct, folder, filename):
 609 | 	
 610 |     for file in os.listdir(direct + folder):
 611 |         if file.endswith(filename):
 612 |             pickle_file = (direct + folder + '/' + file)
 613 |             with open(pickle_file, 'rb') as f:
 614 |                 file = pickle.load(f)
 615 | 
 616 |             return file, pickle_file
 617 | 
 618 | def create_events(data, labels):
 619 | 	events = []
 620 | 	x = np.zeros((data.shape[0], 3))
 621 | 	for i in range(data.shape[0]):
 622 | 		x[i][0] = i 
 623 | 		x[i][2] = labels[i]
 624 | 	[events.append(list(map(int, x[i]))) for i in range(data.shape[0])]
 625 | 	return np.array(events)
 626 | 
 627 | def reverse_coeffs(coeffs, N):
 628 | 	""" Reverse order of coefficients in an array."""
 629 | 	idx = np.array([i for i in reversed(range(N))])
 630 | 	coeffs = coeffs[idx]
 631 | 	coeffs = coeffs.reshape((N,1))
 632 | 	z = np.zeros((N,1))
 633 | 	return np.append(coeffs, z, axis=1) , coeffs
 634 | 
 635 | def class_ratios(labels):
 636 |     unique, counts = np.unique(labels, return_counts=True)
 637 |     class_weight = dict()
 638 |     for i in range(len(unique)):
 639 |        class_weight[unique[i]] = len(labels) / (len(unique)*counts[i])
 640 |     return class_weight
 641 | 
 642 | def classification_report_csv(report, output_file):
 643 |     report_data = []
 644 |     lines = report.split('\n')
 645 |     for line in lines[2:-3]:
 646 |         row = {}
 647 |         row_data = line.split('      ')
 648 |         row['class'] = row_data[0]
 649 |         row['precision'] = float(row_data[1])
 650 |         row['recall'] = float(row_data[2])
 651 |         row['f1_score'] = float(row_data[3])
 652 |         row['support'] = float(row_data[4])
 653 |         report_data.append(row)
 654 |     dataframe = pd.DataFrame.from_dict(report_data)
 655 |     dataframe.to_csv(output_file + '.csv', index = False)
 656 | 
 657 | def load_features(direct, dict_key1, dict_key2=None):
 658 |     with open(direct, 'rb') as f:
 659 |         file = pickle.load(f)
 660 |     if dict_key2 == None:
 661 |         return np.array(file[dict_key1])
 662 |     else:
 663 |         return np.array(file[dict_key1]), np.array(file[dict_key2])
 664 | 
 665 | def short_vs_long(features, labels, split, event_id):
 666 |     """Function for multilabel data into binary-class sets i.e.,
 667 |        short words and long words
 668 |     """
 669 |     short, long, s_idx, l_idx, s_features, l_features = ([] for i in range(6))
 670 |     
 671 |     [short.append(event_id[i]) for i in event_id if len(i) <= split]
 672 |     [long.append(event_id[i]) for i in event_id if len(i) > split]
 673 |     
 674 |     [s_idx.append(i) for i, e in enumerate(labels) if e in short]
 675 |     [l_idx.append(i) for i, e in enumerate(labels) if e in long]
 676 |     
 677 |     [s_features.append(e) for i, e in enumerate(features) if i in s_idx]
 678 |     [l_features.append(e) for i, e in enumerate(features) if i in l_idx]
 679 |     
 680 |     s_labels = np.zeros(np.array(s_features).shape[0])
 681 |     l_labels = np.ones(np.array(l_features).shape[0])
 682 | 
 683 |     features = np.concatenate((s_features, l_features))
 684 |     labels = np.concatenate((s_labels,l_labels))
 685 |     
 686 |     return s_features, l_features, s_labels, l_labels, features, labels 
 687 | 
 688 | def return_indices(event_id, labels):
 689 |     indices = []
 690 |     for _, k in enumerate(event_id):
 691 |         idx = []
 692 |         for d, j in enumerate(labels):
 693 |             if event_id[k] == j:
 694 |                 idx.append(d)
 695 |         indices.append(idx)
 696 |     return indices
 697 | 
 698 | def load_subject_eeg(subject_id, vowels):
 699 |     """ returns eeg data corresponding to words and vowels 
 700 |         given a subject identifier.
 701 |     """
 702 | 
 703 |     data_folder = 'C:\\Users\\sb00745777\\OneDrive - Ulster University\\Study_2\\imagined_speech/S{}/post_ica/'.format(subject_id)
 704 |     data_folder1 = 'C:\\Users\\cfcoo\\OneDrive - Ulster University\\Study_2\\imagined_speech/S{}/post_ica/'.format(subject_id)
 705 |     words_file = 'raw_array_ica.pickle'
 706 |     vowels_file = 'raw_array_vowels_ica.pickle'
 707 |     
 708 |     try:
 709 |         with open(data_folder + words_file, 'rb') as f:
 710 |             file = pickle.load(f)
 711 |     except:
 712 |         print("Not on PC! Attempting to load from laptop.")
 713 |         with open(data_folder1 + words_file, 'rb') as f:
 714 |             file = pickle.load(f)
 715 |             
 716 |     w_data = file['raw_array'][:][0]
 717 |     w_labels = file['labels']
 718 |     if vowels == False:
 719 |         return w_data, w_labels
 720 | 
 721 |     elif vowels:
 722 |         try:
 723 |             with open(data_folder + vowels_file, 'rb') as f:
 724 |                 file = pickle.load(f)
 725 |         except:
 726 |             with open(data_folder1 + vowels_file, 'rb') as f:
 727 |                 file = pickle.load(f)
 728 |         v_data = file['raw_array'][:][0]
 729 |         v_labels = file['labels']
 730 |     return w_data, v_data, w_labels, v_labels
 731 | 
 732 | def balanced_subsample(features, targets, random_state=12):
 733 |     """
 734 |     function for balancing datasets by randomly-sampling data
 735 |     according to length of smallest class set.
 736 |     """
 737 |     from sklearn.utils import resample
 738 |     unique, counts = np.unique(targets, return_counts=True)
 739 |     unique_classes = dict(zip(unique, counts))
 740 |     mnm = len(targets)
 741 |     for i in unique_classes:
 742 |         if unique_classes[i] < mnm:
 743 |             mnm = unique_classes[i]
 744 | 
 745 |     X_list, y_list = [],[]
 746 |     for unique in np.unique(targets):
 747 |         idx = np.where(targets == unique)
 748 |         X = features[idx]
 749 |         y = targets[idx]
 750 |         
 751 |         #X1, y1 = resample(X,y,n_samples=mnm, random_state=random_state)
 752 |         X_list.append(X[:mnm])
 753 |         y_list.append(y[:mnm])
 754 |     
 755 |     balanced_X = X_list[0]
 756 |     balanced_y = y_list[0]
 757 |     
 758 |     for i in range(1, len(X_list)):
 759 |         balanced_X = np.concatenate((balanced_X, X_list[i]))
 760 |         balanced_y = np.concatenate((balanced_y, y_list[i]))
 761 | 
 762 |     return balanced_X, balanced_y
 763 | 
 764 | def predict(model, X_test, batch_size, iterator, threshold_for_binary_case=None):
 765 |     """
 766 |     Load torch model and make predictions on new data.
 767 |     """
 768 |     all_preds = []
 769 |     with th.no_grad():
 770 |         for b_X, _ in iterator.get_batches(SignalAndTarget(X_test, X_test), False):
 771 |             b_X_var = np_to_var(b_X)
 772 |             all_preds.append(var_to_np(model(b_X_var)))
 773 | 
 774 |         pred_labels = compute_pred_labels_from_trial_preds(
 775 |                     all_preds, threshold_for_binary_case)
 776 |     return pred_labels
 777 | 
 778 | def plot_confusion_matrix(cm, classes,filename,
 779 |                           normalize=False,
 780 |                           title='Confusion matrix',
 781 |                           cmap=plt.cm.Blues):
 782 | 
 783 |     """
 784 |     Code for confusion matrix extracted from here:
 785 |     http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py
 786 |     """
 787 |     if normalize:
 788 |         cm = (cm.astype('float') / cm.sum(axis=1)[:, np.newaxis])*100
 789 |         print("Normalized confusion matrix")
 790 |     else:
 791 |         print('Confusion matrix, without normalization')
 792 | 
 793 |     print(cm)
 794 |     fig = plt.figure(1, figsize=(9, 6))
 795 |     #ax = plt.add_subplot(111)
 796 |     plt.tick_params(labelsize='large')
 797 |     plt.imshow(cm, interpolation='nearest', cmap=cmap)
 798 |     #plt.title(title)
 799 |     plt.colorbar()
 800 |     tick_marks = np.arange(len(classes))
 801 |     plt.xticks(tick_marks, classes, rotation=45)
 802 |     plt.yticks(tick_marks, classes)
 803 | 
 804 |     fmt = '.2f' if normalize else 'd'
 805 |     thresh = cm.max() / 2.
 806 |     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
 807 |         plt.text(j, i, format(cm[i, j], fmt),
 808 |                  horizontalalignment="center",
 809 |                  color="white" if cm[i, j] > thresh else "black")
 810 | 
 811 |     plt.tight_layout()
 812 |     plt.ylabel('True label', fontsize='large', fontname='sans-serif')
 813 |     plt.xlabel('Predicted label', fontsize='large', fontname='sans-serif')
 814 |     fig.savefig(filename + '.jpg', bbox_inches='tight')
 815 |     return(fig)
 816 | 
 817 | def print_confusion_matrix(confusion_matrix, class_names, filename, normalize = True, figsize = (5,5), fontsize=16):
 818 |     """Prints a confusion matrix, as returned by sklearn.metrics.confusion_matrix, as a heatmap.
 819 |     
 820 |     Arguments
 821 |     ---------
 822 |     confusion_matrix: numpy.ndarray
 823 |         The numpy.ndarray object returned from a call to sklearn.metrics.confusion_matrix. 
 824 |         Similarly constructed ndarrays can also be used.
 825 |     class_names: list
 826 |         An ordered list of class names, in the order they index the given confusion matrix.
 827 |     figsize: tuple
 828 |         A 2-long tuple, the first value determining the horizontal size of the ouputted figure,
 829 |         the second determining the vertical size. Defaults to (10,7).
 830 |     fontsize: int
 831 |         Font size for axes labels. Defaults to 14.
 832 |         
 833 |     Returns
 834 |     -------
 835 |     matplotlib.figure.Figure
 836 |         The resulting confusion matrix figure
 837 |     """
 838 |     if normalize:
 839 |         confusion_matrix = (confusion_matrix.astype('float') / confusion_matrix.sum(axis=1)[:, np.newaxis])*100
 840 |         print("Normalized confusion matrix")
 841 |     else:
 842 |         print('Confusion matrix, without normalization')
 843 |     
 844 |     df_cm = pd.DataFrame(
 845 |         confusion_matrix, index=class_names, columns=class_names, 
 846 |     )
 847 |     fig = plt.figure(figsize=figsize)
 848 |     fmt = '.2f' if normalize else 'd'
 849 |     #####set heatmap customization#####
 850 |     try:
 851 |         heatmap = sns.heatmap(df_cm, annot=True, fmt=fmt, cmap='GnBu', linewidths=.5, cbar=False, annot_kws={"size": 16})
 852 |     except ValueError:
 853 |         raise ValueError("Confusion matrix values must be integers.")
 854 |         
 855 |     heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize)
 856 |     heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize)
 857 |     plt.ylabel('True label', fontsize=16, fontname='sans-serif')
 858 |     plt.xlabel('Predicted label', fontsize=16, fontname='sans-serif')
 859 |     
 860 |     if filename != None:
 861 |         fig.savefig(filename + '.png', bbox_inches='tight') #store image as .png
 862 |     
 863 |     return fig
 864 | 
 865 | def data_wrangler(data_type, subject_id):
 866 |     """
 867 |     Function to return EEG data in format #trials*#channels*#samples.
 868 |     Also returns labels in the range 0 to n-1.
 869 |     """
 870 |     epoch = 4096
 871 |     if data_type == 'words':
 872 |         data, labels = load_subject_eeg(subject_id, vowels=False)
 873 |         n_chan = len(data)
 874 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
 875 |         labels = labels.astype(np.int64)
 876 |     elif data_type == 'vowels':
 877 |         _, data, _, labels = load_subject_eeg(subject_id, vowels=True)
 878 |         n_chan = len(data)
 879 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
 880 |         labels = labels.astype(np.int64)
 881 |     elif data_type == 'all_classes':
 882 |         w_data, v_data, w_labels, v_labels = load_subject_eeg(subject_id, vowels=True)
 883 |         n_chan = len(w_data)
 884 |         words = eeg_to_3d(w_data, epoch, int(w_data.shape[1] / epoch), n_chan).astype(np.float32)
 885 |         vowels = eeg_to_3d(v_data, epoch, int(v_data.shape[1] / epoch), n_chan).astype(np.float32)
 886 |         data = np.concatenate((words, vowels), axis=0)
 887 |         labels = np.concatenate((w_labels, v_labels), axis=0).astype(np.int64)
 888 |     
 889 |     x = lambda a: a * 1e6
 890 |     data = x(data)
 891 |     
 892 |     if data_type == 'words': # zero-index the labels
 893 |         labels[:] = [x - 6 for x in labels]
 894 |     elif (data_type == 'vowels' or data_type == 'all_classes'):
 895 |         labels[:] = [x - 1 for x in labels]
 896 | 
 897 |     return data, labels
 898 | 
 899 | 
 900 | def format_data(data_type, subject_id, epoch):
 901 |     """
 902 |     Returns data into format required for inputting to the CNNs.
 903 | 
 904 |     Parameters:
 905 |         data_type: str()
 906 |         subject_id: str()
 907 |         epoch: length of single trials, int
 908 |     """
 909 | 
 910 |     if data_type == 'words':
 911 |         data, labels = load_subject_eeg(subject_id, vowels=False)
 912 |         n_chan = len(data)
 913 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
 914 |         labels = labels.astype(np.int64)
 915 |         labels[:] = [x - 6 for x in labels] # zero-index the labels
 916 |     elif data_type == 'vowels':
 917 |         _, data, _, labels = load_subject_eeg(subject_id, vowels=True)
 918 |         n_chan = len(data)
 919 |         data = eeg_to_3d(data, epoch, int(data.shape[1] / epoch), n_chan).astype(np.float32)
 920 |         labels = labels.astype(np.int64)
 921 |         labels[:] = [x - 1 for x in labels]
 922 |     elif data_type == 'all_classes':
 923 |         w_data, v_data, w_labels, v_labels = load_subject_eeg(subject_id, vowels=True)
 924 |         n_chan = len(w_data)
 925 |         words = eeg_to_3d(w_data, epoch, int(w_data.shape[1] / epoch), n_chan).astype(np.float32)
 926 |         vowels = eeg_to_3d(v_data, epoch, int(v_data.shape[1] / epoch), n_chan).astype(np.float32)
 927 |         data = np.concatenate((words, vowels), axis=0)
 928 |         labels = np.concatenate((w_labels, v_labels)).astype(np.int64)
 929 |         labels[:] = [x - 1 for x in labels]
 930 | 
 931 |     return data, labels
 932 | 
 933 | def current_loss(model_loss):
 934 |     """
 935 |     Returns the minimum validation loss from the 
 936 |     trained model
 937 |     """
 938 |     losses_list = []
 939 |     [losses_list.append(x) for x in model_loss]
 940 |     return np.min(np.array(losses_list))
 941 | 
 942 | def current_acc(model_acc):
 943 |     """
 944 |     Returns the maximum validation accuracy from the 
 945 |     trained model
 946 |     """
 947 |     accs_list = []
 948 |     [accs_list.append(x) for x in model_acc]
 949 |     return np.min(np.array(accs_list))
 950 | 
 951 | def balance_classes(data1,data2):
 952 | 
 953 |     if data1.shape[0] > data2.shape[0]:
 954 |         data1 = data1[:data2.shape[0],:,:]
 955 |     elif data1.shape[0] < data2.shape[0]:
 956 |         data2 = data2[:data1.shape[0],:,:]
 957 |         
 958 |     return data1, data2
 959 | 
 960 | def timer(orig_func):
 961 |     """
 962 |     decorator for logging time of function.
 963 |     """
 964 |     import time
 965 |     
 966 |     @wraps(orig_func)
 967 |     def wrapper(*args, **kwargs):
 968 |         t1 = time.time()
 969 |         result = orig_func(*args, *kwargs)
 970 |         t2 = time.time() - t1
 971 |         print(f"{orig_func.__name__} ran in: {round(t2,3)} seconds")
 972 |         return result
 973 |     
 974 |     return wrapper
 975 | 
 976 | def windows(trial_data, sub, window_size, overlap, fs):
 977 |     """
 978 |     Functon for obtaining classification windows for training.
 979 | 
 980 |     :param trial_data: EEG data - n_trials * n_chans * n_samples
 981 |     :param sub: subject object
 982 |     :param window_size: n number of samples
 983 |     :param overlap: n number of samples for overlap
 984 |     :param fs: sampling frequency
 985 |     :return: list containing data from each window
 986 |     """
 987 |     windows_list, index_list = [],[]
 988 |     n_windows = int(sub.epoch / window_size + np.floor((sub.epoch - overlap) / window_size))
 989 |     if n_windows == 0:
 990 |         n_windows = 1
 991 |     low_index = 0
 992 |     high_index = window_size
 993 |     for w in range(n_windows):
 994 |         data = trial_data[:, :, low_index:high_index]
 995 |         windows_list.append(data)
 996 |         index_list.append([low_index,high_index])
 997 |         low_index += overlap
 998 |         high_index += overlap
 999 | 
1000 |     return np.array(windows_list), index_list
1001 | 
1002 | def windows_index(epoch, window_size, overlap, fs):
1003 |     """
1004 |     Functon for obtaining classification windows for training.
1005 | 
1006 |     :param epoch: length of overal trial
1007 |     :param window_size: n number of samples
1008 |     :param overlap: n number of samples for overlap
1009 |     :param fs: sampling frequency
1010 |     :return: list containing data from each window
1011 |     """
1012 |     index_list = []
1013 |     n_windows = int(epoch / window_size + np.floor((epoch - overlap) / window_size))
1014 |     if n_windows == 0:
1015 |         n_windows = 1
1016 |     low_index = 0
1017 |     high_index = window_size
1018 |     for w in range(n_windows):
1019 |         index_list.append((low_index,high_index))
1020 |         low_index += overlap
1021 |         high_index += overlap
1022 | 
1023 |     return index_list
1024 | 
1025 | def get_class_labels(paradigm):
1026 |     """
1027 |     Function for obtaining class labels from paradigm description
1028 |     :param paradigm: string format: 'EEG_semantics_text'
1029 |     :return:
1030 |     """
1031 |     paradigm = paradigm.split('_')[1]
1032 |     if paradigm == 'semantics':
1033 |         class_labels = ['pig', 'dog', 'car', 'bus']
1034 |     elif paradigm == 'action':
1035 |         class_labels = ['kick', 'jump', 'chew', 'blink']
1036 |     elif paradigm == 'twoword':
1037 |         class_labels = ['red ball', 'blue hat', 'red blue', 'ball hat']
1038 |     elif paradigm == 'concrete':
1039 |         class_labels = ['apple', 'tiger', 'fruit', 'animal']
1040 |     return class_labels
1041 | 
1042 | def misclass_to_class(column):
1043 |     return 1 - column
1044 | 
1045 | def get_model_loss_and_acc(fold_models):
1046 |      """
1047 |      Function for extracting epoch-by-epoch model loss and accuracy scores from
1048 |      models associated with multiple cross-validation folds
1049 |      :param fold_models: list of Braindecode (PyTorch) sequential models
1050 |      :return: train_loss: (pandas.series) main training loss per epoch across folds
1051 |               valid_loss: (pandas.series) main tvalidation loss per epoch across folds
1052 |               test_loss: (pandas.series) main testing loss per epoch across folds
1053 |               train_acc: (pandas.series) main training acc per epoch across folds
1054 |               valid_acc: (pandas.series) main tvalidation acc per epoch across folds
1055 |               test_acc: (pandas.series) main testing acc per epoch across folds
1056 |      """
1057 |      train_loss = dict()
1058 |      valid_loss = dict()
1059 |      test_loss = dict()
1060 |      train_acc = dict()
1061 |      valid_acc = dict()
1062 |      test_acc = dict()
1063 | 
1064 |      for i, model in enumerate(fold_models):
1065 |         train_loss[i] = model.epochs_df['train_loss']
1066 |         valid_loss[i] = model.epochs_df['valid_loss']
1067 |         test_loss[i] = model.epochs_df['test_loss']
1068 |         train_acc[i] =  model.epochs_df['train_misclass']
1069 |         valid_acc[i] = model.epochs_df['valid_misclass']
1070 |         test_acc[i] = model.epochs_df['test_misclass']
1071 | 
1072 |      train_loss = pd.DataFrame(train_loss)
1073 |      valid_loss = pd.DataFrame(valid_loss)
1074 |      test_loss = pd.DataFrame(test_loss)
1075 |      train_loss = train_loss.mean(axis=1, skipna=True)
1076 |      valid_loss = valid_loss.mean(axis=1, skipna=True)
1077 |      test_loss = test_loss.mean(axis=1, skipna=True)
1078 | 
1079 |      train_acc = pd.DataFrame(train_acc).apply(lambda x : misclass_to_class(x)) # function converts misclass to classification accuracy
1080 |      valid_acc = pd.DataFrame(valid_acc).apply(lambda x : misclass_to_class(x))
1081 |      test_acc = pd.DataFrame(test_acc).apply(lambda x : misclass_to_class(x))
1082 |      train_acc = train_acc.mean(axis=1, skipna=True)
1083 |      valid_acc = valid_acc.mean(axis=1, skipna=True)
1084 |      test_acc = test_acc.mean(axis=1, skipna=True)
1085 | 
1086 |      return train_loss, valid_loss, test_loss, train_acc, valid_acc, test_acc
1087 | 
1088 | 
1089 | def labels_dict_and_list(classes):
1090 |     """
1091 |     input: empty pandas DataFrame with column headings
1092 |            corresponding to class labels
1093 |     output: labels_dict (dict): key=number, value=string
1094 |             key_list (list): list of classes
1095 |     """
1096 | 
1097 |     labels_dict = dict()
1098 |     key_list = []
1099 |     for n, label in enumerate(classes.columns):
1100 |         labels_dict[str(n + 1)] = label
1101 | 
1102 |     for key in labels_dict:
1103 |         key_list.append(key)
1104 |     return labels_dict, key_list
1105 | 
1106 | def data_loader(directory, subj, session, category, *args):
1107 |     """
1108 | 
1109 |     :param directory: (str) directory of stored data
1110 |     :param subj: (str) Subject Identity, e.g. '01'
1111 |     :param session: (int) Session Identity
1112 |     :param category: (str) Experimental paradigm, e.g. "actionText"
1113 |     :param args: (str) modalities of data
1114 |     :return: list of tuples containing data and labels
1115 |     """
1116 |     data = []
1117 |     for arg in args:
1118 |         filename = f"classifierData/{category}_{arg}_CLF"
1119 |         subj_object = subjects.Subject.load_subject(f"filename.pickle") #load_subject(directory, subj, session, filename)
1120 |         #print(subj_object['data1'])
1121 |         data.append((subj_object.data1.astype(np.float32), subj_object.labels1.astype(np.int64)))
1122 |     return data
1123 | 
1124 | def load_subject(direct, subject, session, filename):
1125 |     f_name = f"{direct}/S{subject}/Session_{session}/{filename}.pickle"
1126 |     with open(f_name, 'rb') as f:
1127 |         return pickle.load(f)


--------------------------------------------------------------------------------