├── tools
│   ├── __init__.py
│   ├── decodingTRF.py
│   ├── eeg_utils.py
│   ├── decodingSSR.py
│   ├── behavior.py
│   └── audio.py
├── envAM.png
├── analyses_TRF.m
├── behavior.Rmd
├── analyses_behavior.ipynb
├── preprocessing.ipynb
└── readme.md

--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/envAM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hadrienj/decodingEEG/master/envAM.png

--------------------------------------------------------------------------------
/analyses_TRF.m:
--------------------------------------------------------------------------------
% Stimulus reconstruction with the mTRF toolbox (backward model).
fs = 64;         % sampling frequency of the preprocessed EEG (Hz)
map = -1;        % -1: backward model (reconstruct the stimulus from the EEG)
tmin = -50;      % minimum time lag (ms)
tmax = 300;      % maximum time lag (ms)
lambda = [1e-8]; % ridge regularization parameter

eeg = hdf5read('data_p1.h5', 'eeg_TRF');

att = hdf5read('data_p1.h5', 'envAttended');

% 80 trials
for i = 1:80
    eeg_set{1, i} = eeg(:, :, i)';
    att_set{1, i} = att(:, i)*2;
end


[r_att, p_att, ~, pred_att, model_att] = mTRFcrossval(att_set, eeg_set, fs, map, tmin, tmax, lambda);


% Export the reconstructed envelopes to .h5 for the Python analyses
hdf5write('reconstructed_p1.h5', 'reconstructed', pred_att);

--------------------------------------------------------------------------------
/behavior.Rmd:
--------------------------------------------------------------------------------
---
title: "behavior"
author: "hj"
date: "29/06/2018"
output: html_document
---

```{r setup, include=FALSE}

library(ggplot2)
library(dplyr)

cols <- c('#36BBB8', '#FF54A4', '#0E78D5')


simpleTheme <- theme(panel.grid.major.y = element_blank(),
                     strip.text = element_text(size=12),
                     axis.line = element_line(color='grey60', size=1),
                     axis.ticks = element_line(color = "grey60", size = 1),
                     axis.title = element_text(color = "grey10", size=36),
                     axis.text = element_text(color = "grey30", size=26),
                     legend.text = element_text(color = "grey30", size=26),
                     panel.background = element_blank(),
                     legend.key = element_blank())


data <- read.csv('behavior.csv')

data$TC <- ifelse(data$TC == 'False', 0, 1)
data$twoStreams <- ifelse(data$twoStreams == 'False', 0, 1)

data$TC <- factor(data$TC, labels=c('Absent', 'Present'))


```


```{r, results='hide',fig.keep='all',fig.height=12, fig.width=12}

effect_TC <- data %>%
  filter(twoStreams==0) %>%
  group_by(TC, name) %>%
  summarise(mean1=mean(dprime)) %>%
  ungroup() %>%
  group_by(TC) %>%
  summarise(mean=mean(mean1),
            N=n(),
            se=sd(mean1)/sqrt(N)) %>%
  ggplot(data=., aes(x=TC, y=mean)) +
  geom_bar(stat = 'identity', width = 0.5, position = position_dodge(width=0.1), fill='grey50') +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=0.05) +
  xlab('Tone cloud') +
  ylab('D-prime') +
  simpleTheme

ggsave('images/effect_TC.pdf', effect_TC)

# Group by TC as well as name so that TC survives the first summarise and can
# be used for the second grouping and the x aesthetic
exp2_dprime <- data %>%
  filter(twoStreams==1) %>%
  group_by(TC, name) %>%
  summarise(mean1=mean(dprime)) %>%
  ungroup() %>%
  group_by(TC) %>%
  summarise(mean=mean(mean1),
            N=n(),
            se=sd(mean1)/sqrt(N)) %>%
  ggplot(data=., aes(x=TC, y=mean)) +
  geom_bar(stat = 'identity', width = 0.5, position = position_dodge(width=0.1)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=0.05) +
  xlab('Tone cloud') +
  ylab('D-prime') +
  simpleTheme

ggsave('images/exp2_dprime.pdf', exp2_dprime)
```

--------------------------------------------------------------------------------
/tools/decodingTRF.py:
--------------------------------------------------------------------------------
import numpy as np

def calculateCorr(env1, env2, fs, end=None):
    """
    Get the correlation between env1 and env2 for each trial.

    Parameters
    ----------
    env1 : array-like
        First list of envelopes of shape (trial, time).
    env2 : array-like
        Second list of envelopes of shape (trial, time).
    fs : float
        Sampling frequency in Hz.
    end : float
        End limit in seconds to take for each trial. If None, the whole trial
        is used.

    Returns
    -------
    corrs : array-like
        List of correlations of shape (trial,).
    """
    if (env1.shape[0] != env2.shape[0] or env1.shape[1] != env2.shape[1]):
        raise ValueError("Shapes of the envelopes have to be identical "
            "but they are: %s and %s" % (env1.shape, env2.shape))

    if end is None:
        end = env1.shape[1]
    else:
        end = int(np.round(fs*end))


    trials = env1.shape[0]
    corrs = np.zeros(trials)
    for trial in range(trials):
        corrs[trial] = np.corrcoef(env1[trial, :end],
                                   env2[trial, :end])[0, 1]

    return corrs
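
# A minimal sketch (illustration only, not part of the original analyses) of
# how `calculateCorr` is meant to be used; the arrays below are synthetic
# stand-ins for real envelopes.
def _exampleCalculateCorr():
    rng = np.random.RandomState(0)
    fs = 64.
    env1 = rng.rand(5, int(60*fs))              # 5 trials of 60 s
    env2 = env1 + 0.5*rng.rand(5, int(60*fs))   # noisy copies of env1
    # One correlation per trial, computed on the first 10 s only
    corrs = calculateCorr(env1, env2, fs=fs, end=10)
    print(corrs.shape)  # (5,)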
69 | """ 70 | # trialsUnattended = getUnattendedTrialsNumber(trials) 71 | # envUnattendedTrials = envUnattended[trialsUnattended] 72 | classifMismatchTime = [] 73 | classifAtt_unattTime = [] 74 | testAll = [] 75 | # print 'trialsUnattended', trialsUnattended 76 | for i in range(0, 61): 77 | # Calculate all correlations without taking trials into account 78 | corrsAttended = calculateCorr(envAttended, envReconstructed, 79 | fs=64, end=i) 80 | 81 | corrsMismatch = calculateCorr(envMismatch, envReconstructed, 82 | fs=64, end=i) 83 | 84 | corrsUnattendedDualStream = calculateCorr(envUnattended, 85 | envReconstructed[trialsDualStream], fs=64, end=i) 86 | # print 'trialsDualStream', trialsDualStream 87 | # print 'corrsUnattendedDualStream', np.mean(corrsUnattendedDualStream) 88 | # Calculate the classification accuracy by selecting the trials to be used 89 | # Since the first trial dual streams is the number 40 we had to add 40 to the 90 | # trial number from the unattended part to the attended part 91 | classifMismatch = np.mean(corrsAttended[trials]>corrsMismatch[trials]) 92 | classifMismatchTime.append(classifMismatch) 93 | 94 | classifAtt_unatt = np.mean(corrsAttended[trialsDualStream]>corrsUnattendedDualStream) 95 | classifAtt_unattTime.append(classifAtt_unatt) 96 | 97 | test = np.mean(corrsUnattendedDualStream>corrsMismatch[trialsDualStream]) 98 | testAll.append(test) 99 | 100 | return classifMismatchTime, classifAtt_unattTime, testAll 101 | 102 | def getUnattendedTrialsNumber(trials): 103 | """ 104 | Get the trials for the condition dual streams. Since this condition was only from 105 | trial 40 to 80 these trials not all trials have to be considered. 106 | 107 | Parameters 108 | ---------- 109 | trials : array-type 110 | Trials to consider. 111 | Returns: 112 | 113 | newEnv : array-type 114 | Envelope of the selected trials of shape (trials, time). 115 | """ 116 | trialsUnattendedAll = np.arange(40, 80) 117 | trialsUnattendedIdx = np.isin(trials, trialsUnattendedAll) 118 | trialsUnattended = trials[trialsUnattendedIdx] - 40 119 | 120 | return trialsUnattended 121 | 122 | -------------------------------------------------------------------------------- /tools/eeg_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from scipy import signal 4 | from eeg import loadEEG, getEvents, chebyBandpassFilter, refToMastoids,\ 5 | create3DMatrix, getTrialNumList, refToAverageNP 6 | from behavior import getBehaviorData 7 | import h5py 8 | 9 | def processEEG(fnEEG, dbName, sessionNums, trialsToRemove, trialBehavior, fs, ref): 10 | """ 11 | Load and process EEG from .bdf file. The data is filtered according to 12 | `freqFilter`, re-referenced according to the mastoids and downsampled 13 | to 64 Hz if `downsampling` is set to True. 14 | 15 | Parameters 16 | ---------- 17 | fn : str 18 | Name of the bdf containing the EEG data. 19 | dbName : str 20 | Name of the database on the couch instance. 21 | sessionNums : array-type 22 | List of sessions to keep. 23 | trialsToRemove : array-type 24 | List of trials to remove from the analyses. 25 | ref : str 26 | Choose between referencing to mastoids ('mastoids') or to the average 27 | of all electrodes ('average'). 28 | fs : float 29 | Sampling frequency in Hz. 30 | 31 | Returns 32 | ------- 33 | dataFilt3D64 : instance of numpy.array 34 | A matrix of shape (trial, time, electrode) containing the processed data. 
35 | """ 36 | if ref != 'average' and ref != 'mastoids': 37 | raise ValueError('Bad `ref` argument!') 38 | 39 | # Loading 40 | raw = loadEEG(fnEEG) 41 | print raw.ch_names[:64] 42 | data = raw[:, :][0].T 43 | 44 | # Get triggers 45 | trigs = getEvents(raw=raw, eventCode=65282, shortest_event=1) 46 | # Some triggers have been sent but the trial not done due to experimental errors 47 | # Let's remove these trials in the EEG data 48 | newTrigs = trigs.drop(trigs.index[trialsToRemove]).reset_index(drop=True) 49 | 50 | # Filtering 51 | zpk, dataFiltTRF = chebyBandpassFilter(data, [0.5, 1, 14.5, 15], gstop=80, gpass=1, 52 | fs=fs) 53 | zpk, dataFiltSSR = chebyBandpassFilter(data, [0.5, 1, 100, 101], gstop=80, gpass=1, 54 | fs=fs) 55 | 56 | del data 57 | 58 | if ref=='mastoids': 59 | dataFiltTRF = pd.DataFrame(dataFiltTRF, columns=raw.ch_names) 60 | dataFiltSSR = pd.DataFrame(dataFiltSSR, columns=raw.ch_names) 61 | # Re-referencing 62 | dataFiltRefTRF = refToMastoids(dataFiltTRF, 63 | dataFiltTRF['M1'], 64 | dataFiltTRF['M2']).iloc[:, :64] 65 | del dataFiltTRF 66 | # Re-referencing 67 | dataFiltRefSSR = refToMastoids(dataFiltSSR, 68 | dataFiltSSR['M1'], 69 | dataFiltSSR['M2']).iloc[:, :64] 70 | del dataFiltSSR 71 | elif ref=='average': 72 | # Re-referencing 73 | dataFiltRefTRF = refToAverageNP(dataFiltTRF[:, :64]) 74 | del dataFiltTRF 75 | # Re-referencing 76 | dataFiltRefSSR = refToAverageNP(dataFiltSSR[:, :64]) 77 | del dataFiltSSR 78 | 79 | dataFiltRefTRF = pd.DataFrame(dataFiltRefTRF, columns=raw.ch_names[:64]) 80 | dataFiltRefSSR = pd.DataFrame(dataFiltRefSSR, columns=raw.ch_names[:64]) 81 | 82 | trialDur = 60 83 | # Changing shape to 3D matrix 84 | # Choose the length according to the number of sample in the sound files 85 | dataFilt3DTRF = create3DMatrix(data=dataFiltRefTRF, trialTable=trialBehavior, 86 | events=newTrigs, trialList=getTrialNumList(trialBehavior), 87 | trialDur=trialDur, fs=fs, normalize=False, baselineDur=0) 88 | del dataFiltRefTRF 89 | dataFilt3DSSR = create3DMatrix(data=dataFiltRefSSR, trialTable=trialBehavior, 90 | events=newTrigs, trialList=getTrialNumList(trialBehavior), 91 | trialDur=trialDur, fs=fs, normalize=False, baselineDur=0) 92 | 93 | del dataFiltRefSSR 94 | 95 | # Remove the first two seconds to avoid bias since in some trials one 96 | # stream starts 2 seconds before the other 97 | start = int(np.round(2*fs)) 98 | # Remove last two seconds that should be less reliable 99 | end = start + int(np.round((trialDur - 2)*fs)) 100 | 101 | dataFilt3DTRF = dataFilt3DTRF[:, start:end, :] 102 | dataFilt3DSSR = dataFilt3DSSR[:, start:end, :] 103 | 104 | # Downsampling 105 | dataFilt3DTRF64 = signal.decimate(dataFilt3DTRF, q=8, axis=1, zero_phase=True) 106 | 107 | return dataFilt3DTRF64, dataFilt3DSSR 108 | 109 | def loadDataH5(path, pathReconstructed): 110 | """ 111 | Load data from .h5 file. This expects to load one file containing the EEG 112 | and the envelopes of the stimuli and another file the reconstructed 113 | envelope created from Matlab. 114 | 115 | Parameters 116 | ---------- 117 | path : str 118 | Path to the `.h5` file containing EEG and stimuli envelopes. 119 | pathReconstructed : str 120 | Path to the `.h5` file containing the reconstructed envelopes. 121 | 122 | Returns 123 | ------- 124 | eeg_TRF : instance of numpy.array 125 | A matrix of shape (trial, time, electrode) containing the data processed 126 | for the TRF. 127 | eeg_TRF : instance of numpy.array 128 | to do. 129 | envMismatch : instance of numpy.array 130 | to do. 

def loadDataH5(path, pathReconstructed):
    """
    Load data from .h5 files. This expects to load one file containing the EEG
    and the envelopes of the stimuli and another file containing the
    reconstructed envelopes created from Matlab.

    Parameters
    ----------
    path : str
        Path to the `.h5` file containing the EEG and stimuli envelopes.
    pathReconstructed : str
        Path to the `.h5` file containing the reconstructed envelopes.

    Returns
    -------
    eeg_TRF : instance of numpy.array
        A matrix of shape (trial, time, electrode) containing the data
        processed for the TRF.
    envAttended : instance of numpy.array
        Attended envelopes of shape (trial, time).
    envMismatch : instance of numpy.array
        Mismatch envelopes of shape (trial, time): the attended envelopes
        rolled by one trial, so each trial is paired with the envelope of
        another trial.
    envUnattended : instance of numpy.array
        Unattended envelopes of shape (trial, time).
    envReconstructed : instance of numpy.array
        Envelopes of shape (trial, time) reconstructed from the EEG in Matlab.
    eeg_aSSR : instance of numpy.array
        A matrix of shape (trial, time, electrode) containing the data
        processed for the aSSR.
    """
    f1 = h5py.File(path, 'r')
    eeg_TRF = np.array(list(f1['eeg_TRF']))
    eeg_aSSR = np.array(list(f1['eeg_aSSR']))
    envAttended = np.array(list(f1['envAttended']))
    envUnattended = np.array(list(f1['envUnattended']))
    f1.close()

    f2 = h5py.File(pathReconstructed, 'r')
    envReconstructed = np.array(list(f2['reconstructed']))
    f2.close()

    # Roll trials to create the mismatch envelopes: each trial is paired with
    # the attended envelope of another trial
    envMismatch = np.roll(envAttended, 1, axis=0)

    return eeg_TRF, envAttended, envMismatch, envUnattended, envReconstructed, eeg_aSSR
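
# A minimal usage sketch (illustration only): the first file is produced by
# preprocessing.ipynb, the second by analyses_TRF.m.
#
#   (eeg_TRF, envAttended, envMismatch, envUnattended,
#    envReconstructed, eeg_aSSR) = loadDataH5('data_p1.h5', 'reconstructed_p1.h5')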

--------------------------------------------------------------------------------
/analyses_behavior.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "from sklearn import svm\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import sys\n",
    "sys.path.append(\"/Users/lsp/Documents/projets/en_cours/eeg/eeg\")\n",
    "from eeg import create3DMatrix, loadEEG, getEvents, getTrialNumList, plotFFTElectrodes, plot3DMatrix, computePickEnergy\n",
    "\n",
    "sys.path.append(\"/Users/lsp/Documents/projets/en_cours/eeg/dss\")\n",
    "from dss import crossValidateDSS, createBiasedSignal, covUnnorm, computeDSS, applyDSS\n",
    "\n",
    "from tools.eeg_utils import processEEG, loadDataH5\n",
    "from tools.audio import getAttendedAndUnattendedEnv, butterLpass\n",
    "from tools.behavior import getBehaviorData, analyses, checkLinkTrialsBehaviorEEG\n",
    "from tools.decodingTRF import getTRFAccuracyByDur, getUnattendedTrialsNumber, calculateCorr\n",
    "from tools.decodingSSR import calculateBaseline, comparePicks, getSSRAccuracyByDur, crossVal\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "%%javascript\n",
    "IPython.OutputArea.prototype._should_scroll = function(lines) {\n",
    "    return false;\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "%matplotlib notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "# Plot parameters\n",
    "plt.style.use('ggplot')\n",
    "plt.rcParams['axes.facecolor']='w'\n",
    "\n",
    "%pylab inline\n",
    "pylab.rcParams['figure.figsize'] = (18, 10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Behavioral analyses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading session 1\n",
      "loading session 2\n",
      "loading session 3\n",
      "loading session 4\n",
      "loading session 5\n",
      "loading session 6\n",
      "loading session 7\n",
      "loading session 8\n",
      "loading session 1\n",
      "loading session 2\n",
      "loading session 3\n",
      "loading session 4\n",
      "loading session 5\n",
      "loading session 6\n",
      "loading session 7\n",
      "loading session 8\n",
      "loading session 1\n",
      "loading session 2\n",
      "loading session 3\n",
      "loading session 4\n",
      "loading session 5\n",
      "loading session 6\n",
      "loading session 7\n",
      "loading session 8\n",
      "loading session 1\n",
      "loading session 2\n",
      "loading session 3\n",
      "loading session 4\n",
      "loading session 5\n",
      "loading session 6\n",
      "loading session 7\n",
      "loading session 8\n"
     ]
    }
   ],
   "source": [
    "# Get behavior data\n",
    "trialBehavior_p1 = getBehaviorData(dbName='hijee_18_06', sessionNums=np.arange(1, 9))\n",
    "trialBehavior_p2 = getBehaviorData(dbName='thomas_20_06', sessionNums=np.arange(1, 9))\n",
    "trialBehavior_p3 = getBehaviorData(dbName='isaac_21_06', sessionNums=np.arange(1, 9))\n",
    "trialBehavior_p4 = getBehaviorData(dbName='thomas_22_06', sessionNums=np.arange(1, 9))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "subjs = [trialBehavior_p1, trialBehavior_p2, trialBehavior_p3, trialBehavior_p4]\n",
    "allPerf = pd.DataFrame()\n",
    "for i in range(len(subjs)):\n",
    "    performances = analyses(subjs[i], verbose=False)\n",
    "    performances['name'] = i\n",
    "    allPerf = allPerf.append(performances, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "allPerf.to_csv('behavior.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The analyses have been done in R from this csv file. See the file `behavior.Rmd`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

--------------------------------------------------------------------------------
/preprocessing.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import h5py\n",
    "\n",
    "import sys\n",
    "sys.path.append(\"/Users/lsp/Documents/projets/en_cours/eeg/eeg\")\n",
    "from eeg import create3DMatrix, loadEEG, getEvents, getTrialNumList, plotFFTElectrodes, plot3DMatrix, computePickEnergy\n",
    "\n",
    "sys.path.append(\"/Users/lsp/Documents/projets/en_cours/eeg/dss\")\n",
    "from dss import crossValidateDSS, createBiasedSignal\n",
    "\n",
    "from tools.eeg_utils import processEEG\n",
    "from tools.audio import getAttendedAndUnattendedEnv\n",
    "from tools.behavior import getBehaviorData, analyses, checkLinkTrialsBehaviorEEG\n",
    "from tools.decodingTRF import getTRFAccuracyByDur, getUnattendedTrialsNumber, calculateCorr\n",
    "from tools.decodingSSR import calculateBaseline, comparePicks, getSSRAccuracyByDur, crossVal\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "%%javascript\n",
    "IPython.OutputArea.prototype._should_scroll = function(lines) {\n",
    "    return false;\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "%matplotlib notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
"hidePrompt": true 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "# Plot parameters\n", 81 | "plt.style.use('ggplot')\n", 82 | "plt.rcParams['axes.facecolor']='w'\n", 83 | "\n", 84 | "%pylab inline\n", 85 | "pylab.rcParams['figure.figsize'] = (18, 10)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "# Preprocessing\n", 93 | "\n", 94 | "The goal of this notebook is to preprocess the raw data in order to create a file for each participant containing:\n", 95 | "\n", 96 | "- The EEG data filtered between 1 and 15 Hz and downsampled to 64 Hz for the stimulus reconstruction (TRF). The shape of this matrix is (trial, time, electrode).\n", 97 | "- The EEG data filtered between 1 and 100 Hz for the auditory steady-state response analysis (aSSR). The shape of this matrix is (trial, time, electrode).\n", 98 | "- The envelopes of the attended stimuli. The shape of this matrix is (trial, time).\n", 99 | "- The envelopes of the unattended stimuli. The shape of this matrix is (trial, time)." 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "hideCode": true, 107 | "hidePrompt": true 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "def processingEEG(fnEEG, dbName, sessionNums, trialsToRemove, fs):\n", 112 | " password = \"a\"\n", 113 | " dbAddress = \"https://db.auditory.fr:6984/\"\n", 114 | " \n", 115 | " # Get behavior data\n", 116 | " trialBehavior = getBehaviorData(dbName, sessionNums)\n", 117 | " \n", 118 | " # Processing for TRF and SSR\n", 119 | " dataFilt3DTRF64, dataFilt3DSSR = processEEG(fnEEG, dbName, sessionNums,\n", 120 | " trialsToRemove, trialBehavior,\n", 121 | " fs, ref='average')\n", 122 | " # Get the envelopes for TRF\n", 123 | " envAttended, envUnattended = getAttendedAndUnattendedEnv(dbAddress, dbName,\n", 124 | " password, verbose=True,\n", 125 | " fs=48000.)\n", 126 | "\n", 127 | " # Get the minimum length to have all matrices the same length for TRF\n", 128 | " minLen = np.min([envAttended.shape[1], envUnattended.shape[1], dataFilt3DTRF64.shape[1]])\n", 129 | " \n", 130 | " dataFilt3DTRF64 = dataFilt3DTRF64[:, :minLen, :]\n", 131 | " envAttended = envAttended[:, :minLen]\n", 132 | " envUnattended = envUnattended[:, :minLen]\n", 133 | " \n", 134 | " # Export eeg data and attended envelope to do the TRF in Matlab and eeg for SSR (caching)\n", 135 | " with h5py.File('data_preproc/data_%s1.h5' % dbName, 'w') as hf:\n", 136 | " hf.create_dataset(\"eeg_TRF\", data=dataFilt3DTRF64)\n", 137 | " hf.create_dataset(\"eeg_aSSR\", data=dataFilt3DSSR)\n", 138 | " hf.create_dataset(\"envAttended\", data=envAttended)\n", 139 | " hf.create_dataset(\"envUnattended\", data=envUnattended)\n", 140 | " \n", 141 | " return dataFilt3DTRF64, envAttended, envUnattended, dataFilt3DSSR, trialBehavior" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "We used the function checkLinkTrialsBehaviorEEG (see package doc [here]()) to check the coherence between behavior and EEG. 
 The triggers have to be updated:\n",
    "\n",
    "Remove the following trials:\n",
    "\n",
    "- hijee_18_06: 10\n",
    "- thomas_20_06_18: nothing\n",
    "- isaac_21_06: nothing\n",
    "- thomas_s_22_06_18: nothing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Preprocess data for each participant"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "dataFilt3DTRF64_p1, envAttended_p1, envUnattended_p1, dataFilt3DSSR_p1, trialBehavior_p1 = processingEEG(fnEEG='data_raw/p1.bdf',\n",
    "                                                                                                         dbName=\"hijee_18_06\",\n",
    "                                                                                                         sessionNums=np.arange(1, 9),\n",
    "                                                                                                         trialsToRemove=[10],\n",
    "                                                                                                         fs=512.)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "dataFilt3DTRF64_p2, envAttended_p2, envUnattended_p2, dataFilt3DSSR_p2, trialBehavior_p2 = processingEEG(fnEEG='data_raw/p2.bdf',\n",
    "                                                                                                         dbName=\"thomas_20_06\",\n",
    "                                                                                                         sessionNums=np.arange(1, 9),\n",
    "                                                                                                         trialsToRemove=[],\n",
    "                                                                                                         fs=512.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "dataFilt3DTRF64_p3, envAttended_p3, envUnattended_p3, dataFilt3DSSR_p3, trialBehavior_p3 = processingEEG(fnEEG='data_raw/p3.bdf',\n",
    "                                                                                                         dbName=\"isaac_21_06\",\n",
    "                                                                                                         sessionNums=np.arange(1, 9),\n",
    "                                                                                                         trialsToRemove=[],\n",
    "                                                                                                         fs=512.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hideCode": true,
    "hidePrompt": true
   },
   "outputs": [],
   "source": [
    "dataFilt3DTRF64_p4, envAttended_p4, envUnattended_p4, dataFilt3DSSR_p4, trialBehavior_p4 = processingEEG(fnEEG='data_raw/p4.bdf',\n",
    "                                                                                                         dbName=\"thomas_22_06\",\n",
    "                                                                                                         sessionNums=np.arange(1, 9),\n",
    "                                                                                                         trialsToRemove=[],\n",
    "                                                                                                         fs=512.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
286 | "nbformat_minor": 2 287 | } 288 | -------------------------------------------------------------------------------- /tools/decodingSSR.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from eeg import computePickEnergy 4 | from behavior import analyses 5 | 6 | from sklearn import svm 7 | from sklearn.model_selection import train_test_split 8 | 9 | def calculateBaseline(data, fs): 10 | """ 11 | Calculate the baseline in order to take into account the fact that the eeg 12 | response can be different for the two AM rates. This functions computes 13 | the ratio between the AM rates in the one stream condition. 14 | 15 | Parameters 16 | ---------- 17 | eegData : instance of numpy.array 18 | EEG data of shape (trial, time, electrode). 19 | 20 | Returns 21 | ------- 22 | ratio : float 23 | Ratio between the 36 Hz stream and the 44 Hz stream. 24 | """ 25 | # Take only trials from 1 stream condition 26 | data36 = data[:10, :, :] 27 | data44 = data[20:30, :, :] 28 | # Average trials time domain 29 | data36MeanTrial = data36.mean(axis=0) 30 | data44MeanTrial = data44.mean(axis=0) 31 | 32 | # Compute picks for both streams 33 | pick36 = computePickEnergy(data36MeanTrial, pickFreq=36, showPlot=False, fs=fs) 34 | pick44 = computePickEnergy(data44MeanTrial, pickFreq=44, showPlot=False, fs=fs) 35 | # Average electrodes 36 | pick36Mean = np.mean(pick36) 37 | pick44Mean = np.mean(pick44) 38 | # Calculate baseline 39 | baseline = pick36Mean/pick44Mean 40 | return baseline 41 | 42 | def comparePicks(data, fs): 43 | """ 44 | Calculate the baseline in order to take into account the fact that the eeg 45 | response can be different for the two AM rates. This functions computes 46 | the ratio between the AM rates in the one stream condition. 47 | 48 | Parameters 49 | ---------- 50 | eegData : instance of numpy.array 51 | EEG data of shape (time, electrode). 52 | 53 | Returns 54 | ------- 55 | ratio : float 56 | Ratio between the 36 Hz stream and the 44 Hz stream. 57 | """ 58 | pick36 = computePickEnergy(data, pickFreq=36, showPlot=False, fs=fs) 59 | pick44 = computePickEnergy(data, pickFreq=44, showPlot=False, fs=fs) 60 | ratio = pick36/pick44 61 | return ratio 62 | 63 | def getSSRAccuracyByDur(data, trials, fs): 64 | """ 65 | Get the classification accuracy according to duration of trials and trials used. 66 | 67 | Parameters 68 | ---------- 69 | data : array-type 70 | Data to use to check accuracy. 71 | trials : array-type 72 | Trials to consider. 73 | fs : float 74 | Sampling frequency in Hz. 75 | 76 | Returns 77 | ------- 78 | allComparisons : array-type 79 | Array containing all comparison (for each duration). 80 | """ 81 | # Average data across trials 82 | dataSub = data[trials, :, :] 83 | 84 | allComparisons = np.zeros((59, 64)) 85 | for dur in range(1, 60): 86 | durSamples = int(np.round(dur*fs)) 87 | dataMeanTrial = dataSub[:, :durSamples, :].mean(axis=0) 88 | 89 | baseline = calculateBaseline(data[:, :durSamples, :], fs) 90 | 91 | electrodeComparison = comparePicks(dataMeanTrial, fs) 92 | electrodeBool = electrodeComparison>baseline 93 | 94 | allComparisons[dur-1, :] = electrodeBool 95 | return allComparisons 96 | 97 | def crossVal(data, data1, fs): 98 | """ 99 | This function has changed. To update and rename... 100 | 101 | Parameters 102 | ---------- 103 | data : array-type 104 | Shape (trial, time, electrode). Compute pick at 36 Hz for each trial. 105 | data1 : array-type 106 | Shape (trial, time, electrode). 

def crossVal(data, data1, fs):
    """
    This function has changed. To update and rename...
    Compute the pick energy at 36 Hz (from `data`) and at 44 Hz (from `data1`)
    for each trial.

    Parameters
    ----------
    data : array-like
        Shape (trial, time, electrode). The pick at 36 Hz is computed for each
        trial.
    data1 : array-like
        Shape (trial, time, electrode). The pick at 44 Hz is computed for each
        trial.
    fs : float
        Sampling frequency in Hz.

    Returns
    -------
    aAll : array-like
        List of pick values at 36 Hz from `data`. Length: number of trials.
    bAll : array-like
        List of pick values at 44 Hz from `data1`. Length: number of trials.
    """
    testRatios = []
    aAll = []
    bAll = []
    accuracy = []
    # data36 = data[40:60, :, :]
    # data44 = data[60:80, :, :]
    # ratio44All = comparePicks(data44.mean(axis=0), fs)
    # ratio36All = comparePicks(data36.mean(axis=0), fs)
    # Training
    for trial in range(data.shape[0]):
        # print trial
        # if trial < 20:
        #     print 'Categorizing 36 Hz trial...'
        #     trainingData36 = np.delete(data36, trial, axis=0)
        #     ratio36 = comparePicks(trainingData36.mean(axis=0), fs)
        #     ratio44 = ratio44All
        # else:
        #     print 'Categorizing 44 Hz trial...'
        #     trainingData44 = np.delete(data44, trial-20, axis=0)
        #     ratio44 = comparePicks(trainingData44.mean(axis=0), fs)
        #     ratio36 = ratio36All
        # print '36, 44: ', ratio36.mean(), ratio44.mean()
        # Testing
        testData = data[trial, :, :]
        testData1 = data1[trial, :, :]
        # testRatio = comparePicks(testData, fs)
        # testRatios.append(testRatio.mean())

        a = computePickEnergy(testData, pickFreq=36, showPlot=False, fs=fs)
        b = computePickEnergy(testData1, pickFreq=44, showPlot=False, fs=fs)
        aAll.append(a.mean())
        bAll.append(b.mean())
        del a, b

        # print 'test ratio: ', testRatio.mean()
        # if np.abs(testRatio.mean() - ratio36.mean()) < np.abs(testRatio.mean() - ratio44.mean()):
        #     print 'Categorized as 36 Hz trial'
        #     if trial < 20:
        #         accuracy.append(1)
        #     else:
        #         accuracy.append(0)
        # else:
        #     print 'Categorized as 44 Hz trial'
        #     if trial > 20:
        #         accuracy.append(1)
        #     else:
        #         accuracy.append(0)
    return aAll, bAll
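
# A minimal runnable sketch (illustration only) of an SVM classification such
# as the one in `hyperOptC` below (whose body is incomplete in this dump):
# each trial is described by its pick energy at 36 and 44 Hz, and the
# classifier learns which AM rate was attended. The feature layout here is an
# assumption, not the original code.
def _exampleSVM():
    rng = np.random.RandomState(0)
    # Synthetic features: 40 "36 Hz attended" and 40 "44 Hz attended" trials
    picks36 = np.column_stack([rng.normal(2, 1, 40), rng.normal(1, 1, 40)])
    picks44 = np.column_stack([rng.normal(1, 1, 40), rng.normal(2, 1, 40)])
    X = np.vstack([picks36, picks44])
    y = np.array([0]*40 + [1]*40)
    XTrain, XTest, yTrain, yTest = train_test_split(X, y, test_size=0.25,
                                                    random_state=0)
    clf = svm.SVC(C=1.0, kernel='linear')
    clf.fit(XTrain, yTrain)
    print(clf.score(XTest, yTest))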
197 | """ 198 | # Create dataframe to fill with the accuracy according to duration and c parameter 199 | bestC = pd.DataFrame(columns=['participant', 'dur', 'c', 'acc']) 200 | for dur in durs: 201 | durSamples = int(np.round(fs*dur)) 202 | # Get pick values (36 and 44 Hz) for specific duration and electrodes 203 | pick36, pick44 = crossVal(data[:, :durSamples, :electrodes], 204 | data[:, :durSamples, :electrodes], 205 | fs=fs) 206 | # Reshape to have one column per participant and all trials (80) in each col 207 | allPicks36 = np.zeros((80, subjNum)) 208 | allPicks44 = np.zeros((80, subjNum)) 209 | for subj in range(subjNum): 210 | allPicks36[:80, subj] = pick36[80*subj:(80*subj)+80] 211 | allPicks44[:80, subj] = pick44[80*subj:(80*subj)+80] 212 | 213 | for i in range(subjNum): 214 | # remove bad trials (with dprime lower than dprime threshold) for this participant 215 | performances = analyses(trialBehaviorAll[i], verbose=False) 216 | badTrials = performances.trial[performances.dprime0] 144 | lagIncorrect = lagIncorrect[lagIncorrect>0] 145 | 146 | isCorrectExists = lagCorrect.shape[0] > 0 147 | isIncorrectExists = lagIncorrect.shape[0] > 0 148 | 149 | if isCorrectExists: 150 | minCorrect = np.min(lagCorrect) 151 | if verbose: 152 | print 'min distance with correct = ', minCorrect 153 | if isIncorrectExists: 154 | minIncorrect = np.min(lagIncorrect) 155 | if verbose: 156 | print 'min distance with incorrect = ', minIncorrect 157 | # we check that there is a bump before the response 158 | if (isCorrectExists is True and isIncorrectExists is True and 159 | minCorrect < maxThresh and minCorrect > minThresh and 160 | minIncorrect < maxThresh and minIncorrect > minThresh): 161 | raise ValueError('It seems that there are two bumps very close...') 162 | if (isCorrectExists is True and minCorrect < maxThresh and minCorrect > minThresh): 163 | if verbose: 164 | print 'this is a hit' 165 | score = 1 166 | hit += 1 167 | answer = 'hit' 168 | hitTime.append(i) 169 | allHitTime.append(minCorrect) 170 | elif (isIncorrectExists is True and minIncorrect < maxThresh and minIncorrect > minThresh): 171 | if verbose: 172 | print 'this is a FA (false hit)' 173 | score = 0 174 | FA += 1 175 | answer = 'FA' 176 | falseHit += 1 177 | falseHitTime.append(i) 178 | allFalseHitTime.append(minIncorrect) 179 | else: 180 | if verbose: 181 | print 'this is a FA' 182 | score = 0 183 | FA += 1 184 | answer = 'FA' 185 | FATime.append(i) 186 | 187 | miss = gapNum - hit 188 | allFA = FA + falseHit 189 | 190 | hitRatio = hit/gapNum 191 | FARatio = allFA/gapNum 192 | 193 | # avoid infinite values in dprime calculation 194 | hitRatio1 = hitRatio 195 | FARatio1 = FARatio 196 | if hitRatio >= 1: 197 | hitRatio1 = 0.95 198 | if hitRatio <= 0: 199 | hitRatio1 = 0.05 200 | if FARatio <= 0: 201 | FARatio1 = 0.05 202 | if FARatio >= 1: 203 | FARatio1 = 0.95 204 | 205 | dprime = norm.ppf(hitRatio1) - norm.ppf(FARatio1) 206 | 207 | if verbose: 208 | print '\nhit = ', hit 209 | print 'FA = %s (including %s false hit)' % (FA, falseHit) 210 | print 'miss =', miss 211 | print 'gap =', gapNum 212 | 213 | 214 | if dataTrial.cloudCompNum.values[0] == 0: 215 | TC = False 216 | else: 217 | TC = True 218 | 219 | analyses.loc[trial] = [trial, dataTrial.freqDiff.values[0], 220 | hitRatio, hitRatio1, FARatio, FARatio1, falseHit, allFA, dprime, 221 | TC, dataTrial.correctStream.values[0], dataTrial.twoStreams.values[0], 222 | gapNum] 223 | 224 | if verbose: 225 | plt.figure() 226 | plotTrial(data, correctGap, incorrectGap, gapNum=gapNum, trial=trial, 

def plotTrial(data, correctBump, incorrectBump, gapNum, trial, hitTime, FATime, falseHitTime, resp):
    """
    Plot a representation of the behavior trial. This shows the gaps of the
    attended and unattended streams in green and red respectively and the
    responses as vertical gray lines.

    Parameters
    ----------
    data : instance of pandas.DataFrame
        Behavior data of the trial.
    correctBump : array-like
        Times of the gaps in the attended stream.
    incorrectBump : array-like
        Times of the gaps in the unattended stream.
    gapNum : int
        Number of gaps in the trial.
    trial : int
        Trial number (used as the plot title).
    hitTime : array-like
        Times of the hits.
    FATime : array-like
        Times of the false alarms.
    falseHitTime : array-like
        Times of the false hits (responses to the unattended stream).
    resp : array-like
        Times of the responses.
    """
    allXTicks = []
    for i in range(resp.shape[0]):
        plt.axvline(x=resp[i], color='gray')
    for i in range(gapNum):
        plt.axvline(x=correctBump[i], color='green')
        plt.axvline(x=incorrectBump[i], color='red')

        allXTicks.append(correctBump[i])
        allXTicks.append(incorrectBump[i])
    offset = 0.1
    for i in hitTime:
        plt.text(x=i-offset, y=0.5, s='H', color='green')
    for i in FATime:
        plt.text(x=i-offset, y=0.5, s='F', color='red')
    for i in falseHitTime:
        plt.text(x=i-offset, y=0.5, s='FH', color='red')
    plt.xlim(0, 15)
    plt.xticks(allXTicks, rotation=90)
    plt.title(trial)

def checkLinkTrialsBehaviorEEG(trialBehavior, events, sessionNum, trigs, fs):
    """
    Check that the answers recorded in the behavior data correspond to the
    triggers emitted by these answers. This allows one to be sure that the EEG
    data correspond to the behavior.

    Parameters
    ----------
    trialBehavior : instance of pandas.DataFrame
        Behavior data of the trials.
    events : instance of numpy.array
        Events extracted from the EEG.
    sessionNum : int
        Session number to check.
    trigs : instance of pandas.DataFrame
        Trial onset triggers.
    fs : float
        EEG data sampling frequency in Hz.
    """
    for trial in getTrialNumList(trialBehavior, sessionNum=sessionNum):
        print trial
        t0Sample = trigs.iloc[trial, 0]
        # get response in this trial
        ev = events[((events[:, 0]>t0Sample) & (events[:, 0]

        results = results[results>=0]
        if allSubj:
            results = np.concatenate([results, results+80,
                                      results+(80*2), results+(80*3)])
        return results
    else:
        if allSubj:
            allTrials = np.concatenate([allTrials, allTrials+80,
                                        allTrials+(80*2), allTrials+(80*3)])
        return allTrials

--------------------------------------------------------------------------------
/tools/audio.py:
--------------------------------------------------------------------------------
import numpy as np
from scipy.io import wavfile
from scipy import signal, fftpack
import urllib2, base64
from subprocess import Popen, PIPE
import soundfile as sf
import couchdb
from IPython.display import display, clear_output

def audioToNP(audioWebm, stream, verbose=False):
    """
    Get a list of matrices containing audio from a list of webm files.

    Parameters
    ----------
    audioWebm : array-like
        List of dictionaries (one per trial) mapping audio file names to webm
        audio files.
    stream : str
        Multiple sounds are associated with each trial. `stream` contains a
        character string used to discriminate them.
    verbose : bool
        If True, more information is displayed.

    Returns
    -------
    audioList : array-like
        List with audio matrices as elements.
    trialLen : int
        Number of samples of the shortest trial.
32 | """ 33 | 34 | audioList = [] 35 | trialLenAll = [] 36 | 37 | for i in range(len(audioWebm)): 38 | for j in audioWebm[i].keys(): 39 | if stream in j: 40 | audioName = j 41 | if verbose: 42 | # clear_output(wait=True) 43 | display('Fetching %s file...' % audioName) 44 | 45 | fs, audio = fromWebmToWav(inputFile=audioWebm[i][audioName], 46 | filename='test%d'%i, verbose=verbose) 47 | # remove second identical channel 48 | audio = audio[:, 0] 49 | # start with non 0 values 50 | audio = np.trim_zeros(audio, trim='f') 51 | trialLenAll.append(audio.shape[0]) 52 | audioList.append(audio) 53 | 54 | trialLen = int(np.min(trialLenAll)) 55 | 56 | return audioList, trialLen 57 | 58 | def butterLpass(data, cutoff, fs, order=5): 59 | """ 60 | Filter data with a low pass butterworth filter. 61 | 62 | Parameters 63 | ---------- 64 | data : instance of numpy.array 65 | Matrix of shape (samples,) containing the signal to filter 66 | cutoff : float 67 | The cutoff frequency in Hz. 68 | fs : float 69 | The sampling frequency of the signal. 70 | order : int 71 | Order of the filter. 72 | 73 | Returns: 74 | 75 | y : instance of numpy.array 76 | Matrix of shape (samples,) containing the filtered signal. 77 | """ 78 | nyq = 0.5 * fs 79 | normal_cutoff = cutoff / nyq 80 | b, a = signal.butter(order, normal_cutoff, btype='low', analog=False) 81 | # using filtfilt instead of lfilt to avoid the offset of the window size 82 | y = signal.filtfilt(b, a, data) 83 | return(y) 84 | 85 | def downsampleTo64(data): 86 | """ 87 | Decimate data with a factor 750 to go from 48000 to 64 Hz. 88 | 89 | Parameters 90 | ---------- 91 | data : instance of numpy.array 92 | Matrix to downsample. 93 | 94 | Returns: 95 | 96 | newdata : instance of numpy.array 97 | Downsampled matrix of shape (trial, time). 98 | """ 99 | # The initial sampling rate is 48000. and we want to got to 64 100 | # It is done in multiple steps because the doc of scipy.signal.decimate 101 | # advice to use a factor bellow 13 102 | decimate_intermediate = [10, 5, 5, 3] 103 | newdata = data 104 | for i in decimate_intermediate: 105 | newdata = signal.decimate(newdata, q=i, axis=1, zero_phase=True) 106 | return newdata 107 | 108 | def fromWebmToWav(inputFile, filename, verbose=False): 109 | """ 110 | Convert webm file from database to wav by writing on disk. The files are 111 | not removed. 112 | 113 | Parameters 114 | ---------- 115 | inputFile : webm file 116 | Webm audio file to convert into wav. 117 | filename : str 118 | Base name to use to store files on disk. 119 | verbose : bool 120 | If True, more information are displayed. 121 | 122 | Returns: 123 | 124 | allAudioFiles : array-like 125 | List of all audio files corresponding to the session, db etc. 126 | """ 127 | audio_wav_file = inputFile 128 | filenameInput = '%s.webm' % filename 129 | filenameOutput = '%s.wav' % filename 130 | 131 | 132 | wavf = open(filenameInput, 'wrb') 133 | wavf.write(audio_wav_file) 134 | wavf.close() 135 | 136 | command = 'ffmpeg -i %s -y %s 2>&1' % (filenameInput, filenameOutput) 137 | 138 | conversion = Popen(command, shell = True, stdout = PIPE) 139 | # wait for the process to terminate 140 | out, err = conversion.communicate() 141 | errcode = conversion.returncode 142 | 143 | if verbose: 144 | # clear_output(wait=True) 145 | display(filenameOutput) 146 | 147 | fs, audio = wavfile.read(filenameOutput) 148 | return fs, audio 149 | 150 | def getAudio(dbAddress, dbName, password, sessionNum, verbose=False): 151 | """ 152 | Get names of audio files from couchdb. 

    Parameters
    ----------
    dbAddress : str
        Path to the couch database.
    dbName : str
        Name of the database on the couch instance.
    password : str
        Password of the couch database.
    sessionNum : int
        Filter trials from a specific session number.
    verbose : bool
        If True, more information is displayed.

    Returns
    -------
    allAudioFiles : array-like
        List of dictionaries (one per trial) mapping audio file names to the
        corresponding webm data.
    """
    allAudioFileNames = getAudioFilenames(dbAddress, dbName, password, sessionNum)
    allAudioFiles = []
    for trial in allAudioFileNames:
        allAudioFiles.append({})
        audioFileNames = allAudioFileNames[trial]
        audioFilesTrial = []
        for audioFileName in audioFileNames:
            url = "%s%s/maskingEEG_%d_%d/%s" % (dbAddress, dbName, sessionNum, trial, audioFileName)
            if verbose:
                print url
            request = urllib2.Request(url)
            base64string = base64.encodestring('%s:%s' % (dbName, password)).replace('\n', '')
            request.add_header("Authorization", "Basic %s" % base64string)
            result = None
            while result is None:
                try:
                    result = urllib2.urlopen(request)
                except:
                    pass
            snd = result.read()
            allAudioFiles[trial][audioFileName] = snd
    return allAudioFiles

def getAudioFilenames(dbAddress, dbName, password, sessionNum):
    """
    Get the names of the audio files from couchdb. This allows for instance to
    use the names in the query to get the actual audio files.

    Parameters
    ----------
    dbAddress : str
        Path to the couch database.
    dbName : str
        Name of the database on the couch instance.
    password : str
        Password of the couch database.
    sessionNum : int
        Filter trials from a specific session number.

    Returns
    -------
    allAudioFileNames : dict
        Dictionary containing trial numbers as keys and arrays of audio file
        names as values.
    """

    couch = couchdb.Server(dbAddress)
    couch.resource.credentials = (dbName, password)
    db = couch[dbName]

    count = 0
    allAudioFiles = {}
    for doc in db.view('_all_docs'):
        if (doc['id'].startswith('maskingEEG_%d' % sessionNum)):
            allAudioFiles[db.get(doc['id'])['trialNum']] = db.get(doc['id'])['_attachments'].keys()
    return allAudioFiles

def getWebm(dbAddress, dbName, password, sessionNums):
    """
    Get webm audio files from couchdb.

    Parameters
    ----------
    dbAddress : str
        Path to the couch database.
    dbName : str
        Name of the database on the couch instance.
    password : str
        Password of the couch database.
    sessionNums : array-like
        List of sessions to keep.

    Returns
    -------
    allAudioFiles : array-like
        List of all audio files corresponding to the sessions, db etc.
    """
    allAudioFiles = []
    for sessionNum in sessionNums:
        audioFile = getAudio(dbAddress, dbName, password, sessionNum)
        allAudioFiles.append(audioFile)
    allAudioFiles = [item for sublist in allAudioFiles for item in sublist]
    return allAudioFiles

def getConcatAudio(audioList, trialLen, verbose=False):
    """
    Get all audio files in the form of one concatenated matrix containing the
    raw audio and another one containing the envelopes.

    Parameters
    ----------
    audioList : array-like
        List containing audio matrices. Its length is the number of trials.
    trialLen : int
        The number of samples in each trial.
    verbose : bool
        If True, more information is displayed.

    Returns
    -------
    audioAll : instance of numpy.array
        Matrix of shape (samples,) containing all audio trials concatenated.
    audioAllEnv : instance of numpy.array
        Matrix of shape (samples,) containing all audio envelopes concatenated.
    """
    # The Hilbert transform can be very slow depending on the number of samples used
    trialLenFastHilbert = fftpack.next_fast_len(trialLen)
    trialNum = len(audioList)

    audioAllEnv = np.zeros((trialNum*trialLen))
    audioAll = np.zeros((trialNum*trialLen))

    for i in range(len(audioList)):
        audio = audioList[i]
        audioAll[trialLen*i:trialLen*(i+1)] = audio[:trialLen]

        env = np.abs(signal.hilbert(audio, N=trialLenFastHilbert))

        if verbose:
            # clear_output(wait=True)
            display(i, 'envelope finished')

        audioAllEnv[trialLen*i:trialLen*(i+1)] = env[:trialLen]
        del env, audio

    return(audioAll, audioAllEnv)

def getEnv(dbAddress, dbName, password, verbose, sessionNums, fs, stream):
    """
    Get the requested envelopes corresponding to the user, sessionNums, stream etc.

    Parameters
    ----------
    dbAddress : str
        Path to the couch database.
    dbName : str
        Name of the database on the couch instance.
    password : str
        Password of the couch database.
    verbose : bool
        If True, more information is displayed.
    sessionNums : array-like
        List of sessions to keep.
    fs : float
        Sampling frequency in Hz.
    stream : str
        Stream to keep ('36' or '44').

    Returns
    -------
    audioAllEnvFilt2D : instance of numpy.array
        Matrix of shape (trial, time) containing the filtered envelopes.
    """
    audioWebm = getWebm(dbAddress, dbName, password, sessionNums)
    audioList, trialLen = audioToNP(audioWebm, stream, verbose)
    audioAll, audioAllEnv = getConcatAudio(audioList, trialLen, verbose)
    # Filtering
    audioAllEnvFilt = butterLpass(audioAllEnv, cutoff=15, fs=fs, order=5)
    totalTrialNum = len(audioList)
    # converting to a 2D matrix
    audioAllEnvFilt2D = splitEnvInTrials(audioAllEnvFilt, totalTrialNum, trialLen)
    return audioAllEnvFilt2D
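
# A minimal runnable sketch (illustration only, not part of the original
# analyses) of the envelope-extraction chain used by `getConcatAudio` and
# `getEnv`: Hilbert transform magnitude, then a 15 Hz low pass. The synthetic
# stimulus and its 4 Hz modulation are assumptions for the example.
def _exampleEnvelope():
    fs = 48000.
    t = np.arange(0, 1, 1/fs)
    # Synthetic stimulus: a 1 kHz tone modulated by a slow (4 Hz) envelope
    x = (1 + 0.8*np.sin(2*np.pi*4*t)) * np.sin(2*np.pi*1000*t)
    env = np.abs(signal.hilbert(x))                        # ~ 1 + 0.8*sin(2*pi*4*t)
    envFilt = butterLpass(env, cutoff=15, fs=fs, order=5)  # keep the slow envelope
    print(envFilt.shape)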
364 | """ 365 | print('This operation can takes few seconds/minutes... Please wait!') 366 | if verbose: 367 | print('noTC36...') 368 | noTC36 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[1], 369 | fs=fs, stream='36') 370 | if verbose: 371 | print('noTC44...') 372 | noTC44 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[3], 373 | fs=fs, stream='44') 374 | if verbose: 375 | print('TC36...') 376 | TC36 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[2], 377 | fs=fs, stream='36') 378 | if verbose: 379 | print('TC44...') 380 | TC44 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[4], 381 | fs=fs, stream='44') 382 | 383 | if verbose: 384 | print('stim36Att36...') 385 | stim36Att36 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[5, 6], 386 | fs=fs, stream='36') 387 | if verbose: 388 | print('stim44Att36...') 389 | stim44Att36 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[5, 6], 390 | fs=fs, stream='44') 391 | if verbose: 392 | print('stim36Att44...') 393 | stim36Att44 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[7, 8], 394 | fs=fs, stream='36') 395 | if verbose: 396 | print('stim44Att44...') 397 | stim44Att44 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[7, 8], 398 | fs=fs, stream='44') 399 | 400 | # Remove the first two seconds to avoid bias since in some trials one 401 | # stream starts 2 seconds before the other 402 | start = int(np.round(2*fs)) 403 | # Find the minimum duration among all envelopes in order to cut the others 404 | end = np.min([TC36.shape[1], TC44.shape[1], noTC36.shape[1], noTC44.shape[1], 405 | stim36Att36.shape[1], stim44Att36.shape[1], stim36Att44.shape[1], 406 | stim44Att44.shape[1]]) 407 | 408 | # Create attended and unattended streams 409 | attended = np.concatenate([noTC36[:, start:end], TC36[:, start:end], 410 | noTC44[:, start:end], TC44[:, start:end], stim36Att36[:, start:end], 411 | stim44Att44[:, start:end]], axis=0) 412 | unattended = np.concatenate([stim44Att36[:, start:end], 413 | stim36Att44[:, start:end]], axis=0) 414 | 415 | # downsampling 416 | attendedDS = downsampleTo64(attended) 417 | unattendedDS = downsampleTo64(unattended) 418 | print('Done!') 419 | return attendedDS, unattendedDS 420 | 421 | def splitEnvInTrials(data, totalTrialNum, trialLen, verbose=False): 422 | """ 423 | Convert the concatenated array of sound to a 2D matrix of shape (trial, time). 424 | 425 | Parameters 426 | ---------- 427 | data : instance of numpy.array 428 | Matrix of shape (samples,). Concatenated audio containing all trials in 429 | one 2D matrix. 430 | totalTrialNum : int 431 | The number of trials contained in the matrix data. 432 | trialLen : int 433 | The number of sample of one trial (we expect same length trials). 434 | 435 | Returns: 436 | 437 | newData : instance of numpy.array 438 | Matrix of shape (trial, time). 439 | """ 440 | 441 | newData = np.zeros((totalTrialNum, trialLen)) 442 | for trial in range(totalTrialNum): 443 | if verbose: 444 | print trial 445 | newData[trial, :trialLen] = data[trialLen*trial:trialLen*(trial+1)] 446 | return newData 447 | 448 | 449 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Project background 2 | 3 | This is the repository containing all the code needed to analyse the data presented on the poster for the conference *CuttingEEG* in Paris 2nd-5th July 2018. 
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Project background

This is the repository containing all the code needed to analyse the data presented on the poster for the conference *CuttingEEG* in Paris, 2nd-5th July 2018.

![double AM](envAM.png)

The aim of this project is to evaluate the possibility of using both auditory steady-state responses (aSSR) and stimulus reconstruction (SR) techniques on the same stimulus. For this purpose, we designed a double amplitude envelope: continuous streams were modulated by a first constant-rate amplitude envelope (36 or 44 Hz) and by a slower random one. We used these two analyses to investigate the effect of informational masking in a first experiment and the effect of attention to one of two streams in a second experiment. We evaluated these methods by calculating a decoding accuracy (first experiment: decoding of the condition, 36 or 44 Hz streams; second experiment: decoding of which stream was attended).

# Results

## Auditory steady-state response

The FFT of the EEG signal was calculated from the trials averaged according to rate (36 or 44 Hz) and condition (with or without tone cloud). We can see that the aSSR is larger for trials without the tone cloud.

## TRF

# Project structure

The anonymised raw EEG data (`.bdf` files) will be made available as soon as possible.

## Notebooks

- The notebook `createStimViz.ipynb` is where some visualisations of the sounds are made.

- The notebook `preprocessing.ipynb` has to be run first to transform the raw `.bdf` files into the Numpy arrays containing the preprocessed data. It stores these data in the `.h5` file format, which is compatible with both Python and Matlab. Different datasets can be stored in each `.h5` file. In our case, we create one file per participant, each containing the datasets below (a minimal loading sketch follows the list):

    1- eeg_TRF: data preprocessed for the stimulus reconstruction analyses

    2- eeg_aSSR: data preprocessed for the aSSR analyses

    3- envAttended: all attended envelopes (80 trials per participant = 320)

    4- envUnattended: all unattended envelopes (only from experiment 2, where there are two streams: 40 trials per participant = 160)
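As an illustration, here is a minimal sketch of how these datasets could be read back into Numpy arrays with `h5py` (the file name is a hypothetical example; one file exists per participant):

```python
import h5py

# Minimal sketch: read the per-participant datasets back into numpy arrays
with h5py.File('data_p1.h5', 'r') as f:
    eeg_TRF = f['eeg_TRF'][:]              # EEG preprocessed for stimulus reconstruction
    eeg_aSSR = f['eeg_aSSR'][:]            # EEG preprocessed for the aSSR analyses
    envAttended = f['envAttended'][:]      # attended envelopes
    envUnattended = f['envUnattended'][:]  # unattended envelopes (experiment 2 only)
```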
- The notebook `analyses_aSSR.ipynb` contains the analyses related to the aSSR. It uses the data created by the notebook `preprocessing.ipynb`.

Some analyses have been done in R: see the file `behavior.Rmd`.

Stimulus reconstruction has been done in the file `analyses_TRF.m` with the package [mTRF](https://sourceforge.net/projects/aespa/) by Crosse et al.

## Tools

You can find in this folder all the Python functions used in the analyses. The file `audio.py` contains the audio processing functions (envelope extraction, fetching audio files from the database etc.). The file `behavior.py` contains functions related to the behavior analyses; it covers everything from getting the data from couchDB to analyses such as d-prime calculation. The files `decodingSSR.py` and `decodingTRF.py` can be used to do the auditory steady-state response (aSSR) analyses and the stimulus reconstruction; they include functions used to prepare the data in the form required by the analyses. Finally, the file `eeg_utils.py` contains functions used for preprocessing and loading the data.

# Credit

These analyses use the Matlab open source package `mTRF`:

You can find it [here](https://sourceforge.net/projects/aespa/).

Crosse, M. J., Di Liberto, G. M., Bednar, A., & Lalor, E. C. (2016). The Multivariate Temporal Response Function (mTRF) Toolbox: A MATLAB Toolbox for Relating Neural Signals to Continuous Stimuli. Frontiers in Human Neuroscience, 10, 604. http://doi.org/10.3389/fnhum.2016.00604

# API

You can find the functions' documentation below:

## audio.py

#### `audioToNP(audioWebm, stream, verbose=False)`

Get a list of matrices containing audio from a list of webm files.

- **`audioWebm`** `array-like`

  List of webm audio files to convert.
- **`stream`** `str`

  Multiple sounds are associated with each trial; `stream` contains the
  characters used to discriminate between them ('36' or '44').
- **`verbose`** `bool`

  If True, more information is displayed.

Returns:

- **`audioList`** `array-like`

  List with audio matrices as elements. Its length is the number of trials.
- **`audioLen`** `int`

  Number of samples in each trial.

#### `butterLpass(data, cutoff, fs, order=5)`

Filter data with a low-pass Butterworth filter. A sketch of this kind of filter follows the entry.

- **`data`** `instance of numpy.array`

  Matrix of shape (samples,) containing the signal to filter.
- **`cutoff`** `float`

  The cutoff frequency in Hz.
- **`fs`** `float`

  The sampling frequency of the signal.
- **`order`** `int`

  Order of the filter.

Returns:

- **`y`** `instance of numpy.array`

  Matrix of shape (samples,) containing the filtered signal.
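For reference, a minimal sketch of this kind of filter with `scipy.signal` (an assumption about the implementation; the actual `butterLpass` may differ, e.g. in using causal rather than zero-phase filtering):

```python
from scipy import signal

def butter_lowpass_sketch(data, cutoff, fs, order=5):
    """Hedged sketch of a Butterworth low-pass filter, not the project's exact code."""
    nyq = 0.5 * fs                                      # Nyquist frequency
    b, a = signal.butter(order, cutoff / nyq, btype='low')
    return signal.filtfilt(b, a, data)                  # zero-phase: no group delay
```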
#### `downsampleTo64(data)`

Decimate data by a factor of 750 to go from 48000 Hz to 64 Hz. A staged-decimation sketch follows the entry.

- **`data`** `instance of numpy.array`

  Matrix to downsample.

Returns:

- **`newdata`** `instance of numpy.array`

  Downsampled matrix of shape (trial, time).
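Since a factor of 750 is far above what `scipy.signal.decimate` recommends for a single pass (around 13), a sketch would chain smaller stages whose product is 750; this staging is an assumption, not necessarily the project's exact implementation:

```python
from scipy import signal

def downsample_48k_to_64_sketch(data):
    """Hedged sketch: decimate along the last axis from 48000 Hz to 64 Hz."""
    for q in (6, 5, 5, 5):                  # 6 * 5 * 5 * 5 = 750
        data = signal.decimate(data, q, axis=-1, zero_phase=True)
    return data
```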
#### `fromWebmToWav(inputFile, filename, verbose=False)`

Convert a webm file from the database to wav by writing it to disk. The files
are not removed afterwards.

- **`inputFile`** `webm file`

  Webm audio file to convert into wav.
- **`filename`** `str`

  Base name used to store the files on disk.
- **`verbose`** `bool`

  If True, more information is displayed.

Returns:

- **`allAudioFiles`** `array-like`

  List of all audio files corresponding to the session, db etc.

#### `getAttendedAndUnattendedEnv(dbAddress, dbName, password, verbose, fs=48000.)`

Get all envelopes required for the analyses. The function returns 2D matrices
containing the attended and unattended envelopes.

- **`dbAddress`** `str`

  Path to the couch database.
- **`dbName`** `str`

  Name of the database on the couch instance.
- **`password`** `str`

  Password of the couch database.
- **`verbose`** `bool`

  If True, more information is displayed.
- **`fs`** `float`

  Sampling frequency in Hz.

Returns:

- **`attended`** `instance of numpy.array`

  Matrix of shape (trial, time) containing the envelopes of all attended
  streams.
- **`unattended`** `instance of numpy.array`

  Matrix of shape (trial, time) containing the envelopes of all unattended
  streams.

#### `getAudio(dbAddress, dbName, password, sessionNum, verbose=False)`

Get names of audio files from couchdb. This allows, for instance, using the
names in a query to get the actual audio files.

- **`dbAddress`** `str`

  Path to the couch database.
- **`dbName`** `str`

  Name of the database on the couch instance.
- **`password`** `str`

  Password of the couch database.
- **`sessionNum`** `int`

  Filter trials from a specific session number.

Returns:

- **`allAudioFiles`** `array-like`

  List of all audio files corresponding to the session, db etc.

#### `getAudioFilenames(dbAddress, dbName, password, sessionNum)`

Get names of audio files from couchdb. This allows, for instance, using the
names in a query to get the actual audio files.

- **`dbAddress`** `str`

  Path to the couch database.
- **`dbName`** `str`

  Name of the database on the couch instance.
- **`password`** `str`

  Password of the couch database.
- **`sessionNum`** `int`

  Filter trials from a specific session number.

Returns:

- **`allAudioFileNames`** `dict`

  Dictionary containing trial numbers as keys and arrays of audio file
  names as values.

#### `getConcatAudio(audioList, trialLen, verbose=False)`

Get all audio files in the form of one concatenated matrix containing the raw
audio and another one containing the envelopes.

- **`audioList`** `array-like`

  List containing audio matrices. Its length is the number of trials.
- **`trialLen`** `int`

  The number of samples in each trial.
- **`verbose`** `bool`

  If True, more information is displayed.

Returns:

- **`audioAll`** `instance of numpy.array`

  Matrix of shape (samples,) containing all audio trials concatenated.
- **`audioAllEnv`** `instance of numpy.array`

  Matrix of shape (samples,) containing all audio envelopes concatenated.

#### `getEnv(dbAddress, dbName, password, verbose, sessionNums, fs, stream)`

Get the requested envelope corresponding to the user, sessionNums, stream etc.

- **`dbAddress`** `str`

  Path to the couch database.
- **`dbName`** `str`

  Name of the database on the couch instance.
- **`password`** `str`

  Password of the couch database.
- **`verbose`** `bool`

  If True, more information is displayed.
- **`sessionNums`** `array-like`

  List of sessions to keep.
- **`fs`** `float`

  Sampling frequency in Hz.
- **`stream`** `str`

  Stream to keep ('36' or '44').

Returns:

- **`audioAllEnvFilt2D`** `instance of numpy.array`

  Matrix of shape (trial, time) containing the filtered envelopes.

#### `getWebm(dbAddress, dbName, password, sessionNums)`

Get webm audio files from couchdb.

- **`dbAddress`** `str`

  Path to the couch database.
- **`dbName`** `str`

  Name of the database on the couch instance.
- **`password`** `str`

  Password of the couch database.
- **`sessionNums`** `array-like`

  List of sessions to keep.

Returns:

- **`allAudioFiles`** `array-like`

  List of all audio files corresponding to the session, db etc.

#### `splitEnvInTrials(data, totalTrialNum, trialLen, verbose=False)`

Convert the concatenated array of sound to a 2D matrix of shape (trial, time).

- **`data`** `instance of numpy.array`

  Matrix of shape (samples,). Concatenated audio containing all trials in
  one 1D array.
- **`totalTrialNum`** `int`

  The number of trials contained in the matrix data.
- **`trialLen`** `int`

  The number of samples in one trial (all trials are expected to have the
  same length).

Returns:

- **`newData`** `instance of numpy.array`

  Matrix of shape (trial, time).

## behavior.py

#### `analyses(data, verbose)`

Evaluate the behavior data by computing hit rates and false alarm rates. The
continuous responses given by the participant are compared to the time stamps
of the gaps in the attended stream and also in the unattended one (if there
is one). For each response (a sketch of this matching procedure follows the
entry):

1. calculate the delay between this response and each attended gap
   (`lagCorrect`);
2. calculate the delay between this response and each unattended gap
   (`lagIncorrect`);
3. keep only positive values in each array, because a response comes after
   its gap; this removes all later gaps for this response;
4. take the smallest value in each array: `minCorrect` and `minIncorrect`;
5. consider that the response is linked to the gap if the delay is between
   `minThresh` and `maxThresh`.

The margins should avoid having bumps in the two streams separated by less
than `maxThresh - minThresh`.

- **`data`** `instance of pandas.DataFrame`

  Behavior data used to run the analyses.
- **`verbose`** `bool`

  Print more details about the process.

Returns:

- **`analyses`** `instance of pandas.DataFrame`

  Dataframe containing the number of hits and false alarms for each trial.
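A sketch of the matching procedure above for a single trial; the threshold values, variable names and the tie-breaking rule between streams are assumptions (the actual function operates on the full dataframe):

```python
import numpy as np

def matchResponsesSketch(respTimes, attendedGaps, unattendedGaps,
                         minThresh=0.2, maxThresh=1.0):
    """Hedged sketch of the hit / false-alarm matching for one trial."""
    hits, falseAlarms = 0, 0
    for resp in respTimes:
        # 1-2. delay between this response and each attended / unattended gap
        lagCorrect = resp - np.asarray(attendedGaps)
        lagIncorrect = resp - np.asarray(unattendedGaps)
        # 3. keep only positive delays: a response comes after its gap
        lagCorrect = lagCorrect[lagCorrect > 0]
        lagIncorrect = lagIncorrect[lagIncorrect > 0]
        # 4. closest preceding gap in each stream
        minCorrect = lagCorrect.min() if lagCorrect.size else np.inf
        minIncorrect = lagIncorrect.min() if lagIncorrect.size else np.inf
        # 5. attribute the response to the nearest gap inside the window
        if minThresh < minCorrect < maxThresh and minCorrect < minIncorrect:
            hits += 1
        elif minThresh < minIncorrect < maxThresh:
            falseAlarms += 1
    return hits, falseAlarms
```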
#### `checkLinkTrialsBehaviorEEG(trialBehavior, events, sessionNum, trigs, fs)`

Check that the answers recorded in the behavior data correspond to the
triggers emitted by these answers. This allows one to be sure that the EEG
data correspond to the behavior data.

- **`fs`** `float`

  EEG data sampling frequency in Hz.

Returns:

#### `getBehaviorData(dbName, sessionNums)`

Get behavior data from the couch database according to the name of the
database and the sessions.

- **`dbName`** `str`

  Name of the database on the couch instance.
- **`sessionNums`** `array-like`

  List of sessions to keep.

Returns:

- **`behaviorData`** `instance of pandas.DataFrame`

  Dataframe containing all parameters of all trials.

#### `getBehaviorDataSession(dbAddress, dbName, sessionNum)`

Fetch behavior data from couchdb (SOA, SNR and trial duration).

- **`dbAddress`** `str`

  Path to the couch database.
- **`dbName`** `str`

  Name of the database on the couch instance.
- **`sessionNum`** `int`

  Behavior data will be fetched from this session number.

Returns:

- **`allDoc`** `instance of pandas.DataFrame`

  A dataframe containing the requested data.

#### `getTrialNum(ref, allSubj, trialBehavior, **kwargs)`

Get the trial numbers corresponding to specific conditions.

- **`ref`** `int`

  If 1: the condition is all trials (as for the overall analyses:
  experiments 1 and 2).
- **`allSubj`** `bool`

  Choose to return the trial numbers for one or for all subjects.
- **`trialBehavior`** `instance of pandas.DataFrame`

  All behavior data. Trial numbers will be found according to the conditions
  present in this dataset.
- **`**kwargs`** `other arguments`

  Any condition can be passed as a keyword argument, e.g.
  `correctStream=[False]`.

Returns:

- **`allTrials`** `instance of numpy.array`

  List of trial numbers.

#### `plotTrial(data, correctBump, incorrectBump, gapNum, trial, hitTime, FATime, falseHitTime, resp)`

Plot a representation of a behavior trial. This shows the gaps of the
attended and unattended streams in green and red respectively, and the
responses as vertical gray lines.
Still to implement...

## decodingSSR.py

#### `calculateBaseline(data, fs)`

Calculate the baseline in order to take into account the fact that the EEG
response can be different for the two AM rates. This function computes the
ratio between the AM rates in the one-stream condition.

- **`data`** `instance of numpy.array`

  EEG data of shape (trial, time, electrode).
- **`fs`** `float`

  Sampling frequency in Hz.

Returns:

- **`ratio`** `float`

  Ratio between the 36 Hz stream and the 44 Hz stream.

#### `comparePicks(data, fs)`

Calculate the baseline in order to take into account the fact that the EEG
response can be different for the two AM rates. This function computes the
ratio between the AM rates in the one-stream condition. A sketch of the
underlying peak extraction follows the entry.

- **`data`** `instance of numpy.array`

  EEG data of shape (time, electrode).
- **`fs`** `float`

  Sampling frequency in Hz.

Returns:

- **`ratio`** `float`

  Ratio between the 36 Hz stream and the 44 Hz stream.
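As an illustration of the peak extraction these functions rely on, a sketch of reading the FFT amplitude at one AM rate (`peakAtFreqSketch` is an assumed helper with simplified handling of electrodes and baselines, not the project's exact code):

```python
import numpy as np

def peakAtFreqSketch(data, freq, fs):
    """Hedged sketch: FFT amplitude at `freq` (Hz) for a 1D signal."""
    spectrum = np.abs(np.fft.rfft(data))
    freqs = np.fft.rfftfreq(len(data), d=1.0 / fs)
    return spectrum[np.argmin(np.abs(freqs - freq))]   # closest FFT bin

# e.g. the ratio between the two AM rates on one averaged channel:
# ratio = peakAtFreqSketch(avg, 36., fs) / peakAtFreqSketch(avg, 44., fs)
```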
#### `crossVal(data, data1, fs)`

This function has changed; documentation to be updated.

- **`data`** `array-like`

  Shape (trial, time, electrode). Used to compute the peak at 36 Hz for each
  trial.
- **`data1`** `array-like`

  Shape (trial, time, electrode). Used to compute the peak at 44 Hz for each
  trial.
- **`fs`** `float`

  Sampling frequency in Hz.

Returns:

- **`aAll`** `array-like`

  List of peak values at 36 Hz from `data`. Length: the number of trials.
- **`bAll`** `array-like`

  List of peak values at 44 Hz from `data1`. Length: the number of trials.

#### `getBestAcc(durs, bestC)`

Return the c parameter corresponding to the best accuracy for the 4
participants and for each duration.

- **`durs`** `array-like`

  List of durations to consider.
- **`bestC`** `instance of pandas.DataFrame`

  Dataframe returned by the function `hyperOptC`.

Returns:

- **`p1AccAll`** `array-like`

  List of accuracies for each duration with the best c parameter (at 60 s)
  for participant 1.
- **`p2AccAll`** `array-like`

  List of accuracies for each duration with the best c parameter (at 60 s)
  for participant 2.
- **`p3AccAll`** `array-like`

  List of accuracies for each duration with the best c parameter (at 60 s)
  for participant 3.
- **`p4AccAll`** `array-like`

  List of accuracies for each duration with the best c parameter (at 60 s)
  for participant 4.

#### `getSSRAccuracyByDur(data, trials, fs)`

Get the classification accuracy according to the duration of the trials and
the trials used.

- **`data`** `array-like`

  Data used to check the accuracy.
- **`trials`** `array-like`

  Trials to consider.
- **`fs`** `float`

  Sampling frequency in Hz.

Returns:

- **`allComparisons`** `array-like`

  Array containing all comparisons (one for each duration).

#### `hyperOptC(data, c_vals, durs, electrodes, dprimeThresh, subjNum, condition, fs, trialBehaviorAll)`

Perform the hyperparameter optimization of the c parameter of the SVM
algorithm. Also compute the accuracy for a set of durations.

- **`data`** `array-like`

  Data to use. Shape (trial, time, electrode).
- **`c_vals`** `array-like`

  List of c values to try.
- **`durs`** `array-like`

  List of durations to use.
- **`electrodes`** `array-like`

  List of electrodes to consider.
- **`dprimeThresh`** `float`

  Threshold on d-prime above which a trial is included as a good trial.
- **`subjNum`** `array-like`

  List of subjects to consider.
- **`condition`** `str`

  'oneStream' or 'twoStreams': choose the condition.
- **`fs`** `float`

  Sampling frequency in Hz.
- **`trialBehaviorAll`** `instance of pandas.DataFrame`

  Behavior data from all participants.

Returns:

- **`bestC`** `instance of pandas.DataFrame`

  Dataframe containing the accuracy for each c parameter and duration.

## decodingTRF.py

All functions used to do the decoding from stimulus reconstruction.

#### `calculateCorr(env1, env2, fs, end=None)`

Get the correlations between env1 and env2 for each trial.

- **`env1`** `array-like`

  First set of envelopes, of shape (trial, time).
- **`env2`** `array-like`

  Second set of envelopes, of shape (trial, time).
- **`fs`** `float`

  Sampling frequency in Hz.
- **`end`** `float`

  End limit in seconds to take for each trial.

Returns:

- **`corrs`** `array-like`

  List of correlations of shape (trial, 1).

#### `getTRFAccuracyByDur(envAttended, envUnattended, envMismatch, envReconstructed, trials, trialsDualStream)`

Get the classification accuracy according to the duration of the trials and
the trials used. A sketch of the underlying decision rule follows the entry.

- **`envAttended`** `instance of numpy.array`

  Attended envelopes. Shape (trial, time).
- **`envUnattended`** `instance of numpy.array`

  Unattended envelopes. Shape (trial, time).
- **`envMismatch`** `instance of numpy.array`

  Mismatch envelopes (corresponding to another trial). Shape (trial, time).
- **`envReconstructed`** `instance of numpy.array`

  Reconstructed envelopes. Shape (trial, time).
- **`trials`** `array-like`

  Trials to consider.
- **`trialsDualStream`** `array-like`

  Trials to consider in the experiment 2 reference frame (attended vs.
  unattended, with only 40 trials).

Returns:

- **`classifMismatchTime`** `array-like`

  List of classification accuracies (one value per second) for attended
  versus mismatch stream.
- **`classifAtt_unattTime`** `array-like`

  List of classification accuracies (one value per second) for attended
  versus unattended stream (only trials included in the dual-stream part).
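The decision rule behind these accuracies can be sketched in a few lines: correlate the reconstructed envelope with both candidate envelopes and pick the stream with the higher correlation (an illustration assuming 2D arrays of shape (trial, time), not the exact project code):

```python
import numpy as np

def decodeAttendedSketch(envReconstructed, envAttended, envOther):
    """Hedged sketch: correlation-based classification accuracy per trial."""
    correct = 0
    for rec, att, other in zip(envReconstructed, envAttended, envOther):
        rAtt = np.corrcoef(rec, att)[0, 1]       # Pearson r with the attended stream
        rOther = np.corrcoef(rec, other)[0, 1]   # ... and with the competing stream
        correct += rAtt > rOther                 # trial decoded correctly if the
                                                 # attended stream wins
    return correct / len(envReconstructed)
```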
#### `getUnattendedTrialsNumber(trials)`

Get the trials for the dual-stream condition. Since this condition only ran
from trial 40 to trial 80, not all trials have to be considered.

- **`trials`** `array-like`

  Trials to consider.

Returns:

- **`newEnv`** `array-like`

  Envelopes of the selected trials, of shape (trial, time).

## eeg_utils.py

#### `loadDataH5(path, pathReconstructed)`

Load data from `.h5` files. This expects to load one file containing the EEG
and the envelopes of the stimuli, and another file containing the
reconstructed envelopes created from Matlab.

- **`path`** `str`

  Path to the `.h5` file containing the EEG and the stimuli envelopes.
- **`pathReconstructed`** `str`

  Path to the `.h5` file containing the reconstructed envelopes.

Returns:

- **`eeg_TRF`** `instance of numpy.array`

  A matrix of shape (trial, time, electrode) containing the data processed
  for the TRF.
- **`envAttended`** `instance of numpy.array`

  to do.
- **`envMismatch`** `instance of numpy.array`

  to do.
- **`envUnattended`** `instance of numpy.array`

  to do.
- **`envReconstructed`** `instance of numpy.array`

  to do.
- **`eeg_aSSR`** `instance of numpy.array`

  to do.

#### `processEEG(fnEEG, dbName, sessionNums, trialsToRemove, trialBehavior, fs, ref)`

Load and process EEG from a `.bdf` file. The data is filtered according to
`freqFilter`, re-referenced to the mastoids and downsampled to 64 Hz if
`downsampling` is set to True.

- **`fnEEG`** `str`

  Name of the bdf file containing the EEG data.
- **`dbName`** `str`

  Name of the database on the couch instance.
- **`sessionNums`** `array-like`

  List of sessions to keep.
- **`trialsToRemove`** `array-like`

  List of trials to remove from the analyses.
- **`ref`** `str`

  Choose between referencing to the mastoids ('mastoids') or to the average
  of all electrodes ('average').
- **`fs`** `float`

  Sampling frequency in Hz.

Returns:

- **`dataFilt3D64`** `instance of numpy.array`

  A matrix of shape (trial, time, electrode) containing the processed data.
--------------------------------------------------------------------------------