├── tools
    ├── __init__.py
    ├── decodingTRF.py
    ├── eeg_utils.py
    ├── decodingSSR.py
    ├── behavior.py
    └── audio.py
├── envAM.png
├── analyses_TRF.m
├── behavior.Rmd
├── analyses_behavior.ipynb
├── preprocessing.ipynb
└── readme.md


/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/envAM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hadrienj/decodingEEG/master/envAM.png


--------------------------------------------------------------------------------
/analyses_TRF.m:
--------------------------------------------------------------------------------
 1 | fs = 64;
 2 | map = -1;
 3 | tmin = -50;
 4 | tmax = 300;
 5 | lambda = [0.00000001];
 6 | 
 7 | eeg = hdf5read('data_p1.h5', 'eeg_TRF');
 8 | 
 9 | att = hdf5read('data_p1.h5', 'envAttended');
10 | 
11 | % 80 trials
12 | for i = 1:80
13 |     eeg_set{1, i} = eeg(:, :, i)';
14 |     att_set{1, i} = att(:, i)*2;
15 | end
16 | 
17 | 
18 | [r_att, p_att, ~, pred_att, model_att] = mTRFcrossval(att_set, eeg_set, fs, map, tmin, tmax, lambda);
19 | 
20 | 
21 | % convert to h5
22 | hdf5write('reconstructed_p1.h5', 'reconstructed', pred_att);
23 | 


--------------------------------------------------------------------------------
/behavior.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "behavior"
 3 | author: "hj"
 4 | date: "29/06/2018"
 5 | output: html_document
 6 | ---
 7 | 
 8 | ```{r setup, include=FALSE}
 9 | 
10 | library(ggplot2)
11 | library(dplyr)
12 | 
13 | cols <- c('#36BBB8', '#FF54A4', '#0E78D5')
14 | 
15 | 
16 | simpleTheme <- theme(panel.grid.major.y = element_blank(),
17 |                      strip.text = element_text(size=12),
18 |                      axis.line = element_line(color='grey60', size=1),
19 |                      axis.ticks = element_line(color = "grey60", size = 1),
20 |                      axis.title = element_text(color = "grey10", size=36),
21 |                      axis.text = element_text(color = "grey30", size=26),
22 |                      legend.text = element_text(color = "grey30", size=26),
23 |                      panel.background = element_blank(),
24 |                      legend.key = element_blank())
25 | 
26 | 
27 | data <-read.csv('behavior.csv')
28 | 
29 | data$TC <- ifelse(data$TC == 'False', 0, 1)
30 | data$twoStreams <- ifelse(data$twoStreams == 'False', 0, 1)
31 | 
32 | data$TC <- factor(data$TC, labels=c('Absent', 'Present'))
33 | 
34 | 
35 | ```
36 | 
37 | 
38 | ```{r, results='hide',fig.keep='all',fig.height=12, fig.width=12}
39 | 
40 | effect_TC <- data %>%
41 |   filter(twoStreams==0) %>%
42 |   group_by(TC, name) %>%
43 |   summarise(mean1=mean(dprime)) %>%
44 |   ungroup() %>%
45 |   group_by(TC) %>%
46 |   summarise(mean=mean(mean1),
47 |             N=n(),
48 |             se=sd(mean1)/sqrt(N)) %>%
49 |   ggplot(data=., aes(x=TC, y=mean)) +
50 |     geom_bar(stat = 'identity', width = 0.5, position = position_dodge(width=0.1), fill='grey50') +
51 |     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=0.05) +
52 |     xlab('Tone cloud') +
53 |     ylab('D-prime') +
54 |     simpleTheme
55 | 
56 | ggsave('images/effect_TC.pdf', effect_TC)
57 | 
58 | exp2_dprime <- data %>%
59 |   filter(twoStreams==1) %>%
60 |   group_by(name) %>%
61 |   summarise(mean1=mean(dprime)) %>%
62 |   ungroup() %>%
63 |   group_by(TC) %>%
64 |   summarise(mean=mean(mean1),
65 |             N=n(),
66 |             se=sd(mean1)/sqrt(N)) %>%
67 |   ggplot(data=., aes(x=TC, y=mean)) +
68 |     geom_bar(stat = 'identity', width = 0.5, position = position_dodge(width=0.1)) +
69 |     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=0.05) +
70 |     xlab('Tone cloud') +
71 |     ylab('D-prime') +
72 |     simpleTheme
73 | 
74 | ggsave('images/exp2_dprime.pdf', exp2_dprime)
75 | ```
76 | 
77 | 


--------------------------------------------------------------------------------
/tools/decodingTRF.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | def calculateCorr(env1, env2, fs, end=None):
  4 |     """
  5 |     Get correlations between env1 and env2 for each trials.
  6 | 
  7 |     Parameters
  8 |     ----------
  9 |     env1 : array-type
 10 |         First list of envelope of shape (trial, time).
 11 |     env2 : array-type
 12 |         Second list of envelope of shape (trial, time).
 13 |     fs : float
 14 |         Sampling frequency in Hz.
 15 |     end : float
 16 |         End limit in seconds to take for each trial.
 17 | 
 18 |     Returns:
 19 | 
 20 |     corrs : array-type
 21 |         List of correlations of shape (trial, 1).
 22 |     """
 23 |     if (env1.shape[0] != env2.shape[0] or env1.shape[1] != env2.shape[1]):
 24 |         raise ValueError("Shapes of the envelopes have to be identical\
 25 |  but they are: %s and %s" % (env1.shape, env2.shape))
 26 | 
 27 |     if end is None:
 28 |         end = env1.shape[1]
 29 |     else:
 30 |         end = int(np.round(fs*end))
 31 | 
 32 | 
 33 |     trials = env1.shape[0]
 34 |     corrs = np.zeros(trials)
 35 |     for trial in range(trials):
 36 |         corrs[trial] = np.corrcoef(env1[trial, :end],
 37 |                                    env2[trial, :end])[0, 1]
 38 | 
 39 |     return corrs
 40 | 
 41 | def getTRFAccuracyByDur(envAttended, envUnattended, envMismatch, envReconstructed, trials, trialsDualStream):
 42 |     """
 43 |     Get the classification accuracy according to duration of trials and trials used.
 44 | 
 45 |     Parameters
 46 |     ----------
 47 |     envAttended : instance of numpy.array
 48 |         Attended envelopes. Shape (trial, time).
 49 |     envUnattended : instance of numpy.array
 50 |         Unattended envelopes. Shape (trial, time).
 51 |     envMismatch : instance of numpy.array
 52 |         Mismatch envelopes (corresponding to another trial). Shape (trial, time).
 53 |     envReconstructed : instance of numpy.array
 54 |         Reconstructed envelopes. Shape (trial, time).
 55 |     trials : array-type
 56 |         Trials to consider.
 57 |     trialsDualStream : array-like
 58 |         Trials to consider in the exp 2 referential (attended vs unattended with
 59 |         only 40 trials)
 60 | 
 61 |     Returns
 62 |     -------
 63 |     classifMismatchTime : array-type
 64 |         List of classification accuracies (one value per second) for attended versus
 65 |         mismatch stream.
 66 |     classifAtt_unattTime : array-type
 67 |         List of classification accuracies (one value per second) for attended versus
 68 |         unattended stream (only trials included in the dual stream part).
 69 |     """
 70 |     # trialsUnattended = getUnattendedTrialsNumber(trials)
 71 |     # envUnattendedTrials = envUnattended[trialsUnattended]
 72 |     classifMismatchTime = []
 73 |     classifAtt_unattTime = []
 74 |     testAll = []
 75 |     # print 'trialsUnattended', trialsUnattended
 76 |     for i in range(0, 61):
 77 |         # Calculate all correlations without taking trials into account
 78 |         corrsAttended = calculateCorr(envAttended, envReconstructed,
 79 |             fs=64, end=i)
 80 | 
 81 |         corrsMismatch = calculateCorr(envMismatch, envReconstructed,
 82 |             fs=64, end=i)
 83 | 
 84 |         corrsUnattendedDualStream = calculateCorr(envUnattended,
 85 |             envReconstructed[trialsDualStream], fs=64, end=i)
 86 |         # print 'trialsDualStream', trialsDualStream
 87 |         # print 'corrsUnattendedDualStream', np.mean(corrsUnattendedDualStream)
 88 |         # Calculate the classification accuracy by selecting the trials to be used
 89 |         # Since the first trial dual streams is the number 40 we had to add 40 to the
 90 |         # trial number from the unattended part to the attended part
 91 |         classifMismatch = np.mean(corrsAttended[trials]>corrsMismatch[trials])
 92 |         classifMismatchTime.append(classifMismatch)
 93 | 
 94 |         classifAtt_unatt = np.mean(corrsAttended[trialsDualStream]>corrsUnattendedDualStream)
 95 |         classifAtt_unattTime.append(classifAtt_unatt)
 96 | 
 97 |         test = np.mean(corrsUnattendedDualStream>corrsMismatch[trialsDualStream])
 98 |         testAll.append(test)
 99 | 
100 |     return classifMismatchTime, classifAtt_unattTime, testAll
101 | 
102 | def getUnattendedTrialsNumber(trials):
103 |     """
104 |     Get the trials for the condition dual streams. Since this condition was only from
105 |     trial 40 to 80 these trials not all trials have to be considered.
106 | 
107 |     Parameters
108 |     ----------
109 |     trials : array-type
110 |         Trials to consider.
111 |     Returns:
112 | 
113 |     newEnv : array-type
114 |         Envelope of the selected trials of shape (trials, time).
115 |     """
116 |     trialsUnattendedAll = np.arange(40, 80)
117 |     trialsUnattendedIdx = np.isin(trials, trialsUnattendedAll)
118 |     trialsUnattended = trials[trialsUnattendedIdx] - 40
119 | 
120 |     return trialsUnattended
121 | 
122 | 


--------------------------------------------------------------------------------
/tools/eeg_utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | from scipy import signal
  4 | from eeg import loadEEG, getEvents, chebyBandpassFilter, refToMastoids,\
  5 |     create3DMatrix, getTrialNumList, refToAverageNP
  6 | from behavior import getBehaviorData
  7 | import h5py
  8 | 
  9 | def processEEG(fnEEG, dbName, sessionNums, trialsToRemove, trialBehavior, fs, ref):
 10 |     """
 11 |     Load and process EEG from .bdf file. The data is filtered according to
 12 |     `freqFilter`, re-referenced according to the mastoids and downsampled
 13 |     to 64 Hz if `downsampling` is set to True.
 14 | 
 15 |     Parameters
 16 |     ----------
 17 |     fn : str
 18 |         Name of the bdf containing the EEG data.
 19 |     dbName : str
 20 |         Name of the database on the couch instance.
 21 |     sessionNums : array-type
 22 |         List of sessions to keep.
 23 |     trialsToRemove : array-type
 24 |         List of trials to remove from the analyses.
 25 |     ref : str
 26 |         Choose between referencing to mastoids ('mastoids') or to the average
 27 |         of all electrodes ('average').
 28 |     fs : float
 29 |         Sampling frequency in Hz.
 30 | 
 31 |     Returns
 32 |     -------
 33 |     dataFilt3D64 : instance of numpy.array
 34 |         A matrix of shape (trial, time, electrode) containing the processed data.
 35 |     """
 36 |     if ref != 'average' and ref != 'mastoids':
 37 |         raise ValueError('Bad `ref` argument!')
 38 | 
 39 |     # Loading
 40 |     raw = loadEEG(fnEEG)
 41 |     print raw.ch_names[:64]
 42 |     data = raw[:, :][0].T
 43 | 
 44 |     # Get triggers
 45 |     trigs = getEvents(raw=raw, eventCode=65282, shortest_event=1)
 46 |     # Some triggers have been sent but the trial not done due to experimental errors
 47 |     # Let's remove these trials in the EEG data
 48 |     newTrigs = trigs.drop(trigs.index[trialsToRemove]).reset_index(drop=True)
 49 | 
 50 |     # Filtering
 51 |     zpk, dataFiltTRF = chebyBandpassFilter(data, [0.5, 1, 14.5, 15], gstop=80, gpass=1,
 52 |         fs=fs)
 53 |     zpk, dataFiltSSR = chebyBandpassFilter(data, [0.5, 1, 100, 101], gstop=80, gpass=1,
 54 |         fs=fs)
 55 | 
 56 |     del data
 57 | 
 58 |     if ref=='mastoids':
 59 |         dataFiltTRF = pd.DataFrame(dataFiltTRF, columns=raw.ch_names)
 60 |         dataFiltSSR = pd.DataFrame(dataFiltSSR, columns=raw.ch_names)
 61 |         # Re-referencing
 62 |         dataFiltRefTRF = refToMastoids(dataFiltTRF,
 63 |                                     dataFiltTRF['M1'],
 64 |                                     dataFiltTRF['M2']).iloc[:, :64]
 65 |         del dataFiltTRF
 66 |         # Re-referencing
 67 |         dataFiltRefSSR = refToMastoids(dataFiltSSR,
 68 |                                     dataFiltSSR['M1'],
 69 |                                     dataFiltSSR['M2']).iloc[:, :64]
 70 |         del dataFiltSSR
 71 |     elif ref=='average':
 72 |         # Re-referencing
 73 |         dataFiltRefTRF = refToAverageNP(dataFiltTRF[:, :64])
 74 |         del dataFiltTRF
 75 |         # Re-referencing
 76 |         dataFiltRefSSR = refToAverageNP(dataFiltSSR[:, :64])
 77 |         del dataFiltSSR
 78 | 
 79 |         dataFiltRefTRF = pd.DataFrame(dataFiltRefTRF, columns=raw.ch_names[:64])
 80 |         dataFiltRefSSR = pd.DataFrame(dataFiltRefSSR, columns=raw.ch_names[:64])
 81 | 
 82 |     trialDur = 60
 83 |     # Changing shape to 3D matrix
 84 |     # Choose the length according to the number of sample in the sound files
 85 |     dataFilt3DTRF = create3DMatrix(data=dataFiltRefTRF, trialTable=trialBehavior,
 86 |                            events=newTrigs, trialList=getTrialNumList(trialBehavior),
 87 |                            trialDur=trialDur, fs=fs, normalize=False, baselineDur=0)
 88 |     del dataFiltRefTRF
 89 |     dataFilt3DSSR = create3DMatrix(data=dataFiltRefSSR, trialTable=trialBehavior,
 90 |                            events=newTrigs, trialList=getTrialNumList(trialBehavior),
 91 |                            trialDur=trialDur, fs=fs, normalize=False, baselineDur=0)
 92 | 
 93 |     del dataFiltRefSSR
 94 | 
 95 |     # Remove the first two seconds to avoid bias since in some trials one
 96 |     # stream starts 2 seconds before the other
 97 |     start = int(np.round(2*fs))
 98 |     # Remove last two seconds that should be less reliable
 99 |     end = start + int(np.round((trialDur - 2)*fs))
100 | 
101 |     dataFilt3DTRF = dataFilt3DTRF[:, start:end, :]
102 |     dataFilt3DSSR = dataFilt3DSSR[:, start:end, :]
103 | 
104 |     # Downsampling
105 |     dataFilt3DTRF64 = signal.decimate(dataFilt3DTRF, q=8, axis=1, zero_phase=True)
106 | 
107 |     return dataFilt3DTRF64, dataFilt3DSSR
108 | 
109 | def loadDataH5(path, pathReconstructed):
110 |     """
111 |     Load data from .h5 file. This expects to load one file containing the EEG
112 |     and the envelopes of the stimuli and another file the reconstructed
113 |     envelope created from Matlab.
114 | 
115 |     Parameters
116 |     ----------
117 |     path : str
118 |         Path to the `.h5` file containing EEG and stimuli envelopes.
119 |     pathReconstructed : str
120 |         Path to the `.h5` file containing the reconstructed envelopes.
121 | 
122 |     Returns
123 |     -------
124 |     eeg_TRF : instance of numpy.array
125 |         A matrix of shape (trial, time, electrode) containing the data processed
126 |         for the TRF.
127 |     eeg_TRF : instance of numpy.array
128 |         to do.
129 |     envMismatch : instance of numpy.array
130 |         to do.
131 |     envUnattended : instance of numpy.array
132 |         to do.
133 |     envReconstructed : instance of numpy.array
134 |         to do.
135 |     eeg_aSSR : instance of numpy.array
136 |         to do.
137 |     """
138 |     f1 = h5py.File(path, 'r')
139 |     eeg_TRF = np.array(list(f1['eeg_TRF']))
140 |     eeg_aSSR = np.array(list(f1['eeg_aSSR']))
141 |     envAttended = np.array(list(f1['envAttended']))
142 |     envUnattended = np.array(list(f1['envUnattended']))
143 |     f1.close()
144 | 
145 |     f2 = h5py.File(pathReconstructed, 'r')
146 |     envReconstructed = np.array(list(f2['reconstructed']))
147 |     f2.close()
148 | 
149 |     # Roll trials to create mismatch envelope:
150 |     envMismatch = np.roll(envAttended, 1, axis=0)
151 | 
152 |     return eeg_TRF, envAttended, envMismatch, envUnattended, envReconstructed, eeg_aSSR
153 | 
154 | 


--------------------------------------------------------------------------------
/analyses_behavior.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "hideCode": true,
  8 |     "hidePrompt": true
  9 |    },
 10 |    "outputs": [],
 11 |    "source": [
 12 |     "import numpy as np\n",
 13 |     "import pandas as pd\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "\n",
 17 |     "from sklearn import svm\n",
 18 |     "from sklearn.model_selection import train_test_split\n",
 19 |     "\n",
 20 |     "import sys\n",
 21 |     "sys.path.append(\"/Users/lsp/Documents/projets/en_cours/eeg/eeg\")\n",
 22 |     "from eeg import create3DMatrix, loadEEG, getEvents, getTrialNumList, plotFFTElectrodes, plot3DMatrix, computePickEnergy\n",
 23 |     "\n",
 24 |     "sys.path.append(\"/Users/lsp/Documents/projets/en_cours/eeg/dss\")\n",
 25 |     "from dss import crossValidateDSS, createBiasedSignal, covUnnorm, computeDSS, applyDSS\n",
 26 |     "\n",
 27 |     "from tools.eeg_utils import processEEG, loadDataH5\n",
 28 |     "from tools.audio import getAttendedAndUnattendedEnv, butterLpass\n",
 29 |     "from tools.behavior import getBehaviorData, analyses, checkLinkTrialsBehaviorEEG\n",
 30 |     "from tools.decodingTRF import getTRFAccuracyByDur, getUnattendedTrialsNumber, calculateCorr\n",
 31 |     "from tools.decodingSSR import calculateBaseline, comparePicks, getSSRAccuracyByDur, crossVal\n"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "hideCode": true,
 39 |     "hidePrompt": true
 40 |    },
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "%%javascript\n",
 44 |     "IPython.OutputArea.prototype._should_scroll = function(lines) {\n",
 45 |     "    return false;\n",
 46 |     "}"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": null,
 52 |    "metadata": {
 53 |     "hideCode": true,
 54 |     "hidePrompt": true
 55 |    },
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "%load_ext autoreload\n",
 59 |     "%autoreload 2"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": null,
 65 |    "metadata": {
 66 |     "hideCode": true,
 67 |     "hidePrompt": true
 68 |    },
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "%matplotlib notebook"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {
 78 |     "hideCode": true,
 79 |     "hidePrompt": true
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "# Plot parameters\n",
 84 |     "plt.style.use('ggplot')\n",
 85 |     "plt.rcParams['axes.facecolor']='w'\n",
 86 |     "\n",
 87 |     "%pylab inline\n",
 88 |     "pylab.rcParams['figure.figsize'] = (18, 10)"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "# Behavioral analyses"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 7,
101 |    "metadata": {},
102 |    "outputs": [
103 |     {
104 |      "name": "stdout",
105 |      "output_type": "stream",
106 |      "text": [
107 |       "loading session 1\n",
108 |       "loading session 2\n",
109 |       "loading session 3\n",
110 |       "loading session 4\n",
111 |       "loading session 5\n",
112 |       "loading session 6\n",
113 |       "loading session 7\n",
114 |       "loading session 8\n",
115 |       "loading session 1\n",
116 |       "loading session 2\n",
117 |       "loading session 3\n",
118 |       "loading session 4\n",
119 |       "loading session 5\n",
120 |       "loading session 6\n",
121 |       "loading session 7\n",
122 |       "loading session 8\n",
123 |       "loading session 1\n",
124 |       "loading session 2\n",
125 |       "loading session 3\n",
126 |       "loading session 4\n",
127 |       "loading session 5\n",
128 |       "loading session 6\n",
129 |       "loading session 7\n",
130 |       "loading session 8\n",
131 |       "loading session 1\n",
132 |       "loading session 2\n",
133 |       "loading session 3\n",
134 |       "loading session 4\n",
135 |       "loading session 5\n",
136 |       "loading session 6\n",
137 |       "loading session 7\n",
138 |       "loading session 8\n"
139 |      ]
140 |     }
141 |    ],
142 |    "source": [
143 |     "# Get behavior data\n",
144 |     "trialBehavior_p1 = getBehaviorData(dbName='hijee_18_06', sessionNums=np.arange(1, 9))\n",
145 |     "trialBehavior_p2 = getBehaviorData(dbName='thomas_20_06', sessionNums=np.arange(1, 9))\n",
146 |     "trialBehavior_p3 = getBehaviorData(dbName='isaac_21_06', sessionNums=np.arange(1, 9))\n",
147 |     "trialBehavior_p4 = getBehaviorData(dbName='thomas_22_06', sessionNums=np.arange(1, 9))"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 66,
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": [
156 |     "subjs = [trialBehavior_p1, trialBehavior_p2, trialBehavior_p3, trialBehavior_p4]\n",
157 |     "allPerf = pd.DataFrame()\n",
158 |     "for i in range(len(subjs)):\n",
159 |     "    performances = analyses(subjs[i], verbose=False)\n",
160 |     "    performances['name'] = i\n",
161 |     "    allPerf = allPerf.append(performances, ignore_index=True)"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 67,
167 |    "metadata": {},
168 |    "outputs": [],
169 |    "source": [
170 |     "allPerf.to_csv('behavior.csv')"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "metadata": {},
176 |    "source": [
177 |     "The analyses have been done in R from this csv file. See the file `behavior.Rmd`."
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": null,
183 |    "metadata": {},
184 |    "outputs": [],
185 |    "source": []
186 |   }
187 |  ],
188 |  "metadata": {
189 |   "kernelspec": {
190 |    "display_name": "Python 2",
191 |    "language": "python",
192 |    "name": "python2"
193 |   },
194 |   "language_info": {
195 |    "codemirror_mode": {
196 |     "name": "ipython",
197 |     "version": 2
198 |    },
199 |    "file_extension": ".py",
200 |    "mimetype": "text/x-python",
201 |    "name": "python",
202 |    "nbconvert_exporter": "python",
203 |    "pygments_lexer": "ipython2",
204 |    "version": "2.7.10"
205 |   },
206 |   "varInspector": {
207 |    "cols": {
208 |     "lenName": 16,
209 |     "lenType": 16,
210 |     "lenVar": 40
211 |    },
212 |    "kernels_config": {
213 |     "python": {
214 |      "delete_cmd_postfix": "",
215 |      "delete_cmd_prefix": "del ",
216 |      "library": "var_list.py",
217 |      "varRefreshCmd": "print(var_dic_list())"
218 |     },
219 |     "r": {
220 |      "delete_cmd_postfix": ") ",
221 |      "delete_cmd_prefix": "rm(",
222 |      "library": "var_list.r",
223 |      "varRefreshCmd": "cat(var_dic_list()) "
224 |     }
225 |    },
226 |    "types_to_exclude": [
227 |     "module",
228 |     "function",
229 |     "builtin_function_or_method",
230 |     "instance",
231 |     "_Feature"
232 |    ],
233 |    "window_display": false
234 |   }
235 |  },
236 |  "nbformat": 4,
237 |  "nbformat_minor": 2
238 | }
239 | 


--------------------------------------------------------------------------------
/preprocessing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "hideCode": true,
  8 |     "hidePrompt": true
  9 |    },
 10 |    "outputs": [],
 11 |    "source": [
 12 |     "import numpy as np\n",
 13 |     "import pandas as pd\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import h5py\n",
 16 |     "\n",
 17 |     "import sys\n",
 18 |     "sys.path.append(\"/Users/lsp/Documents/projets/en_cours/eeg/eeg\")\n",
 19 |     "from eeg import create3DMatrix, loadEEG, getEvents, getTrialNumList, plotFFTElectrodes, plot3DMatrix, computePickEnergy\n",
 20 |     "\n",
 21 |     "sys.path.append(\"/Users/lsp/Documents/projets/en_cours/eeg/dss\")\n",
 22 |     "from dss import crossValidateDSS, createBiasedSignal\n",
 23 |     "\n",
 24 |     "from tools.eeg_utils import processEEG\n",
 25 |     "from tools.audio import getAttendedAndUnattendedEnv\n",
 26 |     "from tools.behavior import getBehaviorData, analyses, checkLinkTrialsBehaviorEEG\n",
 27 |     "from tools.decodingTRF import getTRFAccuracyByDur, getUnattendedTrialsNumber, calculateCorr\n",
 28 |     "from tools.decodingSSR import calculateBaseline, comparePicks, getSSRAccuracyByDur, crossVal\n"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "metadata": {
 35 |     "hideCode": true,
 36 |     "hidePrompt": true
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "%%javascript\n",
 41 |     "IPython.OutputArea.prototype._should_scroll = function(lines) {\n",
 42 |     "    return false;\n",
 43 |     "}"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {
 50 |     "hideCode": true,
 51 |     "hidePrompt": true
 52 |    },
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "%load_ext autoreload\n",
 56 |     "%autoreload 2"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": null,
 62 |    "metadata": {
 63 |     "hideCode": true,
 64 |     "hidePrompt": true
 65 |    },
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "%matplotlib notebook"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": null,
 74 |    "metadata": {
 75 |     "hideCode": true,
 76 |     "hidePrompt": true
 77 |    },
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "# Plot parameters\n",
 81 |     "plt.style.use('ggplot')\n",
 82 |     "plt.rcParams['axes.facecolor']='w'\n",
 83 |     "\n",
 84 |     "%pylab inline\n",
 85 |     "pylab.rcParams['figure.figsize'] = (18, 10)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {},
 91 |    "source": [
 92 |     "# Preprocessing\n",
 93 |     "\n",
 94 |     "The goal of this notebook is to preprocess the raw data in order to create a file for each participant containing:\n",
 95 |     "\n",
 96 |     "- The EEG data filtered between 1 and 15 Hz and downsampled to 64 Hz for the stimulus reconstruction (TRF). The shape of this matrix is (trial, time, electrode).\n",
 97 |     "- The EEG data filtered between 1 and 100 Hz for the auditory steady-state response analysis (aSSR). The shape of this matrix is (trial, time, electrode).\n",
 98 |     "- The envelopes of the attended stimuli. The shape of this matrix is (trial, time).\n",
 99 |     "- The envelopes of the unattended stimuli. The shape of this matrix is (trial, time)."
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": null,
105 |    "metadata": {
106 |     "hideCode": true,
107 |     "hidePrompt": true
108 |    },
109 |    "outputs": [],
110 |    "source": [
111 |     "def processingEEG(fnEEG, dbName, sessionNums, trialsToRemove, fs):\n",
112 |     "    password = \"a\"\n",
113 |     "    dbAddress = \"https://db.auditory.fr:6984/\"\n",
114 |     "    \n",
115 |     "    # Get behavior data\n",
116 |     "    trialBehavior = getBehaviorData(dbName, sessionNums)\n",
117 |     "    \n",
118 |     "    # Processing for TRF and SSR\n",
119 |     "    dataFilt3DTRF64, dataFilt3DSSR = processEEG(fnEEG, dbName, sessionNums,\n",
120 |     "                                                trialsToRemove, trialBehavior,\n",
121 |     "                                                fs, ref='average')\n",
122 |     "    # Get the envelopes for TRF\n",
123 |     "    envAttended, envUnattended = getAttendedAndUnattendedEnv(dbAddress, dbName,\n",
124 |     "                                                         password, verbose=True,\n",
125 |     "                                                            fs=48000.)\n",
126 |     "\n",
127 |     "    # Get the minimum length to have all matrices the same length for TRF\n",
128 |     "    minLen = np.min([envAttended.shape[1], envUnattended.shape[1], dataFilt3DTRF64.shape[1]])\n",
129 |     "    \n",
130 |     "    dataFilt3DTRF64 = dataFilt3DTRF64[:, :minLen, :]\n",
131 |     "    envAttended = envAttended[:, :minLen]\n",
132 |     "    envUnattended = envUnattended[:, :minLen]\n",
133 |     "    \n",
134 |     "    # Export eeg data and attended envelope to do the TRF in Matlab and eeg for SSR (caching)\n",
135 |     "    with h5py.File('data_preproc/data_%s1.h5' % dbName, 'w') as hf:\n",
136 |     "        hf.create_dataset(\"eeg_TRF\", data=dataFilt3DTRF64)\n",
137 |     "        hf.create_dataset(\"eeg_aSSR\", data=dataFilt3DSSR)\n",
138 |     "        hf.create_dataset(\"envAttended\", data=envAttended)\n",
139 |     "        hf.create_dataset(\"envUnattended\", data=envUnattended)\n",
140 |     "    \n",
141 |     "    return dataFilt3DTRF64, envAttended, envUnattended, dataFilt3DSSR, trialBehavior"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "markdown",
146 |    "metadata": {},
147 |    "source": [
148 |     "We used the function checkLinkTrialsBehaviorEEG (see package doc [here]()) to check the coherence between behavior and EEG. The triggers have to be updated:\n",
149 |     "\n",
150 |     "Remove the following trials:\n",
151 |     "\n",
152 |     "- hijee_18_06: 10\n",
153 |     "- thomas_20_06_18: nothing\n",
154 |     "- isaac_21_06: nothing\n",
155 |     "- thomas_s_22_06_18: nothing"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "markdown",
160 |    "metadata": {},
161 |    "source": [
162 |     "# Preprocess data for each participant"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": null,
168 |    "metadata": {
169 |     "hideCode": true,
170 |     "hidePrompt": true
171 |    },
172 |    "outputs": [],
173 |    "source": [
174 |     "dataFilt3DTRF64_p1, envAttended_p1, envUnattended_p1, dataFilt3DSSR_p1, trialBehavior_p1 = processingEEG(fnEEG='data_raw/p1.bdf',\n",
175 |     "                                                      dbName=\"hijee_18_06\",\n",
176 |     "                                                      sessionNums=np.arange(1, 9),\n",
177 |     "                                                      trialsToRemove=[10],\n",
178 |     "                                                      fs=512.)\n"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {
185 |     "hideCode": true,
186 |     "hidePrompt": true
187 |    },
188 |    "outputs": [],
189 |    "source": [
190 |     "dataFilt3DTRF64_p2, envAttended_p2, envUnattended_p2, dataFilt3DSSR_p2, trialBehavior_p2 = processingEEG(fnEEG='data_raw/p2.bdf',\n",
191 |     "                                                               dbName=\"thomas_20_06\",\n",
192 |     "                                                               sessionNums=np.arange(1, 9),\n",
193 |     "                                                               trialsToRemove=[],\n",
194 |     "                                                               fs=512.)"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": null,
200 |    "metadata": {
201 |     "hideCode": true,
202 |     "hidePrompt": true
203 |    },
204 |    "outputs": [],
205 |    "source": [
206 |     "dataFilt3DTRF64_p3, envAttended_p3, envUnattended_p3, dataFilt3DSSR_p3, trialBehavior_p3 = processingEEG(fnEEG='data_raw/p3.bdf',\n",
207 |     "                                                               dbName=\"isaac_21_06\",\n",
208 |     "                                                               sessionNums=np.arange(1, 9),\n",
209 |     "                                                               trialsToRemove=[],\n",
210 |     "                                                               fs=512.)"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": null,
216 |    "metadata": {
217 |     "hideCode": true,
218 |     "hidePrompt": true
219 |    },
220 |    "outputs": [],
221 |    "source": [
222 |     "dataFilt3DTRF64_p4, envAttended_p4, envUnattended_p4, dataFilt3DSSR_p4, trialBehavior_p4 = processingEEG(fnEEG='data_raw/p4.bdf',\n",
223 |     "                                                               dbName=\"thomas_22_06\",\n",
224 |     "                                                               sessionNums=np.arange(1, 9),\n",
225 |     "                                                               trialsToRemove=[],\n",
226 |     "                                                               fs=512.)"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": null,
232 |    "metadata": {},
233 |    "outputs": [],
234 |    "source": []
235 |   }
236 |  ],
237 |  "metadata": {
238 |   "kernelspec": {
239 |    "display_name": "Python 2",
240 |    "language": "python",
241 |    "name": "python2"
242 |   },
243 |   "language_info": {
244 |    "codemirror_mode": {
245 |     "name": "ipython",
246 |     "version": 2
247 |    },
248 |    "file_extension": ".py",
249 |    "mimetype": "text/x-python",
250 |    "name": "python",
251 |    "nbconvert_exporter": "python",
252 |    "pygments_lexer": "ipython2",
253 |    "version": "2.7.10"
254 |   },
255 |   "varInspector": {
256 |    "cols": {
257 |     "lenName": 16,
258 |     "lenType": 16,
259 |     "lenVar": 40
260 |    },
261 |    "kernels_config": {
262 |     "python": {
263 |      "delete_cmd_postfix": "",
264 |      "delete_cmd_prefix": "del ",
265 |      "library": "var_list.py",
266 |      "varRefreshCmd": "print(var_dic_list())"
267 |     },
268 |     "r": {
269 |      "delete_cmd_postfix": ") ",
270 |      "delete_cmd_prefix": "rm(",
271 |      "library": "var_list.r",
272 |      "varRefreshCmd": "cat(var_dic_list()) "
273 |     }
274 |    },
275 |    "types_to_exclude": [
276 |     "module",
277 |     "function",
278 |     "builtin_function_or_method",
279 |     "instance",
280 |     "_Feature"
281 |    ],
282 |    "window_display": false
283 |   }
284 |  },
285 |  "nbformat": 4,
286 |  "nbformat_minor": 2
287 | }
288 | 


--------------------------------------------------------------------------------
/tools/decodingSSR.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | from eeg import computePickEnergy
  4 | from behavior import analyses
  5 | 
  6 | from sklearn import svm
  7 | from sklearn.model_selection import train_test_split
  8 | 
  9 | def calculateBaseline(data, fs):
 10 |     """
 11 |     Calculate the baseline in order to take into account the fact that the eeg
 12 |     response can be different for the two AM rates. This functions computes
 13 |     the ratio between the AM rates in the one stream condition.
 14 | 
 15 |     Parameters
 16 |     ----------
 17 |     eegData : instance of numpy.array
 18 |         EEG data of shape (trial, time, electrode).
 19 | 
 20 |     Returns
 21 |     -------
 22 |     ratio : float
 23 |         Ratio between the 36 Hz stream and the 44 Hz stream.
 24 |     """
 25 |     # Take only trials from 1 stream condition
 26 |     data36 = data[:10, :, :]
 27 |     data44 = data[20:30, :, :]
 28 |     # Average trials time domain
 29 |     data36MeanTrial = data36.mean(axis=0)
 30 |     data44MeanTrial = data44.mean(axis=0)
 31 | 
 32 |     # Compute picks for both streams
 33 |     pick36 = computePickEnergy(data36MeanTrial, pickFreq=36, showPlot=False, fs=fs)
 34 |     pick44 = computePickEnergy(data44MeanTrial, pickFreq=44, showPlot=False, fs=fs)
 35 |     # Average electrodes
 36 |     pick36Mean = np.mean(pick36)
 37 |     pick44Mean = np.mean(pick44)
 38 |     # Calculate baseline
 39 |     baseline = pick36Mean/pick44Mean
 40 |     return baseline
 41 | 
 42 | def comparePicks(data, fs):
 43 |     """
 44 |     Calculate the baseline in order to take into account the fact that the eeg
 45 |     response can be different for the two AM rates. This functions computes
 46 |     the ratio between the AM rates in the one stream condition.
 47 | 
 48 |     Parameters
 49 |     ----------
 50 |     eegData : instance of numpy.array
 51 |         EEG data of shape (time, electrode).
 52 | 
 53 |     Returns
 54 |     -------
 55 |     ratio : float
 56 |         Ratio between the 36 Hz stream and the 44 Hz stream.
 57 |     """
 58 |     pick36 = computePickEnergy(data, pickFreq=36, showPlot=False, fs=fs)
 59 |     pick44 = computePickEnergy(data, pickFreq=44, showPlot=False, fs=fs)
 60 |     ratio = pick36/pick44
 61 |     return ratio
 62 | 
 63 | def getSSRAccuracyByDur(data, trials, fs):
 64 |     """
 65 |     Get the classification accuracy according to duration of trials and trials used.
 66 | 
 67 |     Parameters
 68 |     ----------
 69 |     data : array-type
 70 |         Data to use to check accuracy.
 71 |     trials : array-type
 72 |         Trials to consider.
 73 |     fs : float
 74 |         Sampling frequency in Hz.
 75 | 
 76 |     Returns
 77 |     -------
 78 |     allComparisons : array-type
 79 |         Array containing all comparison (for each duration).
 80 |     """
 81 |     # Average data across trials
 82 |     dataSub = data[trials, :, :]
 83 | 
 84 |     allComparisons = np.zeros((59, 64))
 85 |     for dur in range(1, 60):
 86 |         durSamples = int(np.round(dur*fs))
 87 |         dataMeanTrial = dataSub[:, :durSamples, :].mean(axis=0)
 88 | 
 89 |         baseline = calculateBaseline(data[:, :durSamples, :], fs)
 90 | 
 91 |         electrodeComparison = comparePicks(dataMeanTrial, fs)
 92 |         electrodeBool = electrodeComparison>baseline
 93 | 
 94 |         allComparisons[dur-1, :] = electrodeBool
 95 |     return allComparisons
 96 | 
 97 | def crossVal(data, data1, fs):
 98 |     """
 99 |     This function has changed. To update and rename...
100 | 
101 |     Parameters
102 |     ----------
103 |     data : array-type
104 |         Shape (trial, time, electrode). Compute pick at 36 Hz for each trial.
105 |     data1 : array-type
106 |         Shape (trial, time, electrode). Compute pick at 44 Hz for each trial.
107 |     fs : float
108 |         Sampling frequency in Hz.
109 | 
110 |     Returns
111 |     -------
112 |     aAll : array-type
113 |         List of pick values for 36 Hz from `data`. Length of trial number.
114 |     bAll : array-type
115 |         List of pick values for 44 Hz from `data1`. Length of trial number.
116 |     """
117 |     testRatios = []
118 |     aAll = []
119 |     bAll = []
120 |     accuracy = []
121 |     # data36 = data[40:60, :, :]
122 |     # data44 = data[60:80, :, :]
123 |     # ratio44All = comparePicks(data44.mean(axis=0), fs)
124 |     # ratio36All = comparePicks(data36.mean(axis=0), fs)
125 |     # Training
126 |     for trial in range(data.shape[0]):
127 |         # print trial
128 |         # if trial < 20:
129 |         #     print 'Categorizing 36 Hz trial...'
130 |         #     trainingData36 = np.delete(data36, trial, axis=0)
131 |         #     ratio36 = comparePicks(trainingData36.mean(axis=0), fs)
132 |         #     ratio44 = ratio44All
133 |         # else:
134 |         #     print 'Categorizing 44 Hz trial...'
135 |         #     trainingData44 = np.delete(data44, trial-20, axis=0)
136 |         #     ratio44 = comparePicks(trainingData44.mean(axis=0), fs)
137 |         #     ratio36 = ratio36All
138 |         # print '36, 44: ', ratio36.mean(), ratio44.mean()
139 |         # Testing
140 |         testData = data[trial, :, :]
141 |         testData1 = data1[trial, :, :]
142 |         # testRatio = comparePicks(testData, fs)
143 |         # testRatios.append(testRatio.mean())
144 | 
145 |         a = computePickEnergy(testData, pickFreq=36, showPlot=False, fs=fs)
146 |         b = computePickEnergy(testData1, pickFreq=44, showPlot=False, fs=fs)
147 |         aAll.append(a.mean())
148 |         bAll.append(b.mean())
149 |         del a, b
150 | 
151 |         # print 'test ratio: ', testRatio.mean()
152 |         # if np.abs(testRatio.mean() - ratio36.mean()) < np.abs(testRatio.mean() - ratio44.mean()):
153 |         #     print 'Categorized as 36 Hz trial'
154 |         #     if trial<20:
155 |         #         accuracy.append(1)
156 |         #     else:
157 |         #         accuracy.append(0)
158 |         # else:
159 |         #     print 'Categorized as 44 Hz trial'
160 |         #     if trial>20:
161 |         #         accuracy.append(1)
162 |         #     else:
163 |         #         accuracy.append(0)
164 |     return aAll, bAll
165 | 
166 | def hyperOptC(data, c_vals, durs, electrodes, dprimeThresh, subjNum, condition, fs, trialBehaviorAll):
167 |     """
168 |     Perform the hyper optimization of the c parameter of the SVM algorithm.
169 |     Also compute the accuracy for a set of durations.
170 | 
171 |     Parameters
172 |     ----------
173 |     data : array-type
174 |         Data to use. Shape (trial, time, electrode).
175 |     c_vals : array-type
176 |         List of c values to try.
177 |     durs : array-type
178 |         List of durations to use.
179 |     electrodes : array-type
180 |         List of electrodes to consider.
181 |     dprimeThresh : float
182 |         Threshold of dprime to include the trial as a good trial.
183 |     subjNum : array-type
184 |         List of subject to consider.
185 |     condition : str
186 |         'oneStream' or 'twoStreams': choose the condition.
187 |     fs : float
188 |         Sampling frequency in Hz.
189 |     trialBehaviorAll : instance of pandas.Dataframe
190 |         Behavior data from all participants.
191 | 
192 | 
193 |     Returns
194 |     -------
195 |     bestC : instance of pandas.Dataframe
196 |         Dataframe containing the accuracy for each c parameter and duration.
197 |     """
198 |     # Create dataframe to fill with the accuracy according to duration and c parameter
199 |     bestC = pd.DataFrame(columns=['participant', 'dur', 'c', 'acc'])
200 |     for dur in durs:
201 |         durSamples = int(np.round(fs*dur))
202 |         # Get pick values (36 and 44 Hz) for specific duration and electrodes
203 |         pick36, pick44 = crossVal(data[:, :durSamples, :electrodes],
204 |                                   data[:, :durSamples, :electrodes],
205 |                                   fs=fs)
206 |         # Reshape to have one column per participant and all trials (80) in each col
207 |         allPicks36 = np.zeros((80, subjNum))
208 |         allPicks44 = np.zeros((80, subjNum))
209 |         for subj in range(subjNum):
210 |             allPicks36[:80, subj] = pick36[80*subj:(80*subj)+80]
211 |             allPicks44[:80, subj] = pick44[80*subj:(80*subj)+80]
212 | 
213 |         for i in range(subjNum):
214 |             # remove bad trials (with dprime lower than dprime threshold) for this participant
215 |             performances = analyses(trialBehaviorAll[i], verbose=False)
216 |             badTrials = performances.trial[performances.dprime<dprimeThresh].values
217 |             if condition=='oneStream':
218 |                 # Trials to take into account: only the first 40 for one stream condition
219 |                 trials = np.arange(40)
220 |             elif condition=='twoStreams':
221 |                 # Trials to take into account: only the last 40 for two streams condition
222 |                 trials = np.arange(40, 80)
223 |             else:
224 |                 raise ValueError('Wrong argument `condition`!')
225 |             # Good trials are the wanted trials without the bad trials
226 |             goodTrials = trials[~np.isin(trials, badTrials)]
227 | 
228 |             for c_val in c_vals:
229 |                 # Get only the good trials in our dataset
230 |                 X = np.array([allPicks36[goodTrials, i], allPicks44[goodTrials, i]]).T
231 |                 # Create labels
232 |                 if condition=='oneStream':
233 |                     # We take the trials 40 to 80 and there were 20 36 Hz
234 |                     # trials and then 20 44 Hz trials
235 |                     y = np.concatenate([np.repeat(36, 20), np.repeat(44, 20)])[goodTrials]
236 |                 elif condition=='twoStreams':
237 |                     # We need to add 40 trials at the beginning because the two streams
238 |                     # trials are from 40 to 80
239 |                     y = np.concatenate([np.arange(40),
240 |                                         np.repeat(36, 20),
241 |                                         np.repeat(44, 20)])[goodTrials]
242 |                 else:
243 |                     raise ValueError('Wrong argument `condition`!')
244 | 
245 |                 # Split dataset in train and test sets
246 |                 X_train, X_test, y_train, y_test = train_test_split(X, y,
247 |                                                                     test_size=0.3,
248 |                                                                     random_state=0)
249 |                 # Train SVM on train data
250 |                 clf = svm.SVC(kernel='rbf', C=c_val).fit(X_train, y_train)
251 |                 # Calculate accuracy on test data
252 |                 acc = clf.score(X_test, y_test)
253 |                 # Store accuracy
254 |                 bestC = bestC.append({'participant': i, 'dur': dur, 'c': c_val, 'acc': acc},
255 |                              ignore_index=True)
256 |     return bestC
257 | 
258 | def getBestAcc(durs, bestC):
259 |     """
260 |     Return the c parameter corresponding to the better accuracy for the 4
261 |     participants and for each duration.
262 | 
263 |     Parameters
264 |     ----------
265 |     durs : array-type
266 |         .List of durations to consider.
267 |     bestC : instance of pandas.Dataframe
268 |         Dataframe returned from the function 'hyperOptC'.
269 | 
270 |     Returns
271 |     -------
272 |     p1AccAll : array-type
273 |         List of accuracies for each duration with the better c parameter
274 |         (at 60s) for the participant 1.
275 |     p2AccAll : array-type
276 |         List of accuracies for each duration with the better c parameter
277 |         (at 60s) for the participant 2.
278 |     p3AccAll : array-type
279 |         List of accuracies for each duration with the better c parameter
280 |         (at 60s) for the participant 3.
281 |     p4AccAll : array-type
282 |         List of accuracies for each duration with the better c parameter
283 |         (at 60s) for the participant 4.
284 |     """
285 |     # find value of c corresponding to the larger accuracy
286 |     pIdx = bestC[bestC['dur']==60].groupby(['participant', 'dur'])['acc'].idxmax().values
287 |     pC = bestC.iloc[pIdx].c.values.astype(int)
288 |     print pC
289 | 
290 |     p1AccAll = []
291 |     p2AccAll = []
292 |     p3AccAll = []
293 |     p4AccAll = []
294 | 
295 |     for i in range(len(durs)):
296 |         p1Acc = bestC.acc[((bestC.participant == 0) & (bestC.c == pC[0]) &
297 |                           (bestC.dur == durs[i]))].values
298 |         p2Acc = bestC.acc[((bestC.participant == 1) & (bestC.c == pC[1]) &
299 |                           (bestC.dur == durs[i]))].values
300 |         p3Acc = bestC.acc[((bestC.participant == 2) & (bestC.c == pC[2]) &
301 |                           (bestC.dur == durs[i]))].values
302 |         p4Acc = bestC.acc[((bestC.participant == 3) & (bestC.c == pC[3]) &
303 |                           (bestC.dur == durs[i]))].values
304 |         p1AccAll.append(p1Acc)
305 |         p2AccAll.append(p2Acc)
306 |         p3AccAll.append(p3Acc)
307 |         p4AccAll.append(p4Acc)
308 | 
309 |     return p1AccAll, p2AccAll, p3AccAll, p4AccAll
310 | 
311 | 


--------------------------------------------------------------------------------
/tools/behavior.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import couchdb
  4 | import matplotlib.pyplot as plt
  5 | from scipy.stats import norm
  6 | from eeg import getTrialNumList, plotDataSubset
  7 | 
  8 | def getBehaviorDataSession(dbAddress, dbName, sessionNum):
  9 |     """
 10 |     Fetch behavior data from couchdb (SOA, SNR and trial duration).
 11 | 
 12 |     Parameters
 13 |     ----------
 14 |     dbAddress : str
 15 |         Path to the couch database.
 16 |     dbName : str
 17 |         Name of the database on the couch instance.
 18 |     sessionNum : int
 19 |         Behavior data will be fetched from this sessionNum.
 20 | 
 21 |     Returns
 22 |     -------
 23 |     allDoc : instance of pandas.core.DataFrame
 24 |         A dataframe containing requested data.
 25 |     """
 26 | 
 27 |     couch = couchdb.Server(dbAddress)
 28 |     db = couch[dbName]
 29 | 
 30 |     count = 0
 31 |     alldoc = []
 32 |     for doc in db.view('_all_docs'):
 33 |         if (doc['id'].startswith('maskingEEG_%d' % sessionNum)):
 34 |             alldoc.append(db.get(doc['id']))
 35 | 
 36 |     alldoc = pd.DataFrame(alldoc)
 37 |     alldoc = alldoc.sort_values(['time']).reset_index(drop=True)
 38 |     return alldoc
 39 | 
 40 | def getBehaviorData(dbName, sessionNums):
 41 |     """
 42 |     Get behavior data from the couch database according to the name of the DB and
 43 |     the sessions.
 44 | 
 45 |     Parameters
 46 |     ----------
 47 |     dbName : str
 48 |         Name of the database on the couch instance.
 49 |     sessionNums : array-like
 50 |         List of sessions to keep.
 51 | 
 52 |     Returns
 53 |     -------
 54 |     behaviorData : instance of pandas.core.DataFrame
 55 |         Dataframe containing all parameters of all trials.
 56 |     """
 57 |     password = "a"
 58 |     dbAddress = "https://db.auditory.fr:6984/"
 59 |     dbAddressLog = "https://%s:%s@db.auditory.fr:6984/" % (dbName, password)
 60 | 
 61 |     behaviorData = []
 62 |     for session in sessionNums:
 63 |         print 'loading session', session
 64 |         behaviorData.append(getBehaviorDataSession(dbAddress=dbAddressLog,
 65 |                                dbName=dbName,
 66 |                                sessionNum=session))
 67 | 
 68 |     behaviorData = pd.concat(behaviorData)
 69 |     behaviorData.trialNum = np.arange(behaviorData.shape[0])
 70 |     behaviorData = behaviorData.reset_index()
 71 |     return behaviorData
 72 | 
 73 | def analyses(data, verbose):
 74 |     """
 75 |     Evaluate the behavior data by computing hits rate and false alarm rates. The
 76 |     continuous responses given by the participant are compared to the time stamps
 77 |     of the gaps in the attended stream and also in the unattended (if there is
 78 |     one). For each response: 1.calculate the delay between this response and each
 79 |     attended gap (`lagCorrect`). 2. calculate the delay between this response
 80 |     and each unattended gap (`laginCorrect`). 3. keep only positive values in
 81 |     each array because the response is done after the gap. This removes all
 82 |     other gaps for this response. 4. take the smaller value in each
 83 |     array: `minCorrect` and `minIncorrect`. 5. we consider that the response
 84 |     is linked to the gap if the delay is between `minThresh` and `maxThresh`.
 85 |     The margins should avoid having bumps in the two streams separated by less
 86 |     than maxThresh - minThresh.
 87 | 
 88 |     Parameters
 89 |     ----------
 90 |     data : instance of pandas.core.DataFrame
 91 |         Behavior data to use to run the analyses.
 92 |     verbose : bool
 93 |         Print more details about the process.
 94 | 
 95 |     Returns
 96 |     -------
 97 |     analyses : instance of pandas.core.DataFrame
 98 |         Dataframe containing the number of hits and false alarms for each trial.
 99 |     """
100 |     allHitTime = []
101 |     allFalseHitTime = []
102 | 
103 |     # we don't expect reaction time bellow minThresh
104 |     minThresh = 0.3
105 |     maxThresh = 1
106 | 
107 |     analyses = pd.DataFrame(columns=['trial', 'freqDiff', 'hit', 'hit1', 'FA',
108 |         'FA1', 'falseHit', 'allFA', 'dprime', 'TC', 'correctStream', 'twoStreams';
109 |         'gapNum'])
110 |     trial = 0
111 |     for i in data.trialNum:
112 |         # get subset of data corresponding to the current trial
113 |         dataTrial = data[data.trialNum==trial]
114 | 
115 |         if verbose:
116 |             print '\n\ntrial %s' % trial
117 |             print 'Freq diff: %d' % dataTrial.freqDiff
118 | 
119 |         gapNum = dataTrial.bumpNumber.values[0]
120 | 
121 |         if (data['correctStream'][trial]):
122 |             correctGap = np.array(dataTrial.delayBump1.values[0])
123 |             incorrectGap = np.array(dataTrial.delayBump0.values[0])
124 |         else:
125 |             correctGap = np.array(dataTrial.delayBump0.values[0])
126 |             incorrectGap = np.array(dataTrial.delayBump1.values[0])
127 |         resp = np.array(dataTrial.continuousResponses.values[0])
128 | 
129 |         hit = 0.
130 |         FA = 0.
131 |         miss = 0.
132 |         falseHit = 0.
133 |         falseHitTime = []
134 |         hitTime = []
135 |         FATime = []
136 |         for i in resp:
137 |             if not np.isnan(i):
138 |                 if verbose:
139 |                     print 'response: ', i
140 |                 lagCorrect = i-correctGap
141 |                 lagIncorrect = i-incorrectGap
142 |                 # response are made after gap: keep only positive values
143 |                 lagCorrect = lagCorrect[lagCorrect>0]
144 |                 lagIncorrect = lagIncorrect[lagIncorrect>0]
145 | 
146 |                 isCorrectExists = lagCorrect.shape[0] > 0
147 |                 isIncorrectExists = lagIncorrect.shape[0] > 0
148 | 
149 |                 if isCorrectExists:
150 |                     minCorrect = np.min(lagCorrect)
151 |                     if verbose:
152 |                         print 'min distance with correct = ', minCorrect
153 |                 if isIncorrectExists:
154 |                     minIncorrect = np.min(lagIncorrect)
155 |                     if verbose:
156 |                         print 'min distance with incorrect = ', minIncorrect
157 |                 # we check that there is a bump before the response
158 |                 if (isCorrectExists is True and isIncorrectExists is True and
159 |                        minCorrect < maxThresh and minCorrect > minThresh and
160 |                        minIncorrect < maxThresh and minIncorrect > minThresh):
161 |                     raise ValueError('It seems that there are two bumps very close...')
162 |                 if (isCorrectExists is True and minCorrect < maxThresh and minCorrect > minThresh):
163 |                     if verbose:
164 |                         print 'this is a hit'
165 |                     score = 1
166 |                     hit += 1
167 |                     answer = 'hit'
168 |                     hitTime.append(i)
169 |                     allHitTime.append(minCorrect)
170 |                 elif (isIncorrectExists is True and minIncorrect < maxThresh and minIncorrect > minThresh):
171 |                     if verbose:
172 |                         print 'this is a FA (false hit)'
173 |                     score = 0
174 |                     FA += 1
175 |                     answer = 'FA'
176 |                     falseHit += 1
177 |                     falseHitTime.append(i)
178 |                     allFalseHitTime.append(minIncorrect)
179 |                 else:
180 |                     if verbose:
181 |                         print 'this is a FA'
182 |                     score = 0
183 |                     FA += 1
184 |                     answer = 'FA'
185 |                     FATime.append(i)
186 | 
187 |         miss = gapNum - hit
188 |         allFA = FA + falseHit
189 | 
190 |         hitRatio = hit/gapNum
191 |         FARatio = allFA/gapNum
192 | 
193 |         # avoid infinite values in dprime calculation
194 |         hitRatio1 = hitRatio
195 |         FARatio1 = FARatio
196 |         if hitRatio >= 1:
197 |             hitRatio1 = 0.95
198 |         if hitRatio <= 0:
199 |             hitRatio1 = 0.05
200 |         if FARatio <= 0:
201 |             FARatio1 = 0.05
202 |         if FARatio >= 1:
203 |             FARatio1 = 0.95
204 | 
205 |         dprime = norm.ppf(hitRatio1) - norm.ppf(FARatio1)
206 | 
207 |         if verbose:
208 |             print '\nhit = ', hit
209 |             print 'FA = %s (including %s false hit)' % (FA, falseHit)
210 |             print 'miss =', miss
211 |             print 'gap =', gapNum
212 | 
213 | 
214 |         if dataTrial.cloudCompNum.values[0] == 0:
215 |             TC = False
216 |         else:
217 |             TC = True
218 | 
219 |         analyses.loc[trial] = [trial, dataTrial.freqDiff.values[0],
220 |             hitRatio, hitRatio1, FARatio, FARatio1, falseHit, allFA, dprime,
221 |             TC, dataTrial.correctStream.values[0], dataTrial.twoStreams.values[0],
222 |             gapNum]
223 | 
224 |         if verbose:
225 |             plt.figure()
226 |             plotTrial(data, correctGap, incorrectGap, gapNum=gapNum, trial=trial,
227 |                 hitTime=hitTime, FATime=FATime, falseHitTime=falseHitTime, resp=resp)
228 |             plt.show()
229 |             plt.close()
230 | 
231 |         trial += 1
232 |     return analyses
233 | 
234 | def plotTrial(data, correctBump, incorrectBump, gapNum, trial, hitTime, FATime, falseHitTime, resp):
235 |     """
236 |     Plot representation of the behavior trial. This shows the gaps of attended
237 |     and unattended streams in green and red respectively and responses as
238 |     vertical gray lines.
239 |     still to implement...
240 | 
241 |     Parameters
242 |     ----------
243 | 
244 |     Returns
245 |     -------
246 |     allTrials : instance of numpy.array
247 |         List of trial numbers.
248 |     """
249 |     allXTicks = []
250 |     for i in range(resp.shape[0]):
251 |         plt.axvline(x=resp[i], color='gray')
252 |     for i in range(gapNum):
253 |         plt.axvline(x=correctBump[i], color='green')
254 |         plt.axvline(x=incorrectBump[i], color='red')
255 | 
256 |         allXTicks.append(correctBump[i])
257 |         allXTicks.append(incorrectBump[i])
258 |     offset = 0.1
259 |     for i in hitTime:
260 |         plt.text(x=i-offset, y=0.5, s='H', color='green')
261 |     for i in FATime:
262 |         plt.text(x=i-offset, y=0.5, s='F', color='red')
263 |     for i in falseHitTime:
264 |         plt.text(x=i-offset, y=0.5, s='FH', color='red')
265 |     plt.xlim(0, 15)
266 |     plt.xticks(allXTicks, rotation=90)
267 |     plt.title(trial)
268 |     trial += 1
269 | 
270 | def checkLinkTrialsBehaviorEEG(trialBehavior, events, sessionNum, trigs, fs):
271 |     """
272 |     Check that answer recorded in behavior data correspond to triggers emitted
273 |     by this answer. This allows to be sure that EEG data correspond to behavior.
274 | 
275 |     Parameters
276 |     ----------
277 |     fs : float
278 |         EEG data sampling frequency in Hz.
279 | 
280 |     Returns
281 |     -------
282 | 
283 |     """
284 |     for trial in getTrialNumList(trialBehavior, sessionNum=sessionNum):
285 |         print trial
286 |         t0Sample = trigs.iloc[trial, 0]
287 |         # get response in this trial
288 |         ev = events[((events[:, 0]>t0Sample) & (events[:, 0]<t0Sample+(60*fs)) &
289 |             (events[:, 2]==65312))]
290 |         # why 100 ms difference with behavior data?
291 |         print 'eeg     ', (ev[:, 0] - t0Sample)/fs + 0.1
292 |         # print responses from behavior
293 |         print 'behavior', trialBehavior.continuousResponses[trialBehavior.trialNum==trial].values[0]
294 |         # plot 1 second before the beginning of the trial
295 |         t0 = (t0Sample/fs)-1
296 |         t1 = t0 + 32
297 |         # data = pd.DataFrame(data[trial, :, :15])
298 |         # plotDataSubset(data=data, stim=stim,
299 |         #                lineTrigs=trigs,
300 |         #                offset=1, t0=t0, t1=t1, fs=512.)
301 | 
302 | def getTrialNum(ref, allSubj, trialBehavior, **kwargs):
303 |     """
304 |     Get the trial numbers corresponding to specific conditions.
305 | 
306 |     Parameters
307 |     ----------
308 |     ref : int
309 |         If 1: the condition is all trials (like for overall analyses: exp 1 and 2).
310 |     allSubj : bool
311 |         Choose to return the trial number for one or all subjects.
312 |     trialBehavior : instance of Pandas.Dataframe
313 |         All behavior data. Trial numbers will be find related to condition present
314 |         in this dataset.
315 |     **kwargs : other arguments
316 |         All conditions can be passed as argument like `correctStream=[False]`.
317 | 
318 |     Returns
319 |     -------
320 |     allTrials : instance of numpy.array
321 |         List of trial numbers.
322 |     """
323 |     if ref == 1:
324 |         allTrials = np.arange(80)
325 |     elif ref == 2:
326 |         allTrials = np.arange(40)
327 |     else:
328 |         raise ValueError
329 | 
330 |     if (kwargs):
331 |         acc = pd.DataFrame()
332 |         for i in kwargs:
333 |             acc[i] = trialBehavior[i].isin(kwargs[i])
334 |         acc['res'] = acc.mean(axis=1)
335 |         print kwargs
336 |         results = trialBehavior.trialNum[acc['res']==1].values
337 | 
338 | 
339 |         results = results[results>=0]
340 |         if allSubj:
341 |             results = np.concatenate([results, results+80,
342 |                 results+(80*2), results+(80*3)])
343 |         return results
344 |     else:
345 |         if allSubj:
346 |             allTrials = np.concatenate([allTrials, allTrials+80,
347 |                 allTrials+(80*2), allTrials+(80*3)])
348 |         return allTrials
349 | 
350 | 


--------------------------------------------------------------------------------
/tools/audio.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.io import wavfile
  3 | from scipy import signal, fftpack
  4 | import urllib2, base64
  5 | from subprocess import Popen, PIPE
  6 | import soundfile as sf
  7 | import couchdb
  8 | from IPython.display import display, clear_output
  9 | 
 10 | def audioToNP(audioWebm, stream, verbose=False):
 11 |     """
 12 |     Get a list of matrices containing audio from a list of webm file.
 13 | 
 14 |     Parameters
 15 |     ----------
 16 |     audioList : array-like
 17 |         List containing audio matrices. Its length is the number of trial
 18 |     audioLen : int
 19 |         The number of samples in each trial.
 20 |     stream : str
 21 |         Multiple sounds are associated with each trial.
 22 |         Stream contains character to discriminate.
 23 |     verbose : bool
 24 |         If True, more information are displayed.
 25 | 
 26 |     Returns:
 27 | 
 28 |     audioList : array-like
 29 |         List of matrices audio as elements.
 30 |     audioLen : int
 31 |         Number of samples for each trial.
 32 |     """
 33 | 
 34 |     audioList = []
 35 |     trialLenAll = []
 36 | 
 37 |     for i in range(len(audioWebm)):
 38 |         for j in audioWebm[i].keys():
 39 |             if stream in j:
 40 |                 audioName = j
 41 |         if verbose:
 42 |             # clear_output(wait=True)
 43 |             display('Fetching %s file...' % audioName)
 44 | 
 45 |         fs, audio = fromWebmToWav(inputFile=audioWebm[i][audioName],
 46 |             filename='test%d'%i, verbose=verbose)
 47 |         # remove second identical channel
 48 |         audio = audio[:, 0]
 49 |         # start with non 0 values
 50 |         audio = np.trim_zeros(audio, trim='f')
 51 |         trialLenAll.append(audio.shape[0])
 52 |         audioList.append(audio)
 53 | 
 54 |         trialLen = int(np.min(trialLenAll))
 55 | 
 56 |     return audioList, trialLen
 57 | 
 58 | def butterLpass(data, cutoff, fs, order=5):
 59 |     """
 60 |     Filter data with a low pass butterworth filter.
 61 | 
 62 |     Parameters
 63 |     ----------
 64 |     data : instance of numpy.array
 65 |         Matrix of shape (samples,) containing the signal to filter
 66 |     cutoff : float
 67 |         The cutoff frequency in Hz.
 68 |     fs : float
 69 |         The sampling frequency of the signal.
 70 |     order : int
 71 |         Order of the filter.
 72 | 
 73 |     Returns:
 74 | 
 75 |     y : instance of numpy.array
 76 |          Matrix of shape (samples,) containing the filtered signal.
 77 |     """
 78 |     nyq = 0.5 * fs
 79 |     normal_cutoff = cutoff / nyq
 80 |     b, a = signal.butter(order, normal_cutoff, btype='low', analog=False)
 81 |     # using filtfilt instead of lfilt to avoid the offset of the window size
 82 |     y = signal.filtfilt(b, a, data)
 83 |     return(y)
 84 | 
 85 | def downsampleTo64(data):
 86 |     """
 87 |     Decimate data with a factor 750 to go from 48000 to 64 Hz.
 88 | 
 89 |     Parameters
 90 |     ----------
 91 |     data : instance of numpy.array
 92 |         Matrix to downsample.
 93 | 
 94 |     Returns:
 95 | 
 96 |     newdata : instance of numpy.array
 97 |         Downsampled matrix of shape (trial, time).
 98 |     """
 99 |     # The initial sampling rate is 48000. and we want to got to 64
100 |     # It is done in multiple steps because the doc of scipy.signal.decimate
101 |     # advice to use a factor bellow 13
102 |     decimate_intermediate = [10, 5, 5, 3]
103 |     newdata = data
104 |     for i in decimate_intermediate:
105 |         newdata = signal.decimate(newdata, q=i, axis=1, zero_phase=True)
106 |     return newdata
107 | 
108 | def fromWebmToWav(inputFile, filename, verbose=False):
109 |     """
110 |     Convert webm file from database to wav by writing on disk. The files are
111 |     not removed.
112 | 
113 |     Parameters
114 |     ----------
115 |     inputFile : webm file
116 |         Webm audio file to convert into wav.
117 |     filename : str
118 |         Base name to use to store files on disk.
119 |     verbose : bool
120 |         If True, more information are displayed.
121 | 
122 |     Returns:
123 | 
124 |     allAudioFiles : array-like
125 |         List of all audio files corresponding to the session, db etc.
126 |     """
127 |     audio_wav_file = inputFile
128 |     filenameInput = '%s.webm' % filename
129 |     filenameOutput = '%s.wav' % filename
130 | 
131 | 
132 |     wavf = open(filenameInput, 'wrb')
133 |     wavf.write(audio_wav_file)
134 |     wavf.close()
135 | 
136 |     command = 'ffmpeg -i %s -y %s 2>&1' % (filenameInput, filenameOutput)
137 | 
138 |     conversion = Popen(command, shell = True, stdout = PIPE)
139 |     # wait for the process to terminate
140 |     out, err = conversion.communicate()
141 |     errcode = conversion.returncode
142 | 
143 |     if verbose:
144 |         # clear_output(wait=True)
145 |         display(filenameOutput)
146 | 
147 |     fs, audio = wavfile.read(filenameOutput)
148 |     return fs, audio
149 | 
150 | def getAudio(dbAddress, dbName, password, sessionNum, verbose=False):
151 |     """
152 |     Get names of audio files from couchdb. This allows for instance to use the
153 |     names in the query to get the actual audio files.
154 | 
155 |     Parameters
156 |     ----------
157 |     dbAddress : str
158 |         Path to the couch database.
159 |     dbName : str
160 |         Name of the database on the couch instance.
161 |     password : str
162 |         Password of the couch database
163 |     sessionNum : int
164 |         Filter trials from a specific session number.
165 | 
166 |     Returns:
167 | 
168 |     allAudioFiles : array-like
169 |         List of all audio files corresponding to the session, db etc.
170 |     """
171 |     allAudioFileNames = getAudioFilenames(dbAddress, dbName, password, sessionNum)
172 |     allAudioFiles = []
173 |     for trial in allAudioFileNames:
174 |         allAudioFiles.append({})
175 |         audioFileNames = allAudioFileNames[trial]
176 |         audioFilesTrial = []
177 |         for audioFileName in audioFileNames:
178 |             url = "%s%s/maskingEEG_%d_%d/%s" % (dbAddress, dbName, sessionNum, trial, audioFileName)
179 |             if verbose:
180 |                 print url
181 |             request = urllib2.Request(url)
182 |             base64string = base64.encodestring('%s:%s' % (dbName, password)).replace('\n', '')
183 |             request.add_header("Authorization", "Basic %s" % base64string)
184 |             result = None
185 |             while result is None:
186 |                 try:
187 |                     result = urllib2.urlopen(request)
188 |                 except:
189 |                     pass
190 |             snd = result.read()
191 |             allAudioFiles[trial][audioFileName] = snd
192 |     return allAudioFiles
193 | 
194 | def getAudioFilenames(dbAddress, dbName, password, sessionNum):
195 |     """
196 |     Get names of audio files from couchdb. This allows for instance to use the
197 |     names in the query to get the actual audio files.
198 | 
199 |     Parameters
200 |     ----------
201 |     dbAddress : str
202 |         Path to the couch database.
203 |     dbName : str
204 |         Name of the database on the couch instance.
205 |     password : str
206 |         Password of the couch database
207 |     sessionNum : int
208 |         Filter trials from a specific session number.
209 | 
210 |     Returns:
211 | 
212 |     allAudioFileNames : dict
213 |         Dictionary containing trial numbers as keys and array of audio file
214 |         names as values.
215 |     """
216 | 
217 |     couch = couchdb.Server(dbAddress)
218 |     couch.resource.credentials = (dbName, password)
219 |     db = couch[dbName]
220 | 
221 |     count = 0
222 |     allAudioFiles = {}
223 |     for doc in db.view('_all_docs'):
224 |         if (doc['id'].startswith('maskingEEG_%d' % sessionNum)):
225 |             allAudioFiles[db.get(doc['id'])['trialNum']] = db.get(doc['id'])['_attachments'].keys()
226 |     return allAudioFiles
227 | 
228 | def getWebm(dbAddress, dbName, password, sessionNums):
229 |     """
230 |     Get webm audio files from couchdb.
231 | 
232 |     Parameters
233 |     ----------
234 |     dbAddress : str
235 |         Path to the couch database.
236 |     dbName : str
237 |         Name of the database on the couch instance.
238 |     password : str
239 |         Password of the couch database
240 |     sessionNums : array-like
241 |         List of sessions to keep.
242 | 
243 |     Returns:
244 | 
245 |     allAudioFiles : array-like
246 |         List of all audio files corresponding to the session, db etc.
247 |     """
248 |     allAudioFiles = []
249 |     for sessionNum in sessionNums:
250 |         audioFile = getAudio(dbAddress, dbName, password, sessionNum)
251 |         allAudioFiles.append(audioFile)
252 |     allAudioFiles = [item for sublist in allAudioFiles for item in sublist]
253 |     return allAudioFiles
254 | 
255 | def getConcatAudio(audioList, trialLen, verbose=False):
256 |     """
257 |     Get all audio files under the form of one concatenated matrix containing the
258 |     raw audio and another one containing the envelopes.
259 | 
260 |     Parameters
261 |     ----------
262 |     audioList : array-like
263 |         List containing audio matrices. Its length is the number of trials.
264 |     audioLen : int
265 |         The number of samples in each trial.
266 |     verbose : bool
267 |         If True, more information are displayed.
268 | 
269 |     Returns:
270 | 
271 |     audioAll : instance of numpy.array
272 |         Matrix of shape (samples,) containing all audio trials concatenated.
273 |     audioAllEnv : instance of numpy.array
274 |         Matrix of shape (samples,) containing all audio envelopes concatenated.
275 |     """
276 |     # The Hilbert transform can be very slow according to the number of samples used
277 |     trialLenFastHilbert = fftpack.next_fast_len(trialLen)
278 |     trialNum = len(audioList)
279 | 
280 |     audioAllEnv = np.zeros((trialNum*trialLen))
281 |     audioAll = np.zeros((trialNum*trialLen))
282 | 
283 |     for i in range(len(audioList)):
284 |         audio = audioList[i]
285 |         audioAll[trialLen*i:trialLen*(i+1)] = audio[:trialLen]
286 | 
287 |         env = np.abs(signal.hilbert(audio, N=trialLenFastHilbert))
288 | 
289 |         if verbose:
290 |             # clear_output(wait=True)
291 |             display(i, 'envelope finished')
292 | 
293 |         audioAllEnv[trialLen*i:trialLen*(i+1)] = env[:trialLen]
294 |         del env, audio
295 | 
296 |     return(audioAll, audioAllEnv)
297 | 
298 | def getEnv(dbAddress, dbName, password, verbose, sessionNums, fs, stream):
299 |     """
300 |     Get the requested envelope corresponding to the user, sessionNum, stream etc.
301 | 
302 |     Parameters
303 |     ----------
304 |     dbAddress : str
305 |         Path to the couch database.
306 |     dbName : str
307 |         Name of the database on the couch instance.
308 |     password : str
309 |         Password of the couch database.
310 |     verbose : bool
311 |         If True, more information are displayed.
312 |     sessionNums : array-like
313 |         List of sessions to keep.
314 |     fs : float
315 |         Sampling frequency
316 |     stream : str
317 |         Stream to keep ('36' or '44').
318 | 
319 |     Returns:
320 | 
321 |     audioAllEnvFilt2DDS : instance of numpy.array
322 |         Matrix of shape (trial, time) containing the envelope filtered.
323 |     """
324 |     audioWebm = getWebm(dbAddress, dbName, password, sessionNums)
325 |     audioList, trialLen = audioToNP(audioWebm, stream, verbose)
326 |     audioAll, audioAllEnv = getConcatAudio(audioList, trialLen, verbose)
327 |     # Filtering
328 |     audioAllEnvFilt = butterLpass(audioAllEnv, cutoff=15, fs=fs, order=5)
329 |     totalTrialNum = len(audioList)
330 |     # converting to 2D matrix
331 |     audioAllEnvFilt2D = splitEnvInTrials(audioAllEnvFilt, totalTrialNum, trialLen)
332 |     return audioAllEnvFilt2D
333 | 
334 | def getAttendedAndUnattendedEnv(dbAddress, dbName, password, verbose, fs=48000.):
335 |     """
336 |     Get all envelopes required for the analyses. The function will return
337 |     3D matrices containing attended and unattended envelopes.
338 | 
339 |     Parameters
340 |     ----------
341 |     dbAddress : str
342 |         Path to the couch database.
343 |     dbName : str
344 |         Name of the database on the couch instance.
345 |     password : str
346 |         Password of the couch database.
347 |     verbose : bool
348 |         If True, more information are displayed.
349 |     sessionNums : array-like
350 |         List of sessions to keep.
351 |     stream : str
352 |         Stream to keep ('36' or '44').
353 |     fs : float
354 |         Sampling frequency
355 | 
356 |     Returns:
357 | 
358 |     attended : instance of numpy.array
359 |         Matrix of shape (trial, time) containing the envelope of all attended
360 |         streams.
361 |     unattended : instance of numpy.array
362 |         Matrix of shape (trial, time) containing the envelope of all unattended
363 |         streams.
364 |     """
365 |     print('This operation can takes few seconds/minutes... Please wait!')
366 |     if verbose:
367 |         print('noTC36...')
368 |     noTC36 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[1],
369 |         fs=fs, stream='36')
370 |     if verbose:
371 |         print('noTC44...')
372 |     noTC44 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[3],
373 |         fs=fs, stream='44')
374 |     if verbose:
375 |         print('TC36...')
376 |     TC36 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[2],
377 |         fs=fs, stream='36')
378 |     if verbose:
379 |         print('TC44...')
380 |     TC44 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[4],
381 |         fs=fs, stream='44')
382 | 
383 |     if verbose:
384 |         print('stim36Att36...')
385 |     stim36Att36 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[5, 6],
386 |         fs=fs, stream='36')
387 |     if verbose:
388 |         print('stim44Att36...')
389 |     stim44Att36 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[5, 6],
390 |         fs=fs, stream='44')
391 |     if verbose:
392 |         print('stim36Att44...')
393 |     stim36Att44 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[7, 8],
394 |         fs=fs, stream='36')
395 |     if verbose:
396 |         print('stim44Att44...')
397 |     stim44Att44 = getEnv(dbAddress, dbName, password, verbose, sessionNums=[7, 8],
398 |         fs=fs, stream='44')
399 | 
400 |     # Remove the first two seconds to avoid bias since in some trials one
401 |     # stream starts 2 seconds before the other
402 |     start = int(np.round(2*fs))
403 |     # Find the minimum duration among all envelopes in order to cut the others
404 |     end = np.min([TC36.shape[1], TC44.shape[1], noTC36.shape[1], noTC44.shape[1],
405 |         stim36Att36.shape[1], stim44Att36.shape[1], stim36Att44.shape[1],
406 |         stim44Att44.shape[1]])
407 | 
408 |     # Create attended and unattended streams
409 |     attended = np.concatenate([noTC36[:, start:end], TC36[:, start:end],
410 |         noTC44[:, start:end], TC44[:, start:end], stim36Att36[:, start:end],
411 |         stim44Att44[:, start:end]], axis=0)
412 |     unattended = np.concatenate([stim44Att36[:, start:end],
413 |         stim36Att44[:, start:end]], axis=0)
414 | 
415 |     # downsampling
416 |     attendedDS = downsampleTo64(attended)
417 |     unattendedDS = downsampleTo64(unattended)
418 |     print('Done!')
419 |     return attendedDS, unattendedDS
420 | 
421 | def splitEnvInTrials(data, totalTrialNum, trialLen, verbose=False):
422 |     """
423 |     Convert the concatenated array of sound to a 2D matrix of shape (trial, time).
424 | 
425 |     Parameters
426 |     ----------
427 |     data : instance of numpy.array
428 |         Matrix of shape (samples,). Concatenated audio containing all trials in
429 |         one 2D matrix.
430 |     totalTrialNum : int
431 |         The number of trials contained in the matrix data.
432 |     trialLen : int
433 |         The number of sample of one trial (we expect same length trials).
434 | 
435 |     Returns:
436 | 
437 |     newData : instance of numpy.array
438 |         Matrix of shape (trial, time).
439 |     """
440 | 
441 |     newData = np.zeros((totalTrialNum, trialLen))
442 |     for trial in range(totalTrialNum):
443 |         if verbose:
444 |             print trial
445 |         newData[trial, :trialLen] = data[trialLen*trial:trialLen*(trial+1)]
446 |     return newData
447 | 
448 | 
449 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | # Project background
  2 | 
  3 | This is the repository containing all the code needed to analyse the data presented on the poster for the conference *CuttingEEG* in Paris 2nd-5th July 2018.
  4 | 
  5 | ![double AM](envAM.png)
  6 | 
  7 | The aim of this project is to evaluate the possibility to use both auditory steady-state responses (aSSR) and stimulus reconstruction (SR) techiques from the same stimulus. For this purpose, we designed a double amplitude envelope: continuous streams were modulated by a first constant rate amplitude envelope (36 or 44 Hz) and by a random slower one. We used these two analyses to investigate the effect of informational masking in a first experiment and the effect of attention to one of two streams in a second experiment. We evaluated these methods by calculating a decoding accuracy (first experiment: decoding of the condition, 36 or 44 Hz streams; second experiment: decoding of what stream was attended).
  8 | 
  9 | # Results
 10 | 
 11 | ## Auditory steady-state response
 12 | 
 13 | The FFT of the EEG signal has been calculated from the averaged trials according to rate (36 or 44 Hz) and condition (with or without tone cloud). We can see that the aSSR is larger for trials without tone cloud.
 14 | 
 15 | <img src="images/diff36.png" width="300">
 16 | 
 17 | <img src="images/diff36TC.png" width="300">
 18 | 
 19 | <img src="images/diff44.png" width="300">
 20 | 
 21 | <img src="images/diff44TC.png" width="300">
 22 | 
 23 | ## TRF
 24 | 
 25 | 
 26 | 
 27 | # Project structure
 28 | 
 29 | The anonymised raw EEG data (`.bdf` files) will be made available as soon as possible.
 30 | 
 31 | ## Notebooks
 32 | 
 33 | - The notebook `createStimViz.ipynb` is where some vizualisations from sound are made.
 34 | 
 35 | - The notebook `preprocessing.ipynb` has to be run first to transform the raw `.bdf` files to the Numpy arrays containing the preprocessed data. It uses the file format `.h5` to store these data. It is compatible with Python and Matlab. Different datasets can be stored in each `.h5` file. In our case, we create one file per participant each containing:
 36 | 
 37 | 1- eeg_TRF: data preprocessed for the stimulus reconstruction analyses
 38 | 
 39 | 2- eeg_aSSR: data preprocessed for the aSSR analyses
 40 | 
 41 | 3- envAttended: all attended envelopes (80 trials per participant = 320)
 42 | 
 43 | 4- envUnattended all unattended envelopes (only from the exp 2 where there are two streams: 40 trials per participant = 160)
 44 | 
 45 | - The notebook `analyses_aSSR.ipynb` contains the analyses related to the aSSR. It uses data created by the notebook `preprocessing.ipynb`.
 46 | 
 47 | Some analyses have been done in R: see the file `behavior.Rmd`.
 48 | 
 49 | Stimulus reconstruction has been done in the file `analyses_TRF.m` with the package [mTRF](https://sourceforge.net/projects/aespa/) by Crosse et al.
 50 | 
 51 | ## Tools
 52 | 
 53 | You can find in this folder all python functions used in the analyses. The file `audio.py` contains the audio processing functions (envelope extraction, fetch audio files from th database etc.). The file `behavior.py` contain functions related to behavior analyses. It goes from getting the data from couchDB to do analyses like d-prime calculation. The files `decodingSSR.py` and `decodingTRF.py` can be used to do the auditory steady-state response (aSSR) analyses and stimulus reconstruction. It includes functions used to prepare the data in a way required for the analyses. Finally, the file `eeg_utils.py` contains functions used for preprocessing, or loading the data.
 54 | 
 55 | # Credit
 56 | 
 57 | These analyses use the Matlab open source package `mTRF`:
 58 | 
 59 | You can find it [here](https://sourceforge.net/projects/aespa/).
 60 | 
 61 | Crosse, M. J., Di Liberto, G. M., Bednar, A., & Lalor, E. C. (2016). The Multivariate Temporal Response Function (mTRF) Toolbox: A MATLAB Toolbox for Relating Neural Signals to Continuous Stimuli. Frontiers in Human Neuroscience, 10, 604. http://doi.org/10.3389/fnhum.2016.00604
 62 | 
 63 | # API
 64 | 
 65 | You can find bellow the functions documentation:
 66 | 
 67 | ## audio.py
 68 | 
 69 | #### `audioToNP(audioWebm, stream, verbose=False)`
 70 | 
 71 | Get a list of matrices containing audio from a list of webm file.
 72 | 
 73 | - **`audioList`** `array-like`
 74 | 
 75 |    List containing audio matrices. Its length is the number of trial
 76 | - **`audioLen`** `int`
 77 | 
 78 |    The number of samples in each trial.
 79 | - **`stream`** `str`
 80 | 
 81 |    Multiple sounds are associated with each trial.
 82 | Stream contains character to discriminate.
 83 | - **`verbose`** `bool`
 84 | 
 85 |    If True, more information are displayed.
 86 | 
 87 | Returns:
 88 | 
 89 | - **`audioList`** `array-like`
 90 | 
 91 |    List of matrices audio as elements.
 92 | - **`audioLen`** `int`
 93 | 
 94 |    Number of samples for each trial.
 95 | 
 96 | #### `butterLpass(data, cutoff, fs, order=5)`
 97 | 
 98 | Filter data with a low pass butterworth filter.
 99 | 
100 | - **`data`** `instance of numpy.array`
101 | 
102 |    Matrix of shape (samples,) containing the signal to filter
103 | - **`cutoff`** `float`
104 | 
105 |    The cutoff frequency in Hz.
106 | - **`fs`** `float`
107 | 
108 |    The sampling frequency of the signal.
109 | - **`order`** `int`
110 | 
111 |    Order of the filter.
112 | 
113 | Returns:
114 | 
115 | - **`y`** `instance of numpy.array`
116 | 
117 |     Matrix of shape (samples,) containing the filtered signal.
118 | 
119 | #### `downsampleTo64(data)`
120 | 
121 | Decimate data with a factor 750 to go from 48000 to 64 Hz.
122 | 
123 | - **`data`** `instance of numpy.array`
124 | 
125 |    Matrix to downsample.
126 | 
127 | Returns:
128 | 
129 | - **`newdata`** `instance of numpy.array`
130 | 
131 |    Downsampled matrix of shape (trial, time).
132 | 
133 | #### `fromWebmToWav(inputFile, filename, verbose=False)`
134 | 
135 | Convert webm file from database to wav by writing on disk. The files are
136 | not removed.
137 | 
138 | - **`inputFile`** `webm file`
139 | 
140 |    Webm audio file to convert into wav.
141 | - **`filename`** `str`
142 | 
143 |    Base name to use to store files on disk.
144 | - **`verbose`** `bool`
145 | 
146 |    If True, more information are displayed.
147 | 
148 | Returns:
149 | 
150 | - **`allAudioFiles`** `array-like`
151 | 
152 |    List of all audio files corresponding to the session, db etc.
153 | 
154 | #### `getAttendedAndUnattendedEnv(dbAddress, dbName, password, verbose, fs=48000.)`
155 | 
156 | Get all envelopes required for the analyses. The function will return
157 | 3D matrices containing attended and unattended envelopes.
158 | 
159 | - **`dbAddress`** `str`
160 | 
161 |    Path to the couch database.
162 | - **`dbName`** `str`
163 | 
164 |    Name of the database on the couch instance.
165 | - **`password`** `str`
166 | 
167 |    Password of the couch database.
168 | - **`verbose`** `bool`
169 | 
170 |    If True, more information are displayed.
171 | - **`sessionNums`** `array-like`
172 | 
173 |    List of sessions to keep.
174 | - **`stream`** `str`
175 | 
176 |    Stream to keep ('36' or '44').
177 | - **`fs`** `float`
178 | 
179 |    Sampling frequency
180 | 
181 | Returns:
182 | 
183 | - **`attended`** `instance of numpy.array`
184 | 
185 |    Matrix of shape (trial, time) containing the envelope of all attended
186 | streams.
187 | - **`unattended`** `instance of numpy.array`
188 | 
189 |    Matrix of shape (trial, time) containing the envelope of all unattended
190 | streams.
191 | 
192 | #### `getAudio(dbAddress, dbName, password, sessionNum, verbose=False)`
193 | 
194 | Get names of audio files from couchdb. This allows for instance to use the
195 | names in the query to get the actual audio files.
196 | 
197 | - **`dbAddress`** `str`
198 | 
199 |    Path to the couch database.
200 | - **`dbName`** `str`
201 | 
202 |    Name of the database on the couch instance.
203 | - **`password`** `str`
204 | 
205 |    Password of the couch database
206 | - **`sessionNum`** `int`
207 | 
208 |    Filter trials from a specific session number.
209 | 
210 | Returns:
211 | 
212 | - **`allAudioFiles`** `array-like`
213 | 
214 |    List of all audio files corresponding to the session, db etc.
215 | 
216 | #### `getAudioFilenames(dbAddress, dbName, password, sessionNum)`
217 | 
218 | Get names of audio files from couchdb. This allows for instance to use the
219 | names in the query to get the actual audio files.
220 | 
221 | - **`dbAddress`** `str`
222 | 
223 |    Path to the couch database.
224 | - **`dbName`** `str`
225 | 
226 |    Name of the database on the couch instance.
227 | - **`password`** `str`
228 | 
229 |    Password of the couch database
230 | - **`sessionNum`** `int`
231 | 
232 |    Filter trials from a specific session number.
233 | 
234 | Returns:
235 | 
236 | - **`allAudioFileNames`** `dict`
237 | 
238 |    Dictionary containing trial numbers as keys and array of audio file
239 | names as values.
240 | 
241 | #### `getConcatAudio(audioList, trialLen, verbose=False)`
242 | 
243 | Get all audio files under the form of one concatenated matrix containing the
244 | raw audio and another one containing the envelopes.
245 | 
246 | - **`audioList`** `array-like`
247 | 
248 |    List containing audio matrices. Its length is the number of trials.
249 | - **`audioLen`** `int`
250 | 
251 |    The number of samples in each trial.
252 | - **`verbose`** `bool`
253 | 
254 |    If True, more information are displayed.
255 | 
256 | Returns:
257 | 
258 | - **`audioAll`** `instance of numpy.array`
259 | 
260 |    Matrix of shape (samples,) containing all audio trials concatenated.
261 | - **`audioAllEnv`** `instance of numpy.array`
262 | 
263 |    Matrix of shape (samples,) containing all audio envelopes concatenated.
264 | 
265 | #### `getEnv(dbAddress, dbName, password, verbose, sessionNums, fs, stream)`
266 | 
267 | Get the requested envelope corresponding to the user, sessionNum, stream etc.
268 | 
269 | - **`dbAddress`** `str`
270 | 
271 |    Path to the couch database.
272 | - **`dbName`** `str`
273 | 
274 |    Name of the database on the couch instance.
275 | - **`password`** `str`
276 | 
277 |    Password of the couch database.
278 | - **`verbose`** `bool`
279 | 
280 |    If True, more information are displayed.
281 | - **`sessionNums`** `array-like`
282 | 
283 |    List of sessions to keep.
284 | - **`fs`** `float`
285 | 
286 |    Sampling frequency
287 | - **`stream`** `str`
288 | 
289 |    Stream to keep ('36' or '44').
290 | 
291 | Returns:
292 | 
293 | - **`audioAllEnvFilt2DDS`** `instance of numpy.array`
294 | 
295 |    Matrix of shape (trial, time) containing the envelope filtered.
296 | 
297 | #### `getWebm(dbAddress, dbName, password, sessionNums)`
298 | 
299 | Get webm audio files from couchdb.
300 | 
301 | - **`dbAddress`** `str`
302 | 
303 |    Path to the couch database.
304 | - **`dbName`** `str`
305 | 
306 |    Name of the database on the couch instance.
307 | - **`password`** `str`
308 | 
309 |    Password of the couch database
310 | - **`sessionNums`** `array-like`
311 | 
312 |    List of sessions to keep.
313 | 
314 | Returns:
315 | 
316 | - **`allAudioFiles`** `array-like`
317 | 
318 |    List of all audio files corresponding to the session, db etc.
319 | 
320 | #### `splitEnvInTrials(data, totalTrialNum, trialLen, verbose=False)`
321 | 
322 | Convert the concatenated array of sound to a 2D matrix of shape (trial, time).
323 | 
324 | - **`data`** `instance of numpy.array`
325 | 
326 |    Matrix of shape (samples,). Concatenated audio containing all trials in
327 | one 2D matrix.
328 | - **`totalTrialNum`** `int`
329 | 
330 |    The number of trials contained in the matrix data.
331 | - **`trialLen`** `int`
332 | 
333 |    The number of sample of one trial (we expect same length trials).
334 | 
335 | Returns:
336 | 
337 | - **`newData`** `instance of numpy.array`
338 | 
339 |    Matrix of shape (trial, time).
340 | 
341 | ## behavior.py
342 | 
343 | #### `analyses(data, verbose)`
344 | 
345 | Evaluate the behavior data by computing hits rate and false alarm rates. The
346 | continuous responses given by the participant are compared to the time stamps
347 | of the gaps in the attended stream and also in the unattended (if there is
348 | one). For each response: 1.calculate the delay between this response and each
349 | attended gap (`lagCorrect`). 2. calculate the delay between this response
350 | and each unattended gap (`laginCorrect`). 3. keep only positive values in
351 | each array because the response is done after the gap. This removes all
352 | other gaps for this response. 4. take the smaller value in each
353 | array: `minCorrect` and `minIncorrect`. 5. we consider that the response
354 | is linked to the gap if the delay is between `minThresh` and `maxThresh`.
355 | The margins should avoid having bumps in the two streams separated by less
356 | than maxThresh - minThresh.
357 | 
358 | - **`data`** `instance of pandas.core.DataFrame`
359 | 
360 |    Behavior data to use to run the analyses.
361 | - **`verbose`** `bool`
362 | 
363 |    Print more details about the process.
364 | 
365 | Returns:
366 | 
367 |    - **`analyses`** `instance of pandas.core.DataFrame`
368 | 
369 | Dataframe containing the number of hits and false alarms for each trial.
370 | 
371 | #### `checkLinkTrialsBehaviorEEG(trialBehavior, events, sessionNum, trigs, fs)`
372 | 
373 | Check that answer recorded in behavior data correspond to triggers emitted
374 | by this answer. This allows to be sure that EEG data correspond to behavior.
375 | 
376 | - **`fs`** `float`
377 | 
378 |    EEG data sampling frequency in Hz.
379 | 
380 | Returns:
381 | 
382 |    
383 | 
384 | #### `getBehaviorData(dbName, sessionNums)`
385 | 
386 | Get behavior data from the couch database according to the name of the DB and
387 | the sessions.
388 | 
389 | - **`dbName`** `str`
390 | 
391 |    Name of the database on the couch instance.
392 | - **`sessionNums`** `array-like`
393 | 
394 |    List of sessions to keep.
395 | 
396 | Returns:
397 | 
398 |    - **`behaviorData`** `instance of pandas.core.DataFrame`
399 | 
400 | Dataframe containing all parameters of all trials.
401 | 
402 | #### `getBehaviorDataSession(dbAddress, dbName, sessionNum)`
403 | 
404 | Fetch behavior data from couchdb (SOA, SNR and trial duration).
405 | 
406 | - **`dbAddress`** `str`
407 | 
408 |    Path to the couch database.
409 | - **`dbName`** `str`
410 | 
411 |    Name of the database on the couch instance.
412 | - **`sessionNum`** `int`
413 | 
414 |    Behavior data will be fetched from this sessionNum.
415 | 
416 | Returns:
417 | 
418 |    - **`allDoc`** `instance of pandas.core.DataFrame`
419 | 
420 | A dataframe containing requested data.
421 | 
422 | #### `getTrialNum(ref, allSubj, trialBehavior, **kwargs)`
423 | 
424 | Get the trial numbers corresponding to specific conditions.
425 | 
426 | - **`ref`** `int`
427 | 
428 |    If 1: the condition is all trials (like for overall analyses: exp 1 and 2).
429 | - **`allSubj`** `bool`
430 | 
431 |    Choose to return the trial number for one or all subjects.
432 | - **`trialBehavior`** `instance of Pandas.Dataframe`
433 | 
434 |    All behavior data. Trial numbers will be find related to condition present
435 | in this dataset.
436 | - **`**kwargs`** `other arguments`
437 | 
438 |    All conditions can be passed as argument like `correctStream=[False]`.
439 | 
440 | Returns:
441 | 
442 |    - **`allTrials`** `instance of numpy.array`
443 | 
444 | List of trial numbers.
445 | 
446 | #### `plotTrial(data, correctBump, incorrectBump, gapNum, trial, hitTime, FATime, falseHitTime, resp)`
447 | 
448 | Plot representation of the behavior trial. This shows the gaps of attended
449 | and unattended streams in green and red respectively and responses as
450 | vertical gray lines.
451 | still to implement...
452 | 
453 | 
454 | Returns:
455 | 
456 |    - **`allTrials`** `instance of numpy.array`
457 | 
458 | List of trial numbers.
459 | 
460 | 
461 | ## decodingSSR.py
462 | 
463 | #### `calculateBaseline(data, fs)`
464 | 
465 | Calculate the baseline in order to take into account the fact that the eeg
466 | response can be different for the two AM rates. This functions computes
467 | the ratio between the AM rates in the one stream condition.
468 | 
469 | - **`eegData`** `instance of numpy.array`
470 | 
471 |    EEG data of shape (trial, time, electrode).
472 | 
473 | Returns:
474 | 
475 |    - **`ratio`** `float`
476 | 
477 | Ratio between the 36 Hz stream and the 44 Hz stream.
478 | 
479 | #### `comparePicks(data, fs)`
480 | 
481 | Calculate the baseline in order to take into account the fact that the eeg
482 | response can be different for the two AM rates. This functions computes
483 | the ratio between the AM rates in the one stream condition.
484 | 
485 | - **`eegData`** `instance of numpy.array`
486 | 
487 |    EEG data of shape (time, electrode).
488 | 
489 | Returns:
490 | 
491 |    - **`ratio`** `float`
492 | 
493 | Ratio between the 36 Hz stream and the 44 Hz stream.
494 | 
495 | #### `crossVal(data, data1, fs)`
496 | 
497 | This function has changed. To update...
498 | 
499 | - **`data`** `array-type`
500 | 
501 |    Shape (trial, time, electrode). Compute pick at 36 Hz for each trial.
502 | - **`data1`** `array-type`
503 | 
504 |    Shape (trial, time, electrode). Compute pick at 44 Hz for each trial.
505 | - **`fs`** `float`
506 | 
507 |    Sampling frequency in Hz.
508 | 
509 | Returns:
510 | 
511 |    - **`aAll`** `array-type`
512 | 
513 | List of pick values for 36 Hz from `data`. Length of trial number.
514 | - **`bAll`** `array-type`
515 | 
516 |    List of pick values for 44 Hz from `data1`. Length of trial number.
517 | 
518 | #### `getBestAcc(durs, bestC)`
519 | 
520 | Return the c parameter corresponding to the better accuracy for the 4
521 | participants and for each duration.
522 | 
523 | - **`durs`** `array-type`
524 | 
525 |    .List of durations to consider.
526 | - **`bestC`** `instance of pandas.Dataframe`
527 | 
528 |    Dataframe returned from the function 'hyperOptC'.
529 | 
530 | Returns:
531 | 
532 |    - **`p1AccAll`** `array-type`
533 | 
534 | List of accuracies for each duration with the better c parameter
535 | (at 60s) for the participant 1.
536 | - **`p2AccAll`** `array-type`
537 | 
538 |    List of accuracies for each duration with the better c parameter
539 | (at 60s) for the participant 2.
540 | - **`p3AccAll`** `array-type`
541 | 
542 |    List of accuracies for each duration with the better c parameter
543 | (at 60s) for the participant 3.
544 | - **`p4AccAll`** `array-type`
545 | 
546 |    List of accuracies for each duration with the better c parameter
547 | (at 60s) for the participant 4.
548 | 
549 | #### `getSSRAccuracyByDur(data, trials, fs)`
550 | 
551 | Get the classification accuracy according to duration of trials and trials used.
552 | 
553 | - **`data`** `array-type`
554 | 
555 |    Data to use to check accuracy.
556 | - **`trials`** `array-type`
557 | 
558 |    Trials to consider.
559 | - **`fs`** `float`
560 | 
561 |    Sampling frequency in Hz.
562 | 
563 | Returns:
564 | 
565 |    - **`allComparisons`** `array-type`
566 | 
567 | Array containing all comparison (for each duration).
568 | 
569 | #### `hyperOptC(data, c_vals, durs, electrodes, dprimeThresh, subjNum, condition, fs, trialBehaviorAll)`
570 | 
571 | Perform the hyper optimization of the c parameter of the SVM algorithm.
572 | Also compute the accuracy for a set of durations.
573 | 
574 | - **`data`** `array-type`
575 | 
576 |    Data to use. Shape (trial, time, electrode).
577 | - **`c_vals`** `array-type`
578 | 
579 |    List of c values to try.
580 | - **`durs`** `array-type`
581 | 
582 |    List of durations to use.
583 | - **`electrodes`** `array-type`
584 | 
585 |    List of electrodes to consider.
586 | - **`dprimeThresh`** `float`
587 | 
588 |    Threshold of dprime to include the trial as a good trial.
589 | - **`subjNum`** `array-type`
590 | 
591 |    List of subject to consider.
592 | - **`condition`** `str`
593 | 
594 |    'oneStream' or 'twoStreams': choose the condition.
595 | - **`fs`** `float`
596 | 
597 |    Sampling frequency in Hz.
598 | - **`trialBehaviorAll`** `instance of pandas.Dataframe`
599 | 
600 |    Behavior data from all participants.
601 | 
602 | 
603 | Returns:
604 | 
605 |    - **`bestC`** `instance of pandas.Dataframe`
606 | 
607 | Dataframe containing the accuracy for each c parameter and duration.
608 | 
609 | 
610 | ## decodingTRF.py
611 | 
612 | All functions used to do the decoding from stimulus reconstruction.
613 | 
614 | #### `calculateCorr(env1, env2, fs, end=None)`
615 | 
616 | Get correlations between env1 and env2 for each trials.
617 | 
618 | - **`env1`** `array-type`
619 | 
620 |    First list of envelope of shape (trial, time).
621 | - **`env2`** `array-type`
622 | 
623 |    Second list of envelope of shape (trial, time).
624 | - **`fs`** `float`
625 | 
626 |    Sampling frequency in Hz.
627 | - **`end`** `float`
628 | 
629 |    End limit in seconds to take for each trial.
630 | 
631 | Returns:
632 | 
633 | - **`corrs`** `array-type`
634 | 
635 |    List of correlations of shape (trial, 1).
636 | 
637 | #### `getTRFAccuracyByDur(envAttended, envUnattended, envMismatch, envReconstructed, trials, trialsDualStream)`
638 | 
639 | Get the classification accuracy according to duration of trials and trials used.
640 | 
641 | - **`envAttended`** `instance of numpy.array`
642 | 
643 |    Attended envelopes. Shape (trial, time).
644 | - **`envUnattended`** `instance of numpy.array`
645 | 
646 |    Unattended envelopes. Shape (trial, time).
647 | - **`envMismatch`** `instance of numpy.array`
648 | 
649 |    Mismatch envelopes (corresponding to another trial). Shape (trial, time).
650 | - **`envReconstructed`** `instance of numpy.array`
651 | 
652 |    Reconstructed envelopes. Shape (trial, time).
653 | - **`trials`** `array-type`
654 | 
655 |    Trials to consider.
656 | - **`trialsDualStream`** `array-like`
657 | 
658 |    Trials to consider in the exp 2 referential (attended vs unattended with
659 | only 40 trials)
660 | 
661 | Returns:
662 | 
663 |    - **`classifMismatchTime`** `array-type`
664 | 
665 | List of classification accuracies (one value per second) for attended versus
666 | mismatch stream.
667 | - **`classifAtt_unattTime`** `array-type`
668 | 
669 |    List of classification accuracies (one value per second) for attended versus
670 | unattended stream (only trials included in the dual stream part).
671 | 
672 | #### `getUnattendedTrialsNumber(trials)`
673 | 
674 | Get the trials for the condition dual streams. Since this condition was only from
675 | trial 40 to 80 these trials not all trials have to be considered.
676 | 
677 | - **`trials`** `array-type`
678 | 
679 |    Trials to consider.
680 | Returns:
681 | 
682 | - **`newEnv`** `array-type`
683 | 
684 |    Envelope of the selected trials of shape (trials, time).
685 | 
686 | 
687 | ## eeg_utils.py
688 | 
689 | #### `loadDataH5(path, pathReconstructed)`
690 | 
691 | Load data from .h5 file. This expects to load one file containing the EEG
692 | and the envelopes of the stimuli and another file the reconstructed
693 | envelope created from Matlab.
694 | 
695 | - **`path`** `str`
696 | 
697 |    Path to the `.h5` file containing EEG and stimuli envelopes.
698 | - **`pathReconstructed`** `str`
699 | 
700 |    Path to the `.h5` file containing the reconstructed envelopes.
701 | 
702 | Returns:
703 | 
704 |    - **`eeg_TRF`** `instance of numpy.array`
705 | 
706 | A matrix of shape (trial, time, electrode) containing the data processed
707 | for the TRF.
708 | - **`eeg_TRF`** `instance of numpy.array`
709 | 
710 |    to do.
711 | - **`envMismatch`** `instance of numpy.array`
712 | 
713 |    to do.
714 | - **`envUnattended`** `instance of numpy.array`
715 | 
716 |    to do.
717 | - **`envReconstructed`** `instance of numpy.array`
718 | 
719 |    to do.
720 | - **`eeg_aSSR`** `instance of numpy.array`
721 | 
722 |    to do.
723 | 
724 | #### `processEEG(fnEEG, dbName, sessionNums, trialsToRemove, trialBehavior, fs, ref)`
725 | 
726 | Load and process EEG from .bdf file. The data is filtered according to
727 | `freqFilter`, re-referenced according to the mastoids and downsampled
728 | to 64 Hz if `downsampling` is set to True.
729 | 
730 | - **`fn`** `str`
731 | 
732 |    Name of the bdf containing the EEG data.
733 | - **`dbName`** `str`
734 | 
735 |    Name of the database on the couch instance.
736 | - **`sessionNums`** `array-type`
737 | 
738 |    List of sessions to keep.
739 | - **`trialsToRemove`** `array-type`
740 | 
741 |    List of trials to remove from the analyses.
742 | - **`ref`** `str`
743 | 
744 |    Choose between referencing to mastoids ('mastoids') or to the average
745 | of all electrodes ('average').
746 | - **`fs`** `float`
747 | 
748 |    Sampling frequency in Hz.
749 | 
750 | Returns:
751 | 
752 |    - **`dataFilt3D64`** `instance of numpy.array`
753 | 
754 | A matrix of shape (trial, time, electrode) containing the processed data.
755 | 


--------------------------------------------------------------------------------