├── .gitignore
├── src
    ├── .-temp.csv
    ├── .ipynb_checkpoints
    │   ├── SVM-checkpoint.ipynb
    │   ├── look_into_data-checkpoint.ipynb
    │   ├── EEGFeatureExtraction-checkpoint.ipynb
    │   ├── E4_Extract_Feature-checkpoint.ipynb
    │   ├── Kmean-for-sth-idk-checkpoint.ipynb
    │   ├── DNN-checkpoint.ipynb
    │   ├── EEGPreprocessing-checkpoint.ipynb
    │   └── prepare_data_to_db-checkpoint.ipynb
    ├── EEGFeatureExtraction.ipynb
    ├── E4_Extract_Feature.ipynb
    ├── GridSearchCV.py
    ├── GridSearchCV-channel.py
    ├── GridSearchCV-frequency.py
    └── EEGPreprocessing.ipynb
├── .DS_Store
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | data/
2 | .ipynb_checkpoints/
3 | 


--------------------------------------------------------------------------------
/src/.-temp.csv:
--------------------------------------------------------------------------------
1 | subj_id, clip_id, exclude,
2 | 0, 0, 1, 
3 | 


--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IoBT-VISTEC/EEG-Emotion-Recognition-INTERFACES-datasets/HEAD/.DS_Store


--------------------------------------------------------------------------------
/src/.ipynb_checkpoints/SVM-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [],
3 |  "metadata": {},
4 |  "nbformat": 4,
5 |  "nbformat_minor": 2
6 | }
7 | 


--------------------------------------------------------------------------------
/src/.ipynb_checkpoints/look_into_data-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [],
3 |  "metadata": {},
4 |  "nbformat": 4,
5 |  "nbformat_minor": 2
6 | }
7 | 


--------------------------------------------------------------------------------
/src/EEGFeatureExtraction.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import numpy as np\n",
10 |     "from scipy import signal\n",
11 |     "import math\n",
12 |     "from sklearn.preprocessing import MinMaxScaler"
13 |    ]
14 |   },
15 |   {
16 |    "cell_type": "code",
17 |    "execution_count": null,
18 |    "metadata": {},
19 |    "outputs": [],
20 |    "source": [
21 |     "class EEGFeatureExtraction:\n",
22 |     "    \n",
23 |     "    def __init__(self, data_path, fs):\n",
24 |     "        self.__data = np.load(data_path)\n",
25 |     "        self.__fs = fs\n",
26 |     "        self.__no_of_samples, self.__no_of_channels, self.__no_of_bands, self.__no_of_sampling = self.__data.shape\n",
27 |     "        print (self.__data.shape)\n",
28 |     "    \n",
29 |     "    def start(self):\n",
30 |     "        results = np.zeros(shape = (self.__no_of_samples, self.__no_of_channels * self.__no_of_bands))\n",
31 |     "        for sample_id, sample in enumerate(self.__data):\n",
32 |     "            for ch_id, channel in enumerate(sample):\n",
33 |     "                for band_id, band in enumerate(channel):\n",
34 |     "                    f, Pxx_den = signal.welch(band, self.__fs, nperseg=256, noverlap=128)\n",
35 |     "                    #print Pxx_den.shape, Pxx_den\n",
36 |     "                    \n",
37 |     "                    \"\"\"\n",
38 |     "                    import matplotlib.pyplot as plt\n",
39 |     "                    plt.plot(Pxx_den)\n",
40 |     "                    plt.show()\n",
41 |     "                    print np.max(Pxx_den), math.log(np.max(Pxx_den))\n",
42 |     "                    \"\"\"\n",
43 |     "                \n",
44 |     "                    results[sample_id][self.__no_of_bands*ch_id + band_id] = math.log(np.max(Pxx_den))\n",
45 |     "\n",
46 |     "        return results\n",
47 |     "    \n",
48 |     "    def save_to_numpy(self, features):\n",
49 |     "        print ('save feature vector to numpy', features.shape)\n",
50 |     "        np.save('../data/EEG/feature_extracted/EEG_ICA.npy', features)"
51 |    ]
52 |   },
53 |   {
54 |    "cell_type": "code",
55 |    "execution_count": null,
56 |    "metadata": {},
57 |    "outputs": [],
58 |    "source": [
59 |     "eegFeatureExtraction = EEGFeatureExtraction(data_path = '../data/EEG/preprocessed/EEG_ICA.npy', fs = 250)\n",
60 |     "features = eegFeatureExtraction.start()\n",
61 |     "eegFeatureExtraction.save_to_numpy(features)\n",
62 |     "print (features)"
63 |    ]
64 |   },
65 |   {
66 |    "cell_type": "code",
67 |    "execution_count": null,
68 |    "metadata": {},
69 |    "outputs": [],
70 |    "source": []
71 |   }
72 |  ],
73 |  "metadata": {
74 |   "kernelspec": {
75 |    "display_name": "Python 2",
76 |    "language": "python",
77 |    "name": "python2"
78 |   },
79 |   "language_info": {
80 |    "codemirror_mode": {
81 |     "name": "ipython",
82 |     "version": 3
83 |    },
84 |    "file_extension": ".py",
85 |    "mimetype": "text/x-python",
86 |    "name": "python",
87 |    "nbconvert_exporter": "python",
88 |    "pygments_lexer": "ipython3",
89 |    "version": "3.5.2"
90 |   }
91 |  },
92 |  "nbformat": 4,
93 |  "nbformat_minor": 2
94 | }
95 | 


--------------------------------------------------------------------------------
/src/.ipynb_checkpoints/EEGFeatureExtraction-checkpoint.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import numpy as np\n",
10 |     "from scipy import signal\n",
11 |     "import math\n",
12 |     "from sklearn.preprocessing import MinMaxScaler"
13 |    ]
14 |   },
15 |   {
16 |    "cell_type": "code",
17 |    "execution_count": null,
18 |    "metadata": {},
19 |    "outputs": [],
20 |    "source": [
21 |     "class EEGFeatureExtraction:\n",
22 |     "    \n",
23 |     "    def __init__(self, data_path, fs):\n",
24 |     "        self.__data = np.load(data_path)\n",
25 |     "        self.__fs = fs\n",
26 |     "        self.__no_of_samples, self.__no_of_channels, self.__no_of_bands, self.__no_of_sampling = self.__data.shape\n",
27 |     "        print (self.__data.shape)\n",
28 |     "    \n",
29 |     "    def start(self):\n",
30 |     "        results = np.zeros(shape = (self.__no_of_samples, self.__no_of_channels * self.__no_of_bands))\n",
31 |     "        for sample_id, sample in enumerate(self.__data):\n",
32 |     "            for ch_id, channel in enumerate(sample):\n",
33 |     "                for band_id, band in enumerate(channel):\n",
34 |     "                    f, Pxx_den = signal.welch(band, self.__fs, nperseg=256, noverlap=128)\n",
35 |     "                    #print Pxx_den.shape, Pxx_den\n",
36 |     "                    \n",
37 |     "                    \"\"\"\n",
38 |     "                    import matplotlib.pyplot as plt\n",
39 |     "                    plt.plot(Pxx_den)\n",
40 |     "                    plt.show()\n",
41 |     "                    print np.max(Pxx_den), math.log(np.max(Pxx_den))\n",
42 |     "                    \"\"\"\n",
43 |     "                \n",
44 |     "                    results[sample_id][self.__no_of_bands*ch_id + band_id] = math.log(np.max(Pxx_den))\n",
45 |     "\n",
46 |     "        return results\n",
47 |     "    \n",
48 |     "    def save_to_numpy(self, features):\n",
49 |     "        print ('save feature vector to numpy', features.shape)\n",
50 |     "        np.save('../data/EEG/feature_extracted/EEG_ICA.npy', features)"
51 |    ]
52 |   },
53 |   {
54 |    "cell_type": "code",
55 |    "execution_count": null,
56 |    "metadata": {},
57 |    "outputs": [],
58 |    "source": [
59 |     "eegFeatureExtraction = EEGFeatureExtraction(data_path = '.../data/EEG/preprocessed/EEG_ICA.npy', fs = 250)\n",
60 |     "features = eegFeatureExtraction.start()\n",
61 |     "eegFeatureExtraction.save_to_numpy(features)\n",
62 |     "print (features)"
63 |    ]
64 |   },
65 |   {
66 |    "cell_type": "code",
67 |    "execution_count": null,
68 |    "metadata": {},
69 |    "outputs": [],
70 |    "source": []
71 |   }
72 |  ],
73 |  "metadata": {
74 |   "kernelspec": {
75 |    "display_name": "Python 2",
76 |    "language": "python",
77 |    "name": "python2"
78 |   },
79 |   "language_info": {
80 |    "codemirror_mode": {
81 |     "name": "ipython",
82 |     "version": 3
83 |    },
84 |    "file_extension": ".py",
85 |    "mimetype": "text/x-python",
86 |    "name": "python",
87 |    "nbconvert_exporter": "python",
88 |    "pygments_lexer": "ipython3",
89 |    "version": "3.5.2"
90 |   }
91 |  },
92 |  "nbformat": 4,
93 |  "nbformat_minor": 2
94 | }
95 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Consumer Grade Brain Sensing for Emotion Recognition (INTERFACES datasets)
  2 | 
  3 | ## Overview
  4 | Steps:
  5 |     - Data Preprocessing -> Feature Extraction -> Classifier (Emotion recognition)
  6 |     
  7 | Note:
  8 |     - Import data from data-lake [Dropbox](https://www.dropbox.com/sh/l0fxvbcvf15vnv1/AACMiqjFOMse6ODftzU7AKMxa?dl=0) or [Google Drive](https://drive.google.com/drive/folders/1uEdYurqxZb1hZX8IGuI-WYJAyyb8JDxn?usp=sharing) to folder ./data/
  9 | 
 10 | 
 11 | ## Database Description
 12 | After data acquisition, the data were preprocessed and features were extracted. The emotion database is available in a data lake. Its structure and files are described as follows:
 13 | 
 14 | 
 15 |     • Task 2-5 Emotion/
 16 |         • EEG/ [*]
 17 |             • feature extracted/
 18 |                 · EEG ICA.npy: Power Spectral Density of each frequency band and channel as Table 4.1 with ICA method in shape (N.subjects x N.clips, N.channels, N.freq bands, 1) = (645, 8, 4, 1)
 19 |                 · EEG no ICA.npy: Power Spectral Density of each frequency band and channel as Table 4.1 without ICA method in shape (N.subjects x N.clips, N.channels, N.freq bands, 1) = (645, 8, 4, 1)
 20 |             • preprocessed/
 21 |                 · EEG ICA.npy: EEG signal with ICA method in shape (N.subjects x N.clips, N.channels, N.freq bands, N.sampling points (56sec)) = (645, 8, 4, 14000)
 22 |                 · EEG no ICA.npy: EEG signals without ICA method in shape (N.subjects x N.clips, N.channels, N.freq bands, N.sampling points (56sec)) = (645, 8, 4, 14000)
 23 |             • raw/ [**]
 24 |                 · EEG.npy: raw EEG signals(µV) with sampling rate 250 Hz recorded from OpenBCI in shape (N.subjects, N.clips, N.channels, N.sampling points(56sec)) = (43, 15, 8, 14000) 
 25 |         • E4/
 26 |             • feature extracted/
 27 |                 · BVP.npy: Data from photoplethysmography after preprocessing as Table 4.1 in shape (N.subject, N.clip, N.of features) =(43, 15, 13)
 28 |                 · EDA.npy: Data from the electrodermal activity sensor after preprocessing as Table 4.1 in shape (N.subject, N. clip, N.features) = (43, 15, 21)
 29 |                 · TEMP.npy: Data from temperature sensor after preprocessing as Table 4.1 in shape (N.subject, N.clip, N.features) = (43, 15, 4)
 30 |             • raw/ [**]
 31 |                 · ACC.npy: Data from 3-axis accelerometer sensor with sampling rate 32 Hz recorded from Empatica E4 in shape (N.subject, N.clip, N. x, y, and z axis, N.sampling points (56 sec)) = (43, 15, 3, 1792)
 32 |                 · BVP.npy: Data from photoplethysmography with sampling rate 64 Hz recorded from Empatica E4 in shape (N.subject, N. clip, N.sampling points (56 sec)) = (43, 15, 3584)
 33 |                 · EDA.npy: Data from the electrodermal activity sensor expressed as microsiemens (µS) with sampling rate 4 Hz recorded from Empatica E4 in shape (N.subject, N.clip, N.sampling points (56 sec)) = (43, 15, 224)
 34 |                 · HR.npy: Data from heart rate with sampling rate 1 Hz recorded from Empatica E4 in shape (N.subject, N.clip, N.sampling points (56 sec)) = (43, 15, 56)
 35 |                 · HRV.npy: Heart rate variability recorded from Empatica E4 in shape (N.subject, N.clip) = (43, 15)
 36 |                 · IBI.npy: Inter-beat interval recorded from Empatica E4 in shape (N.subject, N.clip) = (43, 15)
 37 |                 · TEMP.npy: Data from temperature sensor (°C) with sampling rate 4 Hz recorded from Empatica E4 in shape (N.subject, N.clip, N.sampling points (56 sec)) = (43, 15, 224)
 38 |         • score/
 39 |     
 40 |         • label/
 41 |                 · arousal.npy: Labeling of arousal score (0:low or 1:high).
 42 |                 · excite.npy: Labeling of excite score (0:low or 1:high).
 43 |                 · fear.npy: Labeling of fear score (0:low or 1:high).
 44 |                 · happy.npy: Labeling of happy score (0:low or 1:high).
 45 |                 · rating.npy: Labeling of rating score (0:low or 1:high).
 46 |                 · valence.npy: Labeling of valence score (0:low or 1:high).
 47 |         • raw/
 48 |                 · Raw.npy: Self-reported emotional scores of all participants in shape (N.subject, N.clip, emotional score) = (43, 15, 1) for each emotion (happy, fear, excite, arousal, valence, rating).
 49 |         • clip/
 50 |                 · All video clips which were played to participants.
 51 |         • Clip list.csv : Name of clips that were played for each participant. (15 clips/person)
 52 |         
 53 |     [*] The EEG channels include Fp1, Fp2, Fz, Cz, T3, T4, Pz and Oz, respectively.
 54 |         The frequency bands include theta (3–7 [Hz]), alpha(8–13 [Hz]), beta(14–29 [Hz]) and gamma(30–47[Hz]). 
 55 |     [**] (Raw data are not provided in this repository.)
 56 | 
 57 |       
 58 |       
 59 | ## System Manual
 60 | 1.Pre-installation
 61 |     
 62 |     • Set up python libraries: numpy, scipy, sklearn, mne, pandas, and matplotlib
 63 |     • Create a directory named data at emotion-monitoring-system/data
 64 |     • Copy data from data-lake to the above directory
 65 |     
 66 |     
 67 | 2.Pre-processing data
 68 | 
 69 |     • Go to ./src
 70 |     • Open and run all cells in EEGPreprocessing.ipynb
 71 |     • Answer the question "Do you want to re-run all? (y/n):"
 72 |         – If this is the first time you are preprocessing the data, type y.
 73 |         – Otherwise, type y if you want to re-run all again or n if you want to continue from the latest pre-processed signal.
 74 |     • The program will perform preprocessing to each sample including
 75 |         – Independent Component Analysis (ICA): In this step, it allows experts to specify which components should be removed from
 76 |     the EEG signals.
 77 |         – Common Average Reference (CAR)
 78 |         – Bandpass filter to sub-frequency bands including theta (3–7 Hz), alpha (8–13 Hz), beta (14–29 Hz) and gamma (30–47 Hz)
 79 |         – Reshape data to (number of samples per subject * number of subjects, number of channels, number of sub-frequency bands, number of sampling points) = (645, 8, 4, 14000)
 80 |     • The program automatically saves all data into data/EEG/preprocessed/EEG_ICA.npy
 81 |     
 82 |     
 83 | 3.Feature Extraction
 84 | 
 85 |     • EEG
 86 |         – Go to ./src
 87 |         – Open and run all cells in EEGFeatureExtraction.ipynb
 88 |         – The software automatically
 89 |             * Calculates Power Spectral Density (PSD) of each sub frequency band.
 90 |             * Saves into data/EEG/feature_extracted/EEG_ICA.npy
 91 |     • Body signals
 92 |         – Go to ./src
 93 |         – Open and run all cells in E4_Extract_Feature.ipynb
 94 |         – The software automatically
 95 |             * Calculates all features from E4 (Empatica)
 96 |             * Saves EDA.npy, TEMP.npy, and BVP.npy into data/E4/feature_extracted/
 97 | 
 98 | 
 99 | 
100 | ### Our Paper
101 | 
102 | When using (any part of) this dataset, please cite [our paper](https://ieeexplore.ieee.org/document/8762012)
103 | 
104 | ```
105 | @ARTICLE{8762012,  
106 |     author={P. {Lakhan} and N. {Banluesombatkul} and V. {Changniam} and R. {Dhithijaiyratn} and P. {Leelaarporn} and E. {Boonchieng} and S. {Hompoonsup} and T. {Wilaiprasitporn}}, 
107 |     journal={IEEE Sensors Journal}, 
108 |     title={Consumer Grade Brain Sensing for Emotion Recognition}, 
109 |     year={2019}, 
110 |     volume={19}, 
111 |     number={21}, 
112 |     pages={9896-9907}, 
113 |     keywords={brain;brain-computer interfaces;electroencephalography;emotion recognition;feature extraction;learning (artificial intelligence);medical signal processing;affective video clips;elicited signals;classification model;peripheral physiological signals;emotional EEG brainwaves;pre-selected clips;unsupervised machine learning;audio-visual stimuli;emotion classification;research grade EEG system;diverse emotion-eliciting stimuli;distinctive brain activities;emotional state recognition;emotion recognition;consumer grade brain sensing;Electroencephalography;Emotion recognition;Biomedical monitoring;Feature extraction;Sensors;Brain;Prediction algorithms;Consumer grade EEG;low-cost EEG;OpenBCI;emotion recognition;affective computing}, 
114 |     doi={10.1109/JSEN.2019.2928781}, 
115 |     ISSN={2379-9153}, 
116 |     month={Nov}, 
117 | }
118 | ```
119 | 


--------------------------------------------------------------------------------
/src/E4_Extract_Feature.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "from scipy import signal\n",
 11 |     "import math\n",
 12 |     "from scipy.signal import butter, lfilter, freqz\n",
 13 |     "from scipy.signal import argrelextrema"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 2,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "#Fillter\n",
 23 |     "def butter_highpass(cutoff, fs, order=2):\n",
 24 |     "    nyq = 0.5 * fs\n",
 25 |     "    normal_cutoff = cutoff / nyq\n",
 26 |     "    b, a = butter(order, normal_cutoff, btype='high', analog=False)\n",
 27 |     "    return b, a\n",
 28 |     "def butter_lowpass(cut, fs, order=2):\n",
 29 |     "    nyq = 0.5 * fs\n",
 30 |     "    normal_cutoff = cut / nyq\n",
 31 |     "    b, a = butter(order, normal_cutoff, btype='low', analog=False)\n",
 32 |     "    return b, a\n",
 33 |     "def butter_bandpass(lowcut, highcut, fs, order=2):\n",
 34 |     "    nyq = 0.5 * fs\n",
 35 |     "    low = lowcut / nyq\n",
 36 |     "    high = highcut / nyq\n",
 37 |     "    b, a = signal.butter(order, [low, high], btype='band')\n",
 38 |     "    return b, a"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "N_subject = 43\n",
 48 |     "N_clip =15"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 4,
 54 |    "metadata": {},
 55 |    "outputs": [
 56 |     {
 57 |      "name": "stderr",
 58 |      "output_type": "stream",
 59 |      "text": [
 60 |       "/usr/local/lib/python3.5/dist-packages/scipy/signal/_arraytools.py:45: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n",
 61 |       "  b = a[a_slice]\n"
 62 |      ]
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "EDA = np.load('../data/E4/raw/EDA.npy',encoding=\"latin1\") #Raw EDA\n",
 67 |     "Num_EDA_fea =21\n",
 68 |     "smp_EDA = 64.0\n",
 69 |     "EDA_fea=np.zeros([N_subject*N_clip,Num_EDA_fea ])\n",
 70 |     "cnt=0\n",
 71 |     "for i in range(EDA.shape[0]):\n",
 72 |     "    for j in range(EDA.shape[1]):\n",
 73 |     "        \n",
 74 |     "        EDA_fea[cnt,0] = np.mean(EDA[i,j,:])          #average skin res\n",
 75 |     "        EDA_fea[cnt,1] = np.mean(np.diff(EDA[i,j,:])) #average diff\n",
 76 |     "        EDA_fea[cnt,2] = np.mean((np.diff(EDA[0,0,:])<0)) #average diff Nega\n",
 77 |     "        EDA_fea[cnt,3] = float (np.sum(np.diff(EDA[0,0,:])<0))/np.diff(EDA[0,0,:]).shape[0] #ratio Nega vs all\n",
 78 |     "        EDA_fea[cnt,4] = (argrelextrema(EDA[0,0,:], np.less)[0]).shape[0] #number local\n",
 79 |     "        \n",
 80 |     "        RiseTime=[]\n",
 81 |     "        RT=0\n",
 82 |     "        Dat = np.diff(EDA[0,0])\n",
 83 |     "        for k in range(Dat.shape[0]):\n",
 84 |     "            if(Dat[k]>0):\n",
 85 |     "                RT+=1\n",
 86 |     "            elif (Dat[k-1]>0):\n",
 87 |     "                RiseTime.append(RT)\n",
 88 |     "                RT = 0\n",
 89 |     "        \n",
 90 |     "        EDA_fea[cnt,5] = np.mean(RiseTime)/(smp_EDA)  #average rising time in sec\n",
 91 |     "        \n",
 92 |     "        data =EDA[0,0,:]\n",
 93 |     "        ps =  np.abs(np.fft.fft(data))**2\n",
 94 |     "        \n",
 95 |     "        EDA_fea[cnt,6:19] = ps[0:13] \n",
 96 |     "        \n",
 97 |     "        b, a = butter_lowpass(0.2, fs=4, order=2) # low pass filter at 0.2 Hz\n",
 98 |     "        databand1 = signal.filtfilt(b, a, EDA[i,j,:]) \n",
 99 |     "        b, a = butter_lowpass(0.08, fs=4, order=2) # low pass at 0.08 Hz\n",
100 |     "        databand2 = signal.filtfilt(b, a, EDA[i,j,:]) \n",
101 |     "        \n",
102 |     "        EDA_fea[cnt,19] = ((databand1[:-1] * databand1[1:]) < 0).sum()\n",
103 |     "        EDA_fea[cnt,20] = ((databand2[:-1] * databand2[1:]) < 0).sum()\n",
104 |     "        cnt+=1\n",
105 |     "np.save('../data/E4/feature_extracted/EDA.npy', EDA_fea)"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 5,
111 |    "metadata": {},
112 |    "outputs": [],
113 |    "source": [
114 |     "TEMP = np.load('../data/E4/raw/TEMP.npy',encoding=\"latin1\") #Raw Temp data\n",
115 |     "Num_TEMP_fea = 4 \n",
116 |     "TEMP_fea=np.zeros([N_subject*N_clip,Num_TEMP_fea])\n",
117 |     "cnt=0\n",
118 |     "for i in range(TEMP.shape[0]):\n",
119 |     "    for j in range(TEMP.shape[1]):\n",
120 |     "        TEMP_fea[cnt,0] = np.mean(TEMP[i,j,:])\n",
121 |     "        TEMP_fea[cnt,1] = np.mean(np.diff(TEMP[i,j,:]))\n",
122 |     "        data = TEMP[i,j,:]\n",
123 |     "        \n",
124 |     "        ps = np.abs(np.fft.fft(data))**2\n",
125 |     "        \n",
126 |     "        TEMP_fea[cnt,2] = sum(ps[0:5]) # band 0 - 0.1\n",
127 |     "        TEMP_fea[cnt,3] = sum(ps[5:10]) # band 0.1-0.2\n",
128 |     "        cnt+=1\n",
129 |     "np.save('../data/E4/feature_extracted/TEMP.npy', TEMP_fea)"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 6,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "BVP = np.load('../data/E4/raw/BVP.npy',encoding=\"latin1\")\n",
139 |     "HR = np.load('../data/E4/raw/HR.npy',encoding=\"latin1\")\n",
140 |     "IBI = np.load('../data/E4/raw/IBI.npy',encoding=\"latin1\")\n",
141 |     "HRV = np.load('../data/E4/raw/HRV.npy',encoding=\"latin1\")\n",
142 |     "Num_BVP_fea = 13\n",
143 |     "BVP_fea=np.zeros([N_subject*N_clip,Num_BVP_fea])\n",
144 |     "cnt=0\n",
145 |     "for i in range(BVP.shape[0]):\n",
146 |     "    for j in range(BVP.shape[1]):\n",
147 |     "        BVP_fea[cnt,0] = np.mean(HR[i,j,:])\n",
148 |     "        BVP_fea[cnt,1] = np.std(HR[i,j,:])\n",
149 |     "        BVP_fea[cnt,2] = np.mean(HRV[i,j])\n",
150 |     "        BVP_fea[cnt,3] = np.std(HRV[i,j])\n",
151 |     "        BVP_fea[cnt,4] = np.mean(IBI[i,j])\n",
152 |     "        BVP_fea[cnt,5] = np.std(IBI[i,j])\n",
153 |     "        data =  BVP[i,j,:]\n",
154 |     "        ps = np.abs(np.fft.fft(data))**2\n",
155 |     "        BVP_fea[cnt,6] = sum(ps[6:12])\n",
156 |     "        BVP_fea[cnt,7] = sum(ps[12:17])\n",
157 |     "        BVP_fea[cnt,8] = sum(ps[12:23])\n",
158 |     "        b, a = butter_bandpass(0.04, 0.15, fs=64, order=2) # Band Pass filter 0.04-0.15 Hz\n",
159 |     "        databand4 = signal.filtfilt(b, a, BVP[i,j,:]) \n",
160 |     "        b, a = butter_bandpass(0.15, 0.5, fs=64, order=2) # Band Pass filter 0.15-0.5 Hz\n",
161 |     "        databand5 = signal.filtfilt(b, a, BVP[i,j,:]) \n",
162 |     "        Enegy1 =sum(databand4**2)\n",
163 |     "        Enegy2 =sum(databand5**2)\n",
164 |     "        BVP_fea[cnt,9] = Enegy1/Enegy2\n",
165 |     "        fs=HRV[i,j].shape[0]/56.0\n",
166 |     "        data =  HRV[0,0]\n",
167 |     "        ps = np.abs(np.fft.fft(data))**2\n",
168 |     "        fs = HRV[0,0].shape[0]/56.0\n",
169 |     "        time_step = 1 / fs \n",
170 |     "        freqs = np.fft.fftfreq(data.size, time_step)\n",
171 |     "        idx = np.argsort(freqs)\n",
172 |     "        for st in range(freqs.shape[0]):\n",
173 |     "            if(freqs[st] >= 0.01):\n",
174 |     "                for end in range(st,freqs.shape[0]):\n",
175 |     "                    if(freqs[end]>0.08):\n",
176 |     "                        break\n",
177 |     "                break\n",
178 |     "\n",
179 |     "        BVP_fea[cnt,10] = sum(ps[st:end])\n",
180 |     "        for st in range(freqs.shape[0]):\n",
181 |     "            if(freqs[st] >= 0.08):\n",
182 |     "                for end in range(st,freqs.shape[0]):\n",
183 |     "                    if(freqs[end]>0.15):\n",
184 |     "                        break\n",
185 |     "                break\n",
186 |     "        BVP_fea[cnt,11] = sum(ps[st:end])\n",
187 |     "        for st in range(freqs.shape[0]):\n",
188 |     "            if(freqs[st] >= 0.15):\n",
189 |     "                for end in range(st,freqs.shape[0]):\n",
190 |     "                    if(freqs[end]>0.5):\n",
191 |     "                        break\n",
192 |     "                break\n",
193 |     "        BVP_fea[cnt,12] = sum(ps[st:end])\n",
194 |     "        cnt+=1\n",
195 |     "np.save('../data/E4/feature_extracted/BVP.npy', BVP_fea)"
196 |    ]
197 |   }
198 |  ],
199 |  "metadata": {
200 |   "kernelspec": {
201 |    "display_name": "Python 3",
202 |    "language": "python",
203 |    "name": "python3"
204 |   },
205 |   "language_info": {
206 |    "codemirror_mode": {
207 |     "name": "ipython",
208 |     "version": 3
209 |    },
210 |    "file_extension": ".py",
211 |    "mimetype": "text/x-python",
212 |    "name": "python",
213 |    "nbconvert_exporter": "python",
214 |    "pygments_lexer": "ipython3",
215 |    "version": "3.5.2"
216 |   }
217 |  },
218 |  "nbformat": 4,
219 |  "nbformat_minor": 2
220 | }
221 | 


--------------------------------------------------------------------------------
/src/.ipynb_checkpoints/E4_Extract_Feature-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "from scipy import signal\n",
 11 |     "import math\n",
 12 |     "from scipy.signal import butter, lfilter, freqz\n",
 13 |     "from scipy.signal import argrelextrema"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 2,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "#Fillter\n",
 23 |     "def butter_highpass(cutoff, fs, order=2):\n",
 24 |     "    nyq = 0.5 * fs\n",
 25 |     "    normal_cutoff = cutoff / nyq\n",
 26 |     "    b, a = butter(order, normal_cutoff, btype='high', analog=False)\n",
 27 |     "    return b, a\n",
 28 |     "def butter_lowpass(cut, fs, order=2):\n",
 29 |     "    nyq = 0.5 * fs\n",
 30 |     "    normal_cutoff = cut / nyq\n",
 31 |     "    b, a = butter(order, normal_cutoff, btype='low', analog=False)\n",
 32 |     "    return b, a\n",
 33 |     "def butter_bandpass(lowcut, highcut, fs, order=2):\n",
 34 |     "    nyq = 0.5 * fs\n",
 35 |     "    low = lowcut / nyq\n",
 36 |     "    high = highcut / nyq\n",
 37 |     "    b, a = signal.butter(order, [low, high], btype='band')\n",
 38 |     "    return b, a"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "N_subject = 43\n",
 48 |     "N_clip =15"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 4,
 54 |    "metadata": {},
 55 |    "outputs": [
 56 |     {
 57 |      "name": "stderr",
 58 |      "output_type": "stream",
 59 |      "text": [
 60 |       "/usr/local/lib/python3.5/dist-packages/scipy/signal/_arraytools.py:45: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n",
 61 |       "  b = a[a_slice]\n"
 62 |      ]
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "EDA = np.load('../data/E4/raw/EDA.npy',encoding=\"latin1\") #Raw EDA\n",
 67 |     "Num_EDA_fea =21\n",
 68 |     "smp_EDA = 64.0\n",
 69 |     "EDA_fea=np.zeros([N_subject*N_clip,Num_EDA_fea ])\n",
 70 |     "cnt=0\n",
 71 |     "for i in range(EDA.shape[0]):\n",
 72 |     "    for j in range(EDA.shape[1]):\n",
 73 |     "        \n",
 74 |     "        EDA_fea[cnt,0] = np.mean(EDA[i,j,:])          #average skin res\n",
 75 |     "        EDA_fea[cnt,1] = np.mean(np.diff(EDA[i,j,:])) #average diff\n",
 76 |     "        EDA_fea[cnt,2] = np.mean((np.diff(EDA[0,0,:])<0)) #average diff Nega\n",
 77 |     "        EDA_fea[cnt,3] = float (np.sum(np.diff(EDA[0,0,:])<0))/np.diff(EDA[0,0,:]).shape[0] #ratio Nega vs all\n",
 78 |     "        EDA_fea[cnt,4] = (argrelextrema(EDA[0,0,:], np.less)[0]).shape[0] #number local\n",
 79 |     "        \n",
 80 |     "        RiseTime=[]\n",
 81 |     "        RT=0\n",
 82 |     "        Dat = np.diff(EDA[0,0])\n",
 83 |     "        for k in range(Dat.shape[0]):\n",
 84 |     "            if(Dat[k]>0):\n",
 85 |     "                RT+=1\n",
 86 |     "            elif (Dat[k-1]>0):\n",
 87 |     "                RiseTime.append(RT)\n",
 88 |     "                RT = 0\n",
 89 |     "        \n",
 90 |     "        EDA_fea[cnt,5] = np.mean(RiseTime)/(smp_EDA)  #average rising time in sec\n",
 91 |     "        \n",
 92 |     "        data =EDA[0,0,:]\n",
 93 |     "        ps =  np.abs(np.fft.fft(data))**2\n",
 94 |     "        \n",
 95 |     "        EDA_fea[cnt,6:19] = ps[0:13] \n",
 96 |     "        \n",
 97 |     "        b, a = butter_lowpass(0.2, fs=4, order=2) # low pass filter at 0.2 Hz\n",
 98 |     "        databand1 = signal.filtfilt(b, a, EDA[i,j,:]) \n",
 99 |     "        b, a = butter_lowpass(0.08, fs=4, order=2) # low pass at 0.08 Hz\n",
100 |     "        databand2 = signal.filtfilt(b, a, EDA[i,j,:]) \n",
101 |     "        \n",
102 |     "        EDA_fea[cnt,19] = ((databand1[:-1] * databand1[1:]) < 0).sum()\n",
103 |     "        EDA_fea[cnt,20] = ((databand2[:-1] * databand2[1:]) < 0).sum()\n",
104 |     "        cnt+=1\n",
105 |     "np.save('../data/E4/feature_extracted/EDA.npy', EDA_fea)"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 5,
111 |    "metadata": {},
112 |    "outputs": [],
113 |    "source": [
114 |     "# Temperature features: one row per (i, j) entry of TEMP's first two axes (presumably subject, clip).\n",
115 |     "TEMP = np.load('../data/E4/raw/TEMP.npy',encoding=\"latin1\") #Raw Temp data\n",
116 |     "Num_TEMP_fea = 4\n",
117 |     "TEMP_fea = np.zeros([N_subject*N_clip, Num_TEMP_fea])\n",
118 |     "cnt = 0\n",
119 |     "for i in range(TEMP.shape[0]):\n",
120 |     "    for j in range(TEMP.shape[1]):\n",
121 |     "        data = TEMP[i,j,:]\n",
122 |     "        # squared FFT magnitude of this recording\n",
123 |     "        ps = np.abs(np.fft.fft(data))**2\n",
124 |     "        TEMP_fea[cnt] = [np.mean(data),            # mean of the raw signal\n",
125 |     "                         np.mean(np.diff(data)),   # mean first difference\n",
126 |     "                         sum(ps[0:5]),             # band 0 - 0.1\n",
127 |     "                         sum(ps[5:10])]            # band 0.1-0.2\n",
128 |     "        cnt += 1\n",
129 |     "np.save('../data/E4/feature_extracted/TEMP.npy', TEMP_fea)"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 6,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "BVP = np.load('../data/E4/raw/BVP.npy',encoding=\"latin1\")\n",
139 |     "HR = np.load('../data/E4/raw/HR.npy',encoding=\"latin1\")\n",
140 |     "IBI = np.load('../data/E4/raw/IBI.npy',encoding=\"latin1\")\n",
141 |     "HRV = np.load('../data/E4/raw/HRV.npy',encoding=\"latin1\")\n",
142 |     "Num_BVP_fea = 13\n",
143 |     "BVP_fea=np.zeros([N_subject*N_clip,Num_BVP_fea])\n",
144 |     "cnt=0  # output row; advanced once per (i, j) recording\n",
145 |     "for i in range(BVP.shape[0]):\n",
146 |     "    for j in range(BVP.shape[1]):\n",
147 |     "        BVP_fea[cnt,0] = np.mean(HR[i,j,:])\n",
148 |     "        BVP_fea[cnt,1] = np.std(HR[i,j,:])\n",
149 |     "        BVP_fea[cnt,2] = np.mean(HRV[i,j])\n",
150 |     "        BVP_fea[cnt,3] = np.std(HRV[i,j])\n",
151 |     "        BVP_fea[cnt,4] = np.mean(IBI[i,j])\n",
152 |     "        BVP_fea[cnt,5] = np.std(IBI[i,j])\n",
153 |     "        data =  BVP[i,j,:]\n",
154 |     "        ps = np.abs(np.fft.fft(data))**2\n",
155 |     "        BVP_fea[cnt,6] = sum(ps[6:12])\n",
156 |     "        BVP_fea[cnt,7] = sum(ps[12:17])\n",
157 |     "        BVP_fea[cnt,8] = sum(ps[12:23])  # NOTE(review): overlaps feature 7 (ps[12:17]) - confirm the intended band\n",
158 |     "        b, a = butter_bandpass(0.04, 0.15, fs=64, order=2) # Band Pass filter 0.04-0.15 Hz\n",
159 |     "        databand4 = signal.filtfilt(b, a, BVP[i,j,:]) \n",
160 |     "        b, a = butter_bandpass(0.15, 0.5, fs=64, order=2) # Band Pass filter 0.15-0.5 Hz\n",
161 |     "        databand5 = signal.filtfilt(b, a, BVP[i,j,:]) \n",
162 |     "        Enegy1 =sum(databand4**2)\n",
163 |     "        Enegy2 =sum(databand5**2)\n",
164 |     "        BVP_fea[cnt,9] = Enegy1/Enegy2  # energy ratio of the two filtered bands\n",
165 |     "        data = HRV[i,j]  # HRV series of this recording (not HRV[0,0], which made features 10-12 constant)\n",
166 |     "        ps = np.abs(np.fft.fft(data))**2\n",
167 |     "        fs = HRV[i,j].shape[0]/56.0  # samples per unit time; 56.0 is presumably the clip length in s - TODO confirm\n",
168 |     "        time_step = 1 / fs \n",
169 |     "        freqs = np.fft.fftfreq(data.size, time_step)\n",
170 |     "        # fftfreq returns the non-negative frequencies first, so each scan below starts in the positive half\n",
171 |     "        # each scan: st = first bin >= lower edge, end = first later bin > upper edge (or the last bin if none)\n",
172 |     "        for st in range(freqs.shape[0]):\n",
173 |     "            if(freqs[st] >= 0.01):\n",
174 |     "                for end in range(st,freqs.shape[0]):\n",
175 |     "                    if(freqs[end]>0.08):\n",
176 |     "                        break\n",
177 |     "                break\n",
178 |     "\n",
179 |     "        BVP_fea[cnt,10] = sum(ps[st:end])\n",
180 |     "        for st in range(freqs.shape[0]):\n",
181 |     "            if(freqs[st] >= 0.08):\n",
182 |     "                for end in range(st,freqs.shape[0]):\n",
183 |     "                    if(freqs[end]>0.15):\n",
184 |     "                        break\n",
185 |     "                break\n",
186 |     "        BVP_fea[cnt,11] = sum(ps[st:end])\n",
187 |     "        for st in range(freqs.shape[0]):\n",
188 |     "            if(freqs[st] >= 0.15):\n",
189 |     "                for end in range(st,freqs.shape[0]):\n",
190 |     "                    if(freqs[end]>0.5):\n",
191 |     "                        break\n",
192 |     "                break\n",
193 |     "        BVP_fea[cnt,12] = sum(ps[st:end])\n",
194 |     "        cnt+=1\n",
195 |     "np.save('../data/E4/feature_extracted/BVP.npy', BVP_fea)"
196 |    ]
197 |   }
198 |  ],
199 |  "metadata": {
200 |   "kernelspec": {
201 |    "display_name": "Python 3",
202 |    "language": "python",
203 |    "name": "python3"
204 |   },
205 |   "language_info": {
206 |    "codemirror_mode": {
207 |     "name": "ipython",
208 |     "version": 3
209 |    },
210 |    "file_extension": ".py",
211 |    "mimetype": "text/x-python",
212 |    "name": "python",
213 |    "nbconvert_exporter": "python",
214 |    "pygments_lexer": "ipython3",
215 |    "version": "3.5.2"
216 |   }
217 |  },
218 |  "nbformat": 4,
219 |  "nbformat_minor": 2
220 | }
221 | 


--------------------------------------------------------------------------------
/src/GridSearchCV.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[ ]:
  5 | 
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | from sklearn.metrics import f1_score
 10 | from sklearn.model_selection import GroupKFold, PredefinedSplit
 11 | from sklearn.utils.class_weight import compute_class_weight
 12 | from matplotlib import pyplot as plt
 13 | from sklearn.svm import SVC, LinearSVC
 14 | from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
 15 | from sklearn.preprocessing import MinMaxScaler
 16 | from sklearn.feature_selection import VarianceThreshold
 17 | from sklearn.pipeline import Pipeline
 18 | import math
 19 | 
 20 | import warnings
 21 | warnings.filterwarnings('ignore')
 22 | 
 23 | 
 24 | # settings
 25 | nsubjects = 43
 26 | samples_per_subj = nclips = 15
 27 | print('nsubjects:', nsubjects, 'samples_per_subj:', samples_per_subj)
 28 | 
 29 | # specified source of labels (kmeans or threshold)
 30 | labelsType = "kmeans"
 31 | 
 32 | # Set up possible values of parameters to optimize over
 33 | p_grid = {'kernel': ['poly', 'rbf', 'sigmoid'],
 34 |           "C": [1, 10, 100],
 35 |           "gamma": [.01, .1], 
 36 |           "degree": [3, 4, 5],
 37 |           "coef0": [0, .01, .1]
 38 |          }
 39 | 
 40 | # prepare X, y
 41 | X1 = np.load('../data/EEG/feature_extracted/EEG_ICA.npy')
 42 | X2 = np.load('../data/E4/feature_extracted/TEMP.npy')
 43 | X3 = np.load('../data/E4/feature_extracted/BVP.npy')
 44 | X4 = np.load('../data/E4/feature_extracted/EDA.npy')
 45 | EEG = X1
 46 | E4 = np.concatenate((X2,X3,X4), axis=1)
 47 | 
 48 | Fea = np.concatenate((EEG,E4), axis=1)
 49 | print (EEG.shape, E4.shape, Fea.shape)
 50 | 
 51 | 
 52 | 
 53 | def save_to_csv(df, fname):
 54 |     df.to_csv('./results_SVM/'+emotion+'_'+fname+'.csv', index=False)
 55 | 
 56 | 
 57 | ##### LEAVE ONE CLIP OUT, USE ALL CHANNELS AND FREQ (ALL FEATURES)
 58 | 
 59 | # emotion = 'Arousal', 'Valence'
 60 | for emotion in ['Arousal', 'Valence']:
 61 | 
 62 |     # reshape from (nsubjs * nclips, nfeatures) to (nsubjs, nclips, nfeatures)
 63 |     # ** SELECT f = 'EEG_only', 'E4_only', 'EEG_E4'
 64 |     for f in ['EEG_only', 'E4_only', 'EEG_E4']:
 65 |         if f == 'EEG_only':
 66 |             features = np.copy(EEG)
 67 |         elif f == 'E4_only':
 68 |             features = np.copy(E4)
 69 |         elif f == 'EEG_E4':
 70 |             features = np.copy(Fea)
 71 |             
 72 |         y_preds = []
 73 |         df = pd.DataFrame(columns=['dataset_id', 'fold', 'train_acc', 'train_F1', 'test_acc', 'test_F1', 
 74 |                                    'best_kernel', 'best_coef0', 'best_degree', 
 75 |                                    'best_gamma', 'best_C', 
 76 |                                    'class_ratio_train (0/1)', 'class_ratio_test (0/1)'])
 77 | 
 78 |         # uncomment here to label from threshold = 4.5 # 
 79 |         if labels_from == "threshold":
 80 |             f += '_th'
 81 |             if(emotion == 'Arousal'):
 82 |                 y = np.load('../data/score/label/arousal.npy')
 83 |             elif(emotion == 'Valence'):
 84 |                 y = np.load('../data/score/label/valence.npy')
 85 |             elif(emotion == 'Happy'):
 86 |                 y = np.load('../data/score/label/happy.npy')
 87 |             elif(emotion == 'Fear'):
 88 |                 y = np.load('../data/score/label/fear.npy')
 89 |             elif(emotion == 'Excite'):
 90 |                 y = np.load('../data/score/label/excite.npy')
 91 |             elif(emotion == 'Reward'):
 92 |                 y = np.load('../data/score/label/reward.npy')
 93 |         elif:
 94 |             f += '_kmeans'
 95 |             label = np.load('../data/score/label/kmeans.npy') 
 96 |             y = label
 97 |             if(emotion == 'Arousal'):
 98 |                 y = [0 if(kk== 0 or kk==2) else 1 for kk in y]
 99 |             elif(emotion == 'Valence'):
100 |                 y = [0 if(kk== 0 or kk==1) else 1 for kk in y]
101 |             y=np.asarray(y)
102 |         else:
103 |             raise Exception("Please specify 'labels_from' = kmeans or threshold.")
104 |         
105 |         # ** setting upsampling test set or not
106 |         upsampling = False
107 | 
108 |         X = np.copy(features)
109 |         X = X.reshape(nsubjects, samples_per_subj, -1)
110 |         y = y.reshape(nsubjects, samples_per_subj)
111 |         print(X.shape, y.shape)
112 |         assert all(features[1] == X[0][1])
113 |         assert all(features[15] == X[1][0])
114 | 
115 |         # transpose to (nclips, nsubjs, nfeatures)
116 |         X = np.transpose(X, (1, 0, 2))
117 |         y = np.transpose(y)
118 |         assert all(features[1] == X[1][0])
119 |         assert all(features[15] == X[0][1])
120 | 
121 |         # rearrange clips
122 |         # to 0,1,2,5,6,7,10,11,12,others
123 |         X = np.concatenate([X[0:3], X[5:8], X[10:13], X[3:5], X[8:10], X[13:16]])
124 |         y = np.concatenate([y[0:3], y[5:8], y[10:13], y[3:5], y[8:10], y[13:16]])
125 |         assert X.shape[0] == nclips == len(y)
126 |         print(X.shape, y.shape)
127 | 
128 |         folds = KFold(n_splits=nclips) #to leave test set out
129 |         print(folds)
130 | 
131 |         clip_id = 0
132 |         for train_index, test_index in folds.split(X):
133 |             clip_id += 1
134 |             print('test index', test_index[0])
135 | 
136 |             X_train, y_train = X[train_index], y[train_index]
137 |             X_test, y_test = X[test_index], y[test_index]
138 |             n = len(X_train)
139 |             print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
140 | 
141 | 
142 |             X_train = X_train.reshape(X_train.shape[0]*X_train.shape[1], -1)
143 |             y_train = y_train.reshape(y_train.shape[0]*y_train.shape[1])
144 | 
145 |             X_test = X_test.reshape(X_test.shape[0]*X_test.shape[1], -1)
146 |             y_test = y_test.reshape(y_test.shape[0]*y_test.shape[1])
147 |             print('shape', X_train.shape, y_train.shape, X_test.shape, y_test.shape)
148 | 
149 | 
150 |             # norm by train set
151 |             scaler = MinMaxScaler()
152 |             scaler.fit(X_train)
153 |             X_train = scaler.transform(X_train)
154 |             X_test = scaler.transform(X_test)
155 | 
156 |             # upsampling class that has lower number
157 |             tmp_shape = X_test.shape
158 |             if upsampling:
159 |                 if not 'up' in f:
160 |                     f+='_up'
161 |                 nc0 = len(X_test[y_test==0])
162 |                 nc1 = len(X_test[y_test==1])
163 |                 if nc0==0 or nc1 == 0:
164 |                     df = df.append({'dataset_id': f, 'fold': test_index[0],
165 |                             'train_acc': '%.2f' % (-1), 'train_F1': '%.2f' % (-1), 
166 |                             'test_acc': '%.2f' % (-1), 'test_F1': '%.2f' % (-1), 
167 |                             'best_kernel': ' ', 'best_coef0': ' ', 
168 |                             'best_degree': ' ', 'best_gamma': ' ', 
169 |                             'best_C': ' ', 
170 |                             'class_ratio_train (0/1)': str(len(y_train[y_train==0]))+'/'+str(len(y_train[y_train==1])), 
171 |                             'class_ratio_test (0/1)': str(len(y_test[y_test==0]))+'/'+str(len(y_test[y_test==1]))}, 
172 |                            ignore_index=True)
173 |                     save_to_csv(df, f)
174 |                     continue
175 | 
176 |                 nc = [nc0, nc1]
177 |                 higher = np.argmax(nc)
178 |                 lower = abs(higher-1)
179 | 
180 |                 new_x = X_test
181 |                 new_y = y_test
182 |                 while True:
183 |                     print('nc', nc)
184 |                     if nc[lower] + len(new_y) < nc[higher]*2:
185 |                         new_x = np.concatenate([new_x, X_test[y_test==lower]])
186 |                         new_y = np.concatenate([new_y, y_test[y_test==lower]])
187 |                     else:
188 |                         remain = nc[higher]*2 - len(new_y)
189 |                         new_x = np.concatenate([new_x, X_test[y_test==lower][0:remain]])
190 |                         new_y = np.concatenate([new_y, y_test[y_test==lower][0:remain]])
191 |                         break
192 |                 X_test = np.array(new_x)
193 |                 y_test = np.array(new_y)
194 |                 print(X_test.shape, y_test.shape, len(y_test[y_test==0]), len(y_test[y_test==1]))
195 |                 assert X_test.shape[1] == tmp_shape[1]
196 |                 assert X_test.shape[0] >= tmp_shape[0]
197 |                 assert len(y_test[y_test==0]) == len(y_test[y_test==1])
198 | 
199 | 
200 |             # leave person out each fold
201 |             test_fold = np.concatenate([[0]*43, [1]*43, [2]*43, [3]*43, [4]*43, 
202 |                                         [5]*43, [6]*43, [7]*43, [-1]*((nsubjects*(nclips-1))-(8*nsubjects))])
203 |             gkf = PredefinedSplit(test_fold)
204 |             print('split train set into:', gkf.get_n_splits(), 'folds')
205 |             
206 | 
207 |             # We will use a Support Vector Classifier with class_weight balanced
208 |             svm = SVC(class_weight = 'balanced')
209 |             clf_best = GridSearchCV(estimator=svm, 
210 |                                param_grid=p_grid, 
211 |                                cv=gkf, 
212 |                                iid=False, 
213 |                                scoring=['accuracy', 'balanced_accuracy', 'f1_macro'],
214 |                                refit = 'f1_macro') # get params that give best 'refit' value
215 |             clf_best.fit(X_train, y_train)
216 |             y_pred = clf_best.predict(X_train)
217 |             train_f1 = clf_best.best_score_
218 | 
219 |             print('clf_best best_score:', train_f1)
220 |             print('clf_best best_params_:', clf_best.best_params_)
221 |             c0_train = len(y_train[(y_train==0) & (y_train==y_pred)])
222 |             c1_train = len(y_train[(y_train==1) & (y_train==y_pred)])
223 |             train_acc = (c0_train+c1_train)/len(y_train)
224 |             print('clf_best train correct: c0_train =', c0_train, '/', len(y_train[y_train==0]), 
225 |                   'clf_best c1_train =', c1_train, '/', len(y_train[y_train==1]), 'from', len(y_train), '=', train_acc)
226 |             
227 |             
228 |             # We will use a Linear SVC which allows regularization
229 |             # Set up possible values of parameters to optimize over
230 |             p_grid2 = {"C": [1, 10, 100],
231 |                        "penalty": ['l1', 'l2']
232 |                       }
233 | 
234 |             # sklearn: prefer dual=False when n_samples > n_features
235 |             linear_svm = LinearSVC(class_weight = 'balanced', dual=False) 
236 |             clf_linear_best = GridSearchCV(estimator=linear_svm, 
237 |                                param_grid=p_grid2, 
238 |                                cv=gkf, 
239 |                                iid=False, 
240 |                                scoring=['accuracy', 'balanced_accuracy', 'f1_macro'],
241 |                                refit = 'f1_macro') # get params that give best 'refit' value
242 |             clf_linear_best.fit(X_train, y_train)
243 |             y_pred = clf_linear_best.predict(X_train)
244 |             train_f1 = clf_linear_best.best_score_
245 | 
246 |             print('clf_linear_best best_score:', train_f1)
247 |             print('clf_linear_best best_params_:', clf_linear_best.best_params_)
248 |             c0_train = len(y_train[(y_train==0) & (y_train==y_pred)])
249 |             c1_train = len(y_train[(y_train==1) & (y_train==y_pred)])
250 |             train_acc = (c0_train+c1_train)/len(y_train)
251 |             print('clf_linear_best train correct: c0_train =', c0_train, '/', len(y_train[y_train==0]), 
252 |                   'clf_linear_best c1_train =', c1_train, '/', len(y_train[y_train==1]), 'from', len(y_train), '=', train_acc)
253 |             
254 | 
255 |             if clf_best.best_score_ > clf_linear_best.best_score_:
256 |                 clf = clf_best
257 |                 print('using SVC')
258 |                 degree = clf.best_params_['degree']
259 |                 gamma = clf.best_params_['gamma']
260 |                 kernel = clf.best_params_['kernel']
261 |                 penalty = '-'
262 |                 coef0 = clf.best_params_['coef0']
263 |             else:
264 |                 clf = clf_linear_best
265 |                 print('using LinearSVC')
266 |                 degree = '-'
267 |                 gamma = '-'
268 |                 kernel = 'Linear'
269 |                 penalty = clf.best_params_['penalty']
270 |                 coef0 = '-'
271 |             
272 |             y_test_pred = clf.predict(X_test)
273 |             c0_test = len(y_test[(y_test==0) & (y_test==y_test_pred)])
274 |             c1_test = len(y_test[(y_test==1) & (y_test==y_test_pred)])
275 |             test_acc = (c0_test+c1_test)/len(y_test)
276 |             test_f1 = f1_score(y_test, y_test_pred, average='macro')
277 |             print('test correct: c0_test =', c0_test, '/', len(y_test[y_test==0]), 
278 |                   'c1_test =', c1_test, '/', len(y_test[y_test==1]), 'from', len(y_test), '=', test_acc)
279 |             print()
280 |             df = df.append({'dataset_id': f, 'fold': test_index[0],
281 |                             'train_acc': '%.2f' % (train_acc*100), 'train_F1': '%.2f' % (train_f1*100), 
282 |                             'test_acc': '%.2f' % (test_acc*100), 'test_F1': '%.2f' % (test_f1*100), 
283 |                             'best_kernel': kernel, 'best_coef0': coef0, 
284 |                             'best_degree': degree, 'best_gamma': gamma, 
285 |                             'best_C': clf.best_params_['C'], 'penalty': penalty,
286 |                             'class_ratio_train (0/1)': str(len(y_train[y_train==0]))+'/'+str(len(y_train[y_train==1])), 
287 |                             'class_ratio_test (0/1)': str(len(y_test[y_test==0]))+'/'+str(len(y_test[y_test==1]))}, 
288 |                            ignore_index=True)
289 | 
290 |             save_to_csv(df, f)
291 |             y_preds.append(y_test_pred)
292 | 
293 |             # leave one clip out to be test set (only clip 0-8)
294 |             if clip_id == 9:
295 |                 np.save(emotion+'_y_pred', y_preds)
296 |                 break
297 |                 
298 |         del features, X_train, y_train, X_test, y_test, folds, X, y
299 | 
300 | 
301 | 


--------------------------------------------------------------------------------
/src/GridSearchCV-channel.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[ ]:
  5 | 
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | from sklearn.metrics import f1_score
 10 | from sklearn.model_selection import GroupKFold, PredefinedSplit
 11 | from sklearn.utils.class_weight import compute_class_weight
 12 | from matplotlib import pyplot as plt
 13 | from sklearn.svm import SVC, LinearSVC
 14 | from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
 15 | from sklearn.preprocessing import MinMaxScaler
 16 | from sklearn.feature_selection import VarianceThreshold
 17 | import math
 18 | 
 19 | import warnings
 20 | warnings.filterwarnings('ignore')
 21 | 
 22 | 
 23 | # In[ ]:
 24 | 
 25 | 
 26 | # settings
 27 | nsubjects = 43
 28 | samples_per_subj = nclips = 15
 29 | print('nsubjects:', nsubjects, 'samples_per_subj:', samples_per_subj)
 30 | 
 31 | # Set up possible values of parameters to optimize over
 32 | p_grid = {'kernel': ['poly', 'rbf', 'sigmoid'],
 33 |           "C": [1, 10, 100],
 34 |           "gamma": [.01, .1], 
 35 |           "degree": [3, 4, 5],
 36 |           "coef0": [0, .01, .1]
 37 |          }
 38 | 
 39 | df = pd.DataFrame(columns=['dataset_id', 'fold', 'train_acc', 'train_F1', 'test_acc', 'test_F1', 
 40 |                    'best_kernel', 'best_coef0', 'best_degree', 
 41 |                    'best_gamma', 'best_C', 
 42 |                    'class_ratio_train (0/1)', 'class_ratio_test (0/1)'])
 43 | 
 44 | # prepare X, y
 45 | X1 = np.load('../data/EEG/feature_extracted/EEG_ICA.npy')
 46 | X2 = np.load('../data/E4/feature_extracted/TEMP.npy')
 47 | X3 = np.load('../data/E4/feature_extracted/BVP.npy')
 48 | X4 = np.load('../data/E4/feature_extracted/EDA.npy')
 49 | EEG = X1
 50 | E4 = np.concatenate((X2,X3,X4), axis=1)
 51 | 
 52 | Fea = np.concatenate((EEG,E4), axis=1)
 53 | print (EEG.shape, E4.shape, Fea.shape)
 54 | 
 55 | 
 56 | def save_to_csv(df, fname):
 57 |     df.to_csv('./results_SVM/'+emotion+'_'+fname+'.csv', index=False)
 58 | 
 59 | 
 60 | #### LEAVE ONE CLIP OUT, USE SOME CHANNELS AND ALL FREQ
 61 | 
 62 | ChannelSelect = [[0,1,2] , [0,1,3] , [0,1,6], [0,1,7] ,[4,5,2], [4,5,3], [4,5,6], [4,5,7],[2,3,6,7]]
 63 | 
 64 | # prepare data for channel selection
 65 | DataX1 = EEG
 66 | DataX2 = E4
 67 | DataX1 =DataX1.reshape(43,15,-1)
 68 | DataX2 =DataX2.reshape(43,15,-1)
 69 | DataX1 = np.transpose(DataX1, (1, 0, 2))
 70 | DataX2 = np.transpose(DataX2, (1, 0, 2))
 71 | 
 72 | DataAllCh=[]
 73 | DataCh_E4=[]
 74 | for j in ChannelSelect:
 75 |     DataCh=np.zeros([15,43,4*len(j)])
 76 |     for i,x in enumerate (j):
 77 |         DataCh[:,:,i*4:i*4+4] = DataX1[:,:,4*x:4*x+4]
 78 |     DataAllCh.append(DataCh)
 79 |     DataCh_E4.append(np.concatenate((DataCh,DataX2), axis=2))
 80 |     print(np.concatenate((DataCh,DataX2), axis=2).shape)
 81 | 
 82 | print('DataAllCh =', (len(DataAllCh), len(DataAllCh[0]), len(DataAllCh[0][1]), ))
 83 | print('DataCh_E4 =', (len(DataCh_E4), len(DataCh_E4[0]), len(DataCh_E4[0][1]), ))
 84 | 
 85 | # DataAllCh: select some channels and concat without E4 features
 86 | # shape = (9, nclips, nsubjects, nfeatures)
 87 | 
 88 | # DataCh_E4: select some channels and concat with E4 features
 89 | # shape = (9, nclips, nsubjects, nfeatures)
 90 | 
 91 | for emotion in ['Arousal', 'Valence']:
 92 | 
 93 |     for f in ['EEG_only', 'EEG_E4']:
 94 |         
 95 |         # ** SELECT f = 'EEG_only', 'EEG_E4'
 96 |         if f == 'EEG_only':
 97 |             Data = DataAllCh
 98 |         elif f == 'EEG_E4':
 99 |             Data = DataCh_E4
100 |             
101 |         df = pd.DataFrame(columns=['dataset_id', 'fold', 'train_acc', 'train_F1', 'test_acc', 'test_F1', 
102 |                            'best_kernel', 'best_coef0', 'best_degree', 
103 |                            'best_gamma', 'best_C', 
104 |                            'class_ratio_train (0/1)', 'class_ratio_test (0/1)'])
105 |         
106 |         # use label from kmeans #
107 |         label = np.load('../data/score/label/kmeans.npy') 
108 |         y = label
109 | 
110 |         if(emotion == 'Arousal'):
111 |             y = [0 if(kk== 0 or kk==2) else 1 for kk in y]
112 |         elif(emotion == 'Valence'):
113 |             y = [0 if(kk== 0 or kk==1) else 1 for kk in y]
114 |         y=np.asarray(y)
115 | 
116 |         # reshape from y shape = (nsubjects * samples_per_subj)
117 |         y = y.reshape(nsubjects, samples_per_subj)
118 | 
119 |         # transpose to (clips, subjects)
120 |         y = np.transpose(y)
121 | 
122 |         # rearrange clips
123 |         # to 0,1,2,5,6,7,10,11,12,others
124 |         y = np.concatenate([y[0:3], y[5:8], y[10:13], y[3:5], y[8:10], y[13:16]])
125 | 
126 |         for dataset_id, X in enumerate(Data):
127 |             print('==== dataset:', dataset_id, '====')
128 |             # y is same for every set, X is changed
129 |             # shape X = (nclips, nsubjs, nfeatures)
130 | 
131 |             # rearrange clips to 0,1,2,5,6,7,10,11,12,others
132 |             X = np.concatenate([X[0:3], X[5:8], X[10:13], X[3:5], X[8:10], X[13:16]])
133 |             assert X.shape[0] == nclips == len(y)
134 |             print(X.shape, y.shape)
135 | 
136 |             # ** The following code is as same as 2)
137 |             folds = KFold(n_splits=nclips) #to leave test set out
138 | 
139 |             clip_id = 0
140 |             for train_index, test_index in folds.split(X):
141 |                 clip_id += 1
142 |                 print('test index', test_index[0])
143 | 
144 |                 X_train, y_train = X[train_index], y[train_index]
145 |                 X_test, y_test = X[test_index], y[test_index]
146 |                 n = len(X_train)
147 |                 print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
148 | 
149 | 
150 |                 X_train = X_train.reshape(X_train.shape[0]*X_train.shape[1], -1)
151 |                 y_train = y_train.reshape(y_train.shape[0]*y_train.shape[1])
152 | 
153 |                 X_test = X_test.reshape(X_test.shape[0]*X_test.shape[1], -1)
154 |                 y_test = y_test.reshape(y_test.shape[0]*y_test.shape[1])
155 |                 print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
156 | 
157 | 
158 |                 # norm by train set
159 |                 scaler = MinMaxScaler()
160 |                 scaler.fit(X_train)
161 |                 X_train = scaler.transform(X_train)
162 |                 X_test = scaler.transform(X_test)
163 |                 
164 |                 
165 |                 # upsampling class that has lower number
166 |                 tmp_shape = X_test.shape
167 |                 upsampling = False
168 |                 if upsampling:
169 |                     if not 'up' in f:
170 |                         f+='_up'
171 |                     nc0 = len(X_test[y_test==0])
172 |                     nc1 = len(X_test[y_test==1])
173 |                     if nc0==0 or nc1 == 0:
174 |                         df = df.append({'dataset_id': ChannelSelect[dataset_id], 'fold': test_index[0],
175 |                                 'train_acc': '%.2f' % (-1), 'train_F1': '%.2f' % (-1), 
176 |                                 'test_acc': '%.2f' % (-1), 'test_F1': '%.2f' % (-1), 
177 |                                 'best_kernel': ' ', 'best_coef0': ' ', 
178 |                                 'best_degree': ' ', 'best_gamma': ' ', 
179 |                                 'best_C': ' ', 
180 |                                 'class_ratio_train (0/1)': str(len(y_train[y_train==0]))+'/'+str(len(y_train[y_train==1])), 
181 |                                 'class_ratio_test (0/1)': str(len(y_test[y_test==0]))+'/'+str(len(y_test[y_test==1]))}, 
182 |                                ignore_index=True)
183 |                         save_to_csv(df, f)
184 |                         continue
185 | 
186 |                     nc = [nc0, nc1]
187 |                     higher = np.argmax(nc)
188 |                     lower = abs(higher-1)
189 | 
190 |                     new_x = X_test
191 |                     new_y = y_test
192 |                     while True:
193 |                         print('nc', nc)
194 |                         if nc[lower] + len(new_y) < nc[higher]*2:
195 |                             new_x = np.concatenate([new_x, X_test[y_test==lower]])
196 |                             new_y = np.concatenate([new_y, y_test[y_test==lower]])
197 |                         else:
198 |                             remain = nc[higher]*2 - len(new_y)
199 |                             new_x = np.concatenate([new_x, X_test[y_test==lower][0:remain]])
200 |                             new_y = np.concatenate([new_y, y_test[y_test==lower][0:remain]])
201 |                             break
202 |                     X_test = np.array(new_x)
203 |                     y_test = np.array(new_y)
204 |                     print(X_test.shape, y_test.shape, len(y_test[y_test==0]), len(y_test[y_test==1]))
205 |                     assert X_test.shape[1] == tmp_shape[1]
206 |                     assert X_test.shape[0] >= tmp_shape[0]
207 |                     assert len(y_test[y_test==0]) == len(y_test[y_test==1])
208 |                 
209 | 
210 |                 # leave person out each fold
211 |                 test_fold = np.concatenate([[0]*43, [1]*43, [2]*43, [3]*43, [4]*43, 
212 |                                             [5]*43, [6]*43, [7]*43, [-1]*((nsubjects*(nclips-1))-(8*nsubjects))])
213 |                 gkf = PredefinedSplit(test_fold)
214 |                 print('split train set into:', gkf.get_n_splits(), 'folds')
215 | 
216 |                 # We will use a Support Vector Classifier with class_weight balanced
217 |                 svm = SVC(class_weight = 'balanced')
218 |                 clf_best = GridSearchCV(estimator=svm, 
219 |                                    param_grid=p_grid, 
220 |                                    cv=gkf, 
221 |                                    iid=False, 
222 |                                    scoring=['accuracy', 'balanced_accuracy', 'f1_macro'],
223 |                                    refit = 'f1_macro') # get params that give best 'refit' value
224 |                 clf_best.fit(X_train, y_train)
225 |                 y_pred = clf_best.predict(X_train)
226 |                 train_f1 = clf_best.best_score_
227 | 
228 |                 print('clf_best best_score:', train_f1)
229 |                 print('clf_best best_params_:', clf_best.best_params_)
230 |                 c0_train = len(y_train[(y_train==0) & (y_train==y_pred)])
231 |                 c1_train = len(y_train[(y_train==1) & (y_train==y_pred)])
232 |                 train_acc = (c0_train+c1_train)/len(y_train)
233 |                 print('clf_best train correct: c0_train =', c0_train, '/', len(y_train[y_train==0]), 
234 |                       'clf_best c1_train =', c1_train, '/', len(y_train[y_train==1]), 'from', len(y_train), '=', train_acc)
235 | 
236 | 
237 |                 # We will use a Linear SVC which allows regularization
238 |                 # Set up possible values of parameters to optimize over
239 |                 p_grid2 = {"C": [1, 10, 100],
240 |                            "penalty": ['l1', 'l2']
241 |                           }
242 | 
243 |                 # sklearn: prefer dual=False when n_samples > n_features
244 |                 linear_svm = LinearSVC(class_weight = 'balanced', dual=False) 
245 |                 clf_linear_best = GridSearchCV(estimator=linear_svm, 
246 |                                    param_grid=p_grid2, 
247 |                                    cv=gkf, 
248 |                                    iid=False, 
249 |                                    scoring=['accuracy', 'balanced_accuracy', 'f1_macro'],
250 |                                    refit = 'f1_macro') # get params that give best 'refit' value
251 |                 clf_linear_best.fit(X_train, y_train)
252 |                 y_pred = clf_linear_best.predict(X_train)
253 |                 train_f1 = clf_linear_best.best_score_
254 | 
255 |                 print('clf_linear_best best_score:', train_f1)
256 |                 print('clf_linear_best best_params_:', clf_linear_best.best_params_)
257 |                 c0_train = len(y_train[(y_train==0) & (y_train==y_pred)])
258 |                 c1_train = len(y_train[(y_train==1) & (y_train==y_pred)])
259 |                 train_acc = (c0_train+c1_train)/len(y_train)
260 |                 print('clf_linear_best train correct: c0_train =', c0_train, '/', len(y_train[y_train==0]), 
261 |                       'clf_linear_best c1_train =', c1_train, '/', len(y_train[y_train==1]), 
262 |                       'from', len(y_train), '=', train_acc)
263 | 
264 | 
265 |                 if clf_best.best_score_ > clf_linear_best.best_score_:
266 |                     clf = clf_best
267 |                     print('using SVC')
268 |                     degree = clf.best_params_['degree']
269 |                     gamma = clf.best_params_['gamma']
270 |                     kernel = clf.best_params_['kernel']
271 |                     penalty = '-'
272 |                     coef0 = clf.best_params_['coef0']
273 |                 else:
274 |                     clf = clf_linear_best
275 |                     print('using LinearSVC')
276 |                     degree = '-'
277 |                     gamma = '-'
278 |                     kernel = 'Linear'
279 |                     penalty = clf.best_params_['penalty']
280 |                     coef0 = '-'
281 | 
282 |                 y_test_pred = clf.predict(X_test)
283 |                 c0_test = len(y_test[(y_test==0) & (y_test==y_test_pred)])
284 |                 c1_test = len(y_test[(y_test==1) & (y_test==y_test_pred)])
285 |                 test_acc = (c0_test+c1_test)/len(y_test)
286 |                 test_f1 = f1_score(y_test, y_test_pred, average='macro')
287 |                 print('test correct: c0_test =', c0_test, '/', len(y_test[y_test==0]), 
288 |                       'c1_test =', c1_test, '/', len(y_test[y_test==1]), 'from', len(y_test), '=', test_acc)
289 |                 print()
290 |                 df = df.append({'dataset_id': ChannelSelect[dataset_id], 'fold': test_index[0],
291 |                                 'train_acc': '%.2f' % (train_acc*100), 'train_F1': '%.2f' % (train_f1*100), 
292 |                                 'test_acc': '%.2f' % (test_acc*100), 'test_F1': '%.2f' % (test_f1*100), 
293 |                                 'best_kernel': kernel, 'best_coef0': coef0, 
294 |                                 'best_degree': degree, 'best_gamma': gamma, 
295 |                                 'best_C': clf.best_params_['C'], 'penalty': penalty,
296 |                                 'class_ratio_train (0/1)': str(len(y_train[y_train==0]))+'/'+str(len(y_train[y_train==1])), 
297 |                                 'class_ratio_test (0/1)': str(len(y_test[y_test==0]))+'/'+str(len(y_test[y_test==1]))}, 
298 |                                ignore_index=True)
299 | 
300 |                 save_to_csv(df, f+'_chSelect')
301 | 
302 |                 # leave one clip out to be test set (only clip 0-8)
303 |                 if clip_id == 9:
304 |                     break
305 |                     
306 |         del Data, X_train, y_train, X_test, y_test, X, y
307 | 
308 | 
309 | 
310 | 


--------------------------------------------------------------------------------
/src/GridSearchCV-frequency.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[ ]:
  5 | 
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | from sklearn.metrics import f1_score
 10 | from sklearn.model_selection import GroupKFold, PredefinedSplit
 11 | from sklearn.utils.class_weight import compute_class_weight
 12 | from matplotlib import pyplot as plt
 13 | from sklearn.svm import SVC, LinearSVC
 14 | from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
 15 | from sklearn.preprocessing import MinMaxScaler
 16 | from sklearn.feature_selection import VarianceThreshold
 17 | import math
 18 | 
 19 | import warnings
 20 | warnings.filterwarnings('ignore')
 21 | 
 22 | 
 23 | # settings
 24 | nsubjects = 43
 25 | samples_per_subj = nclips = 15
 26 | print('nsubjects:', nsubjects, 'samples_per_subj:', samples_per_subj)
 27 | 
 28 | # Set up possible values of parameters to optimize over
 29 | p_grid = {'kernel': ['poly', 'rbf', 'sigmoid'],
 30 |           "C": [1, 10, 100],
 31 |           "gamma": [.01, .1], 
 32 |           "degree": [3, 4, 5],
 33 |           "coef0": [0, .01, .1]
 34 |          }
 35 | 
 36 | # prepare X, y
 37 | X1 = np.load('../data/EEG/feature_extracted/EEG_ICA.npy')
 38 | X2 = np.load('../data/E4/feature_extracted/TEMP.npy')
 39 | X3 = np.load('../data/E4/feature_extracted/BVP.npy')
 40 | X4 = np.load('../data/E4/feature_extracted/EDA.npy')
 41 | EEG = X1
 42 | E4 = np.concatenate((X2,X3,X4), axis=1)
 43 | 
 44 | Fea = np.concatenate((EEG,E4), axis=1)
 45 | print (EEG.shape, E4.shape, Fea.shape)
 46 | 
 47 | def save_to_csv(df, fname):
 48 |     df.to_csv('./results_SVM/'+emotion+'_'+fname+'.csv', index=False)
 49 | 
 50 | 
 51 | #### LEAVE ONE CLIP OUT, USE ALL CHANNELS AND SOME FREQ
 52 | FreqSelect = [[0],[1],[2],[3] ,[0,1],[0,2],[0,3],[1,2],[1,3],[2,3],[0,1,2],[0,1,3],[0,2,3],[1,2,3]]
 53 | 
 54 | # prepare data for freq selection
 55 | DataX1 = EEG
 56 | DataX2 = E4
 57 | DataX1 =DataX1.reshape(43,15,-1)
 58 | DataX2 =DataX2.reshape(43,15,-1)
 59 | DataX1 = np.transpose(DataX1, (1, 0, 2))
 60 | DataX2 = np.transpose(DataX2, (1, 0, 2))
 61 | 
 62 | DataAllFreq=[]
 63 | DataFq_E4=[]
 64 | for j in FreqSelect:
 65 |     DataFq=[]
 66 |     for i,x in enumerate (j):
 67 |         #print (i,x)        
 68 |         d1 = DataX1[:,:,x]
 69 |         d1 = d1.reshape([15,43,1])
 70 |         d2 = (DataX1[:,:,x+4])
 71 |         d2 = d2.reshape([15,43,1])
 72 |         d3 = (DataX1[:,:,x+8])
 73 |         d3 = d3.reshape([15,43,1])
 74 |         d4 = (DataX1[:,:,x+12])
 75 |         d4 = d4.reshape([15,43,1])
 76 |         d5 = (DataX1[:,:,x+16])
 77 |         d5 = d5.reshape([15,43,1])
 78 |         d6 = (DataX1[:,:,x+20])
 79 |         d6 = d6.reshape([15,43,1])
 80 |         d7 = (DataX1[:,:,x+24])
 81 |         d7 = d7.reshape([15,43,1])
 82 |         d8 = (DataX1[:,:,x+28])
 83 |         d8 = d8.reshape([15,43,1])
 84 |         d= np.concatenate((d1,d2,d3,d4,d5,d6,d7,d8), axis=2)
 85 |         if(i==0):
 86 |             D=d
 87 |         else:
 88 |             D=np.concatenate((D,d),axis=2)
 89 |     DataFq = np.asarray(D)
 90 |     DataAllFreq.append(DataFq)
 91 |     DataFq_E4.append(np.concatenate((DataFq,DataX2), axis=2))
 92 |     print (np.concatenate((DataFq,DataX2), axis=2).shape)
 93 | 
 94 | print('DataFq_E4 =', (len(DataFq_E4), len(DataFq_E4[0]), len(DataFq_E4[0][1]), ))
 95 | print('DataAllFreq =', (len(DataAllFreq), len(DataAllFreq[0]), len(DataAllFreq[0][1]), ))
 96 | 
 97 | # DataFq_E4: select some frequencies and concat with E4 features
 98 | # shape = (14, nclips, nsubjects, nfeatures)
 99 | 
100 | # DataAllFreq: select some frequencies and concat without E4 features
101 | # shape = (14, nclips, nsubjects, nfeatures)
102 | 
103 | 
# Columns of the per-fold result table. 'penalty' is listed last, which is
# where the former DataFrame.append call placed it in the written CSV.
result_columns = ['dataset_id', 'fold', 'train_acc', 'train_F1', 'test_acc', 'test_F1',
                  'best_kernel', 'best_coef0', 'best_degree',
                  'best_gamma', 'best_C',
                  'class_ratio_train (0/1)', 'class_ratio_test (0/1)',
                  'penalty']

n_test_clips = 9  # only the first 9 (rearranged) clips are ever held out as test set
n_val_clips = 8   # number of validation folds used inside the grid search

# Inner cross-validation layout for the grid search.
# X_train is flattened clip-major below, so every consecutive run of
# `nsubjects` rows is one clip. The first `n_val_clips` training clips are
# used as validation folds 0..7 (leave-one-CLIP-out); all remaining rows are
# marked -1 and therefore always stay in the training part.
inner_test_fold = np.concatenate([
    np.repeat(np.arange(n_val_clips), nsubjects),
    np.full(nsubjects*(nclips-1-n_val_clips), -1, dtype=int)])


def _per_class_hits(y_true, y_hat):
    """Return the number of correctly predicted class-0 and class-1 samples."""
    hit = (y_true == y_hat)
    return int(np.sum(hit & (y_true == 0))), int(np.sum(hit & (y_true == 1)))


for emotion in ['Arousal', 'Valence']:

    for f in ['EEG_only', 'EEG_E4']:

        # FIX: the two feature sets were swapped. DataAllFreq holds the EEG
        # frequency features only; DataFq_E4 holds them concatenated with E4.
        if f == 'EEG_only':
            Data = DataAllFreq
        elif f == 'EEG_E4':
            Data = DataFq_E4

        # Result rows are collected in a list and the frame is rebuilt before
        # each save (DataFrame.append no longer exists in pandas >= 2.0).
        rows = []
        df = pd.DataFrame(columns=result_columns)

        # use label from kmeans #
        label = np.load('../data/score/label/kmeans.npy')
        y = label

        # binarise the cluster ids: Arousal -> {0,2} become 0, others 1;
        # Valence -> {0,1} become 0, others 1
        if(emotion == 'Arousal'):
            y = [0 if(kk== 0 or kk==2) else 1 for kk in y]
        elif(emotion == 'Valence'):
            y = [0 if(kk== 0 or kk==1) else 1 for kk in y]
        y=np.asarray(y)
        # ----------------------------------- #

        # reshape from y shape = (nsubjects * samples_per_subj)
        y = y.reshape(nsubjects, samples_per_subj)

        # transpose to (clips, subjects)
        y = np.transpose(y)

        # rearrange clips
        # to 0,1,2,5,6,7,10,11,12,others
        y = np.concatenate([y[0:3], y[5:8], y[10:13], y[3:5], y[8:10], y[13:16]])

        for dataset_id, X in enumerate(Data):
            print('==== dataset:', dataset_id, '====')
            # y is same for every set, X is changed
            # shape X = (nclips, nsubjs, nfeatures)

            # rearrange clips to 0,1,2,5,6,7,10,11,12,others
            X = np.concatenate([X[0:3], X[5:8], X[10:13], X[3:5], X[8:10], X[13:16]])
            assert X.shape[0] == nclips == len(y)
            print(X.shape, y.shape)

            # n_splits == number of clips -> every outer fold holds out one clip
            folds = KFold(n_splits=nclips) #to leave test set out

            for clip_id, (train_index, test_index) in enumerate(folds.split(X), start=1):
                # leave one clip out to be test set (only clip 0-8)
                if clip_id > n_test_clips:
                    break
                print('test index', test_index[0])

                X_train, y_train = X[train_index], y[train_index]
                X_test, y_test = X[test_index], y[test_index]
                print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

                # flatten (clips, subjects, features) -> (clips*subjects, features)
                X_train = X_train.reshape(X_train.shape[0]*X_train.shape[1], -1)
                y_train = y_train.reshape(y_train.shape[0]*y_train.shape[1])

                X_test = X_test.reshape(X_test.shape[0]*X_test.shape[1], -1)
                y_test = y_test.reshape(y_test.shape[0]*y_test.shape[1])
                print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

                # norm by train set
                scaler = MinMaxScaler()
                scaler.fit(X_train)
                X_train = scaler.transform(X_train)
                X_test = scaler.transform(X_test)

                class_ratio_train = str(len(y_train[y_train==0]))+'/'+str(len(y_train[y_train==1]))

                # upsampling class that has lower number (test set only; disabled)
                tmp_shape = X_test.shape
                upsampling = False
                if upsampling:
                    if not 'up' in f:
                        f+='_up'
                    nc0 = len(X_test[y_test==0])
                    nc1 = len(X_test[y_test==1])
                    if nc0==0 or nc1 == 0:
                        # a class is absent from the test clip: record a
                        # placeholder row and move on to the next fold
                        rows.append({'dataset_id': FreqSelect[dataset_id], 'fold': test_index[0],
                                     'train_acc': '%.2f' % (-1), 'train_F1': '%.2f' % (-1),
                                     'test_acc': '%.2f' % (-1), 'test_F1': '%.2f' % (-1),
                                     'best_kernel': ' ', 'best_coef0': ' ',
                                     'best_degree': ' ', 'best_gamma': ' ',
                                     'best_C': ' ',
                                     'class_ratio_train (0/1)': class_ratio_train,
                                     'class_ratio_test (0/1)': str(len(y_test[y_test==0]))+'/'+str(len(y_test[y_test==1]))})
                        df = pd.DataFrame(rows, columns=result_columns)
                        # same output file as regular folds
                        save_to_csv(df, f+'_freqSelect')
                        continue

                    nc = [nc0, nc1]
                    higher = np.argmax(nc)
                    lower = abs(higher-1)

                    # repeat minority-class samples until both classes have
                    # nc[higher] samples (2*nc[higher] in total)
                    new_x = X_test
                    new_y = y_test
                    while True:
                        print('nc', nc)
                        if nc[lower] + len(new_y) < nc[higher]*2:
                            new_x = np.concatenate([new_x, X_test[y_test==lower]])
                            new_y = np.concatenate([new_y, y_test[y_test==lower]])
                        else:
                            remain = nc[higher]*2 - len(new_y)
                            new_x = np.concatenate([new_x, X_test[y_test==lower][0:remain]])
                            new_y = np.concatenate([new_y, y_test[y_test==lower][0:remain]])
                            break
                    X_test = np.array(new_x)
                    y_test = np.array(new_y)
                    print(X_test.shape, y_test.shape, len(y_test[y_test==0]), len(y_test[y_test==1]))
                    assert X_test.shape[1] == tmp_shape[1]
                    assert X_test.shape[0] >= tmp_shape[0]
                    assert len(y_test[y_test==0]) == len(y_test[y_test==1])

                # leave one clip out in each validation fold
                gkf = PredefinedSplit(inner_test_fold)
                print('split train set into:', gkf.get_n_splits(), 'folds')

                # `iid` is not passed: the argument was removed from
                # scikit-learn, and because all validation folds have the
                # same size the averaged scores do not depend on it.

                # We will use a Support Vector Classifier with class_weight balanced
                svm = SVC(class_weight = 'balanced')
                clf_best = GridSearchCV(estimator=svm,
                                   param_grid=p_grid,
                                   cv=gkf,
                                   scoring=['accuracy', 'balanced_accuracy', 'f1_macro'],
                                   refit = 'f1_macro') # get params that give best 'refit' value
                clf_best.fit(X_train, y_train)
                svc_c0, svc_c1 = _per_class_hits(y_train, clf_best.predict(X_train))
                svc_train_acc = (svc_c0+svc_c1)/len(y_train)

                print('clf_best best_score:', clf_best.best_score_)
                print('clf_best best_params_:', clf_best.best_params_)
                print('clf_best train correct: c0_train =', svc_c0, '/', len(y_train[y_train==0]),
                      'clf_best c1_train =', svc_c1, '/', len(y_train[y_train==1]), 'from', len(y_train), '=', svc_train_acc)


                # We will use a Linear SVC which allows regularization
                # Set up possible values of parameters to optimize over
                p_grid2 = {"C": [1, 10, 100],
                           "penalty": ['l1', 'l2']
                          }

                # sklearn: prefer dual=False when n_samples > n_features
                linear_svm = LinearSVC(class_weight = 'balanced', dual=False)
                clf_linear_best = GridSearchCV(estimator=linear_svm,
                                   param_grid=p_grid2,
                                   cv=gkf,
                                   scoring=['accuracy', 'balanced_accuracy', 'f1_macro'],
                                   refit = 'f1_macro') # get params that give best 'refit' value
                clf_linear_best.fit(X_train, y_train)
                lin_c0, lin_c1 = _per_class_hits(y_train, clf_linear_best.predict(X_train))
                lin_train_acc = (lin_c0+lin_c1)/len(y_train)

                print('clf_linear_best best_score:', clf_linear_best.best_score_)
                print('clf_linear_best best_params_:', clf_linear_best.best_params_)
                print('clf_linear_best train correct: c0_train =', lin_c0, '/', len(y_train[y_train==0]),
                      'clf_linear_best c1_train =', lin_c1, '/', len(y_train[y_train==1]),
                      'from', len(y_train), '=', lin_train_acc)


                # Keep the model with the better mean validation f1_macro.
                # FIX: the reported training metrics now belong to the model
                # that is actually selected (they used to be the LinearSVC
                # values in every case).
                # Note: 'train_F1' is the grid search's best mean validation
                # f1_macro (best_score_), not an F1 computed on the train set.
                if clf_best.best_score_ > clf_linear_best.best_score_:
                    clf = clf_best
                    print('using SVC')
                    train_acc = svc_train_acc
                    train_f1 = clf_best.best_score_
                    degree = clf.best_params_['degree']
                    gamma = clf.best_params_['gamma']
                    kernel = clf.best_params_['kernel']
                    penalty = '-'
                    coef0 = clf.best_params_['coef0']
                else:
                    clf = clf_linear_best
                    print('using LinearSVC')
                    train_acc = lin_train_acc
                    train_f1 = clf_linear_best.best_score_
                    degree = '-'
                    gamma = '-'
                    kernel = 'Linear'
                    penalty = clf.best_params_['penalty']
                    coef0 = '-'

                # evaluate the selected model on the held-out clip
                y_test_pred = clf.predict(X_test)
                c0_test, c1_test = _per_class_hits(y_test, y_test_pred)
                test_acc = (c0_test+c1_test)/len(y_test)
                test_f1 = f1_score(y_test, y_test_pred, average='macro')
                print('test correct: c0_test =', c0_test, '/', len(y_test[y_test==0]),
                      'c1_test =', c1_test, '/', len(y_test[y_test==1]), 'from', len(y_test), '=', test_acc)
                print()
                rows.append({'dataset_id': FreqSelect[dataset_id], 'fold': test_index[0],
                             'train_acc': '%.2f' % (train_acc*100), 'train_F1': '%.2f' % (train_f1*100),
                             'test_acc': '%.2f' % (test_acc*100), 'test_F1': '%.2f' % (test_f1*100),
                             'best_kernel': kernel, 'best_coef0': coef0,
                             'best_degree': degree, 'best_gamma': gamma,
                             'best_C': clf.best_params_['C'], 'penalty': penalty,
                             'class_ratio_train (0/1)': class_ratio_train,
                             'class_ratio_test (0/1)': str(len(y_test[y_test==0]))+'/'+str(len(y_test[y_test==1]))})
                df = pd.DataFrame(rows, columns=result_columns)

                # written after every fold so partial results survive an interruption
                save_to_csv(df, f+'_freqSelect')

        # release the large arrays before the next feature set is processed
        del Data, X_train, y_train, X_test, y_test, X, y
319 | 
320 | 
321 | 


--------------------------------------------------------------------------------
/src/.ipynb_checkpoints/Kmean-for-sth-idk-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import os\n",
 11 |     "from scipy import signal\n",
 12 |     "# from geometric_median import geometric_median\n",
 13 |     "import mne\n",
 14 |     "import matplotlib.pyplot as plt"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "__input_path = '../data/'\n",
 24 |     "emotions = ['Happiness', 'Fear', 'Excitement', \\\n",
 25 |     "              'Arousal', 'Valence', 'Reward']"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 3,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "label = np.load(os.path.join(__input_path, 'result.npy'))       "
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 4,
 40 |    "metadata": {},
 41 |    "outputs": [
 42 |     {
 43 |      "name": "stdout",
 44 |      "output_type": "stream",
 45 |      "text": [
 46 |       "save results: Happiness (1, 180)\n",
 47 |       "save results: Fear (1, 180)\n",
 48 |       "save results: Excitement (1, 180)\n",
 49 |       "save results: Arousal (1, 180)\n",
 50 |       "save results: Valence (1, 180)\n",
 51 |       "save results: Reward (1, 180)\n"
 52 |      ]
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "for i, emo in enumerate(emotions):\n",
 57 |     "    label_res = label[:,:,i].reshape(1, -1)\n",
 58 |     "    #label_res = label_res[0]\n",
 59 |     "    print 'save results:', emo, label_res.shape\n",
 60 |     "    #print label_res\n",
 61 |     "    np.save(os.path.join('../Kmean/', 'result_' + emo + '.npy'), label_res)"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": 5,
 67 |    "metadata": {},
 68 |    "outputs": [
 69 |     {
 70 |      "name": "stdout",
 71 |      "output_type": "stream",
 72 |      "text": [
 73 |       "(180, 1)\n",
 74 |       "[[3]\n",
 75 |       " [3]\n",
 76 |       " [5]\n",
 77 |       " [1]\n",
 78 |       " [3]\n",
 79 |       " [3]\n",
 80 |       " [5]\n",
 81 |       " [2]\n",
 82 |       " [3]\n",
 83 |       " [4]\n",
 84 |       " [7]\n",
 85 |       " [6]\n",
 86 |       " [5]\n",
 87 |       " [5]\n",
 88 |       " [5]\n",
 89 |       " [5]\n",
 90 |       " [5]\n",
 91 |       " [6]\n",
 92 |       " [3]\n",
 93 |       " [6]\n",
 94 |       " [5]\n",
 95 |       " [2]\n",
 96 |       " [2]\n",
 97 |       " [5]\n",
 98 |       " [5]\n",
 99 |       " [3]\n",
100 |       " [4]\n",
101 |       " [2]\n",
102 |       " [2]\n",
103 |       " [3]\n",
104 |       " [2]\n",
105 |       " [3]\n",
106 |       " [4]\n",
107 |       " [3]\n",
108 |       " [2]\n",
109 |       " [4]\n",
110 |       " [2]\n",
111 |       " [5]\n",
112 |       " [6]\n",
113 |       " [5]\n",
114 |       " [2]\n",
115 |       " [4]\n",
116 |       " [5]\n",
117 |       " [3]\n",
118 |       " [2]\n",
119 |       " [2]\n",
120 |       " [5]\n",
121 |       " [4]\n",
122 |       " [6]\n",
123 |       " [2]\n",
124 |       " [2]\n",
125 |       " [4]\n",
126 |       " [3]\n",
127 |       " [5]\n",
128 |       " [1]\n",
129 |       " [2]\n",
130 |       " [1]\n",
131 |       " [1]\n",
132 |       " [1]\n",
133 |       " [1]\n",
134 |       " [1]\n",
135 |       " [1]\n",
136 |       " [2]\n",
137 |       " [1]\n",
138 |       " [1]\n",
139 |       " [2]\n",
140 |       " [1]\n",
141 |       " [1]\n",
142 |       " [1]\n",
143 |       " [2]\n",
144 |       " [1]\n",
145 |       " [2]\n",
146 |       " [4]\n",
147 |       " [5]\n",
148 |       " [6]\n",
149 |       " [5]\n",
150 |       " [4]\n",
151 |       " [3]\n",
152 |       " [6]\n",
153 |       " [6]\n",
154 |       " [5]\n",
155 |       " [3]\n",
156 |       " [4]\n",
157 |       " [5]\n",
158 |       " [3]\n",
159 |       " [3]\n",
160 |       " [3]\n",
161 |       " [4]\n",
162 |       " [3]\n",
163 |       " [4]\n",
164 |       " [5]\n",
165 |       " [6]\n",
166 |       " [5]\n",
167 |       " [7]\n",
168 |       " [5]\n",
169 |       " [5]\n",
170 |       " [8]\n",
171 |       " [2]\n",
172 |       " [7]\n",
173 |       " [3]\n",
174 |       " [3]\n",
175 |       " [4]\n",
176 |       " [4]\n",
177 |       " [3]\n",
178 |       " [2]\n",
179 |       " [4]\n",
180 |       " [5]\n",
181 |       " [5]\n",
182 |       " [2]\n",
183 |       " [4]\n",
184 |       " [4]\n",
185 |       " [2]\n",
186 |       " [3]\n",
187 |       " [1]\n",
188 |       " [5]\n",
189 |       " [6]\n",
190 |       " [6]\n",
191 |       " [1]\n",
192 |       " [4]\n",
193 |       " [2]\n",
194 |       " [1]\n",
195 |       " [1]\n",
196 |       " [1]\n",
197 |       " [2]\n",
198 |       " [3]\n",
199 |       " [4]\n",
200 |       " [1]\n",
201 |       " [2]\n",
202 |       " [4]\n",
203 |       " [1]\n",
204 |       " [1]\n",
205 |       " [1]\n",
206 |       " [2]\n",
207 |       " [4]\n",
208 |       " [3]\n",
209 |       " [2]\n",
210 |       " [4]\n",
211 |       " [3]\n",
212 |       " [7]\n",
213 |       " [4]\n",
214 |       " [3]\n",
215 |       " [4]\n",
216 |       " [5]\n",
217 |       " [8]\n",
218 |       " [7]\n",
219 |       " [7]\n",
220 |       " [7]\n",
221 |       " [6]\n",
222 |       " [7]\n",
223 |       " [6]\n",
224 |       " [8]\n",
225 |       " [6]\n",
226 |       " [9]\n",
227 |       " [2]\n",
228 |       " [4]\n",
229 |       " [4]\n",
230 |       " [5]\n",
231 |       " [3]\n",
232 |       " [2]\n",
233 |       " [4]\n",
234 |       " [4]\n",
235 |       " [6]\n",
236 |       " [1]\n",
237 |       " [2]\n",
238 |       " [1]\n",
239 |       " [3]\n",
240 |       " [1]\n",
241 |       " [1]\n",
242 |       " [2]\n",
243 |       " [3]\n",
244 |       " [6]\n",
245 |       " [1]\n",
246 |       " [2]\n",
247 |       " [3]\n",
248 |       " [3]\n",
249 |       " [2]\n",
250 |       " [1]\n",
251 |       " [2]\n",
252 |       " [2]\n",
253 |       " [5]]\n"
254 |      ]
255 |     }
256 |    ],
257 |    "source": [
258 |     "from sklearn.cluster import KMeans\n",
259 |     "import numpy as np\n",
260 |     "Y = np.load(os.path.join('../Kmean/', 'result_Valence.npy'))   \n",
261 |     "Z = np.load(os.path.join('../Kmean/', 'result_Arousal.npy')) \n",
262 |     "#X = np.\n",
263 |     "#print Y[0]\n",
264 |     "X=zip(Z[0])\n",
265 |     "#print Y[0].shape\n",
266 |     "X=np.asarray(X)\n",
267 |     "print X.shape\n",
268 |     "\n",
269 |     "kmeans = KMeans(n_clusters=2, random_state=0).fit(X)\n",
270 |     "kmeans.labels_\n",
271 |     "\n",
272 |     "#kmeans.predict([[0, 0], [4, 4]])\n",
273 |     "\n",
274 |     "kmeans.cluster_centers_\n",
275 |     "print(X)"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "code",
280 |    "execution_count": 6,
281 |    "metadata": {},
282 |    "outputs": [
283 |     {
284 |      "name": "stdout",
285 |      "output_type": "stream",
286 |      "text": [
287 |       "[1 1 0 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 1 1 0 1\n",
288 |       " 0 0 0 1 0 0 1 1 1 0 0 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0\n",
289 |       " 0 0 0 1 0 0 0 1 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 0\n",
290 |       " 1 1 1 0 0 0 1 0 1 1 1 1 1 1 0 1 1 0 1 1 1 1 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0\n",
291 |       " 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0]\n"
292 |      ]
293 |     }
294 |    ],
295 |    "source": [
296 |     "c= (kmeans.predict(X))\n",
297 |     "print (kmeans.predict(X))"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "code",
302 |    "execution_count": 7,
303 |    "metadata": {},
304 |    "outputs": [
305 |     {
306 |      "data": {
307 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAD8CAYAAABkbJM/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFrVJREFUeJzt3XuQnXWd5/H3Jwl3FCFECAQMq8GS1V3UruBtXEcgwKxlcEQNO6OZGRy0hFXHnXJwXAfEcRYoZ7RcXd0IKDoqN8cyumgMF8e1LC4dRbkoEhEmnSUQuYoiGPLdP/oJdpqT9C+cTp8G3q+qU32e3/M7z/OBSvpznss5SVUhSdJEZgw6gCTpicHCkCQ1sTAkSU0sDElSEwtDktTEwpAkNbEwJElNJqUwkhyd5KYkq5Oc0mP9Tkku6NZflWR+Nz47yRVJHkjyiXGv+U63zWu7xzMnI6sk6fGZ1e8GkswEPgkcCYwA1yRZXlU3jpl2AnBPVT0nyRLgTOBNwG+BDwDP7x7j/UlVDfebUZLUv74LA1gIrK6qWwCSnA8sBsYWxmLgtO75xcAnkqSqfg18L8lzJiEHe++9d82fP38yNiVJTxmrVq36ZVXNmWjeZBTG/sCaMcsjwGFbmlNVG5LcB8wGfjnBtj+b5BHgK8Df1wTfYzJ//nyGhz0gkaRtkeS2lnnT+aL3n1TVC4A/6B5v7jUpyYlJhpMMr1+/fkoDStJTyWQUxlrggDHL87qxnnOSzAL2AO7a2karam3381fAlxg99dVr3rKqGqqqoTlzJjyikiQ9TpNRGNcAC5IclGRHYAmwfNyc5cDS7vlxwOVbO72UZFaSvbvnOwCvAa6fhKySpMep72sY3TWJk4EVwEzg3Kq6IcnpwHBVLQfOAb6QZDVwN6OlAkCSW4GnAzsmORZYBNwGrOjKYiZwKfCZfrNKkh6/PJn+PYyhoaHyorckbZskq6pqaKJ50/mityRpGrEwJElNLAxJUhMLQ5LUxMKQJDWxMCRJTSwMSVITC0OS1MTCkCQ1sTAkSU0sDElSEwtDktTEwpAkNbEwJElNLAxJUhMLQ5LUxMKQJDWxMCRJTSwMSVITC0OS1MTCkCQ1sTAkSU0sDElSEwtDktTEwpAkNbEwJElNLAxJUpNJKYwkRye5KcnqJKf0WL9Tkgu69Vclmd+Nz05yRZIHknxi3GtenOS67jUfT5LJyCpJenz6LowkM4FPAscAhwDHJzlk3LQTgHuq6jnAR4Ezu/HfAh8A/rrHpj8F/CWwoHsc3W9WSdLjNxlHGAuB1VV1S1U9DJwPLB43ZzFwXvf8YuDwJKmqX1fV9xgtjkclmQs8vaqurKoCPg8cOwlZJUmP02QUxv7AmjHLI91YzzlVtQG4D5g9wTZHJtgmAElOTDKcZHj9+vXbGF2S1OoJf9G7qpZV1VBVDc2ZM2fQcSTpSWsyCmMtcMCY5XndWM85SWYBewB3TbDNeRNsU5I0hSajMK4BFiQ5KMmOwBJg+bg5y4Gl3fPjgMu7axM9VdXtwP1JXtLdHfUW4GuTkFWS9DjN6ncDVbUhycnACmAmcG5V3ZDkdGC4qpYD5wBfSLIauJvRUgEgya3A04EdkxwLLKqqG4F3AJ8DdgG+2T0kSQOSrbzRf8IZGhqq4eHhQceQpCeUJKuqamiieU/4i96SpKlhYUiSmlgYkqQmFoYkqYmFIUlqYmFIkppYGJKkJhaGJKmJhSFJamJhSJKaWBiSpCYWhiSpiYUhSWpiYUiSmlgYkqQmFoYkqYmFIUlqYmFIkppYGJKkJhaGJKmJhSFJamJhSJKaWBiSpCYWhiSpiYUhSWpiYUiSmlgYkqQmk1IYSY5OclOS1UlO6bF+pyQXdOuvSjJ/zLr3deM3JTlqzPitSa5Lcm2S4cnIKUl6/Gb1u4EkM4FPAkcCI8A1SZZX1Y1jpp0A3FNVz0myBDgTeF
OSQ4AlwL8H9gMuTXJwVT3Sve4Pq+qX/WaUJPVvMo4wFgKrq+qWqnoYOB9YPG7OYuC87vnFwOFJ0o2fX1UPVdUvgNXd9iRJ08xkFMb+wJoxyyPdWM85VbUBuA+YPcFrC/h2klVJTtzSzpOcmGQ4yfD69ev7+g+RJG3ZdL7o/YqqehFwDHBSklf2mlRVy6pqqKqG5syZM7UJJekpZDIKYy1wwJjled1YzzlJZgF7AHdt7bVVtennncBX8VSVJA3UZBTGNcCCJAcl2ZHRi9jLx81ZDiztnh8HXF5V1Y0v6e6iOghYAFydZLckTwNIshuwCLh+ErJKkh6nvu+SqqoNSU4GVgAzgXOr6oYkpwPDVbUcOAf4QpLVwN2MlgrdvAuBG4ENwElV9UiSfYCvjl4XZxbwpar6Vr9ZJUmPX0bf6D85DA0N1fCwH9mQpG2RZFVVDU00bzpf9JYkTSMWhiSpiYUhSWpiYUiSmlgYkqQmFoYkqYmFIUlqYmFIkppYGJKkJhaGJKmJhSFJamJhSJKaWBiSpCYWhiSpiYUhSWpiYUiSmlgYkqQmFoYkqYmFIUlqYmFIkppYGJKkJhaGJKmJhSFJamJhSJKaWBiSpCYWhiSpyaQURpKjk9yUZHWSU3qs3ynJBd36q5LMH7Pufd34TUmOat3mZNq47uAxjy9sz10127ju5+NyXTjoSABsXHfbuFzfHXQkAEZGRjhyxhsefYyMjAw6EgDf//6qzXKtWbNm0JEAuPXWWx/NtGjWG7n33nsHHQmAO9fdw+v2WspRs97I0oNP5re//e2gIwHw4AMP8g//5WOctPBv+OKHvzLoOI/a8LsNfH/5NXzjf6/klh/ftt33l6rqbwPJTOBnwJHACHANcHxV3ThmzjuA/1BVb0+yBHhdVb0pySHAl4GFwH7ApcDB3cu2us1ehoaGanh4uDn7xnVnAWf3WDODGfv+tHk7k23jur8Avtdz3Yx9fza1YcbYuO4NwI96rNmVGfteO9VxHvXnh7yTkZ/e/pjxZz5rNl/8xacHkGjUG/Y7gXvX3f+Y8YNe+CyWrfrIABKNOumlp/Czq37+mPE3n/YG3vJ3bxxAolEffdunueQzlz1m/ONXfpjnLTy4xyumxr9e9H3+/k0f3Wxs1o6zuOiOz7D7HrsPKBWM3Hw77/lPf8dDv36IRzY8AoGFx7yI95//bmbOnLlN20qyqqqGJpo3GUcYC4HVVXVLVT0MnA8sHjdnMXBe9/xi4PAk6cbPr6qHquoXwOpuey3bnAS9ygJg4+Tvapv0LovB61UWAL+Z0hTj9SoLgDtvu2uKk2yuV1kA/OKH2/+d4Nb0KguAL5x20RQn2VyvsgB450veP8VJNvfh4z/2mLEND2/gnS/97wNI83unH/cR7r3jPn7zqwd56MGHeeg3D3P1N3/IJcsu3W77nIzC2B8Ye5w90o31nFNVG4D7gNlbeW3LNrerjesmLNuB2Lhu0aAj9LRx3WP/Uk0Hgzo1NV1OPY13yWe33y+Tfnzrc1cMOkJPP/7ujdTG3mdh1vx07RSn+b11t97J2tXrGH+G6KHfPMT/WbZyu+33CX/RO8mJSYaTDK9fv34Stzw9zp0+1j2DDrAF6wYdYFpZs+bOQUfo6f71vxp0hJ7u/2Xvo7FB+/X9Dw46Qk8bHt7AjBnpue7hhzZst/1ORmGsBQ4YszyvG+s5J8ksYA/grq28tmWbAFTVsqoaqqqhOXPm9PGfsbkZ+14/aduaTDP2vWbQEXqase8Zg47Q07x58way35e97MUD2e9Elrz3dYOO0NMb/3o7nHGeBAuPOXSL6/aa+4wpTLK5/RfM5Wl7Pvb6yY4778Crj3/5dtvvZBTGNcCCJAcl2RFYAiwfN2c5sLR7fhxweY0eSy0HlnR3UR0ELACubtzmJJjSs1zbYKdBB9iCWYMO0NOMWb3fabGF4UGbscNgg+38tN5/vhYMHTTFSTb37EOf1XP8L/7H8VOc5PdmzpzJn37g9T
3XnbniA1Oc5veS8Ldfehc777YTO+y0AwA7774z8567H6//q9dst/32XRjdNYmTgRXAT4ALq+qGJKcneW037RxgdpLVwHuAU7rX3gBcCNwIfAs4qaoe2dI2+8063ox9r2C0i8bafaB3IgHM2Pc6YPw1lF2mQa4bgf84bnSfgeda8fCFPHvcL7v5L5jHykcGexF35caL2G/BPpuNHXzYv2PFQ4O9Rfrr9/0zLz128z9fbz7tDfyvq88aUKJRn/7BRzYrh5k7zOSfvvtBjv+bPx5gKlj6wSX843c+yLznzmX3PXflRUe8gIvuOJv5zz9woLme/4rn8bmf/U/efOpxvObti/hvn3k7n7z6DHbZfZftts++b6udTrb1tlpJ0tTeVitJegqwMCRJTSwMSVITC0OS1MTCkCQ1sTAkSU0sDElSEwtDktTEwpAkNbEwJElNLAxJUhMLQ5LUxMKQJDWxMCRJTSwMSVITC0OS1MTCkCQ1sTAkSU0sDElSEwtDktTEwpAkNbEwJElNLAxJUhMLQ5LUxMKQJDWxMCRJTSwMSVKTvgojyV5JVia5ufu55xbmLe3m3Jxk6ZjxFye5LsnqJB9Pkm78tCRrk1zbPf6on5ySpP71e4RxCnBZVS0ALuuWN5NkL+BU4DBgIXDqmGL5FPCXwILucfSYl360qg7tHpf0mVOS1Kd+C2MxcF73/Dzg2B5zjgJWVtXdVXUPsBI4Oslc4OlVdWVVFfD5LbxekjQN9FsY+1TV7d3zdcA+PebsD6wZszzSje3fPR8/vsnJSX6c5NwtneqSJE2dCQsjyaVJru/xWDx2XneUUJOU61PAs4FDgduBf9xKvhOTDCcZXr9+/STtXpI03qyJJlTVEVtal+SOJHOr6vbuFNOdPaatBV41Znke8J1ufN648bXdPu8Ys4/PAN/YSr5lwDKAoaGhySosSdI4/Z6SWg5suutpKfC1HnNWAIuS7NmdWloErOhOZd2f5CXd3VFv2fT6rnw2eR1wfZ85JUl9mvAIYwJnABcmOQG4DXgjQJIh4O1V9daqujvJh4BrutecXlV3d8/fAXwO2AX4ZvcAOCvJoYye4roVeFufOSVJfcropYcnh6GhoRoeHh50DEl6QkmyqqqGJprnJ70lSU0sDElSEwtDktTEwpAkNbEwJElNLAxJUhMLQ5LUxMKQJDWxMCRJTSwMSVITC0OS1MTCkCQ1sTAkSU0sDElSEwtDktTEwpAkNbEwJElNLAxJUhMLQ5LUxMKQJDWxMCRJTSwMSVITC0OS1MTCkCQ1sTAkSU0sDElSEwtDktSkr8JIsleSlUlu7n7uuYV5S7s5NydZOmb8w0nWJHlg3PydklyQZHWSq5LM7yenJKl//R5hnAJcVlULgMu65c0k2Qs4FTgMWAicOqZYvt6NjXcCcE9VPQf4KHBmnzklSX3qtzAWA+d1z88Dju0x5yhgZVXdXVX3ACuBowGq6sqqun2C7V4MHJ4kfWaVJPWh38LYZ8wv/HXAPj3m7A+sGbM80o1tzaOvqaoNwH3A7P6iSpL6MWuiCUkuBfbtser9YxeqqpLUZAVrleRE4ESAAw88cKp3L0lPGRMWRlUdsaV1Se5IMreqbk8yF7izx7S1wKvGLM8DvjPBbtcCBwAjSWYBewB3bSHfMmAZwNDQ0JQXliQ9VfR7Smo5sOmup6XA13rMWQEsSrJnd7F7UTfWut3jgMuryjKQpAHqtzDOAI5McjNwRLdMkqEkZwNU1d3Ah4Brusfp3RhJzkoyAuyaZCTJad12zwFmJ1kNvIced19JkqZWnkxv3IeGhmp4eHjQMSTpCSXJqqoammien/SWJDWxMCRJTSwMSVITC0OS1MTCkCQ1sTAkSU0sDElSEwtDktTEwpAkNbEwJElNLAxJUhMLQ5LUxMKQJDWxMCRJTSwMSVITC0OS1MTCkCQ1sTAkSU0sDElSEwtDktTEwpAkNbEwJElNLAxJUhMLQ5LUxMKQJDWxMCRJTSwMSV
KTvgojyV5JVia5ufu55xbmLe3m3Jxk6ZjxDydZk+SBcfP/LMn6JNd2j7f2k1OS1L9+jzBOAS6rqgXAZd3yZpLsBZwKHAYsBE4dUyxf78Z6uaCqDu0eZ/eZU5LUp34LYzFwXvf8PODYHnOOAlZW1d1VdQ+wEjgaoKqurKrb+8wgSZoC/RbGPmN+4a8D9ukxZ39gzZjlkW5sIq9P8uMkFyc5oM+ckqQ+zZpoQpJLgX17rHr/2IWqqiQ1Sbm+Dny5qh5K8jZGj15evYV8JwInAhx44IGTtHtJ0ngTFkZVHbGldUnuSDK3qm5PMhe4s8e0tcCrxizPA74zwT7vGrN4NnDWVuYuA5Z1edYnuW1r257A3sAv+3j99mKubTMdc03HTGCubfVkzfWslkkTFsYElgNLgTO6n1/rMWcF8A9jLnQvAt63tY1uKqFu8bXAT1rCVNWclnlb2e9wVQ31s43twVzbZjrmmo6ZwFzb6qmeq99rGGcARya5GTiiWybJUJKzAarqbuBDwDXd4/RujCRnJRkBdk0ykuS0brvvTHJDkh8B7wT+rM+ckqQ+9XWE0Z06OrzH+DDw1jHL5wLn9pj3XuC9PcbfxwRHIZKkqeUnvTe3bNABtsBc22Y65pqOmcBc2+opnStVk3VjkyTpycwjDElSEwsDSHJukjuTXD/oLJskOSDJFUlu7G4AeNegMwEk2TnJ1Ul+1OX64KAzjZVkZpIfJvnGoLNskuTWJNd134s2POg8myR5RvfB2J8m+UmSl06DTM8d8x1y1ya5P8m7B50LIMlfdX/mr0/y5SQ7DzoTQJJ3dZlu2N7/rzwlBSR5JfAA8Pmqev6g88DorcXA3Kr6QZKnAauAY6vqxgHnCrBbVT2QZAfge8C7qurKQebaJMl7gCHg6VX1mkHngdHCAIaqalrdv5/kPOD/VtXZSXYEdq2qeweda5MkMxn9HNdhVdXP56smI8v+jP5ZP6SqHkxyIXBJVX1uwLmeD5zP6HfyPQx8C3h7Va3eHvvzCAOoqu8Cdw86x1hVdXtV/aB7/itGP4vS8pUq21WN2vTtwjt0j2nxriPJPOA/M/phT21Fkj2AVwLnAFTVw9OpLDqHAz8fdFmMMQvYJcksYFfg/w04D8DzgKuq6jdVtQH4V+CPt9fOLIwngCTzgRcCVw02yajutM+1jH6yf2VVTYtcwMcYvU1746CDjFPAt5Os6r7KZjo4CFgPfLY7hXd2kt0GHWqcJcCXBx0CoKrWAh8B/g24Hbivqr492FQAXA/8QZLZSXYF/gjYbt+9Z2FMc0l2B74CvLuq7h90HoCqeqSqDmX0a14WdofFA5XkNcCdVbVq0Fl6eEVVvQg4BjipOwU6aLOAFwGfqqoXAr+mxz9PMCjdKbLXAhcNOgtA900Vixkt2v2A3ZL86WBTQVX9BDgT+Dajp6OuBR7ZXvuzMKax7hrBV4AvVtW/DDrPeN0pjCvovq5+wF4OvLa7XnA+8Ook/zzYSKO6d6dU1Z3AV9nyvwEzlUaAkTFHhxczWiDTxTHAD6rqjkEH6RwB/KKq1lfV74B/AV424EwAVNU5VfXiqnolcA/ws+21LwtjmuouLp8D/KSq/mnQeTZJMifJM7rnuwBHAj8dbKrRbweoqnlVNZ/RUxmXV9XA3wEm2a27aYHulM8iRk8jDFRVrQPWJHluN3Q4MNAbKsY5nmlyOqrzb8BLkuza/d08nMbvuNvekjyz+3kgo9cvvrS99tXvlw8+KST5MqPfqLt3991Wp1bVOYNNxcuBNwPXddcLAP62qi4ZYCaAucB53R0sM4ALq2ra3MI6De0DfHX0dwyzgC9V1bcGG+lR/xX4Ynf65xbgzwecB3i0WI8E3jboLJtU1VVJLgZ+AGwAfsj0+dT3V5LMBn4HnLQ9b17wtlpJUhNPSUmSmlgYkqQmFoYkqYmFIUlqYmFIkppYGJKkJhaGJKmJhSFJavL/AdL3uR25FGAMAAAAAElFTkSuQmCC\n",
308 |       "text/plain": [
309 |        "<Figure size 432x288 with 1 Axes>"
310 |       ]
311 |      },
312 |      "metadata": {},
313 |      "output_type": "display_data"
314 |     }
315 |    ],
316 |    "source": [
317 |     "a0=np.zeros([X.shape[0]])\n",
318 |     "plt.scatter(X,a0,c=c);"
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "code",
323 |    "execution_count": 8,
324 |    "metadata": {},
325 |    "outputs": [
326 |     {
327 |      "data": {
328 |       "text/plain": [
329 |        "85"
330 |       ]
331 |      },
332 |      "execution_count": 8,
333 |      "metadata": {},
334 |      "output_type": "execute_result"
335 |     }
336 |    ],
337 |    "source": [
338 |     "np.count_nonzero(c == 0)"
339 |    ]
340 |   }
341 |  ],
342 |  "metadata": {
343 |   "kernelspec": {
344 |    "display_name": "Python 2",
345 |    "language": "python",
346 |    "name": "python2"
347 |   },
348 |   "language_info": {
349 |    "codemirror_mode": {
350 |     "name": "ipython",
351 |     "version": 2
352 |    },
353 |    "file_extension": ".py",
354 |    "mimetype": "text/x-python",
355 |    "name": "python",
356 |    "nbconvert_exporter": "python",
357 |    "pygments_lexer": "ipython2",
358 |    "version": "2.7.12"
359 |   }
360 |  },
361 |  "nbformat": 4,
362 |  "nbformat_minor": 2
363 | }
364 | 


--------------------------------------------------------------------------------
/src/EEGPreprocessing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import os\n",
 11 |     "from scipy import signal\n",
 12 |     "import mne\n",
 13 |     "from mne.preprocessing.ica import corrmap\n",
 14 |     "from mne.preprocessing import ICA\n",
 15 |     "from matplotlib.pyplot import savefig\n",
 16 |     "import pandas as pd\n",
 17 |     "import datetime"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": null,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "class ICA:\n",
 27 |     "    \n",
 28 |     "    def __init__(self, channel_list, sampling_rate, log_filename):\n",
 29 |     "        self.__ch_names = channel_list\n",
 30 |     "        self.__sampling_rate = sampling_rate\n",
 31 |     "        self.__no_of_channels = len(self.__ch_names)\n",
 32 |     "        \n",
 33 |     "        # set constant\n",
 34 |     "        ## ['Fp1', 'Fp2', 'Fz', 'Cz', 'T3', 'T4', 'Pz', 'Oz']\n",
 35 |     "        ch_f = [\"Fz\"]\n",
 36 |     "        ch_cp = [\"Cz\"]\n",
 37 |     "        ch_t = ['T3', 'T4']\n",
 38 |     "        ch_op = [\"Oz\", 'Pz']\n",
 39 |     "        ch_fp = ['Fp1', 'Fp2']\n",
 40 |     "\n",
 41 |     "        self.__chs_dict = {'Frontal':ch_f,\n",
 42 |     "                    'CP':ch_cp,\n",
 43 |     "                    'Temporal':ch_t,\n",
 44 |     "                    'OP':ch_op,\n",
 45 |     "                    'FP':ch_fp\n",
 46 |     "                    }\n",
 47 |     "        self.__info = mne.create_info(self.__ch_names, self.__sampling_rate, ch_types=[\"eeg\"] * self.__no_of_channels)\n",
 48 |     "        self.__log_filename = log_filename\n",
 49 |     "        \n",
 50 |     "        try:\n",
 51 |     "            self.__exclude_df = pd.read_csv(self.__log_filename)\n",
 52 |     "        except:\n",
 53 |     "            print ('Cannot read from', self.__log_filename)\n",
 54 |     "\n",
 55 |     "    \n",
 56 |     "    #find min max of each channel in ica components \n",
 57 |     "    #imported from: Karis Matchaparn\n",
 58 |     "    def find_exclude_ica(self, ica_dict, channel, focus_part):\n",
 59 |     "        exclude_list = [] \n",
 60 |     "#         print(channel.keys())\n",
 61 |     "\n",
 62 |     "        for ica_idx in range(len(ica_dict)): # get into each component\n",
 63 |     "\n",
 64 |     "            #find summation of each brain part\n",
 65 |     "            sum_chs = {}\n",
 66 |     "            for brain_part in channel.keys(): # loop for each brain part\n",
 67 |     "                sum_buffer = 0\n",
 68 |     "\n",
 69 |     "                for chs in channel[brain_part]: #loop for each channel in brain part\n",
 70 |     "#                     print(\"---\",chs, ica_dict[ica_idx][chs])\n",
 71 |     "                    sum_buffer += ica_dict[ica_idx][chs]\n",
 72 |     "#                 print(\"x\"*20, brain_part)\n",
 73 |     "                sum_chs[brain_part] = sum_buffer\n",
 74 |     "\n",
 75 |     "            # **************************  \n",
 76 |     "#             print(\"----------\")\n",
 77 |     "#             print(ica_idx,sum_chs)\n",
 78 |     "#             print(\"----------\")\n",
 79 |     "            for chs in sum_chs.keys():\n",
 80 |     "                if sum_chs[focus_part] < sum_chs[chs]:\n",
 81 |     "                    exclude_list.append(ica_idx)\n",
 82 |     "                    break\n",
 83 |     "\n",
 84 |     "        print(exclude_list)\n",
 85 |     "        return exclude_list\n",
 86 |     "    \n",
 87 |     "    def label_exclude_list(self, eeg, subj_id, clip_id):\n",
 88 |     "        ## Fit ICA on one clip, plot the components, ask which ones to drop and log the answer.\n",
 89 |     "        ## eeg is handed straight to mne.io.RawArray; subj_id / clip_id identify the log row.\n",
 90 |     "        ## Only a malformed or out-of-range answer re-prompts; any other error (including\n",
 91 |     "        ## KeyboardInterrupt and failures while writing the log) propagates to the caller.\n",
 92 |     "        raw = mne.io.RawArray(eeg, self.__info)\n",
 93 |     "        raw.set_montage(mne.channels.read_montage(\"standard_1020\"))\n",
 94 |     "        raw_tmp = raw.copy()\n",
 95 |     "        \n",
 96 |     "        ica = mne.preprocessing.ICA(method=\"extended-infomax\", random_state=1)\n",
 97 |     "        ica.fit(raw_tmp)\n",
 98 |     "\n",
 99 |     "        ica.plot_components(inst=raw_tmp)\n",
100 |     "        ica.plot_sources(raw_tmp)\n",
101 |     "        \n",
102 |     "        exclude_list = None\n",
103 |     "        while exclude_list is None:\n",
104 |     "            exc = input(\"Which components do you want to delete? (0-7 or -1 if no component) (put , between each component no.): \")\n",
105 |     "            try:\n",
106 |     "                # str() also covers an answer that arrives as an int instead of text\n",
107 |     "                picked = [int(x) for x in str(exc).split(',')]\n",
108 |     "                if any(x < -1 or x > 7 for x in picked):\n",
109 |     "                    raise ValueError('component no. out of range')\n",
110 |     "                exclude_list = picked\n",
111 |     "            except ValueError:\n",
112 |     "                print ('Error: Please try to input component no. again (-1 to 7 only)')\n",
113 |     "\n",
114 |     "        # log once, outside the retry loop: with a bare except around the log call, a failure\n",
115 |     "        # after the write re-prompted forever and appended a duplicate row on every retry\n",
116 |     "        self.__log(subj_id, clip_id, '|'.join([str(x) for x in exclude_list]))\n",
117 |     "        \n",
118 |     "        print(\"Component to delete =\", exclude_list)\n",
119 |     "        \n",
120 |     "    \n",
121 |     "    def remove_exclude_components(self, eeg, subj_id, clip_id):\n",
122 |     "        ## Drop the ICA components logged for (subj_id, clip_id) from one clip and return\n",
123 |     "        ## the channel data; the data is returned untouched when the log entry contains -1.\n",
124 |     "        raw = mne.io.RawArray(eeg, self.__info)\n",
125 |     "        raw.set_montage(mne.channels.read_montage(\"standard_1020\"))\n",
126 |     "\n",
127 |     "        df = self.__exclude_df\n",
128 |     "        d = df.loc[(df['subj_id'] == subj_id) & (df[' clip_id'] == clip_id)]\n",
129 |     "        if len(d) == 0:\n",
130 |     "            raise IndexError('No ICA exclude entry for subj_id=%s, clip_id=%s' % (subj_id, clip_id))\n",
131 |     "        # the column may be parsed as integers (e.g. when every row lists one component)\n",
132 |     "        # or as strings such as ' 0|2', so normalise through str() before splitting\n",
133 |     "        exc = d[' exclude'].values[0]\n",
134 |     "        exclude_list = [int(e) for e in str(exc).split('|')]\n",
135 |     "        \n",
136 |     "        if -1 in exclude_list:\n",
137 |     "            print('No component to delete.')\n",
138 |     "            return raw.get_data()\n",
139 |     "\n",
140 |     "        ica = mne.preprocessing.ICA(method=\"extended-infomax\", random_state=1)\n",
141 |     "        ica.fit(raw.copy())  # fitted only when something is actually removed\n",
142 |     "        print(\"Delete components :\", exclude_list)\n",
143 |     "        ica.exclude = exclude_list #select components to exclude\n",
144 |     "\n",
145 |     "        raw_corrected = raw.copy()\n",
146 |     "        ica.apply(raw_corrected)\n",
147 |     "        return raw_corrected.get_data() #get data after ica\n",
148 |     "    \n",
149 |     "    \n",
150 |     "    def __log(self, *msg):\n",
151 |     "        ## save subject id, clip id, list of excluding components (concatenated with |)\n",
152 |     "        ## as one ', '-terminated row appended to the log file, then copy the whole log to\n",
153 |     "        ## ./log-exclude-list/ under a timestamped name.\n",
154 |     "        ## copyfile is imported locally because no cell of this notebook imports it.\n",
155 |     "        from shutil import copyfile\n",
156 |     "        st = ''.join(str(m) + ', ' for m in msg)\n",
157 |     "        with open(self.__log_filename, \"a\") as f:\n",
158 |     "            f.write(st + '\\n')\n",
159 |     "        print(msg)\n",
160 |     "        now = datetime.datetime.now()\n",
161 |     "        copyfile(self.__log_filename, './log-exclude-list/ica_exclude_part'+str(now)+'.csv')"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": null,
167 |    "metadata": {},
168 |    "outputs": [],
169 |    "source": [
170 |     "class EEGPreprocessing:\n",
171 |     "    \n",
172 |     "    def __init__(self):\n",
173 |     "        self.a = 6\n",
174 |     "        self.__input_path = '../data/EEG/'\n",
175 |     "        self.__emotions = ['Happiness', 'Fear', 'Excitement', \\\n",
176 |     "                           'Arousal', 'Valence', 'Reward']\n",
177 |     "        self.__band_list = [{ 'name': 'theta', 'low': 3, 'high': 7}, \n",
178 |     "                            { 'name': 'alpha', 'low': 8, 'high': 13}, \n",
179 |     "                            { 'name': 'beta', 'low': 14, 'high': 29},\n",
180 |     "                            { 'name': 'gamma', 'low': 30, 'high': 47},\n",
181 |     "#                            { 'name': 'all', 'low': 4, 'high': 47}\n",
182 |     "                           ]\n",
183 |     "        self.__channel_list = ['Fp1', 'Fp2', 'Fz', 'Cz', 'T3', 'T4', 'Pz', 'Oz']\n",
184 |     "        self.__log_filename = './ica_exclude_part.csv'\n",
185 |     "\n",
186 |     "            \n",
187 |     "    def read_signals(self):\n",
188 |     "        self.__data = np.load(os.path.join(self.__input_path, 'raw/EEG.npy'))\n",
189 |     "        self.get_input_data_info()\n",
190 |     "        \n",
191 |     "    \n",
192 |     "    def set_labels(self, thresholds = None, emotions = None):\n",
193 |     "        ## Turn the raw scores in raw/result.npy into class labels, one .npz file per emotion:\n",
194 |     "        ##   1 threshold  -> binary labels for every emotion (score > threshold is class 1)\n",
195 |     "        ##   otherwise, thresholds given -> 3 classes split at thresholds[0] and thresholds[1]\n",
196 |     "        ##                   for the emotions listed in `emotions` (None means all of them)\n",
197 |     "        ##   no thresholds -> per emotion, the threshold in 1..9 giving the most balanced split\n",
198 |     "        self.__label = np.load(os.path.join(self.__input_path, 'raw/result.npy'))\n",
199 |     "        \n",
200 |     "        if thresholds and len(thresholds) == 1:\n",
201 |     "            above_thres = self.__label > thresholds[0]\n",
202 |     "            below_thres = self.__label <= thresholds[0]\n",
203 |     "            self.__label[above_thres] = 1\n",
204 |     "            self.__label[below_thres] = 0\n",
205 |     "\n",
206 |     "            for i, emo in enumerate(self.__emotions):\n",
207 |     "                label_res = self.__label[:,:,i].reshape(1, -1)\n",
208 |     "                label_res = np.array(label_res[0])\n",
209 |     "\n",
210 |     "                print ('save results:', emo, label_res.shape)\n",
211 |     "                print (len(label_res[label_res==0]), len(label_res[label_res==1]) )\n",
212 |     "                print (label_res)\n",
213 |     "                print  ()         \n",
214 |     "                np.savez(os.path.join('../data/score/label/result_' + emo + '_binclass'), \n",
215 |     "                     y = label_res, \n",
216 |     "                     threshold = thresholds)\n",
217 |     "                \n",
218 |     "        elif thresholds:\n",
219 |     "            # emotions=None would raise TypeError on the membership test, so default to all\n",
220 |     "            selected = self.__emotions if emotions is None else emotions\n",
221 |     "            for i, emo in enumerate(self.__emotions):\n",
222 |     "                if emo in selected:\n",
223 |     "                    scores = np.array(self.__label[:,:,i].reshape(1, -1)[0])\n",
224 |     "                    print (scores)\n",
225 |     "                    # build all masks from the untouched scores so a freshly written 0/1 is never re-matched\n",
226 |     "                    low = scores < thresholds[0]\n",
227 |     "                    mid = (scores >= thresholds[0]) & (scores < thresholds[1])\n",
228 |     "                    high = scores >= thresholds[1]\n",
229 |     "                    label_res = scores.copy()\n",
230 |     "                    label_res[low] = 0\n",
231 |     "                    label_res[mid] = 1\n",
232 |     "                    label_res[high] = 2\n",
233 |     "\n",
234 |     "                    print ('save results:', emo, label_res.shape)\n",
235 |     "                    print (label_res)\n",
236 |     "                    for cls in range(0, 3):\n",
237 |     "                        print( cls, len(label_res[label_res==cls]))\n",
238 |     "                    print()\n",
239 |     "                    np.savez(os.path.join('../data/score/label/result_' + emo + '_3class'), \n",
240 |     "                         y = label_res, \n",
241 |     "                         threshold = thresholds)\n",
242 |     "            \n",
243 |     "        else:\n",
244 |     "            thresholds = range(1, 10)\n",
245 |     "            for i, emo in enumerate(self.__emotions):\n",
246 |     "                print ('Finding threshold for', emo)\n",
247 |     "                # start from infinity: a fixed starting bound can leave used_labels as None\n",
248 |     "                min_diff = float('inf')\n",
249 |     "                best_threshold = -1\n",
250 |     "                used_labels = None\n",
251 |     "                scores = self.__label[:,:,i].reshape(1, -1)[0]\n",
252 |     "                \n",
253 |     "                for current_thres in thresholds:\n",
254 |     "                    label_res = np.array(scores)\n",
255 |     "                    label_res[scores > current_thres] = 1\n",
256 |     "                    label_res[scores <= current_thres] = 0\n",
257 |     "                    diff = abs(len(label_res[label_res==0]) - len(label_res[label_res==1]))\n",
258 |     "                    print ('diff', diff)\n",
259 |     "                    if diff < min_diff:\n",
260 |     "                        used_labels = label_res\n",
261 |     "                        best_threshold = current_thres\n",
262 |     "                        min_diff = diff\n",
263 |     "                print (\"Threshold = \", best_threshold, min_diff )\n",
264 |     "                print (len(used_labels[used_labels==0]), len(used_labels[used_labels==1]) )\n",
265 |     "                print ('save results:', emo, used_labels.shape)\n",
266 |     "                print (used_labels)\n",
267 |     "                print()\n",
268 |     "                np.savez(os.path.join(self.__input_path, 'result_' + emo + '_binclass'), y = used_labels, threshold = best_threshold)\n",
269 |     "\n",
270 |     "        \n",
271 |     "    def get_input_data_info(self):\n",
272 |     "        shp = self.__data.shape\n",
273 |     "        self.__no_of_subj, self.__no_of_clips, self.__no_of_channels, self.__no_of_sampling = shp\n",
274 |     "        self.__sampling_rate = self.__no_of_sampling / 56\n",
275 |     "        \n",
276 |     "        print ('====== input data ======')\n",
277 |     "        print ('No. of subject:', self.__no_of_subj)\n",
278 |     "        print ('No. of clips:', self.__no_of_clips)\n",
279 |     "        print ('No. of channels:', self.__no_of_channels)\n",
280 |     "        print ('No. of points (time series data):', self.__no_of_sampling)\n",
281 |     "        print ('Samping rate:', self.__sampling_rate)\n",
282 |     "        print()\n",
283 |     "        \n",
284 |     "    def __get_already_input_txt(self):\n",
285 |     "        ## read a space-separated log into [[first int, second int], ...]; any unreadable file or malformed line yields []\n",
286 |     "        results = []\n",
287 |     "        try:\n",
288 |     "            with open(self.__log_filename, 'r') as f:\n",
289 |     "                for line in f:\n",
290 |     "                    spl = line.split(' ')\n",
291 |     "                    results.append([int(spl[0]), int(spl[1])])\n",
292 |     "        except (IOError, OSError, ValueError, IndexError):\n",
293 |     "            print ('No file')\n",
294 |     "            return []\n",
295 |     "        print (results)\n",
296 |     "        return results\n",
297 |     "        \n",
298 |     "    def __get_already_input_csv(self):\n",
299 |     "        try:\n",
300 |     "            df = pd.read_csv(self.__log_filename)\n",
301 |     "            arr = np.array(df)\n",
302 |     "            return [tuple(a) for a in arr[:,0:2]]\n",
303 |     "        except:\n",
304 |     "            print ('No file')\n",
305 |     "            return []\n",
306 |     "        \n",
307 |     "        \n",
308 |     "    def preprocessing(self):\n",
309 |     "        ## Interactive two-pass pipeline. While the ICA log holds fewer rows than there are\n",
310 |     "        ## (subject, clip) pairs, unlabelled clips are only presented for labelling and the returned\n",
311 |     "        ## array stays all zeros; afterwards: CAR -> ICA component removal -> band-pass filters.\n",
312 |     "        print ('====== Preprocessing ======')\n",
313 |     "        self.__data_prep = np.zeros((self.__no_of_subj*self.__no_of_clips, \\\n",
314 |     "                                     self.__no_of_channels, len(self.__band_list), self.__no_of_sampling))\n",
315 |     "        self.__info = mne.create_info(ch_names = self.__channel_list,\n",
316 |     "                   sfreq = self.__sampling_rate,\n",
317 |     "                   ch_types = 'eeg')\n",
318 |     "        self.__ica = ICA(self.__channel_list, self.__sampling_rate, self.__log_filename)\n",
319 |     "        rerun = input(\"Do you want to re-run all? (y/n): \")\n",
320 |     "        if rerun.lower() == 'n':\n",
321 |     "            already_done = self.__get_already_input_csv()\n",
322 |     "        else:\n",
323 |     "            rerun = input(\"Are you sure that you want to re-run all? (y/n): \")\n",
324 |     "            if rerun.lower() == 'y':\n",
325 |     "                already_done = []\n",
326 |     "                try:\n",
327 |     "                    os.remove(self.__log_filename)\n",
328 |     "                except OSError:\n",
329 |     "                    pass\n",
330 |     "                with open(self.__log_filename, \"a\") as f:\n",
331 |     "                    f.write('subj_id, clip_id, exclude,\\n')\n",
332 |     "            else:\n",
333 |     "                # keep the existing log; without this load already_done is unbound (NameError below)\n",
334 |     "                print ('Continue using previous data..')\n",
335 |     "                already_done = self.__get_already_input_csv()\n",
336 |     "            \n",
337 |     "        index = 0\n",
338 |     "        for subject_id, data in enumerate(self.__data):\n",
339 |     "            #each subject \n",
340 |     "            for clip_id, dt in enumerate(data):\n",
341 |     "                #each clip\n",
342 |     "                print ('\\nPreprocessing: subject_id =', subject_id, 'clip_id =', clip_id )\n",
343 |     "                raw = mne.io.RawArray(data = dt, info = self.__info)\n",
344 |     "                \n",
345 |     "                # with CAR\n",
346 |     "                after_car = self.__calculate_CAR(raw)\n",
347 |     "                # without CAR\n",
348 |     "#                 after_car = dt\n",
349 |     "#                 print('NO CAR')\n",
350 |     "                \n",
351 |     "                if len(already_done) < self.__no_of_subj*self.__no_of_clips:\n",
352 |     "                    print( 'Continue labelling..')\n",
353 |     "                    if (subject_id, clip_id) in already_done:\n",
354 |     "                        print ('already_done', (subject_id, clip_id))\n",
355 |     "                        continue\n",
356 |     "                    else:\n",
357 |     "                        self.__ica.label_exclude_list(after_car, subject_id, clip_id)\n",
358 |     "                        continue\n",
359 |     "\n",
360 |     "                else:\n",
361 |     "                    print ('All data is labeled for ICA: Actual removing components from saved csv..')\n",
362 |     "                    ## with ICA                \n",
363 |     "                    after_ica = self.__ica.remove_exclude_components(after_car, subject_id, clip_id)\n",
364 |     "                    ## without ICA\n",
365 |     "#                     after_ica = after_car # without ICA\n",
366 |     "#                     print('NO ICA')\n",
367 |     "\n",
368 |     "                    del after_car\n",
369 |     "\n",
370 |     "                    for channel_id, ch in enumerate(after_ica):\n",
371 |     "                        #each channel\n",
372 |     "\n",
373 |     "                        #! already done notch at 50 Hz\n",
374 |     "                        #! asr = self.__calculate_ASR(bands_data) -> cannot do this\n",
375 |     "\n",
376 |     "                        index = self.__no_of_clips*subject_id + clip_id\n",
377 |     "                        #print 'index', index, self.__no_of_clips, subject_id, clip_id, channel_id\n",
378 |     "                        self.__data_prep[index, channel_id, :, :] = self.__bands_filter(ch)\n",
379 |     "                    \n",
380 |     "        return self.__data_prep\n",
381 |     "        \n",
382 |     "        \n",
383 |     "    def __bands_filter(self, data):\n",
384 |     "        results = np.zeros(shape = (len(self.__band_list), len(data)))\n",
385 |     "\n",
386 |     "        for i, band in enumerate(self.__band_list):\n",
387 |     "            results[i] = self.__bandpass_filter(data, band['low'], band['high'])\n",
388 |     "        \n",
389 |     "        return results\n",
390 |     "            \n",
391 |     "    def __bandpass_filter(self, data, low, high):\n",
392 |     "        nyq = 0.5 * self.__sampling_rate\n",
393 |     "        low = low / nyq\n",
394 |     "        high = high / nyq\n",
395 |     "        order = 2\n",
396 |     "        b, a = signal.butter(order, [low, high], btype='band')\n",
397 |     "        filtered = signal.lfilter(b, a, data)\n",
398 |     "\n",
399 |     "        return filtered\n",
400 |     "    \n",
401 |     "    def __calculate_ASR(self, data):\n",
402 |     "        geo = geometric_median(data)\n",
403 |     "        print (data)\n",
404 |     "        print (geo.shape, geo)\n",
405 |     "        print ()\n",
406 |     "    \n",
407 |     "    def __calculate_CAR(self, raw):\n",
408 |     "        #calculate CAR from all channels of one freq band in one clip \n",
409 |     "        raw_car, _ = mne.set_eeg_reference(raw, 'average', projection=True) #Bad EEG channels are automatically excluded if they are properly set in info['bads']\n",
410 |     "        applied = raw_car.apply_proj()\n",
411 |     "        car_npy = applied.get_data()\n",
412 |     "        \n",
413 |     "        return car_npy\n",
414 |     "    \n",
415 |     "    def save_to_numpy(self, result):\n",
416 |     "        np.save(os.path.join(self.__input_path, 'preprocessed/EEG_ICA.npy'), result)\n"
417 |    ]
418 |   },
419 |   {
420 |    "cell_type": "code",
421 |    "execution_count": null,
422 |    "metadata": {
423 |     "scrolled": false
424 |    },
425 |    "outputs": [],
426 |    "source": [
427 |     "eegPreprocessing = EEGPreprocessing()\n",
428 |     "eegPreprocessing.read_signals()\n",
429 |     "result = eegPreprocessing.preprocessing()\n",
430 |     "print (result.shape)\n",
431 |     "print (result)"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "code",
436 |    "execution_count": null,
437 |    "metadata": {},
438 |    "outputs": [],
439 |    "source": [
440 |     "eegPreprocessing.save_to_numpy(result)"
441 |    ]
442 |   }
443 |  ],
444 |  "metadata": {
445 |   "kernelspec": {
446 |    "display_name": "Python 2",
447 |    "language": "python",
448 |    "name": "python2"
449 |   },
450 |   "language_info": {
451 |    "codemirror_mode": {
452 |     "name": "ipython",
453 |     "version": 3
454 |    },
455 |    "file_extension": ".py",
456 |    "mimetype": "text/x-python",
457 |    "name": "python",
458 |    "nbconvert_exporter": "python",
459 |    "pygments_lexer": "ipython3",
460 |    "version": "3.5.2"
461 |   }
462 |  },
463 |  "nbformat": 4,
464 |  "nbformat_minor": 2
465 | }
466 | 


--------------------------------------------------------------------------------
/src/.ipynb_checkpoints/DNN-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 19,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from sklearn import svm\n",
 10 |     "import numpy as np\n",
 11 |     "from sklearn.model_selection import train_test_split\n",
 12 |     "from sklearn.model_selection import StratifiedShuffleSplit\n",
 13 |     "import tensorflow as tf\n",
 14 |     "import numpy as np\n",
 15 |     "import random\n",
 16 |     "import keras\n",
 17 |     "from keras.models import Sequential,load_model\n",
 18 |     "from keras.layers import Dense, Dropout, Activation, Flatten\n",
 19 |     "from keras.layers import Conv2D, MaxPooling1D, GRU, TimeDistributed, LSTM\n",
 20 |     "from matplotlib import pyplot as plt\n",
 21 |     "from keras.callbacks import ReduceLROnPlateau,ModelCheckpoint,CSVLogger\n",
 22 |     "from sklearn.preprocessing import StandardScaler\n",
 23 |     "from keras.layers.normalization import BatchNormalization\n",
 24 |     "from datetime import datetime\n",
 25 |     "import os\n",
 26 |     "from itertools import izip\n",
 27 |     "from sklearn.preprocessing import MinMaxScaler\n",
 28 |     "import random\n",
 29 |     "from sklearn import metrics, model_selection"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "emotions = ['Arousal', 'Valence', 'Happiness', 'Fear', 'Excitement', 'Reward']\n",
 39 |     "class_types = ['binclass']\n",
 40 |     "feature_types = ['with_ICA', 'without_ICA']\n",
 41 |     "no_of_clips = 15\n",
 42 |     "svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']\n",
 43 |     "degrees = range(2, 11)\n",
 44 |     "power_of_c = range(-5, 15, 2)"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 3,
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "def leave_one_video_out(X, y, train = True, test = False, val = True):\n",
 54 |     "    test_video_id = 0\n",
 55 |     "    val_video_id = 1\n",
 56 |     "    X_train = []\n",
 57 |     "    y_train = []\n",
 58 |     "    X_test = []\n",
 59 |     "    y_test = []\n",
 60 |     "    X_val = []\n",
 61 |     "    y_val = []\n",
 62 |     "    \n",
 63 |     "    if test:\n",
 64 |     "        ## split to train & test & val \n",
 65 |     "        for clip_id, data in enumerate(izip(X, y)):\n",
 66 |     "            if clip_id % no_of_clips == test_video_id:\n",
 67 |     "                X_test.append(data[0])\n",
 68 |     "                y_test.append(data[1])\n",
 69 |     "            elif clip_id % no_of_clips == val_video_id:\n",
 70 |     "                X_val.append(data[0])\n",
 71 |     "                y_val.append(data[1])\n",
 72 |     "            else:\n",
 73 |     "                X_train.append(data[0])\n",
 74 |     "                y_train.append(data[1])\n",
 75 |     "            \n",
 76 |     "    else:\n",
 77 |     "        ## split to train & val\n",
 78 |     "        for clip_id, data in enumerate(izip(X, y)):\n",
 79 |     "            if clip_id % no_of_clips == val_video_id:\n",
 80 |     "                X_val.append(data[0])\n",
 81 |     "                y_val.append(data[1])\n",
 82 |     "            else:\n",
 83 |     "                X_train.append(data[0])\n",
 84 |     "                y_train.append(data[1])\n",
 85 |     "        \n",
 86 |     "    X_train = np.array(X_train)\n",
 87 |     "    y_train = np.array(y_train)\n",
 88 |     "    X_test = np.array(X_test)\n",
 89 |     "    y_test = np.array(y_test)\n",
 90 |     "    X_val = np.array(X_val)\n",
 91 |     "    y_val = np.array(y_val)\n",
 92 |     "    print X_train.shape, y_train.shape\n",
 93 |     "    print X_test.shape, y_test.shape\n",
 94 |     "    print X_val.shape, y_val.shape\n",
 95 |     "\n",
 96 |     "    max_class = np.max(y_train)\n",
 97 |     "    for m in range(0, max_class+1):\n",
 98 |     "        print 'y_train ==', m, ':', len(y_train[y_train==m])\n",
 99 |     "\n",
100 |     "    for m in range(0, max_class+1):\n",
101 |     "        print 'y_test ==', m, ':', len(y_test[y_test==m])\n",
102 |     "\n",
103 |     "    for m in range(0, max_class+1):\n",
104 |     "        print 'y_val ==', m, ':', len(y_val[y_val==m])\n",
105 |     "\n",
106 |     "    return X_train, y_train, X_test, y_test, X_val, y_val"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 4,
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "def k_folds_splitter(X, k = 10, test_set = False):\n",
116 |     "    # Randomly deal subject indices into k validation folds of equal size.\n",
117 |     "    # Returns (folds, leftovers): one list of subject indices per fold, plus the subjects\n",
118 |     "    # drawn for no fold, which are meant to join the validation set of every fold (if any).\n",
119 |     "    subject_total = X.shape[0] / no_of_clips\n",
120 |     "    fold_size = subject_total / k\n",
121 |     "    print 'From:', subject_total, 'subjects'\n",
122 |     "    print 'Getting:', fold_size, 'subjects per fold'\n",
123 |     "    folds = []\n",
124 |     "    taken = []\n",
125 |     "    # with test_set set no fold is drawn at all, so every subject ends up in the leftovers\n",
126 |     "    fold_ids = [] if test_set else range(0, k)\n",
127 |     "    for fold_id in fold_ids:\n",
128 |     "        picked = []\n",
129 |     "        while len(picked) < fold_size:\n",
130 |     "            pool = list(set(range(0, subject_total)) - set(taken))\n",
131 |     "            chosen = random.choice(pool)\n",
132 |     "            picked.append(chosen)\n",
133 |     "            taken.append(chosen)\n",
134 |     "        print 'Fold', fold_id, ':', picked\n",
135 |     "        folds.append(picked)\n",
136 |     "    \n",
137 |     "    leftovers = [s for s in range(0, subject_total) if s not in taken]\n",
138 |     "    print 'residuals data:', leftovers\n",
139 |     "    return folds, leftovers"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 5,
145 |    "metadata": {},
146 |    "outputs": [],
147 |    "source": [
148 |     "def normalize_features(X_train, X_test, X_val):\n",
149 |     "    # Min-max scale every feature column in place, with the range learned from X_train only;\n",
150 |     "    # X_val is always scaled, X_test only when it is not empty. Returns the same three arrays.\n",
151 |     "    for feature_index in range(0, X_train.shape[1]):\n",
152 |     "        scaler = MinMaxScaler()\n",
153 |     "        scaler.fit(X_train[:,feature_index].reshape(-1, 1))\n",
154 |     "        # transform the same (n_samples, 1) layout that was fitted; a (1, n_samples) row is rejected by newer scikit-learn\n",
155 |     "        X_train[:,feature_index] = scaler.transform(X_train[:,feature_index].reshape(-1, 1))[:,0]\n",
156 |     "        X_val[:,feature_index] = scaler.transform(X_val[:,feature_index].reshape(-1, 1))[:,0]\n",
157 |     "        if len(X_test) != 0:\n",
158 |     "            X_test[:,feature_index] = scaler.transform(X_test[:,feature_index].reshape(-1, 1))[:,0]\n",
159 |     "    return X_train, X_test, X_val"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 6,
165 |    "metadata": {},
166 |    "outputs": [],
167 |    "source": [
168 |     "# Helper shared by every kernel / C / degree combination that do_SVM tries, so the\n",
169 |     "# fit-predict-report sequence is written only once.\n",
170 |     "def _fit_and_report_svm(X_train, y_train, X_val, y_val, kernel, c, degree=None):\n",
171 |     "    '''Fit one svm.SVC on the training data and print its validation metrics.\n",
172 |     "\n",
173 |     "    Prints the hit count / validation size / accuracy line, y_val, the predictions,\n",
174 |     "    the F1 score and the AUC of the ROC curve computed from the predicted labels.\n",
175 |     "\n",
176 |     "    Parameters:\n",
177 |     "        X_train, y_train: samples and labels the classifier is fitted on.\n",
178 |     "        X_val, y_val: samples and labels the classifier is scored on.\n",
179 |     "        kernel: kernel name handed to svm.SVC.\n",
180 |     "        c: value handed to svm.SVC as C.\n",
181 |     "        degree: polynomial degree; None means no degree is passed to svm.SVC.\n",
182 |     "\n",
183 |     "    Returns:\n",
184 |     "        The F1 score on the validation set (metrics.f1_score with its defaults).\n",
185 |     "    '''\n",
186 |     "    if degree is None:\n",
187 |     "        clf = svm.SVC(kernel = kernel, C = c)\n",
188 |     "    else:\n",
189 |     "        clf = svm.SVC(kernel = kernel, C = c, degree = degree)\n",
190 |     "    clf.fit(X_train, y_train)\n",
191 |     "    y_pred = clf.predict(X_val)\n",
192 |     "    print len(y_pred[y_pred==y_val]), len(y_val), len(y_pred[y_pred==y_val])*1.0 / len(y_val)\n",
193 |     "    print y_val\n",
194 |     "    print y_pred\n",
195 |     "\n",
196 |     "    f_score = metrics.f1_score(y_val, y_pred)\n",
197 |     "    print 'f_score:', f_score\n",
198 |     "\n",
199 |     "    # NOTE(review): pos_label=2 is kept as it was, while f1_score above relies on its default\n",
200 |     "    # positive label - confirm which label is really meant to be the positive class.\n",
201 |     "    fpr, tpr, thresholds = metrics.roc_curve(y_val, y_pred, pos_label=2)\n",
202 |     "    auc_value = metrics.auc(fpr, tpr)\n",
203 |     "    print 'auc_value:', auc_value\n",
204 |     "    return f_score\n",
205 |     "\n",
206 |     "\n",
207 |     "def do_SVM(X_train, y_train, X_val, y_val, c=None):\n",
208 |     "    '''Evaluate an SVM per kernel on the validation set, searching for the best C if needed.\n",
209 |     "\n",
210 |     "    Kernels come from the global svm_kernels; for the 'poly' kernel every degree in the\n",
211 |     "    global degrees is tried. Every fit prints its metrics through _fit_and_report_svm.\n",
212 |     "    Relies on svm, metrics, svm_kernels, power_of_c and degrees being defined in the notebook.\n",
213 |     "\n",
214 |     "    Parameters:\n",
215 |     "        X_train, y_train: training samples and labels.\n",
216 |     "        X_val, y_val: validation samples and labels.\n",
217 |     "        c: when falsy (default None), every kernel is tried with C = 2**p for each p in the\n",
218 |     "           global power_of_c; otherwise this single C is used for every kernel.\n",
219 |     "\n",
220 |     "    Returns:\n",
221 |     "        When searching: the C with the highest validation F1 over all kernels and degrees,\n",
222 |     "        or the c argument itself if no F1 score was above 0. Otherwise c, unchanged.\n",
223 |     "    '''\n",
224 |     "    max_f1 = 0\n",
225 |     "    max_c = c\n",
226 |     "    # Decide once whether to search. Reusing c as the loop variable (as the old code did)\n",
227 |     "    # left the last candidate in c, so every kernel after the first skipped the search\n",
228 |     "    # and was only evaluated with that last candidate.\n",
229 |     "    search_for_c = not c\n",
230 |     "\n",
231 |     "    for kernel in svm_kernels:\n",
232 |     "        print 'kernel:', kernel\n",
233 |     "        # only the polynomial kernel takes a degree; None stands for no degree at all\n",
234 |     "        kernel_degrees = degrees if kernel == 'poly' else [None]\n",
235 |     "        if search_for_c:\n",
236 |     "            candidates = [pow(2, p) for p in power_of_c]\n",
237 |     "        else:\n",
238 |     "            #already know optimal c\n",
239 |     "            candidates = [c]\n",
240 |     "\n",
241 |     "        for candidate in candidates:\n",
242 |     "            print 'C:', candidate\n",
243 |     "            for degree in kernel_degrees:\n",
244 |     "                if degree is not None:\n",
245 |     "                    print 'degree:', degree\n",
246 |     "                f_score = _fit_and_report_svm(X_train, y_train, X_val, y_val,\n",
247 |     "                                              kernel, candidate, degree)\n",
248 |     "                # the best C is only tracked while searching\n",
249 |     "                if search_for_c and f_score > max_f1:\n",
250 |     "                    max_c = candidate\n",
251 |     "                    max_f1 = f_score\n",
252 |     "\n",
253 |     "    return max_c"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": 35,
259 |    "metadata": {},
260 |    "outputs": [
261 |     {
262 |      "name": "stdout",
263 |      "output_type": "stream",
264 |      "text": [
265 |       "('TRAIN:', array([ 0,  1,  2,  3,  5,  6,  7,  8,  9, 11, 12, 13, 14, 15, 16, 17, 18,\n",
266 |       "       19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 35, 37, 38,\n",
267 |       "       39, 40, 41, 42]), 'TEST:', array([ 4, 10, 27, 30, 36]))\n",
268 |       "('TRAIN:', array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 12, 13, 14, 15, 16, 17,\n",
269 |       "       18, 19, 20, 21, 22, 23, 24, 26, 27, 29, 30, 32, 33, 34, 35, 36, 38,\n",
270 |       "       39, 40, 41, 42]), 'TEST:', array([11, 25, 28, 31, 37]))\n",
271 |       "('TRAIN:', array([ 0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,\n",
272 |       "       18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36,\n",
273 |       "       37, 38, 39, 41]), 'TEST:', array([ 2, 20, 29, 40, 42]))\n",
274 |       "('TRAIN:', array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 17, 19,\n",
275 |       "       20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,\n",
276 |       "       38, 39, 40, 41, 42]), 'TEST:', array([15, 16, 18, 22]))\n",
277 |       "('TRAIN:', array([ 0,  1,  2,  3,  4,  6,  7,  9, 10, 11, 12, 14, 15, 16, 17, 18, 19,\n",
278 |       "       20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,\n",
279 |       "       37, 39, 40, 41, 42]), 'TEST:', array([ 5,  8, 13, 38]))\n",
280 |       "('TRAIN:', array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15, 16, 18,\n",
281 |       "       19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 36, 37,\n",
282 |       "       38, 39, 40, 41, 42]), 'TEST:', array([14, 17, 32, 35]))\n",
283 |       "('TRAIN:', array([ 0,  2,  3,  4,  5,  6,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,\n",
284 |       "       19, 20, 21, 22, 23, 24, 25, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37,\n",
285 |       "       38, 39, 40, 41, 42]), 'TEST:', array([ 1,  7, 26, 34]))\n",
286 |       "('TRAIN:', array([ 0,  1,  2,  3,  4,  5,  7,  8,  9, 10, 11, 13, 14, 15, 16, 17, 18,\n",
287 |       "       19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37,\n",
288 |       "       38, 39, 40, 41, 42]), 'TEST:', array([ 6, 12, 24, 33]))\n",
289 |       "('TRAIN:', array([ 0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 11, 12, 13, 14, 15, 16, 17,\n",
290 |       "       18, 20, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,\n",
291 |       "       38, 39, 40, 41, 42]), 'TEST:', array([ 9, 19, 21, 23]))\n",
292 |       "('TRAIN:', array([ 1,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,\n",
293 |       "       19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,\n",
294 |       "       36, 37, 38, 40, 42]), 'TEST:', array([ 0,  3, 39, 41]))\n"
295 |      ]
296 |     }
297 |    ],
298 |    "source": [
299 |     "kf = model_selection.KFold(n_splits=10, shuffle=True, random_state=0)\n",
300 |     "X = np.load('../data/DatAllSbj/EEG_' + feature_type + '_features.npy')\n",
301 |     "X = X.reshape(43,15,32)\n",
302 |     "\n",
303 |     "for train_index, test_index in kf.split(X):\n",
304 |     "    print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n",
305 |     "    X_train, X_test = X[train_index], X[test_index]\n",
306 |     "#     y_train, y_test = y[train_index], y[test_index]"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "code",
311 |    "execution_count": 7,
312 |    "metadata": {},
313 |    "outputs": [
314 |     {
315 |      "name": "stdout",
316 |      "output_type": "stream",
317 |      "text": [
318 |       "=== feature_type: with_ICA ===\n",
319 |       "From: 43 subjects\n",
320 |       "Getting: 4 subjects per fold\n",
321 |       "Fold 0 : [10, 3, 30, 33]\n",
322 |       "Fold 1 : [2, 29, 31, 24]\n",
323 |       "Fold 2 : [42, 19, 15, 40]\n",
324 |       "Fold 3 : [35, 0, 5, 22]\n",
325 |       "Fold 4 : [28, 16, 13, 41]\n",
326 |       "Fold 5 : [17, 39, 27, 18]\n",
327 |       "Fold 6 : [14, 32, 11, 37]\n",
328 |       "Fold 7 : [6, 4, 36, 26]\n",
329 |       "Fold 8 : [38, 9, 23, 12]\n",
330 |       "Fold 9 : [8, 1, 34, 25]\n",
331 |       "residuals data: [7, 20, 21]\n",
332 |       "=== feature_type: without_ICA ===\n"
333 |      ]
334 |     }
335 |    ],
336 |    "source": [
337 |     "# split dataset by leaving group of subjects out (k-fold)\n",
338 |     "all_folds_data = None\n",
339 |     "c = None\n",
340 |     "\n",
341 |     "for feature_type in feature_types:\n",
342 |     "    print '=== feature_type:', feature_type, '==='\n",
343 |     "    X = np.load('../data/DatAllSbj/EEG_' + feature_type + '_features.npy')\n",
344 |     "    \n",
345 |     "    if not all_folds_data:\n",
346 |     "        all_folds_data, residuals_data = k_folds_splitter(X, k = 10)\n",
347 |     "    \n",
348 |     "    ## add other features to X here !\n",
349 |     "    '''\n",
350 |     "    for class_type in class_types:\n",
351 |     "        print '## class_type:', class_type\n",
352 |     "        \n",
353 |     "        for emotion in emotions:\n",
354 |     "            print '>> Emotion:', emotion\n",
355 |     "            label = np.load('../data/DatAllSbj/result_' + emotion + '_' + class_type + '.npz')\n",
356 |     "            y = label['y']\n",
357 |     "            print 'threshold:', label['threshold']\n",
358 |     "            print X.shape\n",
359 |     "            print y.shape\n",
360 |     "\n",
361 |     "            max_class = np.max(y)\n",
362 |     "            for m in range(0, max_class+1):\n",
363 |     "                print 'y ==', m, ':', len(y[y==m])\n",
364 |     "            \n",
365 |     "            X_train, y_train, X_test, y_test, X_val, y_val = leave_one_video_out(X, y)\n",
366 |     "            X_train, X_test, X_val = normalize_features(X_train, X_test, X_val)\n",
367 |     "            \n",
368 |     "            c = do_SVM(X_train, y_train, X_val, y_val, c)\n",
369 |     "    print\n",
370 |     "    '''"
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "code",
375 |    "execution_count": 17,
376 |    "metadata": {
377 |     "scrolled": true
378 |    },
379 |    "outputs": [],
380 |    "source": [
381 |     "# # split dataset by leaving one video out (do one fold only)\n",
382 |     "# for feature_type in feature_types:\n",
383 |     "#     print '=== feature_type:', feature_type, '==='\n",
384 |     "#     X = np.load('../data/DatAllSbj/EEG_' + feature_type + '_features.npy')\n",
385 |     "    \n",
386 |     "#     ## add other features to X here !\n",
387 |     "    \n",
388 |     "#     for class_type in class_types:\n",
389 |     "#         print '## class_type:', class_type\n",
390 |     "        \n",
391 |     "#         for emotion in emotions:\n",
392 |     "#             print '>> Emotion:', emotion\n",
393 |     "#             label = np.load('../data/DatAllSbj/result_' + emotion + '_' + class_type + '.npz')\n",
394 |     "#             y = label['y']\n",
395 |     "#             print 'threshold:', label['threshold']\n",
396 |     "#             print X.shape\n",
397 |     "#             print y.shape\n",
398 |     "\n",
399 |     "#             max_class = np.max(y)\n",
400 |     "#             for m in range(0, max_class+1):\n",
401 |     "#                 print 'y ==', m, ':', len(y[y==m])\n",
402 |     "            \n",
403 |     "#             X_train, y_train, X_test, y_test, X_val, y_val = leave_one_video_out(X, y)\n",
404 |     "#             X_train, X_test, X_val = normalize_features(X_train, X_test, X_val)\n",
405 |     "            \n",
406 |     "#             do_SVM(X_train, y_train, X_val, y_val)\n",
407 |     "#     print"
408 |    ]
409 |   },
410 |   {
411 |    "cell_type": "code",
412 |    "execution_count": 9,
413 |    "metadata": {},
414 |    "outputs": [],
415 |    "source": [
416 |     "# split by Random\n",
417 |     "# # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
418 |     "# spl = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)   \n",
419 |     "\n",
420 |     "# for train_index, test_index in spl.split(X, y):\n",
421 |     "#     X_train, X_test = X[train_index], X[test_index]\n",
422 |     "#     y_train, y_test = y[train_index], y[test_index]\n",
423 |     "#     print y_train\n",
424 |     "#     print y_test\n",
425 |     "    \n",
426 |     "# spl = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)   \n",
427 |     "\n",
428 |     "# for train_index, test_index in spl.split(X_test, y_test):\n",
429 |     "#     X_test, X_val = X[train_index], X[test_index]\n",
430 |     "#     y_test, y_val = y[train_index], y[test_index]\n",
431 |     "#     print y_test\n",
432 |     "#     print y_val\n",
433 |     "    \n",
434 |     "# print X_train.shape\n",
435 |     "# print y_train.shape\n",
436 |     "# print X_test.shape\n",
437 |     "# print y_test.shape\n",
438 |     "# print len(y[y==0]), len(y[y==1]), len(y[y==2])\n",
439 |     "# print len(y_train[y_train==0]), len(y_train[y_train==1]), len(y_train[y_train==2])\n",
440 |     "# print len(y_test[y_test==0]), len(y_test[y_test==1]), len(y_test[y_test==2])\n",
441 |     "# print len(y_val[y_val==0]), len(y_val[y_val==1]), len(y_val[y_val==2])"
442 |    ]
443 |   },
444 |   {
445 |    "cell_type": "code",
446 |    "execution_count": 10,
447 |    "metadata": {},
448 |    "outputs": [],
449 |    "source": [
450 |     "# ## set model params\n",
451 |     "# batch_size = 256\n",
452 |     "# epochs = 1000\n",
453 |     "# act ='tanh'\n",
454 |     "# drop_rate=0.4\n",
455 |     "# lr = 0.0001\n",
456 |     "# rho = 0.9\n",
457 |     "# monitor='val_loss'\n",
458 |     "# factor=0.5\n",
459 |     "# patience=10\n",
460 |     "# min_lr=0.00001\n",
461 |     "# num_classes = 2"
462 |    ]
463 |   },
464 |   {
465 |    "cell_type": "code",
466 |    "execution_count": 11,
467 |    "metadata": {},
468 |    "outputs": [],
469 |    "source": [
470 |     "# model = Sequential()\n",
471 |     "# # model.add(Dense(1024 , activation=act))\n",
472 |     "# # model.add(Dense(512 , activation=act))\n",
473 |     "# # model.add(Dense(256 , activation=act))\n",
474 |     "# model.add(Dense(1024 , activation=act, input_shape=(X_train.shape[1],)))\n",
475 |     "# #model.add(Dense(2048 , activation=act))\n",
476 |     "# model.add(Dense(512 , activation=act))\n",
477 |     "# model.add(Dense(256 , activation=act))\n",
478 |     "# model.add(Dense(128 , activation=act))\n",
479 |     "# model.add(Dense(64 , activation=act))\n",
480 |     "# model.add(Dense(32 , activation=act))\n",
481 |     "# model.add(Dense(16 , activation=act))\n",
482 |     "# model.add(Dense(8 , activation=act))\n",
483 |     "# model.add(Dense(4 , activation=act))\n",
484 |     "# model.add(Dense(num_classes , activation='softmax'))\n",
485 |     "# model.summary()"
486 |    ]
487 |   },
488 |   {
489 |    "cell_type": "code",
490 |    "execution_count": 12,
491 |    "metadata": {},
492 |    "outputs": [],
493 |    "source": [
494 |     "# def log(*msg):\n",
495 |     "#     print msg"
496 |    ]
497 |   },
498 |   {
499 |    "cell_type": "code",
500 |    "execution_count": 13,
501 |    "metadata": {},
502 |    "outputs": [],
503 |    "source": [
504 |     "# rmsprop = keras.optimizers.RMSprop(lr=lr, rho=rho, epsilon=None, decay=0.0)\n",
505 |     "# log('lr: ' + str(lr) + ', rho: ' + str(rho))\n",
506 |     "\n",
507 |     "# model.compile(loss='sparse_categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])\n",
508 |     "\n",
509 |     "# datetimestr = datetime.now().strftime('%Y-%m-%d_%H-%M')\n",
510 |     "# directory = './DNN-weight/logs'\n",
511 |     "# if not os.path.exists(directory):\n",
512 |     "#     os.makedirs(directory)\n",
513 |     "#     log('Created: ' + directory)\n",
514 |     "    \n",
515 |     "# path = './DNN-weight/' + datetimestr\n",
516 |     "# if not os.path.exists(path):\n",
517 |     "#     os.makedirs(path)    \n",
518 |     "#     log('Created: ' + path)\n",
519 |     "# log('=============================================================\\n')\n",
520 |     "\n",
521 |     "\n",
522 |     "# file_name = 'train'\n",
523 |     "# filepath = path + \"/weights-v-\" + datetimestr + \"-{epoch:02d}.hdf5\"\n",
524 |     "# checkpointer = ModelCheckpoint(filepath, monitor=monitor, verbose=1, save_best_only=True)\n",
525 |     "# csv_logger = CSVLogger(directory + '/' + file_name + '_latest' + datetimestr + '.csv')\n",
526 |     "# reduce_lr = ReduceLROnPlateau(monitor=monitor, factor=factor, patience=patience, min_lr=min_lr)\n",
527 |     "\n",
528 |     "\n",
529 |     "# # In[ ]:\n",
530 |     "\n",
531 |     "\n",
532 |     "# from sklearn.utils import class_weight\n",
533 |     "# class_weight = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)\n"
534 |    ]
535 |   },
536 |   {
537 |    "cell_type": "code",
538 |    "execution_count": 14,
539 |    "metadata": {},
540 |    "outputs": [],
541 |    "source": [
542 |     "# hist = model.fit(np.array(X_train), np.array(y_train), batch_size=batch_size, epochs=epochs, shuffle=True,\n",
543 |     "#                  validation_data=(np.array(X_val),np.array(y_val)), callbacks= [checkpointer,csv_logger,reduce_lr])"
544 |    ]
545 |   },
546 |   {
547 |    "cell_type": "code",
548 |    "execution_count": 15,
549 |    "metadata": {},
550 |    "outputs": [],
551 |    "source": [
552 |     "# print y_test\n",
553 |     "# y_test = np.array(y_test)\n",
554 |     "# model = load_model('DNN-weight/2018-07-29_23-35/weights-v-2018-07-29_23-35-470.hdf5')\n",
555 |     "# y_pred = model.predict(X_test)\n",
556 |     "\n",
557 |     "# pr = np.array([np.argmax(x) for x in y_pred])\n",
558 |     "# print pr==y_test\n",
559 |     "# print len(pr[pr==y_test]) * 1.0 / len(pr)"
560 |    ]
561 |   },
562 |   {
563 |    "cell_type": "code",
564 |    "execution_count": 16,
565 |    "metadata": {},
566 |    "outputs": [],
567 |    "source": [
568 |     "# print len(y_pred[y_pred==y_test])*1.0 / len(y_test)\n",
569 |     "# print y_test\n",
570 |     "# print\n",
571 |     "# print y_pred"
572 |    ]
573 |   }
574 |  ],
575 |  "metadata": {
576 |   "kernelspec": {
577 |    "display_name": "Python 2",
578 |    "language": "python",
579 |    "name": "python2"
580 |   },
581 |   "language_info": {
582 |    "codemirror_mode": {
583 |     "name": "ipython",
584 |     "version": 2
585 |    },
586 |    "file_extension": ".py",
587 |    "mimetype": "text/x-python",
588 |    "name": "python",
589 |    "nbconvert_exporter": "python",
590 |    "pygments_lexer": "ipython2",
591 |    "version": "2.7.12"
592 |   }
593 |  },
594 |  "nbformat": 4,
595 |  "nbformat_minor": 2
596 | }
597 | 


--------------------------------------------------------------------------------
/src/.ipynb_checkpoints/EEGPreprocessing-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import os\n",
 11 |     "from scipy import signal\n",
 14 |     "# from geometric_median import geometric_median\n",
 16 |     "import mne\n",
 17 |     "from mne.preprocessing.ica import corrmap\n",
 18 |     "from mne.preprocessing import ICA\n",
 19 |     "from matplotlib.pyplot import savefig\n",
 20 |     "import pandas as pd\n",
 23 |     "from shutil import copyfile\n",
 25 |     "import datetime"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "class ICA:\n",
 35 |     "    \n",
 36 |     "    def __init__(self, channel_list, sampling_rate, log_filename):\n",
 37 |     "        self.__ch_names = channel_list\n",
 38 |     "        self.__sampling_rate = sampling_rate\n",
 39 |     "        self.__no_of_channels = len(self.__ch_names)\n",
 40 |     "        \n",
 41 |     "        # set constant\n",
 42 |     "        ## ['Fp1', 'Fp2', 'Fz', 'Cz', 'T3', 'T4', 'Pz', 'Oz']\n",
 43 |     "        ch_f = [\"Fz\"]\n",
 44 |     "        ch_cp = [\"Cz\"]\n",
 45 |     "        ch_t = ['T3', 'T4']\n",
 46 |     "        ch_op = [\"Oz\", 'Pz']\n",
 47 |     "        ch_fp = ['Fp1', 'Fp2']\n",
 48 |     "\n",
 49 |     "        self.__chs_dict = {'Frontal':ch_f,\n",
 50 |     "                    'CP':ch_cp,\n",
 51 |     "                    'Temporal':ch_t,\n",
 52 |     "                    'OP':ch_op,\n",
 53 |     "                    'FP':ch_fp\n",
 54 |     "                    }\n",
 55 |     "        self.__info = mne.create_info(self.__ch_names, self.__sampling_rate, ch_types=[\"eeg\"] * self.__no_of_channels)\n",
 56 |     "        self.__log_filename = log_filename\n",
 57 |     "        \n",
 58 |     "        try:\n",
 59 |     "            self.__exclude_df = pd.read_csv(self.__log_filename)\n",
 60 |     "        except:\n",
 61 |     "            print ('Cannot read from', self.__log_filename)\n",
 62 |     "\n",
 63 |     "    \n",
 64 |     "    #find min max of each channel in ica components \n",
 65 |     "    #imported from: Karis Matchaparn\n",
 66 |     "    def find_exclude_ica(self, ica_dict, channel, focus_part):\n",
 67 |     "        exclude_list = [] \n",
 68 |     "#         print(channel.keys())\n",
 69 |     "\n",
 70 |     "        for ica_idx in range(len(ica_dict)): # get into each component\n",
 71 |     "\n",
 72 |     "            #find summation of each brain part\n",
 73 |     "            sum_chs = {}\n",
 74 |     "            for brain_part in channel.keys(): # loop for each brain part\n",
 75 |     "                sum_buffer = 0\n",
 76 |     "\n",
 77 |     "                for chs in channel[brain_part]: #loop for each channel in brain part\n",
 78 |     "#                     print(\"---\",chs, ica_dict[ica_idx][chs])\n",
 79 |     "                    sum_buffer += ica_dict[ica_idx][chs]\n",
 80 |     "#                 print(\"x\"*20, brain_part)\n",
 81 |     "                sum_chs[brain_part] = sum_buffer\n",
 82 |     "\n",
 83 |     "            # **************************  \n",
 84 |     "#             print(\"----------\")\n",
 85 |     "#             print(ica_idx,sum_chs)\n",
 86 |     "#             print(\"----------\")\n",
 87 |     "            for chs in sum_chs.keys():\n",
 88 |     "                if sum_chs[focus_part] < sum_chs[chs]:\n",
 89 |     "                    exclude_list.append(ica_idx)\n",
 90 |     "                    break\n",
 91 |     "\n",
 92 |     "        print(exclude_list)\n",
 93 |     "        return exclude_list\n",
 94 |     "    \n",
 95 |     "    def label_exclude_list(self, eeg, subj_id, clip_id):\n",
 96 |     "        raw = mne.io.RawArray(eeg, self.__info)\n",
 97 |     "        raw.set_montage(mne.channels.read_montage(\"standard_1020\"))\n",
 98 |     "        raw_tmp = raw.copy()\n",
 99 |     "        \n",
100 |     "        ica = mne.preprocessing.ICA(method=\"extended-infomax\", random_state=1)\n",
101 |     "        ica.fit(raw_tmp)       \n",
102 |     "\n",
103 |     "        ica.plot_components(inst=raw_tmp)\n",
104 |     "        ica.plot_sources(raw_tmp)\n",
105 |     "        \n",
106 |     "        complete = False\n",
107 |     "        while not complete:\n",
108 |     "            try:\n",
109 |     "                exc = input(\"Which components do you want to delete? (0-7 or -1 if no component) (put , between each component no.): \")\n",
110 |     "                if type(exc) == int:\n",
111 |     "                    exclude_list = [exc]\n",
112 |     "                    if exc < -1 or exc > 7:\n",
113 |     "                        raise Exception\n",
114 |     "                else:\n",
115 |     "                    exclude_list = [int(x) for x in exc.split(',')]\n",
116 |     "                    for x in exclude_list:\n",
117 |     "                        if x < -1 or x > 7 :\n",
118 |     "                            raise Exception\n",
119 |     "\n",
120 |     "                self.__log(subj_id, clip_id, '|'.join([str(x) for x in exclude_list]))\n",
121 |     "                complete = True\n",
122 |     "            except:\n",
123 |     "                print ('Error: Please try to input component no. again (-1 to 7 only)')\n",
124 |     "                complete = False\n",
125 |     "        \n",
126 |     "        print(\"Component to delete =\", exclude_list)\n",
127 |     "        \n",
128 |     "    \n",
129 |     "    def remove_exclude_components(self, eeg, subj_id, clip_id):\n",
130 |     "        raw = mne.io.RawArray(eeg, self.__info)\n",
131 |     "        raw.set_montage(mne.channels.read_montage(\"standard_1020\"))\n",
132 |     "\n",
133 |     "        raw_tmp = raw.copy()\n",
134 |     "#         raw_tmp.filter(1, None, fir_design=\"firwin\")\n",
135 |     "        \n",
136 |     "        ica = mne.preprocessing.ICA(method=\"extended-infomax\", random_state=1)\n",
137 |     "        ica.fit(raw_tmp)\n",
138 |     "        \n",
139 |     "        df = self.__exclude_df\n",
140 |     "        d = df.loc[(df['subj_id'] == subj_id) & (df[' clip_id'] == clip_id)]\n",
141 |     "        exc = d[' exclude'].values[0]\n",
142 |     "        exclude_list = [int(e) for e in exc.split('|')]\n",
143 |     "        \n",
144 |     "        if -1 in exclude_list:\n",
145 |     "            print('No component to delete.')\n",
146 |     "            return np.array(raw_tmp[:])[0]\n",
147 |     "        else:\n",
148 |     "            print(\"Delete components :\", exclude_list)\n",
149 |     "            ica.exclude = exclude_list #select components to exclude\n",
150 |     "\n",
151 |     "            raw_corrected = raw.copy()\n",
152 |     "            ica.apply(raw_corrected)\n",
153 |     "            \n",
154 |     "            result = np.array(raw_corrected[:])[0] #get data after ica\n",
155 |     "            return result\n",
156 |     "    \n",
157 |     "    \n",
158 |     "    def __log(self, *msg):\n",
159 |     "        ## save subject id, clip id, list of excluding components (concatenated with |)\n",
160 |     "        st = ''\n",
161 |     "        for m in msg:\n",
162 |     "            st += str(m) + ', '\n",
163 |     "        f = open(self.__log_filename, \"a\")\n",
164 |     "        f.write(st + '\\n') \n",
165 |     "        print(msg)\n",
166 |     "        f.close()\n",
167 |     "        \n",
168 |     "        now = datetime.datetime.now()\n",
169 |     "        copyfile(self.__log_filename, './log-exclude-list/ica_exclude_part'+str(now)+'.csv')"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": null,
175 |    "metadata": {},
176 |    "outputs": [],
177 |    "source": [
178 |     "class EEGPreprocessing:\n",
179 |     "    \"\"\"Load raw EEG, re-reference it (CAR), apply the ICA helper and band-pass filter it.\n",
180 |     "\n",
181 |     "    Also converts the stored emotion scores into class labels (see set_labels).\n",
182 |     "    \"\"\"\n",
183 |     "\n",
184 |     "    def __init__(self):\n",
185 |     "        \"\"\"Set the data location, emotion names, frequency bands and channel names.\"\"\"\n",
186 |     "        self.a = 6\n",
187 |     "        self.__input_path = '../data/EEG/'\n",
188 |     "        self.__emotions = ['Happiness', 'Fear', 'Excitement', \\\n",
189 |     "                           'Arousal', 'Valence', 'Reward']\n",
190 |     "        self.__band_list = [{ 'name': 'theta', 'low': 3, 'high': 7}, \n",
191 |     "                            { 'name': 'alpha', 'low': 8, 'high': 13}, \n",
192 |     "                            { 'name': 'beta', 'low': 14, 'high': 29},\n",
193 |     "                            { 'name': 'gamma', 'low': 30, 'high': 47},\n",
194 |     "#                            { 'name': 'all', 'low': 4, 'high': 47}\n",
195 |     "                           ]\n",
196 |     "        self.__channel_list = ['Fp1', 'Fp2', 'Fz', 'Cz', 'T3', 'T4', 'Pz', 'Oz']\n",
197 |     "        self.__log_filename = './ica_exclude_part.csv'\n",
198 |     "\n",
199 |     "    def read_signals(self):\n",
200 |     "        \"\"\"Load raw/EEG.npy from the input directory and print its dimensions.\"\"\"\n",
201 |     "        self.__data = np.load(os.path.join(self.__input_path, 'raw/EEG.npy'))\n",
202 |     "        self.get_input_data_info()\n",
203 |     "\n",
204 |     "    def set_labels(self, thresholds = None, emotions = None):\n",
205 |     "        \"\"\"Convert the scores in raw/result.npy to class labels; save one .npz per emotion.\n",
206 |     "\n",
207 |     "        thresholds: None -> try 1..9 and keep the most balanced binary split;\n",
208 |     "                    [t] -> binary labels (score > t is 1, otherwise 0);\n",
209 |     "                    [t0, t1] -> three classes (< t0, t0 <= score < t1, >= t1).\n",
210 |     "        emotions:   emotion names to export in the three-class case (None = all).\n",
211 |     "        \"\"\"\n",
212 |     "        self.__label = np.load(os.path.join(self.__input_path, 'raw/result.npy'))\n",
213 |     "\n",
214 |     "        if thresholds and len(thresholds) == 1:\n",
215 |     "            above_thres = self.__label > thresholds[0]\n",
216 |     "            below_thres = self.__label <= thresholds[0]\n",
217 |     "            self.__label[above_thres] = 1\n",
218 |     "            self.__label[below_thres] = 0\n",
219 |     "\n",
220 |     "            for i, emo in enumerate(self.__emotions):\n",
221 |     "                label_res = self.__label[:,:,i].reshape(1, -1)\n",
222 |     "                label_res = np.array(label_res[0])\n",
223 |     "\n",
224 |     "                print ('save results:', emo, label_res.shape)\n",
225 |     "                print (len(label_res[label_res==0]), len(label_res[label_res==1]) )\n",
226 |     "                print (label_res)\n",
227 |     "                print  ()\n",
228 |     "                np.savez(os.path.join('../data/score/label/result_' + emo + '_binclass'), \n",
229 |     "                     y = label_res, \n",
230 |     "                     threshold = thresholds)\n",
231 |     "\n",
232 |     "        elif thresholds:\n",
233 |     "            # emotions=None used to fail on the membership test below; treat it as all emotions.\n",
234 |     "            if emotions is None:\n",
235 |     "                emotions = self.__emotions\n",
236 |     "\n",
237 |     "            for i, emo in enumerate(self.__emotions):\n",
238 |     "                if emo in emotions:\n",
239 |     "                    label_res = self.__label[:,:,i].reshape(1, -1)\n",
240 |     "                    label_res = label_res[0]\n",
241 |     "                    print (label_res)\n",
242 |     "                    # Build all masks from the raw scores first so that values already\n",
243 |     "                    # rewritten to 0/1 can never be re-matched by a later threshold.\n",
244 |     "                    low = label_res < thresholds[0]\n",
245 |     "                    mid = (label_res >= thresholds[0]) & (label_res < thresholds[1])\n",
246 |     "                    high = label_res >= thresholds[1]\n",
247 |     "                    label_res[low] = 0\n",
248 |     "                    label_res[mid] = 1\n",
249 |     "                    label_res[high] = 2\n",
250 |     "\n",
251 |     "                    print ('save results:', emo, label_res.shape)\n",
252 |     "                    print (label_res)\n",
253 |     "\n",
254 |     "                    for class_id in range(0, 3):\n",
255 |     "                        print( class_id, len(label_res[label_res==class_id]))\n",
256 |     "                    print()\n",
257 |     "\n",
258 |     "                    np.savez(os.path.join('../data/score/label/result_' + emo + '_3class'), \n",
259 |     "                         y = label_res, \n",
260 |     "                         threshold = thresholds)\n",
261 |     "\n",
262 |     "        else:\n",
263 |     "            thresholds = range(1, 10)\n",
264 |     "            for i, emo in enumerate(self.__emotions):\n",
265 |     "                print ('Finding threshold for', emo)\n",
266 |     "                min_diff = float('inf')  # was 300: a larger imbalance left used_labels as None\n",
267 |     "                best_threshold = -1\n",
268 |     "                used_labels = None\n",
269 |     "\n",
270 |     "                for current_thres in thresholds:\n",
271 |     "                    labels = np.copy(self.__label)\n",
272 |     "                    above_thres = labels > current_thres\n",
273 |     "                    below_thres = labels <= current_thres\n",
274 |     "                    labels[above_thres] = 1\n",
275 |     "                    labels[below_thres] = 0\n",
276 |     "                    label_res = labels[:,:,i].reshape(1, -1)\n",
277 |     "                    label_res = np.array(label_res[0])\n",
278 |     "\n",
279 |     "                    diff = abs(len(label_res[label_res==0]) - len(label_res[label_res==1]))\n",
280 |     "                    print ('diff', diff)\n",
281 |     "                    if diff < min_diff:\n",
282 |     "                        used_labels = np.copy(label_res)\n",
283 |     "                        best_threshold = current_thres\n",
284 |     "                        min_diff = diff\n",
285 |     "\n",
286 |     "                print (\"Threshold = \", best_threshold, min_diff )\n",
287 |     "                print (len(used_labels[used_labels==0]), len(used_labels[used_labels==1]) )\n",
288 |     "\n",
289 |     "                print ('save results:', emo, used_labels.shape)\n",
290 |     "                print (used_labels)\n",
291 |     "                print()\n",
292 |     "                # NOTE(review): saved next to the input data, unlike the fixed-threshold branches - confirm.\n",
293 |     "                np.savez(os.path.join(self.__input_path, 'result_' + emo + '_binclass'), \n",
294 |     "                         y = used_labels, \n",
295 |     "                         threshold = best_threshold)\n",
296 |     "\n",
297 |     "    def get_input_data_info(self):\n",
298 |     "        \"\"\"Cache the four dimensions of the loaded array and print a short summary.\"\"\"\n",
299 |     "        shp = self.__data.shape\n",
300 |     "        self.__no_of_subj, self.__no_of_clips, self.__no_of_channels, self.__no_of_sampling = shp\n",
301 |     "        # NOTE(review): 56 is presumably the clip length in seconds - confirm.\n",
302 |     "        self.__sampling_rate = self.__no_of_sampling / 56\n",
303 |     "\n",
304 |     "        print ('====== input data ======')\n",
305 |     "        print ('No. of subject:', self.__no_of_subj)\n",
306 |     "        print ('No. of clips:', self.__no_of_clips)\n",
307 |     "        print ('No. of channels:', self.__no_of_channels)\n",
308 |     "        print ('No. of points (time series data):', self.__no_of_sampling)\n",
309 |     "        print ('Samping rate:', self.__sampling_rate)\n",
310 |     "        print()\n",
311 |     "\n",
312 |     "    def __get_already_input_txt(self):\n",
313 |     "        \"\"\"Read [subject_id, clip_id] pairs from a space-separated log; [] if unreadable.\"\"\"\n",
314 |     "        try:\n",
315 |     "            with open(self.__log_filename, 'r') as f:\n",
316 |     "                results = []\n",
317 |     "                for line in f:\n",
318 |     "                    spl = line.split(' ')\n",
319 |     "                    results.append([int(spl[0]), int(spl[1])])\n",
320 |     "            print (results)\n",
321 |     "            return results\n",
322 |     "        except Exception:\n",
323 |     "            print ('No file')\n",
324 |     "            return []\n",
325 |     "\n",
326 |     "    def __get_already_input_csv(self):\n",
327 |     "        \"\"\"Return the first two columns of the CSV log as a list of tuples; [] if unreadable.\"\"\"\n",
328 |     "        try:\n",
329 |     "            df = pd.read_csv(self.__log_filename)\n",
330 |     "            arr = np.array(df)\n",
331 |     "            return [tuple(a) for a in arr[:,0:2]]\n",
332 |     "        except Exception:\n",
333 |     "            print ('No file')\n",
334 |     "            return []\n",
335 |     "\n",
336 |     "    def preprocessing(self):\n",
337 |     "        \"\"\"Run CAR, ICA and band filtering for every (subject, clip) and return the result.\n",
338 |     "\n",
339 |     "        Clips missing from the ICA log are only labelled (the returned array stays zero);\n",
340 |     "        once all clips are logged, components are removed and each band is filtered.\n",
341 |     "        \"\"\"\n",
342 |     "        print ('====== Preprocessing ======')\n",
343 |     "        self.__data_prep = np.zeros((self.__no_of_subj*self.__no_of_clips, \\\n",
344 |     "                                     self.__no_of_channels, len(self.__band_list), self.__no_of_sampling))\n",
345 |     "        self.__info = mne.create_info(ch_names = self.__channel_list,\n",
346 |     "                   sfreq = self.__sampling_rate,\n",
347 |     "                   ch_types = 'eeg')\n",
348 |     "\n",
349 |     "        self.__ica = ICA(self.__channel_list, self.__sampling_rate, self.__log_filename)\n",
350 |     "\n",
351 |     "        rerun = input(\"Do you want to re-run all? (y/n): \")\n",
352 |     "        if rerun.lower() == 'n':\n",
353 |     "            already_done = self.__get_already_input_csv()\n",
354 |     "        else:\n",
355 |     "            rerun = input(\"Are you sure that you want to re-run all? (y/n): \")\n",
356 |     "            if rerun.lower() == 'y':\n",
357 |     "                already_done = []\n",
358 |     "\n",
359 |     "                try:\n",
360 |     "                    os.remove(self.__log_filename)\n",
361 |     "                except OSError:\n",
362 |     "                    pass\n",
363 |     "\n",
364 |     "                with open(self.__log_filename, \"a\") as f:\n",
365 |     "                    f.write('subj_id, clip_id, exclude,\\n')\n",
366 |     "            else:\n",
367 |     "                print ('Continue using previous data..')\n",
368 |     "                # Declining the reset used to leave already_done unbound (NameError below).\n",
369 |     "                already_done = self.__get_already_input_csv()\n",
370 |     "\n",
371 |     "        for subject_id, data in enumerate(self.__data):\n",
372 |     "            #each subject \n",
373 |     "            for clip_id, dt in enumerate(data):\n",
374 |     "                #each clip\n",
375 |     "                print ('\\nPreprocessing: subject_id =', subject_id, 'clip_id =', clip_id )\n",
376 |     "                raw = mne.io.RawArray(data = dt, info = self.__info)\n",
377 |     "\n",
378 |     "                # with CAR\n",
379 |     "                after_car = self.__calculate_CAR(raw)\n",
380 |     "                # without CAR\n",
381 |     "#                 after_car = dt\n",
382 |     "#                 print('NO CAR')\n",
383 |     "\n",
384 |     "                if len(already_done) < self.__no_of_subj*self.__no_of_clips:\n",
385 |     "                    print( 'Continue labelling..')\n",
386 |     "                    if (subject_id, clip_id) in already_done:\n",
387 |     "                        print ('already_done', (subject_id, clip_id))\n",
388 |     "                        continue\n",
389 |     "                    else:\n",
390 |     "                        self.__ica.label_exclude_list(after_car, subject_id, clip_id)\n",
391 |     "                        continue\n",
392 |     "\n",
393 |     "                else:\n",
394 |     "                    print ('All data is labeled for ICA: Actual removing components from saved csv..')\n",
395 |     "                    ## with ICA                \n",
396 |     "                    after_ica = self.__ica.remove_exclude_components(after_car, subject_id, clip_id)\n",
397 |     "                    ## without ICA\n",
398 |     "#                     after_ica = after_car # without ICA\n",
399 |     "#                     print('NO ICA')\n",
400 |     "\n",
401 |     "                    del after_car\n",
402 |     "\n",
403 |     "                    for channel_id, ch in enumerate(after_ica):\n",
404 |     "                        #each channel\n",
405 |     "                        #! already done notch at 50 Hz\n",
406 |     "                        #! asr = self.__calculate_ASR(bands_data) -> cannot do this\n",
407 |     "                        index = self.__no_of_clips*subject_id + clip_id\n",
408 |     "                        #print 'index', index, self.__no_of_clips, subject_id, clip_id, channel_id\n",
409 |     "                        self.__data_prep[index, channel_id, :, :] = self.__bands_filter(ch)\n",
410 |     "\n",
411 |     "        return self.__data_prep\n",
412 |     "\n",
413 |     "    def __bands_filter(self, data):\n",
414 |     "        \"\"\"Band-pass one channel into every configured band; returns (bands, samples).\"\"\"\n",
415 |     "        results = np.zeros(shape = (len(self.__band_list), len(data)))\n",
416 |     "\n",
417 |     "        for i, band in enumerate(self.__band_list):\n",
418 |     "            results[i] = self.__bandpass_filter(data, band['low'], band['high'])\n",
419 |     "        return results\n",
420 |     "\n",
421 |     "    def __bandpass_filter(self, data, low, high):\n",
422 |     "        \"\"\"Apply a 2nd-order Butterworth band-pass (low..high, in Hz) using lfilter.\"\"\"\n",
423 |     "        nyq = 0.5 * self.__sampling_rate\n",
424 |     "        low = low / nyq\n",
425 |     "        high = high / nyq\n",
426 |     "        order = 2\n",
427 |     "        b, a = signal.butter(order, [low, high], btype='band')\n",
428 |     "        filtered = signal.lfilter(b, a, data)\n",
429 |     "\n",
430 |     "        return filtered\n",
431 |     "\n",
432 |     "    def __calculate_ASR(self, data):\n",
433 |     "        \"\"\"Debug helper: print the geometric median of data (returns None).\"\"\"\n",
434 |     "        geo = geometric_median(data)\n",
435 |     "        print (data)\n",
436 |     "        print (geo.shape, geo)\n",
437 |     "        print ()\n",
438 |     "\n",
439 |     "    def __calculate_CAR(self, raw):\n",
440 |     "        \"\"\"Apply an average reference to raw (as an MNE projection) and return the data array.\"\"\"\n",
441 |     "        raw_car, _ = mne.set_eeg_reference(raw, 'average', projection=True) #Bad EEG channels are automatically excluded if they are properly set in info['bads']\n",
442 |     "        applied = raw_car.apply_proj()\n",
443 |     "        car_npy = applied.get_data()\n",
444 |     "        return car_npy\n",
445 |     "\n",
446 |     "    def save_to_numpy(self, result):\n",
447 |     "        \"\"\"Save result as preprocessed/EEG_ICA.npy under the input directory.\"\"\"\n",
448 |     "        np.save(os.path.join(self.__input_path, 'preprocessed/EEG_ICA.npy'), result)\n"
449 |    ]
450 |   },
451 |   {
452 |    "cell_type": "code",
453 |    "execution_count": null,
454 |    "metadata": {
455 |     "scrolled": false
456 |    },
457 |    "outputs": [],
458 |    "source": [
459 |     "eegPreprocessing = EEGPreprocessing()\n",
460 |     "eegPreprocessing.read_signals()\n",
461 |     "result = eegPreprocessing.preprocessing()\n",
462 |     "print (result.shape)\n",
463 |     "print (result)"
464 |    ]
465 |   },
466 |   {
467 |    "cell_type": "code",
468 |    "execution_count": null,
469 |    "metadata": {},
470 |    "outputs": [],
471 |    "source": [
472 |     "eegPreprocessing.save_to_numpy(result)"
473 |    ]
476 |   },
477 |   {
478 |    "cell_type": "code",
479 |    "execution_count": null,
480 |    "metadata": {},
481 |    "outputs": [],
482 |    "source": [
483 |     "# eegPreprocessing.set_labels(thresholds = None)"
484 |    ]
485 |   },
486 |   {
487 |    "cell_type": "code",
488 |    "execution_count": null,
489 |    "metadata": {},
490 |    "outputs": [],
491 |    "source": [
492 |     "#note: Happiness -> thresholds = [2,5], DNN-Acc = 89%\n",
493 |     "#note: Fear -> thresholds = [], DNN-Acc = %\n",
494 |     "#note: Excitement -> thresholds = [3,5], DNN-Acc = 100%\n"
495 |    ]
496 |   },
497 |   {
498 |    "cell_type": "code",
499 |    "execution_count": null,
500 |    "metadata": {},
501 |    "outputs": [],
502 |    "source": [
503 |     "# #unused\n",
504 |     "# def __calculate_ICA(self, data):\n",
505 |     "#     #ICA : remove EOG artifact from frontal part of a brain\n",
506 |     "#     raw = mne.io.RawArray(data = data, info = self.__info)\n",
507 |     "\n",
508 |     "#     method = 'fastica'\n",
509 |     "\n",
510 |     "#     # Choose other parameters\n",
511 |     "#     n_components = 5  # if float, select n_components by explained variance of PCA\n",
512 |     "#     decim = 3  # we need sufficient statistics, not all time points -> saves time\n",
513 |     "\n",
514 |     "#     # we will also set state of the random number generator - ICA is a\n",
515 |     "#     # non-deterministic algorithm, but we want to have the same decomposition\n",
516 |     "#     # and the same order of components each time this tutorial is run\n",
517 |     "#     random_state = 23\n",
518 |     "\n",
519 |     "#     # we avoid fitting ICA on crazy environmental artifacts that would dominate the variance and decomposition\n",
520 |     "#     reject = dict(mag=5e-12, grad=4000e-13)\n",
521 |     "\n",
522 |     "#     # Define the ICA object instance\n",
523 |     "#     ica = ICA(n_components=n_components, method=method, random_state=random_state)\n",
524 |     "\n",
525 |     "#     # We'll start by simulating a group of subjects or runs from a subject\n",
526 |     "#     start, stop = [0, raw.times[-1]]\n",
527 |     "#     intervals = np.linspace(start, stop, 4, dtype=np.float)\n",
528 |     "#     icas_from_other_data = list()\n",
529 |     "#     raw.pick_types(meg=False, eeg=True)  # take only MEG channels\n",
530 |     "#     for ii, start in enumerate(intervals):\n",
531 |     "#         if ii + 1 < len(intervals):\n",
532 |     "#             stop = intervals[ii + 1]\n",
533 |     "#             print('fitting ICA from {0} to {1} seconds'.format(start, stop))\n",
534 |     "#             this_ica = ICA(n_components=n_components, method=method).fit(\n",
535 |     "#                 raw, start=start, stop=stop, reject=reject)\n",
536 |     "#             print this_ica\n",
537 |     "#             icas_from_other_data.append(this_ica)\n",
538 |     "\n",
539 |     "#     reference_ica = ica\n",
540 |     "#     reference_ica.plot_components()\n",
541 |     "\n",
542 |     "#     print \n",
543 |     "#     print icas_from_other_data"
544 |    ]
545 |   },
546 |   {
547 |    "cell_type": "markdown",
548 |    "metadata": {},
549 |    "source": [
550 |     "###### "
551 |    ]
553 |   }
554 |  ],
555 |  "metadata": {
556 |   "kernelspec": {
557 |    "display_name": "Python 2",
558 |    "language": "python",
559 |    "name": "python2"
560 |   },
561 |   "language_info": {
562 |    "codemirror_mode": {
563 |     "name": "ipython",
564 |     "version": 3
565 |    },
566 |    "file_extension": ".py",
567 |    "mimetype": "text/x-python",
568 |    "name": "python",
569 |    "nbconvert_exporter": "python",
570 |    "pygments_lexer": "ipython3",
571 |    "version": "3.5.2"
572 |   }
573 |  },
574 |  "nbformat": 4,
575 |  "nbformat_minor": 2
576 | }
577 | 


--------------------------------------------------------------------------------
/src/.ipynb_checkpoints/prepare_data_to_db-checkpoint.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "code",
   5 |    "execution_count": 1,
   6 |    "metadata": {},
   7 |    "outputs": [],
   8 |    "source": [
   9 |     "import numpy as np\n",
  10 |     "from scipy import signal\n",
  11 |     "import pandas as pd"
  12 |    ]
  13 |   },
  14 |   {
  15 |    "cell_type": "code",
  16 |    "execution_count": 2,
  17 |    "metadata": {},
  18 |    "outputs": [],
  19 |    "source": [
  20 |     "def get_max_channel(data):\n",
  21 |     "    # Return the name of the channel holding the largest value in data, plus a string of\n",
  22 |     "    # distinct channel_list_part ids ordered by descending value (at most 5 characters).\n",
  23 |     "    order = np.argsort(data)[::-1]\n",
  24 |     "    max_ch_index = order[0]\n",
  25 |     "    brain_parts = str(channel_list_part[max_ch_index])\n",
  26 |     "\n",
  27 |     "    for ch_index in order[1:]:\n",
  28 |     "        part = str(channel_list_part[ch_index])\n",
  29 |     "        if part not in brain_parts:\n",
  30 |     "            brain_parts += part\n",
  31 |     "        if len(brain_parts) == 5:\n",
  32 |     "            break\n",
  33 |     "\n",
  34 |     "    return channel_list[max_ch_index], brain_parts"
  35 |    ]
  36 |   },
  37 |   {
  38 |    "cell_type": "code",
  39 |    "execution_count": 3,
  40 |    "metadata": {},
  41 |    "outputs": [],
  42 |    "source": [
  43 |     "def get_eeg_data(data, data_length, sample_id_list):\n",
  44 |     "    # For each selected sample: cut every channel to the analysis window, resample it to\n",
  45 |     "    # seconds_used points, then report per second the strongest channel and the region\n",
  46 |     "    # ranking from get_max_channel. Returns (max_channels, brain_patterns).\n",
  47 |     "    sampling_rate = data_length // seconds_recorded\n",
  48 |     "    start = start_cutting_at * sampling_rate\n",
  49 |     "    end = (start_cutting_at + seconds_used) * sampling_rate\n",
  50 |     "\n",
  51 |     "    # select only no_of_samples samples (flat id -> subject index, clip index)\n",
  52 |     "    samples = [data[s // no_of_clips][s % no_of_clips] for s in sample_id_list]\n",
  53 |     "\n",
  54 |     "    results = np.empty(shape=(no_of_samples, no_of_channel, seconds_used))\n",
  55 |     "    for sample_id, sample in enumerate(samples):\n",
  56 |     "        for channel_id, one_channel_data in enumerate(sample):\n",
  57 |     "            # cut to seconds_used seconds\n",
  58 |     "            cut_sample = one_channel_data[start:end]\n",
  59 |     "\n",
  60 |     "            # resample the cut window to 1 Hz (the whole recording was resampled before,\n",
  61 |     "            # which silently ignored the cut above)\n",
  62 |     "            res = signal.resample(cut_sample, seconds_used)\n",
  63 |     "            results[sample_id][channel_id] = res\n",
  64 |     "\n",
  65 |     "    # get max channel for each second\n",
  66 |     "    max_channels = np.empty(shape=(no_of_samples, seconds_used), dtype=\"S10\")\n",
  67 |     "    brain_patterns = np.empty(shape=(no_of_samples, seconds_used), dtype=\"S10\")\n",
  68 |     "\n",
  69 |     "    for sample_id in range(len(samples)):\n",
  70 |     "        for sec in range(seconds_used):\n",
  71 |     "            max_channels[sample_id][sec], brain_patterns[sample_id][sec] = get_max_channel(results[sample_id,:,sec])\n",
  72 |     "\n",
  73 |     "    return max_channels, brain_patterns"
  74 |    ]
  75 |   },
  76 |   {
  77 |    "cell_type": "code",
  78 |    "execution_count": 4,
  79 |    "metadata": {},
  80 |    "outputs": [],
  81 |    "source": [
  82 |     "def get_data(data, data_length, sample_id_list):\n",
  83 |     "    sampling_rate = data_length / seconds_recorded\n",
  84 |     "    print 'data_length:', data_length\n",
  85 |     "    print 'seconds_recorded:', seconds_recorded\n",
  86 |     "    print 'sampling_rate:', sampling_rate\n",
  87 |     "    samples = []\n",
  88 |     "    \n",
  89 |     "    if sampling_rate != 0:\n",
  90 |     "        start = start_cutting_at * sampling_rate\n",
  91 |     "        end = (start_cutting_at + seconds_used) * sampling_rate\n",
  92 |     "    else:\n",
  93 |     "        start = start_cutting_at\n",
  94 |     "        end = start + seconds_used\n",
  95 |     "    print 'start:', start, 'end:', end\n",
  96 |     "    \n",
  97 |     "    \n",
  98 |     "    # select only no_of_samples samples\n",
  99 |     "    for s in sample_id_list:\n",
 100 |     "        samples.append(data[s/no_of_clips][s%no_of_clips])\n",
 101 |     "    \n",
 102 |     "    results = np.empty(shape=(no_of_samples, seconds_used))\n",
 103 |     "    for index, sample in enumerate(samples):\n",
 104 |     "        # cut to seconds_used seconds\n",
 105 |     "        res = sample[start:end]\n",
 106 |     "\n",
 107 |     "        # resample to 1 Hz\n",
 108 |     "        if sampling_rate > 0 and not sampling_rate == desired_sampling_rate:\n",
 109 |     "            res = signal.resample(res, seconds_used)\n",
 110 |     "        \n",
 111 |     "        results[index] = res\n",
 112 |     "    \n",
 113 |     "    return results"
 114 |    ]
 115 |   },
 116 |   {
 117 |    "cell_type": "code",
 118 |    "execution_count": 5,
 119 |    "metadata": {},
 120 |    "outputs": [],
 121 |    "source": [
 122 |     "seconds_recorded = 56\n",
 123 |     "seconds_used = 15\n",
 124 |     "desired_sampling_rate = 1\n",
 125 |     "no_of_samples = 3 #number of samples per emotion type\n",
 126 |     "no_of_emotions = 3 #happy, excited, fear\n",
 127 |     "emotion_type_used = 'Fear'\n",
 128 |     "all_emotions = {\n",
 129 |     "    'Happiness': 0,\n",
 130 |     "    'Excitement': 1,\n",
 131 |     "    'Fear': 2\n",
 132 |     "}\n",
 133 |     "start_cutting_at = 0 #second\n",
 134 |     "channel_list = ['Fp1', 'Fp2', 'Fz', 'Cz', 'T3', 'T4', 'Pz', 'Oz']\n",
 135 |     "channel_list_part = [1, 1, 1, 2, 3, 3, 4, 5]\n",
 136 |     "\n",
 137 |     "labels = np.zeros(shape=(no_of_samples, no_of_emotions))\n",
 138 |     "\n",
 139 |     "# X = np.load('../data/EEG_features.npy')\n",
 140 |     "y = np.load('../data/result_'+emotion_type_used+'.npy')\n",
 141 |     "\n",
 142 |     "# print X.shape\n",
 143 |     "# print y.shape\n",
 144 |     "# print y"
 145 |    ]
 146 |   },
 147 |   {
 148 |    "cell_type": "code",
 149 |    "execution_count": 6,
 150 |    "metadata": {},
 151 |    "outputs": [
 152 |     {
 153 |      "name": "stdout",
 154 |      "output_type": "stream",
 155 |      "text": [
 156 |       "sample_id_list: [6, 8, 15]\n"
 157 |      ]
 158 |     }
 159 |    ],
 160 |    "source": [
 161 |     "## select no_of_samples samples with class 2 \n",
 162 |     "sample_id_list = []\n",
 163 |     "for i, index in enumerate(y):\n",
 164 |     "    if index == 2:\n",
 165 |     "        sample_id_list.append(i)\n",
 166 |     "    if len(sample_id_list) == no_of_samples:\n",
 167 |     "        break\n",
 168 |     "print('sample_id_list: %s' % sample_id_list)"
 169 |    ]
 170 |   },
 171 |   {
 172 |    "cell_type": "code",
 173 |    "execution_count": 7,
 174 |    "metadata": {},
 175 |    "outputs": [
 176 |     {
 177 |      "name": "stdout",
 178 |      "output_type": "stream",
 179 |      "text": [
 180 |       "(20, 9, 8, 14000)\n",
 181 |       "(3, 15) (3, 15)\n",
 182 |       "[['Fz' 'Fz' 'T3' 'Fz' 'T3' 'Fz' 'T3' 'Fz' 'T3' 'Fz' 'T3' 'Fz' 'T3' 'Fz'\n",
 183 |       "  'Oz']\n",
 184 |       " ['T3' 'Oz' 'T3' 'Oz' 'T3' 'Oz' 'Cz' 'Oz' 'T3' 'Oz' 'T3' 'Oz' 'T3' 'Oz'\n",
 185 |       "  'T3']\n",
 186 |       " ['Fp2' 'T4' 'T3' 'T4' 'T3' 'T4' 'T3' 'T4' 'T3' 'T4' 'T3' 'T4' 'T3' 'T4'\n",
 187 |       "  'T3']]\n",
 188 |       "[['12345' '12345' '35142' '12345' '31542' '13245' '31542' '13245' '31542'\n",
 189 |       "  '12345' '35142' '12345' '35142' '12345' '53412']\n",
 190 |       " ['32145' '53412' '32145' '54312' '31245' '53412' '23145' '54312' '32145'\n",
 191 |       "  '53412' '32145' '53412' '32145' '53412' '32145']\n",
 192 |       " ['13254' '31254' '34521' '31524' '34215' '31524' '34215' '31524' '34251'\n",
 193 |       "  '31524' '34512' '31245' '35421' '31245' '34521']]\n"
 194 |      ]
 195 |     }
 196 |    ],
 197 |    "source": [
 198 |     "## EEG\n",
 199 |     "data = np.load('../data/EEG.npy')\n",
 200 |     "no_of_clips = data.shape[1]\n",
 201 |     "no_of_channel = data.shape[2]\n",
 202 |     "print data.shape\n",
 203 |     "eeg_max_ch, eeg_brain_pattern = get_eeg_data(data, data.shape[3], sample_id_list)\n",
 204 |     "print eeg_max_ch.shape, eeg_brain_pattern.shape\n",
 205 |     "print eeg_max_ch\n",
 206 |     "print eeg_brain_pattern"
 207 |    ]
 208 |   },
 209 |   {
 210 |    "cell_type": "code",
 211 |    "execution_count": 8,
 212 |    "metadata": {},
 213 |    "outputs": [
 214 |     {
 215 |      "name": "stdout",
 216 |      "output_type": "stream",
 217 |      "text": [
 218 |       "(20, 9, 56)\n",
 219 |       "data_length: 56\n",
 220 |       "seconds_recorded: 56\n",
 221 |       "sampling_rate: 1\n",
 222 |       "start: 0 end: 15\n",
 223 |       "(3, 15)\n",
 224 |       "\n",
 225 |       "[[81.4  81.37 81.33 81.3  81.28 81.28 81.3  81.32 81.32 81.28 81.25 81.22\n",
 226 |       "  81.18 81.17 81.15]\n",
 227 |       " [75.98 76.03 76.07 76.1  76.13 76.13 76.13 76.12 76.12 76.12 76.12 76.12\n",
 228 |       "  76.12 76.12 76.1 ]\n",
 229 |       " [82.68 82.58 82.52 82.5  82.57 82.67 82.8  82.95 83.13 83.37 83.63 83.93\n",
 230 |       "  84.23 84.53 84.8 ]]\n"
 231 |      ]
 232 |     }
 233 |    ],
 234 |    "source": [
 235 |     "## HR\n",
 236 |     "data = np.load('../data/HR.npy')\n",
 237 |     "print data.shape\n",
 238 |     "hr = get_data(data, data.shape[2], sample_id_list)\n",
 239 |     "print hr.shape\n",
 240 |     "print\n",
 241 |     "print hr"
 242 |    ]
 243 |   },
 244 |   {
 245 |    "cell_type": "code",
 246 |    "execution_count": 9,
 247 |    "metadata": {},
 248 |    "outputs": [
 249 |     {
 250 |      "name": "stdout",
 251 |      "output_type": "stream",
 252 |      "text": [
 253 |       "(20, 9)\n",
 254 |       "29\n",
 255 |       "data_length: 29\n",
 256 |       "seconds_recorded: 56\n",
 257 |       "sampling_rate: 0\n",
 258 |       "start: 0 end: 15\n",
 259 |       "(3, 15)\n",
 260 |       "\n",
 261 |       "[[0.734409 0.734409 0.734409 0.796911 0.734409 0.796911 0.718783 0.703157\n",
 262 |       "  0.65628  0.718783 0.781286 0.812537 0.828163 0.796911 0.796911]\n",
 263 |       " [0.796911 0.796911 0.812537 0.843789 0.76566  0.828163 0.828163 0.796911\n",
 264 |       "  0.796911 0.828163 0.781286 0.843789 0.812537 0.781286 0.843789]\n",
 265 |       " [0.687531 0.671906 0.718783 0.687531 0.65628  0.671906 0.671906 0.625029\n",
 266 |       "  0.640654 0.609403 0.625029 0.625029 0.609403 0.640654 0.65628 ]]\n"
 267 |      ]
 268 |     }
 269 |    ],
 270 |    "source": [
 271 |     "## IBI\n",
 272 |     "data = np.load('../data/IBI.npy')\n",
 273 |     "print data.shape\n",
 274 |     "\n",
 275 |     "min_amount = np.min(np.array([len(x) for xx in data\n",
 276 |     "                                         for x in xx]))\n",
 277 |     "print min_amount\n",
 278 |     "data2 = np.empty(shape=(data.shape[0], data.shape[1], min_amount))\n",
 279 |     "for i, xx in enumerate(data):\n",
 280 |     "    for j, x in enumerate(xx):\n",
 281 |     "        data2[i][j] = x[0:min_amount]\n",
 282 |     "        \n",
 283 |     "data = data2\n",
 284 |     "ibi = get_data(data, data.shape[2], sample_id_list)\n",
 285 |     "print ibi.shape\n",
 286 |     "print\n",
 287 |     "print ibi"
 288 |    ]
 289 |   },
 290 |   {
 291 |    "cell_type": "code",
 292 |    "execution_count": 10,
 293 |    "metadata": {},
 294 |    "outputs": [
 295 |     {
 296 |      "name": "stdout",
 297 |      "output_type": "stream",
 298 |      "text": [
 299 |       "(20, 9, 3584)\n",
 300 |       "data_length: 3584\n",
 301 |       "seconds_recorded: 56\n",
 302 |       "sampling_rate: 64\n",
 303 |       "start: 0 end: 960\n",
 304 |       "(3, 15)\n",
 305 |       "\n",
 306 |       "[[-0.2692758  -0.55884788  0.34182638  0.83956395  1.53658115  0.47106878\n",
 307 |       "  -0.22612061 -1.17681371 -0.01013022 -1.23612091  0.15060774 -0.79410534\n",
 308 |       "  -0.85140287 -0.52996724  1.12907409]\n",
 309 |       " [ 3.59327665 -1.80216911 -1.8540378  -1.50720894 -0.2010447  -1.45216497\n",
 310 |       "   0.13555399  1.12321526  1.07078506  1.23299659 -0.36545367  1.02040498\n",
 311 |       "  -0.56479179 -0.94530931  0.59860402]\n",
 312 |       " [ 4.05300665 -0.1831846   0.88532207  1.10158178  0.84597522  0.53079778\n",
 313 |       "   0.07238115  3.0326111  -0.16156119 -0.09203145 -2.23711364 -3.78250802\n",
 314 |       "  -0.20619719 -2.74542742  6.335879  ]]\n"
 315 |      ]
 316 |     }
 317 |    ],
 318 |    "source": [
 319 |     "## BVP\n",
 320 |     "data = np.load('../data/BVP.npy')\n",
 321 |     "print data.shape\n",
 322 |     "bvp = get_data(data, data.shape[2], sample_id_list)\n",
 323 |     "print bvp.shape\n",
 324 |     "print\n",
 325 |     "print bvp"
 326 |    ]
 327 |   },
 328 |   {
 329 |    "cell_type": "code",
 330 |    "execution_count": 11,
 331 |    "metadata": {},
 332 |    "outputs": [
 333 |     {
 334 |      "name": "stdout",
 335 |      "output_type": "stream",
 336 |      "text": [
 337 |       "(20, 9, 224)\n",
 338 |       "data_length: 224\n",
 339 |       "seconds_recorded: 56\n",
 340 |       "sampling_rate: 4\n",
 341 |       "start: 0 end: 60\n",
 342 |       "(3, 15)\n",
 343 |       "\n",
 344 |       "[[1.48241777 1.49113557 1.4891314  1.48796745 1.48239448 1.48328612\n",
 345 |       "  1.48029846 1.47972648 1.47887765 1.47471121 1.47536268 1.47428035\n",
 346 |       "  1.47028682 1.4702046  1.4643562 ]\n",
 347 |       " [2.10331058 2.11291317 2.12465974 2.12061526 2.10999963 2.09664154\n",
 348 |       "  2.08401912 2.08362786 2.08163085 2.08215983 2.08071529 2.08259528\n",
 349 |       "  2.10032036 2.10357506 2.09790018]\n",
 350 |       " [1.19957534 1.19927651 1.19418336 1.19317942 1.19242642 1.19699021\n",
 351 |       "  1.19288829 1.19144405 1.19155861 1.19205804 1.19160182 1.19103763\n",
 352 |       "  1.19266869 1.19462375 1.19311434]]\n"
 353 |      ]
 354 |     }
 355 |    ],
 356 |    "source": [
 357 |     "## EDA\n",
 358 |     "data = np.load('../data/EDA.npy')\n",
 359 |     "print data.shape\n",
 360 |     "eda = get_data(data, data.shape[2], sample_id_list)\n",
 361 |     "print eda.shape\n",
 362 |     "print\n",
 363 |     "print eda"
 364 |    ]
 365 |   },
 366 |   {
 367 |    "cell_type": "code",
 368 |    "execution_count": 12,
 369 |    "metadata": {},
 370 |    "outputs": [
 371 |     {
 372 |      "name": "stdout",
 373 |      "output_type": "stream",
 374 |      "text": [
 375 |       "(20, 9, 224)\n",
 376 |       "data_length: 224\n",
 377 |       "seconds_recorded: 56\n",
 378 |       "sampling_rate: 4\n",
 379 |       "start: 0 end: 60\n",
 380 |       "(3, 15)\n",
 381 |       "\n",
 382 |       "[[33.58104998 33.57388603 33.58631546 33.57387223 33.58583797 33.57433381\n",
 383 |       "  33.56856479 33.55395725 33.56533441 33.55519798 33.56425735 33.55638018\n",
 384 |       "  33.5629817  33.57450156 33.5485293 ]\n",
 385 |       " [33.83751013 33.83233809 33.83841077 33.84570912 33.86438429 33.83035265\n",
 386 |       "  33.84035331 33.82788515 33.8333968  33.83698527 33.83248123 33.83903134\n",
 387 |       "  33.84127229 33.83724136 33.83264818]\n",
 388 |       " [34.43690633 34.4519388  34.449487   34.44473098 34.42530939 34.44022563\n",
 389 |       "  34.44384445 34.4266401  34.43874803 34.44545336 34.42483836 34.44087392\n",
 390 |       "  34.44272514 34.42895283 34.42932567]]\n"
 391 |      ]
 392 |     }
 393 |    ],
 394 |    "source": [
 395 |     "## Temp\n",
 396 |     "data = np.load('../data/TEMP.npy')\n",
 397 |     "print data.shape\n",
 398 |     "temp = get_data(data, data.shape[2], sample_id_list)\n",
 399 |     "print temp.shape\n",
 400 |     "print\n",
 401 |     "print temp"
 402 |    ]
 403 |   },
 404 |   {
 405 |    "cell_type": "code",
 406 |    "execution_count": 13,
 407 |    "metadata": {},
 408 |    "outputs": [
 409 |     {
 410 |      "name": "stdout",
 411 |      "output_type": "stream",
 412 |      "text": [
 413 |       "Excitement 1\n",
 414 |       "Fear 2\n",
 415 |       "Happiness 0\n",
 416 |       "(3, 3) [[1. 2. 2.]\n",
 417 |       " [1. 1. 2.]\n",
 418 |       " [1. 1. 2.]]\n"
 419 |      ]
 420 |     }
 421 |    ],
 422 |    "source": [
 423 |     "## get the label of every emotion for each sample\n",
 424 |     "for emotion in all_emotions:\n",
 425 |     "    emotion_id = all_emotions[emotion]\n",
 426 |     "    print emotion, emotion_id\n",
 427 |     "    \n",
 428 |     "    l = np.load('../data/result_'+emotion+'.npy')\n",
 429 |     "    labels[:,emotion_id] = l[sample_id_list]\n",
 430 |     "    \n",
 431 |     "print labels.shape, labels"
 432 |    ]
 433 |   },
 434 |   {
 435 |    "cell_type": "code",
 436 |    "execution_count": 14,
 437 |    "metadata": {
 438 |     "scrolled": true
 439 |    },
 440 |    "outputs": [
 441 |     {
 442 |      "name": "stdout",
 443 |      "output_type": "stream",
 444 |      "text": [
 445 |       "(3, 10, 15)\n",
 446 |       "[[['Fz' 'Fz' 'T3' 'Fz' 'T3' 'Fz' 'T3' 'Fz' 'T3' 'Fz' 'T3' 'Fz' 'T3' 'Fz'\n",
 447 |       "   'Oz']\n",
 448 |       "  ['12345' '12345' '35142' '12345' '31542' '13245' '31542' '13245'\n",
 449 |       "   '31542' '12345' '35142' '12345' '35142' '12345' '53412']\n",
 450 |       "  ['81.4' '81.37' '81.33' '81.3' '81.28' '81.28' '81.3' '81.32' '81.32'\n",
 451 |       "   '81.28' '81.25' '81.22' '81.18' '81.17' '81.15']\n",
 452 |       "  ['0.734409' '0.734409' '0.734409' '0.7969109999999999' '0.734409'\n",
 453 |       "   '0.7969109999999999' '0.718783' '0.7031569999999999' '0.65628'\n",
 454 |       "   '0.718783' '0.7812859999999999' '0.812537' '0.8281629999999999'\n",
 455 |       "   '0.7969109999999999' '0.7969109999999999']\n",
 456 |       "  ['-0.26927579528588186' '-0.5588478778851528' '0.3418263774759193'\n",
 457 |       "   '0.8395639515363473' '1.5365811488670178' '0.4710687831267232'\n",
 458 |       "   '-0.2261206129154618' '-1.1768137116079' '-0.01013021843250789'\n",
 459 |       "   '-1.2361209062555765' '0.1506077354797684' '-0.794105344651547'\n",
 460 |       "   '-0.8514028731615745' '-0.5299672415725517' '1.12907408528238']\n",
 461 |       "  ['1.4824177706616941' '1.4911355711645364' '1.4891314047923305'\n",
 462 |       "   '1.487967446388503' '1.4823944826940016' '1.4832861177659982'\n",
 463 |       "   '1.480298456513354' '1.4797264821181584' '1.4788776503718737'\n",
 464 |       "   '1.4747112098672046' '1.4753626843370382' '1.4742803529232011'\n",
 465 |       "   '1.4702868171880499' '1.4702046034827025' '1.4643561997313566']\n",
 466 |       "  ['33.581049975719274' '33.573886026410186' '33.58631546090638'\n",
 467 |       "   '33.573872231355615' '33.5858379741147' '33.57433381196757'\n",
 468 |       "   '33.56856478813554' '33.55395725062644' '33.565334408649996'\n",
 469 |       "   '33.55519797641607' '33.56425735428033' '33.55638017944211'\n",
 470 |       "   '33.562981695040186' '33.574501563926894' '33.54852930300873']\n",
 471 |       "  ['1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0'\n",
 472 |       "   '1.0' '1.0' '1.0' '1.0']\n",
 473 |       "  ['2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0'\n",
 474 |       "   '2.0' '2.0' '2.0' '2.0']\n",
 475 |       "  ['2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0'\n",
 476 |       "   '2.0' '2.0' '2.0' '2.0']]\n",
 477 |       "\n",
 478 |       " [['T3' 'Oz' 'T3' 'Oz' 'T3' 'Oz' 'Cz' 'Oz' 'T3' 'Oz' 'T3' 'Oz' 'T3' 'Oz'\n",
 479 |       "   'T3']\n",
 480 |       "  ['32145' '53412' '32145' '54312' '31245' '53412' '23145' '54312'\n",
 481 |       "   '32145' '53412' '32145' '53412' '32145' '53412' '32145']\n",
 482 |       "  ['75.98' '76.03' '76.07' '76.1' '76.13' '76.13' '76.13' '76.12'\n",
 483 |       "   '76.12' '76.12' '76.12' '76.12' '76.12' '76.12' '76.1']\n",
 484 |       "  ['0.7969109999999999' '0.7969109999999999' '0.812537'\n",
 485 |       "   '0.8437889999999999' '0.76566' '0.8281629999999999'\n",
 486 |       "   '0.8281629999999999' '0.7969109999999999' '0.7969109999999999'\n",
 487 |       "   '0.8281629999999999' '0.7812859999999999' '0.8437889999999999'\n",
 488 |       "   '0.812537' '0.7812859999999999' '0.8437889999999999']\n",
 489 |       "  ['3.593276650120296' '-1.8021691147015666' '-1.8540377952390494'\n",
 490 |       "   '-1.507208942123853' '-0.2010446997228169' '-1.4521649689192804'\n",
 491 |       "   '0.13555399182825478' '1.123215256210277' '1.070785064412774'\n",
 492 |       "   '1.2329965892498158' '-0.36545367412744484' '1.0204049767703953'\n",
 493 |       "   '-0.5647917931209475' '-0.9453093088561817' '0.5986040182193272']\n",
 494 |       "  ['2.103310580483144' '2.1129131700509167' '2.124659737781747'\n",
 495 |       "   '2.120615258965631' '2.109999628947125' '2.0966415389649233'\n",
 496 |       "   '2.084019123469333' '2.0836278577775897' '2.0816308499625555'\n",
 497 |       "   '2.082159831548873' '2.0807152925288075' '2.0825952769341045'\n",
 498 |       "   '2.100320362662788' '2.1035750572293406' '2.0979001826931207']\n",
 499 |       "  ['33.8375101326174' '33.83233808731925' '33.838410771079154'\n",
 500 |       "   '33.84570912354651' '33.86438429029343' '33.83035265441819'\n",
 501 |       "   '33.840353308667545' '33.827885154432074' '33.833396798738484'\n",
 502 |       "   '33.83698527320212' '33.832481234761815' '33.83903134443828'\n",
 503 |       "   '33.84127228898536' '33.83724136021236' '33.83264817728806']\n",
 504 |       "  ['1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0'\n",
 505 |       "   '1.0' '1.0' '1.0' '1.0']\n",
 506 |       "  ['1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0'\n",
 507 |       "   '1.0' '1.0' '1.0' '1.0']\n",
 508 |       "  ['2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0'\n",
 509 |       "   '2.0' '2.0' '2.0' '2.0']]\n",
 510 |       "\n",
 511 |       " [['Fp2' 'T4' 'T3' 'T4' 'T3' 'T4' 'T3' 'T4' 'T3' 'T4' 'T3' 'T4' 'T3'\n",
 512 |       "   'T4' 'T3']\n",
 513 |       "  ['13254' '31254' '34521' '31524' '34215' '31524' '34215' '31524'\n",
 514 |       "   '34251' '31524' '34512' '31245' '35421' '31245' '34521']\n",
 515 |       "  ['82.68' '82.58' '82.52' '82.5' '82.57' '82.67' '82.8' '82.95' '83.13'\n",
 516 |       "   '83.37' '83.63' '83.93' '84.23' '84.53' '84.8']\n",
 517 |       "  ['0.687531' '0.671906' '0.718783' '0.687531' '0.65628' '0.671906'\n",
 518 |       "   '0.671906' '0.625029' '0.640654' '0.609403' '0.625029' '0.625029'\n",
 519 |       "   '0.609403' '0.640654' '0.65628']\n",
 520 |       "  ['4.0530066510939955' '-0.18318459653342575' '0.8853220749388773'\n",
 521 |       "   '1.101581780645237' '0.8459752235378747' '0.5307977822396893'\n",
 522 |       "   '0.07238114628940619' '3.0326111000085083' '-0.1615611943226964'\n",
 523 |       "   '-0.09203145177699404' '-2.2371136408751418' '-3.7825080233004713'\n",
 524 |       "   '-0.20619718778918816' '-2.745427415012558' '6.33587900085689']\n",
 525 |       "  ['1.199575340310592' '1.1992765064997448' '1.1941833632335217'\n",
 526 |       "   '1.1931794239213276' '1.1924264218194012' '1.1969902149707787'\n",
 527 |       "   '1.192888287452217' '1.1914440467409884' '1.191558613843376'\n",
 528 |       "   '1.1920580435310018' '1.1916018242257427' '1.1910376332589652'\n",
 529 |       "   '1.1926686934599962' '1.1946237498199608' '1.1931143369123864']\n",
 530 |       "  ['34.43690633487135' '34.451938804877514' '34.44948700040381'\n",
 531 |       "   '34.44473097500206' '34.425309393019845' '34.44022563363777'\n",
 532 |       "   '34.4438444531937' '34.42664010132504' '34.438748033228386'\n",
 533 |       "   '34.44545335556807' '34.42483835902005' '34.44087391742899'\n",
 534 |       "   '34.442725135402675' '34.42895283368188' '34.429325669338894']\n",
 535 |       "  ['1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0'\n",
 536 |       "   '1.0' '1.0' '1.0' '1.0']\n",
 537 |       "  ['1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0' '1.0'\n",
 538 |       "   '1.0' '1.0' '1.0' '1.0']\n",
 539 |       "  ['2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0' '2.0'\n",
 540 |       "   '2.0' '2.0' '2.0' '2.0']]]\n"
 541 |      ]
 542 |     }
 543 |    ],
 544 |    "source": [
 545 |     "## save all signal data for this emotion type, together with the labels of the three emotions (happy, excited, fear)\n",
 546 |     "results = []\n",
 547 |     "for i in range(0, no_of_samples):\n",
 548 |     "    r = []\n",
 549 |     "    r.append(eeg_max_ch[i])\n",
 550 |     "    r.append(eeg_brain_pattern[i])\n",
 551 |     "    r.append(hr[i])\n",
 552 |     "    r.append(ibi[i])\n",
 553 |     "    r.append(bvp[i])\n",
 554 |     "    r.append(eda[i])\n",
 555 |     "    r.append(temp[i])\n",
 556 |     "    \n",
 557 |     "    for l_index, l in enumerate(labels[i]):\n",
 558 |     "        r.append([l]*seconds_used)\n",
 559 |     "        \n",
 560 |     "    results.append(r)\n",
 561 |     "    \n",
 562 |     "results = np.array(results)\n",
 563 |     "print results.shape\n",
 564 |     "print results\n",
 565 |     "\n",
 566 |     "np.save('../data/to_db_'+emotion_type_used, results)"
 567 |    ]
 568 |   },
 569 |   {
 570 |    "cell_type": "code",
 571 |    "execution_count": 23,
 572 |    "metadata": {},
 573 |    "outputs": [
 574 |     {
 575 |      "name": "stdout",
 576 |      "output_type": "stream",
 577 |      "text": [
 578 |       "happy 0 ['Oz' '54312' '84.9' '0.687531' '-4.55078166902484' '1.404545697131073'\n",
 579 |       " '33.22175052042639' '2.0' '0.0' '0.0']\n",
 580 |       "happy 0 ['Pz' '42513' '85.02' '0.718783' '-0.4408397303188701'\n",
 581 |       " '1.4059218404262792' '33.2330202460298' '2.0' '0.0' '0.0']\n",
 582 |       "happy 0 ['T4' '31524' '85.12' '0.687531' '-2.261250175994979' '1.4053177023885546'\n",
 583 |       " '33.22508311478038' '2.0' '0.0' '0.0']\n",
 584 |       "happy 0 ['Pz' '42513' '85.2' '0.7031569999999999' '-0.5592622079354549'\n",
 585 |       " '1.4045685288905447' '33.21375297852862' '2.0' '0.0' '0.0']\n",
 586 |       "happy 0 ['T4' '31524' '85.27' '0.7031569999999999' '0.2585891191377308'\n",
 587 |       " '1.4046934557677224' '33.22953231584786' '2.0' '0.0' '0.0']\n",
 588 |       "happy 0 ['Pz' '45213' '85.27' '0.718783' '-0.21866234574152799'\n",
 589 |       " '1.4046159906540399' '33.230593521864456' '2.0' '0.0' '0.0']\n",
 590 |       "happy 0 ['T4' '31524' '85.28' '0.76566' '2.2161318293549632' '1.4049554957133694'\n",
 591 |       " '33.22877192664639' '2.0' '0.0' '0.0']\n",
 592 |       "happy 0 ['Pz' '42513' '85.28' '0.718783' '1.121558138852573' '1.4043784486084998'\n",
 593 |       " '33.234121259741826' '2.0' '0.0' '0.0']\n",
 594 |       "happy 0 ['T4' '31524' '85.3' '0.640654' '-0.6403204100316248' '1.4056235883166583'\n",
 595 |       " '33.24477075035723' '2.0' '0.0' '0.0']\n",
 596 |       "happy 0 ['Pz' '42153' '85.3' '0.65628' '0.09778859247155212' '1.4057573825425953'\n",
 597 |       " '33.20839267792665' '2.0' '0.0' '0.0']\n",
 598 |       "happy 0 ['T4' '31524' '85.32' '0.7031569999999999' '0.3117063141551386'\n",
 599 |       " '1.4059360268528507' '33.21398044361345' '2.0' '0.0' '0.0']\n",
 600 |       "happy 0 ['Pz' '42153' '85.33' '0.65628' '-0.31164829436069846'\n",
 601 |       " '1.4042590967314146' '33.22927779383731' '2.0' '0.0' '0.0']\n",
 602 |       "happy 0 ['T4' '31524' '85.35' '0.687531' '-0.013391629188320663'\n",
 603 |       " '1.4055989769696096' '33.230784891088156' '2.0' '0.0' '0.0']\n",
 604 |       "happy 0 ['Pz' '42513' '85.37' '0.687531' '-0.013364412095818306'\n",
 605 |       " '1.4075265051213866' '33.22723248611298' '2.0' '0.0' '0.0']\n",
 606 |       "happy 0 ['T4' '31254' '85.38' '0.7031569999999999' '-1.4865656192798251'\n",
 607 |       " '1.4012132638854' '33.20893507319845' '2.0' '0.0' '0.0']\n",
 608 |       "excitement 1 ['Fz' '12345' '81.4' '0.734409' '-0.26927579528588186'\n",
 609 |       " '1.4824177706616941' '33.581049975719274' '1.0' '2.0' '2.0']\n",
 610 |       "excitement 1 ['Fz' '12345' '81.37' '0.734409' '-0.5588478778851528'\n",
 611 |       " '1.4911355711645364' '33.573886026410186' '1.0' '2.0' '2.0']\n",
 612 |       "excitement 1 ['T3' '35142' '81.33' '0.734409' '0.3418263774759193' '1.4891314047923305'\n",
 613 |       " '33.58631546090638' '1.0' '2.0' '2.0']\n",
 614 |       "excitement 1 ['Fz' '12345' '81.3' '0.7969109999999999' '0.8395639515363473'\n",
 615 |       " '1.487967446388503' '33.573872231355615' '1.0' '2.0' '2.0']\n",
 616 |       "excitement 1 ['T3' '31542' '81.28' '0.734409' '1.5365811488670178' '1.4823944826940016'\n",
 617 |       " '33.5858379741147' '1.0' '2.0' '2.0']\n",
 618 |       "excitement 1 ['Fz' '13245' '81.28' '0.7969109999999999' '0.4710687831267232'\n",
 619 |       " '1.4832861177659982' '33.57433381196757' '1.0' '2.0' '2.0']\n",
 620 |       "excitement 1 ['T3' '31542' '81.3' '0.718783' '-0.2261206129154618' '1.480298456513354'\n",
 621 |       " '33.56856478813554' '1.0' '2.0' '2.0']\n",
 622 |       "excitement 1 ['Fz' '13245' '81.32' '0.7031569999999999' '-1.1768137116079'\n",
 623 |       " '1.4797264821181584' '33.55395725062644' '1.0' '2.0' '2.0']\n",
 624 |       "excitement 1 ['T3' '31542' '81.32' '0.65628' '-0.01013021843250789'\n",
 625 |       " '1.4788776503718737' '33.565334408649996' '1.0' '2.0' '2.0']\n",
 626 |       "excitement 1 ['Fz' '12345' '81.28' '0.718783' '-1.2361209062555765'\n",
 627 |       " '1.4747112098672046' '33.55519797641607' '1.0' '2.0' '2.0']\n",
 628 |       "excitement 1 ['T3' '35142' '81.25' '0.7812859999999999' '0.1506077354797684'\n",
 629 |       " '1.4753626843370382' '33.56425735428033' '1.0' '2.0' '2.0']\n",
 630 |       "excitement 1 ['Fz' '12345' '81.22' '0.812537' '-0.794105344651547' '1.4742803529232011'\n",
 631 |       " '33.55638017944211' '1.0' '2.0' '2.0']\n",
 632 |       "excitement 1 ['T3' '35142' '81.18' '0.8281629999999999' '-0.8514028731615745'\n",
 633 |       " '1.4702868171880499' '33.562981695040186' '1.0' '2.0' '2.0']\n",
 634 |       "excitement 1 ['Fz' '12345' '81.17' '0.7969109999999999' '-0.5299672415725517'\n",
 635 |       " '1.4702046034827025' '33.574501563926894' '1.0' '2.0' '2.0']\n",
 636 |       "excitement 1 ['Oz' '53412' '81.15' '0.7969109999999999' '1.12907408528238'\n",
 637 |       " '1.4643561997313566' '33.54852930300873' '1.0' '2.0' '2.0']\n",
 638 |       "fear 2 ['Fz' '12345' '81.4' '0.734409' '-0.26927579528588186'\n",
 639 |       " '1.4824177706616941' '33.581049975719274' '1.0' '2.0' '2.0']\n",
 640 |       "fear 2 ['Fz' '12345' '81.37' '0.734409' '-0.5588478778851528'\n",
 641 |       " '1.4911355711645364' '33.573886026410186' '1.0' '2.0' '2.0']\n",
 642 |       "fear 2 ['T3' '35142' '81.33' '0.734409' '0.3418263774759193' '1.4891314047923305'\n",
 643 |       " '33.58631546090638' '1.0' '2.0' '2.0']\n",
 644 |       "fear 2 ['Fz' '12345' '81.3' '0.7969109999999999' '0.8395639515363473'\n",
 645 |       " '1.487967446388503' '33.573872231355615' '1.0' '2.0' '2.0']\n",
 646 |       "fear 2 ['T3' '31542' '81.28' '0.734409' '1.5365811488670178' '1.4823944826940016'\n",
 647 |       " '33.5858379741147' '1.0' '2.0' '2.0']\n",
 648 |       "fear 2 ['Fz' '13245' '81.28' '0.7969109999999999' '0.4710687831267232'\n",
 649 |       " '1.4832861177659982' '33.57433381196757' '1.0' '2.0' '2.0']\n",
 650 |       "fear 2 ['T3' '31542' '81.3' '0.718783' '-0.2261206129154618' '1.480298456513354'\n",
 651 |       " '33.56856478813554' '1.0' '2.0' '2.0']\n",
 652 |       "fear 2 ['Fz' '13245' '81.32' '0.7031569999999999' '-1.1768137116079'\n",
 653 |       " '1.4797264821181584' '33.55395725062644' '1.0' '2.0' '2.0']\n",
 654 |       "fear 2 ['T3' '31542' '81.32' '0.65628' '-0.01013021843250789'\n",
 655 |       " '1.4788776503718737' '33.565334408649996' '1.0' '2.0' '2.0']\n",
 656 |       "fear 2 ['Fz' '12345' '81.28' '0.718783' '-1.2361209062555765'\n",
 657 |       " '1.4747112098672046' '33.55519797641607' '1.0' '2.0' '2.0']\n",
 658 |       "fear 2 ['T3' '35142' '81.25' '0.7812859999999999' '0.1506077354797684'\n",
 659 |       " '1.4753626843370382' '33.56425735428033' '1.0' '2.0' '2.0']\n",
 660 |       "fear 2 ['Fz' '12345' '81.22' '0.812537' '-0.794105344651547' '1.4742803529232011'\n",
 661 |       " '33.55638017944211' '1.0' '2.0' '2.0']\n",
 662 |       "fear 2 ['T3' '35142' '81.18' '0.8281629999999999' '-0.8514028731615745'\n",
 663 |       " '1.4702868171880499' '33.562981695040186' '1.0' '2.0' '2.0']\n",
 664 |       "fear 2 ['Fz' '12345' '81.17' '0.7969109999999999' '-0.5299672415725517'\n",
 665 |       " '1.4702046034827025' '33.574501563926894' '1.0' '2.0' '2.0']\n",
 666 |       "fear 2 ['Oz' '53412' '81.15' '0.7969109999999999' '1.12907408528238'\n",
 667 |       " '1.4643561997313566' '33.54852930300873' '1.0' '2.0' '2.0']\n",
 668 |       "happy 3 ['Oz' '54312' '84.62' '0.750034' '-1.5369140855459729'\n",
 669 |       " '1.3578466279380474' '33.296682500041996' '2.0' '0.0' '0.0']\n",
 670 |       "happy 3 ['T3' '35142' '84.63' '0.7969109999999999' '1.9845199689638098'\n",
 671 |       " '1.356737879146474' '33.29513619361375' '2.0' '0.0' '0.0']\n",
 672 |       "happy 3 ['Cz' '21453' '84.63' '0.8281629999999999' '-0.49993824888540994'\n",
 673 |       " '1.3602433160618743' '33.30522485698125' '2.0' '0.0' '0.0']\n",
 674 |       "happy 3 ['T3' '35142' '84.63' '0.734409' '-1.806289379968065' '1.3582457557845415'\n",
 675 |       " '33.29483543553489' '2.0' '0.0' '0.0']\n",
 676 |       "happy 3 ['Cz' '21453' '84.62' '0.76566' '-1.379801337345605' '1.3572583790899855'\n",
 677 |       " '33.28802419207732' '2.0' '0.0' '0.0']\n",
 678 |       "happy 3 ['T3' '31542' '84.62' '0.734409' '0.20664436216513554'\n",
 679 |       " '1.3579637672992115' '33.27462383588453' '2.0' '0.0' '0.0']\n",
 680 |       "happy 3 ['Cz' '21435' '84.6' '0.7812859999999999' '-2.1400236473013172'\n",
 681 |       " '1.3563152396392992' '33.28463303633774' '2.0' '0.0' '0.0']\n",
 682 |       "happy 3 ['T3' '35142' '84.58' '0.76566' '0.8828878473176548' '1.3555691192924786'\n",
 683 |       " '33.29256026618659' '2.0' '0.0' '0.0']\n",
 684 |       "happy 3 ['Fp2' '12435' '84.55' '0.750034' '-0.2249414873342827'\n",
 685 |       " '1.356055158741385' '33.27135353589151' '2.0' '0.0' '0.0']\n",
 686 |       "happy 3 ['T3' '35412' '84.48' '0.671906' '0.44787412131486576'\n",
 687 |       " '1.3561899200157126' '33.287362161228664' '2.0' '0.0' '0.0']\n",
 688 |       "happy 3 ['Cz' '21453' '84.4' '0.76566' '-0.278819066975118' '1.3562493901408708'\n",
 689 |       " '33.29008811649132' '2.0' '0.0' '0.0']\n",
 690 |       "happy 3 ['T3' '35142' '84.3' '0.750034' '-0.0179744916916604' '1.3559673389871718'\n",
 691 |       " '33.27374499515571' '2.0' '0.0' '0.0']\n",
 692 |       "happy 3 ['Cz' '21453' '84.2' '0.7812859999999999' '0.5088851231266968'\n",
 693 |       " '1.3605572717989152' '33.28470053890721' '2.0' '0.0' '0.0']\n",
 694 |       "happy 3 ['T3' '35142' '84.1' '0.718783' '2.2557361334947217' '1.3536949829718101'\n",
 695 |       " '33.27663140098961' '2.0' '0.0' '0.0']\n",
 696 |       "happy 3 ['Cz' '21453' '84.0' '0.76566' '0.5244041886645472' '1.3572153530922242'\n",
 697 |       " '33.26439893467801' '2.0' '0.0' '0.0']\n",
 698 |       "excitement 4 ['T4' '31524' '69.77' '0.734409' '-6.166753922959058' '2.3517621411181917'\n",
 699 |       " '34.47349828151966' '2.0' '2.0' '0.0']\n",
 700 |       "excitement 4 ['Fz' '13245' '69.85' '0.750034' '0.2237341238788005' '2.469049596890264'\n",
 701 |       " '34.496295704929736' '2.0' '2.0' '0.0']\n",
 702 |       "excitement 4 ['Oz' '53412' '69.9' '0.671906' '4.315071820807172' '2.412078125727334'\n",
 703 |       " '34.478169027993246' '2.0' '2.0' '0.0']\n",
 704 |       "excitement 4 ['Fz' '12345' '69.95' '1.046923' '-1.6100860064856473' '2.415898541284612'\n",
 705 |       " '34.4788799625469' '2.0' '2.0' '0.0']\n",
 706 |       "excitement 4 ['Oz' '54312' '69.98' '0.7969109999999999' '0.6010398042611513'\n",
 707 |       " '2.3820953226690564' '34.48362813904861' '2.0' '2.0' '0.0']\n",
 708 |       "excitement 4 ['T4' '31245' '70.02' '0.750034' '2.2569535683594966' '2.3777094348715346'\n",
 709 |       " '34.46904403724456' '2.0' '2.0' '0.0']\n",
 710 |       "excitement 4 ['Oz' '54312' '70.05' '0.968794' '-0.1482419475446118'\n",
 711 |       " '2.3396625078014277' '34.46336764208341' '2.0' '2.0' '0.0']\n",
 712 |       "excitement 4 ['T4' '31245' '70.08' '1.0938' '-1.6603674496322662' '2.330439170929366'\n",
 713 |       " '34.45381357804502' '2.0' '2.0' '0.0']\n",
 714 |       "excitement 4 ['Oz' '53412' '70.1' '0.8281629999999999' '0.9203978477323261'\n",
 715 |       " '2.2968480420934982' '34.47002449742809' '2.0' '2.0' '0.0']\n",
 716 |       "excitement 4 ['Fz' '13245' '70.12' '0.7031569999999999' '1.0322103639927545'\n",
 717 |       " '2.2877035559875347' '34.434392658591555' '2.0' '2.0' '0.0']\n",
 718 |       "excitement 4 ['Oz' '53412' '70.1' '0.671906' '-1.563367444020823' '2.2552013211552198'\n",
 719 |       " '34.447457681235804' '2.0' '2.0' '0.0']\n",
 720 |       "excitement 4 ['Fz' '13245' '70.07' '0.718783' '-2.8797646940564494' '2.247084387605589'\n",
 721 |       " '34.46502350095789' '2.0' '2.0' '0.0']\n",
 722 |       "excitement 4 ['Oz' '53412' '70.05' '0.7812859999999999' '0.1456151035519344'\n",
 723 |       " '2.2116747826711136' '34.447094365195426' '2.0' '2.0' '0.0']\n",
 724 |       "excitement 4 ['Fz' '13245' '70.08' '1.015671' '3.349920518065063' '2.2155328732964583'\n",
 725 |       " '34.45201856879133' '2.0' '2.0' '0.0']\n",
 726 |       "excitement 4 ['Oz' '53412' '70.13' '1.031297' '5.269732064050161' '2.1548839458988027'\n",
 727 |       " '34.447292354388836' '2.0' '2.0' '0.0']\n",
 728 |       "fear 5 ['T3' '32145' '75.98' '0.7969109999999999' '3.593276650120296'\n",
 729 |       " '2.103310580483144' '33.8375101326174' '1.0' '1.0' '2.0']\n",
 730 |       "fear 5 ['Oz' '53412' '76.03' '0.7969109999999999' '-1.8021691147015666'\n",
 731 |       " '2.1129131700509167' '33.83233808731925' '1.0' '1.0' '2.0']\n",
 732 |       "fear 5 ['T3' '32145' '76.07' '0.812537' '-1.8540377952390494' '2.124659737781747'\n",
 733 |       " '33.838410771079154' '1.0' '1.0' '2.0']\n",
 734 |       "fear 5 ['Oz' '54312' '76.1' '0.8437889999999999' '-1.507208942123853'\n",
 735 |       " '2.120615258965631' '33.84570912354651' '1.0' '1.0' '2.0']\n",
 736 |       "fear 5 ['T3' '31245' '76.13' '0.76566' '-0.2010446997228169' '2.109999628947125'\n",
 737 |       " '33.86438429029343' '1.0' '1.0' '2.0']\n",
 738 |       "fear 5 ['Oz' '53412' '76.13' '0.8281629999999999' '-1.4521649689192804'\n",
 739 |       " '2.0966415389649233' '33.83035265441819' '1.0' '1.0' '2.0']\n",
 740 |       "fear 5 ['Cz' '23145' '76.13' '0.8281629999999999' '0.13555399182825478'\n",
 741 |       " '2.084019123469333' '33.840353308667545' '1.0' '1.0' '2.0']\n",
 742 |       "fear 5 ['Oz' '54312' '76.12' '0.7969109999999999' '1.123215256210277'\n",
 743 |       " '2.0836278577775897' '33.827885154432074' '1.0' '1.0' '2.0']\n",
 744 |       "fear 5 ['T3' '32145' '76.12' '0.7969109999999999' '1.070785064412774'\n",
 745 |       " '2.0816308499625555' '33.833396798738484' '1.0' '1.0' '2.0']\n",
 746 |       "fear 5 ['Oz' '53412' '76.12' '0.8281629999999999' '1.2329965892498158'\n",
 747 |       " '2.082159831548873' '33.83698527320212' '1.0' '1.0' '2.0']\n",
 748 |       "fear 5 ['T3' '32145' '76.12' '0.7812859999999999' '-0.36545367412744484'\n",
 749 |       " '2.0807152925288075' '33.832481234761815' '1.0' '1.0' '2.0']\n",
 750 |       "fear 5 ['Oz' '53412' '76.12' '0.8437889999999999' '1.0204049767703953'\n",
 751 |       " '2.0825952769341045' '33.83903134443828' '1.0' '1.0' '2.0']\n",
 752 |       "fear 5 ['T3' '32145' '76.12' '0.812537' '-0.5647917931209475' '2.100320362662788'\n",
 753 |       " '33.84127228898536' '1.0' '1.0' '2.0']\n",
 754 |       "fear 5 ['Oz' '53412' '76.12' '0.7812859999999999' '-0.9453093088561817'\n",
 755 |       " '2.1035750572293406' '33.83724136021236' '1.0' '1.0' '2.0']\n",
 756 |       "fear 5 ['T3' '32145' '76.1' '0.8437889999999999' '0.5986040182193272'\n",
 757 |       " '2.0979001826931207' '33.83264817728806' '1.0' '1.0' '2.0']\n",
 758 |       "happy 6 ['Pz' '45132' '79.15' '0.7969109999999999' '0.6694296874625479'\n",
 759 |       " '1.407283249069112' '33.535102977540696' '2.0' '0.0' '0.0']\n",
 760 |       "happy 6 ['T3' '31245' '79.12' '0.76566' '-1.6462544827551295' '1.4136721907314718'\n",
 761 |       " '33.52041912428691' '2.0' '0.0' '0.0']\n",
 762 |       "happy 6 ['Oz' '54213' '79.1' '0.718783' '0.30247828452648523' '1.4136843735778613'\n",
 763 |       " '33.51379282258109' '2.0' '0.0' '0.0']\n",
 764 |       "happy 6 ['T3' '31425' '79.08' '0.7812859999999999' '-0.2823837880549206'\n",
 765 |       " '1.4118746475247763' '33.518636040491465' '2.0' '0.0' '0.0']\n",
 766 |       "happy 6 ['Oz' '52413' '79.07' '0.76566' '1.959565314641038' '1.4089640022120067'\n",
 767 |       " '33.49164331259344' '2.0' '0.0' '0.0']\n",
 768 |       "happy 6 ['T3' '31425' '79.07' '0.718783' '0.9226504305272188' '1.409256671013323'\n",
 769 |       " '33.52167322343919' '2.0' '0.0' '0.0']\n",
 770 |       "happy 6 ['Oz' '52413' '79.07' '0.812537' '0.27500779740265163'\n",
 771 |       " '1.4058315940863066' '33.53230392709755' '2.0' '0.0' '0.0']\n",
 772 |       "happy 6 ['T3' '31425' '79.07' '0.76566' '-1.5089562134051175' '1.40583644540533'\n",
 773 |       " '33.510645954989904' '2.0' '0.0' '0.0']\n",
 774 |       "happy 6 ['Oz' '54213' '79.07' '0.812537' '-0.26173236922060206'\n",
 775 |       " '1.4044186201426792' '33.51509709701057' '2.0' '0.0' '0.0']\n",
 776 |       "happy 6 ['T3' '31245' '79.05' '0.687531' '1.0416852723239831' '1.4074977377919824'\n",
 777 |       " '33.519206084262464' '2.0' '0.0' '0.0']\n",
 778 |       "happy 6 ['Oz' '52413' '79.05' '0.76566' '0.1544210541043268' '1.403873784806386'\n",
 779 |       " '33.49049304182854' '2.0' '0.0' '0.0']\n",
 780 |       "happy 6 ['T3' '31452' '79.05' '0.7812859999999999' '-1.2727759797688016'\n",
 781 |       " '1.4022805696793328' '33.50875359897696' '2.0' '0.0' '0.0']\n",
 782 |       "happy 6 ['Cz' '25413' '79.05' '0.812537' '-0.09420160008531005'\n",
 783 |       " '1.4013342962184199' '33.48979476967718' '2.0' '0.0' '0.0']\n",
 784 |       "happy 6 ['T3' '31425' '79.07' '0.812537' '-0.6202851067401004'\n",
 785 |       " '1.4014874434886562' '33.52836160325704' '2.0' '0.0' '0.0']\n",
 786 |       "happy 6 ['Oz' '52413' '79.1' '0.76566' '-0.6441170509582707' '1.3977868742523534'\n",
 787 |       " '33.504076421967' '2.0' '0.0' '0.0']\n",
 788 |       "excitement 7 ['Fp2' '13245' '68.17' '0.906291' '1.3193244495821541'\n",
 789 |       " '1.5477139469161285' '34.49178491332009' '2.0' '2.0' '0.0']\n",
 790 |       "excitement 7 ['T4' '31542' '68.13' '0.7812859999999999' '4.107369069143834'\n",
 791 |       " '1.597438448706406' '34.502383443612565' '2.0' '2.0' '0.0']\n",
 792 |       "excitement 7 ['Fp1' '15243' '68.17' '0.718783' '0.7978528385327716'\n",
 793 |       " '1.5685838233666454' '34.4986151098381' '2.0' '2.0' '0.0']\n",
 794 |       "excitement 7 ['T4' '31425' '68.18' '0.687531' '5.496184750549107' '1.5697719852264358'\n",
 795 |       " '34.492855509069805' '2.0' '2.0' '0.0']\n",
 796 |       "excitement 7 ['Oz' '51243' '68.22' '0.671906' '1.917568220196811' '1.5545787446432462'\n",
 797 |       " '34.52195565512419' '2.0' '2.0' '0.0']\n",
 798 |       "excitement 7 ['T4' '31425' '68.28' '0.890666' '-0.87980803454923' '1.5517350159413525'\n",
 799 |       " '34.533162497872055' '2.0' '2.0' '0.0']\n",
 800 |       "excitement 7 ['Oz' '51243' '68.37' '0.937543' '0.8589830805285692' '1.5380198078297014'\n",
 801 |       " '34.494116760546916' '2.0' '2.0' '0.0']\n",
 802 |       "excitement 7 ['T4' '31245' '68.45' '0.734409' '-1.4661725880234684'\n",
 803 |       " '1.5321940049526261' '34.49837731232687' '2.0' '2.0' '0.0']\n",
 804 |       "excitement 7 ['Fp1' '15423' '68.55' '0.906291' '-2.671277070201149'\n",
 805 |       " '1.5200942138516844' '34.49985785620541' '2.0' '2.0' '0.0']\n",
 806 |       "excitement 7 ['T4' '34125' '68.67' '1.015671' '-1.0789802900479928'\n",
 807 |       " '1.5148597104186712' '34.492186694575544' '2.0' '2.0' '0.0']\n",
 808 |       "excitement 7 ['Fp1' '12543' '68.77' '0.7969109999999999' '1.0791109759882'\n",
 809 |       " '1.5044195978276658' '34.48870705832745' '2.0' '2.0' '0.0']\n",
 810 |       "excitement 7 ['T4' '31542' '68.85' '0.718783' '-0.2723437542514626'\n",
 811 |       " '1.5031249754247178' '34.49969020258051' '2.0' '2.0' '0.0']\n",
 812 |       "excitement 7 ['Cz' '21453' '68.92' '0.7969109999999999' '-0.2644117571015296'\n",
 813 |       " '1.4843273784947568' '34.49067899008492' '2.0' '2.0' '0.0']\n",
 814 |       "excitement 7 ['T4' '31542' '68.97' '0.968794' '2.3595229109872586' '1.4900023209273787'\n",
 815 |       " '34.499066911665686' '2.0' '2.0' '0.0']\n",
 816 |       "excitement 7 ['Cz' '21453' '69.0' '0.953169' '-2.449954051333873' '1.4621332754725833'\n",
 817 |       " '34.47406108484984' '2.0' '2.0' '0.0']\n",
 818 |       "fear 8 ['Fp2' '13254' '82.68' '0.687531' '4.0530066510939955' '1.199575340310592'\n",
 819 |       " '34.43690633487135' '1.0' '1.0' '2.0']\n",
 820 |       "fear 8 ['T4' '31254' '82.58' '0.671906' '-0.18318459653342575'\n",
 821 |       " '1.1992765064997448' '34.451938804877514' '1.0' '1.0' '2.0']\n",
 822 |       "fear 8 ['T3' '34521' '82.52' '0.718783' '0.8853220749388773' '1.1941833632335217'\n",
 823 |       " '34.44948700040381' '1.0' '1.0' '2.0']\n",
 824 |       "fear 8 ['T4' '31524' '82.5' '0.687531' '1.101581780645237' '1.1931794239213276'\n",
 825 |       " '34.44473097500206' '1.0' '1.0' '2.0']\n",
 826 |       "fear 8 ['T3' '34215' '82.57' '0.65628' '0.8459752235378747' '1.1924264218194012'\n",
 827 |       " '34.425309393019845' '1.0' '1.0' '2.0']\n",
 828 |       "fear 8 ['T4' '31524' '82.67' '0.671906' '0.5307977822396893' '1.1969902149707787'\n",
 829 |       " '34.44022563363777' '1.0' '1.0' '2.0']\n",
 830 |       "fear 8 ['T3' '34215' '82.8' '0.671906' '0.07238114628940619' '1.192888287452217'\n",
 831 |       " '34.4438444531937' '1.0' '1.0' '2.0']\n",
 832 |       "fear 8 ['T4' '31524' '82.95' '0.625029' '3.0326111000085083' '1.1914440467409884'\n",
 833 |       " '34.42664010132504' '1.0' '1.0' '2.0']\n",
 834 |       "fear 8 ['T3' '34251' '83.13' '0.640654' '-0.1615611943226964' '1.191558613843376'\n",
 835 |       " '34.438748033228386' '1.0' '1.0' '2.0']\n",
 836 |       "fear 8 ['T4' '31524' '83.37' '0.609403' '-0.09203145177699404'\n",
 837 |       " '1.1920580435310018' '34.44545335556807' '1.0' '1.0' '2.0']\n",
 838 |       "fear 8 ['T3' '34512' '83.63' '0.625029' '-2.2371136408751418'\n",
 839 |       " '1.1916018242257427' '34.42483835902005' '1.0' '1.0' '2.0']\n",
 840 |       "fear 8 ['T4' '31245' '83.93' '0.625029' '-3.7825080233004713'\n",
 841 |       " '1.1910376332589652' '34.44087391742899' '1.0' '1.0' '2.0']\n",
 842 |       "fear 8 ['T3' '35421' '84.23' '0.609403' '-0.20619718778918816'\n",
 843 |       " '1.1926686934599962' '34.442725135402675' '1.0' '1.0' '2.0']\n",
 844 |       "fear 8 ['T4' '31245' '84.53' '0.640654' '-2.745427415012558' '1.1946237498199608'\n",
 845 |       " '34.42895283368188' '1.0' '1.0' '2.0']\n",
 846 |       "fear 8 ['T3' '34521' '84.8' '0.65628' '6.33587900085689' '1.1931143369123864'\n",
 847 |       " '34.429325669338894' '1.0' '1.0' '2.0']\n",
 848 |       "(135, 11)\n",
 849 |       "    clip_id eeg_max_ch eeg_brain_pattern     hr                 ibi  \\\n",
 850 |       "0         0         Oz             54312   84.9            0.687531   \n",
 851 |       "1         0         Pz             42513  85.02            0.718783   \n",
 852 |       "2         0         T4             31524  85.12            0.687531   \n",
 853 |       "3         0         Pz             42513   85.2  0.7031569999999999   \n",
 854 |       "4         0         T4             31524  85.27  0.7031569999999999   \n",
 855 |       "5         0         Pz             45213  85.27            0.718783   \n",
 856 |       "6         0         T4             31524  85.28             0.76566   \n",
 857 |       "7         0         Pz             42513  85.28            0.718783   \n",
 858 |       "8         0         T4             31524   85.3            0.640654   \n",
 859 |       "9         0         Pz             42153   85.3             0.65628   \n",
 860 |       "10        0         T4             31524  85.32  0.7031569999999999   \n",
 861 |       "11        0         Pz             42153  85.33             0.65628   \n",
 862 |       "12        0         T4             31524  85.35            0.687531   \n",
 863 |       "13        0         Pz             42513  85.37            0.687531   \n",
 864 |       "14        0         T4             31254  85.38  0.7031569999999999   \n",
 865 |       "15        1         Fz             12345   81.4            0.734409   \n",
 866 |       "16        1         Fz             12345  81.37            0.734409   \n",
 867 |       "17        1         T3             35142  81.33            0.734409   \n",
 868 |       "18        1         Fz             12345   81.3  0.7969109999999999   \n",
 869 |       "19        1         T3             31542  81.28            0.734409   \n",
 870 |       "20        1         Fz             13245  81.28  0.7969109999999999   \n",
 871 |       "21        1         T3             31542   81.3            0.718783   \n",
 872 |       "22        1         Fz             13245  81.32  0.7031569999999999   \n",
 873 |       "23        1         T3             31542  81.32             0.65628   \n",
 874 |       "24        1         Fz             12345  81.28            0.718783   \n",
 875 |       "25        1         T3             35142  81.25  0.7812859999999999   \n",
 876 |       "26        1         Fz             12345  81.22            0.812537   \n",
 877 |       "27        1         T3             35142  81.18  0.8281629999999999   \n",
 878 |       "28        1         Fz             12345  81.17  0.7969109999999999   \n",
 879 |       "29        1         Oz             53412  81.15  0.7969109999999999   \n",
 880 |       "..      ...        ...               ...    ...                 ...   \n",
 881 |       "105       7        Fp2             13245  68.17            0.906291   \n",
 882 |       "106       7         T4             31542  68.13  0.7812859999999999   \n",
 883 |       "107       7        Fp1             15243  68.17            0.718783   \n",
 884 |       "108       7         T4             31425  68.18            0.687531   \n",
 885 |       "109       7         Oz             51243  68.22            0.671906   \n",
 886 |       "110       7         T4             31425  68.28            0.890666   \n",
 887 |       "111       7         Oz             51243  68.37            0.937543   \n",
 888 |       "112       7         T4             31245  68.45            0.734409   \n",
 889 |       "113       7        Fp1             15423  68.55            0.906291   \n",
 890 |       "114       7         T4             34125  68.67            1.015671   \n",
 891 |       "115       7        Fp1             12543  68.77  0.7969109999999999   \n",
 892 |       "116       7         T4             31542  68.85            0.718783   \n",
 893 |       "117       7         Cz             21453  68.92  0.7969109999999999   \n",
 894 |       "118       7         T4             31542  68.97            0.968794   \n",
 895 |       "119       7         Cz             21453   69.0            0.953169   \n",
 896 |       "120       8        Fp2             13254  82.68            0.687531   \n",
 897 |       "121       8         T4             31254  82.58            0.671906   \n",
 898 |       "122       8         T3             34521  82.52            0.718783   \n",
 899 |       "123       8         T4             31524   82.5            0.687531   \n",
 900 |       "124       8         T3             34215  82.57             0.65628   \n",
 901 |       "125       8         T4             31524  82.67            0.671906   \n",
 902 |       "126       8         T3             34215   82.8            0.671906   \n",
 903 |       "127       8         T4             31524  82.95            0.625029   \n",
 904 |       "128       8         T3             34251  83.13            0.640654   \n",
 905 |       "129       8         T4             31524  83.37            0.609403   \n",
 906 |       "130       8         T3             34512  83.63            0.625029   \n",
 907 |       "131       8         T4             31245  83.93            0.625029   \n",
 908 |       "132       8         T3             35421  84.23            0.609403   \n",
 909 |       "133       8         T4             31245  84.53            0.640654   \n",
 910 |       "134       8         T3             34521   84.8             0.65628   \n",
 911 |       "\n",
 912 |       "                       bvp                 eda                temp happy  \\\n",
 913 |       "0        -4.55078166902484   1.404545697131073   33.22175052042639   2.0   \n",
 914 |       "1      -0.4408397303188701  1.4059218404262792    33.2330202460298   2.0   \n",
 915 |       "2       -2.261250175994979  1.4053177023885546   33.22508311478038   2.0   \n",
 916 |       "3      -0.5592622079354549  1.4045685288905447   33.21375297852862   2.0   \n",
 917 |       "4       0.2585891191377308  1.4046934557677224   33.22953231584786   2.0   \n",
 918 |       "5     -0.21866234574152799  1.4046159906540399  33.230593521864456   2.0   \n",
 919 |       "6       2.2161318293549632  1.4049554957133694   33.22877192664639   2.0   \n",
 920 |       "7        1.121558138852573  1.4043784486084998  33.234121259741826   2.0   \n",
 921 |       "8      -0.6403204100316248  1.4056235883166583   33.24477075035723   2.0   \n",
 922 |       "9      0.09778859247155212  1.4057573825425953   33.20839267792665   2.0   \n",
 923 |       "10      0.3117063141551386  1.4059360268528507   33.21398044361345   2.0   \n",
 924 |       "11    -0.31164829436069846  1.4042590967314146   33.22927779383731   2.0   \n",
 925 |       "12   -0.013391629188320663  1.4055989769696096  33.230784891088156   2.0   \n",
 926 |       "13   -0.013364412095818306  1.4075265051213866   33.22723248611298   2.0   \n",
 927 |       "14     -1.4865656192798251     1.4012132638854   33.20893507319845   2.0   \n",
 928 |       "15    -0.26927579528588186  1.4824177706616941  33.581049975719274   1.0   \n",
 929 |       "16     -0.5588478778851528  1.4911355711645364  33.573886026410186   1.0   \n",
 930 |       "17      0.3418263774759193  1.4891314047923305   33.58631546090638   1.0   \n",
 931 |       "18      0.8395639515363473   1.487967446388503  33.573872231355615   1.0   \n",
 932 |       "19      1.5365811488670178  1.4823944826940016    33.5858379741147   1.0   \n",
 933 |       "20      0.4710687831267232  1.4832861177659982   33.57433381196757   1.0   \n",
 934 |       "21     -0.2261206129154618   1.480298456513354   33.56856478813554   1.0   \n",
 935 |       "22        -1.1768137116079  1.4797264821181584   33.55395725062644   1.0   \n",
 936 |       "23    -0.01013021843250789  1.4788776503718737  33.565334408649996   1.0   \n",
 937 |       "24     -1.2361209062555765  1.4747112098672046   33.55519797641607   1.0   \n",
 938 |       "25      0.1506077354797684  1.4753626843370382   33.56425735428033   1.0   \n",
 939 |       "26      -0.794105344651547  1.4742803529232011   33.55638017944211   1.0   \n",
 940 |       "27     -0.8514028731615745  1.4702868171880499  33.562981695040186   1.0   \n",
 941 |       "28     -0.5299672415725517  1.4702046034827025  33.574501563926894   1.0   \n",
 942 |       "29        1.12907408528238  1.4643561997313566   33.54852930300873   1.0   \n",
 943 |       "..                     ...                 ...                 ...   ...   \n",
 944 |       "105     1.3193244495821541  1.5477139469161285   34.49178491332009   2.0   \n",
 945 |       "106      4.107369069143834   1.597438448706406  34.502383443612565   2.0   \n",
 946 |       "107     0.7978528385327716  1.5685838233666454    34.4986151098381   2.0   \n",
 947 |       "108      5.496184750549107  1.5697719852264358  34.492855509069805   2.0   \n",
 948 |       "109      1.917568220196811  1.5545787446432462   34.52195565512419   2.0   \n",
 949 |       "110      -0.87980803454923  1.5517350159413525  34.533162497872055   2.0   \n",
 950 |       "111     0.8589830805285692  1.5380198078297014  34.494116760546916   2.0   \n",
 951 |       "112    -1.4661725880234684  1.5321940049526261   34.49837731232687   2.0   \n",
 952 |       "113     -2.671277070201149  1.5200942138516844   34.49985785620541   2.0   \n",
 953 |       "114    -1.0789802900479928  1.5148597104186712  34.492186694575544   2.0   \n",
 954 |       "115        1.0791109759882  1.5044195978276658   34.48870705832745   2.0   \n",
 955 |       "116    -0.2723437542514626  1.5031249754247178   34.49969020258051   2.0   \n",
 956 |       "117    -0.2644117571015296  1.4843273784947568   34.49067899008492   2.0   \n",
 957 |       "118     2.3595229109872586  1.4900023209273787  34.499066911665686   2.0   \n",
 958 |       "119     -2.449954051333873  1.4621332754725833   34.47406108484984   2.0   \n",
 959 |       "120     4.0530066510939955   1.199575340310592   34.43690633487135   1.0   \n",
 960 |       "121   -0.18318459653342575  1.1992765064997448  34.451938804877514   1.0   \n",
 961 |       "122     0.8853220749388773  1.1941833632335217   34.44948700040381   1.0   \n",
 962 |       "123      1.101581780645237  1.1931794239213276   34.44473097500206   1.0   \n",
 963 |       "124     0.8459752235378747  1.1924264218194012  34.425309393019845   1.0   \n",
 964 |       "125     0.5307977822396893  1.1969902149707787   34.44022563363777   1.0   \n",
 965 |       "126    0.07238114628940619   1.192888287452217    34.4438444531937   1.0   \n",
 966 |       "127     3.0326111000085083  1.1914440467409884   34.42664010132504   1.0   \n",
 967 |       "128    -0.1615611943226964   1.191558613843376  34.438748033228386   1.0   \n",
 968 |       "129   -0.09203145177699404  1.1920580435310018   34.44545335556807   1.0   \n",
 969 |       "130    -2.2371136408751418  1.1916018242257427   34.42483835902005   1.0   \n",
 970 |       "131    -3.7825080233004713  1.1910376332589652   34.44087391742899   1.0   \n",
 971 |       "132   -0.20619718778918816  1.1926686934599962  34.442725135402675   1.0   \n",
 972 |       "133     -2.745427415012558  1.1946237498199608   34.42895283368188   1.0   \n",
 973 |       "134       6.33587900085689  1.1931143369123864  34.429325669338894   1.0   \n",
 974 |       "\n",
 975 |       "    excitement fear  \n",
 976 |       "0          0.0  0.0  \n",
 977 |       "1          0.0  0.0  \n",
 978 |       "2          0.0  0.0  \n",
 979 |       "3          0.0  0.0  \n",
 980 |       "4          0.0  0.0  \n",
 981 |       "5          0.0  0.0  \n",
 982 |       "6          0.0  0.0  \n",
 983 |       "7          0.0  0.0  \n",
 984 |       "8          0.0  0.0  \n",
 985 |       "9          0.0  0.0  \n",
 986 |       "10         0.0  0.0  \n",
 987 |       "11         0.0  0.0  \n",
 988 |       "12         0.0  0.0  \n",
 989 |       "13         0.0  0.0  \n",
 990 |       "14         0.0  0.0  \n",
 991 |       "15         2.0  2.0  \n",
 992 |       "16         2.0  2.0  \n",
 993 |       "17         2.0  2.0  \n",
 994 |       "18         2.0  2.0  \n",
 995 |       "19         2.0  2.0  \n",
 996 |       "20         2.0  2.0  \n",
 997 |       "21         2.0  2.0  \n",
 998 |       "22         2.0  2.0  \n",
 999 |       "23         2.0  2.0  \n",
1000 |       "24         2.0  2.0  \n",
1001 |       "25         2.0  2.0  \n",
1002 |       "26         2.0  2.0  \n",
1003 |       "27         2.0  2.0  \n",
1004 |       "28         2.0  2.0  \n",
1005 |       "29         2.0  2.0  \n",
1006 |       "..         ...  ...  \n",
1007 |       "105        2.0  0.0  \n",
1008 |       "106        2.0  0.0  \n",
1009 |       "107        2.0  0.0  \n",
1010 |       "108        2.0  0.0  \n",
1011 |       "109        2.0  0.0  \n",
1012 |       "110        2.0  0.0  \n",
1013 |       "111        2.0  0.0  \n",
1014 |       "112        2.0  0.0  \n",
1015 |       "113        2.0  0.0  \n",
1016 |       "114        2.0  0.0  \n",
1017 |       "115        2.0  0.0  \n",
1018 |       "116        2.0  0.0  \n",
1019 |       "117        2.0  0.0  \n",
1020 |       "118        2.0  0.0  \n",
1021 |       "119        2.0  0.0  \n",
1022 |       "120        1.0  2.0  \n",
1023 |       "121        1.0  2.0  \n",
1024 |       "122        1.0  2.0  \n",
1025 |       "123        1.0  2.0  \n",
1026 |       "124        1.0  2.0  \n",
1027 |       "125        1.0  2.0  \n",
1028 |       "126        1.0  2.0  \n",
1029 |       "127        1.0  2.0  \n",
1030 |       "128        1.0  2.0  \n",
1031 |       "129        1.0  2.0  \n",
1032 |       "130        1.0  2.0  \n",
1033 |       "131        1.0  2.0  \n",
1034 |       "132        1.0  2.0  \n",
1035 |       "133        1.0  2.0  \n",
1036 |       "134        1.0  2.0  \n",
1037 |       "\n",
1038 |       "[135 rows x 11 columns]\n"
1039 |      ]
1040 |     }
1041 |    ],
1042 |    "source": [
1043 |     "# Build one combined table from the three per-emotion arrays and export it as CSV.\n",
1044 |     "# Needs the to_db_*.npy files of all emotions to be present already.\n",
1045 |     "happiness = np.load('../data/to_db_Happiness.npy')\n",
1046 |     "excitement = np.load('../data/to_db_Excitement.npy')\n",
1047 |     "fear = np.load('../data/to_db_Fear.npy')\n",
1048 |     "\n",
1049 |     "# (label printed for each row, array); list position is the clip-id offset,\n",
1050 |     "# so clip ids come out as 0,3,6 happy / 1,4,7 excitement / 2,5,8 fear.\n",
1051 |     "emotion_arrays = [('happy', happiness), ('excitement', excitement), ('fear', fear)]\n",
1052 |     "\n",
1053 |     "column_names = ['clip_id', 'eeg_max_ch', 'eeg_brain_pattern', 'hr', 'ibi',\n",
1054 |     "                'bvp', 'eda', 'temp', 'happy', 'excited', 'fear']\n",
1055 |     "\n",
1056 |     "n_sets = 3       # first-axis entries read from each array\n",
1057 |     "n_samples = 15   # last-axis entries read from each array\n",
1058 |     "\n",
1059 |     "combine_samples = []\n",
1060 |     "for set_idx in range(n_sets):\n",
1061 |     "    for offset, (label, values) in enumerate(emotion_arrays):\n",
1062 |     "        clip_id = set_idx*3 + offset\n",
1063 |     "        for sample_idx in range(n_samples):\n",
1064 |     "            # all axis-1 values for this (set, sample) pair form one row\n",
1065 |     "            row = values[set_idx, :, sample_idx]\n",
1066 |     "            print label, clip_id, row\n",
1067 |     "            # clip id goes in front so it lines up with the first column name\n",
1068 |     "            combine_samples.append(np.insert(row, 0, clip_id))\n",
1069 |     "\n",
1070 |     "combine_samples = np.array(combine_samples)\n",
1071 |     "print combine_samples.shape\n",
1072 |     "\n",
1073 |     "# every array row becomes one DataFrame row\n",
1074 |     "df = pd.DataFrame(combine_samples, columns=column_names)\n",
1075 |     "print df\n",
1076 |     "\n",
1077 |     "# the DataFrame index is written out under the header 'row_id'\n",
1078 |     "df.to_csv('../data/to_db.csv', index_label='row_id')"
1079 |    ]
1080 |   },
1081 |   {
1082 |    "cell_type": "code",
1083 |    "execution_count": null,
1084 |    "metadata": {},
1085 |    "outputs": [],
1086 |    "source": []
1087 |   }
1088 |  ],
1089 |  "metadata": {
1090 |   "kernelspec": {
1091 |    "display_name": "Python 2",
1092 |    "language": "python",
1093 |    "name": "python2"
1094 |   },
1095 |   "language_info": {
1096 |    "codemirror_mode": {
1097 |     "name": "ipython",
1098 |     "version": 2
1099 |    },
1100 |    "file_extension": ".py",
1101 |    "mimetype": "text/x-python",
1102 |    "name": "python",
1103 |    "nbconvert_exporter": "python",
1104 |    "pygments_lexer": "ipython2",
1105 |    "version": "2.7.12"
1106 |   }
1107 |  },
1108 |  "nbformat": 4,
1109 |  "nbformat_minor": 2
1110 | }
1111 | 


--------------------------------------------------------------------------------