├── README.md
├── gcForestConfig.py
├── DEAP_RSP_feature_extract.py
├── EEGFeatV2.py
├── test.py
├── FeatureExtract.py
├── test_2.py
├── sampleEntropy.py
├── train_test.py
├── DNN_inference.py
├── CNN.py
├── stackingFunc.py
├── DEAP_data_xgb.py
├── DEAP_xgb_2c.py
├── CNNFunction.py
├── config.py
├── DEAP_Classification_KFold.py
├── DEAP_linearR_GridSearchCV.py
├── XGB_multiCla.py
├── DEAP_feat_select.py
├── XGB.py
├── XGBRegression.py
├── DNN.py
├── GCF_2cEEG_multiCH_TimeIMFS.ipynb
├── DEAP_data_preprocess.py
├── DEAP_linearR_KFold.py
├── DEAP_linearR_plots.py
├── XGB_4emotion.py
├── DEAP_linearR.py
├── XGB_2cEEG.ipynb
├── XGB_2c.ipynb
├── plot_test.py
├── RSP_feat_extract.py
├── GCF_2cGSR_TimeDomain.ipynb
├── EEG_EMD.ipynb
├── GCF_2cPPG_TimeDomain.ipynb
├── GCF_2cRandom.ipynb
├── GCF_2cTMP__TimeDomain.ipynb
├── EEG_feat_extract.py
├── DEAP_GSR_feature_extract.py
└── GCF_2cRSP__TimeDomain.ipynb


/README.md:
--------------------------------------------------------------------------------
1 | # Emotion-recognition
2 | Emotion recognition using DEAP dataset，GSR。
3 | 


--------------------------------------------------------------------------------
/gcForestConfig.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Mon Nov 12 18:43:24 2018
3 | 
4 | @author: jinyx
5 | """
6 | 
7 | 
8 | if __name__ == "__main__":
9 |     


--------------------------------------------------------------------------------
/DEAP_RSP_feature_extract.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Feb 27 19:52:51 2018
4 | 
5 | @author: jinyx
6 | """
7 | d 
8 | 


--------------------------------------------------------------------------------
/EEGFeatV2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sat Oct  6 14:51:59 2018
5 | 
6 | @author: jinyx
7 | """
8 | 
9 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import pandas as pd
 3 | import numpy as np
 4 | import pickle
 5 | 
 6 | GSR_feature_df = pickle.load(open("./dump_file/GSR_feature_df","rb"))
 7 | all_df_y_mutiLable = pickle.load(open("./dump_file/all_df_y_mutiLable","rb"))
 8 | 
 9 | 
10 | 
11 | feature_cols=GSR_feature_df.columns
12 | corrs=GSR_feature_df[feature_cols].apply(lambda col:np.abs(all_df_y_mutiLable['emotion'].corr(col)))
13 | sort_corrs = corrs.sort_values()
14 | 


--------------------------------------------------------------------------------
/FeatureExtract.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 特征处理函数
 3 | """
 4 | '''1-EEG特征'''
 5 | '''1.1-EEG时域特征'''
 6 | '''
 7 | 1.1.1-EEG时域均值
 8 | IN：时域的离散数据值，比如8064个采样点
 9 | OUT：均值
10 | '''
11 | def eeg_mean(df):
12 |     return df.mean(axis=1)
13 | '''
14 | 1.1.2-EEG时域中值
15 | IN：时域的离散数据值，比如8064个采样点
16 | OUT：中值
17 | '''
18 | def eeg_median(df):
19 |     return df.median(axis=1)
20 | '''
21 | 1.1.2-EEG时域标准差
22 | IN：时域的离散数据值，比如8064个采样点
23 | OUT：标准差
24 | '''
25 | def eeg_std(df):
26 |     return df.std(axis=1)
27 | '''
28 | 1.1.2-EEG时域香农熵
29 | IN：时域的离散数据值，比如8064个采样点
30 | OUT：香农熵
31 | '''


--------------------------------------------------------------------------------
/test_2.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Thu May 17 14:41:51 2018
 4 | 
 5 | @author: jinyu
 6 | """
 7 | '''
 8 | import tensorflow as tf  
 9 | # 设计Graph  
10 | x1 = tf.constant([2, 3, 4])  
11 | x2 = tf.constant([4, 0, 1])  
12 | x3 = tf.constant([1, 1, 1]) 
13 | y = tf.add(x1, x2)  
14 | # 打开一个session --> 计算y  
15 | with tf.Session() as sess:  
16 |     print(sess.run(y))  
17 | '''
18 | import tensorflow as tf  
19 | # 设计Graph  
20 | x1 = tf.placeholder(tf.int16)  
21 | x2 = tf.placeholder(tf.int16)  
22 | y = tf.add(x1, x2)  
23 | # 用Python产生数据  
24 | li1 = [2, 3, 4]  
25 | li2 = [4, 0, 1]  
26 | # 打开一个session --> 喂数据 --> 计算y  
27 | with tf.Session() as sess:  
28 |     print(sess.run(y, feed_dict={x1: li1, x2: li2})) 


--------------------------------------------------------------------------------
/sampleEntropy.py:
--------------------------------------------------------------------------------
 1 | """
 2 | SampEn  计算时间序列data的样本熵
 3 | 输入：data是数据一维行向量
 4 | m重构维数，一般选择1或2，优先选择2，一般不取m>2
 5 | r 阈值大小，一般选择r=0.1~0.25*Std(data)
 6 | 输出：SampEnVal样本熵值大小
 7 | """
 8 | import numpy as np
 9 | 
10 | def sampEn(U,m,r):
11 |     def _maxdist(x_i, x_j):
12 |         return max([abs(ua - va) for ua, va in zip(x_i, x_j)])
13 | 
14 |     def _phi(m):
15 |         x = [[U[j] for j in range(i, i + m - 1 + 1)] for i in range(N - m + 1)]
16 |         B = [(len([1 for x_j in x if _maxdist(x_i, x_j) <= r]) - 1.0) / (N - m) for x_i in x]
17 |         return (N - m + 1.0)**(-1) * sum(B)
18 |      
19 |     N = len(U)
20 |     return -np.log(_phi(m+1) / _phi(m))
21 | 
22 | if __name__ == "__main__":
23 |     # Usage example
24 |     U = np.array([85, 80, 89] *17)
25 |     print(sampEn(U,2,3))


--------------------------------------------------------------------------------
/train_test.py:
--------------------------------------------------------------------------------
 1 | import pickle 
 2 | from sklearn.model_selection import train_test_split
 3 | #读取Y
 4 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
 5 | all_df_y['2cArousal'] = 0
 6 | all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1
 7 | all_df_y['2cValence'] = 0
 8 | all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1
 9 | print(all_df_y.head(5))
10 | #读取32个通道的EEG数据，每个通道包含32×40=1280个信号样本（人次×每人次40实验）
11 | #每个样本向量大小为8064点（63s*128Hz）
12 | for eegCH in range(1,2,1):
13 |     file_path = "./dump_file/CH{}_df_EEG_x".format(eegCH)
14 |     locals()['CH{}_df_EEG_x'.format(eegCH)] = pickle.load(open(file_path,"rb"))
15 |     #file_path = "./dump_file/CH{}eegfft_df".format(eegCH)
16 |     #locals()["CH{}eegfft_df".format(eegCH)] = pickle.load(open(file_path,"rb"))
17 | 
18 | X = CH1_df_EEG_x
19 | y = all_df_y[['2cValence']]
20 | if True:
21 |     for seed in [0,100,200,300,400,500,600,700,800,900]:
22 |         X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3,stratify=y,random_state=seed)   
23 |         xTrainIdx = X_tr.index
24 |         xTestIdx = X_te.index
25 |         pickle.dump(xTrainIdx,open("./dump_file/xTrainIdx_{}".format(seed),"wb"))
26 |         pickle.dump(xTestIdx,open("./dump_file/xTestIdx_{}".format(seed),"wb"))


--------------------------------------------------------------------------------
/DNN_inference.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | Created on Thu Sep 27 15:43:31 2018
 4 | DNN 前向传播过程
 5 | @author: jinyx
 6 | """
 7 | import tensorflow as tf
 8 | 
 9 | #定义神经网络结构相关的参数
10 | INPUT_NODE = 258048          # 32*8064，输入节点
11 | OUTPUT_NODE = 2              # 2分类，输出节点
12 | LAYER1_NODE = 500            # 隐藏层节点数
13 | 
14 | def get_weight_variable(shape, regularizer):
15 |     weights = tf.get_variable("weights", shape, initializer=tf.truncated_normal_initializer(stddev=0.1))
16 |     if regularizer != None: 
17 |         tf.add_to_collection('losses', regularizer(weights))
18 |     return weights
19 | 
20 | def inference(input_tensor, regularizer):
21 |     with tf.variable_scope('layer1'):
22 |         weights = get_weight_variable([INPUT_NODE, LAYER1_NODE], regularizer)
23 |         biases = tf.get_variable("biases", [LAYER1_NODE], initializer=tf.constant_initializer(0.0))
24 |         layer1 = tf.nn.relu(tf.matmul(input_tensor, weights) + biases)
25 | 
26 |     with tf.variable_scope('layer2'):
27 |         weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE], regularizer)
28 |         biases = tf.get_variable("biases", [OUTPUT_NODE], initializer=tf.constant_initializer(0.0))
29 |         layer2 = tf.matmul(layer1, weights) + biases
30 | 
31 |     return layer2


--------------------------------------------------------------------------------
/CNN.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Mon Sep 17 10:10:21 2018
 5 | CNN做分类
 6 | @author: jinyx
 7 | """
 8 | import pandas as pd
 9 | import numpy as np
10 | import pickle
11 | import random
12 | import tensorflow as tf
13 | from sklearn.preprocessing import MinMaxScaler
14 | from sklearn import preprocessing
15 | from sklearn.model_selection import cross_val_score
16 | from sklearn.model_selection import cross_val_predict
17 | from sklearn.metrics import accuracy_score
18 | import warnings
19 | warnings.filterwarnings("ignore")
20 | #用来计算程序运行时间
21 | import datetime
22 | starttime = datetime.datetime.now()
23 | #读取数据
24 | for eeg_CH in range(1,33,1):
25 |     file_path = "./dump_file/CH{}_df_EEG_x".format(eeg_CH)
26 |     df_data = pickle.load(open(file_path,"rb"))
27 |     locals()["CH{}_df_EEG_x".format(eeg_CH)] = df_data
28 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
29 | for i in range(0,1280,1): #总共1280个实验所以会有1280个二维矩阵
30 |     locals()["mat{}".format(i)]=pd.DataFrame()
31 |     for eeg_CH in range(1,33,1): #脑电共有32个通道，所以一个矩阵大小32*8064
32 |         locals()["mat{}".format(i)] = locals()["mat{}".format(i)].\
33 |             append(locals()["CH{}_df_EEG_x".format(eeg_CH)].iloc[i:i+1],ignore_index=True)
34 | 
35 | #
36 | INPUT_NODE = 258048 #32*8064
37 | OUTPUT_NODE = 2 #2分类
38 | 
39 | #先简单的用1000个作为训练集，280个作为测试集


--------------------------------------------------------------------------------
/stackingFunc.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on Fri Nov  9 09:44:31 2018
 3 | stacking model fusion functions
 4 | @author: jinyx
 5 | """
 6 | import numpy as np
 7 | from sklearn.model_selection import StratifiedKFold
 8 | 
 9 | def get_stacking(clf, x_train, y_train, x_test, n_folds=10):
10 |     """
11 |     这个函数是stacking的核心，使用交叉验证的方法得到次级训练集
12 |     x_train, y_train, x_test 的值应该为numpy里面的数组类型 numpy.ndarray .
13 |     如果输入为pandas的DataFrame类型则会把报错
14 |     """
15 |     train_num, test_num = x_train.shape[0], x_test.shape[0]
16 |     second_level_train_set = np.zeros((train_num,))
17 |     second_level_test_set = np.zeros((test_num,))
18 |     test_nfolds_sets = np.zeros((test_num, n_folds))
19 |     skf = StratifiedKFold(n_splits=n_folds)
20 |     
21 |     i=0
22 |     for (trainIdx, valiIdx) in skf.split(x_train,y_train):
23 |         #print(x_train[trainIdx].shape,x_train[valiIdx].shape)        
24 |         x_tra, y_tra = x_train[trainIdx], y_train[trainIdx]
25 |         x_tst, y_tst =  x_train[valiIdx], y_train[valiIdx]
26 |         clf.fit(x_tra, y_tra)
27 |     
28 |         second_level_train_set[valiIdx] = clf.predict(x_tst)
29 |         test_nfolds_sets[:,i] = clf.predict(x_test)
30 |         i+=1
31 |                
32 |     #回归预测取均值，分类呢？
33 |     second_level_test_set[:] = test_nfolds_sets.mean(axis=1)
34 |     return second_level_train_set, second_level_test_set
35 | 
36 | if __name__ == "__main__":
37 |     X = [1,2,3,4,5,6,7,8,9,10]
38 |     X = np.array(X)
39 |     print(X)
40 |     y = [0,0,0,0,1,1,1,1,1,1]
41 |     get_stacking(clf=None,x_train=X ,y_train=y ,x_test=X,n_folds=2 )


--------------------------------------------------------------------------------
/DEAP_data_xgb.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import pandas as pd
 4 | import numpy as np
 5 | import xgboost as xgb
 6 | import pickle
 7 | from sklearn.preprocessing import MinMaxScaler
 8 | 
 9 | GSR_selected_feature_df = pickle.load(open("./dump_file/GSR_selected_feature_df","rb"))
10 | all_df_y_mutiLable = pickle.load(open("./dump_file/all_df_y_mutiLable","rb"))
11 | print('GSR_selected_feature_df.shape:',GSR_selected_feature_df.shape)
12 | print('all_df_y_mutiLable.shape:',all_df_y_mutiLable.shape)
13 | 
14 | scaler = MinMaxScaler()
15 | scaler.fit(GSR_selected_feature_df)
16 | data = scaler.transform(GSR_selected_feature_df)
17 | data_df = pd.DataFrame(data)
18 | 
19 | train_X = data_df.iloc[:int(1280*0.7), :].values
20 | test_X = data_df.iloc[int(1280*0.7):,:].values
21 | 
22 | train_Y = all_df_y_mutiLable.iloc[:int(1280*0.7), :].values
23 | test_Y = all_df_y_mutiLable.iloc[int(1280*0.7):,:].values
24 | 
25 | xg_train = xgb.DMatrix(train_X, label=train_Y)
26 | xg_test = xgb.DMatrix(test_X, label=test_Y)
27 | 
28 | # setup parameters for xgboost
29 | param = {}
30 | # use softmax multi-class classification
31 | param['objective'] = 'multi:softmax'
32 | # scale weight of positive examples
33 | param['eta'] = 0.1
34 | param['max_depth'] = 6
35 | param['silent'] = 1
36 | param['nthread'] = 8
37 | param['num_class'] = 4
38 | 
39 | watchlist = [(xg_train, 'train'), (xg_test, 'test')]
40 | num_round = 500
41 | bst = xgb.train(param, xg_train, num_round, watchlist)
42 | # get prediction
43 | pred = bst.predict(xg_test)
44 | error_rate = np.sum(pred != test_Y) / test_Y.shape[0]
45 | print('Test error using softmax = {}'.format(error_rate))
46 | 
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/DEAP_xgb_2c.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Tue Jan 16 13:04:47 2018
 4 | 
 5 | @author: jinyx
 6 | """
 7 | 
 8 | import numpy as np
 9 | import pandas as pd
10 | import xgboost as xgb
11 | import pickle
12 | from sklearn.preprocessing import MinMaxScaler
13 | from sklearn.model_selection import train_test_split
14 | from sklearn.metrics import accuracy_score
15 | import warnings
16 | warnings.filterwarnings("ignore")
17 | 
18 | #读取原始特征
19 | df_feat = pickle.load(open("./dump_file/df_feat_selected","rb"))
20 | #读取标签（Y值）
21 | all_df_y_2c = pickle.load(open("./dump_file/all_df_y_2c","rb"))
22 | print("df_feat.shape:",df_feat.shape)
23 | 
24 | train_X,test_X,train_Y,test_Y = \
25 |     train_test_split(df_feat,all_df_y_2c,test_size=0.2,random_state=1000)
26 | 
27 | print("train_X.shape:",train_X.shape)
28 | print("test_X.shape:",test_X.shape)  
29 |  
30 | # 
31 | dtrain = xgb.DMatrix(train_X, train_Y)
32 | dtest = xgb.DMatrix(test_X,test_Y)
33 |     
34 | xgb_params = {
35 |     'booster': 'gbtree',
36 |     
37 |     'colsample_bytree': 0.8,
38 |     
39 |     'colsample_bylevel': 0.8,
40 | 
41 |     'eta': 0.01,
42 | 
43 |     'max_depth': 6,
44 | 
45 |     'objective': 'binary:logistic',
46 | 
47 |     'eval_metric': 'error',
48 | 
49 |     'silent':0,
50 | }
51 | 
52 | watchlist = [(dtrain, 'train'), (dtest, 'test')]
53 | num_round = 300
54 | bst = xgb.train(xgb_params, dtrain, num_round,evals=watchlist)
55 | 
56 | y_pred = bst.predict(dtest)
57 | 
58 | df_y_pred = pd.DataFrame(y_pred,columns=['temp_pred_y'])
59 | df_y_pred['pred_y'] = 0
60 | df_y_pred['pred_y'][df_y_pred['temp_pred_y'] >= 0.5] = 1 
61 | df_y_pred['pred_y'][df_y_pred['temp_pred_y'] < 0.5] = 0     
62 | print(accuracy_score(test_Y, df_y_pred['pred_y']))
63 | 
64 | 
65 | 
66 |                                           


--------------------------------------------------------------------------------
/CNNFunction.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Sat Oct  6 16:23:16 2018
 5 | 
 6 | @author: jinyx
 7 | """
 8 | import tensorflow as tf
 9 | # 构建网络
10 | def buildCNN(w, h, c):
11 |     # 占位符
12 |     x = tf.placeholder(tf.float32, shape=[None, w, h, c], name='x')
13 |     y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')
14 | 
15 |     # 第一个卷积层 + 池化层
16 |     conv1 = tf.layers.conv2d(
17 |         inputs=x,
18 |         filters=5,
19 |         kernel_size=[1, 171],
20 |         padding="same", #全零填充
21 |         activation=tf.nn.relu,
22 |         kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
23 |     pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[1, 5], strides=2)
24 | 
25 |     re1 = tf.reshape(pool1, [-1, 6 * 6 * 128])
26 |     # 全连接层
27 |     dense1 = tf.layers.dense(inputs=re1,
28 |                              units=1024,
29 |                              activation=tf.nn.relu,
30 |                              kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
31 |                              kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
32 |     logits = tf.layers.dense(inputs=dense1,
33 |                              units=2,  
34 |                              activation=None,
35 |                              kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
36 |                              kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
37 | 
38 |     return logits, x, y_
39 | 
40 | # 返回损失函数的值，准确值等参数
41 | def accCNN(logits, y_):
42 |     loss = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=logits)
43 |     train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
44 |     correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_)
45 |     acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
46 | 
47 |     return loss, train_op, correct_prediction, acc
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Tue Jan  2 15:14:09 2018
 4 | 
 5 | @author: jinyx
 6 | """
 7 | s01_file_path = './data_preprocessed_python/s01.dat'
 8 | s02_file_path = './data_preprocessed_python/s02.dat'
 9 | s03_file_path = './data_preprocessed_python/s03.dat'
10 | s04_file_path = './data_preprocessed_python/s04.dat'
11 | s05_file_path = './data_preprocessed_python/s05.dat'
12 | s06_file_path = './data_preprocessed_python/s06.dat'
13 | s07_file_path = './data_preprocessed_python/s07.dat'
14 | s08_file_path = './data_preprocessed_python/s08.dat'
15 | s09_file_path = './data_preprocessed_python/s09.dat'
16 | s10_file_path = './data_preprocessed_python/s10.dat'
17 | s11_file_path = './data_preprocessed_python/s11.dat'
18 | s12_file_path = './data_preprocessed_python/s12.dat'
19 | s13_file_path = './data_preprocessed_python/s13.dat'
20 | s14_file_path = './data_preprocessed_python/s14.dat'
21 | s15_file_path = './data_preprocessed_python/s15.dat'
22 | s16_file_path = './data_preprocessed_python/s16.dat'
23 | s17_file_path = './data_preprocessed_python/s17.dat'
24 | s18_file_path = './data_preprocessed_python/s18.dat'
25 | s19_file_path = './data_preprocessed_python/s19.dat'
26 | s20_file_path = './data_preprocessed_python/s20.dat'
27 | s21_file_path = './data_preprocessed_python/s21.dat'
28 | s22_file_path = './data_preprocessed_python/s22.dat'
29 | s23_file_path = './data_preprocessed_python/s23.dat'
30 | s24_file_path = './data_preprocessed_python/s24.dat'
31 | s25_file_path = './data_preprocessed_python/s25.dat'
32 | s26_file_path = './data_preprocessed_python/s26.dat'
33 | s27_file_path = './data_preprocessed_python/s27.dat'
34 | s28_file_path = './data_preprocessed_python/s28.dat'
35 | s29_file_path = './data_preprocessed_python/s29.dat'
36 | s30_file_path = './data_preprocessed_python/s30.dat'
37 | s31_file_path = './data_preprocessed_python/s31.dat'
38 | s32_file_path = './data_preprocessed_python/s32.dat'
39 | 


--------------------------------------------------------------------------------
/DEAP_Classification_KFold.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import pandas as pd
 3 | import numpy as np
 4 | import pickle
 5 | from sklearn.preprocessing import MinMaxScaler
 6 | from sklearn import preprocessing
 7 | from sklearn.model_selection import cross_val_score
 8 | from sklearn.metrics import mean_absolute_error
 9 | from sklearn.naive_bayes import GaussianNB
10 | from sklearn.neighbors import KNeighborsClassifier
11 | from sklearn.neural_network import MLPClassifier
12 | import xgboost as xgb 
13 | import warnings
14 | warnings.filterwarnings("ignore")
15 | #用来计算程序运行时间
16 | import datetime
17 | starttime = datetime.datetime.now()
18 | 
19 | print("######读取数据（基于皮肤电）######")
20 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
21 | all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
22 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
23 | all_df_y_2c = pickle.load(open("./dump_file/all_df_y_2c","rb"))
24 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
25 | 
26 | print("######数据缩放处理，归一化处理######")
27 | min_max_scaler = MinMaxScaler()
28 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
29 | 
30 | #############################下面用唤醒度做分类#################################
31 | if True:
32 |     print("----------------这是高低愉悦度度二分类预测----------------")
33 |     data = GSR_feature_df
34 |     target = all_df_y_2c #高低愉悦度
35 |     
36 |     #贝叶斯效果不好，可能是数据不服从高斯（正态）分布
37 |     #print("######NB classification CV######") 
38 |     #NB_model = GaussianNB()
39 |     #NB_scores = cross_val_score(NB_model,data,target,cv=5,scoring='accuracy')
40 |     #print("NB_scores:",abs(NB_scores))
41 |     #print("NB_scores_mean:",abs(NB_scores.mean()))
42 |     
43 |     print("######KNN classification CV######")
44 |     KNN_model = KNeighborsClassifier(n_neighbors=20)
45 |     KNN_scores = cross_val_score(KNN_model,data,target,cv=5,scoring='accuracy')
46 |     print("KNN_scores:",abs(KNN_scores))
47 |     print("KNN_scores_mean:",abs(KNN_scores.mean()))
48 |     
49 |     print("######xgb classification CV######")
50 |     xgb_model = xgb.XGBClassifier(max_depth=6,learning_rate=0.01,n_estimators=300,
51 |                                       objective='binary:logistic',booster='gbtree',n_jobs=10,
52 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
53 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0,
54 |                                       scale_pos_weight=1)
55 |     xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
56 |     print("xgb_scores:",abs(xgb_scores))
57 |     print("xgb_scores_mean:",abs(xgb_scores.mean()))
58 |     
59 |     print("######MLP classification CV######")
60 |     mlp_model = MLPClassifier(hidden_layer_sizes=(500,2),alpha=0.1)
61 |     mlp_scores = cross_val_score(mlp_model,data,target,cv=5,scoring='accuracy')
62 |     print("mlp_scores:",abs(mlp_scores))
63 |     print("mlp_scores_mean:",abs(mlp_scores.mean()))
64 |     
65 | 
66 | #用来计算程序运行时间
67 | endtime = datetime.datetime.now()
68 | print("程序运行时间:%.1fs"%(endtime - starttime).seconds)
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/DEAP_linearR_GridSearchCV.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import pandas as pd
  3 | import numpy as np
  4 | import pickle
  5 | import xgboost as xgb
  6 | from sklearn.linear_model import LinearRegression
  7 | from sklearn.linear_model import Lasso
  8 | from sklearn.linear_model import Ridge
  9 | from sklearn.svm import SVR
 10 | from sklearn.preprocessing import MinMaxScaler
 11 | from sklearn import preprocessing
 12 | from sklearn.model_selection import train_test_split
 13 | from sklearn.model_selection import cross_val_score
 14 | from sklearn.model_selection import GridSearchCV
 15 | from sklearn.metrics import mean_squared_error
 16 | from sklearn.metrics import mean_absolute_error
 17 | import warnings
 18 | warnings.filterwarnings("ignore")
 19 | #用来计算程序运行时间
 20 | import datetime
 21 | starttime = datetime.datetime.now()
 22 | 
 23 | #读取数据
 24 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
 25 | all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
 26 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
 27 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
 28 | 
 29 | print("数据缩放处理，归一化处理")
 30 | min_max_scaler = MinMaxScaler()
 31 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
 32 | 
 33 | ##############################下面用交叉验证做##################################
 34 | print("----------------这是愉悦度预测----------------")
 35 | data = GSR_feature_df
 36 | target = all_df_y_valence
 37 | print("######linear regression CV######")
 38 | linearR_model = LinearRegression()
 39 | linearR_scores = cross_val_score(linearR_model,data,target,cv=5,scoring='neg_mean_absolute_error')
 40 | print("linearR_scores:",abs(linearR_scores))
 41 | print("linearR_scores_mean:",abs(linearR_scores.mean()))
 42 | 
 43 | print("######lasso model CV######")
 44 | param_grid = {'alpha':[1.0,0.9,0.8,0.7,0.6,0.5,0.4,0.3,0.2,0.1]}
 45 | lasso_model = Lasso()
 46 | gsearch = GridSearchCV(lasso_model,param_grid,cv=5,scoring='neg_mean_absolute_error')
 47 | gsearch.fit(data,target)
 48 | print("lasso->best_params:",gsearch.best_score_)
 49 | 
 50 | print("######ridge model CV######")
 51 | param_grid = {'alpha':[1.0,0.9,0.8,0.7,0.6,0.5,0.4,0.3,0.2,0.1,0.0]}
 52 | Ridge_model = Ridge()
 53 | gsearch = GridSearchCV(Ridge_model,param_grid,cv=5,scoring='neg_mean_absolute_error')
 54 | gsearch.fit(data,target)
 55 | print("Ridge->best_params:",gsearch.best_score_)
 56 | 
 57 | print("######xgboost model CV######")
 58 | param_grid = {'max_depth':[3],
 59 |               'learning_rate':[0.1],
 60 |               'n_estimators':[50],
 61 |               'objective':['reg:linear'],
 62 |               'booster':['gbtree'],
 63 |               'n_jobs':[10],
 64 |               'subsample':[1],
 65 |               'colsample_bytree':[1.0],
 66 |               'colsample_bylevel':[1.0],
 67 |               'reg_alpha':[1.0],
 68 |               'reg_lambda':[1.0],
 69 |               'gamma':[1.0],
 70 |         }
 71 | xgb_model  = xgb.XGBRegressor()
 72 | gsearch = GridSearchCV(xgb_model,param_grid,cv=5,scoring='neg_mean_absolute_error',n_jobs=10)
 73 | gsearch.fit(data,target)
 74 | print("Ridge->best_params:",gsearch.best_score_)
 75 | #用来计算程序运行时间
 76 | endtime = datetime.datetime.now()
 77 | print("程序运行时间:%.1fs"%(endtime - starttime).seconds)
 78 | 
 79 | 
 80 | 
 81 | 
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 


--------------------------------------------------------------------------------
/XGB_multiCla.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Fri Aug  3 13:31:09 2018
 4 | 情绪的多分类问题
 5 | @author: jinyu
 6 | """
 7 | 
 8 | import pandas as pd
 9 | import numpy as np
10 | import pickle
11 | import random
12 | import xgboost as xgb
13 | from sklearn.preprocessing import MinMaxScaler
14 | from sklearn import preprocessing
15 | from sklearn.model_selection import cross_val_score
16 | from sklearn.model_selection import cross_val_predict
17 | from sklearn.metrics import accuracy_score
18 | import warnings
19 | warnings.filterwarnings("ignore")
20 | #用来计算程序运行时间
21 | import datetime
22 | starttime = datetime.datetime.now()
23 | 
24 | #读取数据
25 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
26 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
27 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
28 | 
29 | print("数据缩放处理，归一化处理")
30 | min_max_scaler = MinMaxScaler()
31 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
32 | 
33 | print("把连续的唤醒度和愉悦度转化为离散的4个类别值")
34 | print("---------happy emotion----------")
35 | df_result = all_df_y
36 | a = df_result[df_result.valence>=5].index
37 | b = df_result[df_result.arousal>=5].index
38 | #happy_index = [val for val in a if val in b]
39 | happy_index = set(a).intersection(set(b))
40 | print("len(happy_index)=",len(happy_index)) 
41 | df_result['4emotion'] = -1
42 | for i in happy_index:
43 |     df_result['4emotion'].loc[i] = 0
44 | print("---------sad emotion----------")
45 | df_result = all_df_y
46 | a = df_result[df_result.valence<=5].index
47 | b = df_result[df_result.arousal<=5].index
48 | #sad_index = [val for val in a if val in b]
49 | sad_index = set(a).intersection(set(b))
50 | print("len(sad_index)=",len(sad_index)) 
51 | for i in sad_index:
52 |     df_result['4emotion'].loc[i] = 1
53 | print("---------nervous emotion----------")
54 | df_result = all_df_y
55 | a = df_result[df_result.valence<5].index
56 | b = df_result[df_result.arousal>5].index
57 | #nervous_index = [val for val in a if val in b]
58 | nervous_index = set(a).intersection(set(b))
59 | print("len(nervous_index)=",len(nervous_index)) 
60 | for i in nervous_index:
61 |     df_result['4emotion'].loc[i] = 2
62 | print("---------calm emotion----------")
63 | df_result = all_df_y
64 | a = df_result[df_result.valence>5].index
65 | b = df_result[df_result.arousal<5].index
66 | #calm_index = [val for val in a if val in b]
67 | calm_index = set(a).intersection(set(b))
68 | print("len(calm_index)=",len(calm_index)) 
69 | for i in calm_index:
70 |     df_result['4emotion'].loc[i] = 3
71 |     
72 | ###############################################################################     
73 | if True:
74 |     print("训练多分类器") 
75 |     data = GSR_feature_df
76 |     target = all_df_y[['4emotion']]       
77 |     print("######xgboost model CV######")
78 |     for xgb_rounds in [50]:  
79 |         xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,
80 |                                       objective='multi:softmax',booster='gbtree',n_jobs=10,
81 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
82 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0,
83 |                                       scale_pos_weight=1)
84 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
85 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
86 |         xgb_pred_4emotion = cross_val_predict(xgb_model,data,target,cv=5)
87 |         acc_4emotion = accuracy_score(xgb_pred_4emotion,df_result['4emotion'])
88 |         print("4emotion_acc:",acc_4emotion)   
89 |     
90 | #用来计算程序运行时间
91 | endtime = datetime.datetime.now()
92 | print("程序运行时间:%.1fs"%(endtime - starttime).seconds)     
93 |     
94 |     
95 |     
96 |     
97 |     


--------------------------------------------------------------------------------
/DEAP_feat_select.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import pickle
 4 | 
 5 | def feat_select(use_GSR,use_RSP,use_EEG,complex_abs,complex_real,complex_imag):
 6 |     #读取原始特征
 7 |     GSR_feature_df = pickle.load(open("./dump_file/GSR_feature_df","rb"))
 8 |     RSP_feature_df = pickle.load(open("./dump_file/RSP_feature_df","rb"))    
 9 |     for eeg_CH in range(1,33,1):
10 |         locals()["CH{}EEG_feature_df".format(eeg_CH)] = pickle.load(open("./dump_file/CH{}_eeg_feat_df".format(eeg_CH),"rb"))
11 |     all_df_y_mutiLable = pickle.load(open("./dump_file/all_df_y_mutiLable","rb"))
12 |     all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
13 |     all_df_y= pickle.load(open("./dump_file/all_df_y","rb"))
14 |     if use_GSR == False:
15 |         GSR_feature_df = pd.DataFrame()
16 |     if use_RSP == False:
17 |         RSP_feature_df = pd.DataFrame()
18 |     #把特征都合并在一起
19 |     df_feat = pd.concat([GSR_feature_df,RSP_feature_df],axis=1)
20 |     if use_EEG == True:
21 |            for eeg_CH in range(1,33,1):
22 |                df_feat = pd.concat([df_feat,locals()["CH{}EEG_feature_df".format(eeg_CH)]],axis=1)
23 |     
24 |     #复数的实数部分特征
25 |     if complex_real == True:
26 |         df_real = df_feat.select_dtypes(["complex128"]).apply(lambda x:x.real)
27 |         list_new_col=[]
28 |         for col in df_real.columns:
29 |             list_new_col.append('real_{}'.format(col))
30 |         df_real.columns = list_new_col
31 |         df_feat = pd.concat([df_real,df_feat],axis=1)
32 |         
33 |     #复数的虚数部分特征
34 |     if complex_imag == True:
35 |         df_imag = df_feat.select_dtypes(["complex128"]).apply(lambda x:x.imag)
36 |         list_new_col=[]
37 |         for col in df_imag.columns:
38 |             list_new_col.append('imag_{}'.format(col))
39 |         df_imag.columns = list_new_col
40 |         df_feat = pd.concat([df_imag,df_feat],axis=1)
41 |     
42 |     #True: drop complex data
43 |     if complex_abs == False:
44 |         if use_GSR == True:
45 |             df_feat.drop(['scfft_mean','scfft_median','scfft_std',
46 |                      'scfft_min','scfft_max','scfft_range'
47 |                      ],inplace=True,axis=1)  
48 |         if use_RSP == True:
49 |             df_feat.drop(['rspfft_max',
50 |                      'rspfft_range','rspfft_min','rspfft_median','rspfft_mean'
51 |                      ],inplace=True,axis=1)
52 |         if use_EEG == True:
53 |             df_feat.drop(['CH2eeg2Diff_range','CH2eeg2Diff_max',
54 |                          'CH2eeg2Diff_min','CH2eeg1Diff_range','CH2eeg1Diff_max','CH2eeg1Diff_min',
55 |                          'CH2eeg1Diff_median','CH2eeg1Diff_mean','CH2eeg_range','CH2eeg_max',
56 |                         'CH2eeg_min','CH2eeg_median','CH2eeg_mean'],inplace=True,axis=1)    
57 |             for eeg_CH in range(1,33,1):
58 |                 df_feat.drop(['CH{}eegfft_mean'.format(eeg_CH),'CH{}eegfft_median'.format(eeg_CH),
59 |                          'CH{}eegfft_std'.format(eeg_CH), 'CH{}eegfft_min'.format(eeg_CH),
60 |                          'CH{}eegfft_max'.format(eeg_CH),'CH{}eegfft_range'.format(eeg_CH),],inplace=True,axis=1)
61 |     
62 |     elif complex_abs == True:
63 |         #compute abs for complex        
64 |         df_abs = df_feat.select_dtypes(["complex128"]).apply(np.abs)
65 |         list_drop = df_abs.columns
66 |         df_feat.drop(labels=list_drop,axis=1,inplace=True)
67 |         df_feat = pd.concat([df_abs,df_feat],axis=1)
68 |     
69 | 
70 |      
71 |     df_feat_selected = df_feat
72 |     #根据相关程度筛选数据
73 |     if True:
74 |         feature_cols = df_feat.columns
75 |         #测试愉悦度
76 |         corrs = df_feat[feature_cols].apply(lambda col:np.abs(all_df_y['valence'].corr(col)))
77 |         #测试唤醒度
78 |         #corrs = df_feat[feature_cols].apply(lambda col:np.abs(all_df_y['arousal'].corr(col)))
79 |         sort_corrs = corrs.sort_values()
80 |         selected_feature = sort_corrs[sort_corrs > 0.00].index
81 |         df_feat_selected = df_feat[selected_feature]
82 |         print(sort_corrs)      
83 |     return df_feat_selected
84 |         
85 | if __name__ == '__main__':    
86 |     df_feat_selected = feat_select(use_GSR=True,use_RSP=False,use_EEG=False,complex_abs=True,complex_real=False,complex_imag=False)
87 |     #df_feat_selected = feat_select(use_GSR=True,use_RSP=False,use_EEG=False,complex_abs=True,complex_real=True,complex_imag=True)
88 |     print('df_feat_selected.shape:',df_feat_selected.shape)
89 | 
90 |     pickle.dump(df_feat_selected,open("./dump_file/df_feat_selected","wb"))
91 |     
92 |     
93 |     
94 |     
95 |     
96 |     
97 |     
98 |     


--------------------------------------------------------------------------------
/XGB.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Thu Aug  2 02:57:14 2018
  4 | 代码功能：XGB回归预测唤醒度和愉悦度，并根据结果组合预测4个象限的情绪
  5 | @author: jinyu
  6 | """
  7 | import pandas as pd
  8 | import numpy as np
  9 | import pickle
 10 | import xgboost as xgb
 11 | from sklearn.preprocessing import MinMaxScaler
 12 | from sklearn import preprocessing
 13 | from sklearn.model_selection import cross_val_score
 14 | from sklearn.model_selection import cross_val_predict
 15 | import warnings
 16 | warnings.filterwarnings("ignore")
 17 | #用来计算程序运行时间
 18 | import datetime
 19 | starttime = datetime.datetime.now()
 20 | 
 21 | #读取数据
 22 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
 23 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
 24 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
 25 | 
 26 | print("数据缩放处理，归一化处理")
 27 | min_max_scaler = MinMaxScaler()
 28 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
 29 | 
 30 | ##############################下面用交叉验证做愉悦度预测########################
 31 | if True:
 32 |     print("----------------这是愉悦度预测----------------")
 33 |     data = GSR_feature_df
 34 |     target = all_df_y[['valence']]       
 35 |     print("######xgboost model CV######")
 36 |     for xgb_rounds in [50]:  
 37 |         xgb_model  = xgb.XGBRegressor(max_depth=5,learning_rate=0.1,n_estimators=xgb_rounds,
 38 |                                       objective='reg:linear',booster='gbtree',n_jobs=10,
 39 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
 40 |                                        reg_alpha=0.1, reg_lambda=0.8,gamma=1.0)
 41 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
 42 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
 43 |         xgb_pred_valence = cross_val_predict(xgb_model,data,target,cv=5)
 44 |         
 45 | #############################下面用唤醒度做回归#################################
 46 | if True:
 47 |     print("----------------这是唤醒度预测----------------")
 48 |     data = GSR_feature_df
 49 |     target = all_df_y[['arousal']]   
 50 |     print("######xgboost regression model CV######")
 51 |     for xgb_rounds in [50]:  
 52 |         xgb_model  = xgb.XGBRegressor(max_depth=7,learning_rate=0.1,n_estimators=xgb_rounds,
 53 |                                       objective='reg:linear',booster='gbtree',n_jobs=10,
 54 |                                       subsample=0.9, colsample_bytree=0.90, colsample_bylevel=0.90,
 55 |                                        reg_alpha=0.1, reg_lambda=0.5,gamma=0)
 56 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
 57 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
 58 |         xgb_pred_arousal = cross_val_predict(xgb_model,data,target,cv=5)
 59 | 
 60 | print("根据回归预测值构造4个象限的情绪2分类模型") 
 61 | df_v = pd.DataFrame(xgb_pred_valence,columns=['pred_v'],index=all_df_y.index)
 62 | df_a = pd.DataFrame(xgb_pred_arousal,columns=['pred_a'],index=all_df_y.index)
 63 | df_true_v = all_df_y[['valence']]
 64 | df_true_a = all_df_y[['arousal']]          
 65 | df_result = pd.concat([df_v,df_a,df_true_v,df_true_a],axis=1)
 66 | 
 67 | def count_accuracy(ser1,ser2):
 68 |     sum_all = len(ser1)
 69 |     tmp = ser1==ser2
 70 |     sum_acc= len(tmp[tmp==True])
 71 |     return sum_acc/sum_all
 72 |        
 73 | print("---------happy emotion----------")
 74 | happy_index = df_result[df_result.valence>=5].index.append(df_result[df_result.arousal>=5].index)
 75 | happy_index = set(happy_index)
 76 | print("len(happy_index)=",len(happy_index)) 
 77 | df_result['happy'] = -1
 78 | for i in happy_index:
 79 |     df_result['happy'].loc[i] = 1  
 80 | pred_happy_index = df_result[df_result.pred_v>=5].index.append(df_result[df_result.pred_a>=5].index)
 81 | pred_happy_index = set(pred_happy_index)
 82 | print("len(pred_happy_index)=",len(pred_happy_index)) 
 83 | df_result['pred_happy'] = -1
 84 | for i in pred_happy_index:
 85 |     df_result['pred_happy'].loc[i] = 1
 86 | acc = count_accuracy(df_result['pred_happy'],df_result['happy']) 
 87 | print("happy acc:",acc)
 88 | 
 89 | print("---------sad emotion----------")
 90 | sad_index = df_result[df_result.valence<5].index.append(df_result[df_result.arousal<5].index)
 91 | sad_index = set(sad_index)
 92 | print("len(sad_index)=",len(sad_index)) 
 93 | df_result['sad'] = -1
 94 | for i in sad_index:
 95 |     df_result['sad'].loc[i] = 1  
 96 | pred_sad_index = df_result[df_result.pred_v<5].index.append(df_result[df_result.pred_a<5].index)
 97 | pred_sad_index = set(pred_sad_index)
 98 | print("len(pred_sad_index)=",len(pred_sad_index)) 
 99 | df_result['pred_sad'] = -1
100 | for i in pred_sad_index:
101 |     df_result['pred_sad'].loc[i] = 1
102 | acc = count_accuracy(df_result['pred_sad'],df_result['sad']) 
103 | print("sad acc:",acc)
104 | 
105 | 
106 | #用来计算程序运行时间
107 | endtime = datetime.datetime.now()
108 | print("程序运行时间:%.1fs"%(endtime - starttime).seconds)    
109 |         
110 |         
111 |         
112 |         
113 |         
114 |         
115 |         
116 |         
117 |         
118 |         
119 |         
120 |         
121 |         
122 |         
123 |         
124 |         
125 |         
126 |         
127 |         
128 |         
129 |         
130 |         


--------------------------------------------------------------------------------
/XGBRegression.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Thu Aug  2 02:57:14 2018
  4 | 代码功能：XGB回归预测唤醒度和愉悦度，并根据结果组合预测4个象限的情绪
  5 | @author: jinyu
  6 | """
  7 | import pandas as pd
  8 | import numpy as np
  9 | import pickle
 10 | import xgboost as xgb
 11 | from sklearn.preprocessing import MinMaxScaler
 12 | from sklearn import preprocessing
 13 | from sklearn.model_selection import cross_val_score
 14 | from sklearn.model_selection import cross_val_predict
 15 | import warnings
 16 | warnings.filterwarnings("ignore")
 17 | #用来计算程序运行时间
 18 | import datetime
 19 | starttime = datetime.datetime.now()
 20 | 
 21 | #读取数据
 22 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
 23 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
 24 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
 25 | 
 26 | print("数据缩放处理，归一化处理")
 27 | min_max_scaler = MinMaxScaler()
 28 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
 29 | 
 30 | ##############################下面用交叉验证做愉悦度预测########################
 31 | if True:
 32 |     print("----------------这是愉悦度预测----------------")
 33 |     data = GSR_feature_df
 34 |     target = all_df_y[['valence']]       
 35 |     print("######xgboost model CV######")
 36 |     for xgb_rounds in [50]:  
 37 |         xgb_model  = xgb.XGBRegressor(max_depth=5,learning_rate=0.1,n_estimators=xgb_rounds,
 38 |                                       objective='reg:linear',booster='gbtree',n_jobs=10,
 39 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
 40 |                                        reg_alpha=0.1, reg_lambda=0.8,gamma=1.0)
 41 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
 42 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
 43 |         xgb_pred_valence = cross_val_predict(xgb_model,data,target,cv=5)
 44 |         
 45 | #############################下面用唤醒度做回归#################################
 46 | if True:
 47 |     print("----------------这是唤醒度预测----------------")
 48 |     data = GSR_feature_df
 49 |     target = all_df_y[['arousal']]   
 50 |     print("######xgboost regression model CV######")
 51 |     for xgb_rounds in [50]:  
 52 |         xgb_model  = xgb.XGBRegressor(max_depth=7,learning_rate=0.1,n_estimators=xgb_rounds,
 53 |                                       objective='reg:linear',booster='gbtree',n_jobs=10,
 54 |                                       subsample=0.9, colsample_bytree=0.90, colsample_bylevel=0.90,
 55 |                                        reg_alpha=0.1, reg_lambda=0.5,gamma=0)
 56 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
 57 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
 58 |         xgb_pred_arousal = cross_val_predict(xgb_model,data,target,cv=5)
 59 | 
 60 | print("根据回归预测值构造4个象限的情绪2分类模型") 
 61 | df_v = pd.DataFrame(xgb_pred_valence,columns=['pred_v'],index=all_df_y.index)
 62 | df_a = pd.DataFrame(xgb_pred_arousal,columns=['pred_a'],index=all_df_y.index)
 63 | df_true_v = all_df_y[['valence']]
 64 | df_true_a = all_df_y[['arousal']]          
 65 | df_result = pd.concat([df_v,df_a,df_true_v,df_true_a],axis=1)
 66 | 
 67 | def count_accuracy(ser1,ser2):
 68 |     sum_all = len(ser1)
 69 |     tmp = ser1==ser2
 70 |     sum_acc= len(tmp[tmp==True])
 71 |     return sum_acc/sum_all
 72 |        
 73 | print("---------happy emotion----------")
 74 | happy_index = df_result[df_result.valence>=5].index.append(df_result[df_result.arousal>=5].index)
 75 | happy_index = set(happy_index)
 76 | print("len(happy_index)=",len(happy_index)) 
 77 | df_result['happy'] = -1
 78 | for i in happy_index:
 79 |     df_result['happy'].loc[i] = 1  
 80 | pred_happy_index = df_result[df_result.pred_v>=5].index.append(df_result[df_result.pred_a>=5].index)
 81 | pred_happy_index = set(pred_happy_index)
 82 | print("len(pred_happy_index)=",len(pred_happy_index)) 
 83 | df_result['pred_happy'] = -1
 84 | for i in pred_happy_index:
 85 |     df_result['pred_happy'].loc[i] = 1
 86 | acc = count_accuracy(df_result['pred_happy'],df_result['happy']) 
 87 | print("happy acc:",acc)
 88 | 
 89 | print("---------sad emotion----------")
 90 | sad_index = df_result[df_result.valence<5].index.append(df_result[df_result.arousal<5].index)
 91 | sad_index = set(sad_index)
 92 | print("len(sad_index)=",len(sad_index)) 
 93 | df_result['sad'] = -1
 94 | for i in sad_index:
 95 |     df_result['sad'].loc[i] = 1  
 96 | pred_sad_index = df_result[df_result.pred_v<5].index.append(df_result[df_result.pred_a<5].index)
 97 | pred_sad_index = set(pred_sad_index)
 98 | print("len(pred_sad_index)=",len(pred_sad_index)) 
 99 | df_result['pred_sad'] = -1
100 | for i in pred_sad_index:
101 |     df_result['pred_sad'].loc[i] = 1
102 | acc = count_accuracy(df_result['pred_sad'],df_result['sad']) 
103 | print("sad acc:",acc)
104 | 
105 | 
106 | #用来计算程序运行时间
107 | endtime = datetime.datetime.now()
108 | print("程序运行时间:%.1fs"%(endtime - starttime).seconds)    
109 |         
110 |         
111 |         
112 |         
113 |         
114 |         
115 |         
116 |         
117 |         
118 |         
119 |         
120 |         
121 |         
122 |         
123 |         
124 |         
125 |         
126 |         
127 |         
128 |         
129 |         
130 |         


--------------------------------------------------------------------------------
/DNN.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Wed Sep 26 16:13:58 2018
  5 | DNN做分类
  6 | @author: jinyx
  7 | """
  8 | 
  9 | import pandas as pd
 10 | import numpy as np
 11 | import pickle
 12 | import random
 13 | import tensorflow as tf
 14 | from sklearn.preprocessing import MinMaxScaler
 15 | from sklearn import preprocessing
 16 | from sklearn.model_selection import cross_val_score
 17 | from sklearn.model_selection import cross_val_predict
 18 | from sklearn.metrics import accuracy_score
 19 | import warnings
 20 | warnings.filterwarnings("ignore")
 21 | #用来计算程序运行时间
 22 | import datetime
 23 | starttime = datetime.datetime.now()
 24 | #读取数据
 25 | for eeg_CH in range(1,33,1):
 26 |     file_path = "./dump_file/CH{}_df_EEG_x".format(eeg_CH)
 27 |     df_data = pickle.load(open(file_path,"rb"))
 28 |     locals()["CH{}_df_EEG_x".format(eeg_CH)] = df_data
 29 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
 30 | for i in range(0,1280,1): #总共1280个实验所以会有1280个二维矩阵
 31 |     locals()["mat{}".format(i)]=pd.DataFrame()
 32 |     for eeg_CH in range(1,33,1): #脑电共有32个通道，所以一个矩阵大小32*8064
 33 |         locals()["mat{}".format(i)] = locals()["mat{}".format(i)].\
 34 |             append(locals()["CH{}_df_EEG_x".format(eeg_CH)].iloc[i:i+1],ignore_index=True)
 35 | 
 36 | #模型相关的参数
 37 | INPUT_NODE = 258048          #32*8064，输入节点
 38 | OUTPUT_NODE = 2              #2分类，输出节点
 39 | LAYER1_NODE = 500            # 隐藏层节点数                                    
 40 | BATCH_SIZE = 100             # 每次batch打包的样本个数        
 41 | LEARNING_RATE_BASE = 0.8     # 基础学习率  
 42 | LEARNING_RATE_DECAY = 0.99   # 学习率的衰减率
 43 | REGULARAZTION_RATE = 0.0001  # 正则化的系数
 44 | TRAINING_STEPS = 5000        # 训练轮数
 45 | MOVING_AVERAGE_DECAY = 0.99  # 滑动平均衰减率
 46 | 
 47 | def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
 48 |     # 不使用滑动平均类
 49 |     if avg_class == None:
 50 |         layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
 51 |         return tf.matmul(layer1, weights2) + biases2
 52 |     else:
 53 |         # 使用滑动平均类
 54 |         layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
 55 |         return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2) 
 56 | 
 57 | def train(mnist):
 58 |     x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
 59 |     y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
 60 |     # 生成隐藏层的参数。
 61 |     # tf.truncated_normal(shape, mean, stddev)，正太分布数据 
 62 |     weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
 63 |     biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
 64 |     # 生成输出层的参数。
 65 |     weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
 66 |     biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
 67 | 
 68 |     # 计算不含滑动平均类的前向传播结果
 69 |     y = inference(x, None, weights1, biases1, weights2, biases2)
 70 |     
 71 |     # 定义训练轮数及相关的滑动平均类 
 72 |     global_step = tf.Variable(0, trainable=False)
 73 |     variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
 74 |     variables_averages_op = variable_averages.apply(tf.trainable_variables())
 75 |     average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
 76 |     
 77 |     # 计算交叉熵及其平均值
 78 |     cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
 79 |     cross_entropy_mean = tf.reduce_mean(cross_entropy)
 80 |     
 81 |     # 损失函数的计算
 82 |     regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
 83 |     regularaztion = regularizer(weights1) + regularizer(weights2)
 84 |     loss = cross_entropy_mean + regularaztion
 85 |     
 86 |     # 设置指数衰减的学习率。
 87 |     learning_rate = tf.train.exponential_decay(
 88 |         LEARNING_RATE_BASE,
 89 |         global_step,
 90 |         mnist.train.num_examples / BATCH_SIZE,
 91 |         LEARNING_RATE_DECAY,
 92 |         staircase=True)
 93 |     
 94 |     # 优化损失函数
 95 |     train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
 96 |     
 97 |     # 反向传播更新参数和更新每一个参数的滑动平均值
 98 |     with tf.control_dependencies([train_step, variables_averages_op]):
 99 |         train_op = tf.no_op(name='train')
100 | 
101 |     # 计算正确率
102 |     correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
103 |     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
104 |     
105 |     # 初始化会话，并开始训练过程。
106 |     with tf.Session() as sess:
107 |         tf.global_variables_initializer().run()
108 |         validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
109 |         test_feed = {x: mnist.test.images, y_: mnist.test.labels} 
110 |         
111 |         # 循环的训练神经网络。
112 |         for i in range(TRAINING_STEPS):
113 |             if i % 1000 == 0:
114 |                 validate_acc = sess.run(accuracy, feed_dict=validate_feed)
115 |                 print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))
116 |             
117 |             xs,ys=mnist.train.next_batch(BATCH_SIZE)
118 |             sess.run(train_op,feed_dict={x:xs,y_:ys})
119 | 
120 |         test_acc=sess.run(accuracy,feed_dict=test_feed)
121 |         print(("After %d training step(s), test accuracy using average model is %g" %(TRAINING_STEPS, test_acc)))
122 | 
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 


--------------------------------------------------------------------------------
/GCF_2cEEG_multiCH_TimeIMFS.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import multiprocessing\n",
 10 |     "from GCForest import gcForest\n",
 11 |     "import pandas as pd\n",
 12 |     "import numpy as np\n",
 13 |     "import pickle \n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "from sklearn.model_selection import train_test_split\n",
 16 |     "from sklearn.model_selection import StratifiedKFold\n",
 17 |     "from sklearn.preprocessing import MinMaxScaler\n",
 18 |     "from sklearn.metrics import accuracy_score\n",
 19 |     "from sklearn.metrics import f1_score\n",
 20 |     "from sklearn.metrics import precision_score\n",
 21 |     "from sklearn.metrics import recall_score\n",
 22 |     "from sklearn.preprocessing import Imputer\n",
 23 |     "import warnings\n",
 24 |     "warnings.filterwarnings(\"ignore\")\n",
 25 |     "#用来计算程序运行时间\n",
 26 |     "import datetime\n",
 27 |     "starttime = datetime.datetime.now()"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": null,
 33 |    "metadata": {},
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "#读取Y\n",
 37 |     "all_df_y_2c = pickle.load(open(\"./dump_file/all_df_y_2c\",\"rb\"))\n",
 38 |     "print(\"Y.shape:\",all_df_y_2c.shape)\n",
 39 |     "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n",
 40 |     "all_df_y['2cArousal'] = 0\n",
 41 |     "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n",
 42 |     "all_df_y['2cValence'] = 0\n",
 43 |     "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n",
 44 |     "print(all_df_y.head(5))\n",
 45 |     "\n",
 46 |     "#读取32个通道的EEG数据，每个通道包含32×40=1280个信号样本（人次×每人次40实验）\n",
 47 |     "#每个样本向量大小为8064点（63s*128Hz）\n",
 48 |     "for eegCH in range(1,33,1):\n",
 49 |     "    #file_path = \"./dump_file/CH{}_df_EEG_x\".format(eegCH)\n",
 50 |     "    #locals()['CH{}_df_EEG_x'.format(eegCH)] = pickle.load(open(file_path,\"rb\"))\n",
 51 |     "    file_path = \"./dump_file/CH{}eegfft_df\".format(eegCH)\n",
 52 |     "    locals()[\"CH{}eegfft_df\".format(eegCH)] = pickle.load(open(file_path,\"rb\"))\n",
 53 |     "'''\n",
 54 |     "X = CH1eegfft_df\n",
 55 |     "y = all_df_y[[\"2cArousal\"]]\n",
 56 |     "X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3,stratify=y,random_state=2018)   \n",
 57 |     "xTrainIdx = X_tr.index\n",
 58 |     "xTestIdx = X_te.index\n",
 59 |     "'''\n",
 60 |     "y = all_df_y[[\"2cValence\"]]\n",
 61 |     "xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx\",\"rb\"))\n",
 62 |     "xTestIdx = pickle.load(open(\"./dump_file/xTestIdx\",\"rb\"))\n",
 63 |     "y_tr = y.loc[xTrainIdx]\n",
 64 |     "y_te = y.loc[xTestIdx]"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": null,
 70 |    "metadata": {},
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "for eegCH in range(1,33,1):\n",
 74 |     "    for i in range(0,1280,1):\n",
 75 |     "for eegCH in range(1,33,1): \n",
 76 |     "    #时域数据\n",
 77 |     "    #locals()['CH{}TrainSet'.format(eegCH)] = locals()['CH{}_df_EEG_x'.format(eegCH)].loc[xTrainIdx]\n",
 78 |     "    #locals()['CH{}TestSet'.format(eegCH)] = locals()['CH{}_df_EEG_x'.format(eegCH)].loc[xTestIdx]\n",
 79 |     "    #频域数据\n",
 80 |     "    locals()['CH{}TrainSet'.format(eegCH)] = locals()['CH{}eegfft_df'.format(eegCH)].loc[xTrainIdx]\n",
 81 |     "    locals()['CH{}TestSet'.format(eegCH)] = locals()['CH{}eegfft_df'.format(eegCH)].loc[xTestIdx]"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": null,
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "myWindowsSize = 256\n",
 91 |     "myStrideSize = 64\n",
 92 |     "gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n",
 93 |     "               min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n",
 94 |     "if True:\n",
 95 |     "    for eegCH in range(1,33,1): \n",
 96 |     "        print(\"CH{} running multi-grain scan\".format(eegCH))\n",
 97 |     "        xTrain,yTrain = locals()['CH{}TrainSet'.format(eegCH)].values,y_tr.values\n",
 98 |     "        xTest = locals()['CH{}TestSet'.format(eegCH)].values   \n",
 99 |     "        locals()['CH{}mgsTrainVector'.format(eegCH)] = gcf.mg_scanning(xTrain,yTrain)\n",
100 |     "        locals()['CH{}mgsTestVector'.format(eegCH)] = gcf.mg_scanning(xTest)\n",
101 |     "        filePath = \"./dump_file_V2/CH{}mgsTrainVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n",
102 |     "        pickle.dump(locals()['CH{}mgsTrainVector'.format(eegCH)],open(filePath,\"wb\"))\n",
103 |     "        filePath = \"./dump_file_V2/CH{}mgsTestVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n",
104 |     "        pickle.dump(locals()['CH{}mgsTestVector'.format(eegCH)],open(filePath,\"wb\"))\n",
105 |     "else:\n",
106 |     "    for eegCH in range(1,33,1):\n",
107 |     "        filePath = \"./dump_file_V2/CH{}mgsTrainVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n",
108 |     "        locals()['CH{}mgsTrainVector'.format(eegCH)] = pickle.load(open(filePath,\"rb\"))\n",
109 |     "        filePath = \"./dump_file_V2/CH{}mgsTestVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n",
110 |     "        locals()['CH{}mgsTestVector'.format(eegCH)]= pickle.load(open(filePath,\"rb\"))"
111 |    ]
112 |   }
113 |  ],
114 |  "metadata": {
115 |   "kernelspec": {
116 |    "display_name": "Python 3",
117 |    "language": "python",
118 |    "name": "python3"
119 |   },
120 |   "language_info": {
121 |    "codemirror_mode": {
122 |     "name": "ipython",
123 |     "version": 3
124 |    },
125 |    "file_extension": ".py",
126 |    "mimetype": "text/x-python",
127 |    "name": "python",
128 |    "nbconvert_exporter": "python",
129 |    "pygments_lexer": "ipython3",
130 |    "version": "3.6.5"
131 |   }
132 |  },
133 |  "nbformat": 4,
134 |  "nbformat_minor": 2
135 | }
136 | 


--------------------------------------------------------------------------------
/DEAP_data_preprocess.py:
--------------------------------------------------------------------------------
  1 | # =============================================================================
  2 | # .# -*- coding: utf-8 -*-
  3 | # =============================================================================
  4 | """
  5 | Spyder Editor
  6 | 
  7 | This is a temporary script file.
  8 | """
  9 | 
 10 | import pandas as pd
 11 | import numpy as np
 12 | import pickle 
 13 | import matplotlib.pyplot as plt
 14 | from config import *
 15 | 
 16 | #32个实验者，每个实验者参与40个实验，每人共40路信号采集
 17 | sXX = ['s01','s02','s03','s04','s05','s06','s07','s08','s09',
 18 |        's10','s11','s12','s13','s14','s15','s16','s17','s18','s19',
 19 |        's20','s21','s22','s23','s24','s25','s26','s27','s28','s29',
 20 |        's30','s31','s32']
 21 | 
 22 | #read data from .dat files
 23 | for i in sXX:
 24 |     sXX_file_path ='./data_preprocessed_python/'+i+'.dat'
 25 |     f = open(sXX_file_path,'rb')
 26 |     locals()[i] = pickle.load(f, encoding='bytes')
 27 |     
 28 | #read labels 32 people(Y)
 29 | for i in sXX:
 30 |     locals()['%s_df_y'%i] = pd.DataFrame(locals()[i][b'labels'])
 31 |     locals()['%s_df_y'%i].columns = ['valence','arousal','dominance','liking'] 
 32 | 
 33 | #concat all sXX_df_y in one df
 34 | all_df_y = pd.DataFrame()
 35 | for i in sXX:
 36 |     temp_index = []
 37 |     for j in range(0,40,1):
 38 |         temp_index.append(i+'_'+str(j))
 39 |     locals()['%s_df_y'%i].index = temp_index
 40 |     all_df_y = pd.concat([all_df_y,locals()['%s_df_y'%i]],axis=0)
 41 | 
 42 | #index最终的表示方式例子：s01_0 ->(s01实验者 第0号情绪测量实验)
 43 | pickle.dump(all_df_y,open("./dump_file/all_df_y","wb"))
 44 | 
 45 | #############################提取32路EEG信号####################################
 46 | #read #32路EEG脑电信号,1到32路是脑电信号
 47 | for eeg_channel in range(1,33,1):
 48 |     for i in sXX:
 49 |         locals()['CH{}_{}_df_EEG_x'.format(eeg_channel,i)] = pd.DataFrame(locals()[i][b'data'][:][eeg_channel][:])
 50 |         temp_index = []
 51 |         for j in range(0,40,1):
 52 |             temp_index.append(i+'_'+str(j))
 53 |         locals()['CH{}_{}_df_EEG_x'.format(eeg_channel,i)].index = temp_index
 54 |     #concat all CHX_sXX_df_EEG_x in one df
 55 |     locals()['CH{}_df_EEG_x'.format(eeg_channel)] = pd.DataFrame()
 56 |     for i in sXX:
 57 |         locals()['CH{}_df_EEG_x'.format(eeg_channel)] = \
 58 |             pd.concat([locals()['CH{}_df_EEG_x'.format(eeg_channel)],locals()['CH{}_{}_df_EEG_x'.format(eeg_channel,i)]],axis=0)
 59 |     file_path = "./dump_file/{}".format('CH{}_df_EEG_x'.format(eeg_channel))
 60 |     pickle.dump(locals()['CH%s_df_EEG_x'%eeg_channel],open(file_path,"wb"))
 61 | ###############################################################################
 62 | ###########################提取1路GSR皮肤电信号################################
 63 | #read GSR data
 64 | for i in sXX:
 65 |     locals()['%s_df_GSR_x'%i] = pd.DataFrame(locals()[i][b'data'][:][36][:])
 66 |     temp_index = []
 67 |     for j in range(0,40,1):
 68 |         temp_index.append(i+'_'+str(j))
 69 |     locals()['%s_df_GSR_x'%i].index = temp_index
 70 | 
 71 | #concat all sXX_df_GSR_x in one df
 72 | all_df_GSR_x = pd.DataFrame()
 73 | for i in sXX:
 74 |     all_df_GSR_x = pd.concat([all_df_GSR_x,locals()['%s_df_GSR_x'%i]],axis=0)
 75 | 
 76 | pickle.dump(all_df_GSR_x,open("./dump_file/all_df_GSR_x","wb"))
 77 | ###############################################################################
 78 | 
 79 | ############################提取1路RSP呼吸信号#################################
 80 | #read Respiration belt data
 81 | for i in sXX:
 82 |     locals()['%s_df_RSP_x'%i] = pd.DataFrame(locals()[i][b'data'][:][37][:])
 83 |     temp_index = []
 84 |     for j in range(0,40,1):
 85 |         temp_index.append(i+'_'+str(j))
 86 |     locals()['%s_df_RSP_x'%i].index = temp_index
 87 | 
 88 | #concat all sXX_df_RSP_x in one df
 89 | all_df_RSP_x = pd.DataFrame()
 90 | for i in sXX:
 91 |     all_df_RSP_x = pd.concat([all_df_RSP_x,locals()['%s_df_RSP_x'%i]],axis=0)
 92 | 
 93 | pickle.dump(all_df_RSP_x,open("./dump_file/all_df_RSP_x","wb"))
 94 | ###############################################################################
 95 | 
 96 | ############################提取1路BVP信号#################################
 97 | #read Respiration belt data
 98 | for i in sXX:
 99 |     locals()['%s_df_BVP_x'%i] = pd.DataFrame(locals()[i][b'data'][:][38][:])
100 |     temp_index = []
101 |     for j in range(0,40,1):
102 |         temp_index.append(i+'_'+str(j))
103 |     locals()['%s_df_BVP_x'%i].index = temp_index
104 | 
105 | #concat all sXX_df_BVP_x in one df
106 | all_df_BVP_x = pd.DataFrame()
107 | for i in sXX:
108 |     all_df_BVP_x = pd.concat([all_df_BVP_x,locals()['%s_df_BVP_x'%i]],axis=0)
109 | 
110 | pickle.dump(all_df_BVP_x,open("./dump_file/all_df_BVP_x","wb"))
111 | ###############################################################################
112 | 
113 | ############################提取1路TMP信号#################################
114 | #read Respiration belt data
115 | for i in sXX:
116 |     locals()['%s_df_TMP_x'%i] = pd.DataFrame(locals()[i][b'data'][:][39][:])
117 |     temp_index = []
118 |     for j in range(0,40,1):
119 |         temp_index.append(i+'_'+str(j))
120 |     locals()['%s_df_TMP_x'%i].index = temp_index
121 | 
122 | #concat all sXX_df_BVP_x in one df
123 | all_df_TMP_x = pd.DataFrame()
124 | for i in sXX:
125 |     all_df_TMP_x = pd.concat([all_df_TMP_x,locals()['%s_df_TMP_x'%i]],axis=0)
126 | 
127 | pickle.dump(all_df_TMP_x,open("./dump_file/all_df_TMP_x","wb"))
128 | ###############################################################################
129 | 
130 | #################################画GSR信号的图##################################
131 | #read .dat files(32 total)
132 | f = open(s01_file_path,'rb')
133 | s01 = pickle.load(f, encoding='bytes')
134 | #s01_GSR_df_x,index:40 expriments,columns:8064 datas(128Hz)
135 | s01_GSR_df_x = pd.DataFrame(s01[b'data'][:][36][:])
136 | #s01_df_y,index:40 expriments,columns:Y
137 | s01_df_y = pd.DataFrame(s01[ b'labels'])
138 | s01_df_y.columns=['valence','arousal','dominance','liking']
139 | 
140 | plt.plot(s01_GSR_df_x.iloc[0,:])
141 | plt.ylabel('GSR value')
142 | plt.show()
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 
164 | 
165 | 
166 | 
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 
173 | 
174 | 
175 | 
176 | 
177 | 
178 | 
179 | 
180 | 


--------------------------------------------------------------------------------
/DEAP_linearR_KFold.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import pandas as pd
  3 | import numpy as np
  4 | import pickle
  5 | import xgboost as xgb
  6 | from sklearn.linear_model import LinearRegression
  7 | from sklearn.linear_model import Lasso
  8 | from sklearn.linear_model import Ridge
  9 | from sklearn.svm import SVR
 10 | from sklearn.preprocessing import MinMaxScaler
 11 | from sklearn import preprocessing
 12 | from sklearn.model_selection import train_test_split
 13 | from sklearn.model_selection import cross_val_score
 14 | from sklearn.model_selection import GridSearchCV
 15 | from sklearn.model_selection import KFold
 16 | from sklearn.metrics import mean_squared_error
 17 | from sklearn.metrics import mean_absolute_error
 18 | from sklearn.metrics import accuracy_score
 19 | import warnings
 20 | warnings.filterwarnings("ignore")
 21 | #用来计算程序运行时间
 22 | import datetime
 23 | starttime = datetime.datetime.now()
 24 | 
 25 | #读取数据
 26 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
 27 | all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
 28 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
 29 | all_df_y_2c = pickle.load(open("./dump_file/all_df_y_2c","rb"))
 30 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
 31 | 
 32 | print("数据缩放处理，归一化处理")
 33 | features_col = GSR_feature_df.columns
 34 | min_max_scaler = MinMaxScaler()
 35 | GSR_feature_ndarray = min_max_scaler.fit_transform(GSR_feature_df)
 36 | GSR_feature_df = pd.DataFrame(GSR_feature_ndarray)
 37 | GSR_feature_df.columns = features_col
 38 | 
 39 | 
 40 | #数据通过5折交叉验证划分
 41 | kf = KFold(n_splits=5)
 42 | k=[0,0,0,0,0]
 43 | k[0],k[1],k[2],k[3],k[4] = kf.split(GSR_feature_df)
 44 | ##############################下面用交叉验证做愉悦度预测##################################
 45 | if True:
 46 |     print("##########愉悦度############")
 47 |     y_valence = all_df_y_valence.copy()
 48 |     df_predy = pd.DataFrame() #存放预测结果df
 49 |     MAE_sum = 0 #存放手动CV后的MAE和
 50 |     for i in range(0,5):
 51 |         data = GSR_feature_df.iloc[k[i][0]]
 52 |         target = all_df_y_valence.iloc[k[i][0]]
 53 |         test_x = GSR_feature_df.iloc[k[i][1]] 
 54 |         test_y = all_df_y_valence.iloc[k[i][1]]
 55 |         xgb_model  = xgb.XGBRegressor(max_depth=5,learning_rate=0.1,n_estimators=60,
 56 |                                       objective='reg:linear',booster='gbtree',n_jobs=10,
 57 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
 58 |                                       reg_alpha=0.1, reg_lambda=1.0,gamma=0)
 59 |         xgb_model.fit(data,target)
 60 |         test_predy = xgb_model.predict(test_x)
 61 |         MAE = mean_absolute_error(test_y,test_predy)
 62 |         MAE_sum = MAE + MAE_sum
 63 |         test_predy = pd.DataFrame(test_predy,columns=['y_pred'],index=test_y.index)
 64 |         df_predy = pd.concat([df_predy,test_predy],axis=0)
 65 |         print("[%d]MAE:%f"%(i,MAE))
 66 |     y_valence = pd.merge(y_valence,df_predy,how='outer',left_index=True,right_index=True)
 67 |     print("MAE_mean:%f"%(MAE_sum/5))
 68 |     y_valence['2C_pred_true']=0
 69 |     y_valence['2C_pred_true'][(y_valence['valence']>=5) & (y_valence['y_pred']>=5)] = 1  
 70 |     y_valence['2C_pred_true'][(y_valence['valence']<5) & (y_valence['y_pred']<5)] = 1
 71 |     accuracy = y_valence['2C_pred_true'].sum()/1280
 72 |     print("Accuracy:%f"%(accuracy))
 73 |     pickle.dump(y_valence,open("./dump_file/y_valence","wb"))
 74 | 
 75 | if False:
 76 |     print("##########唤醒度############")
 77 |     y_arousal = all_df_y[['arousal']].copy()
 78 |     df_predy = pd.DataFrame() #存放预测结果df
 79 |     MAE_sum = 0 #存放手动CV后的MAE和
 80 |     for i in range(0,5):
 81 |         data = GSR_feature_df.iloc[k[i][0]]
 82 |         target = all_df_y[['arousal']].iloc[k[i][0]]
 83 |         test_x = GSR_feature_df.iloc[k[i][1]] 
 84 |         test_y = all_df_y[['arousal']].iloc[k[i][1]]
 85 |         xgb_model  = xgb.XGBRegressor(max_depth=5,learning_rate=0.1,n_estimators=50,
 86 |                                       objective='reg:linear',booster='gbtree',n_jobs=10,
 87 |                                       subsample=0.90, colsample_bytree=0.90, colsample_bylevel=0.9,
 88 |                                       reg_alpha=0.1, reg_lambda=0.8,gamma=0)
 89 |         xgb_model.fit(data,target)
 90 |         test_predy = xgb_model.predict(test_x)
 91 |         MAE = mean_absolute_error(test_y,test_predy)
 92 |         MAE_sum = MAE + MAE_sum
 93 |         test_predy = pd.DataFrame(test_predy,columns=['y_pred'],index=test_y.index)
 94 |         df_predy = pd.concat([df_predy,test_predy],axis=0)
 95 |         print("[%d]MAE:%f"%(i,MAE))
 96 |     y_arousal = pd.merge(y_arousal,df_predy,how='outer',left_index=True,right_index=True)
 97 |     print("MAE_mean:%f"%(MAE_sum/5))
 98 |     y_arousal['2C_pred_true']=0
 99 |     y_arousal['2C_pred_true'][(y_arousal['arousal']>=5) & (y_arousal['y_pred']>=5)] = 1  
100 |     y_arousal['2C_pred_true'][(y_arousal['arousal']<5) & (y_arousal['y_pred']<5)] = 1
101 |     accuracy = y_arousal['2C_pred_true'].sum()/1280
102 |     print("Accuracy:%f"%(accuracy))
103 |     pickle.dump(y_arousal,open("./dump_file/y_arousal","wb"))
104 | 
105 | if False:
106 |     y_arousal_2c = all_df_y[['arousal']].copy() 
107 |     y_arousal_2c['2C'] = 0
108 |     y_arousal_2c['2C'][y_arousal_2c['arousal'] >= 5] = 1
109 |     df_predy = pd.DataFrame() #存放预测结果df
110 |     for i in range(0,5):
111 |         data = GSR_feature_df.iloc[k[i][0]]
112 |         target = y_arousal_2c['2C'].iloc[k[i][0]]
113 |         test_x = GSR_feature_df.iloc[k[i][1]] 
114 |         test_y = y_arousal_2c['2C'].iloc[k[i][1]]
115 |         xgb_model  = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,
116 |                                       objective='binary:logistic',booster='gbtree',n_jobs=10,
117 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
118 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0)
119 |         xgb_model.fit(data,target)
120 |         test_predy = xgb_model.predict(test_x)
121 |         ACC = accuracy_score(test_y,test_predy)
122 |         print("[%d]ACC:%f"%(i,ACC)) 
123 |         test_predy = pd.DataFrame(test_predy,columns=['y_pred_2c'],index=test_y.index)
124 |         df_predy = pd.concat([df_predy,test_predy],axis=0)
125 | #用来计算程序运行时间
126 | endtime = datetime.datetime.now()
127 | print("程序运行时间:%.1fs"%(endtime - starttime).seconds)
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 


--------------------------------------------------------------------------------
/DEAP_linearR_plots.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import pandas as pd
  3 | import numpy as np
  4 | import pickle
  5 | import xgboost as xgb
  6 | from sklearn.linear_model import LinearRegression
  7 | from sklearn.linear_model import Lasso
  8 | from sklearn.linear_model import Ridge
  9 | from sklearn.neighbors import KNeighborsRegressor
 10 | from sklearn.svm import SVR
 11 | from sklearn.neural_network import MLPRegressor
 12 | from sklearn.preprocessing import MinMaxScaler
 13 | from sklearn import preprocessing
 14 | from sklearn.model_selection import train_test_split
 15 | from sklearn.model_selection import cross_val_score
 16 | from sklearn.model_selection import GridSearchCV
 17 | from sklearn.model_selection import KFold
 18 | from sklearn.metrics import mean_squared_error
 19 | from sklearn.metrics import mean_absolute_error
 20 | import warnings
 21 | warnings.filterwarnings("ignore")
 22 | #用来计算程序运行时间
 23 | import datetime
 24 | starttime = datetime.datetime.now()
 25 | 
 26 | #读取数据
 27 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
 28 | all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
 29 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
 30 | all_df_y_2c = pickle.load(open("./dump_file/all_df_y_2c","rb"))
 31 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
 32 | 
 33 | print("数据缩放处理，归一化处理")
 34 | min_max_scaler = MinMaxScaler()
 35 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
 36 | 
 37 | ##############################下面用交叉验证做愉悦度预测##################################
 38 | if False:
 39 |     print("----------------这是愉悦度预测----------------")
 40 |     data = GSR_feature_df
 41 |     target = all_df_y_valence
 42 |     
 43 |     print("######linear regression CV######")
 44 |     linearR_model = LinearRegression()
 45 |     linearR_scores = cross_val_score(linearR_model,data,target,cv=5,scoring='neg_mean_absolute_error')
 46 |     print("linearR_scores:",abs(linearR_scores))
 47 |     print("linearR_scores_mean:",abs(linearR_scores.mean()))
 48 |     
 49 |     print("######ridge model CV######")
 50 |     for alpha in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2,1.3,1.4,1.5]:
 51 |         ridge_model = Ridge(alpha=alpha)
 52 |         ridge_scores = cross_val_score(ridge_model,data,target,cv=5,scoring='neg_mean_absolute_error')
 53 |         print("alpha:%.1f->ridge_scores_mean:%f"%(alpha,abs(ridge_scores.mean())))
 54 |     
 55 |     print("######SVR model CV######")
 56 |     for c in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2.0,2.1,2.2,2.3,2.4,2.5,4.0]:
 57 |         svr_model =  SVR(C=c,kernel='rbf')
 58 |         svr_scores = cross_val_score(svr_model,data,target,cv=5,scoring='neg_mean_absolute_error')   
 59 |         print("c:%.1f->svr_scores_mean:%f"%(c,abs(svr_scores.mean())))
 60 |         
 61 |     print("######xgboost model CV######")
 62 |     for xgb_rounds in [20,30,40,50,60,70]:  
 63 |         xgb_model  = xgb.XGBRegressor(max_depth=5,learning_rate=0.1,n_estimators=xgb_rounds,
 64 |                                       objective='reg:linear',booster='gbtree',n_jobs=10,
 65 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
 66 |                                        reg_alpha=0.1, reg_lambda=0.8,gamma=1.0)
 67 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
 68 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
 69 |     
 70 |     print("######xgboost classification model CV######")
 71 |     target = all_df_y_2c['emotion_2']
 72 |     for xgb_rounds in [40,50,60,70]:  
 73 |         xgb_model  = xgb.XGBClassifier(max_depth=7,learning_rate=0.1,n_estimators=xgb_rounds,
 74 |                                       objective='binary:logistic',booster='gbtree',n_jobs=10,
 75 |                                       subsample=0.9, colsample_bytree=0.90, colsample_bylevel=0.90,
 76 |                                        reg_alpha=0.1, reg_lambda=0.5,gamma=0)
 77 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
 78 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
 79 | 
 80 | #############################下面用唤醒度做回归#################################
 81 | if True:
 82 |     print("----------------这是唤醒度预测----------------")
 83 |     data = GSR_feature_df
 84 |     target = all_df_y[['arousal']]
 85 |     '''    
 86 |     print("######linear regression CV######")
 87 |     linearR_model = LinearRegression()
 88 |     linearR_scores = cross_val_score(linearR_model,data,target,cv=5,scoring='neg_mean_absolute_error')
 89 |     print("linearR_scores:",abs(linearR_scores))
 90 |     print("linearR_scores_mean:",abs(linearR_scores.mean()))
 91 |     
 92 |     print("######ridge model CV######")
 93 |     for alpha in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2,1.3,1.4,1.5]:
 94 |         ridge_model = Ridge(alpha=alpha)
 95 |         ridge_scores = cross_val_score(ridge_model,data,target,cv=5,scoring='neg_mean_absolute_error')
 96 |         print("alpha:%.1f->ridge_scores_mean:%f"%(alpha,abs(ridge_scores.mean())))
 97 |         
 98 |     print("######SVR model CV######")
 99 |     for c in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2,1.3,1.4,1.5]:
100 |         svr_model =  SVR(C=c,kernel='rbf')
101 |         svr_scores = cross_val_score(svr_model,data,target,cv=5,scoring='neg_mean_absolute_error')   
102 |         print("c:%.1f->svr_scores_mean:%f"%(c,abs(svr_scores.mean())))
103 |     ''' 
104 |     print("######xgboost regression model CV######")
105 |     for xgb_rounds in [40,50,60,70]:  
106 |         xgb_model  = xgb.XGBRegressor(max_depth=7,learning_rate=0.1,n_estimators=xgb_rounds,
107 |                                       objective='reg:linear',booster='gblinear',n_jobs=10,
108 |                                       subsample=0.9, colsample_bytree=0.90, colsample_bylevel=0.90,
109 |                                        reg_alpha=0.1, reg_lambda=0.5,gamma=0)
110 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
111 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
112 |     
113 |     '''
114 |     print("######KNN regression model CV######")
115 |     for knn_neighbors in [3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]:
116 |         knn_model = KNeighborsRegressor(n_neighbors=knn_neighbors)
117 |         knn_scores = cross_val_score(knn_model,data,target,cv=5,scoring='neg_mean_absolute_error')
118 |         print("knn_neighbors:%d->knn_scores_mean:%f"%(knn_neighbors,abs(knn_scores.mean())))
119 |            
120 |     print("######MLP regression model CV######")
121 |     for mlp_alpha in [0.1,0.01,0.001,0.0001]:
122 |         mlp_model = MLPRegressor(hidden_layer_sizes=(1000, ),alpha=mlp_alpha)
123 |         mlp_scores = cross_val_score(mlp_model,data,target,cv=5,scoring='neg_mean_absolute_error',n_jobs=1)
124 |         print("mlp_alpha:%f->mlp_scores_mean:%f"%(mlp_alpha,abs(mlp_scores.mean())))
125 |     '''
126 |     
127 | #用来计算程序运行时间
128 | endtime = datetime.datetime.now()
129 | print("程序运行时间:%.1fs"%(endtime - starttime).seconds)
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 


--------------------------------------------------------------------------------
/XGB_4emotion.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Thu Aug  2 18:53:45 2018
  4 | 直接对4个象限的情绪结果做预测，因为皮肤电对唤醒度的预测没啥效果
  5 | @author: jinyu
  6 | """
  7 | import pandas as pd
  8 | import numpy as np
  9 | import pickle
 10 | import random
 11 | import xgboost as xgb
 12 | from sklearn.preprocessing import MinMaxScaler
 13 | from sklearn import preprocessing
 14 | from sklearn.model_selection import cross_val_score
 15 | from sklearn.model_selection import cross_val_predict
 16 | from sklearn.metrics import accuracy_score
 17 | import warnings
 18 | warnings.filterwarnings("ignore")
 19 | #用来计算程序运行时间
 20 | import datetime
 21 | starttime = datetime.datetime.now()
 22 | 
 23 | #读取数据
 24 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
 25 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
 26 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
 27 | 
 28 | print("数据缩放处理，归一化处理")
 29 | min_max_scaler = MinMaxScaler()
 30 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
 31 | 
 32 | ###############################################################################
 33 | if False: #计算四个象限的情绪
 34 |     print("把连续的唤醒度和愉悦度转化为离散的二分类值（4个象限对应四种情绪）")
 35 |     print("---------happy emotion----------")
 36 |     df_result = all_df_y
 37 |     a = df_result[df_result.valence>=5].index
 38 |     b = df_result[df_result.arousal>=5].index
 39 |     happy_index = [val for val in a if val in b]
 40 |     print("len(happy_index)=",len(happy_index)) 
 41 |     df_result['happy'] = -1
 42 |     for i in happy_index:
 43 |         df_result['happy'].loc[i] = 1
 44 |     print("---------sad emotion----------")
 45 |     df_result = all_df_y
 46 |     a = df_result[df_result.valence<=5].index
 47 |     b = df_result[df_result.arousal<=5].index
 48 |     sad_index = [val for val in a if val in b]
 49 |     print("len(sad_index)=",len(sad_index)) 
 50 |     df_result['sad'] = -1
 51 |     for i in sad_index:
 52 |         df_result['sad'].loc[i] = 1
 53 |     print("---------nervous emotion----------")
 54 |     df_result = all_df_y
 55 |     a = df_result[df_result.valence<5].index
 56 |     b = df_result[df_result.arousal>5].index
 57 |     nervous_index = [val for val in a if val in b]
 58 |     print("len(nervous_index)=",len(nervous_index)) 
 59 |     df_result['nervous'] = -1
 60 |     for i in nervous_index:
 61 |         df_result['nervous'].loc[i] = 1
 62 |     print("---------calm emotion----------")
 63 |     df_result = all_df_y
 64 |     a = df_result[df_result.valence>5].index
 65 |     b = df_result[df_result.arousal<5].index
 66 |     calm_index = [val for val in a if val in b]
 67 |     print("len(calm_index)=",len(calm_index)) 
 68 |     df_result['calm'] = -1
 69 |     for i in calm_index:
 70 |         df_result['calm'].loc[i] = 1
 71 |     print("四个情绪划分结果dump处理")
 72 |     pickle.dump(df_result,open("./dump_file/df_result","wb"))
 73 | else:
 74 |     print("读取四种情绪的dump文件")
 75 |     df_result = pickle.load(open("./dump_file/df_result","rb"))    
 76 | ###############################################################################
 77 | def count_accuracy(ser1,ser2):
 78 |     sum_all = len(ser1)
 79 |     tmp = ser1==ser2
 80 |     sum_acc= len(tmp[tmp==True])
 81 |     return sum_acc/sum_all
 82 | if True:
 83 |     print("----------------‘happy’情绪预测----------------")
 84 |     data = GSR_feature_df
 85 |     target = df_result[['happy']]       
 86 |     print("######xgboost model CV######")
 87 |     for xgb_rounds in [50]:  
 88 |         xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,
 89 |                                       objective='binary:logistic',booster='gbtree',n_jobs=-1,
 90 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
 91 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0,
 92 |                                       scale_pos_weight=1)
 93 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
 94 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
 95 |         xgb_pred_happy = cross_val_predict(xgb_model,data,target,cv=5)
 96 |         happy_acc = accuracy_score(xgb_pred_happy,df_result['happy'])
 97 |         print("happy_acc:",happy_acc)
 98 | 
 99 | if True:
100 |     print("----------------‘sad’情绪预测----------------")
101 |     data = GSR_feature_df
102 |     target = df_result[['sad']]       
103 |     print("######xgboost model CV######")
104 |     for xgb_rounds in [50]:  
105 |         xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,
106 |                                       objective='binary:logistic',booster='gbtree',n_jobs=-1,
107 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
108 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0,
109 |                                       scale_pos_weight=1)
110 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
111 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
112 |         xgb_pred_sad = cross_val_predict(xgb_model,data,target,cv=5)
113 |         sad_acc = accuracy_score(xgb_pred_sad,df_result['sad'])
114 |         print("sad_acc:",sad_acc)      
115 | 
116 | if True:
117 |     print("----------------‘nervous’情绪预测----------------")
118 |     data = GSR_feature_df
119 |     target = df_result[['nervous']]       
120 |     print("######xgboost model CV######")
121 |     for xgb_rounds in [50]:  
122 |         xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,
123 |                                       objective='binary:logistic',booster='gbtree',n_jobs=-1,
124 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
125 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0,
126 |                                       scale_pos_weight=1)
127 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
128 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
129 |         xgb_pred_nervous = cross_val_predict(xgb_model,data,target,cv=5)
130 |         nervous_acc = accuracy_score(xgb_pred_nervous,df_result['nervous'])
131 |         print("nervous_acc:",nervous_acc)
132 | 
133 | if True:
134 |     print("----------------‘calm’情绪预测----------------")
135 |     data = GSR_feature_df
136 |     target = df_result[['calm']]       
137 |     print("######xgboost model CV######")
138 |     for xgb_rounds in [50]:  
139 |         xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,
140 |                                       objective='binary:logistic',booster='gbtree',n_jobs=-1,
141 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
142 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0,
143 |                                       scale_pos_weight=1)
144 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
145 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
146 |         xgb_pred_calm = cross_val_predict(xgb_model,data,target,cv=5)
147 |         calm_acc = accuracy_score(xgb_pred_calm,df_result['calm'])
148 |         print("calm_acc:",calm_acc)
149 | 
150 | #用来计算程序运行时间
151 | endtime = datetime.datetime.now()
152 | print("程序运行时间:%.1fs"%(endtime - starttime).seconds)  


--------------------------------------------------------------------------------
/DEAP_linearR.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import pandas as pd
  3 | import numpy as np
  4 | import pickle
  5 | import xgboost as xgb
  6 | from sklearn.linear_model import LinearRegression
  7 | from sklearn.linear_model import Lasso
  8 | from sklearn.linear_model import Ridge
  9 | from sklearn.svm import SVR
 10 | from sklearn.preprocessing import MinMaxScaler
 11 | from sklearn import preprocessing
 12 | from sklearn.model_selection import train_test_split
 13 | from sklearn.model_selection import cross_val_score
 14 | from sklearn.model_selection import GridSearchCV
 15 | from sklearn.metrics import mean_squared_error
 16 | from sklearn.metrics import mean_absolute_error
 17 | import warnings
 18 | warnings.filterwarnings("ignore")
 19 | #用来计算程序运行时间
 20 | import datetime
 21 | starttime = datetime.datetime.now()
 22 | 
 23 | #读取数据
 24 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
 25 | all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
 26 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
 27 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
 28 | 
 29 | #print("数据缩放处理，具有零均值和单位方差")
 30 | #stdScaler = preprocessing.StandardScaler()
 31 | #stdScaler.fit(GSR_feature_df)
 32 | #stdScaler.transform(GSR_feature_df)
 33 | #print("mean:\n{}".format(GSR_feature_df.mean(axis=0)))
 34 | #print("std:\n{}".format(GSR_feature_df.std(axis=0)))
 35 | 
 36 | print("数据缩放处理，归一化处理")
 37 | min_max_scaler = MinMaxScaler()
 38 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
 39 | 
 40 | '''
 41 | print("----------------愉悦度不使用交叉验证----------------")
 42 | train_X,test_X,train_Y,test_Y = \
 43 |     train_test_split(GSR_feature_df,all_df_y_valence,test_size=0.2,random_state=1000)
 44 | print("######linear regression######")
 45 | linearR_model = LinearRegression()
 46 | linearR_model.fit(train_X,train_Y)
 47 | linear_pred_Y = linearR_model.predict(test_X)
 48 | df_linear_pred_Y = pd.DataFrame(linear_pred_Y,columns=['valence'])
 49 | mse = mean_squared_error(linear_pred_Y,test_Y)
 50 | print("mse=",mse)
 51 | mae = mean_absolute_error(linear_pred_Y,test_Y)
 52 | print("mae=",mae)
 53 | 
 54 | print("######lasso model######")
 55 | lasso_model = Lasso(alpha=0.1)
 56 | lasso_model.fit(train_X,train_Y)
 57 | lasso_pred_Y = lasso_model.predict(test_X)
 58 | df_lasso_pred_Y = pd.DataFrame(lasso_pred_Y,columns=['valence'])
 59 | mse = mean_squared_error(lasso_pred_Y,test_Y)
 60 | print("mse=",mse)
 61 | mae = mean_absolute_error(lasso_pred_Y,test_Y)
 62 | print("mae=",mae)
 63 | 
 64 | print("######ridge model######")
 65 | ridge_model = Ridge(alpha=0.1)
 66 | ridge_model.fit(train_X,train_Y)
 67 | ridge_pred_Y = ridge_model.predict(test_X)
 68 | df_ridge_pred_Y = pd.DataFrame(ridge_pred_Y,columns=['valence'])
 69 | mse = mean_squared_error(ridge_pred_Y,test_Y)
 70 | print("mse=",mse)
 71 | mae = mean_absolute_error(ridge_pred_Y,test_Y)
 72 | print("mae=",mae)
 73 | 
 74 | print("######xgb(gbtree) regression model######")
 75 | dtrain = xgb.DMatrix(train_X,train_Y)      
 76 | dtest = xgb.DMatrix(test_X,test_Y)     
 77 | xgb_params = {
 78 |     'booster': 'gbtree',
 79 | 
 80 |     'eta': 0.1,
 81 | 
 82 |     'max_depth': 7,
 83 | 
 84 |     'objective': 'reg:linear',
 85 | 
 86 |     'eval_metric': 'mae',
 87 |     
 88 |     'colsample_bytree': 0.90,
 89 |     
 90 |     'alpha':0.6,
 91 |     
 92 |     'gamma':1,
 93 | 
 94 |     'silent':0,
 95 | }
 96 | watchlist = [(dtrain, 'train'), (dtest, 'test')]   
 97 | num_rounds = 50
 98 | #True 会使用watchlist
 99 | if False:   
100 |     xgb_reg_model=xgb.train(xgb_params,dtrain,num_rounds,evals=watchlist)
101 | else:
102 |     xgb_reg_model=xgb.train(xgb_params,dtrain,num_rounds)
103 | xgb_pred_Y = xgb_reg_model.predict(dtest)
104 | mse = mean_squared_error(xgb_pred_Y,test_Y)
105 | print("mse=",mse)
106 | mae = mean_absolute_error(xgb_pred_Y,test_Y)
107 | print("mae=",mae)
108 | 
109 | pickle.dump(xgb_pred_Y,open("./dump_file/xgb_pred_Y","wb"))
110 | pickle.dump(test_Y,open("./dump_file/test_Y","wb"))
111 | '''
112 | 
113 | ##############################下面用交叉验证做##################################
114 | print("----------------这是愉悦度预测----------------")
115 | data = GSR_feature_df
116 | target = all_df_y_valence
117 | print("######linear regression CV######")
118 | linearR_model = LinearRegression()
119 | linearR_scores = cross_val_score(linearR_model,data,target,cv=5,scoring='neg_mean_absolute_error')
120 | print("linearR_scores:",abs(linearR_scores))
121 | print("linearR_scores_mean:",abs(linearR_scores.mean()))
122 | 
123 | print("######lasso model CV######")
124 | lasso_model = Lasso(alpha=0.1)
125 | lasso_scores = cross_val_score(lasso_model,data,target,cv=5,scoring='neg_mean_absolute_error')
126 | print("lasso_scores:",abs(lasso_scores))
127 | print("lasso_scores_mean:",abs(lasso_scores.mean()))
128 | 
129 | print("######ridge model CV######")
130 | ridge_model = Ridge(alpha=0.1)
131 | ridge_scores = cross_val_score(ridge_model,data,target,cv=5,scoring='neg_mean_absolute_error')
132 | print("ridge_scores:",abs(ridge_scores))
133 | print("ridge_scores_mean:",abs(ridge_scores.mean()))
134 | 
135 | print("######xgboost model CV######")
136 | xgb_model  = xgb.XGBRegressor(max_depth=6,learning_rate=0.1,n_estimators=50,
137 |                               objective='reg:linear',booster='gbtree',n_jobs=10,
138 |                               subsample=1, colsample_bytree=0.9, colsample_bylevel=1,
139 |                                reg_alpha=1.0, reg_lambda=1,gamma=1.0)
140 | xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
141 | print("xgb_scores:",abs(xgb_scores))
142 | print("xgb_scores_mean:",abs(xgb_scores.mean()))
143 | #############################下面用唤醒度做回归#################################
144 | print("----------------这是唤醒度预测----------------")
145 | data = GSR_feature_df
146 | target = all_df_y[['arousal']]
147 | print("######linear regression CV######")
148 | linearR_model = LinearRegression()
149 | linearR_scores = cross_val_score(linearR_model,data,target,cv=5,scoring='neg_mean_absolute_error')
150 | print("linearR_scores:",abs(linearR_scores))
151 | print("linearR_scores_mean:",abs(linearR_scores.mean()))
152 | 
153 | print("######lasso model CV######")
154 | lasso_model = Lasso(alpha=0.1)
155 | lasso_scores = cross_val_score(lasso_model,data,target,cv=5,scoring='neg_mean_absolute_error')
156 | print("lasso_scores:",abs(lasso_scores))
157 | print("lasso_scores_mean:",abs(lasso_scores.mean()))
158 | 
159 | print("######ridge model CV######")
160 | ridge_model = Ridge(alpha=0.1)
161 | ridge_scores = cross_val_score(ridge_model,data,target,cv=5,scoring='neg_mean_absolute_error')
162 | print("ridge_scores:",abs(ridge_scores))
163 | print("ridge_scores_mean:",abs(ridge_scores.mean()))
164 | '''
165 | print("######SVR model CV######")
166 | svr_model =  SVR()
167 | svr_scores = cross_val_score(svr_model,data,target,cv=5,scoring='neg_mean_absolute_error')   
168 | print("svr_scores:",svr_scores)
169 | print("svr_scores_mean:",svr_scores.mean())
170 | '''
171 | print("######GridSearchCV######")
172 | param_grid = {'alpha':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]}
173 | lasso_model = Lasso()
174 | gsearch = GridSearchCV(lasso_model,param_grid,cv=5)
175 | gsearch.fit(data,target)
176 | 
177 | #用来计算程序运行时间
178 | endtime = datetime.datetime.now()
179 | print("程序运行时间:%.1fs"%(endtime - starttime).seconds)
180 | 
181 | 
182 | 
183 | 
184 | 
185 | 
186 | 
187 | 
188 | 
189 | 
190 | 
191 | 
192 | 
193 | 
194 | 
195 | 
196 | 
197 | 
198 | 
199 | 
200 | 
201 | 
202 | 
203 | 
204 | 


--------------------------------------------------------------------------------
/XGB_2cEEG.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "# -*- coding: utf-8 -*-\n",
 10 |     "import pandas as pd\n",
 11 |     "import numpy as np\n",
 12 |     "import pickle\n",
 13 |     "from sklearn.preprocessing import MinMaxScaler\n",
 14 |     "from sklearn import preprocessing\n",
 15 |     "from sklearn.model_selection import cross_val_score\n",
 16 |     "from sklearn.metrics import mean_absolute_error\n",
 17 |     "from sklearn.naive_bayes import GaussianNB\n",
 18 |     "from sklearn.neighbors import KNeighborsClassifier\n",
 19 |     "from sklearn.neural_network import MLPClassifier\n",
 20 |     "from sklearn.metrics import accuracy_score\n",
 21 |     "from sklearn.metrics import f1_score\n",
 22 |     "from sklearn.metrics import precision_score\n",
 23 |     "from sklearn.metrics import recall_score\n",
 24 |     "import xgboost as xgb \n",
 25 |     "import warnings\n",
 26 |     "warnings.filterwarnings(\"ignore\")\n",
 27 |     "#用来计算程序运行时间\n",
 28 |     "import datetime\n",
 29 |     "starttime = datetime.datetime.now()"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "metadata": {},
 36 |    "outputs": [
 37 |     {
 38 |      "name": "stdout",
 39 |      "output_type": "stream",
 40 |      "text": [
 41 |       "######读取特征（基于EEG）######\n",
 42 |       "eegFeatureDF.shape: (1280, 1440)\n"
 43 |      ]
 44 |     }
 45 |    ],
 46 |    "source": [
 47 |     "print(\"######读取特征（基于EEG）######\")\n",
 48 |     "#EEG特征表\n",
 49 |     "eegFeatureDF = pickle.load(open(\"./dump_file/eegFeatureDF\",\"rb\"))\n",
 50 |     "\n",
 51 |     "if True:\n",
 52 |     "    #加上早期的统计特征\n",
 53 |     "    for eegCH in range(1,33,1):\n",
 54 |     "        file_path = \"./dump_file/CH{}_eeg_feat_df\".format(eegCH)\n",
 55 |     "        tmpDF = pickle.load(open(file_path,\"rb\"))\n",
 56 |     "        eegFeatureDF = pd.concat([eegFeatureDF,tmpDF],axis=1)\n",
 57 |     "if True:#加上样本熵特征\n",
 58 |     "    filePath = \"./dump_file_sampEn/sampEnFeatures\".format(eegCH)\n",
 59 |     "    tmpDF = pickle.load(open(filePath,\"rb\"))\n",
 60 |     "    eegFeatureDF = pd.concat([eegFeatureDF,tmpDF],axis=1)\n",
 61 |     "if False:\n",
 62 |     "    #GSR特征表\n",
 63 |     "    GSR_feature_df = pickle.load(open(\"./dump_file/df_feat_selected\",\"rb\"))\n",
 64 |     "    eegFeatureDF = pd.concat([eegFeatureDF,GSR_feature_df],axis=1)\n",
 65 |     "\n",
 66 |     "#总的特征向量样本大小\n",
 67 |     "print(\"eegFeatureDF.shape:\",eegFeatureDF.shape)"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 3,
 73 |    "metadata": {},
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "if False:\n",
 77 |     "    print(\"######数据缩放处理，归一化处理######\")\n",
 78 |     "    min_max_scaler = MinMaxScaler()\n",
 79 |     "    eegFeatureDF = min_max_scaler.fit_transform(eegFeatureDF)\n"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 4,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "#featureDF = pd.concat([eegFeatureDF,GSR_feature_df],axis=1)\n",
 89 |     "#featureDF = GSR_feature_df\n",
 90 |     "featureDF = eegFeatureDF"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 5,
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "#############################下面用愉悦度（效价）做分类#################################\n",
100 |     "if False:\n",
101 |     "    print(\"----------------这是高低愉悦度度二分类预测----------------\")\n",
102 |     "    data = featureDF\n",
103 |     "    target = all_df_y_2c #高低愉悦度\n",
104 |     "    \n",
105 |     "    print(\"######xgb classification CV######\")\n",
106 |     "    xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,\n",
107 |     "                                      objective='binary:logistic',booster='gbtree',n_jobs=10,\n",
108 |     "                                      subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,\n",
109 |     "                                      reg_alpha=0.5, reg_lambda=1.0,gamma=0,\n",
110 |     "                                      scale_pos_weight=1)\n",
111 |     "    xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')\n",
112 |     "    print(\"xgb_scores:\",abs(xgb_scores))\n",
113 |     "    print(\"xgb_scores_mean:\",abs(xgb_scores.mean()))\n"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {},
120 |    "outputs": [
121 |     {
122 |      "name": "stdout",
123 |      "output_type": "stream",
124 |      "text": [
125 |       "       valence  arousal  dominance  liking  2cArousal  2cValence\n",
126 |       "s01_0     7.71     7.60       6.90    7.83          1          1\n",
127 |       "s01_1     8.10     7.31       7.28    8.47          1          1\n",
128 |       "s01_2     8.58     7.54       9.00    7.08          1          1\n",
129 |       "s01_3     4.94     6.01       6.12    8.06          0          0\n",
130 |       "s01_4     6.96     3.92       7.19    6.05          1          1\n"
131 |      ]
132 |     }
133 |    ],
134 |    "source": [
135 |     "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n",
136 |     "all_df_y['2cArousal'] = 0\n",
137 |     "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n",
138 |     "all_df_y['2cValence'] = 0\n",
139 |     "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n",
140 |     "print(all_df_y.head(5))\n",
141 |     "y = all_df_y[[\"2cValence\"]]\n",
142 |     "xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx\",\"rb\"))\n",
143 |     "xTestIdx = pickle.load(open(\"./dump_file/xTestIdx\",\"rb\"))\n",
144 |     "trainY = y.loc[xTrainIdx]\n",
145 |     "testY = y.loc[xTestIdx]\n",
146 |     "trainX = eegFeatureDF.loc[xTrainIdx]\n",
147 |     "testX = eegFeatureDF.loc[xTestIdx]"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 7,
153 |    "metadata": {},
154 |    "outputs": [
155 |     {
156 |      "name": "stdout",
157 |      "output_type": "stream",
158 |      "text": [
159 |       "######xgb classification ######\n",
160 |       "ACC 0.6536458333333334\n",
161 |       "F1 0.712742980561555\n",
162 |       "Recal 0.7603686635944701\n",
163 |       "Precision 0.6707317073170732\n"
164 |      ]
165 |     }
166 |    ],
167 |    "source": [
168 |     "if True:\n",
169 |     "    data = featureDF\n",
170 |     "    target = y\n",
171 |     "    print(\"######xgb classification ######\")\n",
172 |     "    xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,\n",
173 |     "                                      objective='binary:logistic',booster='gbtree',n_jobs=10,\n",
174 |     "                                      subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,\n",
175 |     "                                      reg_alpha=0.5, reg_lambda=1.0,gamma=0,\n",
176 |     "                                      scale_pos_weight=1)\n",
177 |     "    xgb_model.fit(trainX.values,trainY.values)\n",
178 |     "    predY = xgb_model.predict(testX.values)\n",
179 |     "    print(\"ACC\",accuracy_score(y_true=testY, y_pred=predY))\n",
180 |     "    print(\"F1\",f1_score(y_true=testY, y_pred=predY))\n",
181 |     "    print(\"Recal\",recall_score(y_true=testY,y_pred=predY))\n",
182 |     "    print(\"Precision\",precision_score(y_true=testY, y_pred=predY))"
183 |    ]
184 |   }
185 |  ],
186 |  "metadata": {
187 |   "kernelspec": {
188 |    "display_name": "Python 3",
189 |    "language": "python",
190 |    "name": "python3"
191 |   },
192 |   "language_info": {
193 |    "codemirror_mode": {
194 |     "name": "ipython",
195 |     "version": 3
196 |    },
197 |    "file_extension": ".py",
198 |    "mimetype": "text/x-python",
199 |    "name": "python",
200 |    "nbconvert_exporter": "python",
201 |    "pygments_lexer": "ipython3",
202 |    "version": "3.6.5"
203 |   }
204 |  },
205 |  "nbformat": 4,
206 |  "nbformat_minor": 2
207 | }
208 | 


--------------------------------------------------------------------------------
/XGB_2c.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 5,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import multiprocessing\n",
 10 |     "import xgboost as xgb\n",
 11 |     "import pandas as pd\n",
 12 |     "import numpy as np\n",
 13 |     "import pickle \n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "from sklearn.model_selection import train_test_split\n",
 16 |     "from sklearn.model_selection import StratifiedKFold\n",
 17 |     "from sklearn.preprocessing import MinMaxScaler\n",
 18 |     "from sklearn.metrics import accuracy_score\n",
 19 |     "from sklearn.metrics import f1_score\n",
 20 |     "from sklearn.metrics import precision_score\n",
 21 |     "from sklearn.metrics import recall_score\n",
 22 |     "from sklearn.preprocessing import Imputer\n",
 23 |     "import warnings\n",
 24 |     "warnings.filterwarnings(\"ignore\")\n",
 25 |     "#用来计算程序运行时间\n",
 26 |     "import datetime\n",
 27 |     "starttime = datetime.datetime.now()"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 2,
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "name": "stdout",
 37 |      "output_type": "stream",
 38 |      "text": [
 39 |       "       valence  arousal  dominance  liking  2cArousal  2cValence\n",
 40 |       "s01_0     7.71     7.60       6.90    7.83          1          1\n",
 41 |       "s01_1     8.10     7.31       7.28    8.47          1          1\n",
 42 |       "s01_2     8.58     7.54       9.00    7.08          1          1\n",
 43 |       "s01_3     4.94     6.01       6.12    8.06          0          0\n",
 44 |       "s01_4     6.96     3.92       7.19    6.05          1          1\n",
 45 |       "######读取特征（基于EEG）######\n"
 46 |      ]
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "#读取Y\n",
 51 |     "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n",
 52 |     "all_df_y['2cArousal'] = 0\n",
 53 |     "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n",
 54 |     "all_df_y['2cValence'] = 0\n",
 55 |     "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n",
 56 |     "print(all_df_y.head(5))\n",
 57 |     "y = all_df_y[[\"2cValence\"]]\n",
 58 |     "\n",
 59 |     "print(\"######读取特征（基于EEG）######\")\n",
 60 |     "#EEG特征表\n",
 61 |     "eegFeatureDF = pickle.load(open(\"./dump_file/eegFeatureDF\",\"rb\"))\n",
 62 |     "\n",
 63 |     "if True:\n",
 64 |     "    #加上早期的统计特征\n",
 65 |     "    for eegCH in range(1,33,1):\n",
 66 |     "        file_path = \"./dump_file/CH{}_eeg_feat_df\".format(eegCH)\n",
 67 |     "        tmpDF = pickle.load(open(file_path,\"rb\"))\n",
 68 |     "        eegFeatureDF = pd.concat([eegFeatureDF,tmpDF],axis=1)\n",
 69 |     "if True:#加上样本熵特征\n",
 70 |     "    filePath = \"./dump_file_sampEn/sampEnFeatures\".format(eegCH)\n",
 71 |     "    tmpDF = pickle.load(open(filePath,\"rb\"))\n",
 72 |     "    eegFeatureDF = pd.concat([eegFeatureDF,tmpDF],axis=1)"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 7,
 78 |    "metadata": {},
 79 |    "outputs": [
 80 |     {
 81 |      "name": "stdout",
 82 |      "output_type": "stream",
 83 |      "text": [
 84 |       "[seed:0]****************************************************\n",
 85 |       "######xgb classification ######\n",
 86 |       "ACC 0.6536458333333334\n",
 87 |       "F1 0.712742980561555\n",
 88 |       "Recal 0.7603686635944701\n",
 89 |       "Precision 0.6707317073170732\n",
 90 |       "[seed:100]****************************************************\n",
 91 |       "######xgb classification ######\n",
 92 |       "ACC 0.6302083333333334\n",
 93 |       "F1 0.7029288702928871\n",
 94 |       "Recal 0.7741935483870968\n",
 95 |       "Precision 0.6436781609195402\n",
 96 |       "[seed:200]****************************************************\n",
 97 |       "######xgb classification ######\n",
 98 |       "ACC 0.6119791666666666\n",
 99 |       "F1 0.6823027718550106\n",
100 |       "Recal 0.7373271889400922\n",
101 |       "Precision 0.6349206349206349\n",
102 |       "[seed:300]****************************************************\n",
103 |       "######xgb classification ######\n",
104 |       "ACC 0.6692708333333334\n",
105 |       "F1 0.7315010570824525\n",
106 |       "Recal 0.7972350230414746\n",
107 |       "Precision 0.67578125\n",
108 |       "[seed:400]****************************************************\n",
109 |       "######xgb classification ######\n",
110 |       "ACC 0.6223958333333334\n",
111 |       "F1 0.7034764826175869\n",
112 |       "Recal 0.7926267281105991\n",
113 |       "Precision 0.6323529411764706\n",
114 |       "[seed:500]****************************************************\n",
115 |       "######xgb classification ######\n",
116 |       "ACC 0.6354166666666666\n",
117 |       "F1 0.6956521739130435\n",
118 |       "Recal 0.7373271889400922\n",
119 |       "Precision 0.6584362139917695\n",
120 |       "[seed:600]****************************************************\n",
121 |       "######xgb classification ######\n",
122 |       "ACC 0.6432291666666666\n",
123 |       "F1 0.7103594080338267\n",
124 |       "Recal 0.7741935483870968\n",
125 |       "Precision 0.65625\n",
126 |       "[seed:700]****************************************************\n",
127 |       "######xgb classification ######\n",
128 |       "ACC 0.6458333333333334\n",
129 |       "F1 0.7043478260869565\n",
130 |       "Recal 0.7465437788018433\n",
131 |       "Precision 0.6666666666666666\n",
132 |       "[seed:800]****************************************************\n",
133 |       "######xgb classification ######\n",
134 |       "ACC 0.6276041666666666\n",
135 |       "F1 0.6963906581740976\n",
136 |       "Recal 0.7557603686635944\n",
137 |       "Precision 0.6456692913385826\n",
138 |       "[seed:900]****************************************************\n",
139 |       "######xgb classification ######\n",
140 |       "ACC 0.6666666666666666\n",
141 |       "F1 0.7241379310344828\n",
142 |       "Recal 0.7741935483870968\n",
143 |       "Precision 0.680161943319838\n"
144 |      ]
145 |     }
146 |    ],
147 |    "source": [
148 |     "for seed in [0,100,200,300,400,500,600,700,800,900]:\n",
149 |     "    print(\"[seed:{}]****************************************************\".format(seed))\n",
150 |     "    xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n",
151 |     "    xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n",
152 |     "    trainY = y.loc[xTrainIdx]\n",
153 |     "    testY = y.loc[xTestIdx]\n",
154 |     "    trainX = eegFeatureDF.loc[xTrainIdx]\n",
155 |     "    testX = eegFeatureDF.loc[xTestIdx]    \n",
156 |     "    data = trainX\n",
157 |     "    target = trainY   \n",
158 |     "    print(\"######xgb classification ######\")\n",
159 |     "    xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,\n",
160 |     "                                      objective='binary:logistic',booster='gbtree',n_jobs=10,\n",
161 |     "                                      subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,\n",
162 |     "                                      reg_alpha=0.5, reg_lambda=1.0,gamma=0,\n",
163 |     "                                      scale_pos_weight=1)\n",
164 |     "    xgb_model.fit(trainX.values,trainY.values)\n",
165 |     "    predY = xgb_model.predict(testX.values)\n",
166 |     "    print(\"ACC\",accuracy_score(y_true=testY, y_pred=predY))\n",
167 |     "    print(\"F1\",f1_score(y_true=testY, y_pred=predY))\n",
168 |     "    print(\"Recal\",recall_score(y_true=testY,y_pred=predY))\n",
169 |     "    print(\"Precision\",precision_score(y_true=testY, y_pred=predY))"
170 |    ]
171 |   }
172 |  ],
173 |  "metadata": {
174 |   "kernelspec": {
175 |    "display_name": "Python 3",
176 |    "language": "python",
177 |    "name": "python3"
178 |   },
179 |   "language_info": {
180 |    "codemirror_mode": {
181 |     "name": "ipython",
182 |     "version": 3
183 |    },
184 |    "file_extension": ".py",
185 |    "mimetype": "text/x-python",
186 |    "name": "python",
187 |    "nbconvert_exporter": "python",
188 |    "pygments_lexer": "ipython3",
189 |    "version": "3.6.5"
190 |   }
191 |  },
192 |  "nbformat": 4,
193 |  "nbformat_minor": 2
194 | }
195 | 


--------------------------------------------------------------------------------
/plot_test.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import matplotlib.pyplot as plt
  3 | plt.rcParams['font.sans-serif']=['SimHei']
  4 | plt.rcParams['axes.unicode_minus']=False
  5 | 
  6 | from mpl_toolkits.mplot3d import Axes3D
  7 | import numpy as np
  8 | import pickle 
  9 | import pandas as pd
 10 | 
 11 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
 12 | xgb_pred_Y = pickle.load(open("./dump_file/xgb_pred_Y","rb"))
 13 | test_Y = pickle.load(open("./dump_file/test_Y","rb"))
 14 | y_valence = pickle.load(open("./dump_file/y_valence","rb"))
 15 | y_arousal = pickle.load(open("./dump_file/y_arousal","rb"))
 16 | #画图DPI设定
 17 | DPI_SET = 100
 18 | 
 19 | if True:
 20 |     #########################愉悦度
 21 |     X = [1,2,3,4,5,6,7,8,9,10]
 22 |     Y_xgb = [1.675444,1.675363,1.680978,1.685009,1.691554,
 23 |              1.675277,1.674311,1.680342,1.691216,1.707188]
 24 |     Y_svr = [1.785470,1.785573,1.785693,1.785803,1.785853,
 25 |              1.785954,1.786118,1.786360,1.786597,1.786775]
 26 |     Y_ridge = [1.789497,1.790049,1.791245,1.792133,1.792958,1.793474,1.793847,
 27 |                1.794242,1.794706,1.795205]
 28 |     Y_linear = [1.76594288635,1.76594288635,1.76594288635,1.76594288635,1.76594288635,
 29 |                 1.76594288635,1.76594288635,1.76594288635,1.76594288635,1.76594288635,]
 30 |     Y_xgb.sort()
 31 |     Y_svr.sort()
 32 |     Y_ridge.sort()
 33 |     Y_linear.sort()
 34 |     plt.figure(dpi = DPI_SET)
 35 |     plt.plot(X,Y_xgb,'ro',label="xgboost")
 36 |     plt.plot(X,Y_svr,'bs',label="SVR")
 37 |     plt.plot(X,Y_ridge,'g^',label="Ridge")
 38 |     plt.plot(X,Y_linear,'k+',label="OLS")
 39 |     plt.grid(False)
 40 |     plt.xlabel(u"最佳MAE表现模型排名")
 41 |     plt.ylabel("MAE")
 42 |     plt.title(u'愉悦度预测最佳十次模型对应的MAE')
 43 |     plt.legend()
 44 |     plt.show()
 45 |     #plt.savefig('MAE',dpi=100)
 46 | if False:
 47 |     #####################唤醒度
 48 |     X = [1,2,3,4,5,6,7,8,9,10]
 49 |     Y_xgb = [1.687146,1.695196,1.692517,1.699958,1.696151,
 50 |              1.689644,1.689434,1.689765,1.692192,1.690135]
 51 |     Y_svr = [1.709018,1.709057,1.709095,1.709111,1.709189,1.709237,1.709258,
 52 |              1.709278,1.709373,1.709496]
 53 |     Y_ridge = [1.697378,1.697711,1.698095,1.698543,1.699079,
 54 |                1.699739,1.700587,1.701745,1.703502,1.707373]
 55 |     Y_linear = [1.71203616905,1.71203616905,1.71203616905,1.71203616905,1.71203616905,
 56 |                 1.71203616905,1.71203616905,1.71203616905,1.71203616905,1.71203616905,]
 57 |     Y_xgb.sort()
 58 |     Y_svr.sort()
 59 |     Y_ridge.sort()
 60 |     Y_linear.sort()
 61 |     plt.figure(dpi = DPI_SET)
 62 |     plt.plot(X,Y_xgb,'ro',label="xgboost")
 63 |     plt.plot(X,Y_svr,'bs',label="SVR")
 64 |     plt.plot(X,Y_ridge,'g^',label="Ridge")
 65 |     plt.plot(X,Y_linear,'k+',label="OLS")
 66 |     plt.grid(False)
 67 |     plt.xlabel(u"最佳MAE表现模型排名")
 68 |     plt.ylabel("MAE")
 69 |     plt.title(u'唤醒度预测最佳十次模型对应的MAE')
 70 |     plt.legend()
 71 |     plt.show()
 72 |     #plt.savefig('MAE',dpi=100)
 73 | 
 74 | if False:
 75 |     #corrs,愉悦度
 76 |     Y_corrs = [0.004346,0.005027,0.052310,0.027697,0.069725,0.043048,0.016981,
 77 |                0.011768,0.000304,0.033582,0.061108,0.056917,0.072331,0.065231,
 78 |                0.026672,0.024671,0.045470, 0.036574,0.062127,0.049776,0.024718,
 79 |                0.020680,0.082800,0.081184,0.076538,0.105749,0.108450,0.112452]
 80 |     X_features = [i for i in range(1,29,1)]
 81 |     plt.figure(dpi = DPI_SET)
 82 |     plt.bar(X_features,Y_corrs,color='red')
 83 |     plt.grid(False)
 84 |     plt.xlabel("特征编号")
 85 |     plt.ylabel("皮尔逊相关系数")
 86 |     plt.title("不同特征与愉悦度的皮尔逊相关系数")
 87 |     plt.legend()
 88 |     plt.show()
 89 | 
 90 | if False:
 91 |     #corrs,唤醒度
 92 |     Y_corrs = [0.048455,0.046454,0.023819,0.038387,0.018331,0.032164,0.025527,
 93 |                0.039727,0.042079,0.029262,0.015931,0.011253,0.002549,0.006947,
 94 |                0.003238,0.013122,0.022206,0.026620,0.014166,0.020778,0.001383,
 95 |                0.018818,0.014761,0.009047,0.023284,0.010995,0.000872,0.006976]
 96 |     X_features = [i for i in range(1,29,1)]
 97 |     plt.figure(dpi = DPI_SET)
 98 |     plt.bar(X_features,Y_corrs,color='red')
 99 |     plt.grid(False)
100 |     plt.xlabel("特征编号")
101 |     plt.ylabel("皮尔逊相关系数")
102 |     plt.title("不同特征与唤醒度的皮尔逊相关系数")
103 |     plt.legend()
104 |     plt.show()
105 | 
106 | if False:
107 |     #画出愉悦度，唤醒度的图
108 |     #x = all_df_y[all_df_y['valence']>5][all_df_y['arousal']>5]['valence']
109 |     #y = all_df_y[all_df_y['valence']>5][all_df_y['arousal']>5]['arousal']
110 |     x_high = all_df_y[all_df_y['valence']>=5]['valence']
111 |     y_high = all_df_y[all_df_y['valence']>=5]['arousal']
112 |     x_low = all_df_y[all_df_y['valence']<=5]['valence']
113 |     y_low = all_df_y[all_df_y['valence']<=5]['arousal']
114 |     plt.figure(dpi = DPI_SET)
115 |     plt.plot(x_high,y_high,'b.')
116 |     plt.plot(x_low,y_low,'y.')
117 |     plt.xlabel("愉悦度(valence)")
118 |     plt.ylabel("唤醒度(arousal)")
119 |     plt.title('样本愉悦度-唤醒度分布')
120 |     plt.plot()
121 | 
122 | if True:
123 |     #画出愉悦度，唤醒度的图,4个象限
124 |     #x = all_df_y[all_df_y['valence']>5][all_df_y['arousal']>5]['valence']
125 |     #y = all_df_y[all_df_y['valence']>5][all_df_y['arousal']>5]['arousal']
126 |     x_1 = all_df_y[all_df_y['valence']>=5][all_df_y['arousal']>=5]['valence']
127 |     y_1 = all_df_y[all_df_y['valence']>=5][all_df_y['arousal']>=5]['arousal']
128 |     x_2 = all_df_y[all_df_y['valence']<5][all_df_y['arousal']>5]['valence']
129 |     y_2 = all_df_y[all_df_y['valence']<5][all_df_y['arousal']>5]['arousal']
130 |     x_3 = all_df_y[all_df_y['valence']<=5][all_df_y['arousal']<=5]['valence']
131 |     y_3 = all_df_y[all_df_y['valence']<=5][all_df_y['arousal']<=5]['arousal']
132 |     x_4 = all_df_y[all_df_y['valence']>5][all_df_y['arousal']<5]['valence']
133 |     y_4 = all_df_y[all_df_y['valence']>5][all_df_y['arousal']<5]['arousal']
134 |     plt.figure(dpi = DPI_SET)
135 |     myMarkerSize = 3
136 |     plt.plot(x_1,y_1,'b.',markersize=myMarkerSize)
137 |     plt.plot(x_2,y_2,'y+',markersize=myMarkerSize)
138 |     plt.plot(x_3,y_3,'gs',markersize=myMarkerSize)
139 |     plt.plot(x_4,y_4,'r^',markersize=myMarkerSize)
140 |     #plt.xlabel("愉悦度(valence)")
141 |     #plt.ylabel("唤醒度(arousal)")
142 |     plt.xlabel("valence")
143 |     plt.ylabel("arousal")
144 |     #plt.title('样本愉悦度-唤醒度分布')
145 |     plt.show()
146 | 
147 | #统计样本个数
148 | #print("高愉悦度（5-9）个数：{}".format(len(x_high)))
149 | #print("高愉悦度（1-5）个数：{}".format(len(x_low)))
150 | 
151 | if True:
152 |     #画3D图像
153 |     x = all_df_y['valence']
154 |     y = all_df_y['arousal']
155 |     z = all_df_y['dominance'] 
156 |     fig = plt.figure(dpi = DPI_SET)
157 |     ax = Axes3D(fig)
158 |     ax.scatter(x, y, z,'r.')
159 |     ax.set_xlabel("valence")
160 |     ax.set_ylabel("arousal")
161 |     ax.set_zlabel("doninance")
162 |     #ax.set_title("样本愉悦度-唤醒度-支配度分布")
163 |     plt.show()
164 | 
165 | if False:
166 |     #画箱线图，愉悦度
167 |     df_test_Y = y_valence[['valence']]
168 |     df_test_Y.columns=['valence_true']
169 |     df_pred_Y = y_valence[['y_pred']]
170 |     df_pred_Y.columns=['valence_pred']
171 |     errors = abs(df_test_Y['valence_true'] - df_pred_Y['valence_pred'])
172 |     df_errors = pd.DataFrame(errors,index=test_Y.index,columns=['abs_errors']) 
173 |     df_result = pd.concat([df_test_Y,df_pred_Y],axis=1)
174 |     df_result = pd.concat([df_result,df_errors],axis=1)
175 |     for i in range(1,9):
176 |         df_tmp = df_result[(df_result['valence_true']>=i) & (df_result['valence_true']<i+1)]
177 |         locals()['errors_{}_{}'.format(i,i+1)] = abs(df_tmp['valence_true'] - df_tmp['valence_pred'])
178 |         print(i)
179 |     list_plt = [errors,errors_1_2,errors_2_3,errors_3_4,errors_4_5,errors_5_6,errors_6_7,errors_7_8,errors_8_9]
180 |     list_labels =['总体误差','[1,2]','[2,3]','[3,4]','[4,5]','[5,6]','[6,7]','[7,8]','[8,9]']
181 |     plt.figure(dpi = DPI_SET)
182 |     plt.boxplot(list_plt,labels=list_labels)
183 |     plt.ylabel("愉悦度误差MAE")
184 |     plt.title("愉悦度回归预测误差箱线图")
185 |     plt.show()
186 | 
187 | if False:
188 |     #画箱线图，唤醒度
189 |     df_test_Y = y_arousal[['arousal']]
190 |     df_test_Y.columns=['arousal_true']
191 |     df_pred_Y = y_arousal[['y_pred']]
192 |     df_pred_Y.columns=['arousal_pred']
193 |     errors = abs(df_test_Y['arousal_true'] - df_pred_Y['arousal_pred'])
194 |     df_errors = pd.DataFrame(errors,index=test_Y.index,columns=['abs_errors']) 
195 |     df_result = pd.concat([df_test_Y,df_pred_Y],axis=1)
196 |     df_result = pd.concat([df_result,df_errors],axis=1)
197 |     for i in range(1,9):
198 |         df_tmp = df_result[(df_result['arousal_true']>=i) & (df_result['arousal_true']<i+1)]
199 |         locals()['errors_{}_{}'.format(i,i+1)] = abs(df_tmp['arousal_true'] - df_tmp['arousal_pred'])
200 |         print(i)
201 |     list_plt = [errors,errors_1_2,errors_2_3,errors_3_4,errors_4_5,errors_5_6,errors_6_7,errors_7_8,errors_8_9]
202 |     list_labels =['总体误差','[1,2]','[2,3]','[3,4]','[4,5]','[5,6]','[6,7]','[7,8]','[8,9]']
203 |     plt.figure(dpi = DPI_SET)
204 |     plt.boxplot(list_plt,labels=list_labels)
205 |     plt.ylabel("唤醒度误差MAE")
206 |     plt.title("唤醒度回归预测误差箱线图")
207 |     plt.show()
208 | 
209 | if False:
210 |     #画出不同区间内预测的精确度，愉悦度
211 |     list_right = []
212 |     list_total = []
213 |     for i in range(1,9):
214 |         num_right = y_valence['2C_pred_true'][(y_valence['valence'] >=i) & (y_valence['valence'] <i+1)].sum()
215 |         num_total = len(y_valence['2C_pred_true'][(y_valence['valence'] >=i) & (y_valence['valence'] <i+1)])
216 |         list_right.append(num_right)
217 |         list_total.append(num_total)
218 |         print("[%d],right:%d,total:%d,res:%f"%(i,num_right,num_total,num_right/num_total))
219 |     list_x = ['总体误差','[1,2]','[2,3]','[3,4]','[4,5]','[5,6]','[6,7]','[7,8]','[8,9]']
220 |     list_y = [sum(list_right)/sum(list_total)*100]
221 |     for i in range(8):
222 |         tmp_res = list_right[i]/list_total[i]*100
223 |         list_y.append(tmp_res)
224 |     plt.figure(dpi = DPI_SET)
225 |     plt.bar(list_x,list_y,color='red')
226 |     plt.grid(False)
227 |     #plt.xlabel("特征编号")
228 |     plt.ylabel("预测精度%")
229 |     plt.title("愉悦度二分类预测精度")
230 |     plt.legend()
231 |     plt.show()
232 | 
233 | if False:
234 |     #画出不同区间内预测的精确度，唤醒度
235 |     list_right = []
236 |     list_total = []
237 |     for i in range(1,9):
238 |         num_right = y_arousal['2C_pred_true'][(y_arousal['arousal'] >=i) & (y_arousal['arousal'] <i+1)].sum()
239 |         num_total = len(y_arousal['2C_pred_true'][(y_arousal['arousal'] >=i) & (y_arousal['arousal'] <i+1)])
240 |         list_right.append(num_right)
241 |         list_total.append(num_total)
242 |         print("[%d],right:%d,total:%d,res:%f"%(i,num_right,num_total,num_right/num_total))
243 |     list_x = ['总体误差','[1,2]','[2,3]','[3,4]','[4,5]','[5,6]','[6,7]','[7,8]','[8,9]']
244 |     list_y = [sum(list_right)/sum(list_total)*100]
245 |     for i in range(8):
246 |         tmp_res = list_right[i]/list_total[i]*100
247 |         list_y.append(tmp_res)
248 |     plt.figure(dpi = DPI_SET)
249 |     plt.bar(list_x,list_y,color='red')
250 |     plt.grid(False)
251 |     #plt.xlabel("特征编号")
252 |     plt.ylabel("预测精度%")
253 |     plt.title("唤醒度二分类预测精度")
254 |     plt.legend()
255 |     plt.show()
256 | 
257 | if False:
258 |     #画某个FFT之后的值
259 |     scfft_df = pickle.load(open("./dump_file/scfft_df","rb"))
260 |     plt.plot(scfft_df.iloc[0,:])
261 | 
262 | 
263 | 
264 | 
265 | 
266 | 
267 | 


--------------------------------------------------------------------------------
/RSP_feat_extract.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sun Jan  7 21:02:45 2018
  4 | 
  5 | @author: jinyx
  6 | """
  7 | 
  8 | import pandas as pd
  9 | import numpy as np
 10 | from sklearn.model_selection import GroupKFold
 11 | import pickle 
 12 | import matplotlib.pyplot as plt
 13 | from config import *
 14 | import warnings
 15 | warnings.filterwarnings("ignore")
 16 | 
 17 | def rsp_mean(df):
 18 |     return df.mean(axis=1)
 19 | 
 20 | def rsp_median(df):
 21 |     return df.median(axis=1)
 22 | 
 23 | def rsp_std(df):
 24 |     return df.std(axis=1)
 25 | 
 26 | def rsp_min(df):
 27 |     return df.min(axis=1)
 28 | 
 29 | def rsp_max(df):
 30 |     return df.max(axis=1)
 31 | 
 32 | def rsp_range(df_max,df_min):
 33 |     return df_max['rsp_max']-df_min['rsp_min']
 34 | 
 35 | #最小值比率 = Mmin/N
 36 | def rsp_minRatio(all_df,rsp_min):
 37 |     all_df_T = all_df.T
 38 |     rsp_min_T = rsp_min.T
 39 |     rsp_minRatio_dict = {}
 40 |     for i in all_df.index.tolist():
 41 |         num_min = len( all_df_T[i][ all_df_T[i] == rsp_min_T.get_value(index='rsp_min',col=i)] )   
 42 |         rsp_minRatio_dict.update({i:num_min/8064.0})   
 43 |     rsp_minRatio_df = pd.DataFrame.from_dict(data=rsp_minRatio_dict,orient='index')
 44 |     rsp_minRatio_df.columns = ['rsp_minRatio']
 45 |     return rsp_minRatio_df
 46 | 
 47 | #最大值比率 = Nmax/N
 48 | def rsp_maxRatio(all_df,rsp_max):
 49 |     all_df_T = all_df.T
 50 |     rsp_max_T = rsp_max.T
 51 |     rsp_maxRatio_dict = {}
 52 |     for i in all_df.index.tolist():
 53 |         num_max = len( all_df_T[i][ all_df_T[i] == rsp_max_T.get_value(index='rsp_max',col=i)] )   
 54 |         rsp_maxRatio_dict.update({i:num_max/8064.0})    
 55 |     rsp_maxRatio_df = pd.DataFrame.from_dict(data=rsp_maxRatio_dict,orient='index')
 56 |     rsp_maxRatio_df.columns = ['rsp_maxRatio']
 57 |     return rsp_maxRatio_df   
 58 | 
 59 | #RSP一阶差分均值 
 60 | def rsp1Diff_mean(all_df):
 61 |     rsp1Diff_mean = all_df.diff(periods=1,axis=1).dropna(axis=1).mean(axis=1)    
 62 |     return rsp1Diff_mean
 63 | 
 64 | #RSP一阶差分中值
 65 | def rsp1Diff_median(all_df):
 66 |     rsp1Diff_median = all_df.diff(periods=1,axis=1).dropna(axis=1).median(axis=1)
 67 |     return rsp1Diff_median
 68 | 
 69 | #RSP一阶差分标准差
 70 | def rsp1Diff_std(all_df):
 71 |     rsp1Diff_std = all_df.diff(periods=1,axis=1).dropna(axis=1).std(axis=1)
 72 |     return rsp1Diff_std
 73 | 
 74 | def rsp1Diff_min(all_df):
 75 |     rsp1Diff_min = all_df.diff(periods=1,axis=1).dropna(axis=1).min(axis=1)
 76 |     return rsp1Diff_min
 77 |     
 78 | def rsp1Diff_max(all_df):
 79 |     rsp1Diff_max = all_df.diff(periods=1,axis=1).dropna(axis=1).max(axis=1)
 80 |     return rsp1Diff_max    
 81 | 
 82 | def rsp1Diff_range(rsp1Diff_max,rsp1Diff_min):
 83 |     return rsp1Diff_max['rsp1Diff_max']-rsp1Diff_min['rsp1Diff_min']
 84 | 
 85 | def rsp1Diff_minRatio(all_df,rsp1Diff_min):
 86 |     all_df_Diff_T = all_df.diff(periods=1,axis=1).dropna(axis=1).T
 87 |     rsp1Diff_min_T = rsp1Diff_min.T
 88 |     rsp1Diff_minRatio_dict = {}
 89 |     for i in all_df.index.tolist():
 90 |         num_min = len( all_df_Diff_T[i][ all_df_Diff_T[i] == rsp1Diff_min_T.get_value(index='rsp1Diff_min',col=i)])
 91 |         rsp1Diff_minRatio_dict.update({i:num_min/8063.0})
 92 |     rsp1Diff_minRatio_df = pd.DataFrame.from_dict(data=rsp1Diff_minRatio_dict,orient='index')
 93 |     return rsp1Diff_minRatio_df
 94 | 
 95 | def rsp1Diff_maxRatio(all_df,rsp1Diff_max):
 96 |     all_df_Diff_T = all_df.diff(periods=1,axis=1).dropna(axis=1).T
 97 |     rsp1Diff_max_T = rsp1Diff_max.T
 98 |     rsp1Diff_maxRatio_dict = {}
 99 |     for i in all_df.index.tolist():
100 |         num_max = len( all_df_Diff_T[i][all_df_Diff_T[i] == rsp1Diff_max_T.get_value(index='rsp1Diff_max',col=i)])
101 |         rsp1Diff_maxRatio_dict.update({i:num_max/8063.0})
102 |     rsp1Diff_maxRatio_df = pd.DataFrame.from_dict(data=rsp1Diff_maxRatio_dict,orient='index')
103 |     return rsp1Diff_maxRatio_df
104 | 
105 | def rsp2Diff_std(all_df):
106 |     rsp2Diff_std = all_df.diff(periods=2,axis=1).dropna(axis=1).std(axis=1)
107 |     return rsp2Diff_std
108 | 
109 | def rsp2Diff_min(all_df):
110 |     rsp2Diff_min = all_df.diff(periods=2,axis=1).dropna(axis=1).min(axis=1)
111 |     return rsp2Diff_min
112 | 
113 | def rsp2Diff_max(all_df):
114 |     rsp2Diff_max = all_df.diff(periods=2,axis=1).dropna(axis=1).max(axis=1)
115 |     return rsp2Diff_max
116 | 
117 | def rsp2Diff_range(rsp2Diff_max,rsp2Diff_min):
118 |     rsp2Diff_range = rsp2Diff_max['rsp2Diff_max']-rsp2Diff_min['rsp2Diff_min']
119 |     return rsp2Diff_range
120 | 
121 | def rsp2Diff_minRatio(all_df,rsp2Diff_min):
122 |     all_df_2Diff_T = all_df.diff(periods=2,axis=1).dropna(axis=1).T
123 |     rsp2Diff_min_T = rsp2Diff_min.T
124 |     rsp2Diff_minRatio_dict = {}
125 |     for i in all_df.index.tolist():
126 |         num_min = len( all_df_2Diff_T[i][all_df_2Diff_T[i] == rsp2Diff_min_T.get_value(index='rsp2Diff_min',col=i)] )
127 |         rsp2Diff_minRatio_dict.update({i:num_min/8062.0})
128 |     rsp2Diff_minRatio_df = pd.DataFrame.from_dict(data=rsp2Diff_minRatio_dict,orient='index')
129 |     return rsp2Diff_minRatio_df
130 |         
131 | def rsp2Diff_maxRatio(all_df,rsp2Diff_max):
132 |     all_df_2Diff_T = all_df.diff(periods=2,axis=1).dropna(axis=1).T
133 |     rsp2Diff_max_T = rsp2Diff_max.T
134 |     rsp2Diff_maxRatio_dict = {}
135 |     for i in all_df.index.tolist():
136 |         num_max = len( all_df_2Diff_T[i][all_df_2Diff_T[i] == rsp2Diff_max_T.get_value(index='rsp2Diff_max',col=i)] )
137 |         rsp2Diff_maxRatio_dict.update({i:num_max/8062.0})
138 |     rsp2Diff_maxRatio_df = pd.DataFrame.from_dict(data=rsp2Diff_maxRatio_dict,orient='index')
139 |     return rsp2Diff_maxRatio_df
140 | 
141 | #RSP DFT(FFT)频域数据
142 | def rspfft(all_df):
143 |     rspfft_df = pd.DataFrame()
144 |     for i in all_df_RSP_x.index.tolist():
145 |         temp_rspfft = pd.DataFrame(np.fft.fft(all_df_RSP_x.loc[i,:].values)).T
146 |         temp_rspfft.index = [i]
147 |         rspfft_df = rspfft_df.append(temp_rspfft)
148 |     return rspfft_df
149 |         
150 | #RSP 频域中值
151 | def rspfft_mean(rspfft_df):
152 |     rspfft_mean = rspfft_df.mean(axis=1)
153 |     return rspfft_mean
154 | 
155 | def rspfft_median(rspfft_df):
156 |     rspfft_median = rspfft_df.median(axis=1)
157 |     return rspfft_median
158 | 
159 | def rspfft_std(rspfft_df):
160 |     rspfft_std = rspfft_df.std(axis=1)
161 |     return rspfft_std
162 | 
163 | def rspfft_min(rspfft_df):
164 |     rspfft_min = rspfft_df.min(axis=1)
165 |     return rspfft_min
166 | 
167 | def rspfft_max(rspfft_df):
168 |     rspfft_max = rspfft_df.max(axis=1)
169 |     return rspfft_max
170 | 
171 | def rspfft_range(rspfft_max,rspfft_min):
172 |     rspfft_range = rspfft_max['rspfft_max']-rspfft_min['rspfft_min']
173 |     return rspfft_range
174 | 
175 | def get_123count(df):
176 |     tmp_df =pd.DataFrame()
177 |     for i in range(0,40,1):
178 |         num_1 = len(df[i][ df[i]==1 ])
179 |         num_2 = len(df[i][ df[i]==2 ])
180 |         num_3 = len(df[i][ df[i]==3 ])
181 |         list_num = [num_1,num_2,num_3]
182 |         tmp_df = pd.concat([tmp_df,pd.DataFrame(list_num)],axis=1)  
183 |     tmp_df.columns = range(0,40,1)
184 |     tmp_df.index = ['num_1','num_2','num_3']
185 |     return tmp_df
186 |     
187 |     
188 |         
189 | if __name__ == '__main__':
190 |     #read file 
191 |     all_df_RSP_x = pickle.load(open("./dump_file/all_df_RSP_x","rb"))
192 |     
193 |     ###########################################################################
194 |     if True :
195 |         rsp_mean = pd.DataFrame(rsp_mean(all_df_RSP_x),columns=['rsp_mean'])
196 |         rsp_median = pd.DataFrame(rsp_median(all_df_RSP_x),columns=['rsp_median'])
197 |         rsp_std = pd.DataFrame(rsp_std(all_df_RSP_x),columns=['rsp_std'])
198 |         rsp_min = pd.DataFrame(rsp_min(all_df_RSP_x),columns=['rsp_min'])
199 |         rsp_max = pd.DataFrame(rsp_max(all_df_RSP_x),columns=['rsp_max'])
200 |         rsp_range = pd.DataFrame(rsp_range(rsp_max,rsp_min),columns=['rsp_range'])
201 |         rsp_minRatio = pd.DataFrame(rsp_minRatio(all_df_RSP_x,rsp_min),columns=['rsp_minRatio'])
202 |         rsp_maxRatio = pd.DataFrame(rsp_maxRatio(all_df_RSP_x,rsp_max),columns=['rsp_maxRatio'])
203 |         
204 |         rsp1Diff_mean = pd.DataFrame( rsp1Diff_mean(all_df_RSP_x),columns=['rsp1Diff_mean'])
205 |         rsp1Diff_median = pd.DataFrame( rsp1Diff_median(all_df_RSP_x),columns=['rsp1Diff_median'] )
206 |         rsp1Diff_std = pd.DataFrame( rsp1Diff_std(all_df_RSP_x),columns=['rsp1Diff_std'])
207 |         rsp1Diff_min = pd.DataFrame( rsp1Diff_min(all_df_RSP_x),columns=['rsp1Diff_min'])
208 |         rsp1Diff_max = pd.DataFrame( rsp1Diff_max(all_df_RSP_x),columns=['rsp1Diff_max'])
209 |         rsp1Diff_range = pd.DataFrame( rsp1Diff_range(rsp1Diff_max,rsp1Diff_min),columns=['rsp1Diff_range'])
210 |         rsp1Diff_minRatio = rsp1Diff_minRatio(all_df_RSP_x,rsp1Diff_min)
211 |         rsp1Diff_minRatio.columns=['rsp1Diff_minRatio']
212 |         rsp1Diff_maxRatio = rsp1Diff_maxRatio(all_df_RSP_x,rsp1Diff_max)
213 |         rsp1Diff_maxRatio.columns=['rsp1Diff_maxRatio']
214 |         
215 |         rsp2Diff_std = pd.DataFrame( rsp2Diff_std(all_df_RSP_x),columns=['rsp2Diff_std'] )
216 |         rsp2Diff_min = pd.DataFrame( rsp2Diff_min(all_df_RSP_x),columns=['rsp2Diff_min'] ) 
217 |         rsp2Diff_max = pd.DataFrame( rsp2Diff_max(all_df_RSP_x),columns=['rsp2Diff_max'] )
218 |         rsp2Diff_range = pd.DataFrame(rsp2Diff_range(rsp2Diff_max,rsp2Diff_min),columns=['rsp2Diff_range'])
219 |         rsp2Diff_minRatio = rsp2Diff_minRatio(all_df_RSP_x,rsp2Diff_min)
220 |         rsp2Diff_minRatio.columns=['rsp2Diff_minRatio']
221 |         rsp2Diff_maxRatio = rsp2Diff_maxRatio(all_df_RSP_x,rsp2Diff_max)
222 |         rsp2Diff_maxRatio.columns=['rsp2Diff_maxRatio']
223 | 
224 |         #FFT运算比较耗费时间，False直接读取跑过的文件
225 |         if False:
226 |             rspfft_df = rspfft(all_df_RSP_x)
227 |             pickle.dump(rspfft_df,open("./dump_file/rspfft_df","wb"))
228 |         else:
229 |             rspfft_df = pickle.load(open("./dump_file/rspfft_df","rb"))
230 |         
231 |         rspfft_mean = pd.DataFrame( rspfft_mean(rspfft_df),columns=['rspfft_mean'])
232 |         rspfft_median = pd.DataFrame( rspfft_median(rspfft_df),columns=['rspfft_median'])
233 |         rspfft_std = pd.DataFrame( rspfft_std(rspfft_df),columns=['rspfft_std'])
234 |         rspfft_min = pd.DataFrame( rspfft_min(rspfft_df),columns=['rspfft_min'])
235 |         rspfft_max = pd.DataFrame( rspfft_max(rspfft_df),columns=['rspfft_max'])
236 |         rspfft_range = pd.DataFrame( rspfft_range(rspfft_max,rspfft_min),columns=['rspfft_range'])
237 |        
238 |         feature_list = ['rsp_mean','rsp_median','rsp_std','rsp_min','rsp_max','rsp_range',
239 |                         'rsp_minRatio','rsp_maxRatio','rsp1Diff_mean','rsp1Diff_median',
240 |                         'rsp1Diff_std','rsp1Diff_min','rsp1Diff_max','rsp1Diff_range',
241 |                         'rsp1Diff_minRatio','rsp1Diff_maxRatio','rsp2Diff_std',
242 |                         'rsp2Diff_min','rsp2Diff_max','rsp2Diff_range','rsp2Diff_minRatio',
243 |                         'rsp2Diff_maxRatio','rspfft_mean','rspfft_median','rspfft_std',
244 |                         'rspfft_min','rspfft_max','rspfft_range']
245 |         temp_feature_df = pd.DataFrame()
246 |         for i in feature_list:
247 |             temp_feature_df = pd.concat( [locals()[i],temp_feature_df],axis=1)
248 |             
249 |         RSP_feature_df = temp_feature_df
250 |         pickle.dump(RSP_feature_df,open("./dump_file/RSP_feature_df","wb"))
251 |         ######################################################################
252 | 
253 | 
254 |         
255 | 
256 |     
257 |     
258 |     
259 |     
260 |     
261 |     
262 |     
263 |     
264 |     
265 |     
266 |     
267 |     
268 |  


--------------------------------------------------------------------------------
/GCF_2cGSR_TimeDomain.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import multiprocessing\n",
 10 |     "from GCForest import gcForest\n",
 11 |     "import pandas as pd\n",
 12 |     "import numpy as np\n",
 13 |     "import pickle \n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "from sklearn.model_selection import train_test_split\n",
 16 |     "from sklearn.model_selection import StratifiedKFold\n",
 17 |     "from sklearn.preprocessing import MinMaxScaler\n",
 18 |     "from sklearn.metrics import accuracy_score\n",
 19 |     "from sklearn.metrics import f1_score\n",
 20 |     "from sklearn.metrics import precision_score\n",
 21 |     "from sklearn.metrics import recall_score\n",
 22 |     "from sklearn.preprocessing import Imputer\n",
 23 |     "import warnings\n",
 24 |     "warnings.filterwarnings(\"ignore\")\n",
 25 |     "#用来计算程序运行时间\n",
 26 |     "import datetime\n",
 27 |     "starttime = datetime.datetime.now()"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 2,
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "name": "stdout",
 37 |      "output_type": "stream",
 38 |      "text": [
 39 |       "       valence  arousal  dominance  liking  2cArousal  2cValence\n",
 40 |       "s01_0     7.71     7.60       6.90    7.83          1          1\n",
 41 |       "s01_1     8.10     7.31       7.28    8.47          1          1\n",
 42 |       "s01_2     8.58     7.54       9.00    7.08          1          1\n",
 43 |       "s01_3     4.94     6.01       6.12    8.06          0          0\n",
 44 |       "s01_4     6.96     3.92       7.19    6.05          1          1\n",
 45 |       "[seed:0]****************************************************\n",
 46 |       "GSR running multi-grain scan\n",
 47 |       "Slicing Sequence...\n",
 48 |       "Training MGS Random Forests...\n",
 49 |       "Slicing Sequence...\n",
 50 |       "(896, 492)\n",
 51 |       "(384, 492)\n",
 52 |       "Adding/Training Layer, n_layer=1\n",
 53 |       "Layer validation accuracy = 0.6833333333333333\n",
 54 |       "Adding/Training Layer, n_layer=2\n",
 55 |       "Layer validation accuracy = 0.6722222222222223\n",
 56 |       "ACC 0.6692708333333334\n",
 57 |       "F1 0.7514677103718199\n",
 58 |       "Recal 0.8847926267281107\n",
 59 |       "Precision 0.6530612244897959\n",
 60 |       "[seed:100]****************************************************\n",
 61 |       "GSR running multi-grain scan\n",
 62 |       "Slicing Sequence...\n",
 63 |       "Training MGS Random Forests...\n",
 64 |       "Slicing Sequence...\n",
 65 |       "(896, 492)\n",
 66 |       "(384, 492)\n",
 67 |       "Adding/Training Layer, n_layer=1\n",
 68 |       "Layer validation accuracy = 0.6777777777777778\n",
 69 |       "Adding/Training Layer, n_layer=2\n",
 70 |       "Layer validation accuracy = 0.6777777777777778\n",
 71 |       "ACC 0.65625\n",
 72 |       "F1 0.7471264367816092\n",
 73 |       "Recal 0.8986175115207373\n",
 74 |       "Precision 0.639344262295082\n",
 75 |       "[seed:200]****************************************************\n",
 76 |       "GSR running multi-grain scan\n",
 77 |       "Slicing Sequence...\n",
 78 |       "Training MGS Random Forests...\n",
 79 |       "Slicing Sequence...\n",
 80 |       "(896, 492)\n",
 81 |       "(384, 492)\n",
 82 |       "Adding/Training Layer, n_layer=1\n",
 83 |       "Layer validation accuracy = 0.6944444444444444\n",
 84 |       "Adding/Training Layer, n_layer=2\n",
 85 |       "Layer validation accuracy = 0.6944444444444444\n",
 86 |       "ACC 0.6119791666666666\n",
 87 |       "F1 0.7161904761904763\n",
 88 |       "Recal 0.8663594470046083\n",
 89 |       "Precision 0.6103896103896104\n",
 90 |       "[seed:300]****************************************************\n",
 91 |       "GSR running multi-grain scan\n",
 92 |       "Slicing Sequence...\n",
 93 |       "Training MGS Random Forests...\n",
 94 |       "Slicing Sequence...\n",
 95 |       "(896, 492)\n",
 96 |       "(384, 492)\n",
 97 |       "Adding/Training Layer, n_layer=1\n",
 98 |       "Layer validation accuracy = 0.6611111111111111\n",
 99 |       "Adding/Training Layer, n_layer=2\n",
100 |       "Layer validation accuracy = 0.6444444444444445\n",
101 |       "ACC 0.6666666666666666\n",
102 |       "F1 0.7557251908396945\n",
103 |       "Recal 0.9124423963133641\n",
104 |       "Precision 0.6449511400651465\n",
105 |       "[seed:400]****************************************************\n",
106 |       "GSR running multi-grain scan\n",
107 |       "Slicing Sequence...\n",
108 |       "Training MGS Random Forests...\n",
109 |       "Slicing Sequence...\n",
110 |       "(896, 492)\n",
111 |       "(384, 492)\n",
112 |       "Adding/Training Layer, n_layer=1\n",
113 |       "Layer validation accuracy = 0.6333333333333333\n",
114 |       "Adding/Training Layer, n_layer=2\n",
115 |       "Layer validation accuracy = 0.6277777777777778\n",
116 |       "ACC 0.6380208333333334\n",
117 |       "F1 0.7372400756143668\n",
118 |       "Recal 0.8986175115207373\n",
119 |       "Precision 0.625\n",
120 |       "[seed:500]****************************************************\n",
121 |       "GSR running multi-grain scan\n",
122 |       "Slicing Sequence...\n",
123 |       "Training MGS Random Forests...\n",
124 |       "Slicing Sequence...\n",
125 |       "(896, 492)\n",
126 |       "(384, 492)\n",
127 |       "Adding/Training Layer, n_layer=1\n",
128 |       "Layer validation accuracy = 0.6722222222222223\n",
129 |       "Adding/Training Layer, n_layer=2\n",
130 |       "Layer validation accuracy = 0.6777777777777778\n",
131 |       "Adding/Training Layer, n_layer=3\n",
132 |       "Layer validation accuracy = 0.6611111111111111\n",
133 |       "ACC 0.6614583333333334\n",
134 |       "F1 0.751908396946565\n",
135 |       "Recal 0.9078341013824884\n",
136 |       "Precision 0.6416938110749185\n",
137 |       "[seed:600]****************************************************\n",
138 |       "GSR running multi-grain scan\n",
139 |       "Slicing Sequence...\n",
140 |       "Training MGS Random Forests...\n",
141 |       "Slicing Sequence...\n",
142 |       "(896, 492)\n",
143 |       "(384, 492)\n",
144 |       "Adding/Training Layer, n_layer=1\n",
145 |       "Layer validation accuracy = 0.6777777777777778\n",
146 |       "Adding/Training Layer, n_layer=2\n",
147 |       "Layer validation accuracy = 0.6888888888888889\n",
148 |       "Adding/Training Layer, n_layer=3\n",
149 |       "Layer validation accuracy = 0.6888888888888889\n",
150 |       "ACC 0.65625\n",
151 |       "F1 0.7490494296577948\n",
152 |       "Recal 0.9078341013824884\n",
153 |       "Precision 0.6375404530744336\n",
154 |       "[seed:700]****************************************************\n",
155 |       "GSR running multi-grain scan\n",
156 |       "Slicing Sequence...\n",
157 |       "Training MGS Random Forests...\n",
158 |       "Slicing Sequence...\n",
159 |       "(896, 492)\n",
160 |       "(384, 492)\n",
161 |       "Adding/Training Layer, n_layer=1\n",
162 |       "Layer validation accuracy = 0.65\n",
163 |       "Adding/Training Layer, n_layer=2\n",
164 |       "Layer validation accuracy = 0.6388888888888888\n",
165 |       "ACC 0.6796875\n",
166 |       "F1 0.7700934579439251\n",
167 |       "Recal 0.9493087557603687\n",
168 |       "Precision 0.6477987421383647\n",
169 |       "[seed:800]****************************************************\n",
170 |       "GSR running multi-grain scan\n",
171 |       "Slicing Sequence...\n",
172 |       "Training MGS Random Forests...\n",
173 |       "Slicing Sequence...\n",
174 |       "(896, 492)\n",
175 |       "(384, 492)\n",
176 |       "Adding/Training Layer, n_layer=1\n",
177 |       "Layer validation accuracy = 0.6833333333333333\n",
178 |       "Adding/Training Layer, n_layer=2\n",
179 |       "Layer validation accuracy = 0.6833333333333333\n",
180 |       "ACC 0.640625\n",
181 |       "F1 0.7283464566929133\n",
182 |       "Recal 0.8525345622119815\n",
183 |       "Precision 0.6357388316151202\n",
184 |       "[seed:900]****************************************************\n",
185 |       "GSR running multi-grain scan\n",
186 |       "Slicing Sequence...\n",
187 |       "Training MGS Random Forests...\n",
188 |       "Slicing Sequence...\n",
189 |       "(896, 492)\n",
190 |       "(384, 492)\n",
191 |       "Adding/Training Layer, n_layer=1\n",
192 |       "Layer validation accuracy = 0.6611111111111111\n",
193 |       "Adding/Training Layer, n_layer=2\n",
194 |       "Layer validation accuracy = 0.6555555555555556\n",
195 |       "ACC 0.65625\n",
196 |       "F1 0.7528089887640451\n",
197 |       "Recal 0.9262672811059908\n",
198 |       "Precision 0.6340694006309149\n"
199 |      ]
200 |     }
201 |    ],
202 |    "source": [
203 |     "#读取Y\n",
204 |     "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n",
205 |     "all_df_y['2cArousal'] = 0\n",
206 |     "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n",
207 |     "all_df_y['2cValence'] = 0\n",
208 |     "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n",
209 |     "print(all_df_y.head(5))\n",
210 |     "\n",
211 |     "#读取1个通道的GSR数据，每个通道包含32×40=1280个信号样本\n",
212 |     "#每个样本向量大小为8064点（63s*128Hz）\n",
213 |     "all_df_GSR_x = pickle.load(open(\"./dump_file/all_df_GSR_x\",\"rb\"))\n",
214 |     "\n",
215 |     "#y = all_df_y[['2cArousal']]\n",
216 |     "y = all_df_y[['2cValence']]\n",
217 |     "for seed in [0,100,200,300,400,500,600,700,800,900]:\n",
218 |     "    print(\"[seed:{}]****************************************************\".format(seed))\n",
219 |     "    xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n",
220 |     "    xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n",
221 |     "    y_tr = y.loc[xTrainIdx]\n",
222 |     "    y_te = y.loc[xTestIdx]\n",
223 |     "    GSRTrainSet = all_df_GSR_x.loc[xTrainIdx]\n",
224 |     "    GSRTestSet = all_df_GSR_x.loc[xTestIdx]\n",
225 |     "    myWindowsSize = 256\n",
226 |     "    myStrideSize = 64\n",
227 |     "    gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n",
228 |     "                   min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n",
229 |     "    if True:     \n",
230 |     "        print(\"GSR running multi-grain scan\")\n",
231 |     "        xTrain,yTrain = GSRTrainSet.values,y_tr.values\n",
232 |     "        xTest = GSRTestSet.values   \n",
233 |     "        GSR_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)\n",
234 |     "        GSR_mgsTestVector = gcf.mg_scanning(xTest)\n",
235 |     "        filePath = \"./dump_file_V2/GSR_mgsTrainVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
236 |     "        pickle.dump(GSR_mgsTrainVector,open(filePath,\"wb\"))\n",
237 |     "        filePath = \"./dump_file_V2/GSR_mgsTestVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
238 |     "        pickle.dump(GSR_mgsTestVector,open(filePath,\"wb\"))\n",
239 |     "    else:\n",
240 |     "        filePath = \"./dump_file_V2/GSR_mgsTrainVector_{}_{}\".format(myWindowsSize,myStrideSize)\n",
241 |     "        GSR_mgsTrainVector = pickle.load(open(filePath,\"rb\"))\n",
242 |     "        filePath = \"./dump_file_V2/GSR_mgsTestVector_{}_{}\".format(myWindowsSize,myStrideSize)\n",
243 |     "        GSR_mgsTestVector = pickle.load(open(filePath,\"rb\"))\n",
244 |     "    X_tr_vector = GSR_mgsTrainVector\n",
245 |     "    X_te_vector = GSR_mgsTestVector\n",
246 |     "    print(X_tr_vector.shape)\n",
247 |     "    print(X_te_vector.shape)\n",
248 |     "    #有缺失值，填充下\n",
249 |     "    X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values\n",
250 |     "    X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values\n",
251 |     "    _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)\n",
252 |     "    pred_proba = gcf.cascade_forest(X_te_vector_fillna)\n",
253 |     "    tmp = np.mean(pred_proba, axis=0)\n",
254 |     "    preds = np.argmax(tmp, axis=1)\n",
255 |     "    print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n",
256 |     "    print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n",
257 |     "    print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n",
258 |     "    print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))"
259 |    ]
260 |   }
261 |  ],
262 |  "metadata": {
263 |   "kernelspec": {
264 |    "display_name": "Python 3",
265 |    "language": "python",
266 |    "name": "python3"
267 |   },
268 |   "language_info": {
269 |    "codemirror_mode": {
270 |     "name": "ipython",
271 |     "version": 3
272 |    },
273 |    "file_extension": ".py",
274 |    "mimetype": "text/x-python",
275 |    "name": "python",
276 |    "nbconvert_exporter": "python",
277 |    "pygments_lexer": "ipython3",
278 |    "version": "3.6.5"
279 |   }
280 |  },
281 |  "nbformat": 4,
282 |  "nbformat_minor": 2
283 | }
284 | 


--------------------------------------------------------------------------------
/EEG_EMD.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "#EEG 经验模态分解\n",
 10 |     "import sampleEntropy as se\n",
 11 |     "from pyhht.visualization import plot_imfs\n",
 12 |     "from pyhht.emd import EMD\n",
 13 |     "import numpy as np\n",
 14 |     "import scipy as sp\n",
 15 |     "import pandas as pd\n",
 16 |     "import pickle \n",
 17 |     "import matplotlib.pyplot as plt\n",
 18 |     "import multiprocessing\n",
 19 |     "import warnings\n",
 20 |     "warnings.filterwarnings(\"ignore\")\n",
 21 |     "#用来计算程序运行时间\n",
 22 |     "import datetime\n",
 23 |     "starttime = datetime.datetime.now()"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 2,
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "#读取32个通道的EEG数据，每个通道包含32×40=1280个信号样本（人次×每人次40实验）\n",
 33 |     "#每个样本向量大小为8064点（63s*128Hz）\n",
 34 |     "for eeg_CH in range(1,33,1):\n",
 35 |     "    file_path = \"./dump_file/CH{}_df_EEG_x\".format(eeg_CH)\n",
 36 |     "    locals()['CH{}_df_EEG_x'.format(eeg_CH)] = pickle.load(open(file_path,\"rb\"))"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 3,
 42 |    "metadata": {},
 43 |    "outputs": [
 44 |     {
 45 |      "data": {
 46 |       "text/plain": [
 47 |        "'matplotlib画图\\nplt.figure(dpi=300)\\nplt.subplot(4,1,1)\\nplt.ylabel(\"EEG signal\")\\nplt.plot(x)\\nfor i in range(0,3,1):\\n    plt.subplot(4,1,i+2)\\n    plt.ylabel(\"IMF{}\".format(i+1))\\n    plt.plot(imfs[i])\\nplt.show()\\nplt.figure(dpi=300)\\nfor i in [3,4,5,6]:\\n    plt.subplot(4,1,i-2)\\n    plt.ylabel(\"IMF{}\".format(i+1))\\n    plt.plot(imfs[i])\\nplt.show()\\nplt.figure(dpi=300)\\nfor i in [7,8,9,10]:\\n    plt.subplot(4,1,i-6)\\n    if i==10:\\n        plt.ylabel(\"RES\")        \\n    else:\\n        plt.ylabel(\"IMF{}\".format(i+1))\\n    plt.plot(imfs[i])\\nplt.show()\\n'"
 48 |       ]
 49 |      },
 50 |      "execution_count": 3,
 51 |      "metadata": {},
 52 |      "output_type": "execute_result"
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "'''pyhht自带的画图\n",
 57 |     "t = range(0,8064,1)\n",
 58 |     "x = CH1_df_EEG_x.iloc[20,:]\n",
 59 |     "decomposer = EMD(x)\n",
 60 |     "imfs = decomposer.decompose()\n",
 61 |     "#plot_imfs(x, imfs, t)\n",
 62 |     "'''\n",
 63 |     "'''matplotlib画图\n",
 64 |     "plt.figure(dpi=300)\n",
 65 |     "plt.subplot(4,1,1)\n",
 66 |     "plt.ylabel(\"EEG signal\")\n",
 67 |     "plt.plot(x)\n",
 68 |     "for i in range(0,3,1):\n",
 69 |     "    plt.subplot(4,1,i+2)\n",
 70 |     "    plt.ylabel(\"IMF{}\".format(i+1))\n",
 71 |     "    plt.plot(imfs[i])\n",
 72 |     "plt.show()\n",
 73 |     "plt.figure(dpi=300)\n",
 74 |     "for i in [3,4,5,6]:\n",
 75 |     "    plt.subplot(4,1,i-2)\n",
 76 |     "    plt.ylabel(\"IMF{}\".format(i+1))\n",
 77 |     "    plt.plot(imfs[i])\n",
 78 |     "plt.show()\n",
 79 |     "plt.figure(dpi=300)\n",
 80 |     "for i in [7,8,9,10]:\n",
 81 |     "    plt.subplot(4,1,i-6)\n",
 82 |     "    if i==10:\n",
 83 |     "        plt.ylabel(\"RES\")        \n",
 84 |     "    else:\n",
 85 |     "        plt.ylabel(\"IMF{}\".format(i+1))\n",
 86 |     "    plt.plot(imfs[i])\n",
 87 |     "plt.show()\n",
 88 |     "'''"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": []
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 4,
101 |    "metadata": {},
102 |    "outputs": [
103 |     {
104 |      "data": {
105 |       "text/plain": [
106 |        "'\\nif False:\\n    for eegCH in range(1,33,1):\\n        locals()[\"p{}\".format(eegCH)] = multiprocessing.Process(target = runEMD, args = (eegCH,))\\n        locals()[\"p{}\".format(eegCH)].start()\\n        print(\"p{}.pid:{}\".format(eegCH,locals()[\"p{}\".format(eegCH)].pid))\\nelse:\\n    runEMD(eegCH=2)\\n'"
107 |       ]
108 |      },
109 |      "execution_count": 4,
110 |      "metadata": {},
111 |      "output_type": "execute_result"
112 |     }
113 |    ],
114 |    "source": [
115 |     "def runEMD(eegCH):\n",
116 |     "    #用来计算程序运行时间\n",
117 |     "    import datetime\n",
118 |     "    import pickle\n",
119 |     "    starttime = datetime.datetime.now()\n",
120 |     "    print(\"[CH{}]job start!\".format(eegCH))\n",
121 |     "    file_path = \"./dump_file/CH{}_df_EEG_x\".format(eegCH)\n",
122 |     "    locals()['CH{}_df_EEG_x'.format(eegCH)] = pickle.load(open(file_path,\"rb\"))\n",
123 |     "    if eegCH != 2:\n",
124 |     "        for i in range(0,1280,1):\n",
125 |     "            x = locals()['CH{}_df_EEG_x'.format(eegCH)].iloc[i,:]\n",
126 |     "            decomposer = EMD(x)\n",
127 |     "            imfs = decomposer.decompose()\n",
128 |     "            file_path = \"./dump_file_imfs/CH{}_imfs_{}\".format(eegCH,i)\n",
129 |     "            pickle.dump(imfs,open(file_path,\"wb\"))\n",
130 |     "    else:\n",
131 |     "        dfCH2 = abs(locals()['CH{}_df_EEG_x'.format(eegCH)])\n",
132 |     "        for i in range(0,1280,1):\n",
133 |     "            x = dfCH2.iloc[i,:]\n",
134 |     "            decomposer = EMD(x)\n",
135 |     "            imfs = decomposer.decompose()\n",
136 |     "            file_path = \"./dump_file_imfs/CH{}_imfs_{}\".format(eegCH,i)\n",
137 |     "            pickle.dump(imfs,open(file_path,\"wb\"))        \n",
138 |     "    print(\"[CH{}]job done!\".format(eegCH))\n",
139 |     "    #用来计算程序运行时间\n",
140 |     "    endtime = datetime.datetime.now()\n",
141 |     "    print(\"程序运行时间:%.1fs\"%(endtime - starttime).seconds)\n",
142 |     "'''\n",
143 |     "if False:\n",
144 |     "    for eegCH in range(1,33,1):\n",
145 |     "        locals()[\"p{}\".format(eegCH)] = multiprocessing.Process(target = runEMD, args = (eegCH,))\n",
146 |     "        locals()[\"p{}\".format(eegCH)].start()\n",
147 |     "        print(\"p{}.pid:{}\".format(eegCH,locals()[\"p{}\".format(eegCH)].pid))\n",
148 |     "else:\n",
149 |     "    runEMD(eegCH=2)\n",
150 |     "'''       "
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": null,
156 |    "metadata": {},
157 |    "outputs": [],
158 |    "source": []
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": null,
163 |    "metadata": {},
164 |    "outputs": [],
165 |    "source": []
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 8,
170 |    "metadata": {},
171 |    "outputs": [],
172 |    "source": [
173 |     "#使用原始EEG信号提取样本熵特征\n",
174 |     "def countSampleEntropy(eegCH,data,lineIndex):\n",
175 |     "    tmpSE = se.sampEn(data,m=2,r=1)\n",
176 |     "    filePath = \"./dump_file_sampEn/CH{}_sampEn_{}\".format(eegCH,lineIndex)\n",
177 |     "    pickle.dump(tmpSE,open(filePath,\"wb\"))\n",
178 |     "    print(\"[CH{}]->{} done!\".format(eegCH,lineIndex))\n",
179 |     "    \n",
180 |     "def countSampleEntropySomes(eegCH,data,startIndex,endIndex):    \n",
181 |     "    sampEnList = []\n",
182 |     "    for i in range(0,endIndex-startIndex,1):\n",
183 |     "        std = np.std(data.iloc[i,:].values)\n",
184 |     "        try:\n",
185 |     "            tmpSampEn = se.sampEn(data.iloc[i,:].values,m=2,r=0.2*std)  #这里不能是dataframe\n",
186 |     "        except BaseException:\n",
187 |     "            print(\"ERROR!!!!!!!!!!!!!!!!!!\",eegCH,startIndex,endIndex,i)          \n",
188 |     "        sampEnList.append(tmpSampEn)\n",
189 |     "    result = pd.DataFrame(sampEnList,index=data.index,columns=['CH{}SampEn'.format(eegCH)])\n",
190 |     "    filePath = \"./dump_file_sampEn/CH{}_sampEn_{}_{}\".format(eegCH,startIndex,endIndex)\n",
191 |     "    pickle.dump(result,open(filePath,\"wb\"))\n",
192 |     "    print(\"[CH{}]->{}_{} done!\".format(eegCH,startIndex,endIndex))\n",
193 |     "\n",
194 |     "cpuNums = 20\n",
195 |     "steps = 1280 // cpuNums\n",
196 |     "startIndexList = [idx for idx in range(0,1280,steps)]\n",
197 |     "startPoint = 6784\n",
198 |     "endPoint = 8064\n",
199 |     "if False:\n",
200 |     "    for eegCH in range(1,33,1):\n",
201 |     "        for sIdx in startIndexList:\n",
202 |     "            if eegCH !=2:\n",
203 |     "                if sIdx+steps >= 1280:\n",
204 |     "                    data = locals()['CH{}_df_EEG_x'.format(eegCH)].iloc[sIdx:1280,startPoint:endPoint]\n",
205 |     "                else:\n",
206 |     "                    data = locals()['CH{}_df_EEG_x'.format(eegCH)].iloc[sIdx:sIdx+steps,startPoint:endPoint]\n",
207 |     "                locals()[\"p{}\".format(eegCH)] = multiprocessing.Process(target = countSampleEntropySomes, \n",
208 |     "                                                                        args = (eegCH,data,sIdx,sIdx+steps))\n",
209 |     "                locals()[\"p{}\".format(eegCH)].start()\n",
210 |     "                print(\"CH{}_sIdx{}.pid:{}\".format(eegCH,sIdx,locals()[\"p{}\".format(eegCH)].pid))\n",
211 |     "            else:#CH2是复数，另外处理\n",
212 |     "                data = abs(locals()['CH{}_df_EEG_x'.format(eegCH)])\n",
213 |     "                if sIdx+steps >= 1280:\n",
214 |     "                    data = data.iloc[sIdx:1280,startPoint:endPoint]\n",
215 |     "                else:\n",
216 |     "                    data = data.iloc[sIdx:sIdx+steps,startPoint:endPoint]\n",
217 |     "                locals()[\"p{}\".format(eegCH)] = multiprocessing.Process(target = countSampleEntropySomes, \n",
218 |     "                                                                        args = (eegCH,data,sIdx,sIdx+steps))\n",
219 |     "                locals()[\"p{}\".format(eegCH)].start()\n",
220 |     "                print(\"CH{}_sIdx{}.pid:{}\".format(eegCH,sIdx,locals()[\"p{}\".format(eegCH)].pid))\n"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 9,
226 |    "metadata": {},
227 |    "outputs": [],
228 |    "source": [
229 |     "#读取样本熵文件,将每个通道分别写入文件中\n",
230 |     "if True:   \n",
231 |     "    for eegCH in range(1,33,1):\n",
232 |     "        sampEnDF = pd.DataFrame()\n",
233 |     "        for sIdx in startIndexList:\n",
234 |     "            if sIdx+steps >= 1280:\n",
235 |     "                filePath = \"./dump_file_sampEn/CH{}_sampEn_{}_{}\".format(eegCH,sIdx,1280)\n",
236 |     "                locals()[\"CH{}_sampEn_{}_{}\".format(eegCH,sIdx,1280)] = pickle.load(open(filePath,\"rb\"))\n",
237 |     "                sampEnDF = pd.concat([sampEnDF,locals()[\"CH{}_sampEn_{}_{}\".format(eegCH,sIdx,1280)]],axis=0)\n",
238 |     "            else:\n",
239 |     "                filePath = \"./dump_file_sampEn/CH{}_sampEn_{}_{}\".format(eegCH,sIdx,sIdx+steps)\n",
240 |     "                locals()[\"CH{}_sampEn_{}_{}\".format(eegCH,sIdx,sIdx+steps)] = pickle.load(open(filePath,\"rb\"))\n",
241 |     "                sampEnDF = pd.concat([sampEnDF,locals()[\"CH{}_sampEn_{}_{}\".format(eegCH,sIdx,sIdx+steps)]],axis=0)\n",
242 |     "        filePath = \"./dump_file_sampEn/CH{}_sampEn\".format(eegCH)\n",
243 |     "        pickle.dump(sampEnDF,open(filePath,\"wb\"))\n",
244 |     "        "
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "code",
249 |    "execution_count": 10,
250 |    "metadata": {},
251 |    "outputs": [],
252 |    "source": [
253 |     "if True:\n",
254 |     "    sampEnDF = pd.DataFrame()\n",
255 |     "    for eegCH in range(1,33,1):\n",
256 |     "        filePath = \"./dump_file_sampEn/CH{}_sampEn\".format(eegCH)\n",
257 |     "        tmpDF = pickle.load(open(filePath,\"rb\"))\n",
258 |     "        sampEnDF = pd.concat([sampEnDF,tmpDF],axis=1)\n",
259 |     "    filePath = \"./dump_file_sampEn/sampEnFeatures\".format(eegCH)\n",
260 |     "    pickle.dump(sampEnDF,open(filePath,\"wb\"))\n",
261 |     "    \n"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "code",
266 |    "execution_count": 12,
267 |    "metadata": {},
268 |    "outputs": [
269 |     {
270 |      "data": {
271 |       "text/plain": [
272 |        "(1280, 32)"
273 |       ]
274 |      },
275 |      "execution_count": 12,
276 |      "metadata": {},
277 |      "output_type": "execute_result"
278 |     }
279 |    ],
280 |    "source": [
281 |     "sampEnDF.shape\n"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "code",
286 |    "execution_count": null,
287 |    "metadata": {},
288 |    "outputs": [],
289 |    "source": [
290 |     "\n"
291 |    ]
292 |   }
293 |  ],
294 |  "metadata": {
295 |   "kernelspec": {
296 |    "display_name": "Python 3",
297 |    "language": "python",
298 |    "name": "python3"
299 |   },
300 |   "language_info": {
301 |    "codemirror_mode": {
302 |     "name": "ipython",
303 |     "version": 3
304 |    },
305 |    "file_extension": ".py",
306 |    "mimetype": "text/x-python",
307 |    "name": "python",
308 |    "nbconvert_exporter": "python",
309 |    "pygments_lexer": "ipython3",
310 |    "version": "3.6.5"
311 |   }
312 |  },
313 |  "nbformat": 4,
314 |  "nbformat_minor": 2
315 | }
316 | 


--------------------------------------------------------------------------------
/GCF_2cPPG_TimeDomain.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import multiprocessing\n",
 10 |     "from GCForest import gcForest\n",
 11 |     "import pandas as pd\n",
 12 |     "import numpy as np\n",
 13 |     "import pickle \n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "from sklearn.model_selection import train_test_split\n",
 16 |     "from sklearn.model_selection import StratifiedKFold\n",
 17 |     "from sklearn.preprocessing import MinMaxScaler\n",
 18 |     "from sklearn.metrics import accuracy_score\n",
 19 |     "from sklearn.metrics import f1_score\n",
 20 |     "from sklearn.metrics import precision_score\n",
 21 |     "from sklearn.metrics import recall_score\n",
 22 |     "from sklearn.preprocessing import Imputer\n",
 23 |     "import warnings\n",
 24 |     "warnings.filterwarnings(\"ignore\")\n",
 25 |     "#用来计算程序运行时间\n",
 26 |     "import datetime\n",
 27 |     "starttime = datetime.datetime.now()"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": null,
 33 |    "metadata": {},
 34 |    "outputs": [],
 35 |    "source": []
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 2,
 40 |    "metadata": {},
 41 |    "outputs": [
 42 |     {
 43 |      "name": "stdout",
 44 |      "output_type": "stream",
 45 |      "text": [
 46 |       "       valence  arousal  dominance  liking  2cArousal  2cValence\n",
 47 |       "s01_0     7.71     7.60       6.90    7.83          1          1\n",
 48 |       "s01_1     8.10     7.31       7.28    8.47          1          1\n",
 49 |       "s01_2     8.58     7.54       9.00    7.08          1          1\n",
 50 |       "s01_3     4.94     6.01       6.12    8.06          0          0\n",
 51 |       "s01_4     6.96     3.92       7.19    6.05          1          1\n",
 52 |       "[seed:0]****************************************************\n",
 53 |       "BVP running multi-grain scan\n",
 54 |       "Slicing Sequence...\n",
 55 |       "Training MGS Random Forests...\n",
 56 |       "Slicing Sequence...\n",
 57 |       "(896, 492)\n",
 58 |       "(384, 492)\n",
 59 |       "Adding/Training Layer, n_layer=1\n",
 60 |       "Layer validation accuracy = 0.6222222222222222\n",
 61 |       "Adding/Training Layer, n_layer=2\n",
 62 |       "Layer validation accuracy = 0.6222222222222222\n",
 63 |       "ACC 0.6692708333333334\n",
 64 |       "F1 0.7590132827324478\n",
 65 |       "Recal 0.9216589861751152\n",
 66 |       "Precision 0.6451612903225806\n",
 67 |       "[seed:100]****************************************************\n",
 68 |       "BVP running multi-grain scan\n",
 69 |       "Slicing Sequence...\n",
 70 |       "Training MGS Random Forests...\n",
 71 |       "Slicing Sequence...\n",
 72 |       "(896, 492)\n",
 73 |       "(384, 492)\n",
 74 |       "Adding/Training Layer, n_layer=1\n",
 75 |       "Layer validation accuracy = 0.6333333333333333\n",
 76 |       "Adding/Training Layer, n_layer=2\n",
 77 |       "Layer validation accuracy = 0.6388888888888888\n",
 78 |       "Adding/Training Layer, n_layer=3\n",
 79 |       "Layer validation accuracy = 0.6388888888888888\n",
 80 |       "ACC 0.671875\n",
 81 |       "F1 0.7586206896551725\n",
 82 |       "Recal 0.9124423963133641\n",
 83 |       "Precision 0.6491803278688525\n",
 84 |       "[seed:200]****************************************************\n",
 85 |       "BVP running multi-grain scan\n",
 86 |       "Slicing Sequence...\n",
 87 |       "Training MGS Random Forests...\n",
 88 |       "Slicing Sequence...\n",
 89 |       "(896, 492)\n",
 90 |       "(384, 492)\n",
 91 |       "Adding/Training Layer, n_layer=1\n",
 92 |       "Layer validation accuracy = 0.65\n",
 93 |       "Adding/Training Layer, n_layer=2\n",
 94 |       "Layer validation accuracy = 0.65\n",
 95 |       "ACC 0.6171875\n",
 96 |       "F1 0.72\n",
 97 |       "Recal 0.8709677419354839\n",
 98 |       "Precision 0.6136363636363636\n",
 99 |       "[seed:300]****************************************************\n",
100 |       "BVP running multi-grain scan\n",
101 |       "Slicing Sequence...\n",
102 |       "Training MGS Random Forests...\n",
103 |       "Slicing Sequence...\n",
104 |       "(896, 492)\n",
105 |       "(384, 492)\n",
106 |       "Adding/Training Layer, n_layer=1\n",
107 |       "Layer validation accuracy = 0.6277777777777778\n",
108 |       "Adding/Training Layer, n_layer=2\n",
109 |       "Layer validation accuracy = 0.6222222222222222\n",
110 |       "ACC 0.65625\n",
111 |       "F1 0.7421874999999999\n",
112 |       "Recal 0.8755760368663594\n",
113 |       "Precision 0.6440677966101694\n",
114 |       "[seed:400]****************************************************\n",
115 |       "BVP running multi-grain scan\n",
116 |       "Slicing Sequence...\n",
117 |       "Training MGS Random Forests...\n",
118 |       "Slicing Sequence...\n",
119 |       "(896, 492)\n",
120 |       "(384, 492)\n",
121 |       "Adding/Training Layer, n_layer=1\n",
122 |       "Layer validation accuracy = 0.7\n",
123 |       "Adding/Training Layer, n_layer=2\n",
124 |       "Layer validation accuracy = 0.7\n",
125 |       "ACC 0.6432291666666666\n",
126 |       "F1 0.7380497131931166\n",
127 |       "Recal 0.8894009216589862\n",
128 |       "Precision 0.630718954248366\n",
129 |       "[seed:500]****************************************************\n",
130 |       "BVP running multi-grain scan\n",
131 |       "Slicing Sequence...\n",
132 |       "Training MGS Random Forests...\n",
133 |       "Slicing Sequence...\n",
134 |       "(896, 492)\n",
135 |       "(384, 492)\n",
136 |       "Adding/Training Layer, n_layer=1\n",
137 |       "Layer validation accuracy = 0.6166666666666667\n",
138 |       "Adding/Training Layer, n_layer=2\n",
139 |       "Layer validation accuracy = 0.6333333333333333\n",
140 |       "Adding/Training Layer, n_layer=3\n",
141 |       "Layer validation accuracy = 0.6333333333333333\n",
142 |       "ACC 0.6692708333333334\n",
143 |       "F1 0.7543520309477756\n",
144 |       "Recal 0.8986175115207373\n",
145 |       "Precision 0.65\n",
146 |       "[seed:600]****************************************************\n",
147 |       "BVP running multi-grain scan\n",
148 |       "Slicing Sequence...\n",
149 |       "Training MGS Random Forests...\n",
150 |       "Slicing Sequence...\n",
151 |       "(896, 492)\n",
152 |       "(384, 492)\n",
153 |       "Adding/Training Layer, n_layer=1\n",
154 |       "Layer validation accuracy = 0.5833333333333334\n",
155 |       "Adding/Training Layer, n_layer=2\n",
156 |       "Layer validation accuracy = 0.5833333333333334\n",
157 |       "ACC 0.6510416666666666\n",
158 |       "F1 0.7403100775193797\n",
159 |       "Recal 0.880184331797235\n",
160 |       "Precision 0.6387959866220736\n",
161 |       "[seed:700]****************************************************\n",
162 |       "BVP running multi-grain scan\n",
163 |       "Slicing Sequence...\n",
164 |       "Training MGS Random Forests...\n",
165 |       "Slicing Sequence...\n",
166 |       "(896, 492)\n",
167 |       "(384, 492)\n",
168 |       "Adding/Training Layer, n_layer=1\n",
169 |       "Layer validation accuracy = 0.6\n",
170 |       "Adding/Training Layer, n_layer=2\n",
171 |       "Layer validation accuracy = 0.6055555555555555\n",
172 |       "Adding/Training Layer, n_layer=3\n",
173 |       "Layer validation accuracy = 0.5888888888888889\n",
174 |       "ACC 0.6875\n",
175 |       "F1 0.7600000000000001\n",
176 |       "Recal 0.8755760368663594\n",
177 |       "Precision 0.6713780918727915\n",
178 |       "[seed:800]****************************************************\n",
179 |       "BVP running multi-grain scan\n",
180 |       "Slicing Sequence...\n",
181 |       "Training MGS Random Forests...\n",
182 |       "Slicing Sequence...\n",
183 |       "(896, 492)\n",
184 |       "(384, 492)\n",
185 |       "Adding/Training Layer, n_layer=1\n",
186 |       "Layer validation accuracy = 0.6611111111111111\n",
187 |       "Adding/Training Layer, n_layer=2\n",
188 |       "Layer validation accuracy = 0.6555555555555556\n",
189 |       "ACC 0.65625\n",
190 |       "F1 0.7431906614785991\n",
191 |       "Recal 0.880184331797235\n",
192 |       "Precision 0.6430976430976431\n",
193 |       "[seed:900]****************************************************\n",
194 |       "BVP running multi-grain scan\n",
195 |       "Slicing Sequence...\n",
196 |       "Training MGS Random Forests...\n",
197 |       "Slicing Sequence...\n",
198 |       "(896, 492)\n",
199 |       "(384, 492)\n",
200 |       "Adding/Training Layer, n_layer=1\n",
201 |       "Layer validation accuracy = 0.7\n",
202 |       "Adding/Training Layer, n_layer=2\n",
203 |       "Layer validation accuracy = 0.7\n",
204 |       "ACC 0.6614583333333334\n",
205 |       "F1 0.7470817120622569\n",
206 |       "Recal 0.8847926267281107\n",
207 |       "Precision 0.6464646464646465\n"
208 |      ]
209 |     }
210 |    ],
211 |    "source": [
212 |     "#读取Y\n",
213 |     "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n",
214 |     "all_df_y['2cArousal'] = 0\n",
215 |     "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n",
216 |     "all_df_y['2cValence'] = 0\n",
217 |     "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n",
218 |     "print(all_df_y.head(5))\n",
219 |     "\n",
220 |     "#读取1个通道的PPG数据，每个通道包含32×40=1280个信号样本\n",
221 |     "#每个样本向量大小为8064点（63s*128Hz）\n",
222 |     "all_df_PPG_x = pickle.load(open(\"./dump_file/all_df_PPG_x\",\"rb\"))\n",
223 |     "y = all_df_y[['2cValence']]\n",
224 |     "#y = all_df_y[['2cArousal']]\n",
225 |     "for seed in [0,100,200,300,400,500,600,700,800,900]:\n",
226 |     "    print(\"[seed:{}]****************************************************\".format(seed))\n",
227 |     "    xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n",
228 |     "    xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n",
229 |     "    y_tr = y.loc[xTrainIdx]\n",
230 |     "    y_te = y.loc[xTestIdx]\n",
231 |     "\n",
232 |     "    PPGTrainSet = all_df_PPG_x.loc[xTrainIdx]\n",
233 |     "    PPGTestSet = all_df_PPG_x.loc[xTestIdx]\n",
234 |     "\n",
235 |     "    myWindowsSize = 256\n",
236 |     "    myStrideSize = 64\n",
237 |     "    gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n",
238 |     "                   min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n",
239 |     "    if True:     \n",
240 |     "        print(\"PPG running multi-grain scan\")\n",
241 |     "        xTrain,yTrain = PPGTrainSet.values,y_tr.values\n",
242 |     "        xTest = PPGTestSet.values   \n",
243 |     "        PPG_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)\n",
244 |     "        PPG_mgsTestVector = gcf.mg_scanning(xTest)\n",
245 |     "        filePath = \"./dump_file_V2/PPG_mgsTrainVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
246 |     "        pickle.dump(PPG_mgsTrainVector,open(filePath,\"wb\"))\n",
247 |     "        filePath = \"./dump_file_V2/PPG_mgsTestVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
248 |     "        pickle.dump(PPG_mgsTestVector,open(filePath,\"wb\"))\n",
249 |     "    else:\n",
250 |     "        filePath = \"./dump_file_V2/PPG_mgsTrainVector_{}_{}\".format(myWindowsSize,myStrideSize)\n",
251 |     "        PPG_mgsTrainVector = pickle.load(open(filePath,\"rb\"))\n",
252 |     "        filePath = \"./dump_file_V2/PPG_mgsTestVector_{}_{}\".format(myWindowsSize,myStrideSize)\n",
253 |     "        PPG_mgsTestVector = pickle.load(open(filePath,\"rb\"))\n",
254 |     "\n",
255 |     "    X_tr_vector = PPG_mgsTrainVector\n",
256 |     "    X_te_vector = PPG_mgsTestVector\n",
257 |     "    print(X_tr_vector.shape)\n",
258 |     "    print(X_te_vector.shape)\n",
259 |     "\n",
260 |     "    #有缺失值，填充下\n",
261 |     "    X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values\n",
262 |     "    X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values\n",
263 |     "\n",
264 |     "    _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)\n",
265 |     "\n",
266 |     "    pred_proba = gcf.cascade_forest(X_te_vector_fillna)\n",
267 |     "    PPG = np.mean(pred_proba, axis=0)\n",
268 |     "    preds = np.argmax(PPG, axis=1)\n",
269 |     "    print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n",
270 |     "    print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n",
271 |     "    print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n",
272 |     "    print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))"
273 |    ]
274 |   }
275 |  ],
276 |  "metadata": {
277 |   "kernelspec": {
278 |    "display_name": "Python 3",
279 |    "language": "python",
280 |    "name": "python3"
281 |   },
282 |   "language_info": {
283 |    "codemirror_mode": {
284 |     "name": "ipython",
285 |     "version": 3
286 |    },
287 |    "file_extension": ".py",
288 |    "mimetype": "text/x-python",
289 |    "name": "python",
290 |    "nbconvert_exporter": "python",
291 |    "pygments_lexer": "ipython3",
292 |    "version": "3.6.5"
293 |   }
294 |  },
295 |  "nbformat": 4,
296 |  "nbformat_minor": 2
297 | }
298 | 


--------------------------------------------------------------------------------
/GCF_2cRandom.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import multiprocessing\n",
 10 |     "from GCForest import gcForest\n",
 11 |     "import pandas as pd\n",
 12 |     "import numpy as np\n",
 13 |     "import pickle \n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "from sklearn.model_selection import train_test_split\n",
 16 |     "from sklearn.model_selection import StratifiedKFold\n",
 17 |     "from sklearn.preprocessing import MinMaxScaler\n",
 18 |     "from sklearn.metrics import accuracy_score\n",
 19 |     "from sklearn.metrics import f1_score\n",
 20 |     "from sklearn.metrics import precision_score\n",
 21 |     "from sklearn.metrics import recall_score\n",
 22 |     "from sklearn.preprocessing import Imputer\n",
 23 |     "import warnings\n",
 24 |     "warnings.filterwarnings(\"ignore\")\n",
 25 |     "#用来计算程序运行时间\n",
 26 |     "import datetime\n",
 27 |     "starttime = datetime.datetime.now()"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 3,
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "name": "stdout",
 37 |      "output_type": "stream",
 38 |      "text": [
 39 |       "       valence  arousal  dominance  liking  2cArousal  2cValence\n",
 40 |       "s01_0     7.71     7.60       6.90    7.83          1          1\n",
 41 |       "s01_1     8.10     7.31       7.28    8.47          1          1\n",
 42 |       "s01_2     8.58     7.54       9.00    7.08          1          1\n",
 43 |       "s01_3     4.94     6.01       6.12    8.06          0          0\n",
 44 |       "s01_4     6.96     3.92       7.19    6.05          1          1\n",
 45 |       "[seed:0]****************************************************\n",
 46 |       "GSR running multi-grain scan\n",
 47 |       "Slicing Sequence...\n",
 48 |       "Training MGS Random Forests...\n",
 49 |       "Slicing Sequence...\n",
 50 |       "(896, 492)\n",
 51 |       "(384, 492)\n",
 52 |       "Adding/Training Layer, n_layer=1\n",
 53 |       "Layer validation accuracy = 0.55\n",
 54 |       "Adding/Training Layer, n_layer=2\n",
 55 |       "Layer validation accuracy = 0.5555555555555556\n",
 56 |       "Adding/Training Layer, n_layer=3\n",
 57 |       "Layer validation accuracy = 0.5555555555555556\n",
 58 |       "ACC 0.5651041666666666\n",
 59 |       "F1 0.7221297836938436\n",
 60 |       "Recal 1.0\n",
 61 |       "Precision 0.5651041666666666\n",
 62 |       "[seed:100]****************************************************\n",
 63 |       "GSR running multi-grain scan\n",
 64 |       "Slicing Sequence...\n",
 65 |       "Training MGS Random Forests...\n",
 66 |       "Slicing Sequence...\n",
 67 |       "(896, 492)\n",
 68 |       "(384, 492)\n",
 69 |       "Adding/Training Layer, n_layer=1\n",
 70 |       "Layer validation accuracy = 0.5666666666666667\n",
 71 |       "Adding/Training Layer, n_layer=2\n",
 72 |       "Layer validation accuracy = 0.5611111111111111\n",
 73 |       "ACC 0.5651041666666666\n",
 74 |       "F1 0.7221297836938436\n",
 75 |       "Recal 1.0\n",
 76 |       "Precision 0.5651041666666666\n",
 77 |       "[seed:200]****************************************************\n",
 78 |       "GSR running multi-grain scan\n",
 79 |       "Slicing Sequence...\n",
 80 |       "Training MGS Random Forests...\n",
 81 |       "Slicing Sequence...\n",
 82 |       "(896, 492)\n",
 83 |       "(384, 492)\n",
 84 |       "Adding/Training Layer, n_layer=1\n",
 85 |       "Layer validation accuracy = 0.5611111111111111\n",
 86 |       "Adding/Training Layer, n_layer=2\n",
 87 |       "Layer validation accuracy = 0.5666666666666667\n",
 88 |       "Adding/Training Layer, n_layer=3\n",
 89 |       "Layer validation accuracy = 0.5777777777777777\n",
 90 |       "Adding/Training Layer, n_layer=4\n",
 91 |       "Layer validation accuracy = 0.5777777777777777\n",
 92 |       "ACC 0.5651041666666666\n",
 93 |       "F1 0.7221297836938436\n",
 94 |       "Recal 1.0\n",
 95 |       "Precision 0.5651041666666666\n",
 96 |       "[seed:300]****************************************************\n",
 97 |       "GSR running multi-grain scan\n",
 98 |       "Slicing Sequence...\n",
 99 |       "Training MGS Random Forests...\n",
100 |       "Slicing Sequence...\n",
101 |       "(896, 492)\n",
102 |       "(384, 492)\n",
103 |       "Adding/Training Layer, n_layer=1\n",
104 |       "Layer validation accuracy = 0.6111111111111112\n",
105 |       "Adding/Training Layer, n_layer=2\n",
106 |       "Layer validation accuracy = 0.6111111111111112\n",
107 |       "ACC 0.5651041666666666\n",
108 |       "F1 0.7221297836938436\n",
109 |       "Recal 1.0\n",
110 |       "Precision 0.5651041666666666\n",
111 |       "[seed:400]****************************************************\n",
112 |       "GSR running multi-grain scan\n",
113 |       "Slicing Sequence...\n",
114 |       "Training MGS Random Forests...\n",
115 |       "Slicing Sequence...\n",
116 |       "(896, 492)\n",
117 |       "(384, 492)\n",
118 |       "Adding/Training Layer, n_layer=1\n",
119 |       "Layer validation accuracy = 0.6\n",
120 |       "Adding/Training Layer, n_layer=2\n",
121 |       "Layer validation accuracy = 0.6444444444444445\n",
122 |       "Adding/Training Layer, n_layer=3\n",
123 |       "Layer validation accuracy = 0.6166666666666667\n",
124 |       "ACC 0.5651041666666666\n",
125 |       "F1 0.7221297836938436\n",
126 |       "Recal 1.0\n",
127 |       "Precision 0.5651041666666666\n",
128 |       "[seed:500]****************************************************\n",
129 |       "GSR running multi-grain scan\n",
130 |       "Slicing Sequence...\n",
131 |       "Training MGS Random Forests...\n",
132 |       "Slicing Sequence...\n",
133 |       "(896, 492)\n",
134 |       "(384, 492)\n",
135 |       "Adding/Training Layer, n_layer=1\n",
136 |       "Layer validation accuracy = 0.5722222222222222\n",
137 |       "Adding/Training Layer, n_layer=2\n",
138 |       "Layer validation accuracy = 0.5611111111111111\n",
139 |       "ACC 0.5651041666666666\n",
140 |       "F1 0.7221297836938436\n",
141 |       "Recal 1.0\n",
142 |       "Precision 0.5651041666666666\n",
143 |       "[seed:600]****************************************************\n",
144 |       "GSR running multi-grain scan\n",
145 |       "Slicing Sequence...\n",
146 |       "Training MGS Random Forests...\n",
147 |       "Slicing Sequence...\n",
148 |       "(896, 492)\n",
149 |       "(384, 492)\n",
150 |       "Adding/Training Layer, n_layer=1\n",
151 |       "Layer validation accuracy = 0.5555555555555556\n",
152 |       "Adding/Training Layer, n_layer=2\n",
153 |       "Layer validation accuracy = 0.5611111111111111\n",
154 |       "Adding/Training Layer, n_layer=3\n",
155 |       "Layer validation accuracy = 0.5666666666666667\n",
156 |       "Adding/Training Layer, n_layer=4\n",
157 |       "Layer validation accuracy = 0.5555555555555556\n",
158 |       "ACC 0.5651041666666666\n",
159 |       "F1 0.7221297836938436\n",
160 |       "Recal 1.0\n",
161 |       "Precision 0.5651041666666666\n",
162 |       "[seed:700]****************************************************\n",
163 |       "GSR running multi-grain scan\n",
164 |       "Slicing Sequence...\n",
165 |       "Training MGS Random Forests...\n",
166 |       "Slicing Sequence...\n",
167 |       "(896, 492)\n",
168 |       "(384, 492)\n",
169 |       "Adding/Training Layer, n_layer=1\n",
170 |       "Layer validation accuracy = 0.5222222222222223\n",
171 |       "Adding/Training Layer, n_layer=2\n",
172 |       "Layer validation accuracy = 0.5222222222222223\n",
173 |       "ACC 0.5651041666666666\n",
174 |       "F1 0.7221297836938436\n",
175 |       "Recal 1.0\n",
176 |       "Precision 0.5651041666666666\n",
177 |       "[seed:800]****************************************************\n",
178 |       "GSR running multi-grain scan\n",
179 |       "Slicing Sequence...\n",
180 |       "Training MGS Random Forests...\n",
181 |       "Slicing Sequence...\n",
182 |       "(896, 492)\n",
183 |       "(384, 492)\n",
184 |       "Adding/Training Layer, n_layer=1\n",
185 |       "Layer validation accuracy = 0.5111111111111111\n",
186 |       "Adding/Training Layer, n_layer=2\n",
187 |       "Layer validation accuracy = 0.5166666666666667\n",
188 |       "Adding/Training Layer, n_layer=3\n",
189 |       "Layer validation accuracy = 0.5111111111111111\n",
190 |       "ACC 0.5651041666666666\n",
191 |       "F1 0.7221297836938436\n",
192 |       "Recal 1.0\n",
193 |       "Precision 0.5651041666666666\n",
194 |       "[seed:900]****************************************************\n",
195 |       "GSR running multi-grain scan\n",
196 |       "Slicing Sequence...\n",
197 |       "Training MGS Random Forests...\n",
198 |       "Slicing Sequence...\n",
199 |       "(896, 492)\n",
200 |       "(384, 492)\n",
201 |       "Adding/Training Layer, n_layer=1\n",
202 |       "Layer validation accuracy = 0.6111111111111112\n",
203 |       "Adding/Training Layer, n_layer=2\n",
204 |       "Layer validation accuracy = 0.6055555555555555\n",
205 |       "ACC 0.5651041666666666\n",
206 |       "F1 0.7221297836938436\n",
207 |       "Recal 1.0\n",
208 |       "Precision 0.5651041666666666\n"
209 |      ]
210 |     }
211 |    ],
212 |    "source": [
213 |     "#读取Y\n",
214 |     "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n",
215 |     "all_df_y['2cArousal'] = 0\n",
216 |     "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n",
217 |     "all_df_y['2cValence'] = 0\n",
218 |     "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n",
219 |     "print(all_df_y.head(5))\n",
220 |     "\n",
221 |     "#读取1个通道的GSR数据，每个通道包含32×40=1280个信号样本\n",
222 |     "#每个样本向量大小为8064点（63s*128Hz）\n",
223 |     "all_df_GSR_x = pickle.load(open(\"./dump_file/all_df_GSR_x\",\"rb\"))\n",
224 |     "#构建任意随机数据\n",
225 |     "a = np.ones(shape=(1280,8064),)\n",
226 |     "all_df_GSR_x = pd.DataFrame(a,index=all_df_GSR_x.index)\n",
227 |     "\n",
228 |     "#y = all_df_y[['2cArousal']]\n",
229 |     "y = all_df_y[['2cValence']]\n",
230 |     "for seed in [0,100,200,300,400,500,600,700,800,900]:\n",
231 |     "    print(\"[seed:{}]****************************************************\".format(seed))\n",
232 |     "    xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx\",\"rb\"))\n",
233 |     "    xTestIdx = pickle.load(open(\"./dump_file/xTestIdx\",\"rb\"))\n",
234 |     "    y_tr = y.loc[xTrainIdx]\n",
235 |     "    y_te = y.loc[xTestIdx]\n",
236 |     "    GSRTrainSet = all_df_GSR_x.loc[xTrainIdx]\n",
237 |     "    GSRTestSet = all_df_GSR_x.loc[xTestIdx]\n",
238 |     "    myWindowsSize = 256\n",
239 |     "    myStrideSize = 64\n",
240 |     "    gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n",
241 |     "                   min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n",
242 |     "    if True:     \n",
243 |     "        print(\"GSR running multi-grain scan\")\n",
244 |     "        xTrain,yTrain = GSRTrainSet.values,y_tr.values\n",
245 |     "        xTest = GSRTestSet.values   \n",
246 |     "        GSR_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)\n",
247 |     "        GSR_mgsTestVector = gcf.mg_scanning(xTest)\n",
248 |     "    X_tr_vector = GSR_mgsTrainVector\n",
249 |     "    X_te_vector = GSR_mgsTestVector\n",
250 |     "    print(X_tr_vector.shape)\n",
251 |     "    print(X_te_vector.shape)\n",
252 |     "    #有缺失值，填充下\n",
253 |     "    X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values\n",
254 |     "    X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values\n",
255 |     "    _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)\n",
256 |     "    pred_proba = gcf.cascade_forest(X_te_vector_fillna)\n",
257 |     "    tmp = np.mean(pred_proba, axis=0)\n",
258 |     "    preds = np.argmax(tmp, axis=1)\n",
259 |     "    print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n",
260 |     "    print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n",
261 |     "    print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n",
262 |     "    print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "code",
267 |    "execution_count": null,
268 |    "metadata": {},
269 |    "outputs": [],
270 |    "source": []
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": null,
275 |    "metadata": {},
276 |    "outputs": [],
277 |    "source": []
278 |   },
279 |   {
280 |    "cell_type": "code",
281 |    "execution_count": null,
282 |    "metadata": {},
283 |    "outputs": [],
284 |    "source": []
285 |   }
286 |  ],
287 |  "metadata": {
288 |   "kernelspec": {
289 |    "display_name": "Python 3",
290 |    "language": "python",
291 |    "name": "python3"
292 |   },
293 |   "language_info": {
294 |    "codemirror_mode": {
295 |     "name": "ipython",
296 |     "version": 3
297 |    },
298 |    "file_extension": ".py",
299 |    "mimetype": "text/x-python",
300 |    "name": "python",
301 |    "nbconvert_exporter": "python",
302 |    "pygments_lexer": "ipython3",
303 |    "version": "3.6.5"
304 |   }
305 |  },
306 |  "nbformat": 4,
307 |  "nbformat_minor": 2
308 | }
309 | 


--------------------------------------------------------------------------------
/GCF_2cTMP__TimeDomain.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import multiprocessing\n",
 10 |     "from GCForest import gcForest\n",
 11 |     "import pandas as pd\n",
 12 |     "import numpy as np\n",
 13 |     "import pickle \n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "from sklearn.model_selection import train_test_split\n",
 16 |     "from sklearn.model_selection import StratifiedKFold\n",
 17 |     "from sklearn.preprocessing import MinMaxScaler\n",
 18 |     "from sklearn.metrics import accuracy_score\n",
 19 |     "from sklearn.metrics import f1_score\n",
 20 |     "from sklearn.metrics import precision_score\n",
 21 |     "from sklearn.metrics import recall_score\n",
 22 |     "from sklearn.preprocessing import Imputer\n",
 23 |     "import warnings\n",
 24 |     "warnings.filterwarnings(\"ignore\")\n",
 25 |     "#用来计算程序运行时间\n",
 26 |     "import datetime\n",
 27 |     "starttime = datetime.datetime.now()"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 2,
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "name": "stdout",
 37 |      "output_type": "stream",
 38 |      "text": [
 39 |       "       valence  arousal  dominance  liking  2cArousal  2cValence\n",
 40 |       "s01_0     7.71     7.60       6.90    7.83          1          1\n",
 41 |       "s01_1     8.10     7.31       7.28    8.47          1          1\n",
 42 |       "s01_2     8.58     7.54       9.00    7.08          1          1\n",
 43 |       "s01_3     4.94     6.01       6.12    8.06          0          0\n",
 44 |       "s01_4     6.96     3.92       7.19    6.05          1          1\n",
 45 |       "[seed:0]****************************************************\n",
 46 |       "TMP running multi-grain scan\n",
 47 |       "Slicing Sequence...\n",
 48 |       "Training MGS Random Forests...\n",
 49 |       "Slicing Sequence...\n",
 50 |       "(896, 492)\n",
 51 |       "(384, 492)\n",
 52 |       "Adding/Training Layer, n_layer=1\n",
 53 |       "Layer validation accuracy = 0.6888888888888889\n",
 54 |       "Adding/Training Layer, n_layer=2\n",
 55 |       "Layer validation accuracy = 0.6888888888888889\n",
 56 |       "ACC 0.6666666666666666\n",
 57 |       "F1 0.7480314960629921\n",
 58 |       "Recal 0.8755760368663594\n",
 59 |       "Precision 0.6529209621993127\n",
 60 |       "[seed:100]****************************************************\n",
 61 |       "TMP running multi-grain scan\n",
 62 |       "Slicing Sequence...\n",
 63 |       "Training MGS Random Forests...\n",
 64 |       "Slicing Sequence...\n",
 65 |       "(896, 492)\n",
 66 |       "(384, 492)\n",
 67 |       "Adding/Training Layer, n_layer=1\n",
 68 |       "Layer validation accuracy = 0.6555555555555556\n",
 69 |       "Adding/Training Layer, n_layer=2\n",
 70 |       "Layer validation accuracy = 0.65\n",
 71 |       "ACC 0.6744791666666666\n",
 72 |       "F1 0.7572815533980584\n",
 73 |       "Recal 0.8986175115207373\n",
 74 |       "Precision 0.6543624161073825\n",
 75 |       "[seed:200]****************************************************\n",
 76 |       "TMP running multi-grain scan\n",
 77 |       "Slicing Sequence...\n",
 78 |       "Training MGS Random Forests...\n",
 79 |       "Slicing Sequence...\n",
 80 |       "(896, 492)\n",
 81 |       "(384, 492)\n",
 82 |       "Adding/Training Layer, n_layer=1\n",
 83 |       "Layer validation accuracy = 0.6666666666666666\n",
 84 |       "Adding/Training Layer, n_layer=2\n",
 85 |       "Layer validation accuracy = 0.6555555555555556\n",
 86 |       "ACC 0.6197916666666666\n",
 87 |       "F1 0.7234848484848484\n",
 88 |       "Recal 0.880184331797235\n",
 89 |       "Precision 0.6141479099678456\n",
 90 |       "[seed:300]****************************************************\n",
 91 |       "TMP running multi-grain scan\n",
 92 |       "Slicing Sequence...\n",
 93 |       "Training MGS Random Forests...\n",
 94 |       "Slicing Sequence...\n",
 95 |       "(896, 492)\n",
 96 |       "(384, 492)\n",
 97 |       "Adding/Training Layer, n_layer=1\n",
 98 |       "Layer validation accuracy = 0.6666666666666666\n",
 99 |       "Adding/Training Layer, n_layer=2\n",
100 |       "Layer validation accuracy = 0.6611111111111111\n",
101 |       "ACC 0.6588541666666666\n",
102 |       "F1 0.745631067961165\n",
103 |       "Recal 0.8847926267281107\n",
104 |       "Precision 0.6442953020134228\n",
105 |       "[seed:400]****************************************************\n",
106 |       "TMP running multi-grain scan\n",
107 |       "Slicing Sequence...\n",
108 |       "Training MGS Random Forests...\n",
109 |       "Slicing Sequence...\n",
110 |       "(896, 492)\n",
111 |       "(384, 492)\n",
112 |       "Adding/Training Layer, n_layer=1\n",
113 |       "Layer validation accuracy = 0.6333333333333333\n",
114 |       "Adding/Training Layer, n_layer=2\n",
115 |       "Layer validation accuracy = 0.6333333333333333\n",
116 |       "ACC 0.6458333333333334\n",
117 |       "F1 0.7394636015325671\n",
118 |       "Recal 0.8894009216589862\n",
119 |       "Precision 0.6327868852459017\n",
120 |       "[seed:500]****************************************************\n",
121 |       "TMP running multi-grain scan\n",
122 |       "Slicing Sequence...\n",
123 |       "Training MGS Random Forests...\n",
124 |       "Slicing Sequence...\n",
125 |       "(896, 492)\n",
126 |       "(384, 492)\n",
127 |       "Adding/Training Layer, n_layer=1\n",
128 |       "Layer validation accuracy = 0.6611111111111111\n",
129 |       "Adding/Training Layer, n_layer=2\n",
130 |       "Layer validation accuracy = 0.6444444444444445\n",
131 |       "ACC 0.6614583333333334\n",
132 |       "F1 0.7470817120622569\n",
133 |       "Recal 0.8847926267281107\n",
134 |       "Precision 0.6464646464646465\n",
135 |       "[seed:600]****************************************************\n",
136 |       "TMP running multi-grain scan\n",
137 |       "Slicing Sequence...\n",
138 |       "Training MGS Random Forests...\n",
139 |       "Slicing Sequence...\n",
140 |       "(896, 492)\n",
141 |       "(384, 492)\n",
142 |       "Adding/Training Layer, n_layer=1\n",
143 |       "Layer validation accuracy = 0.6777777777777778\n",
144 |       "Adding/Training Layer, n_layer=2\n",
145 |       "Layer validation accuracy = 0.6833333333333333\n",
146 |       "Adding/Training Layer, n_layer=3\n",
147 |       "Layer validation accuracy = 0.6944444444444444\n",
148 |       "Adding/Training Layer, n_layer=4\n",
149 |       "Layer validation accuracy = 0.6833333333333333\n",
150 |       "ACC 0.6432291666666666\n",
151 |       "F1 0.730844793713163\n",
152 |       "Recal 0.8571428571428571\n",
153 |       "Precision 0.636986301369863\n",
154 |       "[seed:700]****************************************************\n",
155 |       "TMP running multi-grain scan\n",
156 |       "Slicing Sequence...\n",
157 |       "Training MGS Random Forests...\n",
158 |       "Slicing Sequence...\n",
159 |       "(896, 492)\n",
160 |       "(384, 492)\n",
161 |       "Adding/Training Layer, n_layer=1\n",
162 |       "Layer validation accuracy = 0.7\n",
163 |       "Adding/Training Layer, n_layer=2\n",
164 |       "Layer validation accuracy = 0.7111111111111111\n",
165 |       "Adding/Training Layer, n_layer=3\n",
166 |       "Layer validation accuracy = 0.7111111111111111\n",
167 |       "ACC 0.6822916666666666\n",
168 |       "F1 0.767175572519084\n",
169 |       "Recal 0.9262672811059908\n",
170 |       "Precision 0.6547231270358306\n",
171 |       "[seed:800]****************************************************\n",
172 |       "TMP running multi-grain scan\n",
173 |       "Slicing Sequence...\n",
174 |       "Training MGS Random Forests...\n",
175 |       "Slicing Sequence...\n",
176 |       "(896, 492)\n",
177 |       "(384, 492)\n",
178 |       "Adding/Training Layer, n_layer=1\n",
179 |       "Layer validation accuracy = 0.6555555555555556\n",
180 |       "Adding/Training Layer, n_layer=2\n",
181 |       "Layer validation accuracy = 0.6555555555555556\n",
182 |       "ACC 0.6640625\n",
183 |       "F1 0.7485380116959064\n",
184 |       "Recal 0.8847926267281107\n",
185 |       "Precision 0.6486486486486487\n",
186 |       "[seed:900]****************************************************\n",
187 |       "TMP running multi-grain scan\n",
188 |       "Slicing Sequence...\n",
189 |       "Training MGS Random Forests...\n",
190 |       "Slicing Sequence...\n",
191 |       "(896, 492)\n",
192 |       "(384, 492)\n",
193 |       "Adding/Training Layer, n_layer=1\n",
194 |       "Layer validation accuracy = 0.6222222222222222\n",
195 |       "Adding/Training Layer, n_layer=2\n",
196 |       "Layer validation accuracy = 0.6277777777777778\n",
197 |       "Adding/Training Layer, n_layer=3\n",
198 |       "Layer validation accuracy = 0.6277777777777778\n",
199 |       "ACC 0.65625\n",
200 |       "F1 0.7518796992481204\n",
201 |       "Recal 0.9216589861751152\n",
202 |       "Precision 0.6349206349206349\n"
203 |      ]
204 |     }
205 |    ],
206 |    "source": [
207 |     "#读取Y\n",
208 |     "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n",
209 |     "all_df_y['2cArousal'] = 0\n",
210 |     "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n",
211 |     "all_df_y['2cValence'] = 0\n",
212 |     "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n",
213 |     "print(all_df_y.head(5))\n",
214 |     "\n",
215 |     "#读取1个通道的TMP数据，每个通道包含32×40=1280个信号样本\n",
216 |     "#每个样本向量大小为8064点（63s*128Hz）\n",
217 |     "all_df_TMP_x = pickle.load(open(\"./dump_file/all_df_TMP_x\",\"rb\"))\n",
218 |     "#y = all_df_y[['2cArousal']]\n",
219 |     "y = all_df_y[['2cValence']]\n",
220 |     "for seed in [0,100,200,300,400,500,600,700,800,900]:\n",
221 |     "    print(\"[seed:{}]****************************************************\".format(seed))\n",
222 |     "    xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n",
223 |     "    xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n",
224 |     "    y_tr = y.loc[xTrainIdx]\n",
225 |     "    y_te = y.loc[xTestIdx]\n",
226 |     "\n",
227 |     "    TMPTrainSet = all_df_TMP_x.loc[xTrainIdx]\n",
228 |     "    TMPTestSet = all_df_TMP_x.loc[xTestIdx]\n",
229 |     "\n",
230 |     "    myWindowsSize = 256\n",
231 |     "    myStrideSize = 64\n",
232 |     "    gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n",
233 |     "                   min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n",
234 |     "    if True:     \n",
235 |     "        print(\"TMP running multi-grain scan\")\n",
236 |     "        xTrain,yTrain = TMPTrainSet.values,y_tr.values\n",
237 |     "        xTest = TMPTestSet.values   \n",
238 |     "        TMP_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)\n",
239 |     "        TMP_mgsTestVector = gcf.mg_scanning(xTest)\n",
240 |     "        filePath = \"./dump_file_V2/TMP_mgsTrainVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
241 |     "        pickle.dump(TMP_mgsTrainVector,open(filePath,\"wb\"))\n",
242 |     "        filePath = \"./dump_file_V2/TMP_mgsTestVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
243 |     "        pickle.dump(TMP_mgsTestVector,open(filePath,\"wb\"))\n",
244 |     "    else:\n",
245 |     "        filePath = \"./dump_file_V2/TMP_mgsTrainVector_{}_{}\".format(myWindowsSize,myStrideSize)\n",
246 |     "        TMP_mgsTrainVector = pickle.load(open(filePath,\"rb\"))\n",
247 |     "        filePath = \"./dump_file_V2/TMP_mgsTestVector_{}_{}\".format(myWindowsSize,myStrideSize)\n",
248 |     "        TMP_mgsTestVector = pickle.load(open(filePath,\"rb\"))\n",
249 |     "\n",
250 |     "    X_tr_vector = TMP_mgsTrainVector\n",
251 |     "    X_te_vector = TMP_mgsTestVector\n",
252 |     "    print(X_tr_vector.shape)\n",
253 |     "    print(X_te_vector.shape)\n",
254 |     "\n",
255 |     "    #有缺失值，填充下\n",
256 |     "    X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values\n",
257 |     "    X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values\n",
258 |     "\n",
259 |     "    _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)\n",
260 |     "\n",
261 |     "    pred_proba = gcf.cascade_forest(X_te_vector_fillna)\n",
262 |     "    tmp = np.mean(pred_proba, axis=0)\n",
263 |     "    preds = np.argmax(tmp, axis=1)\n",
264 |     "    print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n",
265 |     "    print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n",
266 |     "    print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n",
267 |     "    print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))"
268 |    ]
269 |   }
270 |  ],
271 |  "metadata": {
272 |   "kernelspec": {
273 |    "display_name": "Python 3",
274 |    "language": "python",
275 |    "name": "python3"
276 |   },
277 |   "language_info": {
278 |    "codemirror_mode": {
279 |     "name": "ipython",
280 |     "version": 3
281 |    },
282 |    "file_extension": ".py",
283 |    "mimetype": "text/x-python",
284 |    "name": "python",
285 |    "nbconvert_exporter": "python",
286 |    "pygments_lexer": "ipython3",
287 |    "version": "3.6.5"
288 |   }
289 |  },
290 |  "nbformat": 4,
291 |  "nbformat_minor": 2
292 | }
293 | 


--------------------------------------------------------------------------------
/EEG_feat_extract.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Thu Apr 26 14:39:31 2018
  4 | 
  5 | @author: jinyu
  6 | """
  7 | 
  8 | import pandas as pd
  9 | import numpy as np
 10 | from sklearn.model_selection import GroupKFold
 11 | import pickle 
 12 | import matplotlib.pyplot as plt
 13 | import warnings
 14 | warnings.filterwarnings("ignore")
 15 | 
 16 |     #read file 
 17 | for eeg_CH in range(1,33,1):
 18 | 
 19 |     file_path = "./dump_file/CH{}_df_EEG_x".format(eeg_CH)
 20 |     #locals()['CH{}_df_EEG_x'.format(eeg_CH)] = pickle.load(open(file_path),"rb")
 21 |     df_data = pickle.load(open(file_path,"rb"))
 22 |     def eeg_mean(df):
 23 |         return df.mean(axis=1)
 24 |     
 25 |     def eeg_median(df):
 26 |         return df.median(axis=1)
 27 |     
 28 |     def eeg_std(df):
 29 |         return df.std(axis=1)
 30 |     
 31 |     def eeg_min(df):
 32 |         return df.min(axis=1)
 33 |     
 34 |     def eeg_max(df):
 35 |         return df.max(axis=1)
 36 |     
 37 |     def eeg_range(df_max,df_min,eeg_CH):
 38 |         return df_max['CH{}eeg_max'.format(eeg_CH)]-df_min['CH{}eeg_min'.format(eeg_CH)]
 39 |     
 40 |     #最小值比率 = Mmin/N
 41 |     def eeg_minRatio(all_df,eeg_min,eeg_CH):
 42 |         all_df_T = all_df.T
 43 |         eeg_min_T = eeg_min.T
 44 |         eeg_minRatio_dict = {}
 45 |         for i in all_df.index.tolist():
 46 |             num_min = len( all_df_T[i][ all_df_T[i] == eeg_min_T.get_value(index='CH{}eeg_min'.format(eeg_CH),col=i)] )   
 47 |             eeg_minRatio_dict.update({i:num_min/8064.0})   
 48 |         eeg_minRatio_df = pd.DataFrame.from_dict(data=eeg_minRatio_dict,orient='index')
 49 |         eeg_minRatio_df.columns = ['CH{}eeg_minRatio'.format(eeg_CH)]
 50 |         return eeg_minRatio_df
 51 |     
 52 |     #最大值比率 = Nmax/N
 53 |     def eeg_maxRatio(all_df,eeg_max,eeg_CH):
 54 |         all_df_T = all_df.T
 55 |         eeg_max_T = eeg_max.T
 56 |         eeg_maxRatio_dict = {}
 57 |         for i in all_df.index.tolist():
 58 |             num_max = len( all_df_T[i][ all_df_T[i] == eeg_max_T.get_value(index='CH{}eeg_max'.format(eeg_CH),col=i)] )   
 59 |             eeg_maxRatio_dict.update({i:num_max/8064.0})    
 60 |         eeg_maxRatio_df = pd.DataFrame.from_dict(data=eeg_maxRatio_dict,orient='index')
 61 |         eeg_maxRatio_df.columns = ['CH{}eeg_maxRatio'.format(eeg_CH)]
 62 |         return eeg_maxRatio_df   
 63 |     
 64 |     #EEG一阶差分均值 
 65 |     def eeg1Diff_mean(all_df):
 66 |         eeg1Diff_mean = all_df.diff(periods=1,axis=1).dropna(axis=1).mean(axis=1)    
 67 |         return eeg1Diff_mean
 68 |     
 69 |     #EEG一阶差分中值
 70 |     def eeg1Diff_median(all_df):
 71 |         eeg1Diff_median = all_df.diff(periods=1,axis=1).dropna(axis=1).median(axis=1)
 72 |         return eeg1Diff_median
 73 |     
 74 |     #EEG一阶差分标准差
 75 |     def eeg1Diff_std(all_df):
 76 |         eeg1Diff_std = all_df.diff(periods=1,axis=1).dropna(axis=1).std(axis=1)
 77 |         return eeg1Diff_std
 78 |     
 79 |     def eeg1Diff_min(all_df):
 80 |         eeg1Diff_min = all_df.diff(periods=1,axis=1).dropna(axis=1).min(axis=1)
 81 |         return eeg1Diff_min
 82 |         
 83 |     def eeg1Diff_max(all_df):
 84 |         eeg1Diff_max = all_df.diff(periods=1,axis=1).dropna(axis=1).max(axis=1)
 85 |         return eeg1Diff_max    
 86 |     
 87 |     def eeg1Diff_range(eeg1Diff_max,eeg1Diff_min,eeg_CH):
 88 |         return eeg1Diff_max['CH{}eeg1Diff_max'.format(eeg_CH)]-eeg1Diff_min['CH{}eeg1Diff_min'.format(eeg_CH)]
 89 |     
 90 |     def eeg1Diff_minRatio(all_df,eeg1Diff_min,eeg_CH):
 91 |         all_df_Diff_T = all_df.diff(periods=1,axis=1).dropna(axis=1).T
 92 |         eeg1Diff_min_T = eeg1Diff_min.T
 93 |         eeg1Diff_minRatio_dict = {}
 94 |         for i in all_df.index.tolist():
 95 |             num_min = len( all_df_Diff_T[i][ all_df_Diff_T[i] == eeg1Diff_min_T.get_value(index='CH{}eeg1Diff_min'.format(eeg_CH),col=i)])
 96 |             eeg1Diff_minRatio_dict.update({i:num_min/8063.0})
 97 |         eeg1Diff_minRatio_df = pd.DataFrame.from_dict(data=eeg1Diff_minRatio_dict,orient='index')
 98 |         return eeg1Diff_minRatio_df
 99 |     
100 |     def eeg1Diff_maxRatio(all_df,eeg1Diff_max,eeg_CH):
101 |         all_df_Diff_T = all_df.diff(periods=1,axis=1).dropna(axis=1).T
102 |         eeg1Diff_max_T = eeg1Diff_max.T
103 |         eeg1Diff_maxRatio_dict = {}
104 |         for i in all_df.index.tolist():
105 |             num_max = len( all_df_Diff_T[i][all_df_Diff_T[i] == eeg1Diff_max_T.get_value(index='CH{}eeg1Diff_max'.format(eeg_CH),col=i)])
106 |             eeg1Diff_maxRatio_dict.update({i:num_max/8063.0})
107 |         eeg1Diff_maxRatio_df = pd.DataFrame.from_dict(data=eeg1Diff_maxRatio_dict,orient='index')
108 |         return eeg1Diff_maxRatio_df
109 |     
110 |     def eeg2Diff_std(all_df):
111 |         eeg2Diff_std = all_df.diff(periods=2,axis=1).dropna(axis=1).std(axis=1)
112 |         return eeg2Diff_std
113 |     
114 |     def eeg2Diff_min(all_df):
115 |         eeg2Diff_min = all_df.diff(periods=2,axis=1).dropna(axis=1).min(axis=1)
116 |         return eeg2Diff_min
117 |     
118 |     def eeg2Diff_max(all_df):
119 |         eeg2Diff_max = all_df.diff(periods=2,axis=1).dropna(axis=1).max(axis=1)
120 |         return eeg2Diff_max
121 |     
122 |     def eeg2Diff_range(eeg2Diff_max,eeg2Diff_min,eeg_CH):
123 |         eeg2Diff_range = eeg2Diff_max['CH{}eeg2Diff_max'.format(eeg_CH)]-eeg2Diff_min['CH{}eeg2Diff_min'.format(eeg_CH)]
124 |         return eeg2Diff_range
125 |     
126 |     def eeg2Diff_minRatio(all_df,eeg2Diff_min,eeg_CH):
127 |         all_df_2Diff_T = all_df.diff(periods=2,axis=1).dropna(axis=1).T
128 |         eeg2Diff_min_T = eeg2Diff_min.T
129 |         eeg2Diff_minRatio_dict = {}
130 |         for i in all_df.index.tolist():
131 |             num_min = len( all_df_2Diff_T[i][all_df_2Diff_T[i] == eeg2Diff_min_T.get_value(index='CH{}eeg2Diff_min'.format(eeg_CH),col=i)] )
132 |             eeg2Diff_minRatio_dict.update({i:num_min/8062.0})
133 |         eeg2Diff_minRatio_df = pd.DataFrame.from_dict(data=eeg2Diff_minRatio_dict,orient='index')
134 |         return eeg2Diff_minRatio_df
135 |             
136 |     def eeg2Diff_maxRatio(all_df,eeg2Diff_max,eeg_CH):
137 |         all_df_2Diff_T = all_df.diff(periods=2,axis=1).dropna(axis=1).T
138 |         eeg2Diff_max_T = eeg2Diff_max.T
139 |         eeg2Diff_maxRatio_dict = {}
140 |         for i in all_df.index.tolist():
141 |             num_max = len( all_df_2Diff_T[i][all_df_2Diff_T[i] == eeg2Diff_max_T.get_value(index='CH{}eeg2Diff_max'.format(eeg_CH),col=i)] )
142 |             eeg2Diff_maxRatio_dict.update({i:num_max/8062.0})
143 |         eeg2Diff_maxRatio_df = pd.DataFrame.from_dict(data=eeg2Diff_maxRatio_dict,orient='index')
144 |         return eeg2Diff_maxRatio_df
145 |     
146 |     #EEG DFT(FFT)频域数据
147 |     def eegfft(df_data):
148 |         eegfft_df = pd.DataFrame()
149 |         for i in df_data.index.tolist():
150 |             temp_eegfft = pd.DataFrame(np.fft.fft(df_data.loc[i,:].values)).T
151 |             temp_eegfft.index = [i]
152 |             eegfft_df = eegfft_df.append(temp_eegfft)
153 |         return eegfft_df
154 |             
155 |     #EEG 频域中值
156 |     def eegfft_mean(eegfft_df):
157 |         eegfft_mean = eegfft_df.mean(axis=1)
158 |         return eegfft_mean
159 |     
160 |     def eegfft_median(eegfft_df):
161 |         eegfft_median = eegfft_df.median(axis=1)
162 |         return eegfft_median
163 |     
164 |     def eegfft_std(eegfft_df):
165 |         eegfft_std = eegfft_df.std(axis=1)
166 |         return eegfft_std
167 |     
168 |     def eegfft_min(eegfft_df):
169 |         eegfft_min = eegfft_df.min(axis=1)
170 |         return eegfft_min
171 |     
172 |     def eegfft_max(eegfft_df):
173 |         eegfft_max = eegfft_df.max(axis=1)
174 |         return eegfft_max
175 |     
176 |     def eegfft_range(eegfft_max,eegfft_min,eeg_CH):
177 |         eegfft_range = eegfft_max['CH{}eegfft_max'.format(eeg_CH)]-eegfft_min['CH{}eegfft_min'.format(eeg_CH)]
178 |         return eegfft_range
179 | 
180 |     ##########################提取EEG统计特征部分#############################
181 |     #df_data = locals()['CH{}_df_EEG_x'.format(eeg_CH)]
182 |     if True :
183 |         eeg_mean = pd.DataFrame(eeg_mean(df_data),columns=['CH{}eeg_mean'.format(eeg_CH)])
184 |         eeg_median = pd.DataFrame(eeg_median(df_data),columns=['CH{}eeg_median'.format(eeg_CH)])
185 |         eeg_std = pd.DataFrame(eeg_std(df_data),columns=['CH{}eeg_std'.format(eeg_CH)])
186 |         eeg_min = pd.DataFrame(eeg_min(df_data),columns=['CH{}eeg_min'.format(eeg_CH)])
187 |         eeg_max = pd.DataFrame(eeg_max(df_data),columns=['CH{}eeg_max'.format(eeg_CH)])
188 |         eeg_range = pd.DataFrame(eeg_range(eeg_max,eeg_min,eeg_CH),columns=['CH{}eeg_range'.format(eeg_CH)])
189 |         eeg_minRatio = pd.DataFrame(eeg_minRatio(df_data,eeg_min,eeg_CH),columns=['CH{}eeg_minRatio'.format(eeg_CH)])
190 |         eeg_maxRatio = pd.DataFrame(eeg_maxRatio(df_data,eeg_max,eeg_CH),columns=['CH{}eeg_maxRatio'.format(eeg_CH)])
191 |         
192 |         eeg1Diff_mean = pd.DataFrame( eeg1Diff_mean(df_data),columns=['CH{}eeg1Diff_mean'.format(eeg_CH)])
193 |         eeg1Diff_median = pd.DataFrame( eeg1Diff_median(df_data),columns=['CH{}eeg1Diff_median'.format(eeg_CH)] )
194 |         eeg1Diff_std = pd.DataFrame( eeg1Diff_std(df_data),columns=['CH{}eeg1Diff_std'.format(eeg_CH)])
195 |         eeg1Diff_min = pd.DataFrame( eeg1Diff_min(df_data),columns=['CH{}eeg1Diff_min'.format(eeg_CH)])
196 |         eeg1Diff_max = pd.DataFrame( eeg1Diff_max(df_data),columns=['CH{}eeg1Diff_max'.format(eeg_CH)])
197 |         eeg1Diff_range = pd.DataFrame( eeg1Diff_range(eeg1Diff_max,eeg1Diff_min,eeg_CH),columns=['CH{}eeg1Diff_range'.format(eeg_CH)])
198 |         eeg1Diff_minRatio = eeg1Diff_minRatio(df_data,eeg1Diff_min,eeg_CH)
199 |         eeg1Diff_minRatio.columns=['CH{}eeg1Diff_minRatio'.format(eeg_CH)]
200 |         eeg1Diff_maxRatio = eeg1Diff_maxRatio(df_data,eeg1Diff_max,eeg_CH)
201 |         eeg1Diff_maxRatio.columns=['CH{}eeg1Diff_maxRatio'.format(eeg_CH)]
202 |         
203 |         eeg2Diff_std = pd.DataFrame( eeg2Diff_std(df_data),columns=['CH{}eeg2Diff_std'.format(eeg_CH)] )
204 |         eeg2Diff_min = pd.DataFrame( eeg2Diff_min(df_data),columns=['CH{}eeg2Diff_min'.format(eeg_CH)] ) 
205 |         eeg2Diff_max = pd.DataFrame( eeg2Diff_max(df_data),columns=['CH{}eeg2Diff_max'.format(eeg_CH)] )
206 |         eeg2Diff_range = pd.DataFrame(eeg2Diff_range(eeg2Diff_max,eeg2Diff_min,eeg_CH),columns=['CH{}eeg2Diff_range'.format(eeg_CH)])
207 |         eeg2Diff_minRatio = eeg2Diff_minRatio(df_data,eeg2Diff_min,eeg_CH)
208 |         eeg2Diff_minRatio.columns=['CH{}eeg2Diff_minRatio'.format(eeg_CH)]
209 |         eeg2Diff_maxRatio = eeg2Diff_maxRatio(df_data,eeg2Diff_max,eeg_CH)
210 |         eeg2Diff_maxRatio.columns=['CH{}eeg2Diff_maxRatio'.format(eeg_CH)]
211 | 
212 |         #FFT运算比较耗费时间，False直接读取跑过的文件
213 |     if False:
214 |         file_path = "./dump_file/CH{}eegfft_df".format(eeg_CH)
215 |         temp_eegfft = eegfft(df_data)
216 |         #locals()["CH{}eegfft_df".format(eeg_CH)] = pd.DataFrame()
217 |         locals()["CH{}eegfft_df".format(eeg_CH)] = temp_eegfft
218 |         pickle.dump(locals()["CH{}eegfft_df".format(eeg_CH)],open(file_path,"wb"))
219 |         eegfft_df = locals()["CH{}eegfft_df".format(eeg_CH)]
220 |     else:
221 |         file_path = "./dump_file/CH{}eegfft_df".format(eeg_CH)
222 |         eegfft_df = pickle.load(open(file_path,"rb"))
223 |         
224 |     eegfft_mean = pd.DataFrame( eegfft_mean(eegfft_df),columns=['CH{}eegfft_mean'.format(eeg_CH)])
225 |     eegfft_median = pd.DataFrame( eegfft_median(eegfft_df),columns=['CH{}eegfft_median'.format(eeg_CH)])
226 |     eegfft_std = pd.DataFrame( eegfft_std(eegfft_df),columns=['CH{}eegfft_std'.format(eeg_CH)])
227 |     eegfft_min = pd.DataFrame( eegfft_min(eegfft_df),columns=['CH{}eegfft_min'.format(eeg_CH)])
228 |     eegfft_max = pd.DataFrame( eegfft_max(eegfft_df),columns=['CH{}eegfft_max'.format(eeg_CH)])
229 |     eegfft_range = pd.DataFrame( eegfft_range(eegfft_max,eegfft_min,eeg_CH),columns=['CH{}eegfft_range'.format(eeg_CH)])
230 |    
231 |     feature_list = ['eeg_mean','eeg_median','eeg_std','eeg_min','eeg_max','eeg_range',
232 |                     'eeg_minRatio','eeg_maxRatio','eeg1Diff_mean','eeg1Diff_median',
233 |                     'eeg1Diff_std','eeg1Diff_min','eeg1Diff_max','eeg1Diff_range',
234 |                     'eeg1Diff_minRatio','eeg1Diff_maxRatio','eeg2Diff_std',
235 |                     'eeg2Diff_min','eeg2Diff_max','eeg2Diff_range','eeg2Diff_minRatio',
236 |                     'eeg2Diff_maxRatio','eegfft_mean','eegfft_median','eegfft_std',
237 |                     'eegfft_min','eegfft_max','eegfft_range']
238 |     #for feat_str in feature_list:
239 |     #   feat_str = "CH{}".format(eeg_CH)+feat_str
240 |     
241 |     temp_feature_df = pd.DataFrame()
242 |     for i in feature_list:
243 |         temp_feature_df = pd.concat( [locals()[i],temp_feature_df],axis=1)
244 |         
245 |     locals()["CH{}_eeg_feat_df".format(eeg_CH)] = temp_feature_df
246 |     file_path = "./dump_file/CH{}_eeg_feat_df".format(eeg_CH)
247 |     pickle.dump(locals()["CH{}_eeg_feat_df".format(eeg_CH)],open(file_path,"wb"))
248 |     ######################################################################
249 | 
250 | 


--------------------------------------------------------------------------------
/DEAP_GSR_feature_extract.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sun Jan  7 21:02:45 2018
  4 | 
  5 | @author: jinyx
  6 | """
  7 | 
  8 | import pandas as pd
  9 | import numpy as np
 10 | from sklearn.model_selection import GroupKFold
 11 | import pickle 
 12 | import matplotlib.pyplot as plt
 13 | from config import *
 14 | 
 15 | def sc_mean(df):
 16 |     return df.mean(axis=1)
 17 | 
 18 | def sc_median(df):
 19 |     return df.median(axis=1)
 20 | 
 21 | def sc_std(df):
 22 |     return df.std(axis=1)
 23 | 
 24 | def sc_min(df):
 25 |     return df.min(axis=1)
 26 | 
 27 | def sc_max(df):
 28 |     return df.max(axis=1)
 29 | 
 30 | def sc_range(df_max,df_min):
 31 |     return df_max['sc_max']-df_min['sc_min']
 32 | 
 33 | #最小值比率 = Mmin/N
 34 | def sc_minRatio(all_df,sc_min):
 35 |     all_df_T = all_df.T
 36 |     sc_min_T = sc_min.T
 37 |     sc_minRatio_dict = {}
 38 |     for i in all_df.index.tolist():
 39 |         num_min = len( all_df_T[i][ all_df_T[i] == sc_min_T.get_value(index='sc_min',col=i)] )   
 40 |         sc_minRatio_dict.update({i:num_min/8064.0})   
 41 |     sc_minRatio_df = pd.DataFrame.from_dict(data=sc_minRatio_dict,orient='index')
 42 |     sc_minRatio_df.columns = ['sc_minRatio']
 43 |     return sc_minRatio_df
 44 | 
 45 | #最大值比率 = Nmax/N
 46 | def sc_maxRatio(all_df,sc_max):
 47 |     all_df_T = all_df.T
 48 |     sc_max_T = sc_max.T
 49 |     sc_maxRatio_dict = {}
 50 |     for i in all_df.index.tolist():
 51 |         num_max = len( all_df_T[i][ all_df_T[i] == sc_max_T.get_value(index='sc_max',col=i)] )   
 52 |         sc_maxRatio_dict.update({i:num_max/8064.0})    
 53 |     sc_maxRatio_df = pd.DataFrame.from_dict(data=sc_maxRatio_dict,orient='index')
 54 |     sc_maxRatio_df.columns = ['sc_maxRatio']
 55 |     return sc_maxRatio_df   
 56 | 
 57 | #GSR一阶差分均值 
 58 | def sc1Diff_mean(all_df):
 59 |     sc1Diff_mean = all_df.diff(periods=1,axis=1).dropna(axis=1).mean(axis=1)    
 60 |     return sc1Diff_mean
 61 | 
 62 | #GSR一阶差分中值
 63 | def sc1Diff_median(all_df):
 64 |     sc1Diff_median = all_df.diff(periods=1,axis=1).dropna(axis=1).median(axis=1)
 65 |     return sc1Diff_median
 66 | 
 67 | #GSR一阶差分标准差
 68 | def sc1Diff_std(all_df):
 69 |     sc1Diff_std = all_df.diff(periods=1,axis=1).dropna(axis=1).std(axis=1)
 70 |     return sc1Diff_std
 71 | 
 72 | def sc1Diff_min(all_df):
 73 |     sc1Diff_min = all_df.diff(periods=1,axis=1).dropna(axis=1).min(axis=1)
 74 |     return sc1Diff_min
 75 |     
 76 | def sc1Diff_max(all_df):
 77 |     sc1Diff_max = all_df.diff(periods=1,axis=1).dropna(axis=1).max(axis=1)
 78 |     return sc1Diff_max    
 79 | 
 80 | def sc1Diff_range(sc1Diff_max,sc1Diff_min):
 81 |     return sc1Diff_max['sc1Diff_max']-sc1Diff_min['sc1Diff_min']
 82 | 
 83 | def sc1Diff_minRatio(all_df,sc1Diff_min):
 84 |     all_df_Diff_T = all_df.diff(periods=1,axis=1).dropna(axis=1).T
 85 |     sc1Diff_min_T = sc1Diff_min.T
 86 |     sc1Diff_minRatio_dict = {}
 87 |     for i in all_df.index.tolist():
 88 |         num_min = len( all_df_Diff_T[i][ all_df_Diff_T[i] == sc1Diff_min_T.get_value(index='sc1Diff_min',col=i)])
 89 |         sc1Diff_minRatio_dict.update({i:num_min/8063.0})
 90 |     sc1Diff_minRatio_df = pd.DataFrame.from_dict(data=sc1Diff_minRatio_dict,orient='index')
 91 |     return sc1Diff_minRatio_df
 92 | 
 93 | def sc1Diff_maxRatio(all_df,sc1Diff_max):
 94 |     all_df_Diff_T = all_df.diff(periods=1,axis=1).dropna(axis=1).T
 95 |     sc1Diff_max_T = sc1Diff_max.T
 96 |     sc1Diff_maxRatio_dict = {}
 97 |     for i in all_df.index.tolist():
 98 |         num_max = len( all_df_Diff_T[i][all_df_Diff_T[i] == sc1Diff_max_T.get_value(index='sc1Diff_max',col=i)])
 99 |         sc1Diff_maxRatio_dict.update({i:num_max/8063.0})
100 |     sc1Diff_maxRatio_df = pd.DataFrame.from_dict(data=sc1Diff_maxRatio_dict,orient='index')
101 |     return sc1Diff_maxRatio_df
102 | 
103 | def sc2Diff_std(all_df):
104 |     sc2Diff_std = all_df.diff(periods=2,axis=1).dropna(axis=1).std(axis=1)
105 |     return sc2Diff_std
106 | 
107 | def sc2Diff_min(all_df):
108 |     sc2Diff_min = all_df.diff(periods=2,axis=1).dropna(axis=1).min(axis=1)
109 |     return sc2Diff_min
110 | 
111 | def sc2Diff_max(all_df):
112 |     sc2Diff_max = all_df.diff(periods=2,axis=1).dropna(axis=1).max(axis=1)
113 |     return sc2Diff_max
114 | 
115 | def sc2Diff_range(sc2Diff_max,sc2Diff_min):
116 |     sc2Diff_range = sc2Diff_max['sc2Diff_max']-sc2Diff_min['sc2Diff_min']
117 |     return sc2Diff_range
118 | 
119 | def sc2Diff_minRatio(all_df,sc2Diff_min):
120 |     all_df_2Diff_T = all_df.diff(periods=2,axis=1).dropna(axis=1).T
121 |     sc2Diff_min_T = sc2Diff_min.T
122 |     sc2Diff_minRatio_dict = {}
123 |     for i in all_df.index.tolist():
124 |         num_min = len( all_df_2Diff_T[i][all_df_2Diff_T[i] == sc2Diff_min_T.get_value(index='sc2Diff_min',col=i)] )
125 |         sc2Diff_minRatio_dict.update({i:num_min/8062.0})
126 |     sc2Diff_minRatio_df = pd.DataFrame.from_dict(data=sc2Diff_minRatio_dict,orient='index')
127 |     return sc2Diff_minRatio_df
128 |         
129 | def sc2Diff_maxRatio(all_df,sc2Diff_max):
130 |     all_df_2Diff_T = all_df.diff(periods=2,axis=1).dropna(axis=1).T
131 |     sc2Diff_max_T = sc2Diff_max.T
132 |     sc2Diff_maxRatio_dict = {}
133 |     for i in all_df.index.tolist():
134 |         num_max = len( all_df_2Diff_T[i][all_df_2Diff_T[i] == sc2Diff_max_T.get_value(index='sc2Diff_max',col=i)] )
135 |         sc2Diff_maxRatio_dict.update({i:num_max/8062.0})
136 |     sc2Diff_maxRatio_df = pd.DataFrame.from_dict(data=sc2Diff_maxRatio_dict,orient='index')
137 |     return sc2Diff_maxRatio_df
138 | 
139 | #GSR DFT(FFT)频域数据
140 | def scfft(all_df):
141 |     scfft_df = pd.DataFrame()
142 |     for i in all_df_GSR_x.index.tolist():
143 |         temp_scfft = pd.DataFrame(np.fft.fft(all_df_GSR_x.loc[i,:].values)).T
144 |         temp_scfft.index = [i]
145 |         scfft_df = scfft_df.append(temp_scfft)
146 |     return scfft_df
147 |         
148 | #GSR 频域中值
149 | def scfft_mean(scfft_df):
150 |     scfft_mean = scfft_df.mean(axis=1)
151 |     return scfft_mean
152 | 
153 | def scfft_median(scfft_df):
154 |     scfft_median = scfft_df.median(axis=1)
155 |     return scfft_median
156 | 
157 | def scfft_std(scfft_df):
158 |     scfft_std = scfft_df.std(axis=1)
159 |     return scfft_std
160 | 
161 | def scfft_min(scfft_df):
162 |     scfft_min = scfft_df.min(axis=1)
163 |     return scfft_min
164 | 
165 | def scfft_max(scfft_df):
166 |     scfft_max = scfft_df.max(axis=1)
167 |     return scfft_max
168 | 
169 | def scfft_range(scfft_max,scfft_min):
170 |     scfft_range = scfft_max['scfft_max']-scfft_min['scfft_min']
171 |     return scfft_range
172 | 
173 | def get_123count(df):
174 |     tmp_df =pd.DataFrame()
175 |     for i in range(0,40,1):
176 |         num_1 = len(df[i][ df[i]==1 ])
177 |         num_2 = len(df[i][ df[i]==2 ])
178 |         num_3 = len(df[i][ df[i]==3 ])
179 |         list_num = [num_1,num_2,num_3]
180 |         tmp_df = pd.concat([tmp_df,pd.DataFrame(list_num)],axis=1)  
181 |     tmp_df.columns = range(0,40,1)
182 |     tmp_df.index = ['num_1','num_2','num_3']
183 |     return tmp_df
184 |     
185 |     
186 |         
187 | if __name__ == '__main__':
188 |     #read file 
189 |     all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
190 |     all_df_GSR_x = pickle.load(open("./dump_file/all_df_GSR_x","rb"))
191 |     
192 |     ###########################################################################
193 |     if True :
194 |         sc_mean = pd.DataFrame(sc_mean(all_df_GSR_x),columns=['sc_mean'])
195 |         sc_median = pd.DataFrame(sc_median(all_df_GSR_x),columns=['sc_median'])
196 |         sc_std = pd.DataFrame(sc_std(all_df_GSR_x),columns=['sc_std'])
197 |         sc_min = pd.DataFrame(sc_min(all_df_GSR_x),columns=['sc_min'])
198 |         sc_max = pd.DataFrame(sc_max(all_df_GSR_x),columns=['sc_max'])
199 |         sc_range = pd.DataFrame(sc_range(sc_max,sc_min),columns=['sc_range'])
200 |         sc_minRatio = pd.DataFrame(sc_minRatio(all_df_GSR_x,sc_min),columns=['sc_minRatio'])
201 |         sc_maxRatio = pd.DataFrame(sc_maxRatio(all_df_GSR_x,sc_max),columns=['sc_maxRatio'])
202 |         
203 |         sc1Diff_mean = pd.DataFrame( sc1Diff_mean(all_df_GSR_x),columns=['sc1Diff_mean'])
204 |         sc1Diff_median = pd.DataFrame( sc1Diff_median(all_df_GSR_x),columns=['sc1Diff_median'] )
205 |         sc1Diff_std = pd.DataFrame( sc1Diff_std(all_df_GSR_x),columns=['sc1Diff_std'])
206 |         sc1Diff_min = pd.DataFrame( sc1Diff_min(all_df_GSR_x),columns=['sc1Diff_min'])
207 |         sc1Diff_max = pd.DataFrame( sc1Diff_max(all_df_GSR_x),columns=['sc1Diff_max'])
208 |         sc1Diff_range = pd.DataFrame( sc1Diff_range(sc1Diff_max,sc1Diff_min),columns=['sc1Diff_range'])
209 |         sc1Diff_minRatio = sc1Diff_minRatio(all_df_GSR_x,sc1Diff_min)
210 |         sc1Diff_minRatio.columns=['sc1Diff_minRatio']
211 |         sc1Diff_maxRatio = sc1Diff_maxRatio(all_df_GSR_x,sc1Diff_max)
212 |         sc1Diff_maxRatio.columns=['sc1Diff_maxRatio']
213 |         
214 |         sc2Diff_std = pd.DataFrame( sc2Diff_std(all_df_GSR_x),columns=['sc2Diff_std'] )
215 |         sc2Diff_min = pd.DataFrame( sc2Diff_min(all_df_GSR_x),columns=['sc2Diff_min'] ) 
216 |         sc2Diff_max = pd.DataFrame( sc2Diff_max(all_df_GSR_x),columns=['sc2Diff_max'] )
217 |         sc2Diff_range = pd.DataFrame(sc2Diff_range(sc2Diff_max,sc2Diff_min),columns=['sc2Diff_range'])
218 |         sc2Diff_minRatio = sc2Diff_minRatio(all_df_GSR_x,sc2Diff_min)
219 |         sc2Diff_minRatio.columns=['sc2Diff_minRatio']
220 |         sc2Diff_maxRatio = sc2Diff_maxRatio(all_df_GSR_x,sc2Diff_max)
221 |         sc2Diff_maxRatio.columns=['sc2Diff_maxRatio']
222 | 
223 |         if False:
224 |             scfft_df = scfft(all_df_GSR_x)
225 |             pickle.dump(scfft_df,open("./dump_file/scfft_df","wb"))
226 |         else:
227 |             scfft_df = pickle.load(open("./dump_file/scfft_df","rb"))
228 |         
229 |         scfft_mean = pd.DataFrame( scfft_mean(scfft_df),columns=['scfft_mean'])
230 |         scfft_median = pd.DataFrame( scfft_median(scfft_df),columns=['scfft_median'])
231 |         scfft_std = pd.DataFrame( scfft_std(scfft_df),columns=['scfft_std'])
232 |         scfft_min = pd.DataFrame( scfft_min(scfft_df),columns=['scfft_min'])
233 |         scfft_max = pd.DataFrame( scfft_max(scfft_df),columns=['scfft_max'])
234 |         scfft_range = pd.DataFrame( scfft_range(scfft_max,scfft_min),columns=['scfft_range'])
235 |        
236 |         feature_list = ['sc_mean','sc_median','sc_std','sc_min','sc_max','sc_range',
237 |                         'sc_minRatio','sc_maxRatio','sc1Diff_mean','sc1Diff_median',
238 |                         'sc1Diff_std','sc1Diff_min','sc1Diff_max','sc1Diff_range',
239 |                         'sc1Diff_minRatio','sc1Diff_maxRatio','sc2Diff_std',
240 |                         'sc2Diff_min','sc2Diff_max','sc2Diff_range','sc2Diff_minRatio',
241 |                         'sc2Diff_maxRatio','scfft_mean','scfft_median','scfft_std',
242 |                         'scfft_min','scfft_max','scfft_range']
243 |         temp_feature_df = pd.DataFrame()
244 |         for i in feature_list:
245 |             temp_feature_df = pd.concat( [locals()[i],temp_feature_df],axis=1)
246 |             
247 |         GSR_feature_df = temp_feature_df
248 |         pickle.dump(GSR_feature_df,open("./dump_file/GSR_feature_df","wb"))
249 |         ######################################################################
250 | 
251 |     if True:
252 |         '''
253 |         print(all_df_y)
254 |         all_df_y_copy = all_df_y.copy()
255 |         all_df_y_copy['emotion'] = 0
256 |         all_df_y_copy['emotion'][ all_df_y_copy['valence'] >= 6] = 2
257 |         all_df_y_copy['emotion'][ (all_df_y_copy['valence'] < 6) & (all_df_y_copy['valence'] >= 4)] = 1 
258 |         all_df_y_copy['emotion'][ all_df_y_copy['valence'] < 4] = 0
259 |         all_df_y_mutiLable = all_df_y_copy[['emotion']]
260 |         pickle.dump(all_df_y_mutiLable,open("./dump_file/all_df_y_mutiLable","wb"))
261 |         '''
262 |         print(all_df_y)
263 |         all_df_y_copy = all_df_y.copy()
264 |         all_df_y_copy['emotion'] = 0
265 |         all_df_y_copy['emotion'][ (all_df_y_copy['valence'] >= 5) & (all_df_y_copy['arousal'] >= 5)] = 0
266 |         all_df_y_copy['emotion'][ (all_df_y_copy['valence'] < 5) & (all_df_y_copy['arousal'] >= 5)] = 1 
267 |         all_df_y_copy['emotion'][ (all_df_y_copy['valence'] < 5) & (all_df_y_copy['arousal'] < 5)] = 2
268 |         all_df_y_copy['emotion'][ (all_df_y_copy['valence'] >= 5) & (all_df_y_copy['arousal'] < 5)] = 3
269 |         all_df_y_mutiLable = all_df_y_copy[['emotion']]
270 |         pickle.dump(all_df_y_mutiLable,open("./dump_file/all_df_y_mutiLable","wb"))       
271 |     
272 |         all_df_y_copy = all_df_y.copy()
273 |         all_df_y_copy['emotion_2'] = 0
274 |         all_df_y_copy['emotion_2'][(all_df_y_copy['valence'] >= 5) & (all_df_y_copy['arousal'] >= 5)] = 1
275 |         all_df_y_2c = all_df_y_copy[['emotion_2']]
276 |         pickle.dump(all_df_y_2c,open("./dump_file/all_df_y_2c","wb"))   
277 |     
278 |         all_df_y_copy = all_df_y.copy()
279 |         all_df_y_valence = all_df_y_copy[['valence']]
280 |         pickle.dump(all_df_y_valence,open("./dump_file/all_df_y_valence","wb"))
281 |     
282 | 
283 |         
284 | 
285 |     
286 |     
287 |     
288 |     
289 |     
290 |     
291 |     
292 |     
293 |     
294 |     
295 |     
296 |     
297 |  


--------------------------------------------------------------------------------
/GCF_2cRSP__TimeDomain.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import multiprocessing\n",
 10 |     "from GCForest import gcForest\n",
 11 |     "import pandas as pd\n",
 12 |     "import numpy as np\n",
 13 |     "import pickle \n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "from sklearn.model_selection import train_test_split\n",
 16 |     "from sklearn.model_selection import StratifiedKFold\n",
 17 |     "from sklearn.preprocessing import MinMaxScaler\n",
 18 |     "from sklearn.metrics import accuracy_score\n",
 19 |     "from sklearn.metrics import f1_score\n",
 20 |     "from sklearn.metrics import precision_score\n",
 21 |     "from sklearn.metrics import recall_score\n",
 22 |     "from sklearn.preprocessing import Imputer\n",
 23 |     "import warnings\n",
 24 |     "warnings.filterwarnings(\"ignore\")\n",
 25 |     "#用来计算程序运行时间\n",
 26 |     "import datetime\n",
 27 |     "starttime = datetime.datetime.now()"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 2,
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "name": "stdout",
 37 |      "output_type": "stream",
 38 |      "text": [
 39 |       "       valence  arousal  dominance  liking  2cArousal  2cValence\n",
 40 |       "s01_0     7.71     7.60       6.90    7.83          1          1\n",
 41 |       "s01_1     8.10     7.31       7.28    8.47          1          1\n",
 42 |       "s01_2     8.58     7.54       9.00    7.08          1          1\n",
 43 |       "s01_3     4.94     6.01       6.12    8.06          0          0\n",
 44 |       "s01_4     6.96     3.92       7.19    6.05          1          1\n",
 45 |       "[seed:0]****************************************************\n",
 46 |       "RSP running multi-grain scan\n",
 47 |       "Slicing Sequence...\n",
 48 |       "Training MGS Random Forests...\n",
 49 |       "Slicing Sequence...\n",
 50 |       "(896, 492)\n",
 51 |       "(384, 492)\n",
 52 |       "Adding/Training Layer, n_layer=1\n",
 53 |       "Layer validation accuracy = 0.65\n",
 54 |       "Adding/Training Layer, n_layer=2\n",
 55 |       "Layer validation accuracy = 0.65\n",
 56 |       "ACC 0.6692708333333334\n",
 57 |       "F1 0.758095238095238\n",
 58 |       "Recal 0.9170506912442397\n",
 59 |       "Precision 0.6461038961038961\n",
 60 |       "[seed:100]****************************************************\n",
 61 |       "RSP running multi-grain scan\n",
 62 |       "Slicing Sequence...\n",
 63 |       "Training MGS Random Forests...\n",
 64 |       "Slicing Sequence...\n",
 65 |       "(896, 492)\n",
 66 |       "(384, 492)\n",
 67 |       "Adding/Training Layer, n_layer=1\n",
 68 |       "Layer validation accuracy = 0.6666666666666666\n",
 69 |       "Adding/Training Layer, n_layer=2\n",
 70 |       "Layer validation accuracy = 0.6666666666666666\n",
 71 |       "ACC 0.6614583333333334\n",
 72 |       "F1 0.7556390977443609\n",
 73 |       "Recal 0.9262672811059908\n",
 74 |       "Precision 0.638095238095238\n",
 75 |       "[seed:200]****************************************************\n",
 76 |       "RSP running multi-grain scan\n",
 77 |       "Slicing Sequence...\n",
 78 |       "Training MGS Random Forests...\n",
 79 |       "Slicing Sequence...\n",
 80 |       "(896, 492)\n",
 81 |       "(384, 492)\n",
 82 |       "Adding/Training Layer, n_layer=1\n",
 83 |       "Layer validation accuracy = 0.6555555555555556\n",
 84 |       "Adding/Training Layer, n_layer=2\n",
 85 |       "Layer validation accuracy = 0.6444444444444445\n",
 86 |       "ACC 0.6197916666666666\n",
 87 |       "F1 0.7276119402985074\n",
 88 |       "Recal 0.8986175115207373\n",
 89 |       "Precision 0.6112852664576802\n",
 90 |       "[seed:300]****************************************************\n",
 91 |       "RSP running multi-grain scan\n",
 92 |       "Slicing Sequence...\n",
 93 |       "Training MGS Random Forests...\n",
 94 |       "Slicing Sequence...\n",
 95 |       "(896, 492)\n",
 96 |       "(384, 492)\n",
 97 |       "Adding/Training Layer, n_layer=1\n",
 98 |       "Layer validation accuracy = 0.6444444444444445\n",
 99 |       "Adding/Training Layer, n_layer=2\n",
100 |       "Layer validation accuracy = 0.65\n",
101 |       "Adding/Training Layer, n_layer=3\n",
102 |       "Layer validation accuracy = 0.65\n",
103 |       "ACC 0.671875\n",
104 |       "F1 0.7604562737642586\n",
105 |       "Recal 0.9216589861751152\n",
106 |       "Precision 0.6472491909385113\n",
107 |       "[seed:400]****************************************************\n",
108 |       "RSP running multi-grain scan\n",
109 |       "Slicing Sequence...\n",
110 |       "Training MGS Random Forests...\n",
111 |       "Slicing Sequence...\n",
112 |       "(896, 492)\n",
113 |       "(384, 492)\n",
114 |       "Adding/Training Layer, n_layer=1\n",
115 |       "Layer validation accuracy = 0.6444444444444445\n",
116 |       "Adding/Training Layer, n_layer=2\n",
117 |       "Layer validation accuracy = 0.6444444444444445\n",
118 |       "ACC 0.6432291666666666\n",
119 |       "F1 0.7419962335216571\n",
120 |       "Recal 0.9078341013824884\n",
121 |       "Precision 0.6273885350318471\n",
122 |       "[seed:500]****************************************************\n",
123 |       "RSP running multi-grain scan\n",
124 |       "Slicing Sequence...\n",
125 |       "Training MGS Random Forests...\n",
126 |       "Slicing Sequence...\n",
127 |       "(896, 492)\n",
128 |       "(384, 492)\n",
129 |       "Adding/Training Layer, n_layer=1\n",
130 |       "Layer validation accuracy = 0.6611111111111111\n",
131 |       "Adding/Training Layer, n_layer=2\n",
132 |       "Layer validation accuracy = 0.6555555555555556\n",
133 |       "ACC 0.6614583333333334\n",
134 |       "F1 0.7490347490347491\n",
135 |       "Recal 0.8940092165898618\n",
136 |       "Precision 0.6445182724252492\n",
137 |       "[seed:600]****************************************************\n",
138 |       "RSP running multi-grain scan\n",
139 |       "Slicing Sequence...\n",
140 |       "Training MGS Random Forests...\n",
141 |       "Slicing Sequence...\n",
142 |       "(896, 492)\n",
143 |       "(384, 492)\n",
144 |       "Adding/Training Layer, n_layer=1\n",
145 |       "Layer validation accuracy = 0.65\n",
146 |       "Adding/Training Layer, n_layer=2\n",
147 |       "Layer validation accuracy = 0.65\n",
148 |       "ACC 0.65625\n",
149 |       "F1 0.7509433962264151\n",
150 |       "Recal 0.9170506912442397\n",
151 |       "Precision 0.6357827476038339\n",
152 |       "[seed:700]****************************************************\n",
153 |       "RSP running multi-grain scan\n",
154 |       "Slicing Sequence...\n",
155 |       "Training MGS Random Forests...\n",
156 |       "Slicing Sequence...\n",
157 |       "(896, 492)\n",
158 |       "(384, 492)\n",
159 |       "Adding/Training Layer, n_layer=1\n",
160 |       "Layer validation accuracy = 0.5666666666666667\n",
161 |       "Adding/Training Layer, n_layer=2\n",
162 |       "Layer validation accuracy = 0.5666666666666667\n",
163 |       "ACC 0.671875\n",
164 |       "F1 0.7649253731343285\n",
165 |       "Recal 0.9447004608294931\n",
166 |       "Precision 0.6426332288401254\n",
167 |       "[seed:800]****************************************************\n",
168 |       "RSP running multi-grain scan\n",
169 |       "Slicing Sequence...\n",
170 |       "Training MGS Random Forests...\n",
171 |       "Slicing Sequence...\n",
172 |       "(896, 492)\n",
173 |       "(384, 492)\n",
174 |       "Adding/Training Layer, n_layer=1\n",
175 |       "Layer validation accuracy = 0.6222222222222222\n",
176 |       "Adding/Training Layer, n_layer=2\n",
177 |       "Layer validation accuracy = 0.6222222222222222\n",
178 |       "ACC 0.6484375\n",
179 |       "F1 0.7438330170777987\n",
180 |       "Recal 0.9032258064516129\n",
181 |       "Precision 0.632258064516129\n",
182 |       "[seed:900]****************************************************\n",
183 |       "RSP running multi-grain scan\n",
184 |       "Slicing Sequence...\n",
185 |       "Training MGS Random Forests...\n",
186 |       "Slicing Sequence...\n",
187 |       "(896, 492)\n",
188 |       "(384, 492)\n",
189 |       "Adding/Training Layer, n_layer=1\n",
190 |       "Layer validation accuracy = 0.6388888888888888\n",
191 |       "Adding/Training Layer, n_layer=2\n",
192 |       "Layer validation accuracy = 0.65\n",
193 |       "Adding/Training Layer, n_layer=3\n",
194 |       "Layer validation accuracy = 0.6388888888888888\n",
195 |       "ACC 0.6640625\n",
196 |       "F1 0.7579737335834896\n",
197 |       "Recal 0.9308755760368663\n",
198 |       "Precision 0.6392405063291139\n"
199 |      ]
200 |     }
201 |    ],
202 |    "source": [
203 |     "#读取Y\n",
204 |     "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n",
205 |     "all_df_y['2cArousal'] = 0\n",
206 |     "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n",
207 |     "all_df_y['2cValence'] = 0\n",
208 |     "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n",
209 |     "print(all_df_y.head(5))\n",
210 |     "\n",
211 |     "#读取1个通道的RSP数据，每个通道包含32×40=1280个信号样本\n",
212 |     "#每个样本向量大小为8064点（63s*128Hz）\n",
213 |     "all_df_RSP_x = pickle.load(open(\"./dump_file/all_df_RSP_x\",\"rb\"))\n",
214 |     "#y = all_df_y[['2cArousal']]\n",
215 |     "y = all_df_y[['2cValence']]\n",
216 |     "for seed in [0,100,200,300,400,500,600,700,800,900]:\n",
217 |     "    print(\"[seed:{}]****************************************************\".format(seed))\n",
218 |     "    xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n",
219 |     "    xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n",
220 |     "    y_tr = y.loc[xTrainIdx]\n",
221 |     "    y_te = y.loc[xTestIdx]\n",
222 |     "\n",
223 |     "    RSPTrainSet = all_df_RSP_x.loc[xTrainIdx]\n",
224 |     "    RSPTestSet = all_df_RSP_x.loc[xTestIdx]\n",
225 |     "\n",
226 |     "    myWindowsSize = 256\n",
227 |     "    myStrideSize = 64\n",
228 |     "    gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n",
229 |     "                   min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n",
230 |     "    if True:     \n",
231 |     "        print(\"RSP running multi-grain scan\")\n",
232 |     "        xTrain,yTrain = RSPTrainSet.values,y_tr.values\n",
233 |     "        xTest = RSPTestSet.values   \n",
234 |     "        RSP_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)\n",
235 |     "        RSP_mgsTestVector = gcf.mg_scanning(xTest)\n",
236 |     "        filePath = \"./dump_file_V2/RSP_mgsTrainVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
237 |     "        pickle.dump(RSP_mgsTrainVector,open(filePath,\"wb\"))\n",
238 |     "        filePath = \"./dump_file_V2/RSP_mgsTestVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
239 |     "        pickle.dump(RSP_mgsTestVector,open(filePath,\"wb\"))\n",
240 |     "    else:\n",
241 |     "        filePath = \"./dump_file_V2/RSP_mgsTrainVector_{}_{}\".format(myWindowsSize,myStrideSize)\n",
242 |     "        RSP_mgsTrainVector = pickle.load(open(filePath,\"rb\"))\n",
243 |     "        filePath = \"./dump_file_V2/RSP_mgsTestVector_{}_{}\".format(myWindowsSize,myStrideSize)\n",
244 |     "        RSP_mgsTestVector = pickle.load(open(filePath,\"rb\"))\n",
245 |     "\n",
246 |     "    X_tr_vector = RSP_mgsTrainVector\n",
247 |     "    X_te_vector = RSP_mgsTestVector\n",
248 |     "    print(X_tr_vector.shape)\n",
249 |     "    print(X_te_vector.shape)\n",
250 |     "\n",
251 |     "    #有缺失值，填充下\n",
252 |     "    X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values\n",
253 |     "    X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values\n",
254 |     "\n",
255 |     "    _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)\n",
256 |     "\n",
257 |     "    pred_proba = gcf.cascade_forest(X_te_vector_fillna)\n",
258 |     "    tmp = np.mean(pred_proba, axis=0)\n",
259 |     "    preds = np.argmax(tmp, axis=1)\n",
260 |     "    print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n",
261 |     "    print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n",
262 |     "    print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n",
263 |     "    print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "code",
268 |    "execution_count": 3,
269 |    "metadata": {},
270 |    "outputs": [
271 |     {
272 |      "name": "stdout",
273 |      "output_type": "stream",
274 |      "text": [
275 |       "ACC 0.4739583333333333\n",
276 |       "F1 0.4898989898989899\n",
277 |       "Recal 0.4470046082949309\n",
278 |       "Precision 0.5418994413407822\n"
279 |      ]
280 |     }
281 |    ],
282 |    "source": [
283 |     "#preds = np.zeros(shape=(384,))\n",
284 |     "#preds = np.ones(shape=(384,))\n",
285 |     "preds = np.ones(shape=(384,))\n",
286 |     "for i in range(0,384):\n",
287 |     "    temp = np.random.choice(a=2, size=1, replace=False, p=[0.5,0.5])\n",
288 |     "    preds[i] = temp\n",
289 |     "print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n",
290 |     "print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n",
291 |     "print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n",
292 |     "print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": null,
298 |    "metadata": {},
299 |    "outputs": [],
300 |    "source": []
301 |   }
302 |  ],
303 |  "metadata": {
304 |   "kernelspec": {
305 |    "display_name": "Python 3",
306 |    "language": "python",
307 |    "name": "python3"
308 |   },
309 |   "language_info": {
310 |    "codemirror_mode": {
311 |     "name": "ipython",
312 |     "version": 3
313 |    },
314 |    "file_extension": ".py",
315 |    "mimetype": "text/x-python",
316 |    "name": "python",
317 |    "nbconvert_exporter": "python",
318 |    "pygments_lexer": "ipython3",
319 |    "version": "3.6.5"
320 |   }
321 |  },
322 |  "nbformat": 4,
323 |  "nbformat_minor": 2
324 | }
325 | 


--------------------------------------------------------------------------------