├── README.md
├── gcForestConfig.py
├── DEAP_RSP_feature_extract.py
├── EEGFeatV2.py
├── test.py
├── FeatureExtract.py
├── test_2.py
├── sampleEntropy.py
├── train_test.py
├── DNN_inference.py
├── CNN.py
├── stackingFunc.py
├── DEAP_data_xgb.py
├── DEAP_xgb_2c.py
├── CNNFunction.py
├── config.py
├── DEAP_Classification_KFold.py
├── DEAP_linearR_GridSearchCV.py
├── XGB_multiCla.py
├── DEAP_feat_select.py
├── XGB.py
├── XGBRegression.py
├── DNN.py
├── GCF_2cEEG_multiCH_TimeIMFS.ipynb
├── DEAP_data_preprocess.py
├── DEAP_linearR_KFold.py
├── DEAP_linearR_plots.py
├── XGB_4emotion.py
├── DEAP_linearR.py
├── XGB_2cEEG.ipynb
├── XGB_2c.ipynb
├── plot_test.py
├── RSP_feat_extract.py
├── GCF_2cGSR_TimeDomain.ipynb
├── EEG_EMD.ipynb
├── GCF_2cPPG_TimeDomain.ipynb
├── GCF_2cRandom.ipynb
├── GCF_2cTMP__TimeDomain.ipynb
├── EEG_feat_extract.py
├── DEAP_GSR_feature_extract.py
└── GCF_2cRSP__TimeDomain.ipynb

-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
# Emotion-recognition
Emotion recognition using the DEAP dataset and GSR signals.
-------------------------------------------------------------------------------- /gcForestConfig.py: --------------------------------------------------------------------------------
"""
Created on Mon Nov 12 18:43:24 2018

@author: jinyx
"""


if __name__ == "__main__":
    pass  # placeholder: no gcForest configuration code yet
-------------------------------------------------------------------------------- /DEAP_RSP_feature_extract.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 27 19:52:51 2018

@author: jinyx
"""
pass  # placeholder: RSP feature extraction not implemented yet
-------------------------------------------------------------------------------- /EEGFeatV2.py: --------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 6 14:51:59 2018

@author: jinyx
"""

-------------------------------------------------------------------------------- /test.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import pickle

GSR_feature_df = pickle.load(open("./dump_file/GSR_feature_df","rb"))
all_df_y_mutiLable = pickle.load(open("./dump_file/all_df_y_mutiLable","rb"))

# rank the GSR features by absolute correlation with the emotion label
feature_cols = GSR_feature_df.columns
corrs = GSR_feature_df[feature_cols].apply(lambda col: np.abs(all_df_y_mutiLable['emotion'].corr(col)))
sort_corrs = corrs.sort_values()
-------------------------------------------------------------------------------- /FeatureExtract.py: --------------------------------------------------------------------------------
"""
Feature-extraction helper functions.
"""
'''1 - EEG features'''
'''1.1 - EEG time-domain features'''
'''
1.1.1 - EEG time-domain mean
IN:  discrete time-domain samples, e.g. 8064 points per trial
OUT: mean
'''
def eeg_mean(df):
    return df.mean(axis=1)
'''
1.1.2 - EEG time-domain median
IN:  discrete time-domain samples, e.g. 8064 points per trial
OUT: median
'''
def eeg_median(df):
    return df.median(axis=1)
'''
1.1.3 - EEG time-domain standard deviation
IN:  discrete time-domain samples, e.g. 8064 points per trial
OUT: standard deviation
'''
def eeg_std(df):
    return df.std(axis=1)
'''
1.1.4 - EEG time-domain Shannon entropy
IN:  discrete time-domain samples, e.g. 8064 points per trial
OUT: Shannon entropy
'''
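# Added sketch (not in the original file): the section header above has no
# implementation. A minimal Shannon-entropy feature, assuming the continuous
# samples are first binned into a histogram (bins=100 is an arbitrary choice).
import numpy as np

def eeg_shannon_entropy(df, bins=100):
    def _entropy(row):
        counts, _ = np.histogram(row, bins=bins)
        p = counts / counts.sum()
        p = p[p > 0]  # drop empty bins to avoid log(0)
        return -np.sum(p * np.log2(p))
    return df.apply(_entropy, axis=1)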
| """ 3 | Created on Thu May 17 14:41:51 2018 4 | 5 | @author: jinyu 6 | """ 7 | ''' 8 | import tensorflow as tf 9 | # 设计Graph 10 | x1 = tf.constant([2, 3, 4]) 11 | x2 = tf.constant([4, 0, 1]) 12 | x3 = tf.constant([1, 1, 1]) 13 | y = tf.add(x1, x2) 14 | # 打开一个session --> 计算y 15 | with tf.Session() as sess: 16 | print(sess.run(y)) 17 | ''' 18 | import tensorflow as tf 19 | # 设计Graph 20 | x1 = tf.placeholder(tf.int16) 21 | x2 = tf.placeholder(tf.int16) 22 | y = tf.add(x1, x2) 23 | # 用Python产生数据 24 | li1 = [2, 3, 4] 25 | li2 = [4, 0, 1] 26 | # 打开一个session --> 喂数据 --> 计算y 27 | with tf.Session() as sess: 28 | print(sess.run(y, feed_dict={x1: li1, x2: li2})) -------------------------------------------------------------------------------- /sampleEntropy.py: -------------------------------------------------------------------------------- 1 | """ 2 | SampEn 计算时间序列data的样本熵 3 | 输入:data是数据一维行向量 4 | m重构维数,一般选择1或2,优先选择2,一般不取m>2 5 | r 阈值大小,一般选择r=0.1~0.25*Std(data) 6 | 输出:SampEnVal样本熵值大小 7 | """ 8 | import numpy as np 9 | 10 | def sampEn(U,m,r): 11 | def _maxdist(x_i, x_j): 12 | return max([abs(ua - va) for ua, va in zip(x_i, x_j)]) 13 | 14 | def _phi(m): 15 | x = [[U[j] for j in range(i, i + m - 1 + 1)] for i in range(N - m + 1)] 16 | B = [(len([1 for x_j in x if _maxdist(x_i, x_j) <= r]) - 1.0) / (N - m) for x_i in x] 17 | return (N - m + 1.0)**(-1) * sum(B) 18 | 19 | N = len(U) 20 | return -np.log(_phi(m+1) / _phi(m)) 21 | 22 | if __name__ == "__main__": 23 | # Usage example 24 | U = np.array([85, 80, 89] *17) 25 | print(sampEn(U,2,3)) -------------------------------------------------------------------------------- /train_test.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from sklearn.model_selection import train_test_split 3 | #读取Y 4 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb")) 5 | all_df_y['2cArousal'] = 0 6 | all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1 7 | all_df_y['2cValence'] = 0 8 | all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1 9 | print(all_df_y.head(5)) 10 | #读取32个通道的EEG数据,每个通道包含32×40=1280个信号样本(人次×每人次40实验) 11 | #每个样本向量大小为8064点(63s*128Hz) 12 | for eegCH in range(1,2,1): 13 | file_path = "./dump_file/CH{}_df_EEG_x".format(eegCH) 14 | locals()['CH{}_df_EEG_x'.format(eegCH)] = pickle.load(open(file_path,"rb")) 15 | #file_path = "./dump_file/CH{}eegfft_df".format(eegCH) 16 | #locals()["CH{}eegfft_df".format(eegCH)] = pickle.load(open(file_path,"rb")) 17 | 18 | X = CH1_df_EEG_x 19 | y = all_df_y[['2cValence']] 20 | if True: 21 | for seed in [0,100,200,300,400,500,600,700,800,900]: 22 | X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3,stratify=y,random_state=seed) 23 | xTrainIdx = X_tr.index 24 | xTestIdx = X_te.index 25 | pickle.dump(xTrainIdx,open("./dump_file/xTrainIdx_{}".format(seed),"wb")) 26 | pickle.dump(xTestIdx,open("./dump_file/xTestIdx_{}".format(seed),"wb")) -------------------------------------------------------------------------------- /DNN_inference.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Created on Thu Sep 27 15:43:31 2018 4 | DNN 前向传播过程 5 | @author: jinyx 6 | """ 7 | import tensorflow as tf 8 | 9 | #定义神经网络结构相关的参数 10 | INPUT_NODE = 258048 # 32*8064,输入节点 11 | OUTPUT_NODE = 2 # 2分类,输出节点 12 | LAYER1_NODE = 500 # 隐藏层节点数 13 | 14 | def get_weight_variable(shape, regularizer): 15 | weights = tf.get_variable("weights", shape, initializer=tf.truncated_normal_initializer(stddev=0.1)) 16 | if regularizer != None: 17 | 
-------------------------------------------------------------------------------- /DNN_inference.py: --------------------------------------------------------------------------------

"""
Created on Thu Sep 27 15:43:31 2018
DNN forward-propagation (inference) graph
@author: jinyx
"""
import tensorflow as tf

# network-structure parameters
INPUT_NODE = 258048  # 32*8064 input nodes
OUTPUT_NODE = 2      # binary classification
LAYER1_NODE = 500    # hidden-layer size

def get_weight_variable(shape, regularizer):
    weights = tf.get_variable("weights", shape, initializer=tf.truncated_normal_initializer(stddev=0.1))
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights

def inference(input_tensor, regularizer):
    with tf.variable_scope('layer1'):
        weights = get_weight_variable([INPUT_NODE, LAYER1_NODE], regularizer)
        biases = tf.get_variable("biases", [LAYER1_NODE], initializer=tf.constant_initializer(0.0))
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights) + biases)

    with tf.variable_scope('layer2'):
        weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE], regularizer)
        biases = tf.get_variable("biases", [OUTPUT_NODE], initializer=tf.constant_initializer(0.0))
        layer2 = tf.matmul(layer1, weights) + biases

    return layer2
-------------------------------------------------------------------------------- /CNN.py: --------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 17 10:10:21 2018
CNN classifier
@author: jinyx
"""
import pandas as pd
import numpy as np
import pickle
import random
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")
# measure program run time
import datetime
starttime = datetime.datetime.now()
# load data
for eeg_CH in range(1,33,1):
    file_path = "./dump_file/CH{}_df_EEG_x".format(eeg_CH)
    df_data = pickle.load(open(file_path,"rb"))
    locals()["CH{}_df_EEG_x".format(eeg_CH)] = df_data
all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
for i in range(0,1280,1):  # 1280 trials in total, so 1280 2-D matrices
    locals()["mat{}".format(i)]=pd.DataFrame()
    for eeg_CH in range(1,33,1):  # 32 EEG channels, so each matrix is 32 x 8064
        locals()["mat{}".format(i)] = locals()["mat{}".format(i)].\
        append(locals()["CH{}_df_EEG_x".format(eeg_CH)].iloc[i:i+1],ignore_index=True)

#
INPUT_NODE = 258048  # 32*8064
OUTPUT_NODE = 2      # binary classification

# naive split for now: the first 1000 trials for training, the remaining 280 for testing
-------------------------------------------------------------------------------- /stackingFunc.py: --------------------------------------------------------------------------------
"""
Created on Fri Nov 9 09:44:31 2018
stacking model fusion functions
@author: jinyx
"""
import numpy as np
from sklearn.model_selection import StratifiedKFold

def get_stacking(clf, x_train, y_train, x_test, n_folds=10):
    """
    The core of stacking: build the second-level training set with cross-validation.
    x_train, y_train and x_test must be numpy.ndarray;
    passing pandas DataFrames raises an error.
    """
    train_num, test_num = x_train.shape[0], x_test.shape[0]
    second_level_train_set = np.zeros((train_num,))
    second_level_test_set = np.zeros((test_num,))
    test_nfolds_sets = np.zeros((test_num, n_folds))
    skf = StratifiedKFold(n_splits=n_folds)

    i=0
    for (trainIdx, valiIdx) in skf.split(x_train,y_train):
        #print(x_train[trainIdx].shape,x_train[valiIdx].shape)
        x_tra, y_tra = x_train[trainIdx], y_train[trainIdx]
        x_tst, y_tst = x_train[valiIdx], y_train[valiIdx]
        clf.fit(x_tra, y_tra)

        second_level_train_set[valiIdx] = clf.predict(x_tst)
        test_nfolds_sets[:,i] = clf.predict(x_test)
        i+=1

    # for regression, average the fold predictions; for classification a
    # majority vote is more natural -- see the sketch after the main block
    second_level_test_set[:] = test_nfolds_sets.mean(axis=1)
    return second_level_train_set, second_level_test_set

if __name__ == "__main__":
    # a real estimator and 2-D features so the demo actually runs
    from sklearn.tree import DecisionTreeClassifier
    X = np.array([1,2,3,4,5,6,7,8,9,10]).reshape(-1, 1)
    print(X)
    y = np.array([0,0,0,0,1,1,1,1,1,1])
    get_stacking(clf=DecisionTreeClassifier(), x_train=X, y_train=y, x_test=X, n_folds=2)
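# Added sketch (not in the original file): a majority-vote aggregation for
# classifiers, answering the question raised in the comment inside get_stacking.
from scipy.stats import mode

def vote_test_set(test_nfolds_sets):
    """Majority vote across the fold-wise test predictions (one column per fold)."""
    votes, _ = mode(test_nfolds_sets, axis=1)
    return votes.ravel()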
-------------------------------------------------------------------------------- /DEAP_data_xgb.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np
import xgboost as xgb
import pickle
from sklearn.preprocessing import MinMaxScaler

GSR_selected_feature_df = pickle.load(open("./dump_file/GSR_selected_feature_df","rb"))
all_df_y_mutiLable = pickle.load(open("./dump_file/all_df_y_mutiLable","rb"))
print('GSR_selected_feature_df.shape:',GSR_selected_feature_df.shape)
print('all_df_y_mutiLable.shape:',all_df_y_mutiLable.shape)

scaler = MinMaxScaler()
scaler.fit(GSR_selected_feature_df)
data = scaler.transform(GSR_selected_feature_df)
data_df = pd.DataFrame(data)

# sequential 70/30 split (no shuffling), so train and test trials come from different subjects
train_X = data_df.iloc[:int(1280*0.7), :].values
test_X = data_df.iloc[int(1280*0.7):,:].values

train_Y = all_df_y_mutiLable.iloc[:int(1280*0.7), :].values
test_Y = all_df_y_mutiLable.iloc[int(1280*0.7):,:].values

xg_train = xgb.DMatrix(train_X, label=train_Y)
xg_test = xgb.DMatrix(test_X, label=test_Y)

# setup parameters for xgboost
param = {}
# use softmax multi-class classification
param['objective'] = 'multi:softmax'
# scale weight of positive examples
param['eta'] = 0.1
param['max_depth'] = 6
param['silent'] = 1
param['nthread'] = 8
param['num_class'] = 4

watchlist = [(xg_train, 'train'), (xg_test, 'test')]
num_round = 500
bst = xgb.train(param, xg_train, num_round, watchlist)
# get prediction; flatten test_Y so the element-wise comparison does not broadcast
pred = bst.predict(xg_test)
error_rate = np.sum(pred != test_Y.ravel()) / test_Y.shape[0]
print('Test error using softmax = {}'.format(error_rate))
-------------------------------------------------------------------------------- /DEAP_xgb_2c.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 16 13:04:47 2018

@author: jinyx
"""

import numpy as np
import pandas as pd
import xgboost as xgb
import pickle
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

# load the selected features
df_feat = pickle.load(open("./dump_file/df_feat_selected","rb"))
# load the labels (Y)
all_df_y_2c = pickle.load(open("./dump_file/all_df_y_2c","rb"))
print("df_feat.shape:",df_feat.shape)

train_X,test_X,train_Y,test_Y = \
    train_test_split(df_feat,all_df_y_2c,test_size=0.2,random_state=1000)

print("train_X.shape:",train_X.shape)
print("test_X.shape:",test_X.shape)

#
dtrain = xgb.DMatrix(train_X, train_Y)
dtest = xgb.DMatrix(test_X,test_Y)

xgb_params = {
    'booster': 'gbtree',
    'colsample_bytree': 0.8,
    'colsample_bylevel': 0.8,
    'eta': 0.01,
    'max_depth': 6,
    'objective': 'binary:logistic',
    'eval_metric': 'error',
    'silent':0,
}

watchlist = [(dtrain, 'train'), (dtest, 'test')]
num_round = 300
bst = xgb.train(xgb_params, dtrain, num_round,evals=watchlist)

y_pred = bst.predict(dtest)

# threshold the predicted probabilities at 0.5
df_y_pred = pd.DataFrame(y_pred,columns=['temp_pred_y'])
df_y_pred['pred_y'] = 0
df_y_pred['pred_y'][df_y_pred['temp_pred_y'] >= 0.5] = 1
df_y_pred['pred_y'][df_y_pred['temp_pred_y'] < 0.5] = 0
print(accuracy_score(test_Y, df_y_pred['pred_y']))
-------------------------------------------------------------------------------- /CNNFunction.py: --------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 6 16:23:16 2018

@author: jinyx
"""
import tensorflow as tf
# build the network
def buildCNN(w, h, c):
    # placeholders
    x = tf.placeholder(tf.float32, shape=[None, w, h, c], name='x')
    y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')

    # first convolutional layer + pooling layer
    conv1 = tf.layers.conv2d(
        inputs=x,
        filters=5,
        kernel_size=[1, 171],
        padding="same",  # zero padding
        activation=tf.nn.relu,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[1, 5], strides=2)

    # flatten the pooled feature map; its size depends on w, h and the filter
    # count, so it must not be hard-coded
    re1 = tf.layers.flatten(pool1)
    # fully connected layers
    dense1 = tf.layers.dense(inputs=re1,
                             units=1024,
                             activation=tf.nn.relu,
                             kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                             kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
    logits = tf.layers.dense(inputs=dense1,
                             units=2,
                             activation=None,
                             kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                             kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))

    return logits, x, y_

# return the loss, training op and accuracy tensors
def accCNN(logits, y_):
    loss = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=logits)
    train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_)
    acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return loss, train_op, correct_prediction, acc
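# Added sketch (not in the original file): how the two helpers compose.
# The input geometry (32 channels x 8064 samples, 1 "colour" channel) and the
# zero-filled dummy batch are assumptions, just to show the expected shapes.
import numpy as np
logits, x, y_ = buildCNN(w=32, h=8064, c=1)
loss, train_op, correct_prediction, acc = accCNN(logits, y_)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    X_batch = np.zeros((4, 32, 8064, 1), dtype=np.float32)
    y_batch = np.zeros((4,), dtype=np.int32)
    _, batch_loss, batch_acc = sess.run([train_op, loss, acc],
                                        feed_dict={x: X_batch, y_: y_batch})
    print("loss: %.4f, acc: %.4f" % (batch_loss, batch_acc))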
-------------------------------------------------------------------------------- /config.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 2 15:14:09 2018

@author: jinyx
"""
s01_file_path = './data_preprocessed_python/s01.dat'
s02_file_path = './data_preprocessed_python/s02.dat'
s03_file_path = './data_preprocessed_python/s03.dat'
s04_file_path = './data_preprocessed_python/s04.dat'
s05_file_path = './data_preprocessed_python/s05.dat'
s06_file_path = './data_preprocessed_python/s06.dat'
s07_file_path = './data_preprocessed_python/s07.dat'
s08_file_path = './data_preprocessed_python/s08.dat'
s09_file_path = './data_preprocessed_python/s09.dat'
s10_file_path = './data_preprocessed_python/s10.dat'
s11_file_path = './data_preprocessed_python/s11.dat'
s12_file_path = './data_preprocessed_python/s12.dat'
s13_file_path = './data_preprocessed_python/s13.dat'
s14_file_path = './data_preprocessed_python/s14.dat'
s15_file_path = './data_preprocessed_python/s15.dat'
s16_file_path = './data_preprocessed_python/s16.dat'
s17_file_path = './data_preprocessed_python/s17.dat'
s18_file_path = './data_preprocessed_python/s18.dat'
s19_file_path = './data_preprocessed_python/s19.dat'
s20_file_path = './data_preprocessed_python/s20.dat'
s21_file_path = './data_preprocessed_python/s21.dat'
s22_file_path = './data_preprocessed_python/s22.dat'
s23_file_path = './data_preprocessed_python/s23.dat'
s24_file_path = './data_preprocessed_python/s24.dat'
s25_file_path = './data_preprocessed_python/s25.dat'
s26_file_path = './data_preprocessed_python/s26.dat'
s27_file_path = './data_preprocessed_python/s27.dat'
s28_file_path = './data_preprocessed_python/s28.dat'
s29_file_path = './data_preprocessed_python/s29.dat'
s30_file_path = './data_preprocessed_python/s30.dat'
s31_file_path = './data_preprocessed_python/s31.dat'
s32_file_path = './data_preprocessed_python/s32.dat'
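# Added sketch (not in the original file): the 32 assignments above could be
# generated programmatically; kept separate so the existing names still import.
file_paths = {i: './data_preprocessed_python/s{:02d}.dat'.format(i) for i in range(1, 33)}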
-------------------------------------------------------------------------------- /DEAP_Classification_KFold.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import xgboost as xgb
import warnings
warnings.filterwarnings("ignore")
# measure program run time
import datetime
starttime = datetime.datetime.now()

print("###### loading data (GSR-based) ######")
GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
all_df_y_2c = pickle.load(open("./dump_file/all_df_y_2c","rb"))
print("GSR_feature_df.shape:",GSR_feature_df.shape)

print("###### scaling: min-max normalisation ######")
min_max_scaler = MinMaxScaler()
GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)

##################### binary valence classification below #####################
if True:
    print("---------------- binary classification of high/low valence ----------------")
    data = GSR_feature_df
    target = all_df_y_2c  # high/low valence

    # Naive Bayes performs poorly here, possibly because the data are not Gaussian
    #print("######NB classification CV######")
    #NB_model = GaussianNB()
    #NB_scores = cross_val_score(NB_model,data,target,cv=5,scoring='accuracy')
    #print("NB_scores:",abs(NB_scores))
    #print("NB_scores_mean:",abs(NB_scores.mean()))

    print("######KNN classification CV######")
    KNN_model = KNeighborsClassifier(n_neighbors=20)
    KNN_scores = cross_val_score(KNN_model,data,target,cv=5,scoring='accuracy')
    print("KNN_scores:",abs(KNN_scores))
    print("KNN_scores_mean:",abs(KNN_scores.mean()))

    print("######xgb classification CV######")
    xgb_model = xgb.XGBClassifier(max_depth=6,learning_rate=0.01,n_estimators=300,
                                  objective='binary:logistic',booster='gbtree',n_jobs=10,
                                  subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
                                  reg_alpha=0.5, reg_lambda=1.0,gamma=0,
                                  scale_pos_weight=1)
    xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
    print("xgb_scores:",abs(xgb_scores))
    print("xgb_scores_mean:",abs(xgb_scores.mean()))

    print("######MLP classification CV######")
    mlp_model = MLPClassifier(hidden_layer_sizes=(500,2),alpha=0.1)
    mlp_scores = cross_val_score(mlp_model,data,target,cv=5,scoring='accuracy')
    print("mlp_scores:",abs(mlp_scores))
    print("mlp_scores_mean:",abs(mlp_scores.mean()))


# measure program run time
endtime = datetime.datetime.now()
print("run time: %.1fs"%(endtime - starttime).seconds)
print("Ridge->best_params:",gsearch.best_score_) 75 | #用来计算程序运行时间 76 | endtime = datetime.datetime.now() 77 | print("程序运行时间:%.1fs"%(endtime - starttime).seconds) 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /XGB_multiCla.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Aug 3 13:31:09 2018 4 | 情绪的多分类问题 5 | @author: jinyu 6 | """ 7 | 8 | import pandas as pd 9 | import numpy as np 10 | import pickle 11 | import random 12 | import xgboost as xgb 13 | from sklearn.preprocessing import MinMaxScaler 14 | from sklearn import preprocessing 15 | from sklearn.model_selection import cross_val_score 16 | from sklearn.model_selection import cross_val_predict 17 | from sklearn.metrics import accuracy_score 18 | import warnings 19 | warnings.filterwarnings("ignore") 20 | #用来计算程序运行时间 21 | import datetime 22 | starttime = datetime.datetime.now() 23 | 24 | #读取数据 25 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb")) 26 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb")) 27 | print("GSR_feature_df.shape:",GSR_feature_df.shape) 28 | 29 | print("数据缩放处理,归一化处理") 30 | min_max_scaler = MinMaxScaler() 31 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df) 32 | 33 | print("把连续的唤醒度和愉悦度转化为离散的4个类别值") 34 | print("---------happy emotion----------") 35 | df_result = all_df_y 36 | a = df_result[df_result.valence>=5].index 37 | b = df_result[df_result.arousal>=5].index 38 | #happy_index = [val for val in a if val in b] 39 | happy_index = set(a).intersection(set(b)) 40 | print("len(happy_index)=",len(happy_index)) 41 | df_result['4emotion'] = -1 42 | for i in happy_index: 43 | df_result['4emotion'].loc[i] = 0 44 | print("---------sad emotion----------") 45 | df_result = all_df_y 46 | a = df_result[df_result.valence<=5].index 47 | b = df_result[df_result.arousal<=5].index 48 | #sad_index = [val for val in a if val in b] 49 | sad_index = set(a).intersection(set(b)) 50 | print("len(sad_index)=",len(sad_index)) 51 | for i in sad_index: 52 | df_result['4emotion'].loc[i] = 1 53 | print("---------nervous emotion----------") 54 | df_result = all_df_y 55 | a = df_result[df_result.valence<5].index 56 | b = df_result[df_result.arousal>5].index 57 | #nervous_index = [val for val in a if val in b] 58 | nervous_index = set(a).intersection(set(b)) 59 | print("len(nervous_index)=",len(nervous_index)) 60 | for i in nervous_index: 61 | df_result['4emotion'].loc[i] = 2 62 | print("---------calm emotion----------") 63 | df_result = all_df_y 64 | a = df_result[df_result.valence>5].index 65 | b = df_result[df_result.arousal<5].index 66 | #calm_index = [val for val in a if val in b] 67 | calm_index = set(a).intersection(set(b)) 68 | print("len(calm_index)=",len(calm_index)) 69 | for i in calm_index: 70 | df_result['4emotion'].loc[i] = 3 71 | 72 | ############################################################################### 73 | if True: 74 | print("训练多分类器") 75 | data = GSR_feature_df 76 | target = all_df_y[['4emotion']] 77 | print("######xgboost model CV######") 78 | for xgb_rounds in [50]: 79 | xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50, 80 | objective='multi:softmax',booster='gbtree',n_jobs=10, 81 | subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9, 82 | reg_alpha=0.5, reg_lambda=1.0,gamma=0, 83 | 
# measure program run time
endtime = datetime.datetime.now()
print("run time: %.1fs"%(endtime - starttime).seconds)
-------------------------------------------------------------------------------- /XGB_multiCla.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 3 13:31:09 2018
multi-class emotion classification
@author: jinyu
"""

import pandas as pd
import numpy as np
import pickle
import random
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")
# measure program run time
import datetime
starttime = datetime.datetime.now()

# load data
GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
print("GSR_feature_df.shape:",GSR_feature_df.shape)

print("scaling: min-max normalisation")
min_max_scaler = MinMaxScaler()
GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)

print("map the continuous valence/arousal ratings onto 4 discrete emotion classes")
# note: ratings exactly equal to 5 satisfy more than one condition below;
# because the loops run in sequence, the last matching class wins
print("---------happy emotion----------")
df_result = all_df_y
a = df_result[df_result.valence>=5].index
b = df_result[df_result.arousal>=5].index
#happy_index = [val for val in a if val in b]
happy_index = set(a).intersection(set(b))
print("len(happy_index)=",len(happy_index))
df_result['4emotion'] = -1
for i in happy_index:
    df_result['4emotion'].loc[i] = 0
print("---------sad emotion----------")
df_result = all_df_y
a = df_result[df_result.valence<=5].index
b = df_result[df_result.arousal<=5].index
#sad_index = [val for val in a if val in b]
sad_index = set(a).intersection(set(b))
print("len(sad_index)=",len(sad_index))
for i in sad_index:
    df_result['4emotion'].loc[i] = 1
print("---------nervous emotion----------")
df_result = all_df_y
a = df_result[df_result.valence<5].index
b = df_result[df_result.arousal>5].index
#nervous_index = [val for val in a if val in b]
nervous_index = set(a).intersection(set(b))
print("len(nervous_index)=",len(nervous_index))
for i in nervous_index:
    df_result['4emotion'].loc[i] = 2
print("---------calm emotion----------")
df_result = all_df_y
a = df_result[df_result.valence>5].index
b = df_result[df_result.arousal<5].index
#calm_index = [val for val in a if val in b]
calm_index = set(a).intersection(set(b))
print("len(calm_index)=",len(calm_index))
for i in calm_index:
    df_result['4emotion'].loc[i] = 3
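# Added sketch (not in the original file): the same 4-class labelling,
# vectorised with np.select instead of row-wise .loc writes. The conditions are
# listed in reverse order because np.select keeps the FIRST match, whereas the
# sequential loops above keep the LAST one.
conditions = [
    (df_result.valence > 5) & (df_result.arousal < 5),    # calm
    (df_result.valence < 5) & (df_result.arousal > 5),    # nervous
    (df_result.valence <= 5) & (df_result.arousal <= 5),  # sad
    (df_result.valence >= 5) & (df_result.arousal >= 5),  # happy
]
df_result['4emotion_vec'] = np.select(conditions, [3, 2, 1, 0], default=-1)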
###############################################################################
if True:
    print("train the multi-class classifier")
    data = GSR_feature_df
    target = all_df_y[['4emotion']]
    print("######xgboost model CV######")
    for xgb_rounds in [50]:
        xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,
                                      objective='multi:softmax',booster='gbtree',n_jobs=10,
                                      subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
                                      reg_alpha=0.5, reg_lambda=1.0,gamma=0,
                                      scale_pos_weight=1)
        xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
        print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
        xgb_pred_4emotion = cross_val_predict(xgb_model,data,target,cv=5)
        acc_4emotion = accuracy_score(xgb_pred_4emotion,df_result['4emotion'])
        print("4emotion_acc:",acc_4emotion)

# measure program run time
endtime = datetime.datetime.now()
print("run time: %.1fs"%(endtime - starttime).seconds)
-------------------------------------------------------------------------------- /DEAP_feat_select.py: --------------------------------------------------------------------------------
import pandas as pd
import numpy as np
import pickle

def feat_select(use_GSR,use_RSP,use_EEG,complex_abs,complex_real,complex_imag):
    # load the raw features
    GSR_feature_df = pickle.load(open("./dump_file/GSR_feature_df","rb"))
    RSP_feature_df = pickle.load(open("./dump_file/RSP_feature_df","rb"))
    for eeg_CH in range(1,33,1):
        locals()["CH{}EEG_feature_df".format(eeg_CH)] = pickle.load(open("./dump_file/CH{}_eeg_feat_df".format(eeg_CH),"rb"))
    all_df_y_mutiLable = pickle.load(open("./dump_file/all_df_y_mutiLable","rb"))
    all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
    all_df_y= pickle.load(open("./dump_file/all_df_y","rb"))
    if use_GSR == False:
        GSR_feature_df = pd.DataFrame()
    if use_RSP == False:
        RSP_feature_df = pd.DataFrame()
    # concatenate all the features
    df_feat = pd.concat([GSR_feature_df,RSP_feature_df],axis=1)
    if use_EEG == True:
        for eeg_CH in range(1,33,1):
            df_feat = pd.concat([df_feat,locals()["CH{}EEG_feature_df".format(eeg_CH)]],axis=1)

    # real parts of the complex-valued (FFT) features
    if complex_real == True:
        df_real = df_feat.select_dtypes(["complex128"]).apply(lambda x:x.real)
        list_new_col=[]
        for col in df_real.columns:
            list_new_col.append('real_{}'.format(col))
        df_real.columns = list_new_col
        df_feat = pd.concat([df_real,df_feat],axis=1)

    # imaginary parts of the complex-valued (FFT) features
    if complex_imag == True:
        df_imag = df_feat.select_dtypes(["complex128"]).apply(lambda x:x.imag)
        list_new_col=[]
        for col in df_imag.columns:
            list_new_col.append('imag_{}'.format(col))
        df_imag.columns = list_new_col
        df_feat = pd.concat([df_imag,df_feat],axis=1)

    #True: drop complex data
    if complex_abs == False:
        if use_GSR == True:
            df_feat.drop(['scfft_mean','scfft_median','scfft_std',
                          'scfft_min','scfft_max','scfft_range'
                          ],inplace=True,axis=1)
        if use_RSP == True:
            df_feat.drop(['rspfft_max',
                          'rspfft_range','rspfft_min','rspfft_median','rspfft_mean'
                          ],inplace=True,axis=1)
        if use_EEG == True:
            df_feat.drop(['CH2eeg2Diff_range','CH2eeg2Diff_max',
                          'CH2eeg2Diff_min','CH2eeg1Diff_range','CH2eeg1Diff_max','CH2eeg1Diff_min',
                          'CH2eeg1Diff_median','CH2eeg1Diff_mean','CH2eeg_range','CH2eeg_max',
                          'CH2eeg_min','CH2eeg_median','CH2eeg_mean'],inplace=True,axis=1)
            for eeg_CH in range(1,33,1):
                df_feat.drop(['CH{}eegfft_mean'.format(eeg_CH),'CH{}eegfft_median'.format(eeg_CH),
                              'CH{}eegfft_std'.format(eeg_CH), 'CH{}eegfft_min'.format(eeg_CH),
                              'CH{}eegfft_max'.format(eeg_CH),'CH{}eegfft_range'.format(eeg_CH),],inplace=True,axis=1)

    elif complex_abs == True:
        #compute abs for complex
        df_abs = df_feat.select_dtypes(["complex128"]).apply(np.abs)
        list_drop = df_abs.columns
        df_feat.drop(labels=list_drop,axis=1,inplace=True)
        df_feat = pd.concat([df_abs,df_feat],axis=1)



    df_feat_selected = df_feat
    # filter the features by correlation with the target
    if True:
        feature_cols = df_feat.columns
        # valence target
        corrs = df_feat[feature_cols].apply(lambda col:np.abs(all_df_y['valence'].corr(col)))
        # arousal target (alternative)
        #corrs = df_feat[feature_cols].apply(lambda col:np.abs(all_df_y['arousal'].corr(col)))
        sort_corrs = corrs.sort_values()
        selected_feature = sort_corrs[sort_corrs > 0.00].index
        df_feat_selected = df_feat[selected_feature]
        print(sort_corrs)
    return df_feat_selected

if __name__ == '__main__':
    df_feat_selected = feat_select(use_GSR=True,use_RSP=False,use_EEG=False,complex_abs=True,complex_real=False,complex_imag=False)
    #df_feat_selected = feat_select(use_GSR=True,use_RSP=False,use_EEG=False,complex_abs=True,complex_real=True,complex_imag=True)
    print('df_feat_selected.shape:',df_feat_selected.shape)

    pickle.dump(df_feat_selected,open("./dump_file/df_feat_selected","wb"))
-------------------------------------------------------------------------------- /XGB.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 2 02:57:14 2018
Purpose: XGB regression predicts arousal and valence; the two predictions are
then combined into quadrant-based emotion labels.
@author: jinyu
"""
import pandas as pd
import numpy as np
import pickle
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
import warnings
warnings.filterwarnings("ignore")
# measure program run time
import datetime
starttime = datetime.datetime.now()

# load data
GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
print("GSR_feature_df.shape:",GSR_feature_df.shape)

print("scaling: min-max normalisation")
min_max_scaler = MinMaxScaler()
GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)

##################### cross-validated valence prediction ######################
if True:
    print("---------------- valence prediction ----------------")
    data = GSR_feature_df
    target = all_df_y[['valence']]
    print("######xgboost model CV######")
    for xgb_rounds in [50]:
        xgb_model = xgb.XGBRegressor(max_depth=5,learning_rate=0.1,n_estimators=xgb_rounds,
                                     objective='reg:linear',booster='gbtree',n_jobs=10,
                                     subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
                                     reg_alpha=0.1, reg_lambda=0.8,gamma=1.0)
        xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
        print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
        xgb_pred_valence = cross_val_predict(xgb_model,data,target,cv=5)

##################### cross-validated arousal regression ######################
if True:
    print("---------------- arousal prediction ----------------")
    data = GSR_feature_df
    target = all_df_y[['arousal']]
    print("######xgboost regression model CV######")
    for xgb_rounds in [50]:
        xgb_model = xgb.XGBRegressor(max_depth=7,learning_rate=0.1,n_estimators=xgb_rounds,
                                     objective='reg:linear',booster='gbtree',n_jobs=10,
                                     subsample=0.9, colsample_bytree=0.90, colsample_bylevel=0.90,
                                     reg_alpha=0.1, reg_lambda=0.5,gamma=0)
        xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
        print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
        xgb_pred_arousal = cross_val_predict(xgb_model,data,target,cv=5)

print("build per-quadrant binary emotion labels from the regression predictions")
df_v = pd.DataFrame(xgb_pred_valence,columns=['pred_v'],index=all_df_y.index)
df_a = pd.DataFrame(xgb_pred_arousal,columns=['pred_a'],index=all_df_y.index)
df_true_v = all_df_y[['valence']]
df_true_a = all_df_y[['arousal']]
df_result = pd.concat([df_v,df_a,df_true_v,df_true_a],axis=1)

def count_accuracy(ser1,ser2):
    sum_all = len(ser1)
    tmp = ser1==ser2
    sum_acc= len(tmp[tmp==True])
    return sum_acc/sum_all

print("---------happy emotion----------")
# the happy quadrant requires BOTH valence >= 5 AND arousal >= 5,
# hence the intersection of the two index sets
happy_index = set(df_result[df_result.valence>=5].index).intersection(df_result[df_result.arousal>=5].index)
print("len(happy_index)=",len(happy_index))
df_result['happy'] = -1
for i in happy_index:
    df_result['happy'].loc[i] = 1
pred_happy_index = set(df_result[df_result.pred_v>=5].index).intersection(df_result[df_result.pred_a>=5].index)
print("len(pred_happy_index)=",len(pred_happy_index))
df_result['pred_happy'] = -1
for i in pred_happy_index:
    df_result['pred_happy'].loc[i] = 1
acc = count_accuracy(df_result['pred_happy'],df_result['happy'])
print("happy acc:",acc)

print("---------sad emotion----------")
# the sad quadrant requires BOTH valence < 5 AND arousal < 5 (intersection, as above)
sad_index = set(df_result[df_result.valence<5].index).intersection(df_result[df_result.arousal<5].index)
print("len(sad_index)=",len(sad_index))
df_result['sad'] = -1
for i in sad_index:
    df_result['sad'].loc[i] = 1
pred_sad_index = set(df_result[df_result.pred_v<5].index).intersection(df_result[df_result.pred_a<5].index)
print("len(pred_sad_index)=",len(pred_sad_index))
df_result['pred_sad'] = -1
for i in pred_sad_index:
    df_result['pred_sad'].loc[i] = 1
acc = count_accuracy(df_result['pred_sad'],df_result['sad'])
print("sad acc:",acc)


# measure program run time
endtime = datetime.datetime.now()
print("run time: %.1fs"%(endtime - starttime).seconds)
-------------------------------------------------------------------------------- /XGBRegression.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 2 02:57:14 2018
Purpose: XGB regression predicts arousal and valence; the two predictions are
then combined into quadrant-based emotion labels.
@author: jinyu
"""
import pandas as pd
import numpy as np
import pickle
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
import warnings
warnings.filterwarnings("ignore")
# measure program run time
import datetime
starttime = datetime.datetime.now()

# load data
GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
print("GSR_feature_df.shape:",GSR_feature_df.shape)

print("scaling: min-max normalisation")
min_max_scaler = MinMaxScaler()
GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)

##################### cross-validated valence prediction ######################
if True:
    print("---------------- valence prediction ----------------")
    data = GSR_feature_df
    target = all_df_y[['valence']]
    print("######xgboost model CV######")
    for xgb_rounds in [50]:
        xgb_model = xgb.XGBRegressor(max_depth=5,learning_rate=0.1,n_estimators=xgb_rounds,
                                     objective='reg:linear',booster='gbtree',n_jobs=10,
                                     subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
                                     reg_alpha=0.1, reg_lambda=0.8,gamma=1.0)
        xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
        print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
        xgb_pred_valence = cross_val_predict(xgb_model,data,target,cv=5)

##################### cross-validated arousal regression ######################
if True:
    print("---------------- arousal prediction ----------------")
    data = GSR_feature_df
    target = all_df_y[['arousal']]
    print("######xgboost regression model CV######")
    for xgb_rounds in [50]:
        xgb_model = xgb.XGBRegressor(max_depth=7,learning_rate=0.1,n_estimators=xgb_rounds,
                                     objective='reg:linear',booster='gbtree',n_jobs=10,
                                     subsample=0.9, colsample_bytree=0.90, colsample_bylevel=0.90,
                                     reg_alpha=0.1, reg_lambda=0.5,gamma=0)
        xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
        print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
        xgb_pred_arousal = cross_val_predict(xgb_model,data,target,cv=5)

print("build per-quadrant binary emotion labels from the regression predictions")
df_v = pd.DataFrame(xgb_pred_valence,columns=['pred_v'],index=all_df_y.index)
df_a = pd.DataFrame(xgb_pred_arousal,columns=['pred_a'],index=all_df_y.index)
df_true_v = all_df_y[['valence']]
df_true_a = all_df_y[['arousal']]
df_result = pd.concat([df_v,df_a,df_true_v,df_true_a],axis=1)

def count_accuracy(ser1,ser2):
    sum_all = len(ser1)
    tmp = ser1==ser2
    sum_acc= len(tmp[tmp==True])
    return sum_acc/sum_all

print("---------happy emotion----------")
# the happy quadrant requires BOTH valence >= 5 AND arousal >= 5,
# hence the intersection of the two index sets
happy_index = set(df_result[df_result.valence>=5].index).intersection(df_result[df_result.arousal>=5].index)
print("len(happy_index)=",len(happy_index))
df_result['happy'] = -1
for i in happy_index:
    df_result['happy'].loc[i] = 1
pred_happy_index = set(df_result[df_result.pred_v>=5].index).intersection(df_result[df_result.pred_a>=5].index)
print("len(pred_happy_index)=",len(pred_happy_index))
df_result['pred_happy'] = -1
for i in pred_happy_index:
    df_result['pred_happy'].loc[i] = 1
acc = count_accuracy(df_result['pred_happy'],df_result['happy'])
print("happy acc:",acc)

print("---------sad emotion----------")
# the sad quadrant requires BOTH valence < 5 AND arousal < 5 (intersection, as above)
sad_index = set(df_result[df_result.valence<5].index).intersection(df_result[df_result.arousal<5].index)
print("len(sad_index)=",len(sad_index))
df_result['sad'] = -1
for i in sad_index:
    df_result['sad'].loc[i] = 1
pred_sad_index = set(df_result[df_result.pred_v<5].index).intersection(df_result[df_result.pred_a<5].index)
print("len(pred_sad_index)=",len(pred_sad_index))
df_result['pred_sad'] = -1
for i in pred_sad_index:
    df_result['pred_sad'].loc[i] = 1
acc = count_accuracy(df_result['pred_sad'],df_result['sad'])
print("sad acc:",acc)


# measure program run time
endtime = datetime.datetime.now()
print("run time: %.1fs"%(endtime - starttime).seconds)
-------------------------------------------------------------------------------- /DNN.py: --------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 26 16:13:58 2018
DNN classifier
@author: jinyx
"""

import pandas as pd
import numpy as np
import pickle
import random
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")
# measure program run time
import datetime
starttime = datetime.datetime.now()
# load data
for eeg_CH in range(1,33,1):
    file_path = "./dump_file/CH{}_df_EEG_x".format(eeg_CH)
    df_data = pickle.load(open(file_path,"rb"))
    locals()["CH{}_df_EEG_x".format(eeg_CH)] = df_data
all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
for i in range(0,1280,1):  # 1280 trials in total, so 1280 2-D matrices
    locals()["mat{}".format(i)]=pd.DataFrame()
    for eeg_CH in range(1,33,1):  # 32 EEG channels, so each matrix is 32 x 8064
        locals()["mat{}".format(i)] = locals()["mat{}".format(i)].\
        append(locals()["CH{}_df_EEG_x".format(eeg_CH)].iloc[i:i+1],ignore_index=True)

# model hyper-parameters
INPUT_NODE = 258048         # 32*8064 input nodes
OUTPUT_NODE = 2             # binary classification
LAYER1_NODE = 500           # hidden-layer size
BATCH_SIZE = 100            # samples per batch
LEARNING_RATE_BASE = 0.8    # base learning rate
LEARNING_RATE_DECAY = 0.99  # learning-rate decay
REGULARAZTION_RATE = 0.0001 # regularisation coefficient
TRAINING_STEPS = 5000       # number of training steps
MOVING_AVERAGE_DECAY = 0.99 # moving-average decay

def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    # without the moving-average class
    if avg_class == None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        # with the moving-average class
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)
def train(mnist):
    # `mnist` is expected to be an MNIST-style dataset wrapper with
    # .train/.validation/.test splits (see the adapter sketch after this file)
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    # hidden-layer parameters;
    # tf.truncated_normal(shape, mean, stddev) draws from a truncated normal distribution
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # output-layer parameters
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # forward pass without the moving averages
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # global step and the moving-average class
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)

    # cross-entropy and its mean
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # loss = cross-entropy + L2 regularisation
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    regularaztion = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularaztion

    # exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # optimise the loss
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # update the parameters and their moving averages in a single op
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # accuracy
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # create the session and run the training loop
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        # training loop
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))

            xs,ys=mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op,feed_dict={x:xs,y_:ys})

        test_acc=sess.run(accuracy,feed_dict=test_feed)
        print(("After %d training step(s), test accuracy using average model is %g" %(TRAINING_STEPS, test_acc)))
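# Added sketch (not in the original file): train() above is never called and
# expects an MNIST-style wrapper. A minimal adapter over arrays (e.g. the EEG
# matrices flattened to 32*8064 vectors) might look like this; the class name
# and the cyclic batching scheme are assumptions.
class ArraySplit:
    def __init__(self, images, labels):
        self.images, self.labels = images, labels
        self.num_examples = images.shape[0]
        self._pos = 0
    def next_batch(self, batch_size):
        # serve batches cyclically, wrapping around at the end of the data
        idx = np.arange(self._pos, self._pos + batch_size) % self.num_examples
        self._pos = (self._pos + batch_size) % self.num_examples
        return self.images[idx], self.labels[idx]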
"all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n", 42 | "all_df_y['2cValence'] = 0\n", 43 | "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n", 44 | "print(all_df_y.head(5))\n", 45 | "\n", 46 | "#读取32个通道的EEG数据,每个通道包含32×40=1280个信号样本(人次×每人次40实验)\n", 47 | "#每个样本向量大小为8064点(63s*128Hz)\n", 48 | "for eegCH in range(1,33,1):\n", 49 | " #file_path = \"./dump_file/CH{}_df_EEG_x\".format(eegCH)\n", 50 | " #locals()['CH{}_df_EEG_x'.format(eegCH)] = pickle.load(open(file_path,\"rb\"))\n", 51 | " file_path = \"./dump_file/CH{}eegfft_df\".format(eegCH)\n", 52 | " locals()[\"CH{}eegfft_df\".format(eegCH)] = pickle.load(open(file_path,\"rb\"))\n", 53 | "'''\n", 54 | "X = CH1eegfft_df\n", 55 | "y = all_df_y[[\"2cArousal\"]]\n", 56 | "X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3,stratify=y,random_state=2018) \n", 57 | "xTrainIdx = X_tr.index\n", 58 | "xTestIdx = X_te.index\n", 59 | "'''\n", 60 | "y = all_df_y[[\"2cValence\"]]\n", 61 | "xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx\",\"rb\"))\n", 62 | "xTestIdx = pickle.load(open(\"./dump_file/xTestIdx\",\"rb\"))\n", 63 | "y_tr = y.loc[xTrainIdx]\n", 64 | "y_te = y.loc[xTestIdx]" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "for eegCH in range(1,33,1):\n", 74 | " for i in range(0,1280,1):\n", 75 | "for eegCH in range(1,33,1): \n", 76 | " #时域数据\n", 77 | " #locals()['CH{}TrainSet'.format(eegCH)] = locals()['CH{}_df_EEG_x'.format(eegCH)].loc[xTrainIdx]\n", 78 | " #locals()['CH{}TestSet'.format(eegCH)] = locals()['CH{}_df_EEG_x'.format(eegCH)].loc[xTestIdx]\n", 79 | " #频域数据\n", 80 | " locals()['CH{}TrainSet'.format(eegCH)] = locals()['CH{}eegfft_df'.format(eegCH)].loc[xTrainIdx]\n", 81 | " locals()['CH{}TestSet'.format(eegCH)] = locals()['CH{}eegfft_df'.format(eegCH)].loc[xTestIdx]" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "myWindowsSize = 256\n", 91 | "myStrideSize = 64\n", 92 | "gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n", 93 | " min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n", 94 | "if True:\n", 95 | " for eegCH in range(1,33,1): \n", 96 | " print(\"CH{} running multi-grain scan\".format(eegCH))\n", 97 | " xTrain,yTrain = locals()['CH{}TrainSet'.format(eegCH)].values,y_tr.values\n", 98 | " xTest = locals()['CH{}TestSet'.format(eegCH)].values \n", 99 | " locals()['CH{}mgsTrainVector'.format(eegCH)] = gcf.mg_scanning(xTrain,yTrain)\n", 100 | " locals()['CH{}mgsTestVector'.format(eegCH)] = gcf.mg_scanning(xTest)\n", 101 | " filePath = \"./dump_file_V2/CH{}mgsTrainVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n", 102 | " pickle.dump(locals()['CH{}mgsTrainVector'.format(eegCH)],open(filePath,\"wb\"))\n", 103 | " filePath = \"./dump_file_V2/CH{}mgsTestVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n", 104 | " pickle.dump(locals()['CH{}mgsTestVector'.format(eegCH)],open(filePath,\"wb\"))\n", 105 | "else:\n", 106 | " for eegCH in range(1,33,1):\n", 107 | " filePath = \"./dump_file_V2/CH{}mgsTrainVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n", 108 | " locals()['CH{}mgsTrainVector'.format(eegCH)] = pickle.load(open(filePath,\"rb\"))\n", 109 | " filePath = \"./dump_file_V2/CH{}mgsTestVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n", 110 | " locals()['CH{}mgsTestVector'.format(eegCH)]= pickle.load(open(filePath,\"rb\"))" 111 | ] 112 | 
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "myWindowsSize = 256\n",
    "myStrideSize = 64\n",
    "gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n",
    "               min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n",
    "if True:\n",
    "    for eegCH in range(1,33,1): \n",
    "        print(\"CH{} running multi-grain scan\".format(eegCH))\n",
    "        xTrain,yTrain = locals()['CH{}TrainSet'.format(eegCH)].values,y_tr.values\n",
    "        xTest = locals()['CH{}TestSet'.format(eegCH)].values \n",
    "        locals()['CH{}mgsTrainVector'.format(eegCH)] = gcf.mg_scanning(xTrain,yTrain)\n",
    "        locals()['CH{}mgsTestVector'.format(eegCH)] = gcf.mg_scanning(xTest)\n",
    "        filePath = \"./dump_file_V2/CH{}mgsTrainVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n",
    "        pickle.dump(locals()['CH{}mgsTrainVector'.format(eegCH)],open(filePath,\"wb\"))\n",
    "        filePath = \"./dump_file_V2/CH{}mgsTestVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n",
    "        pickle.dump(locals()['CH{}mgsTestVector'.format(eegCH)],open(filePath,\"wb\"))\n",
    "else:\n",
    "    for eegCH in range(1,33,1):\n",
    "        filePath = \"./dump_file_V2/CH{}mgsTrainVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n",
    "        locals()['CH{}mgsTrainVector'.format(eegCH)] = pickle.load(open(filePath,\"rb\"))\n",
    "        filePath = \"./dump_file_V2/CH{}mgsTestVector_{}_{}\".format(eegCH,myWindowsSize,myStrideSize)\n",
    "        locals()['CH{}mgsTestVector'.format(eegCH)]= pickle.load(open(filePath,\"rb\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
-------------------------------------------------------------------------------- /DEAP_data_preprocess.py: --------------------------------------------------------------------------------
# =============================================================================
# .# -*- coding: utf-8 -*-
# =============================================================================
"""
Spyder Editor

This is a temporary script file.
"""

import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from config import *

# 32 subjects, 40 trials per subject, 40 signal channels recorded per trial
sXX = ['s01','s02','s03','s04','s05','s06','s07','s08','s09',
       's10','s11','s12','s13','s14','s15','s16','s17','s18','s19',
       's20','s21','s22','s23','s24','s25','s26','s27','s28','s29',
       's30','s31','s32']

#read data from .dat files
for i in sXX:
    sXX_file_path ='./data_preprocessed_python/'+i+'.dat'
    f = open(sXX_file_path,'rb')
    locals()[i] = pickle.load(f, encoding='bytes')

#read labels 32 people(Y)
for i in sXX:
    locals()['%s_df_y'%i] = pd.DataFrame(locals()[i][b'labels'])
    locals()['%s_df_y'%i].columns = ['valence','arousal','dominance','liking']

#concat all sXX_df_y in one df
all_df_y = pd.DataFrame()
for i in sXX:
    temp_index = []
    for j in range(0,40,1):
        temp_index.append(i+'_'+str(j))
    locals()['%s_df_y'%i].index = temp_index
    all_df_y = pd.concat([all_df_y,locals()['%s_df_y'%i]],axis=0)

# final index format, e.g. s01_0 -> (subject s01, emotion trial no. 0)
pickle.dump(all_df_y,open("./dump_file/all_df_y","wb"))

######################### extract the 32 EEG channels #########################
# each subject's data array is trials x channels x samples (40 x 40 x 8064);
# the channel axis is 0-based, so the EEG electrodes occupy channels 0-31 and
# CH1..CH32 below map to data[:, eeg_channel-1, :] (indexing data[ch] instead
# would select a trial, not a channel)
for eeg_channel in range(1,33,1):
    for i in sXX:
        locals()['CH{}_{}_df_EEG_x'.format(eeg_channel,i)] = pd.DataFrame(locals()[i][b'data'][:, eeg_channel - 1, :])
        temp_index = []
        for j in range(0,40,1):
            temp_index.append(i+'_'+str(j))
        locals()['CH{}_{}_df_EEG_x'.format(eeg_channel,i)].index = temp_index
    #concat all CHX_sXX_df_EEG_x in one df
    locals()['CH{}_df_EEG_x'.format(eeg_channel)] = pd.DataFrame()
    for i in sXX:
        locals()['CH{}_df_EEG_x'.format(eeg_channel)] = \
        pd.concat([locals()['CH{}_df_EEG_x'.format(eeg_channel)],locals()['CH{}_{}_df_EEG_x'.format(eeg_channel,i)]],axis=0)
    file_path = "./dump_file/{}".format('CH{}_df_EEG_x'.format(eeg_channel))
    pickle.dump(locals()['CH%s_df_EEG_x'%eeg_channel],open(file_path,"wb"))
###############################################################################
########################## extract the GSR channel ############################
#read GSR data (0-based channel 36)
for i in sXX:
    locals()['%s_df_GSR_x'%i] = pd.DataFrame(locals()[i][b'data'][:, 36, :])
    temp_index = []
    for j in range(0,40,1):
        temp_index.append(i+'_'+str(j))
    locals()['%s_df_GSR_x'%i].index = temp_index

#concat all sXX_df_GSR_x in one df
all_df_GSR_x = pd.DataFrame()
for i in sXX:
    all_df_GSR_x = pd.concat([all_df_GSR_x,locals()['%s_df_GSR_x'%i]],axis=0)

pickle.dump(all_df_GSR_x,open("./dump_file/all_df_GSR_x","wb"))
###############################################################################

#################### extract the respiration (RSP) channel ####################
#read respiration belt data (0-based channel 37)
for i in sXX:
    locals()['%s_df_RSP_x'%i] = pd.DataFrame(locals()[i][b'data'][:, 37, :])
    temp_index = []
    for j in range(0,40,1):
        temp_index.append(i+'_'+str(j))
    locals()['%s_df_RSP_x'%i].index = temp_index

#concat all sXX_df_RSP_x in one df
all_df_RSP_x = pd.DataFrame()
for i in sXX:
    all_df_RSP_x = pd.concat([all_df_RSP_x,locals()['%s_df_RSP_x'%i]],axis=0)

pickle.dump(all_df_RSP_x,open("./dump_file/all_df_RSP_x","wb"))
###############################################################################

######################### extract the BVP channel #############################
#read plethysmograph (BVP) data (0-based channel 38)
for i in sXX:
    locals()['%s_df_BVP_x'%i] = pd.DataFrame(locals()[i][b'data'][:, 38, :])
    temp_index = []
    for j in range(0,40,1):
        temp_index.append(i+'_'+str(j))
    locals()['%s_df_BVP_x'%i].index = temp_index

#concat all sXX_df_BVP_x in one df
all_df_BVP_x = pd.DataFrame()
for i in sXX:
    all_df_BVP_x = pd.concat([all_df_BVP_x,locals()['%s_df_BVP_x'%i]],axis=0)

pickle.dump(all_df_BVP_x,open("./dump_file/all_df_BVP_x","wb"))
###############################################################################

#################### extract the temperature (TMP) channel ####################
#read skin-temperature data (0-based channel 39)
for i in sXX:
    locals()['%s_df_TMP_x'%i] = pd.DataFrame(locals()[i][b'data'][:, 39, :])
    temp_index = []
    for j in range(0,40,1):
        temp_index.append(i+'_'+str(j))
    locals()['%s_df_TMP_x'%i].index = temp_index

#concat all sXX_df_TMP_x in one df
all_df_TMP_x = pd.DataFrame()
for i in sXX:
    all_df_TMP_x = pd.concat([all_df_TMP_x,locals()['%s_df_TMP_x'%i]],axis=0)

pickle.dump(all_df_TMP_x,open("./dump_file/all_df_TMP_x","wb"))
###############################################################################

########################### plot the GSR signal ###############################
#read .dat files(32 total)
f = open(s01_file_path,'rb')
s01 = pickle.load(f, encoding='bytes')
#s01_GSR_df_x, index: 40 experiments, columns: 8064 samples (128Hz)
s01_GSR_df_x = pd.DataFrame(s01[b'data'][:, 36, :])
#s01_df_y, index: 40 experiments, columns: Y
s01_df_y = pd.DataFrame(s01[b'labels'])
s01_df_y.columns=['valence','arousal','dominance','liking']

plt.plot(s01_GSR_df_x.iloc[0,:])
plt.ylabel('GSR value')
plt.show()
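# Added sketch (not in the original file): a quick sanity check of the DEAP
# layout assumed above -- 40 trials x 40 channels x 8064 samples per subject.
print(s01[b'data'].shape)    # expected: (40, 40, 8064)
print(s01[b'labels'].shape)  # expected: (40, 4)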
130 | #################################plot the GSR signal###########################
131 | #read one of the .dat files (32 total)
132 | f = open(s01_file_path,'rb')
133 | s01 = pickle.load(f, encoding='bytes')
134 | #s01_GSR_df_x, index: 40 experiments, columns: 8064 samples (128Hz)
135 | s01_GSR_df_x = pd.DataFrame(s01[b'data'][:,36,:]) #GSR is channel index 36
136 | #s01_df_y, index: 40 experiments, columns: Y
137 | s01_df_y = pd.DataFrame(s01[b'labels'])
138 | s01_df_y.columns=['valence','arousal','dominance','liking']
139 | 
140 | plt.plot(s01_GSR_df_x.iloc[0,:])
141 | plt.ylabel('GSR value')
142 | plt.show()
-------------------------------------------------------------------------------- /DEAP_linearR_KFold.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import pandas as pd
3 | import numpy as np
4 | import pickle
5 | import xgboost as xgb
6 | from sklearn.linear_model import LinearRegression
7 | from sklearn.linear_model import Lasso
8 | from sklearn.linear_model import Ridge
9 | from sklearn.svm import SVR
10 | from sklearn.preprocessing import MinMaxScaler
11 | from sklearn import preprocessing
12 | from sklearn.model_selection import train_test_split
13 | from sklearn.model_selection import cross_val_score
14 | from sklearn.model_selection import GridSearchCV
15 | from sklearn.model_selection import KFold
16 | from sklearn.metrics import mean_squared_error
17 | from sklearn.metrics import mean_absolute_error
18 | from sklearn.metrics import accuracy_score
19 | import warnings
20 | warnings.filterwarnings("ignore")
21 | #measure script running time
22 | import datetime
23 | starttime = datetime.datetime.now()
24 | 
25 | #load data
26 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
27 | all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
28 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
29 | all_df_y_2c = pickle.load(open("./dump_file/all_df_y_2c","rb"))
30 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
31 | 
32 | print("Scaling features with min-max normalization")
33 | features_col = GSR_feature_df.columns
34 | min_max_scaler = MinMaxScaler()
35 | GSR_feature_ndarray = min_max_scaler.fit_transform(GSR_feature_df)
36 | GSR_feature_df = pd.DataFrame(GSR_feature_ndarray)
37 | GSR_feature_df.columns = features_col
38 | 
39 | 
40 | #split the data with 5-fold cross validation
41 | kf = KFold(n_splits=5)
42 | k = list(kf.split(GSR_feature_df)) #five (train_index, test_index) pairs
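The manual loop that follows trains one model per fold and stitches the held-out predictions back together; sklearn's cross_val_predict expresses the same idea in one call. A short sketch reusing the script's variable names (the hyperparameters are illustrative):

import xgboost as xgb
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import mean_absolute_error

model = xgb.XGBRegressor(max_depth=5, learning_rate=0.1, n_estimators=60)
# every sample is predicted by the one model that never saw it during training
oof_pred = cross_val_predict(model, GSR_feature_df, all_df_y_valence.values.ravel(),
                             cv=KFold(n_splits=5))
print("out-of-fold MAE:", mean_absolute_error(all_df_y_valence, oof_pred))
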
43 | ######################valence prediction with manual 5-fold CV#################
44 | if True:
45 |     print("########## valence ##########")
46 |     y_valence = all_df_y_valence.copy()
47 |     df_predy = pd.DataFrame() #collects the out-of-fold predictions
48 |     MAE_sum = 0 #accumulates the per-fold MAE
49 |     for i in range(0,5):
50 |         data = GSR_feature_df.iloc[k[i][0]]
51 |         target = all_df_y_valence.iloc[k[i][0]]
52 |         test_x = GSR_feature_df.iloc[k[i][1]]
53 |         test_y = all_df_y_valence.iloc[k[i][1]]
54 |         xgb_model = xgb.XGBRegressor(max_depth=5,learning_rate=0.1,n_estimators=60,
55 |                                      objective='reg:linear',booster='gbtree',n_jobs=10,
56 |                                      subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
57 |                                      reg_alpha=0.1, reg_lambda=1.0,gamma=0)
58 |         xgb_model.fit(data,target)
59 |         test_predy = xgb_model.predict(test_x)
60 |         MAE = mean_absolute_error(test_y,test_predy)
61 |         MAE_sum = MAE + MAE_sum
62 |         test_predy = pd.DataFrame(test_predy,columns=['y_pred'],index=test_y.index)
63 |         df_predy = pd.concat([df_predy,test_predy],axis=0)
64 |         print("[%d]MAE:%f"%(i,MAE))
65 |     y_valence = pd.merge(y_valence,df_predy,how='outer',left_index=True,right_index=True)
66 |     print("MAE_mean:%f"%(MAE_sum/5))
67 |     y_valence['2C_pred_true']=0
68 |     y_valence.loc[(y_valence['valence']>=5) & (y_valence['y_pred']>=5),'2C_pred_true'] = 1
69 |     y_valence.loc[(y_valence['valence']<5) & (y_valence['y_pred']<5),'2C_pred_true'] = 1
70 |     accuracy = y_valence['2C_pred_true'].sum()/len(y_valence) #1280 samples
71 |     print("Accuracy:%f"%(accuracy))
72 |     pickle.dump(y_valence,open("./dump_file/y_valence","wb"))
73 | 
74 | if False:
75 |     print("########## arousal ##########")
76 |     y_arousal = all_df_y[['arousal']].copy()
77 |     df_predy = pd.DataFrame() #collects the out-of-fold predictions
78 |     MAE_sum = 0 #accumulates the per-fold MAE
79 |     for i in range(0,5):
80 |         data = GSR_feature_df.iloc[k[i][0]]
81 |         target = all_df_y[['arousal']].iloc[k[i][0]]
82 |         test_x = GSR_feature_df.iloc[k[i][1]]
83 |         test_y = all_df_y[['arousal']].iloc[k[i][1]]
84 |         xgb_model = xgb.XGBRegressor(max_depth=5,learning_rate=0.1,n_estimators=50,
85 |                                      objective='reg:linear',booster='gbtree',n_jobs=10,
86 |                                      subsample=0.90, colsample_bytree=0.90, colsample_bylevel=0.9,
87 |                                      reg_alpha=0.1, reg_lambda=0.8,gamma=0)
88 |         xgb_model.fit(data,target)
89 |         test_predy = xgb_model.predict(test_x)
90 |         MAE = mean_absolute_error(test_y,test_predy)
91 |         MAE_sum = MAE + MAE_sum
92 |         test_predy = pd.DataFrame(test_predy,columns=['y_pred'],index=test_y.index)
93 |         df_predy = pd.concat([df_predy,test_predy],axis=0)
94 |         print("[%d]MAE:%f"%(i,MAE))
95 |     y_arousal = pd.merge(y_arousal,df_predy,how='outer',left_index=True,right_index=True)
96 |     print("MAE_mean:%f"%(MAE_sum/5))
97 |     y_arousal['2C_pred_true']=0
98 |     y_arousal.loc[(y_arousal['arousal']>=5) & (y_arousal['y_pred']>=5),'2C_pred_true'] = 1
99 |     y_arousal.loc[(y_arousal['arousal']<5) & (y_arousal['y_pred']<5),'2C_pred_true'] = 1
100 |     accuracy = y_arousal['2C_pred_true'].sum()/len(y_arousal) #1280 samples
101 |     print("Accuracy:%f"%(accuracy))
102 |     pickle.dump(y_arousal,open("./dump_file/y_arousal","wb"))
103 | 
104 | if False:
105 |     y_arousal_2c = all_df_y[['arousal']].copy()
106 |     y_arousal_2c['2C'] = 0
107 |     y_arousal_2c.loc[y_arousal_2c['arousal'] >= 5,'2C'] = 1
108 |     df_predy = pd.DataFrame() #collects the out-of-fold predictions
109 |     for i in range(0,5):
110 |         data = GSR_feature_df.iloc[k[i][0]]
111 |         target = y_arousal_2c['2C'].iloc[k[i][0]]
112 |         test_x = GSR_feature_df.iloc[k[i][1]]
113 |         test_y = y_arousal_2c['2C'].iloc[k[i][1]]
114 |         xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,
115 |                                       objective='binary:logistic',booster='gbtree',n_jobs=10,
116 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
117 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0)
118 |         xgb_model.fit(data,target)
119 |         test_predy = xgb_model.predict(test_x)
120 |         ACC = accuracy_score(test_y,test_predy)
121 |         print("[%d]ACC:%f"%(i,ACC))
122 |         test_predy = pd.DataFrame(test_predy,columns=['y_pred_2c'],index=test_y.index)
123 |         df_predy = pd.concat([df_predy,test_predy],axis=0)
124 | #measure script running time
125 | endtime = datetime.datetime.now()
126 | print("Running time: %.1fs"%(endtime - starttime).seconds)
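The classification branch above prints an accuracy per fold but never scores the assembled out-of-fold predictions as a whole; a small sketch of that final step, reusing the names from the loop (illustrative only):

from sklearn.metrics import accuracy_score

# df_predy holds one held-out prediction per sample, indexed like the labels,
# so align the indices and score once over all 1280 samples
aligned_true = y_arousal_2c.loc[df_predy.index, '2C']
print("overall CV accuracy:", accuracy_score(aligned_true, df_predy['y_pred_2c']))
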
-------------------------------------------------------------------------------- /DEAP_linearR_plots.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import pandas as pd
3 | import numpy as np
4 | import pickle
5 | import xgboost as xgb
6 | from sklearn.linear_model import LinearRegression
7 | from sklearn.linear_model import Lasso
8 | from sklearn.linear_model import Ridge
9 | from sklearn.neighbors import KNeighborsRegressor
10 | from sklearn.svm import SVR
11 | from sklearn.neural_network import MLPRegressor
12 | from sklearn.preprocessing import MinMaxScaler
13 | from sklearn import preprocessing
14 | from sklearn.model_selection import train_test_split
15 | from sklearn.model_selection import cross_val_score
16 | from sklearn.model_selection import GridSearchCV
17 | from sklearn.model_selection import KFold
18 | from sklearn.metrics import mean_squared_error
19 | from sklearn.metrics import mean_absolute_error
20 | import warnings
21 | warnings.filterwarnings("ignore")
22 | #measure script running time
23 | import datetime
24 | starttime = datetime.datetime.now()
25 | 
26 | #load data
27 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
28 | all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
29 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
30 | all_df_y_2c = pickle.load(open("./dump_file/all_df_y_2c","rb"))
31 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
32 | 
33 | print("Scaling features with min-max normalization")
34 | min_max_scaler = MinMaxScaler()
35 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
36 | 
37 | ##############################cross-validated valence prediction################
38 | if False:
39 |     print("----------------valence prediction----------------")
40 |     data = GSR_feature_df
41 |     target = all_df_y_valence
42 | 
43 |     print("######linear regression CV######")
44 |     linearR_model = LinearRegression()
45 |     linearR_scores = cross_val_score(linearR_model,data,target,cv=5,scoring='neg_mean_absolute_error')
46 |     print("linearR_scores:",abs(linearR_scores))
47 |     print("linearR_scores_mean:",abs(linearR_scores.mean()))
48 | 
49 |     print("######ridge model CV######")
50 |     for alpha in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2,1.3,1.4,1.5]:
51 |         ridge_model = Ridge(alpha=alpha)
52 |         ridge_scores = cross_val_score(ridge_model,data,target,cv=5,scoring='neg_mean_absolute_error')
53 |         print("alpha:%.1f->ridge_scores_mean:%f"%(alpha,abs(ridge_scores.mean())))
54 | 
55 |     print("######SVR model CV######")
56 |     for c in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2.0,2.1,2.2,2.3,2.4,2.5,4.0]:
57 |         svr_model = SVR(C=c,kernel='rbf')
58 |         svr_scores = cross_val_score(svr_model,data,target,cv=5,scoring='neg_mean_absolute_error')
59 |         print("c:%.1f->svr_scores_mean:%f"%(c,abs(svr_scores.mean())))
60 | 
61 |     print("######xgboost model CV######")
62 |     for xgb_rounds in [20,30,40,50,60,70]:
63 |         xgb_model = xgb.XGBRegressor(max_depth=5,learning_rate=0.1,n_estimators=xgb_rounds,
64 |                                      objective='reg:linear',booster='gbtree',n_jobs=10,
65 |                                      subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
66 |                                      reg_alpha=0.1, reg_lambda=0.8,gamma=1.0)
67 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
68 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
69 | 
70 |     print("######xgboost classification model CV######")
71 |     target = all_df_y_2c['emotion_2']
72 |     for xgb_rounds in [40,50,60,70]:
73 |         xgb_model = xgb.XGBClassifier(max_depth=7,learning_rate=0.1,n_estimators=xgb_rounds,
74 |                                       objective='binary:logistic',booster='gbtree',n_jobs=10,
75 |                                       subsample=0.9, colsample_bytree=0.90, colsample_bylevel=0.90,
76 |                                       reg_alpha=0.1, reg_lambda=0.5,gamma=0)
77 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
78 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
79 | 
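A note on the scoring convention used throughout this file: sklearn maximizes scores, so error metrics are exposed negated ('neg_mean_absolute_error'), and the abs() calls above flip them back for reporting. A two-line illustration (the printed values are made up):

from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

scores = cross_val_score(Ridge(alpha=0.5), data, target, cv=5,
                         scoring='neg_mean_absolute_error')
print(scores)              # e.g. [-1.79 -1.80 -1.78 ...]: closer to 0 is better
print(abs(scores.mean()))  # reported as a positive MAE
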
80 | ##############################arousal regression################################
81 | if True:
82 |     print("----------------arousal prediction----------------")
83 |     data = GSR_feature_df
84 |     target = all_df_y[['arousal']]
85 |     '''
86 |     print("######linear regression CV######")
87 |     linearR_model = LinearRegression()
88 |     linearR_scores = cross_val_score(linearR_model,data,target,cv=5,scoring='neg_mean_absolute_error')
89 |     print("linearR_scores:",abs(linearR_scores))
90 |     print("linearR_scores_mean:",abs(linearR_scores.mean()))
91 | 
92 |     print("######ridge model CV######")
93 |     for alpha in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2,1.3,1.4,1.5]:
94 |         ridge_model = Ridge(alpha=alpha)
95 |         ridge_scores = cross_val_score(ridge_model,data,target,cv=5,scoring='neg_mean_absolute_error')
96 |         print("alpha:%.1f->ridge_scores_mean:%f"%(alpha,abs(ridge_scores.mean())))
97 | 
98 |     print("######SVR model CV######")
99 |     for c in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.1,1.2,1.3,1.4,1.5]:
100 |         svr_model = SVR(C=c,kernel='rbf')
101 |         svr_scores = cross_val_score(svr_model,data,target,cv=5,scoring='neg_mean_absolute_error')
102 |         print("c:%.1f->svr_scores_mean:%f"%(c,abs(svr_scores.mean())))
103 |     '''
104 |     print("######xgboost regression model CV######")
105 |     for xgb_rounds in [40,50,60,70]:
106 |         xgb_model = xgb.XGBRegressor(max_depth=7,learning_rate=0.1,n_estimators=xgb_rounds,
107 |                                      objective='reg:linear',booster='gblinear',n_jobs=10, #gblinear ignores the tree parameters below
108 |                                      subsample=0.9, colsample_bytree=0.90, colsample_bylevel=0.90,
109 |                                      reg_alpha=0.1, reg_lambda=0.5,gamma=0)
110 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
111 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
112 | 
113 |     '''
114 |     print("######KNN regression model CV######")
115 |     for knn_neighbors in [3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]:
116 |         knn_model = KNeighborsRegressor(n_neighbors=knn_neighbors)
117 |         knn_scores = cross_val_score(knn_model,data,target,cv=5,scoring='neg_mean_absolute_error')
118 |         print("knn_neighbors:%d->knn_scores_mean:%f"%(knn_neighbors,abs(knn_scores.mean())))
119 | 
120 |     print("######MLP regression model CV######")
121 |     for mlp_alpha in [0.1,0.01,0.001,0.0001]:
122 |         mlp_model = MLPRegressor(hidden_layer_sizes=(1000, ),alpha=mlp_alpha)
123 |         mlp_scores = cross_val_score(mlp_model,data,target,cv=5,scoring='neg_mean_absolute_error',n_jobs=1)
124 |         print("mlp_alpha:%f->mlp_scores_mean:%f"%(mlp_alpha,abs(mlp_scores.mean())))
125 |     '''
126 | 
127 | #measure script running time
128 | endtime = datetime.datetime.now()
129 | print("Running time: %.1fs"%(endtime - starttime).seconds)
-------------------------------------------------------------------------------- /XGB_4emotion.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Aug  2 18:53:45 2018
4 | Predict the four quadrant emotions directly, since GSR alone gives little signal for arousal
5 | @author: jinyu
6 | """
7 | import pandas as pd
8 | import numpy as np
9 | import pickle
10 | import random
11 | import xgboost as xgb
12 | from sklearn.preprocessing import MinMaxScaler
13 | from sklearn import preprocessing
14 | from sklearn.model_selection import cross_val_score
15 | from sklearn.model_selection import cross_val_predict
16 | from sklearn.metrics import accuracy_score
17 | import warnings
18 | warnings.filterwarnings("ignore")
19 | #measure script running time
20 | import datetime
21 | starttime = datetime.datetime.now()
22 | 
23 | #load data
24 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
25 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
26 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
27 | 
28 | print("Scaling features with min-max normalization")
29 | min_max_scaler = MinMaxScaler()
30 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
31 | 
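The block below derives four one-vs-rest quadrant labels with index loops; the same labels fall out of two boolean masks. A compact sketch using a single >=5 cut on both axes (note the script's own boundaries overlap at exactly 5, so counts can differ slightly; the np.select usage here is illustrative):

import numpy as np

hv = all_df_y['valence'] >= 5  # high valence
ha = all_df_y['arousal'] >= 5  # high arousal
all_df_y['quadrant'] = np.select(
    [hv & ha, ~hv & ha, ~hv & ~ha, hv & ~ha],
    ['happy', 'nervous', 'sad', 'calm'])
print(all_df_y['quadrant'].value_counts())
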
32 | ###############################################################################
33 | if False: #build the four quadrant emotions
34 |     print("Discretize continuous valence/arousal into binary values (4 quadrants -> 4 emotions)")
35 |     print("---------happy emotion----------")
36 |     df_result = all_df_y
37 |     a = df_result[df_result.valence>=5].index
38 |     b = df_result[df_result.arousal>=5].index
39 |     happy_index = [val for val in a if val in b]
40 |     print("len(happy_index)=",len(happy_index))
41 |     df_result['happy'] = -1
42 |     for i in happy_index:
43 |         df_result.loc[i,'happy'] = 1
44 |     print("---------sad emotion----------")
45 |     df_result = all_df_y
46 |     a = df_result[df_result.valence<=5].index
47 |     b = df_result[df_result.arousal<=5].index
48 |     sad_index = [val for val in a if val in b]
49 |     print("len(sad_index)=",len(sad_index))
50 |     df_result['sad'] = -1
51 |     for i in sad_index:
52 |         df_result.loc[i,'sad'] = 1
53 |     print("---------nervous emotion----------")
54 |     df_result = all_df_y
55 |     a = df_result[df_result.valence<5].index
56 |     b = df_result[df_result.arousal>5].index
57 |     nervous_index = [val for val in a if val in b]
58 |     print("len(nervous_index)=",len(nervous_index))
59 |     df_result['nervous'] = -1
60 |     for i in nervous_index:
61 |         df_result.loc[i,'nervous'] = 1
62 |     print("---------calm emotion----------")
63 |     df_result = all_df_y
64 |     a = df_result[df_result.valence>5].index
65 |     b = df_result[df_result.arousal<5].index
66 |     calm_index = [val for val in a if val in b]
67 |     print("len(calm_index)=",len(calm_index))
68 |     df_result['calm'] = -1
69 |     for i in calm_index:
70 |         df_result.loc[i,'calm'] = 1
71 |     print("dump the four emotion labels")
72 |     pickle.dump(df_result,open("./dump_file/df_result","wb"))
73 | else:
74 |     print("load the dumped four-emotion labels")
75 |     df_result = pickle.load(open("./dump_file/df_result","rb"))
76 | ###############################################################################
77 | def count_accuracy(ser1,ser2):
78 |     sum_all = len(ser1)
79 |     tmp = ser1==ser2
80 |     sum_acc= len(tmp[tmp==True])
81 |     return sum_acc/sum_all
82 | if True:
83 |     print("----------------'happy' emotion prediction----------------")
84 |     data = GSR_feature_df
85 |     target = df_result[['happy']]
86 |     print("######xgboost model CV######")
87 |     for xgb_rounds in [50]:
88 |         xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=xgb_rounds, #use the loop variable, not a fixed 50
89 |                                       objective='binary:logistic',booster='gbtree',n_jobs=-1,
90 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
91 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0,
92 |                                       scale_pos_weight=1)
93 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
94 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
95 |         xgb_pred_happy = cross_val_predict(xgb_model,data,target,cv=5)
96 |         happy_acc = accuracy_score(xgb_pred_happy,df_result['happy'])
97 |         print("happy_acc:",happy_acc)
98 | 
99 | if True:
100 |     print("----------------'sad' emotion prediction----------------")
101 |     data = GSR_feature_df
102 |     target = df_result[['sad']]
103 |     print("######xgboost model CV######")
104 |     for xgb_rounds in [50]:
105 |         xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=xgb_rounds,
106 |                                       objective='binary:logistic',booster='gbtree',n_jobs=-1,
107 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
108 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0,
109 |                                       scale_pos_weight=1)
110 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
111 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
112 |         xgb_pred_sad = cross_val_predict(xgb_model,data,target,cv=5)
113 |         sad_acc = accuracy_score(xgb_pred_sad,df_result['sad'])
114 |         print("sad_acc:",sad_acc)
115 | 
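The remaining two blocks repeat the same recipe for 'nervous' and 'calm'. As an alternative to four separate binary classifiers, the quadrants can be treated as one 4-class problem; a hedged sketch (the 0..3 encoding and parameters are illustrative, and boundary ratings tied across quadrants are resolved arbitrarily by idxmax):

import xgboost as xgb
from sklearn.model_selection import cross_val_score

# collapse the four -1/1 indicator columns into a single class id 0..3
labels = df_result[['happy','nervous','sad','calm']].idxmax(axis=1)
labels = labels.astype('category').cat.codes
clf = xgb.XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=50,
                        objective='multi:softmax', n_jobs=-1)
print(cross_val_score(clf, GSR_feature_df, labels, cv=5, scoring='accuracy').mean())
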
116 | if True:
117 |     print("----------------'nervous' emotion prediction----------------")
118 |     data = GSR_feature_df
119 |     target = df_result[['nervous']]
120 |     print("######xgboost model CV######")
121 |     for xgb_rounds in [50]:
122 |         xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=xgb_rounds,
123 |                                       objective='binary:logistic',booster='gbtree',n_jobs=-1,
124 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
125 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0,
126 |                                       scale_pos_weight=1)
127 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
128 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
129 |         xgb_pred_nervous = cross_val_predict(xgb_model,data,target,cv=5)
130 |         nervous_acc = accuracy_score(xgb_pred_nervous,df_result['nervous'])
131 |         print("nervous_acc:",nervous_acc)
132 | 
133 | if True:
134 |     print("----------------'calm' emotion prediction----------------")
135 |     data = GSR_feature_df
136 |     target = df_result[['calm']]
137 |     print("######xgboost model CV######")
138 |     for xgb_rounds in [50]:
139 |         xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=xgb_rounds,
140 |                                       objective='binary:logistic',booster='gbtree',n_jobs=-1,
141 |                                       subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,
142 |                                       reg_alpha=0.5, reg_lambda=1.0,gamma=0,
143 |                                       scale_pos_weight=1)
144 |         xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')
145 |         print("xgb_rounds:%d->xgb_scores_mean:%f"%(xgb_rounds,abs(xgb_scores.mean())))
146 |         xgb_pred_calm = cross_val_predict(xgb_model,data,target,cv=5)
147 |         calm_acc = accuracy_score(xgb_pred_calm,df_result['calm'])
148 |         print("calm_acc:",calm_acc)
149 | 
150 | #measure script running time
151 | endtime = datetime.datetime.now()
152 | print("Running time: %.1fs"%(endtime - starttime).seconds)
-------------------------------------------------------------------------------- /DEAP_linearR.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import pandas as pd
3 | import numpy as np
4 | import pickle
5 | import xgboost as xgb
6 | from sklearn.linear_model import LinearRegression
7 | from sklearn.linear_model import Lasso
8 | from sklearn.linear_model import Ridge
9 | from sklearn.svm import SVR
10 | from sklearn.preprocessing import MinMaxScaler
11 | from sklearn import preprocessing
12 | from sklearn.model_selection import train_test_split
13 | from sklearn.model_selection import cross_val_score
14 | from sklearn.model_selection import GridSearchCV
15 | from sklearn.metrics import mean_squared_error
16 | from sklearn.metrics import mean_absolute_error
17 | import warnings
18 | warnings.filterwarnings("ignore")
19 | #measure script running time
20 | import datetime
21 | starttime = datetime.datetime.now()
22 | 
23 | #load data
24 | GSR_feature_df = pickle.load(open("./dump_file/df_feat_selected","rb"))
25 | all_df_y_valence = pickle.load(open("./dump_file/all_df_y_valence","rb"))
26 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
27 | print("GSR_feature_df.shape:",GSR_feature_df.shape)
28 | 
29 | #print("Scale to zero mean and unit variance")
30 | #stdScaler = preprocessing.StandardScaler()
31 | #stdScaler.fit(GSR_feature_df)
32 | #stdScaler.transform(GSR_feature_df)   #note: transform returns a new array; this result is discarded
33 | #print("mean:\n{}".format(GSR_feature_df.mean(axis=0)))
34 | #print("std:\n{}".format(GSR_feature_df.std(axis=0)))
35 | 
36 | print("Scaling features with min-max normalization")
37 | min_max_scaler = MinMaxScaler()
38 | GSR_feature_df = min_max_scaler.fit_transform(GSR_feature_df)
39 | 
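One caveat that applies to every script in this repo: the scaler is fit on the full feature matrix before cross validation, so statistics from the test folds leak into training. A leak-free sketch with a Pipeline, which re-fits the scaler on each training fold (illustrative, not how these scripts currently run):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

pipe = make_pipeline(MinMaxScaler(), Ridge(alpha=0.5))
scores = cross_val_score(pipe, GSR_feature_df, all_df_y_valence.values.ravel(),
                         cv=5, scoring='neg_mean_absolute_error')
print(abs(scores.mean()))
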
40 | '''
41 | print("----------------valence prediction without cross validation----------------")
42 | train_X,test_X,train_Y,test_Y = \
43 |     train_test_split(GSR_feature_df,all_df_y_valence,test_size=0.2,random_state=1000)
44 | print("######linear regression######")
45 | linearR_model = LinearRegression()
46 | linearR_model.fit(train_X,train_Y)
47 | linear_pred_Y = linearR_model.predict(test_X)
48 | df_linear_pred_Y = pd.DataFrame(linear_pred_Y,columns=['valence'])
49 | mse = mean_squared_error(linear_pred_Y,test_Y)
50 | print("mse=",mse)
51 | mae = mean_absolute_error(linear_pred_Y,test_Y)
52 | print("mae=",mae)
53 | 
54 | print("######lasso model######")
55 | lasso_model = Lasso(alpha=0.1)
56 | lasso_model.fit(train_X,train_Y)
57 | lasso_pred_Y = lasso_model.predict(test_X)
58 | df_lasso_pred_Y = pd.DataFrame(lasso_pred_Y,columns=['valence'])
59 | mse = mean_squared_error(lasso_pred_Y,test_Y)
60 | print("mse=",mse)
61 | mae = mean_absolute_error(lasso_pred_Y,test_Y)
62 | print("mae=",mae)
63 | 
64 | print("######ridge model######")
65 | ridge_model = Ridge(alpha=0.1)
66 | ridge_model.fit(train_X,train_Y)
67 | ridge_pred_Y = ridge_model.predict(test_X)
68 | df_ridge_pred_Y = pd.DataFrame(ridge_pred_Y,columns=['valence'])
69 | mse = mean_squared_error(ridge_pred_Y,test_Y)
70 | print("mse=",mse)
71 | mae = mean_absolute_error(ridge_pred_Y,test_Y)
72 | print("mae=",mae)
73 | 
74 | print("######xgb(gbtree) regression model######")
75 | dtrain = xgb.DMatrix(train_X,train_Y)
76 | dtest = xgb.DMatrix(test_X,test_Y)
77 | xgb_params = {
78 |     'booster': 'gbtree',
79 |     'eta': 0.1,
80 |     'max_depth': 7,
81 |     'objective': 'reg:linear',
82 |     'eval_metric': 'mae',
83 |     'colsample_bytree': 0.90,
84 |     'alpha': 0.6,
85 |     'gamma': 1,
86 |     'silent': 0,
87 | }
88 | watchlist = [(dtrain, 'train'), (dtest, 'test')]
89 | num_rounds = 50
90 | #True uses the watchlist for per-round evaluation
91 | if False:
92 |     xgb_reg_model=xgb.train(xgb_params,dtrain,num_rounds,evals=watchlist)
93 | else:
94 |     xgb_reg_model=xgb.train(xgb_params,dtrain,num_rounds)
95 | xgb_pred_Y = xgb_reg_model.predict(dtest)
96 | mse = mean_squared_error(xgb_pred_Y,test_Y)
97 | print("mse=",mse)
98 | mae = mean_absolute_error(xgb_pred_Y,test_Y)
99 | print("mae=",mae)
100 | 
101 | pickle.dump(xgb_pred_Y,open("./dump_file/xgb_pred_Y","wb"))
102 | pickle.dump(test_Y,open("./dump_file/test_Y","wb"))
103 | '''
104 | 
105 | ##############################cross-validated valence prediction################
106 | print("----------------valence prediction----------------")
107 | data = GSR_feature_df
108 | target = all_df_y_valence
109 | print("######linear regression CV######")
110 | linearR_model = LinearRegression()
111 | linearR_scores = cross_val_score(linearR_model,data,target,cv=5,scoring='neg_mean_absolute_error')
112 | print("linearR_scores:",abs(linearR_scores))
113 | print("linearR_scores_mean:",abs(linearR_scores.mean()))
114 | 
115 | print("######lasso model CV######")
116 | lasso_model = Lasso(alpha=0.1)
117 | lasso_scores = cross_val_score(lasso_model,data,target,cv=5,scoring='neg_mean_absolute_error')
118 | print("lasso_scores:",abs(lasso_scores))
119 | print("lasso_scores_mean:",abs(lasso_scores.mean()))
120 | 
121 | print("######ridge model CV######")
122 | ridge_model = Ridge(alpha=0.1)
123 | ridge_scores = cross_val_score(ridge_model,data,target,cv=5,scoring='neg_mean_absolute_error')
124 | print("ridge_scores:",abs(ridge_scores))
125 | print("ridge_scores_mean:",abs(ridge_scores.mean()))
126 | 
127 | print("######xgboost model CV######")
128 | xgb_model = xgb.XGBRegressor(max_depth=6,learning_rate=0.1,n_estimators=50,
129 |                              objective='reg:linear',booster='gbtree',n_jobs=10,
130 |                              subsample=1, colsample_bytree=0.9, colsample_bylevel=1,
131 |                              reg_alpha=1.0, reg_lambda=1,gamma=1.0)
132 | xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='neg_mean_absolute_error')
133 | print("xgb_scores:",abs(xgb_scores))
134 | print("xgb_scores_mean:",abs(xgb_scores.mean()))
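Instead of sweeping n_estimators by hand as the neighbouring scripts do, xgboost's own CV utility can pick the boosting-round count with early stopping; a brief sketch reusing data/target (all parameter values are illustrative):

import xgboost as xgb

dtrain = xgb.DMatrix(data, label=target)
cv_hist = xgb.cv({'eta': 0.1, 'max_depth': 5,
                  'objective': 'reg:linear', 'eval_metric': 'mae'},
                 dtrain, num_boost_round=200, nfold=5,
                 early_stopping_rounds=10)
print("best rounds:", len(cv_hist))  # xgb.cv truncates at the early-stopping point
print("best test MAE:", cv_hist['test-mae-mean'].min())
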
135 | ##############################arousal regression#################################
136 | print("----------------arousal prediction----------------")
137 | data = GSR_feature_df
138 | target = all_df_y[['arousal']]
139 | print("######linear regression CV######")
140 | linearR_model = LinearRegression()
141 | linearR_scores = cross_val_score(linearR_model,data,target,cv=5,scoring='neg_mean_absolute_error')
142 | print("linearR_scores:",abs(linearR_scores))
143 | print("linearR_scores_mean:",abs(linearR_scores.mean()))
144 | 
145 | print("######lasso model CV######")
146 | lasso_model = Lasso(alpha=0.1)
147 | lasso_scores = cross_val_score(lasso_model,data,target,cv=5,scoring='neg_mean_absolute_error')
148 | print("lasso_scores:",abs(lasso_scores))
149 | print("lasso_scores_mean:",abs(lasso_scores.mean()))
150 | 
151 | print("######ridge model CV######")
152 | ridge_model = Ridge(alpha=0.1)
153 | ridge_scores = cross_val_score(ridge_model,data,target,cv=5,scoring='neg_mean_absolute_error')
154 | print("ridge_scores:",abs(ridge_scores))
155 | print("ridge_scores_mean:",abs(ridge_scores.mean()))
156 | '''
157 | print("######SVR model CV######")
158 | svr_model = SVR()
159 | svr_scores = cross_val_score(svr_model,data,target,cv=5,scoring='neg_mean_absolute_error')
160 | print("svr_scores:",svr_scores)
161 | print("svr_scores_mean:",svr_scores.mean())
162 | '''
163 | print("######GridSearchCV######")
164 | param_grid = {'alpha':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]}
165 | lasso_model = Lasso()
166 | gsearch = GridSearchCV(lasso_model,param_grid,cv=5)
167 | gsearch.fit(data,target)
168 | print("best params:",gsearch.best_params_,"best CV score:",gsearch.best_score_) #report the search result
169 | 
170 | #measure script running time
171 | endtime = datetime.datetime.now()
172 | print("Running time: %.1fs"%(endtime - starttime).seconds)
-------------------------------------------------------------------------------- /XGB_2cEEG.ipynb: --------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 1,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "# -*- coding: utf-8 -*-\n",
10 |     "import pandas as pd\n",
11 |     "import numpy as np\n",
12 |     "import pickle\n",
13 |     "from sklearn.preprocessing import MinMaxScaler\n",
14 |     "from sklearn import preprocessing\n",
15 |     "from sklearn.model_selection import cross_val_score\n",
16 |     "from sklearn.metrics import mean_absolute_error\n",
17 |     "from sklearn.naive_bayes import GaussianNB\n",
18 |     "from sklearn.neighbors import KNeighborsClassifier\n",
19 |     "from sklearn.neural_network import MLPClassifier\n",
20 |     "from sklearn.metrics import accuracy_score\n",
21 |     "from sklearn.metrics import f1_score\n",
22 |     "from sklearn.metrics import precision_score\n",
23 |     "from sklearn.metrics import recall_score\n",
24 |     "import xgboost as xgb \n",
25 |     "import warnings\n",
26 |     "warnings.filterwarnings(\"ignore\")\n",
27 |     "# measure running time\n",
28 |     "import datetime\n",
29 |     "starttime = datetime.datetime.now()"
30 |    ]
31 |   },
32 |   {
33 |    "cell_type": "code",
34 |    "execution_count": 2,
35 |    "metadata": {},
36 |    "outputs": [
37 |     {
38 |      "name": "stdout",
39 |      "output_type": "stream",
40 |      "text": [
41 |       "###### load features (EEG-based) ######\n",
42 |       "eegFeatureDF.shape: (1280, 1440)\n"
43 |      ]
44 |     }
45 |    ],
46 |    "source": [
47 |     "print(\"###### load features (EEG-based) ######\")\n",
48 |     "# EEG feature table\n",
49 |     "eegFeatureDF = pickle.load(open(\"./dump_file/eegFeatureDF\",\"rb\"))\n",
50 |     "\n",
51 |     "if True:\n",
52 | " #加上早期的统计特征\n", 53 | " for eegCH in range(1,33,1):\n", 54 | " file_path = \"./dump_file/CH{}_eeg_feat_df\".format(eegCH)\n", 55 | " tmpDF = pickle.load(open(file_path,\"rb\"))\n", 56 | " eegFeatureDF = pd.concat([eegFeatureDF,tmpDF],axis=1)\n", 57 | "if True:#加上样本熵特征\n", 58 | " filePath = \"./dump_file_sampEn/sampEnFeatures\".format(eegCH)\n", 59 | " tmpDF = pickle.load(open(filePath,\"rb\"))\n", 60 | " eegFeatureDF = pd.concat([eegFeatureDF,tmpDF],axis=1)\n", 61 | "if False:\n", 62 | " #GSR特征表\n", 63 | " GSR_feature_df = pickle.load(open(\"./dump_file/df_feat_selected\",\"rb\"))\n", 64 | " eegFeatureDF = pd.concat([eegFeatureDF,GSR_feature_df],axis=1)\n", 65 | "\n", 66 | "#总的特征向量样本大小\n", 67 | "print(\"eegFeatureDF.shape:\",eegFeatureDF.shape)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 3, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "if False:\n", 77 | " print(\"######数据缩放处理,归一化处理######\")\n", 78 | " min_max_scaler = MinMaxScaler()\n", 79 | " eegFeatureDF = min_max_scaler.fit_transform(eegFeatureDF)\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "#featureDF = pd.concat([eegFeatureDF,GSR_feature_df],axis=1)\n", 89 | "#featureDF = GSR_feature_df\n", 90 | "featureDF = eegFeatureDF" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "#############################下面用愉悦度(效价)做分类#################################\n", 100 | "if False:\n", 101 | " print(\"----------------这是高低愉悦度度二分类预测----------------\")\n", 102 | " data = featureDF\n", 103 | " target = all_df_y_2c #高低愉悦度\n", 104 | " \n", 105 | " print(\"######xgb classification CV######\")\n", 106 | " xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,\n", 107 | " objective='binary:logistic',booster='gbtree',n_jobs=10,\n", 108 | " subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,\n", 109 | " reg_alpha=0.5, reg_lambda=1.0,gamma=0,\n", 110 | " scale_pos_weight=1)\n", 111 | " xgb_scores = cross_val_score(xgb_model,data,target,cv=5,scoring='accuracy')\n", 112 | " print(\"xgb_scores:\",abs(xgb_scores))\n", 113 | " print(\"xgb_scores_mean:\",abs(xgb_scores.mean()))\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | " valence arousal dominance liking 2cArousal 2cValence\n", 126 | "s01_0 7.71 7.60 6.90 7.83 1 1\n", 127 | "s01_1 8.10 7.31 7.28 8.47 1 1\n", 128 | "s01_2 8.58 7.54 9.00 7.08 1 1\n", 129 | "s01_3 4.94 6.01 6.12 8.06 0 0\n", 130 | "s01_4 6.96 3.92 7.19 6.05 1 1\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n", 136 | "all_df_y['2cArousal'] = 0\n", 137 | "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n", 138 | "all_df_y['2cValence'] = 0\n", 139 | "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n", 140 | "print(all_df_y.head(5))\n", 141 | "y = all_df_y[[\"2cValence\"]]\n", 142 | "xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx\",\"rb\"))\n", 143 | "xTestIdx = pickle.load(open(\"./dump_file/xTestIdx\",\"rb\"))\n", 144 | "trainY = y.loc[xTrainIdx]\n", 145 | "testY = y.loc[xTestIdx]\n", 146 | "trainX = eegFeatureDF.loc[xTrainIdx]\n", 147 | "testX = eegFeatureDF.loc[xTestIdx]" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | 
"execution_count": 7, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "######xgb classification ######\n", 160 | "ACC 0.6536458333333334\n", 161 | "F1 0.712742980561555\n", 162 | "Recal 0.7603686635944701\n", 163 | "Precision 0.6707317073170732\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "if True:\n", 169 | " data = featureDF\n", 170 | " target = y\n", 171 | " print(\"######xgb classification ######\")\n", 172 | " xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,\n", 173 | " objective='binary:logistic',booster='gbtree',n_jobs=10,\n", 174 | " subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,\n", 175 | " reg_alpha=0.5, reg_lambda=1.0,gamma=0,\n", 176 | " scale_pos_weight=1)\n", 177 | " xgb_model.fit(trainX.values,trainY.values)\n", 178 | " predY = xgb_model.predict(testX.values)\n", 179 | " print(\"ACC\",accuracy_score(y_true=testY, y_pred=predY))\n", 180 | " print(\"F1\",f1_score(y_true=testY, y_pred=predY))\n", 181 | " print(\"Recal\",recall_score(y_true=testY,y_pred=predY))\n", 182 | " print(\"Precision\",precision_score(y_true=testY, y_pred=predY))" 183 | ] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.6.5" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 2 207 | } 208 | -------------------------------------------------------------------------------- /XGB_2c.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import multiprocessing\n", 10 | "import xgboost as xgb\n", 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "import pickle \n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from sklearn.model_selection import train_test_split\n", 16 | "from sklearn.model_selection import StratifiedKFold\n", 17 | "from sklearn.preprocessing import MinMaxScaler\n", 18 | "from sklearn.metrics import accuracy_score\n", 19 | "from sklearn.metrics import f1_score\n", 20 | "from sklearn.metrics import precision_score\n", 21 | "from sklearn.metrics import recall_score\n", 22 | "from sklearn.preprocessing import Imputer\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings(\"ignore\")\n", 25 | "#用来计算程序运行时间\n", 26 | "import datetime\n", 27 | "starttime = datetime.datetime.now()" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | " valence arousal dominance liking 2cArousal 2cValence\n", 40 | "s01_0 7.71 7.60 6.90 7.83 1 1\n", 41 | "s01_1 8.10 7.31 7.28 8.47 1 1\n", 42 | "s01_2 8.58 7.54 9.00 7.08 1 1\n", 43 | "s01_3 4.94 6.01 6.12 8.06 0 0\n", 44 | "s01_4 6.96 3.92 7.19 6.05 1 1\n", 45 | "######读取特征(基于EEG)######\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "#读取Y\n", 51 | "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n", 52 | "all_df_y['2cArousal'] = 0\n", 53 | "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n", 54 | 
"all_df_y['2cValence'] = 0\n", 55 | "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n", 56 | "print(all_df_y.head(5))\n", 57 | "y = all_df_y[[\"2cValence\"]]\n", 58 | "\n", 59 | "print(\"######读取特征(基于EEG)######\")\n", 60 | "#EEG特征表\n", 61 | "eegFeatureDF = pickle.load(open(\"./dump_file/eegFeatureDF\",\"rb\"))\n", 62 | "\n", 63 | "if True:\n", 64 | " #加上早期的统计特征\n", 65 | " for eegCH in range(1,33,1):\n", 66 | " file_path = \"./dump_file/CH{}_eeg_feat_df\".format(eegCH)\n", 67 | " tmpDF = pickle.load(open(file_path,\"rb\"))\n", 68 | " eegFeatureDF = pd.concat([eegFeatureDF,tmpDF],axis=1)\n", 69 | "if True:#加上样本熵特征\n", 70 | " filePath = \"./dump_file_sampEn/sampEnFeatures\".format(eegCH)\n", 71 | " tmpDF = pickle.load(open(filePath,\"rb\"))\n", 72 | " eegFeatureDF = pd.concat([eegFeatureDF,tmpDF],axis=1)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 7, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "[seed:0]****************************************************\n", 85 | "######xgb classification ######\n", 86 | "ACC 0.6536458333333334\n", 87 | "F1 0.712742980561555\n", 88 | "Recal 0.7603686635944701\n", 89 | "Precision 0.6707317073170732\n", 90 | "[seed:100]****************************************************\n", 91 | "######xgb classification ######\n", 92 | "ACC 0.6302083333333334\n", 93 | "F1 0.7029288702928871\n", 94 | "Recal 0.7741935483870968\n", 95 | "Precision 0.6436781609195402\n", 96 | "[seed:200]****************************************************\n", 97 | "######xgb classification ######\n", 98 | "ACC 0.6119791666666666\n", 99 | "F1 0.6823027718550106\n", 100 | "Recal 0.7373271889400922\n", 101 | "Precision 0.6349206349206349\n", 102 | "[seed:300]****************************************************\n", 103 | "######xgb classification ######\n", 104 | "ACC 0.6692708333333334\n", 105 | "F1 0.7315010570824525\n", 106 | "Recal 0.7972350230414746\n", 107 | "Precision 0.67578125\n", 108 | "[seed:400]****************************************************\n", 109 | "######xgb classification ######\n", 110 | "ACC 0.6223958333333334\n", 111 | "F1 0.7034764826175869\n", 112 | "Recal 0.7926267281105991\n", 113 | "Precision 0.6323529411764706\n", 114 | "[seed:500]****************************************************\n", 115 | "######xgb classification ######\n", 116 | "ACC 0.6354166666666666\n", 117 | "F1 0.6956521739130435\n", 118 | "Recal 0.7373271889400922\n", 119 | "Precision 0.6584362139917695\n", 120 | "[seed:600]****************************************************\n", 121 | "######xgb classification ######\n", 122 | "ACC 0.6432291666666666\n", 123 | "F1 0.7103594080338267\n", 124 | "Recal 0.7741935483870968\n", 125 | "Precision 0.65625\n", 126 | "[seed:700]****************************************************\n", 127 | "######xgb classification ######\n", 128 | "ACC 0.6458333333333334\n", 129 | "F1 0.7043478260869565\n", 130 | "Recal 0.7465437788018433\n", 131 | "Precision 0.6666666666666666\n", 132 | "[seed:800]****************************************************\n", 133 | "######xgb classification ######\n", 134 | "ACC 0.6276041666666666\n", 135 | "F1 0.6963906581740976\n", 136 | "Recal 0.7557603686635944\n", 137 | "Precision 0.6456692913385826\n", 138 | "[seed:900]****************************************************\n", 139 | "######xgb classification ######\n", 140 | "ACC 0.6666666666666666\n", 141 | "F1 0.7241379310344828\n", 142 | "Recal 0.7741935483870968\n", 143 | "Precision 
0.680161943319838\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "for seed in [0,100,200,300,400,500,600,700,800,900]:\n", 149 | " print(\"[seed:{}]****************************************************\".format(seed))\n", 150 | " xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n", 151 | " xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n", 152 | " trainY = y.loc[xTrainIdx]\n", 153 | " testY = y.loc[xTestIdx]\n", 154 | " trainX = eegFeatureDF.loc[xTrainIdx]\n", 155 | " testX = eegFeatureDF.loc[xTestIdx] \n", 156 | " data = trainX\n", 157 | " target = trainY \n", 158 | " print(\"######xgb classification ######\")\n", 159 | " xgb_model = xgb.XGBClassifier(max_depth=5,learning_rate=0.1,n_estimators=50,\n", 160 | " objective='binary:logistic',booster='gbtree',n_jobs=10,\n", 161 | " subsample=0.9, colsample_bytree=0.9, colsample_bylevel=0.9,\n", 162 | " reg_alpha=0.5, reg_lambda=1.0,gamma=0,\n", 163 | " scale_pos_weight=1)\n", 164 | " xgb_model.fit(trainX.values,trainY.values)\n", 165 | " predY = xgb_model.predict(testX.values)\n", 166 | " print(\"ACC\",accuracy_score(y_true=testY, y_pred=predY))\n", 167 | " print(\"F1\",f1_score(y_true=testY, y_pred=predY))\n", 168 | " print(\"Recal\",recall_score(y_true=testY,y_pred=predY))\n", 169 | " print(\"Precision\",precision_score(y_true=testY, y_pred=predY))" 170 | ] 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "Python 3", 176 | "language": "python", 177 | "name": "python3" 178 | }, 179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 3 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython3", 189 | "version": "3.6.5" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 2 194 | } 195 | -------------------------------------------------------------------------------- /plot_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import matplotlib.pyplot as plt 3 | plt.rcParams['font.sans-serif']=['SimHei'] 4 | plt.rcParams['axes.unicode_minus']=False 5 | 6 | from mpl_toolkits.mplot3d import Axes3D 7 | import numpy as np 8 | import pickle 9 | import pandas as pd 10 | 11 | all_df_y = pickle.load(open("./dump_file/all_df_y","rb")) 12 | xgb_pred_Y = pickle.load(open("./dump_file/xgb_pred_Y","rb")) 13 | test_Y = pickle.load(open("./dump_file/test_Y","rb")) 14 | y_valence = pickle.load(open("./dump_file/y_valence","rb")) 15 | y_arousal = pickle.load(open("./dump_file/y_arousal","rb")) 16 | #画图DPI设定 17 | DPI_SET = 100 18 | 19 | if True: 20 | #########################愉悦度 21 | X = [1,2,3,4,5,6,7,8,9,10] 22 | Y_xgb = [1.675444,1.675363,1.680978,1.685009,1.691554, 23 | 1.675277,1.674311,1.680342,1.691216,1.707188] 24 | Y_svr = [1.785470,1.785573,1.785693,1.785803,1.785853, 25 | 1.785954,1.786118,1.786360,1.786597,1.786775] 26 | Y_ridge = [1.789497,1.790049,1.791245,1.792133,1.792958,1.793474,1.793847, 27 | 1.794242,1.794706,1.795205] 28 | Y_linear = [1.76594288635,1.76594288635,1.76594288635,1.76594288635,1.76594288635, 29 | 1.76594288635,1.76594288635,1.76594288635,1.76594288635,1.76594288635,] 30 | Y_xgb.sort() 31 | Y_svr.sort() 32 | Y_ridge.sort() 33 | Y_linear.sort() 34 | plt.figure(dpi = DPI_SET) 35 | plt.plot(X,Y_xgb,'ro',label="xgboost") 36 | plt.plot(X,Y_svr,'bs',label="SVR") 37 | plt.plot(X,Y_ridge,'g^',label="Ridge") 38 | 
plt.plot(X,Y_linear,'k+',label="OLS")
39 |     plt.grid(False)
40 |     plt.xlabel("rank of the ten best runs by MAE")
41 |     plt.ylabel("MAE")
42 |     plt.title('MAE of the ten best runs per model (valence prediction)')
43 |     plt.legend()
44 |     plt.show()
45 |     #plt.savefig('MAE',dpi=100)
46 | if False:
47 |     ##################### arousal
48 |     X = [1,2,3,4,5,6,7,8,9,10]
49 |     Y_xgb = [1.687146,1.695196,1.692517,1.699958,1.696151,
50 |              1.689644,1.689434,1.689765,1.692192,1.690135]
51 |     Y_svr = [1.709018,1.709057,1.709095,1.709111,1.709189,1.709237,1.709258,
52 |              1.709278,1.709373,1.709496]
53 |     Y_ridge = [1.697378,1.697711,1.698095,1.698543,1.699079,
54 |                1.699739,1.700587,1.701745,1.703502,1.707373]
55 |     Y_linear = [1.71203616905,1.71203616905,1.71203616905,1.71203616905,1.71203616905,
56 |                 1.71203616905,1.71203616905,1.71203616905,1.71203616905,1.71203616905,]
57 |     Y_xgb.sort()
58 |     Y_svr.sort()
59 |     Y_ridge.sort()
60 |     Y_linear.sort()
61 |     plt.figure(dpi = DPI_SET)
62 |     plt.plot(X,Y_xgb,'ro',label="xgboost")
63 |     plt.plot(X,Y_svr,'bs',label="SVR")
64 |     plt.plot(X,Y_ridge,'g^',label="Ridge")
65 |     plt.plot(X,Y_linear,'k+',label="OLS")
66 |     plt.grid(False)
67 |     plt.xlabel("rank of the ten best runs by MAE")
68 |     plt.ylabel("MAE")
69 |     plt.title('MAE of the ten best runs per model (arousal prediction)')
70 |     plt.legend()
71 |     plt.show()
72 |     #plt.savefig('MAE',dpi=100)
73 | 
74 | if False:
75 |     #Pearson correlations with valence
76 |     Y_corrs = [0.004346,0.005027,0.052310,0.027697,0.069725,0.043048,0.016981,
77 |                0.011768,0.000304,0.033582,0.061108,0.056917,0.072331,0.065231,
78 |                0.026672,0.024671,0.045470, 0.036574,0.062127,0.049776,0.024718,
79 |                0.020680,0.082800,0.081184,0.076538,0.105749,0.108450,0.112452]
80 |     X_features = [i for i in range(1,29,1)]
81 |     plt.figure(dpi = DPI_SET)
82 |     plt.bar(X_features,Y_corrs,color='red')
83 |     plt.grid(False)
84 |     plt.xlabel("feature index")
85 |     plt.ylabel("Pearson correlation coefficient")
86 |     plt.title("Pearson correlation between each feature and valence")
87 |     plt.legend()
88 |     plt.show()
89 | 
90 | if False:
91 |     #Pearson correlations with arousal
92 |     Y_corrs = [0.048455,0.046454,0.023819,0.038387,0.018331,0.032164,0.025527,
93 |                0.039727,0.042079,0.029262,0.015931,0.011253,0.002549,0.006947,
94 |                0.003238,0.013122,0.022206,0.026620,0.014166,0.020778,0.001383,
95 |                0.018818,0.014761,0.009047,0.023284,0.010995,0.000872,0.006976]
96 |     X_features = [i for i in range(1,29,1)]
97 |     plt.figure(dpi = DPI_SET)
98 |     plt.bar(X_features,Y_corrs,color='red')
99 |     plt.grid(False)
100 |     plt.xlabel("feature index")
101 |     plt.ylabel("Pearson correlation coefficient")
102 |     plt.title("Pearson correlation between each feature and arousal")
103 |     plt.legend()
104 |     plt.show()
105 | 
106 | if False:
107 |     #scatter the samples in the valence-arousal plane
108 |     #x = all_df_y[(all_df_y['valence']>5) & (all_df_y['arousal']>5)]['valence']
109 |     #y = all_df_y[(all_df_y['valence']>5) & (all_df_y['arousal']>5)]['arousal']
110 |     x_high = all_df_y[all_df_y['valence']>=5]['valence']
111 |     y_high = all_df_y[all_df_y['valence']>=5]['arousal']
112 |     x_low = all_df_y[all_df_y['valence']<=5]['valence']
113 |     y_low = all_df_y[all_df_y['valence']<=5]['arousal']
114 |     plt.figure(dpi = DPI_SET)
115 |     plt.plot(x_high,y_high,'b.')
116 |     plt.plot(x_low,y_low,'y.')
117 |     plt.xlabel("valence")
118 |     plt.ylabel("arousal")
119 |     plt.title('valence-arousal distribution of the samples')
120 |     plt.show()
121 | 
122 | if True:
123 |     #scatter the samples in the valence-arousal plane, split into 4 quadrants
124 |     #x = all_df_y[(all_df_y['valence']>5) & (all_df_y['arousal']>5)]['valence']
125 |     #y = all_df_y[(all_df_y['valence']>5) & (all_df_y['arousal']>5)]['arousal']
126 |     x_1 = all_df_y[(all_df_y['valence']>=5) & (all_df_y['arousal']>=5)]['valence']
127 |     y_1 = all_df_y[(all_df_y['valence']>=5) & (all_df_y['arousal']>=5)]['arousal']
128 |     x_2 = all_df_y[(all_df_y['valence']<5) & (all_df_y['arousal']>5)]['valence']
129 |     y_2 = all_df_y[(all_df_y['valence']<5) & (all_df_y['arousal']>5)]['arousal']
130 |     x_3 = all_df_y[(all_df_y['valence']<=5) & (all_df_y['arousal']<=5)]['valence']
131 |     y_3 = all_df_y[(all_df_y['valence']<=5) & (all_df_y['arousal']<=5)]['arousal']
132 |     x_4 = all_df_y[(all_df_y['valence']>5) & (all_df_y['arousal']<5)]['valence']
133 |     y_4 = all_df_y[(all_df_y['valence']>5) & (all_df_y['arousal']<5)]['arousal']
134 |     plt.figure(dpi = DPI_SET)
135 |     myMarkerSize = 3
136 |     plt.plot(x_1,y_1,'b.',markersize=myMarkerSize)
137 |     plt.plot(x_2,y_2,'y+',markersize=myMarkerSize)
138 |     plt.plot(x_3,y_3,'gs',markersize=myMarkerSize)
139 |     plt.plot(x_4,y_4,'r^',markersize=myMarkerSize)
140 |     #plt.xlabel("valence (pleasantness)")
141 |     #plt.ylabel("arousal (activation)")
142 |     plt.xlabel("valence")
143 |     plt.ylabel("arousal")
144 |     #plt.title('valence-arousal distribution of the samples')
145 |     plt.show()
146 | 
147 |     #count the samples
148 |     #print("number of high-valence (5-9) samples: {}".format(len(x_high)))
149 |     #print("number of low-valence (1-5) samples: {}".format(len(x_low)))
150 | 
151 | if True:
152 |     #3D scatter plot
153 |     x = all_df_y['valence']
154 |     y = all_df_y['arousal']
155 |     z = all_df_y['dominance']
156 |     fig = plt.figure(dpi = DPI_SET)
157 |     ax = Axes3D(fig)
158 |     ax.scatter(x, y, z, c='r', marker='.') #scatter takes no 'r.' format string
159 |     ax.set_xlabel("valence")
160 |     ax.set_ylabel("arousal")
161 |     ax.set_zlabel("dominance")
162 |     #ax.set_title("valence-arousal-dominance distribution of the samples")
163 |     plt.show()
164 | 
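The final section below bins samples by their true rating and box-plots the absolute prediction error per unit-wide bin; a compact sketch of that pattern (names mirror the script, bin handling is illustrative):

import matplotlib.pyplot as plt

errors = abs(y_valence['valence'] - y_valence['y_pred'])
# one list of absolute errors per rating bin [i, i+1)
bins = [errors[(y_valence['valence'] >= i) & (y_valence['valence'] < i + 1)]
        for i in range(1, 9)]
plt.boxplot(bins, labels=[str(i) for i in range(1, 9)])
plt.xlabel("true valence bin")
plt.ylabel("absolute error")
plt.show()
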
165 | if False:
166 |     #box plot of absolute errors, valence
167 |     df_test_Y = y_valence[['valence']]
168 |     df_test_Y.columns=['valence_true']
169 |     df_pred_Y = y_valence[['y_pred']]
170 |     df_pred_Y.columns=['valence_pred']
171 |     errors = abs(df_test_Y['valence_true'] - df_pred_Y['valence_pred'])
172 |     df_errors = pd.DataFrame(errors,index=test_Y.index,columns=['abs_errors'])
173 |     df_result = pd.concat([df_test_Y,df_pred_Y],axis=1)
174 |     df_result = pd.concat([df_result,df_errors],axis=1)
175 |     box_data = []
176 |     for i in range(1,9):
177 |         df_tmp = df_result[(df_result['valence_true']>=i) & (df_result['valence_true']<i+1)]
178 |         box_data.append(df_tmp['abs_errors'])
179 |         print(len(y_valence[(y_valence['valence']>=i) & (y_valence['valence']<i+1)]))
180 |     plt.figure(dpi = DPI_SET)
181 |     plt.boxplot(box_data)
182 |     plt.xlabel("true valence bin")
183 |     plt.ylabel("absolute error")
184 |     plt.show()
185 | 
186 | if False:
187 |     #box plot of absolute errors, arousal
188 |     df_test_Y = y_arousal[['arousal']]
189 |     df_test_Y.columns=['arousal_true']
190 |     df_pred_Y = y_arousal[['y_pred']]
191 |     df_pred_Y.columns=['arousal_pred']
192 |     errors = abs(df_test_Y['arousal_true'] - df_pred_Y['arousal_pred'])
193 |     df_errors = pd.DataFrame(errors,index=test_Y.index,columns=['abs_errors'])
194 |     df_result = pd.concat([df_test_Y,df_pred_Y,df_errors],axis=1)
195 |     box_data = []
196 |     for i in range(1,9):
197 |         df_tmp = df_result[(df_result['arousal_true']>=i) & (df_result['arousal_true']<i+1)]
198 |         box_data.append(df_tmp['abs_errors'])
199 |         print(len(y_arousal[(y_arousal['arousal']>=i) & (y_arousal['arousal']<i+1)]))
200 |     plt.figure(dpi = DPI_SET)
201 |     plt.boxplot(box_data)
202 |     plt.xlabel("true arousal bin")
203 |     plt.ylabel("absolute error")
204 |     plt.show()
-------------------------------------------------------------------------------- /GCF_2cGSR_TimeDomain.ipynb: --------------------------------------------------------------------------------
205 |     "all_df_y['2cArousal'] = 0\n",
206 |     "all_df_y['2cArousal'][all_df_y['arousal'] >= 5] = 1\n",
207 |     "all_df_y['2cValence'] = 0\n",
208 |     "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n",
209 |     "print(all_df_y.head(5))\n",
210 |     "\n",
211 |     "# load the single GSR channel: 32x40 = 1280 signal samples (subjects x 40 trials each)\n",
212 |     "# each sample vector has 8064 points (63s * 128Hz)\n",
213 |     "all_df_GSR_x = pickle.load(open(\"./dump_file/all_df_GSR_x\",\"rb\"))\n",
214 |     "\n",
215 |     "#y = all_df_y[['2cArousal']]\n",
216 |     "y = all_df_y[['2cValence']]\n",
217 |     "for seed in [0,100,200,300,400,500,600,700,800,900]:\n",
218 |     "    print(\"[seed:{}]****************************************************\".format(seed))\n",
219 |     "    xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n",
220 |     "    xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n",
221 |     "    y_tr = y.loc[xTrainIdx]\n",
222 |     "    y_te = y.loc[xTestIdx]\n",
223 |     "    GSRTrainSet = all_df_GSR_x.loc[xTrainIdx]\n",
224 |     "    GSRTestSet = all_df_GSR_x.loc[xTestIdx]\n",
225 |     "    myWindowsSize = 256\n",
226 |     "    myStrideSize = 64\n",
227 |     "    gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n",
228 |     "                   min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n",
229 |     "    if True: \n",
230 |     "        print(\"GSR running multi-grain scan\")\n",
231 |     "        xTrain,yTrain = GSRTrainSet.values,y_tr.values\n",
232 |     "        xTest = GSRTestSet.values \n",
233 |     "        GSR_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)\n",
234 |     "        GSR_mgsTestVector = gcf.mg_scanning(xTest)\n",
235 |     "        filePath = 
\"./dump_file_V2/GSR_mgsTrainVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n", 236 | " pickle.dump(GSR_mgsTrainVector,open(filePath,\"wb\"))\n", 237 | " filePath = \"./dump_file_V2/GSR_mgsTestVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n", 238 | " pickle.dump(GSR_mgsTestVector,open(filePath,\"wb\"))\n", 239 | " else:\n", 240 | " filePath = \"./dump_file_V2/GSR_mgsTrainVector_{}_{}\".format(myWindowsSize,myStrideSize)\n", 241 | " GSR_mgsTrainVector = pickle.load(open(filePath,\"rb\"))\n", 242 | " filePath = \"./dump_file_V2/GSR_mgsTestVector_{}_{}\".format(myWindowsSize,myStrideSize)\n", 243 | " GSR_mgsTestVector = pickle.load(open(filePath,\"rb\"))\n", 244 | " X_tr_vector = GSR_mgsTrainVector\n", 245 | " X_te_vector = GSR_mgsTestVector\n", 246 | " print(X_tr_vector.shape)\n", 247 | " print(X_te_vector.shape)\n", 248 | " #有缺失值,填充下\n", 249 | " X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values\n", 250 | " X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values\n", 251 | " _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)\n", 252 | " pred_proba = gcf.cascade_forest(X_te_vector_fillna)\n", 253 | " tmp = np.mean(pred_proba, axis=0)\n", 254 | " preds = np.argmax(tmp, axis=1)\n", 255 | " print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n", 256 | " print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n", 257 | " print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n", 258 | " print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))" 259 | ] 260 | } 261 | ], 262 | "metadata": { 263 | "kernelspec": { 264 | "display_name": "Python 3", 265 | "language": "python", 266 | "name": "python3" 267 | }, 268 | "language_info": { 269 | "codemirror_mode": { 270 | "name": "ipython", 271 | "version": 3 272 | }, 273 | "file_extension": ".py", 274 | "mimetype": "text/x-python", 275 | "name": "python", 276 | "nbconvert_exporter": "python", 277 | "pygments_lexer": "ipython3", 278 | "version": "3.6.5" 279 | } 280 | }, 281 | "nbformat": 4, 282 | "nbformat_minor": 2 283 | } 284 | -------------------------------------------------------------------------------- /EEG_EMD.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#EEG 经验模态分解\n", 10 | "import sampleEntropy as se\n", 11 | "from pyhht.visualization import plot_imfs\n", 12 | "from pyhht.emd import EMD\n", 13 | "import numpy as np\n", 14 | "import scipy as sp\n", 15 | "import pandas as pd\n", 16 | "import pickle \n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import multiprocessing\n", 19 | "import warnings\n", 20 | "warnings.filterwarnings(\"ignore\")\n", 21 | "#用来计算程序运行时间\n", 22 | "import datetime\n", 23 | "starttime = datetime.datetime.now()" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "#读取32个通道的EEG数据,每个通道包含32×40=1280个信号样本(人次×每人次40实验)\n", 33 | "#每个样本向量大小为8064点(63s*128Hz)\n", 34 | "for eeg_CH in range(1,33,1):\n", 35 | " file_path = \"./dump_file/CH{}_df_EEG_x\".format(eeg_CH)\n", 36 | " locals()['CH{}_df_EEG_x'.format(eeg_CH)] = pickle.load(open(file_path,\"rb\"))" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "'matplotlib画图\\nplt.figure(dpi=300)\\nplt.subplot(4,1,1)\\nplt.ylabel(\"EEG signal\")\\nplt.plot(x)\\nfor i in 
range(0,3,1):\\n plt.subplot(4,1,i+2)\\n plt.ylabel(\"IMF{}\".format(i+1))\\n plt.plot(imfs[i])\\nplt.show()\\nplt.figure(dpi=300)\\nfor i in [3,4,5,6]:\\n plt.subplot(4,1,i-2)\\n plt.ylabel(\"IMF{}\".format(i+1))\\n plt.plot(imfs[i])\\nplt.show()\\nplt.figure(dpi=300)\\nfor i in [7,8,9,10]:\\n plt.subplot(4,1,i-6)\\n if i==10:\\n plt.ylabel(\"RES\") \\n else:\\n plt.ylabel(\"IMF{}\".format(i+1))\\n plt.plot(imfs[i])\\nplt.show()\\n'" 48 | ] 49 | }, 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "'''pyhht自带的画图\n", 57 | "t = range(0,8064,1)\n", 58 | "x = CH1_df_EEG_x.iloc[20,:]\n", 59 | "decomposer = EMD(x)\n", 60 | "imfs = decomposer.decompose()\n", 61 | "#plot_imfs(x, imfs, t)\n", 62 | "'''\n", 63 | "'''matplotlib画图\n", 64 | "plt.figure(dpi=300)\n", 65 | "plt.subplot(4,1,1)\n", 66 | "plt.ylabel(\"EEG signal\")\n", 67 | "plt.plot(x)\n", 68 | "for i in range(0,3,1):\n", 69 | " plt.subplot(4,1,i+2)\n", 70 | " plt.ylabel(\"IMF{}\".format(i+1))\n", 71 | " plt.plot(imfs[i])\n", 72 | "plt.show()\n", 73 | "plt.figure(dpi=300)\n", 74 | "for i in [3,4,5,6]:\n", 75 | " plt.subplot(4,1,i-2)\n", 76 | " plt.ylabel(\"IMF{}\".format(i+1))\n", 77 | " plt.plot(imfs[i])\n", 78 | "plt.show()\n", 79 | "plt.figure(dpi=300)\n", 80 | "for i in [7,8,9,10]:\n", 81 | " plt.subplot(4,1,i-6)\n", 82 | " if i==10:\n", 83 | " plt.ylabel(\"RES\") \n", 84 | " else:\n", 85 | " plt.ylabel(\"IMF{}\".format(i+1))\n", 86 | " plt.plot(imfs[i])\n", 87 | "plt.show()\n", 88 | "'''" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "'\\nif False:\\n for eegCH in range(1,33,1):\\n locals()[\"p{}\".format(eegCH)] = multiprocessing.Process(target = runEMD, args = (eegCH,))\\n locals()[\"p{}\".format(eegCH)].start()\\n print(\"p{}.pid:{}\".format(eegCH,locals()[\"p{}\".format(eegCH)].pid))\\nelse:\\n runEMD(eegCH=2)\\n'" 107 | ] 108 | }, 109 | "execution_count": 4, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "def runEMD(eegCH):\n", 116 | " #用来计算程序运行时间\n", 117 | " import datetime\n", 118 | " import pickle\n", 119 | " starttime = datetime.datetime.now()\n", 120 | " print(\"[CH{}]job start!\".format(eegCH))\n", 121 | " file_path = \"./dump_file/CH{}_df_EEG_x\".format(eegCH)\n", 122 | " locals()['CH{}_df_EEG_x'.format(eegCH)] = pickle.load(open(file_path,\"rb\"))\n", 123 | " if eegCH != 2:\n", 124 | " for i in range(0,1280,1):\n", 125 | " x = locals()['CH{}_df_EEG_x'.format(eegCH)].iloc[i,:]\n", 126 | " decomposer = EMD(x)\n", 127 | " imfs = decomposer.decompose()\n", 128 | " file_path = \"./dump_file_imfs/CH{}_imfs_{}\".format(eegCH,i)\n", 129 | " pickle.dump(imfs,open(file_path,\"wb\"))\n", 130 | " else:\n", 131 | " dfCH2 = abs(locals()['CH{}_df_EEG_x'.format(eegCH)])\n", 132 | " for i in range(0,1280,1):\n", 133 | " x = dfCH2.iloc[i,:]\n", 134 | " decomposer = EMD(x)\n", 135 | " imfs = decomposer.decompose()\n", 136 | " file_path = \"./dump_file_imfs/CH{}_imfs_{}\".format(eegCH,i)\n", 137 | " pickle.dump(imfs,open(file_path,\"wb\")) \n", 138 | " print(\"[CH{}]job done!\".format(eegCH))\n", 139 | " #用来计算程序运行时间\n", 140 | " endtime = datetime.datetime.now()\n", 141 | " print(\"程序运行时间:%.1fs\"%(endtime - starttime).seconds)\n", 142 | "'''\n", 143 | "if False:\n", 144 
| " for eegCH in range(1,33,1):\n", 145 | " locals()[\"p{}\".format(eegCH)] = multiprocessing.Process(target = runEMD, args = (eegCH,))\n", 146 | " locals()[\"p{}\".format(eegCH)].start()\n", 147 | " print(\"p{}.pid:{}\".format(eegCH,locals()[\"p{}\".format(eegCH)].pid))\n", 148 | "else:\n", 149 | " runEMD(eegCH=2)\n", 150 | "''' " 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 8, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "#使用原始EEG信号提取样本熵特征\n", 174 | "def countSampleEntropy(eegCH,data,lineIndex):\n", 175 | " tmpSE = se.sampEn(data,m=2,r=1)\n", 176 | " filePath = \"./dump_file_sampEn/CH{}_sampEn_{}\".format(eegCH,lineIndex)\n", 177 | " pickle.dump(tmpSE,open(filePath,\"wb\"))\n", 178 | " print(\"[CH{}]->{} done!\".format(eegCH,lineIndex))\n", 179 | " \n", 180 | "def countSampleEntropySomes(eegCH,data,startIndex,endIndex): \n", 181 | " sampEnList = []\n", 182 | " for i in range(0,endIndex-startIndex,1):\n", 183 | " std = np.std(data.iloc[i,:].values)\n", 184 | " try:\n", 185 | " tmpSampEn = se.sampEn(data.iloc[i,:].values,m=2,r=0.2*std) #这里不能是dataframe\n", 186 | " except BaseException:\n", 187 | " print(\"ERROR!!!!!!!!!!!!!!!!!!\",eegCH,startIndex,endIndex,i) \n", 188 | " sampEnList.append(tmpSampEn)\n", 189 | " result = pd.DataFrame(sampEnList,index=data.index,columns=['CH{}SampEn'.format(eegCH)])\n", 190 | " filePath = \"./dump_file_sampEn/CH{}_sampEn_{}_{}\".format(eegCH,startIndex,endIndex)\n", 191 | " pickle.dump(result,open(filePath,\"wb\"))\n", 192 | " print(\"[CH{}]->{}_{} done!\".format(eegCH,startIndex,endIndex))\n", 193 | "\n", 194 | "cpuNums = 20\n", 195 | "steps = 1280 // cpuNums\n", 196 | "startIndexList = [idx for idx in range(0,1280,steps)]\n", 197 | "startPoint = 6784\n", 198 | "endPoint = 8064\n", 199 | "if False:\n", 200 | " for eegCH in range(1,33,1):\n", 201 | " for sIdx in startIndexList:\n", 202 | " if eegCH !=2:\n", 203 | " if sIdx+steps >= 1280:\n", 204 | " data = locals()['CH{}_df_EEG_x'.format(eegCH)].iloc[sIdx:1280,startPoint:endPoint]\n", 205 | " else:\n", 206 | " data = locals()['CH{}_df_EEG_x'.format(eegCH)].iloc[sIdx:sIdx+steps,startPoint:endPoint]\n", 207 | " locals()[\"p{}\".format(eegCH)] = multiprocessing.Process(target = countSampleEntropySomes, \n", 208 | " args = (eegCH,data,sIdx,sIdx+steps))\n", 209 | " locals()[\"p{}\".format(eegCH)].start()\n", 210 | " print(\"CH{}_sIdx{}.pid:{}\".format(eegCH,sIdx,locals()[\"p{}\".format(eegCH)].pid))\n", 211 | " else:#CH2是复数,另外处理\n", 212 | " data = abs(locals()['CH{}_df_EEG_x'.format(eegCH)])\n", 213 | " if sIdx+steps >= 1280:\n", 214 | " data = data.iloc[sIdx:1280,startPoint:endPoint]\n", 215 | " else:\n", 216 | " data = data.iloc[sIdx:sIdx+steps,startPoint:endPoint]\n", 217 | " locals()[\"p{}\".format(eegCH)] = multiprocessing.Process(target = countSampleEntropySomes, \n", 218 | " args = (eegCH,data,sIdx,sIdx+steps))\n", 219 | " locals()[\"p{}\".format(eegCH)].start()\n", 220 | " print(\"CH{}_sIdx{}.pid:{}\".format(eegCH,sIdx,locals()[\"p{}\".format(eegCH)].pid))\n" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 9, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "#读取样本熵文件,将每个通道分别写入文件中\n", 230 | "if True: \n", 231 | " for eegCH in 
231 | "    for eegCH in range(1,33,1):\n", 232 | "        sampEnDF = pd.DataFrame()\n", 233 | "        for sIdx in startIndexList:\n", 234 | "            if sIdx+steps >= 1280:\n", 235 | "                filePath = \"./dump_file_sampEn/CH{}_sampEn_{}_{}\".format(eegCH,sIdx,1280)\n", 236 | "                locals()[\"CH{}_sampEn_{}_{}\".format(eegCH,sIdx,1280)] = pickle.load(open(filePath,\"rb\"))\n", 237 | "                sampEnDF = pd.concat([sampEnDF,locals()[\"CH{}_sampEn_{}_{}\".format(eegCH,sIdx,1280)]],axis=0)\n", 238 | "            else:\n", 239 | "                filePath = \"./dump_file_sampEn/CH{}_sampEn_{}_{}\".format(eegCH,sIdx,sIdx+steps)\n", 240 | "                locals()[\"CH{}_sampEn_{}_{}\".format(eegCH,sIdx,sIdx+steps)] = pickle.load(open(filePath,\"rb\"))\n", 241 | "                sampEnDF = pd.concat([sampEnDF,locals()[\"CH{}_sampEn_{}_{}\".format(eegCH,sIdx,sIdx+steps)]],axis=0)\n", 242 | "        filePath = \"./dump_file_sampEn/CH{}_sampEn\".format(eegCH)\n", 243 | "        pickle.dump(sampEnDF,open(filePath,\"wb\"))\n", 244 | "        " 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 10, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "if True:\n", 254 | "    sampEnDF = pd.DataFrame()\n", 255 | "    for eegCH in range(1,33,1):\n", 256 | "        filePath = \"./dump_file_sampEn/CH{}_sampEn\".format(eegCH)\n", 257 | "        tmpDF = pickle.load(open(filePath,\"rb\"))\n", 258 | "        sampEnDF = pd.concat([sampEnDF,tmpDF],axis=1)\n", 259 | "    filePath = \"./dump_file_sampEn/sampEnFeatures\"\n", 260 | "    pickle.dump(sampEnDF,open(filePath,\"wb\"))\n", 261 | "    \n" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 12, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "data": { 271 | "text/plain": [ 272 | "(1280, 32)" 273 | ] 274 | }, 275 | "execution_count": 12, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [ 281 | "sampEnDF.shape\n" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "\n" 291 | ] 292 | } 293 | ], 294 | "metadata": { 295 | "kernelspec": { 296 | "display_name": "Python 3", 297 | "language": "python", 298 | "name": "python3" 299 | }, 300 | "language_info": { 301 | "codemirror_mode": { 302 | "name": "ipython", 303 | "version": 3 304 | }, 305 | "file_extension": ".py", 306 | "mimetype": "text/x-python", 307 | "name": "python", 308 | "nbconvert_exporter": "python", 309 | "pygments_lexer": "ipython3", 310 | "version": "3.6.5" 311 | } 312 | }, 313 | "nbformat": 4, 314 | "nbformat_minor": 2 315 | } 316 | -------------------------------------------------------------------------------- /GCF_2cPPG_TimeDomain.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import multiprocessing\n", 10 | "from GCForest import gcForest\n", 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "import pickle \n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from sklearn.model_selection import train_test_split\n", 16 | "from sklearn.model_selection import StratifiedKFold\n", 17 | "from sklearn.preprocessing import MinMaxScaler\n", 18 | "from sklearn.metrics import accuracy_score\n", 19 | "from sklearn.metrics import f1_score\n", 20 | "from sklearn.metrics import precision_score\n", 21 | "from sklearn.metrics import recall_score\n", 22 | "from sklearn.preprocessing import Imputer\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings(\"ignore\")\n", 25
| "#用来计算程序运行时间\n", 26 | "import datetime\n", 27 | "starttime = datetime.datetime.now()" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | " valence arousal dominance liking 2cArousal 2cValence\n", 47 | "s01_0 7.71 7.60 6.90 7.83 1 1\n", 48 | "s01_1 8.10 7.31 7.28 8.47 1 1\n", 49 | "s01_2 8.58 7.54 9.00 7.08 1 1\n", 50 | "s01_3 4.94 6.01 6.12 8.06 0 0\n", 51 | "s01_4 6.96 3.92 7.19 6.05 1 1\n", 52 | "[seed:0]****************************************************\n", 53 | "BVP running multi-grain scan\n", 54 | "Slicing Sequence...\n", 55 | "Training MGS Random Forests...\n", 56 | "Slicing Sequence...\n", 57 | "(896, 492)\n", 58 | "(384, 492)\n", 59 | "Adding/Training Layer, n_layer=1\n", 60 | "Layer validation accuracy = 0.6222222222222222\n", 61 | "Adding/Training Layer, n_layer=2\n", 62 | "Layer validation accuracy = 0.6222222222222222\n", 63 | "ACC 0.6692708333333334\n", 64 | "F1 0.7590132827324478\n", 65 | "Recal 0.9216589861751152\n", 66 | "Precision 0.6451612903225806\n", 67 | "[seed:100]****************************************************\n", 68 | "BVP running multi-grain scan\n", 69 | "Slicing Sequence...\n", 70 | "Training MGS Random Forests...\n", 71 | "Slicing Sequence...\n", 72 | "(896, 492)\n", 73 | "(384, 492)\n", 74 | "Adding/Training Layer, n_layer=1\n", 75 | "Layer validation accuracy = 0.6333333333333333\n", 76 | "Adding/Training Layer, n_layer=2\n", 77 | "Layer validation accuracy = 0.6388888888888888\n", 78 | "Adding/Training Layer, n_layer=3\n", 79 | "Layer validation accuracy = 0.6388888888888888\n", 80 | "ACC 0.671875\n", 81 | "F1 0.7586206896551725\n", 82 | "Recal 0.9124423963133641\n", 83 | "Precision 0.6491803278688525\n", 84 | "[seed:200]****************************************************\n", 85 | "BVP running multi-grain scan\n", 86 | "Slicing Sequence...\n", 87 | "Training MGS Random Forests...\n", 88 | "Slicing Sequence...\n", 89 | "(896, 492)\n", 90 | "(384, 492)\n", 91 | "Adding/Training Layer, n_layer=1\n", 92 | "Layer validation accuracy = 0.65\n", 93 | "Adding/Training Layer, n_layer=2\n", 94 | "Layer validation accuracy = 0.65\n", 95 | "ACC 0.6171875\n", 96 | "F1 0.72\n", 97 | "Recal 0.8709677419354839\n", 98 | "Precision 0.6136363636363636\n", 99 | "[seed:300]****************************************************\n", 100 | "BVP running multi-grain scan\n", 101 | "Slicing Sequence...\n", 102 | "Training MGS Random Forests...\n", 103 | "Slicing Sequence...\n", 104 | "(896, 492)\n", 105 | "(384, 492)\n", 106 | "Adding/Training Layer, n_layer=1\n", 107 | "Layer validation accuracy = 0.6277777777777778\n", 108 | "Adding/Training Layer, n_layer=2\n", 109 | "Layer validation accuracy = 0.6222222222222222\n", 110 | "ACC 0.65625\n", 111 | "F1 0.7421874999999999\n", 112 | "Recal 0.8755760368663594\n", 113 | "Precision 0.6440677966101694\n", 114 | "[seed:400]****************************************************\n", 115 | "BVP running multi-grain scan\n", 116 | "Slicing Sequence...\n", 117 | "Training MGS Random Forests...\n", 118 | "Slicing Sequence...\n", 119 | "(896, 492)\n", 120 | "(384, 492)\n", 121 | "Adding/Training Layer, n_layer=1\n", 122 | "Layer validation accuracy = 0.7\n", 123 | "Adding/Training Layer, n_layer=2\n", 124 | "Layer validation accuracy = 0.7\n", 125 | "ACC 
0.6432291666666666\n", 126 | "F1 0.7380497131931166\n", 127 | "Recal 0.8894009216589862\n", 128 | "Precision 0.630718954248366\n", 129 | "[seed:500]****************************************************\n", 130 | "BVP running multi-grain scan\n", 131 | "Slicing Sequence...\n", 132 | "Training MGS Random Forests...\n", 133 | "Slicing Sequence...\n", 134 | "(896, 492)\n", 135 | "(384, 492)\n", 136 | "Adding/Training Layer, n_layer=1\n", 137 | "Layer validation accuracy = 0.6166666666666667\n", 138 | "Adding/Training Layer, n_layer=2\n", 139 | "Layer validation accuracy = 0.6333333333333333\n", 140 | "Adding/Training Layer, n_layer=3\n", 141 | "Layer validation accuracy = 0.6333333333333333\n", 142 | "ACC 0.6692708333333334\n", 143 | "F1 0.7543520309477756\n", 144 | "Recal 0.8986175115207373\n", 145 | "Precision 0.65\n", 146 | "[seed:600]****************************************************\n", 147 | "BVP running multi-grain scan\n", 148 | "Slicing Sequence...\n", 149 | "Training MGS Random Forests...\n", 150 | "Slicing Sequence...\n", 151 | "(896, 492)\n", 152 | "(384, 492)\n", 153 | "Adding/Training Layer, n_layer=1\n", 154 | "Layer validation accuracy = 0.5833333333333334\n", 155 | "Adding/Training Layer, n_layer=2\n", 156 | "Layer validation accuracy = 0.5833333333333334\n", 157 | "ACC 0.6510416666666666\n", 158 | "F1 0.7403100775193797\n", 159 | "Recal 0.880184331797235\n", 160 | "Precision 0.6387959866220736\n", 161 | "[seed:700]****************************************************\n", 162 | "BVP running multi-grain scan\n", 163 | "Slicing Sequence...\n", 164 | "Training MGS Random Forests...\n", 165 | "Slicing Sequence...\n", 166 | "(896, 492)\n", 167 | "(384, 492)\n", 168 | "Adding/Training Layer, n_layer=1\n", 169 | "Layer validation accuracy = 0.6\n", 170 | "Adding/Training Layer, n_layer=2\n", 171 | "Layer validation accuracy = 0.6055555555555555\n", 172 | "Adding/Training Layer, n_layer=3\n", 173 | "Layer validation accuracy = 0.5888888888888889\n", 174 | "ACC 0.6875\n", 175 | "F1 0.7600000000000001\n", 176 | "Recal 0.8755760368663594\n", 177 | "Precision 0.6713780918727915\n", 178 | "[seed:800]****************************************************\n", 179 | "BVP running multi-grain scan\n", 180 | "Slicing Sequence...\n", 181 | "Training MGS Random Forests...\n", 182 | "Slicing Sequence...\n", 183 | "(896, 492)\n", 184 | "(384, 492)\n", 185 | "Adding/Training Layer, n_layer=1\n", 186 | "Layer validation accuracy = 0.6611111111111111\n", 187 | "Adding/Training Layer, n_layer=2\n", 188 | "Layer validation accuracy = 0.6555555555555556\n", 189 | "ACC 0.65625\n", 190 | "F1 0.7431906614785991\n", 191 | "Recal 0.880184331797235\n", 192 | "Precision 0.6430976430976431\n", 193 | "[seed:900]****************************************************\n", 194 | "BVP running multi-grain scan\n", 195 | "Slicing Sequence...\n", 196 | "Training MGS Random Forests...\n", 197 | "Slicing Sequence...\n", 198 | "(896, 492)\n", 199 | "(384, 492)\n", 200 | "Adding/Training Layer, n_layer=1\n", 201 | "Layer validation accuracy = 0.7\n", 202 | "Adding/Training Layer, n_layer=2\n", 203 | "Layer validation accuracy = 0.7\n", 204 | "ACC 0.6614583333333334\n", 205 | "F1 0.7470817120622569\n", 206 | "Recal 0.8847926267281107\n", 207 | "Precision 0.6464646464646465\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "# Load the labels (Y)\n", 213 | "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n", 214 | "all_df_y['2cArousal'] = 0\n", 215 | "all_df_y['2cArousal'][all_df_y['arousal'] >= 5] = 1\n",
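"# DEAP self-ratings run from 1 to 9, so >= 5 marks the high half of each scale;\n", "# the two 0/1 columns built here drive the binary tasks shown in the outputs above.\n",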
"all_df_y['2cValence'] = 0\n", 217 | "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n", 218 | "print(all_df_y.head(5))\n", 219 | "\n", 220 | "#读取1个通道的PPG数据,每个通道包含32×40=1280个信号样本\n", 221 | "#每个样本向量大小为8064点(63s*128Hz)\n", 222 | "all_df_PPG_x = pickle.load(open(\"./dump_file/all_df_PPG_x\",\"rb\"))\n", 223 | "y = all_df_y[['2cValence']]\n", 224 | "#y = all_df_y[['2cArousal']]\n", 225 | "for seed in [0,100,200,300,400,500,600,700,800,900]:\n", 226 | " print(\"[seed:{}]****************************************************\".format(seed))\n", 227 | " xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n", 228 | " xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n", 229 | " y_tr = y.loc[xTrainIdx]\n", 230 | " y_te = y.loc[xTestIdx]\n", 231 | "\n", 232 | " PPGTrainSet = all_df_PPG_x.loc[xTrainIdx]\n", 233 | " PPGTestSet = all_df_PPG_x.loc[xTestIdx]\n", 234 | "\n", 235 | " myWindowsSize = 256\n", 236 | " myStrideSize = 64\n", 237 | " gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n", 238 | " min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n", 239 | " if True: \n", 240 | " print(\"PPG running multi-grain scan\")\n", 241 | " xTrain,yTrain = PPGTrainSet.values,y_tr.values\n", 242 | " xTest = PPGTestSet.values \n", 243 | " PPG_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)\n", 244 | " PPG_mgsTestVector = gcf.mg_scanning(xTest)\n", 245 | " filePath = \"./dump_file_V2/PPG_mgsTrainVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n", 246 | " pickle.dump(PPG_mgsTrainVector,open(filePath,\"wb\"))\n", 247 | " filePath = \"./dump_file_V2/PPG_mgsTestVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n", 248 | " pickle.dump(PPG_mgsTestVector,open(filePath,\"wb\"))\n", 249 | " else:\n", 250 | " filePath = \"./dump_file_V2/PPG_mgsTrainVector_{}_{}\".format(myWindowsSize,myStrideSize)\n", 251 | " PPG_mgsTrainVector = pickle.load(open(filePath,\"rb\"))\n", 252 | " filePath = \"./dump_file_V2/PPG_mgsTestVector_{}_{}\".format(myWindowsSize,myStrideSize)\n", 253 | " PPG_mgsTestVector = pickle.load(open(filePath,\"rb\"))\n", 254 | "\n", 255 | " X_tr_vector = PPG_mgsTrainVector\n", 256 | " X_te_vector = PPG_mgsTestVector\n", 257 | " print(X_tr_vector.shape)\n", 258 | " print(X_te_vector.shape)\n", 259 | "\n", 260 | " #有缺失值,填充下\n", 261 | " X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values\n", 262 | " X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values\n", 263 | "\n", 264 | " _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)\n", 265 | "\n", 266 | " pred_proba = gcf.cascade_forest(X_te_vector_fillna)\n", 267 | " PPG = np.mean(pred_proba, axis=0)\n", 268 | " preds = np.argmax(PPG, axis=1)\n", 269 | " print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n", 270 | " print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n", 271 | " print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n", 272 | " print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))" 273 | ] 274 | } 275 | ], 276 | "metadata": { 277 | "kernelspec": { 278 | "display_name": "Python 3", 279 | "language": "python", 280 | "name": "python3" 281 | }, 282 | "language_info": { 283 | "codemirror_mode": { 284 | "name": "ipython", 285 | "version": 3 286 | }, 287 | "file_extension": ".py", 288 | "mimetype": "text/x-python", 289 | "name": "python", 290 | "nbconvert_exporter": "python", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.6.5" 293 | } 294 | }, 295 | "nbformat": 4, 296 | 
"nbformat_minor": 2 297 | } 298 | -------------------------------------------------------------------------------- /GCF_2cRandom.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import multiprocessing\n", 10 | "from GCForest import gcForest\n", 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "import pickle \n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from sklearn.model_selection import train_test_split\n", 16 | "from sklearn.model_selection import StratifiedKFold\n", 17 | "from sklearn.preprocessing import MinMaxScaler\n", 18 | "from sklearn.metrics import accuracy_score\n", 19 | "from sklearn.metrics import f1_score\n", 20 | "from sklearn.metrics import precision_score\n", 21 | "from sklearn.metrics import recall_score\n", 22 | "from sklearn.preprocessing import Imputer\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings(\"ignore\")\n", 25 | "#用来计算程序运行时间\n", 26 | "import datetime\n", 27 | "starttime = datetime.datetime.now()" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | " valence arousal dominance liking 2cArousal 2cValence\n", 40 | "s01_0 7.71 7.60 6.90 7.83 1 1\n", 41 | "s01_1 8.10 7.31 7.28 8.47 1 1\n", 42 | "s01_2 8.58 7.54 9.00 7.08 1 1\n", 43 | "s01_3 4.94 6.01 6.12 8.06 0 0\n", 44 | "s01_4 6.96 3.92 7.19 6.05 1 1\n", 45 | "[seed:0]****************************************************\n", 46 | "GSR running multi-grain scan\n", 47 | "Slicing Sequence...\n", 48 | "Training MGS Random Forests...\n", 49 | "Slicing Sequence...\n", 50 | "(896, 492)\n", 51 | "(384, 492)\n", 52 | "Adding/Training Layer, n_layer=1\n", 53 | "Layer validation accuracy = 0.55\n", 54 | "Adding/Training Layer, n_layer=2\n", 55 | "Layer validation accuracy = 0.5555555555555556\n", 56 | "Adding/Training Layer, n_layer=3\n", 57 | "Layer validation accuracy = 0.5555555555555556\n", 58 | "ACC 0.5651041666666666\n", 59 | "F1 0.7221297836938436\n", 60 | "Recal 1.0\n", 61 | "Precision 0.5651041666666666\n", 62 | "[seed:100]****************************************************\n", 63 | "GSR running multi-grain scan\n", 64 | "Slicing Sequence...\n", 65 | "Training MGS Random Forests...\n", 66 | "Slicing Sequence...\n", 67 | "(896, 492)\n", 68 | "(384, 492)\n", 69 | "Adding/Training Layer, n_layer=1\n", 70 | "Layer validation accuracy = 0.5666666666666667\n", 71 | "Adding/Training Layer, n_layer=2\n", 72 | "Layer validation accuracy = 0.5611111111111111\n", 73 | "ACC 0.5651041666666666\n", 74 | "F1 0.7221297836938436\n", 75 | "Recal 1.0\n", 76 | "Precision 0.5651041666666666\n", 77 | "[seed:200]****************************************************\n", 78 | "GSR running multi-grain scan\n", 79 | "Slicing Sequence...\n", 80 | "Training MGS Random Forests...\n", 81 | "Slicing Sequence...\n", 82 | "(896, 492)\n", 83 | "(384, 492)\n", 84 | "Adding/Training Layer, n_layer=1\n", 85 | "Layer validation accuracy = 0.5611111111111111\n", 86 | "Adding/Training Layer, n_layer=2\n", 87 | "Layer validation accuracy = 0.5666666666666667\n", 88 | "Adding/Training Layer, n_layer=3\n", 89 | "Layer validation accuracy = 0.5777777777777777\n", 90 | "Adding/Training Layer, n_layer=4\n", 91 | "Layer validation accuracy = 0.5777777777777777\n", 92 | "ACC 0.5651041666666666\n", 93 | "F1 
0.7221297836938436\n", 94 | "Recal 1.0\n", 95 | "Precision 0.5651041666666666\n", 96 | "[seed:300]****************************************************\n", 97 | "GSR running multi-grain scan\n", 98 | "Slicing Sequence...\n", 99 | "Training MGS Random Forests...\n", 100 | "Slicing Sequence...\n", 101 | "(896, 492)\n", 102 | "(384, 492)\n", 103 | "Adding/Training Layer, n_layer=1\n", 104 | "Layer validation accuracy = 0.6111111111111112\n", 105 | "Adding/Training Layer, n_layer=2\n", 106 | "Layer validation accuracy = 0.6111111111111112\n", 107 | "ACC 0.5651041666666666\n", 108 | "F1 0.7221297836938436\n", 109 | "Recal 1.0\n", 110 | "Precision 0.5651041666666666\n", 111 | "[seed:400]****************************************************\n", 112 | "GSR running multi-grain scan\n", 113 | "Slicing Sequence...\n", 114 | "Training MGS Random Forests...\n", 115 | "Slicing Sequence...\n", 116 | "(896, 492)\n", 117 | "(384, 492)\n", 118 | "Adding/Training Layer, n_layer=1\n", 119 | "Layer validation accuracy = 0.6\n", 120 | "Adding/Training Layer, n_layer=2\n", 121 | "Layer validation accuracy = 0.6444444444444445\n", 122 | "Adding/Training Layer, n_layer=3\n", 123 | "Layer validation accuracy = 0.6166666666666667\n", 124 | "ACC 0.5651041666666666\n", 125 | "F1 0.7221297836938436\n", 126 | "Recal 1.0\n", 127 | "Precision 0.5651041666666666\n", 128 | "[seed:500]****************************************************\n", 129 | "GSR running multi-grain scan\n", 130 | "Slicing Sequence...\n", 131 | "Training MGS Random Forests...\n", 132 | "Slicing Sequence...\n", 133 | "(896, 492)\n", 134 | "(384, 492)\n", 135 | "Adding/Training Layer, n_layer=1\n", 136 | "Layer validation accuracy = 0.5722222222222222\n", 137 | "Adding/Training Layer, n_layer=2\n", 138 | "Layer validation accuracy = 0.5611111111111111\n", 139 | "ACC 0.5651041666666666\n", 140 | "F1 0.7221297836938436\n", 141 | "Recal 1.0\n", 142 | "Precision 0.5651041666666666\n", 143 | "[seed:600]****************************************************\n", 144 | "GSR running multi-grain scan\n", 145 | "Slicing Sequence...\n", 146 | "Training MGS Random Forests...\n", 147 | "Slicing Sequence...\n", 148 | "(896, 492)\n", 149 | "(384, 492)\n", 150 | "Adding/Training Layer, n_layer=1\n", 151 | "Layer validation accuracy = 0.5555555555555556\n", 152 | "Adding/Training Layer, n_layer=2\n", 153 | "Layer validation accuracy = 0.5611111111111111\n", 154 | "Adding/Training Layer, n_layer=3\n", 155 | "Layer validation accuracy = 0.5666666666666667\n", 156 | "Adding/Training Layer, n_layer=4\n", 157 | "Layer validation accuracy = 0.5555555555555556\n", 158 | "ACC 0.5651041666666666\n", 159 | "F1 0.7221297836938436\n", 160 | "Recal 1.0\n", 161 | "Precision 0.5651041666666666\n", 162 | "[seed:700]****************************************************\n", 163 | "GSR running multi-grain scan\n", 164 | "Slicing Sequence...\n", 165 | "Training MGS Random Forests...\n", 166 | "Slicing Sequence...\n", 167 | "(896, 492)\n", 168 | "(384, 492)\n", 169 | "Adding/Training Layer, n_layer=1\n", 170 | "Layer validation accuracy = 0.5222222222222223\n", 171 | "Adding/Training Layer, n_layer=2\n", 172 | "Layer validation accuracy = 0.5222222222222223\n", 173 | "ACC 0.5651041666666666\n", 174 | "F1 0.7221297836938436\n", 175 | "Recal 1.0\n", 176 | "Precision 0.5651041666666666\n", 177 | "[seed:800]****************************************************\n", 178 | "GSR running multi-grain scan\n", 179 | "Slicing Sequence...\n", 180 | "Training MGS Random Forests...\n", 181 | "Slicing 
Sequence...\n", 182 | "(896, 492)\n", 183 | "(384, 492)\n", 184 | "Adding/Training Layer, n_layer=1\n", 185 | "Layer validation accuracy = 0.5111111111111111\n", 186 | "Adding/Training Layer, n_layer=2\n", 187 | "Layer validation accuracy = 0.5166666666666667\n", 188 | "Adding/Training Layer, n_layer=3\n", 189 | "Layer validation accuracy = 0.5111111111111111\n", 190 | "ACC 0.5651041666666666\n", 191 | "F1 0.7221297836938436\n", 192 | "Recal 1.0\n", 193 | "Precision 0.5651041666666666\n", 194 | "[seed:900]****************************************************\n", 195 | "GSR running multi-grain scan\n", 196 | "Slicing Sequence...\n", 197 | "Training MGS Random Forests...\n", 198 | "Slicing Sequence...\n", 199 | "(896, 492)\n", 200 | "(384, 492)\n", 201 | "Adding/Training Layer, n_layer=1\n", 202 | "Layer validation accuracy = 0.6111111111111112\n", 203 | "Adding/Training Layer, n_layer=2\n", 204 | "Layer validation accuracy = 0.6055555555555555\n", 205 | "ACC 0.5651041666666666\n", 206 | "F1 0.7221297836938436\n", 207 | "Recal 1.0\n", 208 | "Precision 0.5651041666666666\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "# Load the labels (Y)\n", 214 | "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n", 215 | "all_df_y['2cArousal'] = 0\n", 216 | "all_df_y['2cArousal'][all_df_y['arousal'] >= 5] = 1\n", 217 | "all_df_y['2cValence'] = 0\n", 218 | "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n", 219 | "print(all_df_y.head(5))\n", 220 | "\n", 221 | "# Load the single-channel GSR data: 32 subjects x 40 trials = 1280 signal samples\n", 222 | "# Each sample vector is 8064 points long (63 s * 128 Hz)\n", 223 | "all_df_GSR_x = pickle.load(open(\"./dump_file/all_df_GSR_x\",\"rb\"))\n", 224 | "# Replace the signals with a constant all-ones dummy matrix as a chance-level baseline\n", 225 | "a = np.ones(shape=(1280,8064),)\n", 226 | "all_df_GSR_x = pd.DataFrame(a,index=all_df_GSR_x.index)\n", 227 | "\n", 228 | "#y = all_df_y[['2cArousal']]\n", 229 | "y = all_df_y[['2cValence']]\n", 230 | "for seed in [0,100,200,300,400,500,600,700,800,900]:\n", 231 | "    print(\"[seed:{}]****************************************************\".format(seed))\n", 232 | "    xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n", 233 | "    xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n", 234 | "    y_tr = y.loc[xTrainIdx]\n", 235 | "    y_te = y.loc[xTestIdx]\n", 236 | "    GSRTrainSet = all_df_GSR_x.loc[xTrainIdx]\n", 237 | "    GSRTestSet = all_df_GSR_x.loc[xTestIdx]\n", 238 | "    myWindowsSize = 256\n", 239 | "    myStrideSize = 64\n", 240 | "    gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n", 241 | "                   min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n", 242 | "    if True: \n", 243 | "        print(\"GSR running multi-grain scan\")\n", 244 | "        xTrain,yTrain = GSRTrainSet.values,y_tr.values\n", 245 | "        xTest = GSRTestSet.values \n", 246 | "        GSR_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)\n", 247 | "        GSR_mgsTestVector = gcf.mg_scanning(xTest)\n", 248 | "        X_tr_vector = GSR_mgsTrainVector\n", 249 | "        X_te_vector = GSR_mgsTestVector\n", 250 | "        print(X_tr_vector.shape)\n", 251 | "        print(X_te_vector.shape)\n", 252 | "        # The scan vectors contain NaNs; fill them with 0\n", 253 | "        X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values\n", 254 | "        X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values\n", 255 | "        _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)\n", 256 | "        pred_proba = gcf.cascade_forest(X_te_vector_fillna)\n", 257 | "        tmp = np.mean(pred_proba, axis=0)\n", 258 | "        preds = np.argmax(tmp, axis=1)\n", 259 | "        print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n", 260 | "        print(\"F1\",f1_score(y_true=y_te, 
y_pred=preds))\n", 261 | " print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n", 262 | " print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "Python 3", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.6.5" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 2 308 | } 309 | -------------------------------------------------------------------------------- /GCF_2cTMP__TimeDomain.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import multiprocessing\n", 10 | "from GCForest import gcForest\n", 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "import pickle \n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from sklearn.model_selection import train_test_split\n", 16 | "from sklearn.model_selection import StratifiedKFold\n", 17 | "from sklearn.preprocessing import MinMaxScaler\n", 18 | "from sklearn.metrics import accuracy_score\n", 19 | "from sklearn.metrics import f1_score\n", 20 | "from sklearn.metrics import precision_score\n", 21 | "from sklearn.metrics import recall_score\n", 22 | "from sklearn.preprocessing import Imputer\n", 23 | "import warnings\n", 24 | "warnings.filterwarnings(\"ignore\")\n", 25 | "#用来计算程序运行时间\n", 26 | "import datetime\n", 27 | "starttime = datetime.datetime.now()" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | " valence arousal dominance liking 2cArousal 2cValence\n", 40 | "s01_0 7.71 7.60 6.90 7.83 1 1\n", 41 | "s01_1 8.10 7.31 7.28 8.47 1 1\n", 42 | "s01_2 8.58 7.54 9.00 7.08 1 1\n", 43 | "s01_3 4.94 6.01 6.12 8.06 0 0\n", 44 | "s01_4 6.96 3.92 7.19 6.05 1 1\n", 45 | "[seed:0]****************************************************\n", 46 | "TMP running multi-grain scan\n", 47 | "Slicing Sequence...\n", 48 | "Training MGS Random Forests...\n", 49 | "Slicing Sequence...\n", 50 | "(896, 492)\n", 51 | "(384, 492)\n", 52 | "Adding/Training Layer, n_layer=1\n", 53 | "Layer validation accuracy = 0.6888888888888889\n", 54 | "Adding/Training Layer, n_layer=2\n", 55 | "Layer validation accuracy = 0.6888888888888889\n", 56 | "ACC 0.6666666666666666\n", 57 | "F1 0.7480314960629921\n", 58 | "Recal 0.8755760368663594\n", 59 | "Precision 0.6529209621993127\n", 60 | "[seed:100]****************************************************\n", 61 | "TMP running multi-grain scan\n", 62 | "Slicing Sequence...\n", 63 | "Training MGS Random Forests...\n", 64 | "Slicing Sequence...\n", 65 | "(896, 492)\n", 66 | "(384, 492)\n", 67 | "Adding/Training 
Layer, n_layer=1\n", 68 | "Layer validation accuracy = 0.6555555555555556\n", 69 | "Adding/Training Layer, n_layer=2\n", 70 | "Layer validation accuracy = 0.65\n", 71 | "ACC 0.6744791666666666\n", 72 | "F1 0.7572815533980584\n", 73 | "Recal 0.8986175115207373\n", 74 | "Precision 0.6543624161073825\n", 75 | "[seed:200]****************************************************\n", 76 | "TMP running multi-grain scan\n", 77 | "Slicing Sequence...\n", 78 | "Training MGS Random Forests...\n", 79 | "Slicing Sequence...\n", 80 | "(896, 492)\n", 81 | "(384, 492)\n", 82 | "Adding/Training Layer, n_layer=1\n", 83 | "Layer validation accuracy = 0.6666666666666666\n", 84 | "Adding/Training Layer, n_layer=2\n", 85 | "Layer validation accuracy = 0.6555555555555556\n", 86 | "ACC 0.6197916666666666\n", 87 | "F1 0.7234848484848484\n", 88 | "Recal 0.880184331797235\n", 89 | "Precision 0.6141479099678456\n", 90 | "[seed:300]****************************************************\n", 91 | "TMP running multi-grain scan\n", 92 | "Slicing Sequence...\n", 93 | "Training MGS Random Forests...\n", 94 | "Slicing Sequence...\n", 95 | "(896, 492)\n", 96 | "(384, 492)\n", 97 | "Adding/Training Layer, n_layer=1\n", 98 | "Layer validation accuracy = 0.6666666666666666\n", 99 | "Adding/Training Layer, n_layer=2\n", 100 | "Layer validation accuracy = 0.6611111111111111\n", 101 | "ACC 0.6588541666666666\n", 102 | "F1 0.745631067961165\n", 103 | "Recal 0.8847926267281107\n", 104 | "Precision 0.6442953020134228\n", 105 | "[seed:400]****************************************************\n", 106 | "TMP running multi-grain scan\n", 107 | "Slicing Sequence...\n", 108 | "Training MGS Random Forests...\n", 109 | "Slicing Sequence...\n", 110 | "(896, 492)\n", 111 | "(384, 492)\n", 112 | "Adding/Training Layer, n_layer=1\n", 113 | "Layer validation accuracy = 0.6333333333333333\n", 114 | "Adding/Training Layer, n_layer=2\n", 115 | "Layer validation accuracy = 0.6333333333333333\n", 116 | "ACC 0.6458333333333334\n", 117 | "F1 0.7394636015325671\n", 118 | "Recal 0.8894009216589862\n", 119 | "Precision 0.6327868852459017\n", 120 | "[seed:500]****************************************************\n", 121 | "TMP running multi-grain scan\n", 122 | "Slicing Sequence...\n", 123 | "Training MGS Random Forests...\n", 124 | "Slicing Sequence...\n", 125 | "(896, 492)\n", 126 | "(384, 492)\n", 127 | "Adding/Training Layer, n_layer=1\n", 128 | "Layer validation accuracy = 0.6611111111111111\n", 129 | "Adding/Training Layer, n_layer=2\n", 130 | "Layer validation accuracy = 0.6444444444444445\n", 131 | "ACC 0.6614583333333334\n", 132 | "F1 0.7470817120622569\n", 133 | "Recal 0.8847926267281107\n", 134 | "Precision 0.6464646464646465\n", 135 | "[seed:600]****************************************************\n", 136 | "TMP running multi-grain scan\n", 137 | "Slicing Sequence...\n", 138 | "Training MGS Random Forests...\n", 139 | "Slicing Sequence...\n", 140 | "(896, 492)\n", 141 | "(384, 492)\n", 142 | "Adding/Training Layer, n_layer=1\n", 143 | "Layer validation accuracy = 0.6777777777777778\n", 144 | "Adding/Training Layer, n_layer=2\n", 145 | "Layer validation accuracy = 0.6833333333333333\n", 146 | "Adding/Training Layer, n_layer=3\n", 147 | "Layer validation accuracy = 0.6944444444444444\n", 148 | "Adding/Training Layer, n_layer=4\n", 149 | "Layer validation accuracy = 0.6833333333333333\n", 150 | "ACC 0.6432291666666666\n", 151 | "F1 0.730844793713163\n", 152 | "Recal 0.8571428571428571\n", 153 | "Precision 0.636986301369863\n", 154 | 
"[seed:700]****************************************************\n", 155 | "TMP running multi-grain scan\n", 156 | "Slicing Sequence...\n", 157 | "Training MGS Random Forests...\n", 158 | "Slicing Sequence...\n", 159 | "(896, 492)\n", 160 | "(384, 492)\n", 161 | "Adding/Training Layer, n_layer=1\n", 162 | "Layer validation accuracy = 0.7\n", 163 | "Adding/Training Layer, n_layer=2\n", 164 | "Layer validation accuracy = 0.7111111111111111\n", 165 | "Adding/Training Layer, n_layer=3\n", 166 | "Layer validation accuracy = 0.7111111111111111\n", 167 | "ACC 0.6822916666666666\n", 168 | "F1 0.767175572519084\n", 169 | "Recal 0.9262672811059908\n", 170 | "Precision 0.6547231270358306\n", 171 | "[seed:800]****************************************************\n", 172 | "TMP running multi-grain scan\n", 173 | "Slicing Sequence...\n", 174 | "Training MGS Random Forests...\n", 175 | "Slicing Sequence...\n", 176 | "(896, 492)\n", 177 | "(384, 492)\n", 178 | "Adding/Training Layer, n_layer=1\n", 179 | "Layer validation accuracy = 0.6555555555555556\n", 180 | "Adding/Training Layer, n_layer=2\n", 181 | "Layer validation accuracy = 0.6555555555555556\n", 182 | "ACC 0.6640625\n", 183 | "F1 0.7485380116959064\n", 184 | "Recal 0.8847926267281107\n", 185 | "Precision 0.6486486486486487\n", 186 | "[seed:900]****************************************************\n", 187 | "TMP running multi-grain scan\n", 188 | "Slicing Sequence...\n", 189 | "Training MGS Random Forests...\n", 190 | "Slicing Sequence...\n", 191 | "(896, 492)\n", 192 | "(384, 492)\n", 193 | "Adding/Training Layer, n_layer=1\n", 194 | "Layer validation accuracy = 0.6222222222222222\n", 195 | "Adding/Training Layer, n_layer=2\n", 196 | "Layer validation accuracy = 0.6277777777777778\n", 197 | "Adding/Training Layer, n_layer=3\n", 198 | "Layer validation accuracy = 0.6277777777777778\n", 199 | "ACC 0.65625\n", 200 | "F1 0.7518796992481204\n", 201 | "Recal 0.9216589861751152\n", 202 | "Precision 0.6349206349206349\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "#读取Y\n", 208 | "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n", 209 | "all_df_y['2cArousal'] = 0\n", 210 | "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n", 211 | "all_df_y['2cValence'] = 0\n", 212 | "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n", 213 | "print(all_df_y.head(5))\n", 214 | "\n", 215 | "#读取1个通道的TMP数据,每个通道包含32×40=1280个信号样本\n", 216 | "#每个样本向量大小为8064点(63s*128Hz)\n", 217 | "all_df_TMP_x = pickle.load(open(\"./dump_file/all_df_TMP_x\",\"rb\"))\n", 218 | "#y = all_df_y[['2cArousal']]\n", 219 | "y = all_df_y[['2cValence']]\n", 220 | "for seed in [0,100,200,300,400,500,600,700,800,900]:\n", 221 | " print(\"[seed:{}]****************************************************\".format(seed))\n", 222 | " xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n", 223 | " xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n", 224 | " y_tr = y.loc[xTrainIdx]\n", 225 | " y_te = y.loc[xTestIdx]\n", 226 | "\n", 227 | " TMPTrainSet = all_df_TMP_x.loc[xTrainIdx]\n", 228 | " TMPTestSet = all_df_TMP_x.loc[xTestIdx]\n", 229 | "\n", 230 | " myWindowsSize = 256\n", 231 | " myStrideSize = 64\n", 232 | " gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, \n", 233 | " min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n", 234 | " if True: \n", 235 | " print(\"TMP running multi-grain scan\")\n", 236 | " xTrain,yTrain = TMPTrainSet.values,y_tr.values\n", 237 | " xTest = 
237 | "        xTest = TMPTestSet.values \n", 238 | "        TMP_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)\n", 239 | "        TMP_mgsTestVector = gcf.mg_scanning(xTest)\n", 240 | "        filePath = \"./dump_file_V2/TMP_mgsTrainVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n", 241 | "        pickle.dump(TMP_mgsTrainVector,open(filePath,\"wb\"))\n", 242 | "        filePath = \"./dump_file_V2/TMP_mgsTestVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n", 243 | "        pickle.dump(TMP_mgsTestVector,open(filePath,\"wb\"))\n", 244 | "    else:\n", 245 | "        filePath = \"./dump_file_V2/TMP_mgsTrainVector_{}_{}\".format(myWindowsSize,myStrideSize)\n", 246 | "        TMP_mgsTrainVector = pickle.load(open(filePath,\"rb\"))\n", 247 | "        filePath = \"./dump_file_V2/TMP_mgsTestVector_{}_{}\".format(myWindowsSize,myStrideSize)\n", 248 | "        TMP_mgsTestVector = pickle.load(open(filePath,\"rb\"))\n", 249 | "\n", 250 | "    X_tr_vector = TMP_mgsTrainVector\n", 251 | "    X_te_vector = TMP_mgsTestVector\n", 252 | "    print(X_tr_vector.shape)\n", 253 | "    print(X_te_vector.shape)\n", 254 | "\n", 255 | "    # The scan vectors contain NaNs; fill them with 0\n", 256 | "    X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values\n", 257 | "    X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values\n", 258 | "\n", 259 | "    _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)\n", 260 | "\n", 261 | "    pred_proba = gcf.cascade_forest(X_te_vector_fillna)\n", 262 | "    tmp = np.mean(pred_proba, axis=0)\n", 263 | "    preds = np.argmax(tmp, axis=1)\n", 264 | "    print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n", 265 | "    print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n", 266 | "    print(\"Recall\",recall_score(y_true=y_te, y_pred=preds))\n", 267 | "    print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))" 268 | ] 269 | } 270 | ], 271 | "metadata": { 272 | "kernelspec": { 273 | "display_name": "Python 3", 274 | "language": "python", 275 | "name": "python3" 276 | }, 277 | "language_info": { 278 | "codemirror_mode": { 279 | "name": "ipython", 280 | "version": 3 281 | }, 282 | "file_extension": ".py", 283 | "mimetype": "text/x-python", 284 | "name": "python", 285 | "nbconvert_exporter": "python", 286 | "pygments_lexer": "ipython3", 287 | "version": "3.6.5" 288 | } 289 | }, 290 | "nbformat": 4, 291 | "nbformat_minor": 2 292 | } 293 | -------------------------------------------------------------------------------- /EEG_feat_extract.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Apr 26 14:39:31 2018 4 | 5 | @author: jinyu 6 | """ 7 | 8 | import pandas as pd 9 | import numpy as np 10 | from sklearn.model_selection import GroupKFold 11 | import pickle 12 | import matplotlib.pyplot as plt 13 | import warnings 14 | warnings.filterwarnings("ignore") 15 | 16 | #read file 17 | for eeg_CH in range(1,33,1): 18 | 19 |     file_path = "./dump_file/CH{}_df_EEG_x".format(eeg_CH) 20 |     #locals()['CH{}_df_EEG_x'.format(eeg_CH)] = pickle.load(open(file_path,"rb")) 21 |     df_data = pickle.load(open(file_path,"rb")) 22 |     def eeg_mean(df): 23 |         return df.mean(axis=1) 24 | 25 |     def eeg_median(df): 26 |         return df.median(axis=1) 27 | 28 |     def eeg_std(df): 29 |         return df.std(axis=1) 30 | 31 |     def eeg_min(df): 32 |         return df.min(axis=1) 33 | 34 |     def eeg_max(df): 35 |         return df.max(axis=1) 36 | 37 |     def eeg_range(df_max,df_min,eeg_CH): 38 |         return df_max['CH{}eeg_max'.format(eeg_CH)]-df_min['CH{}eeg_min'.format(eeg_CH)]
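The six helpers above each re-scan the same 1280 x 8064 frame; for reference, a hypothetical sketch that collects the same per-trial base statistics into one frame in a single helper (the eeg_base_stats name is illustrative, not part of this script):

    # Illustrative sketch only: gather the row-wise base statistics at once.
    import pandas as pd

    def eeg_base_stats(df, ch):
        stats = pd.concat([df.mean(axis=1), df.median(axis=1), df.std(axis=1),
                           df.min(axis=1), df.max(axis=1)], axis=1)
        stats.columns = ['CH{}eeg_{}'.format(ch, s)
                         for s in ['mean', 'median', 'std', 'min', 'max']]
        # range follows directly from the max and min columns
        stats['CH{}eeg_range'.format(ch)] = (stats['CH{}eeg_max'.format(ch)]
                                             - stats['CH{}eeg_min'.format(ch)])
        return stats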
39 | 40 |     # min-value ratio = Nmin/N 41 |     def eeg_minRatio(all_df,eeg_min,eeg_CH): 42 |         all_df_T = all_df.T 43 |         eeg_min_T = eeg_min.T 44 |         eeg_minRatio_dict = {} 45 |         for i in all_df.index.tolist(): 46 |             num_min = len( all_df_T[i][ all_df_T[i] == eeg_min_T.get_value(index='CH{}eeg_min'.format(eeg_CH),col=i)] ) 47 |             eeg_minRatio_dict.update({i:num_min/8064.0}) 48 |         eeg_minRatio_df = pd.DataFrame.from_dict(data=eeg_minRatio_dict,orient='index') 49 |         eeg_minRatio_df.columns = ['CH{}eeg_minRatio'.format(eeg_CH)] 50 |         return eeg_minRatio_df 51 | 52 |     # max-value ratio = Nmax/N 53 |     def eeg_maxRatio(all_df,eeg_max,eeg_CH): 54 |         all_df_T = all_df.T 55 |         eeg_max_T = eeg_max.T 56 |         eeg_maxRatio_dict = {} 57 |         for i in all_df.index.tolist(): 58 |             num_max = len( all_df_T[i][ all_df_T[i] == eeg_max_T.get_value(index='CH{}eeg_max'.format(eeg_CH),col=i)] ) 59 |             eeg_maxRatio_dict.update({i:num_max/8064.0}) 60 |         eeg_maxRatio_df = pd.DataFrame.from_dict(data=eeg_maxRatio_dict,orient='index') 61 |         eeg_maxRatio_df.columns = ['CH{}eeg_maxRatio'.format(eeg_CH)] 62 |         return eeg_maxRatio_df 63 | 64 |     # mean of the EEG first difference 65 |     def eeg1Diff_mean(all_df): 66 |         eeg1Diff_mean = all_df.diff(periods=1,axis=1).dropna(axis=1).mean(axis=1) 67 |         return eeg1Diff_mean 68 | 69 |     # median of the EEG first difference 70 |     def eeg1Diff_median(all_df): 71 |         eeg1Diff_median = all_df.diff(periods=1,axis=1).dropna(axis=1).median(axis=1) 72 |         return eeg1Diff_median 73 | 74 |     # standard deviation of the EEG first difference 75 |     def eeg1Diff_std(all_df): 76 |         eeg1Diff_std = all_df.diff(periods=1,axis=1).dropna(axis=1).std(axis=1) 77 |         return eeg1Diff_std 78 | 79 |     def eeg1Diff_min(all_df): 80 |         eeg1Diff_min = all_df.diff(periods=1,axis=1).dropna(axis=1).min(axis=1) 81 |         return eeg1Diff_min 82 | 83 |     def eeg1Diff_max(all_df): 84 |         eeg1Diff_max = all_df.diff(periods=1,axis=1).dropna(axis=1).max(axis=1) 85 |         return eeg1Diff_max 86 | 87 |     def eeg1Diff_range(eeg1Diff_max,eeg1Diff_min,eeg_CH): 88 |         return eeg1Diff_max['CH{}eeg1Diff_max'.format(eeg_CH)]-eeg1Diff_min['CH{}eeg1Diff_min'.format(eeg_CH)] 89 | 90 |     def eeg1Diff_minRatio(all_df,eeg1Diff_min,eeg_CH): 91 |         all_df_Diff_T = all_df.diff(periods=1,axis=1).dropna(axis=1).T 92 |         eeg1Diff_min_T = eeg1Diff_min.T 93 |         eeg1Diff_minRatio_dict = {} 94 |         for i in all_df.index.tolist(): 95 |             num_min = len( all_df_Diff_T[i][ all_df_Diff_T[i] == eeg1Diff_min_T.get_value(index='CH{}eeg1Diff_min'.format(eeg_CH),col=i)]) 96 |             eeg1Diff_minRatio_dict.update({i:num_min/8063.0}) 97 |         eeg1Diff_minRatio_df = pd.DataFrame.from_dict(data=eeg1Diff_minRatio_dict,orient='index') 98 |         return eeg1Diff_minRatio_df 99 | 100 |     def eeg1Diff_maxRatio(all_df,eeg1Diff_max,eeg_CH): 101 |         all_df_Diff_T = all_df.diff(periods=1,axis=1).dropna(axis=1).T 102 |         eeg1Diff_max_T = eeg1Diff_max.T 103 |         eeg1Diff_maxRatio_dict = {} 104 |         for i in all_df.index.tolist(): 105 |             num_max = len( all_df_Diff_T[i][all_df_Diff_T[i] == eeg1Diff_max_T.get_value(index='CH{}eeg1Diff_max'.format(eeg_CH),col=i)]) 106 |             eeg1Diff_maxRatio_dict.update({i:num_max/8063.0}) 107 |         eeg1Diff_maxRatio_df = pd.DataFrame.from_dict(data=eeg1Diff_maxRatio_dict,orient='index') 108 |         return eeg1Diff_maxRatio_df 109 | 110 |     def eeg2Diff_std(all_df): 111 |         eeg2Diff_std = all_df.diff(periods=2,axis=1).dropna(axis=1).std(axis=1) 112 |         return eeg2Diff_std 113 | 114 |     def eeg2Diff_min(all_df): 115 |         eeg2Diff_min = all_df.diff(periods=2,axis=1).dropna(axis=1).min(axis=1) 116 |         return eeg2Diff_min 117 | 118 |     def eeg2Diff_max(all_df): 119 |         eeg2Diff_max = all_df.diff(periods=2,axis=1).dropna(axis=1).max(axis=1) 120 |         return eeg2Diff_max 121 | 122 |     def eeg2Diff_range(eeg2Diff_max,eeg2Diff_min,eeg_CH): 123 |         eeg2Diff_range = eeg2Diff_max['CH{}eeg2Diff_max'.format(eeg_CH)]-eeg2Diff_min['CH{}eeg2Diff_min'.format(eeg_CH)] 124 |         return eeg2Diff_range 125 | 
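The ratio helpers above walk the transposed frame row by row through the deprecated get_value accessor; a vectorized equivalent (hypothetical sketch, not called by this script) is:

    # Illustrative sketch: fraction of samples in each row that equal the row's
    # extreme; df.eq(..., axis=0) broadcasts the per-row extreme across columns.
    def eeg_extreme_ratio(df, kind='min'):
        extreme = df.min(axis=1) if kind == 'min' else df.max(axis=1)
        return df.eq(extreme, axis=0).mean(axis=1)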
126 |     def eeg2Diff_minRatio(all_df,eeg2Diff_min,eeg_CH): 127 |         all_df_2Diff_T = all_df.diff(periods=2,axis=1).dropna(axis=1).T 128 |         eeg2Diff_min_T = eeg2Diff_min.T 129 |         eeg2Diff_minRatio_dict = {} 130 |         for i in all_df.index.tolist(): 131 |             num_min = len( all_df_2Diff_T[i][all_df_2Diff_T[i] == eeg2Diff_min_T.get_value(index='CH{}eeg2Diff_min'.format(eeg_CH),col=i)] ) 132 |             eeg2Diff_minRatio_dict.update({i:num_min/8062.0}) 133 |         eeg2Diff_minRatio_df = pd.DataFrame.from_dict(data=eeg2Diff_minRatio_dict,orient='index') 134 |         return eeg2Diff_minRatio_df 135 | 136 |     def eeg2Diff_maxRatio(all_df,eeg2Diff_max,eeg_CH): 137 |         all_df_2Diff_T = all_df.diff(periods=2,axis=1).dropna(axis=1).T 138 |         eeg2Diff_max_T = eeg2Diff_max.T 139 |         eeg2Diff_maxRatio_dict = {} 140 |         for i in all_df.index.tolist(): 141 |             num_max = len( all_df_2Diff_T[i][all_df_2Diff_T[i] == eeg2Diff_max_T.get_value(index='CH{}eeg2Diff_max'.format(eeg_CH),col=i)] ) 142 |             eeg2Diff_maxRatio_dict.update({i:num_max/8062.0}) 143 |         eeg2Diff_maxRatio_df = pd.DataFrame.from_dict(data=eeg2Diff_maxRatio_dict,orient='index') 144 |         return eeg2Diff_maxRatio_df 145 | 146 |     # EEG DFT (FFT) frequency-domain data 147 |     def eegfft(df_data): 148 |         eegfft_df = pd.DataFrame() 149 |         for i in df_data.index.tolist(): 150 |             temp_eegfft = pd.DataFrame(np.fft.fft(df_data.loc[i,:].values)).T 151 |             temp_eegfft.index = [i] 152 |             eegfft_df = eegfft_df.append(temp_eegfft) 153 |         return eegfft_df 154 | 155 |     # EEG frequency-domain mean 156 |     def eegfft_mean(eegfft_df): 157 |         eegfft_mean = eegfft_df.mean(axis=1) 158 |         return eegfft_mean 159 | 160 |     def eegfft_median(eegfft_df): 161 |         eegfft_median = eegfft_df.median(axis=1) 162 |         return eegfft_median 163 | 164 |     def eegfft_std(eegfft_df): 165 |         eegfft_std = eegfft_df.std(axis=1) 166 |         return eegfft_std 167 | 168 |     def eegfft_min(eegfft_df): 169 |         eegfft_min = eegfft_df.min(axis=1) 170 |         return eegfft_min 171 | 172 |     def eegfft_max(eegfft_df): 173 |         eegfft_max = eegfft_df.max(axis=1) 174 |         return eegfft_max 175 | 176 |     def eegfft_range(eegfft_max,eegfft_min,eeg_CH): 177 |         eegfft_range = eegfft_max['CH{}eegfft_max'.format(eeg_CH)]-eegfft_min['CH{}eegfft_min'.format(eeg_CH)] 178 |         return eegfft_range 179 | 180 |     ##########################EEG statistical feature extraction############################# 181 |     #df_data = locals()['CH{}_df_EEG_x'.format(eeg_CH)] 182 |     if True : 183 |         eeg_mean = pd.DataFrame(eeg_mean(df_data),columns=['CH{}eeg_mean'.format(eeg_CH)]) 184 |         eeg_median = pd.DataFrame(eeg_median(df_data),columns=['CH{}eeg_median'.format(eeg_CH)]) 185 |         eeg_std = pd.DataFrame(eeg_std(df_data),columns=['CH{}eeg_std'.format(eeg_CH)]) 186 |         eeg_min = pd.DataFrame(eeg_min(df_data),columns=['CH{}eeg_min'.format(eeg_CH)]) 187 |         eeg_max = pd.DataFrame(eeg_max(df_data),columns=['CH{}eeg_max'.format(eeg_CH)]) 188 |         eeg_range = pd.DataFrame(eeg_range(eeg_max,eeg_min,eeg_CH),columns=['CH{}eeg_range'.format(eeg_CH)]) 189 |         eeg_minRatio = pd.DataFrame(eeg_minRatio(df_data,eeg_min,eeg_CH),columns=['CH{}eeg_minRatio'.format(eeg_CH)]) 190 |         eeg_maxRatio = pd.DataFrame(eeg_maxRatio(df_data,eeg_max,eeg_CH),columns=['CH{}eeg_maxRatio'.format(eeg_CH)]) 191 | 192 |         eeg1Diff_mean = pd.DataFrame( eeg1Diff_mean(df_data),columns=['CH{}eeg1Diff_mean'.format(eeg_CH)]) 193 |         eeg1Diff_median = pd.DataFrame( eeg1Diff_median(df_data),columns=['CH{}eeg1Diff_median'.format(eeg_CH)] )
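        # Caveat on this assembly block: each assignment rebinds a helper's name
        # (eeg_mean, eeg1Diff_std, ...) from the function to its result DataFrame.
        # That only stays safe because the defs above sit inside the same per-channel
        # loop and are re-executed each iteration; collecting the results in a dict
        # would avoid the shadowing entirely.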
194 |         eeg1Diff_std = pd.DataFrame( eeg1Diff_std(df_data),columns=['CH{}eeg1Diff_std'.format(eeg_CH)]) 195 |         eeg1Diff_min = pd.DataFrame( eeg1Diff_min(df_data),columns=['CH{}eeg1Diff_min'.format(eeg_CH)]) 196 |         eeg1Diff_max = pd.DataFrame( eeg1Diff_max(df_data),columns=['CH{}eeg1Diff_max'.format(eeg_CH)]) 197 |         eeg1Diff_range = pd.DataFrame( eeg1Diff_range(eeg1Diff_max,eeg1Diff_min,eeg_CH),columns=['CH{}eeg1Diff_range'.format(eeg_CH)]) 198 |         eeg1Diff_minRatio = eeg1Diff_minRatio(df_data,eeg1Diff_min,eeg_CH) 199 |         eeg1Diff_minRatio.columns=['CH{}eeg1Diff_minRatio'.format(eeg_CH)] 200 |         eeg1Diff_maxRatio = eeg1Diff_maxRatio(df_data,eeg1Diff_max,eeg_CH) 201 |         eeg1Diff_maxRatio.columns=['CH{}eeg1Diff_maxRatio'.format(eeg_CH)] 202 | 203 |         eeg2Diff_std = pd.DataFrame( eeg2Diff_std(df_data),columns=['CH{}eeg2Diff_std'.format(eeg_CH)] ) 204 |         eeg2Diff_min = pd.DataFrame( eeg2Diff_min(df_data),columns=['CH{}eeg2Diff_min'.format(eeg_CH)] ) 205 |         eeg2Diff_max = pd.DataFrame( eeg2Diff_max(df_data),columns=['CH{}eeg2Diff_max'.format(eeg_CH)] ) 206 |         eeg2Diff_range = pd.DataFrame(eeg2Diff_range(eeg2Diff_max,eeg2Diff_min,eeg_CH),columns=['CH{}eeg2Diff_range'.format(eeg_CH)]) 207 |         eeg2Diff_minRatio = eeg2Diff_minRatio(df_data,eeg2Diff_min,eeg_CH) 208 |         eeg2Diff_minRatio.columns=['CH{}eeg2Diff_minRatio'.format(eeg_CH)] 209 |         eeg2Diff_maxRatio = eeg2Diff_maxRatio(df_data,eeg2Diff_max,eeg_CH) 210 |         eeg2Diff_maxRatio.columns=['CH{}eeg2Diff_maxRatio'.format(eeg_CH)] 211 | 212 |         # The FFT is slow to compute; when False, read the previously dumped result instead of recomputing 213 |         if False: 214 |             file_path = "./dump_file/CH{}eegfft_df".format(eeg_CH) 215 |             temp_eegfft = eegfft(df_data) 216 |             #locals()["CH{}eegfft_df".format(eeg_CH)] = pd.DataFrame() 217 |             locals()["CH{}eegfft_df".format(eeg_CH)] = temp_eegfft 218 |             pickle.dump(locals()["CH{}eegfft_df".format(eeg_CH)],open(file_path,"wb")) 219 |             eegfft_df = locals()["CH{}eegfft_df".format(eeg_CH)] 220 |         else: 221 |             file_path = "./dump_file/CH{}eegfft_df".format(eeg_CH) 222 |             eegfft_df = pickle.load(open(file_path,"rb")) 223 | 224 |         eegfft_mean = pd.DataFrame( eegfft_mean(eegfft_df),columns=['CH{}eegfft_mean'.format(eeg_CH)]) 225 |         eegfft_median = pd.DataFrame( eegfft_median(eegfft_df),columns=['CH{}eegfft_median'.format(eeg_CH)]) 226 |         eegfft_std = pd.DataFrame( eegfft_std(eegfft_df),columns=['CH{}eegfft_std'.format(eeg_CH)]) 227 |         eegfft_min = pd.DataFrame( eegfft_min(eegfft_df),columns=['CH{}eegfft_min'.format(eeg_CH)]) 228 |         eegfft_max = pd.DataFrame( eegfft_max(eegfft_df),columns=['CH{}eegfft_max'.format(eeg_CH)]) 229 |         eegfft_range = pd.DataFrame( eegfft_range(eegfft_max,eegfft_min,eeg_CH),columns=['CH{}eegfft_range'.format(eeg_CH)]) 230 | 231 |         feature_list = ['eeg_mean','eeg_median','eeg_std','eeg_min','eeg_max','eeg_range', 232 |                         'eeg_minRatio','eeg_maxRatio','eeg1Diff_mean','eeg1Diff_median', 233 |                         'eeg1Diff_std','eeg1Diff_min','eeg1Diff_max','eeg1Diff_range', 234 |                         'eeg1Diff_minRatio','eeg1Diff_maxRatio','eeg2Diff_std', 235 |                         'eeg2Diff_min','eeg2Diff_max','eeg2Diff_range','eeg2Diff_minRatio', 236 |                         'eeg2Diff_maxRatio','eegfft_mean','eegfft_median','eegfft_std', 237 |                         'eegfft_min','eegfft_max','eegfft_range'] 238 |         #for feat_str in feature_list: 239 |         #    feat_str = "CH{}".format(eeg_CH)+feat_str 240 | 241 |         temp_feature_df = pd.DataFrame() 242 |         for i in feature_list: 243 |             temp_feature_df = pd.concat( [locals()[i],temp_feature_df],axis=1) 244 | 245 |         locals()["CH{}_eeg_feat_df".format(eeg_CH)] = temp_feature_df 246 |         file_path = "./dump_file/CH{}_eeg_feat_df".format(eeg_CH) 247 |         pickle.dump(locals()["CH{}_eeg_feat_df".format(eeg_CH)],open(file_path,"wb"))
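Each pass of the channel loop dumps one CH{n}_eeg_feat_df; a hypothetical follow-up step (the EEG_feature_df name is illustrative) that stitches the 32 per-channel frames into a single 1280-row design matrix, mirroring how the sample-entropy dumps are recombined in EEG_EMD.ipynb:

    # Illustrative sketch: column-concatenate the 32 per-channel feature dumps.
    import pickle
    import pandas as pd

    eeg_feat_all = pd.concat(
        [pickle.load(open("./dump_file/CH{}_eeg_feat_df".format(ch), "rb"))
         for ch in range(1, 33)],
        axis=1)
    pickle.dump(eeg_feat_all, open("./dump_file/EEG_feature_df", "wb"))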
248 |     ###################################################################### 249 | 250 | -------------------------------------------------------------------------------- /DEAP_GSR_feature_extract.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Jan  7 21:02:45 2018 4 | 5 | @author: jinyx 6 | """ 7 | 8 | import pandas as pd 9 | import numpy as np 10 | from sklearn.model_selection import GroupKFold 11 | import pickle 12 | import matplotlib.pyplot as plt 13 | from config import * 14 | 15 | def sc_mean(df): 16 |     return df.mean(axis=1) 17 | 18 | def sc_median(df): 19 |     return df.median(axis=1) 20 | 21 | def sc_std(df): 22 |     return df.std(axis=1) 23 | 24 | def sc_min(df): 25 |     return df.min(axis=1) 26 | 27 | def sc_max(df): 28 |     return df.max(axis=1) 29 | 30 | def sc_range(df_max,df_min): 31 |     return df_max['sc_max']-df_min['sc_min'] 32 | 33 | # min-value ratio = Nmin/N 34 | def sc_minRatio(all_df,sc_min): 35 |     all_df_T = all_df.T 36 |     sc_min_T = sc_min.T 37 |     sc_minRatio_dict = {} 38 |     for i in all_df.index.tolist(): 39 |         num_min = len( all_df_T[i][ all_df_T[i] == sc_min_T.get_value(index='sc_min',col=i)] ) 40 |         sc_minRatio_dict.update({i:num_min/8064.0}) 41 |     sc_minRatio_df = pd.DataFrame.from_dict(data=sc_minRatio_dict,orient='index') 42 |     sc_minRatio_df.columns = ['sc_minRatio'] 43 |     return sc_minRatio_df 44 | 45 | # max-value ratio = Nmax/N 46 | def sc_maxRatio(all_df,sc_max): 47 |     all_df_T = all_df.T 48 |     sc_max_T = sc_max.T 49 |     sc_maxRatio_dict = {} 50 |     for i in all_df.index.tolist(): 51 |         num_max = len( all_df_T[i][ all_df_T[i] == sc_max_T.get_value(index='sc_max',col=i)] ) 52 |         sc_maxRatio_dict.update({i:num_max/8064.0}) 53 |     sc_maxRatio_df = pd.DataFrame.from_dict(data=sc_maxRatio_dict,orient='index') 54 |     sc_maxRatio_df.columns = ['sc_maxRatio'] 55 |     return sc_maxRatio_df 56 | 57 | # mean of the GSR first difference 58 | def sc1Diff_mean(all_df): 59 |     sc1Diff_mean = all_df.diff(periods=1,axis=1).dropna(axis=1).mean(axis=1) 60 |     return sc1Diff_mean 61 | 62 | # median of the GSR first difference 63 | def sc1Diff_median(all_df): 64 |     sc1Diff_median = all_df.diff(periods=1,axis=1).dropna(axis=1).median(axis=1) 65 |     return sc1Diff_median 66 | 67 | # standard deviation of the GSR first difference 68 | def sc1Diff_std(all_df): 69 |     sc1Diff_std = all_df.diff(periods=1,axis=1).dropna(axis=1).std(axis=1) 70 |     return sc1Diff_std 71 | 72 | def sc1Diff_min(all_df): 73 |     sc1Diff_min = all_df.diff(periods=1,axis=1).dropna(axis=1).min(axis=1) 74 |     return sc1Diff_min 75 | 76 | def sc1Diff_max(all_df): 77 |     sc1Diff_max = all_df.diff(periods=1,axis=1).dropna(axis=1).max(axis=1) 78 |     return sc1Diff_max 79 | 80 | def sc1Diff_range(sc1Diff_max,sc1Diff_min): 81 |     return sc1Diff_max['sc1Diff_max']-sc1Diff_min['sc1Diff_min'] 82 | 83 | def sc1Diff_minRatio(all_df,sc1Diff_min): 84 |     all_df_Diff_T = all_df.diff(periods=1,axis=1).dropna(axis=1).T 85 |     sc1Diff_min_T = sc1Diff_min.T 86 |     sc1Diff_minRatio_dict = {} 87 |     for i in all_df.index.tolist(): 88 |         num_min = len( all_df_Diff_T[i][ all_df_Diff_T[i] == sc1Diff_min_T.get_value(index='sc1Diff_min',col=i)]) 89 |         sc1Diff_minRatio_dict.update({i:num_min/8063.0}) 90 |     sc1Diff_minRatio_df = pd.DataFrame.from_dict(data=sc1Diff_minRatio_dict,orient='index') 91 |     return sc1Diff_minRatio_df 92 | 93 | def sc1Diff_maxRatio(all_df,sc1Diff_max): 94 |     all_df_Diff_T = all_df.diff(periods=1,axis=1).dropna(axis=1).T 95 |     sc1Diff_max_T = sc1Diff_max.T 96 |     sc1Diff_maxRatio_dict = {} 97 |     for i in all_df.index.tolist(): 98 |         num_max = len( all_df_Diff_T[i][all_df_Diff_T[i] == sc1Diff_max_T.get_value(index='sc1Diff_max',col=i)]) 99 |         sc1Diff_maxRatio_dict.update({i:num_max/8063.0}) 100 |     sc1Diff_maxRatio_df = pd.DataFrame.from_dict(data=sc1Diff_maxRatio_dict,orient='index') 101 |     return sc1Diff_maxRatio_df
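A toy check (illustrative only) of the diff layout these ratio helpers assume, and why they divide by 8063.0 here (and the second-difference helpers below by 8062.0) rather than 8064:

    # diff(periods=k) leaves k leading NaN columns, so dropna(axis=1) turns an
    # 8064-sample row into 8063 first differences and 8062 second differences.
    import pandas as pd

    toy = pd.DataFrame([[1, 3, 2, 5]])
    d1 = toy.diff(periods=1, axis=1).dropna(axis=1)   # [[2, -1, 3]] : N-1 columns
    d2 = toy.diff(periods=2, axis=1).dropna(axis=1)   # [[1, 2]]     : N-2 columns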
102 | 103 | def sc2Diff_std(all_df): 104 |     sc2Diff_std = all_df.diff(periods=2,axis=1).dropna(axis=1).std(axis=1) 105 |     return sc2Diff_std 106 | 107 | def sc2Diff_min(all_df): 108 |     sc2Diff_min = all_df.diff(periods=2,axis=1).dropna(axis=1).min(axis=1) 109 |     return sc2Diff_min 110 | 111 | def sc2Diff_max(all_df): 112 |     sc2Diff_max = all_df.diff(periods=2,axis=1).dropna(axis=1).max(axis=1) 113 |     return sc2Diff_max 114 | 115 | def sc2Diff_range(sc2Diff_max,sc2Diff_min): 116 |     sc2Diff_range = sc2Diff_max['sc2Diff_max']-sc2Diff_min['sc2Diff_min'] 117 |     return sc2Diff_range 118 | 119 | def sc2Diff_minRatio(all_df,sc2Diff_min): 120 |     all_df_2Diff_T = all_df.diff(periods=2,axis=1).dropna(axis=1).T 121 |     sc2Diff_min_T = sc2Diff_min.T 122 |     sc2Diff_minRatio_dict = {} 123 |     for i in all_df.index.tolist(): 124 |         num_min = len( all_df_2Diff_T[i][all_df_2Diff_T[i] == sc2Diff_min_T.get_value(index='sc2Diff_min',col=i)] ) 125 |         sc2Diff_minRatio_dict.update({i:num_min/8062.0}) 126 |     sc2Diff_minRatio_df = pd.DataFrame.from_dict(data=sc2Diff_minRatio_dict,orient='index') 127 |     return sc2Diff_minRatio_df 128 | 129 | def sc2Diff_maxRatio(all_df,sc2Diff_max): 130 |     all_df_2Diff_T = all_df.diff(periods=2,axis=1).dropna(axis=1).T 131 |     sc2Diff_max_T = sc2Diff_max.T 132 |     sc2Diff_maxRatio_dict = {} 133 |     for i in all_df.index.tolist(): 134 |         num_max = len( all_df_2Diff_T[i][all_df_2Diff_T[i] == sc2Diff_max_T.get_value(index='sc2Diff_max',col=i)] ) 135 |         sc2Diff_maxRatio_dict.update({i:num_max/8062.0}) 136 |     sc2Diff_maxRatio_df = pd.DataFrame.from_dict(data=sc2Diff_maxRatio_dict,orient='index') 137 |     return sc2Diff_maxRatio_df 138 | 139 | # GSR DFT (FFT) frequency-domain data 140 | def scfft(all_df): 141 |     scfft_df = pd.DataFrame() 142 |     for i in all_df.index.tolist(): 143 |         temp_scfft = pd.DataFrame(np.fft.fft(all_df.loc[i,:].values)).T 144 |         temp_scfft.index = [i] 145 |         scfft_df = scfft_df.append(temp_scfft) 146 |     return scfft_df 147 | 148 | # GSR frequency-domain mean 149 | def scfft_mean(scfft_df): 150 |     scfft_mean = scfft_df.mean(axis=1) 151 |     return scfft_mean 152 | 153 | def scfft_median(scfft_df): 154 |     scfft_median = scfft_df.median(axis=1) 155 |     return scfft_median 156 | 157 | def scfft_std(scfft_df): 158 |     scfft_std = scfft_df.std(axis=1) 159 |     return scfft_std 160 | 161 | def scfft_min(scfft_df): 162 |     scfft_min = scfft_df.min(axis=1) 163 |     return scfft_min 164 | 165 | def scfft_max(scfft_df): 166 |     scfft_max = scfft_df.max(axis=1) 167 |     return scfft_max 168 | 169 | def scfft_range(scfft_max,scfft_min): 170 |     scfft_range = scfft_max['scfft_max']-scfft_min['scfft_min'] 171 |     return scfft_range 172 | 173 | def get_123count(df): 174 |     tmp_df =pd.DataFrame() 175 |     for i in range(0,40,1): 176 |         num_1 = len(df[i][ df[i]==1 ]) 177 |         num_2 = len(df[i][ df[i]==2 ]) 178 |         num_3 = len(df[i][ df[i]==3 ]) 179 |         list_num = [num_1,num_2,num_3] 180 |         tmp_df = pd.concat([tmp_df,pd.DataFrame(list_num)],axis=1) 181 |     tmp_df.columns = range(0,40,1) 182 |     tmp_df.index = ['num_1','num_2','num_3'] 183 |     return tmp_df 184 | 185 | 186 | 187 | if __name__ == '__main__': 188 |     #read file 189 |     all_df_y = pickle.load(open("./dump_file/all_df_y","rb")) 190 |     all_df_GSR_x = pickle.load(open("./dump_file/all_df_GSR_x","rb")) 191 | 192 |     ########################################################################### 193 |     if True : 
194 | sc_mean = pd.DataFrame(sc_mean(all_df_GSR_x),columns=['sc_mean']) 195 | sc_median = pd.DataFrame(sc_median(all_df_GSR_x),columns=['sc_median']) 196 | sc_std = pd.DataFrame(sc_std(all_df_GSR_x),columns=['sc_std']) 197 | sc_min = pd.DataFrame(sc_min(all_df_GSR_x),columns=['sc_min']) 198 | sc_max = pd.DataFrame(sc_max(all_df_GSR_x),columns=['sc_max']) 199 | sc_range = pd.DataFrame(sc_range(sc_max,sc_min),columns=['sc_range']) 200 | sc_minRatio = pd.DataFrame(sc_minRatio(all_df_GSR_x,sc_min),columns=['sc_minRatio']) 201 | sc_maxRatio = pd.DataFrame(sc_maxRatio(all_df_GSR_x,sc_max),columns=['sc_maxRatio']) 202 | 203 | sc1Diff_mean = pd.DataFrame( sc1Diff_mean(all_df_GSR_x),columns=['sc1Diff_mean']) 204 | sc1Diff_median = pd.DataFrame( sc1Diff_median(all_df_GSR_x),columns=['sc1Diff_median'] ) 205 | sc1Diff_std = pd.DataFrame( sc1Diff_std(all_df_GSR_x),columns=['sc1Diff_std']) 206 | sc1Diff_min = pd.DataFrame( sc1Diff_min(all_df_GSR_x),columns=['sc1Diff_min']) 207 | sc1Diff_max = pd.DataFrame( sc1Diff_max(all_df_GSR_x),columns=['sc1Diff_max']) 208 | sc1Diff_range = pd.DataFrame( sc1Diff_range(sc1Diff_max,sc1Diff_min),columns=['sc1Diff_range']) 209 | sc1Diff_minRatio = sc1Diff_minRatio(all_df_GSR_x,sc1Diff_min) 210 | sc1Diff_minRatio.columns=['sc1Diff_minRatio'] 211 | sc1Diff_maxRatio = sc1Diff_maxRatio(all_df_GSR_x,sc1Diff_max) 212 | sc1Diff_maxRatio.columns=['sc1Diff_maxRatio'] 213 | 214 | sc2Diff_std = pd.DataFrame( sc2Diff_std(all_df_GSR_x),columns=['sc2Diff_std'] ) 215 | sc2Diff_min = pd.DataFrame( sc2Diff_min(all_df_GSR_x),columns=['sc2Diff_min'] ) 216 | sc2Diff_max = pd.DataFrame( sc2Diff_max(all_df_GSR_x),columns=['sc2Diff_max'] ) 217 | sc2Diff_range = pd.DataFrame(sc2Diff_range(sc2Diff_max,sc2Diff_min),columns=['sc2Diff_range']) 218 | sc2Diff_minRatio = sc2Diff_minRatio(all_df_GSR_x,sc2Diff_min) 219 | sc2Diff_minRatio.columns=['sc2Diff_minRatio'] 220 | sc2Diff_maxRatio = sc2Diff_maxRatio(all_df_GSR_x,sc2Diff_max) 221 | sc2Diff_maxRatio.columns=['sc2Diff_maxRatio'] 222 | 223 | if False: 224 | scfft_df = scfft(all_df_GSR_x) 225 | pickle.dump(scfft_df,open("./dump_file/scfft_df","wb")) 226 | else: 227 | scfft_df = pickle.load(open("./dump_file/scfft_df","rb")) 228 | 229 | scfft_mean = pd.DataFrame( scfft_mean(scfft_df),columns=['scfft_mean']) 230 | scfft_median = pd.DataFrame( scfft_median(scfft_df),columns=['scfft_median']) 231 | scfft_std = pd.DataFrame( scfft_std(scfft_df),columns=['scfft_std']) 232 | scfft_min = pd.DataFrame( scfft_min(scfft_df),columns=['scfft_min']) 233 | scfft_max = pd.DataFrame( scfft_max(scfft_df),columns=['scfft_max']) 234 | scfft_range = pd.DataFrame( scfft_range(scfft_max,scfft_min),columns=['scfft_range']) 235 | 236 | feature_list = ['sc_mean','sc_median','sc_std','sc_min','sc_max','sc_range', 237 | 'sc_minRatio','sc_maxRatio','sc1Diff_mean','sc1Diff_median', 238 | 'sc1Diff_std','sc1Diff_min','sc1Diff_max','sc1Diff_range', 239 | 'sc1Diff_minRatio','sc1Diff_maxRatio','sc2Diff_std', 240 | 'sc2Diff_min','sc2Diff_max','sc2Diff_range','sc2Diff_minRatio', 241 | 'sc2Diff_maxRatio','scfft_mean','scfft_median','scfft_std', 242 | 'scfft_min','scfft_max','scfft_range'] 243 | temp_feature_df = pd.DataFrame() 244 | for i in feature_list: 245 | temp_feature_df = pd.concat( [locals()[i],temp_feature_df],axis=1) 246 | 247 | GSR_feature_df = temp_feature_df 248 | pickle.dump(GSR_feature_df,open("./dump_file/GSR_feature_df","wb")) 249 | ###################################################################### 250 | 251 | if True: 252 | ''' 253 | print(all_df_y) 254 | 
254 |         all_df_y_copy = all_df_y.copy()
255 |         all_df_y_copy['emotion'] = 0
256 |         all_df_y_copy['emotion'][ all_df_y_copy['valence'] >= 6] = 2
257 |         all_df_y_copy['emotion'][ (all_df_y_copy['valence'] < 6) & (all_df_y_copy['valence'] >= 4)] = 1
258 |         all_df_y_copy['emotion'][ all_df_y_copy['valence'] < 4] = 0
259 |         all_df_y_mutiLable = all_df_y_copy[['emotion']]
260 |         pickle.dump(all_df_y_mutiLable,open("./dump_file/all_df_y_mutiLable","wb"))
261 |         '''
262 |         print(all_df_y)
263 |         all_df_y_copy = all_df_y.copy()
264 |         all_df_y_copy['emotion'] = 0
265 |         all_df_y_copy.loc[(all_df_y_copy['valence'] >= 5) & (all_df_y_copy['arousal'] >= 5),'emotion'] = 0  # HV/HA
266 |         all_df_y_copy.loc[(all_df_y_copy['valence'] < 5) & (all_df_y_copy['arousal'] >= 5),'emotion'] = 1  # LV/HA
267 |         all_df_y_copy.loc[(all_df_y_copy['valence'] < 5) & (all_df_y_copy['arousal'] < 5),'emotion'] = 2  # LV/LA
268 |         all_df_y_copy.loc[(all_df_y_copy['valence'] >= 5) & (all_df_y_copy['arousal'] < 5),'emotion'] = 3  # HV/LA
269 |         all_df_y_mutiLable = all_df_y_copy[['emotion']]
270 |         pickle.dump(all_df_y_mutiLable,open("./dump_file/all_df_y_mutiLable","wb"))
271 | 
272 |         all_df_y_copy = all_df_y.copy()
273 |         all_df_y_copy['emotion_2'] = 0
274 |         all_df_y_copy.loc[(all_df_y_copy['valence'] >= 5) & (all_df_y_copy['arousal'] >= 5),'emotion_2'] = 1
275 |         all_df_y_2c = all_df_y_copy[['emotion_2']]
276 |         pickle.dump(all_df_y_2c,open("./dump_file/all_df_y_2c","wb"))
277 | 
278 |         all_df_y_copy = all_df_y.copy()
279 |         all_df_y_valence = all_df_y_copy[['valence']]
280 |         pickle.dump(all_df_y_valence,open("./dump_file/all_df_y_valence","wb"))
281 | 
282 | 
283 | 
284 | 
285 | 
286 | 
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 
294 | 
295 | 
296 | 
297 | 
--------------------------------------------------------------------------------
/GCF_2cRSP__TimeDomain.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import multiprocessing\n",
 10 |     "from GCForest import gcForest\n",
 11 |     "import pandas as pd\n",
 12 |     "import numpy as np\n",
 13 |     "import pickle \n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "from sklearn.model_selection import train_test_split\n",
 16 |     "from sklearn.model_selection import StratifiedKFold\n",
 17 |     "from sklearn.preprocessing import MinMaxScaler\n",
 18 |     "from sklearn.metrics import accuracy_score\n",
 19 |     "from sklearn.metrics import f1_score\n",
 20 |     "from sklearn.metrics import precision_score\n",
 21 |     "from sklearn.metrics import recall_score\n",
 22 |     "from sklearn.preprocessing import Imputer\n",
 23 |     "import warnings\n",
 24 |     "warnings.filterwarnings(\"ignore\")\n",
 25 |     "# used to time the whole run\n",
 26 |     "import datetime\n",
 27 |     "starttime = datetime.datetime.now()"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 2,
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "name": "stdout",
 37 |      "output_type": "stream",
 38 |      "text": [
 39 |       "       valence  arousal  dominance  liking  2cArousal  2cValence\n",
 40 |       "s01_0     7.71     7.60       6.90    7.83          1          1\n",
 41 |       "s01_1     8.10     7.31       7.28    8.47          1          1\n",
 42 |       "s01_2     8.58     7.54       9.00    7.08          1          1\n",
 43 |       "s01_3     4.94     6.01       6.12    8.06          1          0\n",
 44 |       "s01_4     6.96     3.92       7.19    6.05          0          1\n",
 45 |       "[seed:0]****************************************************\n",
 46 |       "RSP running multi-grain scan\n",
 47 |       "Slicing Sequence...\n",
 48 |       "Training MGS Random Forests...\n",
 49 |       "Slicing Sequence...\n",
 50 |       "(896, 492)\n",
 51 |       "(384, 492)\n",
 52 |       "Adding/Training Layer, n_layer=1\n",
 53 |       "Layer validation accuracy = 0.65\n",
 54 |       "Adding/Training Layer, n_layer=2\n",
 55 |       "Layer validation accuracy = 0.65\n",
 56 |       "ACC 0.6692708333333334\n",
 57 |       "F1 0.758095238095238\n",
 58 |       "Recal 0.9170506912442397\n",
 59 |       "Precision 0.6461038961038961\n",
 60 |       "[seed:100]****************************************************\n",
 61 |       "RSP running multi-grain scan\n",
 62 |       "Slicing Sequence...\n",
 63 |       "Training MGS Random Forests...\n",
 64 |       "Slicing Sequence...\n",
 65 |       "(896, 492)\n",
 66 |       "(384, 492)\n",
 67 |       "Adding/Training Layer, n_layer=1\n",
 68 |       "Layer validation accuracy = 0.6666666666666666\n",
 69 |       "Adding/Training Layer, n_layer=2\n",
 70 |       "Layer validation accuracy = 0.6666666666666666\n",
 71 |       "ACC 0.6614583333333334\n",
 72 |       "F1 0.7556390977443609\n",
 73 |       "Recal 0.9262672811059908\n",
 74 |       "Precision 0.638095238095238\n",
 75 |       "[seed:200]****************************************************\n",
 76 |       "RSP running multi-grain scan\n",
 77 |       "Slicing Sequence...\n",
 78 |       "Training MGS Random Forests...\n",
 79 |       "Slicing Sequence...\n",
 80 |       "(896, 492)\n",
 81 |       "(384, 492)\n",
 82 |       "Adding/Training Layer, n_layer=1\n",
 83 |       "Layer validation accuracy = 0.6555555555555556\n",
 84 |       "Adding/Training Layer, n_layer=2\n",
 85 |       "Layer validation accuracy = 0.6444444444444445\n",
 86 |       "ACC 0.6197916666666666\n",
 87 |       "F1 0.7276119402985074\n",
 88 |       "Recal 0.8986175115207373\n",
 89 |       "Precision 0.6112852664576802\n",
 90 |       "[seed:300]****************************************************\n",
 91 |       "RSP running multi-grain scan\n",
 92 |       "Slicing Sequence...\n",
 93 |       "Training MGS Random Forests...\n",
 94 |       "Slicing Sequence...\n",
 95 |       "(896, 492)\n",
 96 |       "(384, 492)\n",
 97 |       "Adding/Training Layer, n_layer=1\n",
 98 |       "Layer validation accuracy = 0.6444444444444445\n",
 99 |       "Adding/Training Layer, n_layer=2\n",
100 |       "Layer validation accuracy = 0.65\n",
101 |       "Adding/Training Layer, n_layer=3\n",
102 |       "Layer validation accuracy = 0.65\n",
103 |       "ACC 0.671875\n",
104 |       "F1 0.7604562737642586\n",
105 |       "Recal 0.9216589861751152\n",
106 |       "Precision 0.6472491909385113\n",
107 |       "[seed:400]****************************************************\n",
108 |       "RSP running multi-grain scan\n",
109 |       "Slicing Sequence...\n",
110 |       "Training MGS Random Forests...\n",
111 |       "Slicing Sequence...\n",
112 |       "(896, 492)\n",
113 |       "(384, 492)\n",
114 |       "Adding/Training Layer, n_layer=1\n",
115 |       "Layer validation accuracy = 0.6444444444444445\n",
116 |       "Adding/Training Layer, n_layer=2\n",
117 |       "Layer validation accuracy = 0.6444444444444445\n",
118 |       "ACC 0.6432291666666666\n",
119 |       "F1 0.7419962335216571\n",
120 |       "Recal 0.9078341013824884\n",
121 |       "Precision 0.6273885350318471\n",
122 |       "[seed:500]****************************************************\n",
123 |       "RSP running multi-grain scan\n",
124 |       "Slicing Sequence...\n",
125 |       "Training MGS Random Forests...\n",
126 |       "Slicing Sequence...\n",
127 |       "(896, 492)\n",
128 |       "(384, 492)\n",
129 |       "Adding/Training Layer, n_layer=1\n",
130 |       "Layer validation accuracy = 0.6611111111111111\n",
131 |       "Adding/Training Layer, n_layer=2\n",
132 |       "Layer validation accuracy = 0.6555555555555556\n",
133 |       "ACC 0.6614583333333334\n",
134 |       "F1 0.7490347490347491\n",
135 |       "Recal 0.8940092165898618\n",
136 |       "Precision 0.6445182724252492\n",
137 |       "[seed:600]****************************************************\n",
138 |       "RSP running multi-grain scan\n",
139 |       "Slicing Sequence...\n",
140 |       "Training MGS Random Forests...\n",
141 |       "Slicing Sequence...\n",
142 |       "(896, 492)\n",
492)\n", 143 | "(384, 492)\n", 144 | "Adding/Training Layer, n_layer=1\n", 145 | "Layer validation accuracy = 0.65\n", 146 | "Adding/Training Layer, n_layer=2\n", 147 | "Layer validation accuracy = 0.65\n", 148 | "ACC 0.65625\n", 149 | "F1 0.7509433962264151\n", 150 | "Recal 0.9170506912442397\n", 151 | "Precision 0.6357827476038339\n", 152 | "[seed:700]****************************************************\n", 153 | "RSP running multi-grain scan\n", 154 | "Slicing Sequence...\n", 155 | "Training MGS Random Forests...\n", 156 | "Slicing Sequence...\n", 157 | "(896, 492)\n", 158 | "(384, 492)\n", 159 | "Adding/Training Layer, n_layer=1\n", 160 | "Layer validation accuracy = 0.5666666666666667\n", 161 | "Adding/Training Layer, n_layer=2\n", 162 | "Layer validation accuracy = 0.5666666666666667\n", 163 | "ACC 0.671875\n", 164 | "F1 0.7649253731343285\n", 165 | "Recal 0.9447004608294931\n", 166 | "Precision 0.6426332288401254\n", 167 | "[seed:800]****************************************************\n", 168 | "RSP running multi-grain scan\n", 169 | "Slicing Sequence...\n", 170 | "Training MGS Random Forests...\n", 171 | "Slicing Sequence...\n", 172 | "(896, 492)\n", 173 | "(384, 492)\n", 174 | "Adding/Training Layer, n_layer=1\n", 175 | "Layer validation accuracy = 0.6222222222222222\n", 176 | "Adding/Training Layer, n_layer=2\n", 177 | "Layer validation accuracy = 0.6222222222222222\n", 178 | "ACC 0.6484375\n", 179 | "F1 0.7438330170777987\n", 180 | "Recal 0.9032258064516129\n", 181 | "Precision 0.632258064516129\n", 182 | "[seed:900]****************************************************\n", 183 | "RSP running multi-grain scan\n", 184 | "Slicing Sequence...\n", 185 | "Training MGS Random Forests...\n", 186 | "Slicing Sequence...\n", 187 | "(896, 492)\n", 188 | "(384, 492)\n", 189 | "Adding/Training Layer, n_layer=1\n", 190 | "Layer validation accuracy = 0.6388888888888888\n", 191 | "Adding/Training Layer, n_layer=2\n", 192 | "Layer validation accuracy = 0.65\n", 193 | "Adding/Training Layer, n_layer=3\n", 194 | "Layer validation accuracy = 0.6388888888888888\n", 195 | "ACC 0.6640625\n", 196 | "F1 0.7579737335834896\n", 197 | "Recal 0.9308755760368663\n", 198 | "Precision 0.6392405063291139\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "#读取Y\n", 204 | "all_df_y = pickle.load(open(\"./dump_file/all_df_y\",\"rb\"))\n", 205 | "all_df_y['2cArousal'] = 0\n", 206 | "all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1\n", 207 | "all_df_y['2cValence'] = 0\n", 208 | "all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1\n", 209 | "print(all_df_y.head(5))\n", 210 | "\n", 211 | "#读取1个通道的RSP数据,每个通道包含32×40=1280个信号样本\n", 212 | "#每个样本向量大小为8064点(63s*128Hz)\n", 213 | "all_df_RSP_x = pickle.load(open(\"./dump_file/all_df_RSP_x\",\"rb\"))\n", 214 | "#y = all_df_y[['2cArousal']]\n", 215 | "y = all_df_y[['2cValence']]\n", 216 | "for seed in [0,100,200,300,400,500,600,700,800,900]:\n", 217 | " print(\"[seed:{}]****************************************************\".format(seed))\n", 218 | " xTrainIdx = pickle.load(open(\"./dump_file/xTrainIdx_{}\".format(seed),\"rb\"))\n", 219 | " xTestIdx = pickle.load(open(\"./dump_file/xTestIdx_{}\".format(seed),\"rb\"))\n", 220 | " y_tr = y.loc[xTrainIdx]\n", 221 | " y_te = y.loc[xTestIdx]\n", 222 | "\n", 223 | " RSPTrainSet = all_df_RSP_x.loc[xTrainIdx]\n", 224 | " RSPTestSet = all_df_RSP_x.loc[xTestIdx]\n", 225 | "\n", 226 | " myWindowsSize = 256\n", 227 | " myStrideSize = 64\n", 228 | " gcf = gcForest(shape_1X=8064, window=myWindowsSize, 
229 |     "                min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)\n",
230 |     "    if True: \n",
231 |     "        print(\"RSP running multi-grain scan\")\n",
232 |     "        xTrain,yTrain = RSPTrainSet.values,y_tr.values\n",
233 |     "        xTest = RSPTestSet.values \n",
234 |     "        RSP_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)\n",
235 |     "        RSP_mgsTestVector = gcf.mg_scanning(xTest)\n",
236 |     "        filePath = \"./dump_file_V2/RSP_mgsTrainVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
237 |     "        pickle.dump(RSP_mgsTrainVector,open(filePath,\"wb\"))\n",
238 |     "        filePath = \"./dump_file_V2/RSP_mgsTestVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
239 |     "        pickle.dump(RSP_mgsTestVector,open(filePath,\"wb\"))\n",
240 |     "    else:\n",
241 |     "        filePath = \"./dump_file_V2/RSP_mgsTrainVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
242 |     "        RSP_mgsTrainVector = pickle.load(open(filePath,\"rb\"))\n",
243 |     "        filePath = \"./dump_file_V2/RSP_mgsTestVector_{}_{}_{}\".format(myWindowsSize,myStrideSize,seed)\n",
244 |     "        RSP_mgsTestVector = pickle.load(open(filePath,\"rb\"))\n",
245 |     "\n",
246 |     "    X_tr_vector = RSP_mgsTrainVector\n",
247 |     "    X_te_vector = RSP_mgsTestVector\n",
248 |     "    print(X_tr_vector.shape)\n",
249 |     "    print(X_te_vector.shape)\n",
250 |     "\n",
251 |     "    # the scan vectors contain missing values, so fill them\n",
252 |     "    X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values\n",
253 |     "    X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values\n",
254 |     "\n",
255 |     "    _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)\n",
256 |     "\n",
257 |     "    pred_proba = gcf.cascade_forest(X_te_vector_fillna)\n",
258 |     "    tmp = np.mean(pred_proba, axis=0)\n",
259 |     "    preds = np.argmax(tmp, axis=1)\n",
260 |     "    print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n",
261 |     "    print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n",
262 |     "    print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n",
263 |     "    print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "code",
268 |    "execution_count": 3,
269 |    "metadata": {},
270 |    "outputs": [
271 |     {
272 |      "name": "stdout",
273 |      "output_type": "stream",
274 |      "text": [
275 |       "ACC 0.4739583333333333\n",
276 |       "F1 0.4898989898989899\n",
277 |       "Recal 0.4470046082949309\n",
278 |       "Precision 0.5418994413407822\n"
279 |      ]
280 |     }
281 |    ],
282 |    "source": [
283 |     "#preds = np.zeros(shape=(384,))\n",
284 |     "#preds = np.ones(shape=(384,))\n",
285 |     "preds = np.ones(shape=(384,))  # placeholder, overwritten below with coin-flip guesses\n",
286 |     "for i in range(0,384):\n",
287 |     "    temp = np.random.choice(a=2, size=1, replace=False, p=[0.5,0.5])\n",
288 |     "    preds[i] = temp\n",
289 |     "print(\"ACC\",accuracy_score(y_true=y_te, y_pred=preds))\n",
290 |     "print(\"F1\",f1_score(y_true=y_te, y_pred=preds))\n",
291 |     "print(\"Recal\",recall_score(y_true=y_te, y_pred=preds))\n",
292 |     "print(\"Precision\",precision_score(y_true=y_te, y_pred=preds))"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": null,
298 |    "metadata": {},
299 |    "outputs": [],
300 |    "source": []
301 |   }
302 |  ],
303 |  "metadata": {
304 |   "kernelspec": {
305 |    "display_name": "Python 3",
306 |    "language": "python",
307 |    "name": "python3"
308 |   },
309 |   "language_info": {
310 |    "codemirror_mode": {
311 |     "name": "ipython",
312 |     "version": 3
313 |    },
314 |    "file_extension": ".py",
315 |    "mimetype": "text/x-python",
316 |    "name": "python",
317 |    "nbconvert_exporter": "python",
318 |    "pygments_lexer": "ipython3",
319 |    "version": "3.6.5"
320 |   }
321 |  },
322 |  "nbformat": 4,
323 |  "nbformat_minor": 2
"nbformat_minor": 2 324 | } 325 | --------------------------------------------------------------------------------