├── Pre-Processing.py
├── k_cross_fold_train.py
├── model.py
├── ppnet_utils.py
├── test.py
└── train.py

/Pre-Processing.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from scipy import signal
from ppnet_utils import split_train_test
"""
1. Keep only records longer than eight minutes
2. Split the PPG signal into 8 s windows with 6 s overlap; extract SBP and DBP from the ABP signal in the same way
3. Downsample the PPG windows
4. Split into training and test sets
"""


# Split the signal into 8 s windows with 6 s overlap
def split_signal(ppg_signal, stride=250, interval=1000):

    segmentArr = []  # window width of 1000 samples

    start = 0
    while start + interval <= len(ppg_signal):
        segmentArr.append(ppg_signal[start:start + interval])  # take 1000 samples
        start += stride  # 0, 250, ... and so on
    return np.array(segmentArr)


# Downsample each 8 s window to 250 samples
# by averaging groups of scaling_factor samples
def down_sampling(segmentArr, scaling_factor=4):
    downSampArr = []
    for seg in segmentArr:
        ds_seg = []
        for i in range(len(seg)):
            if i % scaling_factor == 0:
                ds_seg.append(np.mean(seg[i:i + scaling_factor]))  # average of scaling_factor samples
        downSampArr.append(ds_seg)

    return np.array(downSampArr)


# Extract SBP and DBP from the ABP signal
def cal_BP(downSamp_ABP):

    SBP = np.max(downSamp_ABP, axis=1).reshape(-1, 1)
    DBP = np.min(downSamp_ABP, axis=1).reshape(-1, 1)

    return np.hstack((SBP, DBP))

if __name__ == '__main__':

    minMinutes = 8  # 8 minutes
    original_path = '../PPNet/Dataset_orig/'  # path to the raw data

    for partID in range(1, 5):
        cols_ppgRaw = ['partID', 'caseID', 'startT'] + ['f' + str(i) for i in range(1000)]
        df_ppg_raw = pd.DataFrame(columns=cols_ppgRaw)

        # cols_ppgDown = ['partID', 'caseID', 'startT'] + ['f' + str(i) for i in range(250)]
        # df_ppg_down = pd.DataFrame(columns=cols_ppgDown)

        cols_bpRaw = ['partID', 'caseID', 'startT', 'SBP', 'DBP']
        df_BP_raw = pd.DataFrame(columns=cols_bpRaw)

        # cols_bpDown = ['partID', 'caseID', 'startT', 'SBP', 'DBP']
        # df_BP_down = pd.DataFrame(columns=cols_bpDown)

        readPath = original_path + 'Part_' + str(partID) + '/'  # path to this part's raw files
        idNum = len(os.listdir(readPath))

        for caseID in range(1, idNum + 1):
            if caseID % 100 == 0:
                print('partID:', partID, '; caseID:', caseID)

            fileName = str(caseID) + '.csv'
            file = pd.read_csv(readPath + fileName)  # read the file

            # Drop records whose PPG/ECG is shorter than eight minutes: 8 * 60 * 125 = 60000 samples
            signals = pd.DataFrame(file)
            # ECG = np.array(signals['ECG'])
            PPG = np.array(signals['PPG'])
            ABP = np.array(signals['ABP'])

            minSamples = minMinutes * 60 * 125  # 8 minutes
            if len(PPG) >= minSamples:
                # Process the ppg signals
                segment_PPG = split_signal(PPG, stride=250, interval=1000)
                downSamp_PPG = down_sampling(segment_PPG, scaling_factor=4)
                segment_ABP = split_signal(ABP, stride=250, interval=1000)
                downSamp_ABP = down_sampling(segment_ABP, scaling_factor=4)

                for i in range(segment_PPG.shape[0]):  # append one window (row) at a time

                    # PPG signal
                    tmp = [partID, caseID, i * 250] + list(segment_PPG[i])
                    df_ppg_raw = pd.concat([df_ppg_raw, pd.DataFrame([tmp], columns=cols_ppgRaw)], ignore_index=True)
                    # tmp = [partID, caseID, i * 250] + list(downSamp_PPG[i])
                    # df_ppg_down = pd.concat([df_ppg_down, pd.DataFrame([tmp], columns=cols_ppgDown)], ignore_index=True)
                    # BP labels
                    rawSBP, rawDBP = np.round(np.max(segment_ABP[i]), 4), np.round(np.min(segment_ABP[i]), 4)
                    # downSBP, downDBP = np.round(np.max(downSamp_ABP[i]), 4), np.round(np.min(downSamp_ABP[i]), 4)

                    tmp = [partID, caseID, i * 250] + list([rawSBP, rawDBP])
                    df_BP_raw = pd.concat([df_BP_raw, pd.DataFrame([tmp], columns=cols_bpRaw)], ignore_index=True)
                    # tmp = [partID, caseID, i * 250] + list([downSBP, downDBP])
                    # df_BP_down = pd.concat([df_BP_down, pd.DataFrame([tmp], columns=cols_bpDown)], ignore_index=True)


        df_ppg_raw.to_csv('../PPNet/Datasets/df_PPGraw_part' + str(partID) + '.csv', index=False, float_format='%.4f')
        # df_ppg_down.to_csv('../PPNet/Datasets/df_PPGdown_part' + str(partID) + '.csv', index=False, float_format='%.4f')

        df_BP_raw.to_csv('../PPNet/Datasets/df_BPraw_part' + str(partID) + '.csv', index=False)
        # df_BP_down.to_csv('../PPNet/Datasets/df_BPdown_part' + str(partID) + '.csv', index=False)

    #
    # # Split into training and test sets
    # split_train_test('../PPNet/', 'train_data.csv', 'labels.csv', 0.1)
    #
    #
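    # Window arithmetic behind the defaults above (a sketch; assumes the 125 Hz sampling
    # rate implied by the 8 * 60 * 125 check in __main__):
    #   8 s window -> 125 Hz * 8 s = 1000 samples (interval=1000)
    #   2 s stride -> 125 Hz * 2 s = 250 samples (stride=250), so consecutive windows overlap by 6 s
    #   downsampling by 4 -> 1000 / 4 = 250 samples per window fed to the network
    # Quick shape check on synthetic data (uncomment to run):
    # fs = 125
    # dummy = np.arange(fs * 60)  # one minute of fake samples
    # segs = split_signal(dummy, stride=2 * fs, interval=8 * fs)
    # assert segs.shape == ((60 - 8) // 2 + 1, 1000)
    # assert down_sampling(segs).shape == (segs.shape[0], 250)
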
--------------------------------------------------------------------------------
/k_cross_fold_train.py:
--------------------------------------------------------------------------------
import torch
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
import os
import pandas as pd
from torch.utils.data import DataLoader, Dataset, TensorDataset
from model import *
import time
import sys
from ppnet_utils import cal_MAE, cal_RMSE


# Get the training and validation sets for one fold of k-fold cross-validation
def get_kfold_data(k, i, X, y):
    # Return the training and validation data for fold i+1 (i = 0 -> k-1); X_train is the training set, X_valid the validation set
    fold_size = X.shape[0] // k  # samples per fold: total number of samples / number of folds

    val_start = i * fold_size
    if i != k - 1:
        val_end = (i + 1) * fold_size
        X_valid, y_valid = X[val_start:val_end], y[val_start:val_end]
        X_train = torch.cat((X[0:val_start], X[val_end:]), dim=0)
        y_train = torch.cat((y[0:val_start], y[val_end:]), dim=0)
    else:  # the last fold
        X_valid, y_valid = X[val_start:], y[val_start:]  # if not evenly divisible, the extra cases go into the last fold
        X_train = X[0:val_start]
        y_train = y[0:val_start]

    return X_train, y_train, X_valid, y_valid


# Model training
def traink(model, X_train, y_train, X_val, y_val, BATCH_SIZE, learning_rate, TOTAL_EPOCHS):

    # training and validation loaders
    train_loader = DataLoader(TensorDataset(X_train, y_train), BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), BATCH_SIZE, shuffle=True)
    # mean squared error loss
    criterion = nn.MSELoss()
    # Adam optimizer
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

    # training and validation losses
    losses = []
    val_losses = []

    for epoch in range(TOTAL_EPOCHS):
        model.train()

        for i, (ppg_data, labels) in enumerate(train_loader):
            ppg_data = ppg_data.float()
            labels = torch.squeeze(labels.type(torch.FloatTensor))
            # print(ppg_data.size(), labels.size())  # torch.Size([100, 1, 250])

            optimizer.zero_grad()  # reset gradients
            outputs = model(ppg_data)
            # print(outputs.size())  # torch.Size([100, 2])

            # compute the loss
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

            # if (i + 1) % 1000 == 0:
            #     # print the loss every 1000 batches
            #     print('Epoch : %d/%d, Iter : %d/%d, Loss: %.4f' % (epoch + 1, TOTAL_EPOCHS,
            #                                                        i + 1, len(X_train) // BATCH_SIZE,
            #                                                        loss.item()))

        # save the final model
        if epoch == TOTAL_EPOCHS - 1:
            print('saving epoch%d model' % (epoch + 1))
            state = {
                'model': model.state_dict(),
                'epoch': epoch + 1
            }  # the optimizer state could also be saved here
            if not os.path.isdir('checkpoint'):
                os.mkdir('checkpoint')
            torch.save(state, './checkpoint/PPNet_epoch_%d.ckpt' % (epoch + 1))

        # at the end of each epoch, compute the validation loss, MAE and RMSE
        model.eval()
        val_loss = 0
        mae_arr_SBP = []
        rmse_arr_SBP = []
        mae_arr_DBP = []
        rmse_arr_DBP = []
        mae_arr_BP = []
        rmse_arr_BP = []

        eval_dic = {}

        with torch.no_grad():
            for i, (ppg_data, labels) in enumerate(val_loader):
                ppg_data = ppg_data.float()
                labels = torch.squeeze(labels.type(torch.FloatTensor))
                y_hat = model(ppg_data)

                # validation loss
                loss = criterion(y_hat, labels).item()  # batch average loss
                val_loss += loss * len(labels)  # sum up batch loss
                # validation MAE & RMSE
                mae_arr_SBP.append(cal_MAE(y_hat[:, 0], labels[:, 0]))
                rmse_arr_SBP.append(cal_RMSE(y_hat[:, 0], labels[:, 0]))
                mae_arr_DBP.append(cal_MAE(y_hat[:, 1], labels[:, 1]))
                rmse_arr_DBP.append(cal_RMSE(y_hat[:, 1], labels[:, 1]))
                mae_arr_BP.append(cal_MAE(y_hat, labels))
                rmse_arr_BP.append(cal_RMSE(y_hat, labels))

        val_losses.append(val_loss / len(X_val))

        eval_dic['mae_SBP'] = sum(mae_arr_SBP) / len(mae_arr_SBP)
        eval_dic['rmse_SBP'] = sum(rmse_arr_SBP) / len(rmse_arr_SBP)
        eval_dic['mae_DBP'] = sum(mae_arr_DBP) / len(mae_arr_DBP)
        eval_dic['rmse_DBP'] = sum(rmse_arr_DBP) / len(rmse_arr_DBP)
        eval_dic['mae_BP'] = sum(mae_arr_BP) / len(mae_arr_BP)
        eval_dic['rmse_BP'] = sum(rmse_arr_BP) / len(rmse_arr_BP)

    # # plot the training loss curve
    # plt.figure()
    # plt.plot(losses)
    # plt.show()

    return losses, val_losses, eval_dic

def k_fold(k, X_train, y_train, model_choice, num_epochs, learning_rate, batch_size):
    train_loss_sum, valid_loss_sum, mae_sum_SBP, rmse_sum_SBP, mae_sum_DBP, rmse_sum_DBP = 0, 0, 0, 0, 0, 0

    for i in range(k):
        print('*' * 25, 'Fold', i + 1, '*' * 25)
        data = get_kfold_data(k, i, X_train, y_train)  # get this fold's training and validation data

        # choose the model: CNN vs CNN_LSTM
        if model_choice == 'CNN':
            net = CNN()  # instantiate the chosen model
        elif model_choice == 'CNN_LSTM':
            net = CNN_LSTM()
        else:
            print('Choose right Model!')
            return

        # train on this fold
        train_loss, val_loss, eval_dic = traink(net, *data, batch_size, learning_rate, num_epochs)

        for item in eval_dic.items():
            print(item)

        train_loss_sum += train_loss[-1]
        valid_loss_sum += val_loss[-1]
        mae_sum_SBP += eval_dic['mae_SBP']
        rmse_sum_SBP += eval_dic['rmse_SBP']
        mae_sum_DBP += eval_dic['mae_DBP']
        rmse_sum_DBP += eval_dic['rmse_DBP']

    print('\n', '#' * 10, 'Final k-fold cross-validation results', '#' * 10)
    print('Average train loss for {} is {}'.format(model_choice, train_loss_sum / k))
    print('Average valid loss for {} is {}'.format(model_choice, valid_loss_sum / k))
    print('Average MAE for SBP is {}'.format(mae_sum_SBP / k))
    print('Average RMSE for SBP is {}'.format(rmse_sum_SBP / k))
    print('Average MAE for DBP is {}'.format(mae_sum_DBP / k))
    print('Average RMSE for DBP is {}'.format(rmse_sum_DBP / k))

    return

if __name__ == '__main__':
    data = pd.read_csv('../PPNet/Datasets/min_max_Nor_PPG.csv')
    data = np.array(data)[:, 1:]
    X_train = torch.tensor(data).view(-1, 1, 250)
    print('shape of x:', X_train.size())

    labels = pd.read_csv('../PPNet/Datasets/labels_min_max.csv')
    labels = np.array(labels)[:, 1:]
    y_train = torch.tensor(labels)
    print('shape of y:', y_train.size())

    # filename = 'save_result'
    # output = sys.stdout
    # outputfile = open(filename + '.txt', 'w')
    # sys.stdout = outputfile
    #
    # model = CNN_Model()
    # data = get_kfold_data(10, 6, X_train, y_train)
    # epoch_arr = [1, 10, 25, 50]
    # for TOTAL_EPOCHS in epoch_arr:
    #     start_time = time.time()
    #     print('epoch', TOTAL_EPOCHS, file=outputfile)
    #     train_loss, val_loss, eval_dict = traink(model, *data, 100, 1e-3, TOTAL_EPOCHS)
    #     for i in eval_dict.items():
    #         print(i, file=outputfile)
    #     print('total time for CNN:{} mins'.format((time.time() - start_time) / 60), file=outputfile)
    #
    # outputfile.close()

    start_time = time.time()
    k_fold(10, X_train, y_train, model_choice='CNN', num_epochs=50, learning_rate=1e-3, batch_size=100)
    print('total time for CNN:{} mins'.format((time.time() - start_time) / 60))
    start_time1 = time.time()
    k_fold(10, X_train, y_train, model_choice='CNN_LSTM', num_epochs=50, learning_rate=1e-3, batch_size=100)
    print('total time for CNN_LSTM:{} mins'.format((time.time() - start_time1) / 60))
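    # Fold bookkeeping behind get_kfold_data (a sketch with made-up sizes):
    # with 1005 windows and k = 10, fold_size = 1005 // 10 = 100, so folds 1-9 each validate
    # on 100 windows and the last fold keeps the 105 leftover windows as its validation set.
    # X_demo = torch.zeros(1005, 1, 250)
    # y_demo = torch.zeros(1005, 2)
    # Xt, yt, Xv, yv = get_kfold_data(10, 9, X_demo, y_demo)  # last fold (i = 9)
    # assert Xv.shape[0] == 105 and Xt.shape[0] == 900
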
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
import torch
import torchvision
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


'''
input.permute(0,2,1)
batch_size = first
'''

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        # two 1-D convolutional layers
        self.conv1ds = nn.Sequential(
            # Convolution1 ----- output: torch.Size([1, 20, 60])
            nn.Conv1d(in_channels=1, out_channels=20, kernel_size=9),
            # nn.BatchNorm1d(20),
            nn.ReLU(True),
            nn.MaxPool1d(kernel_size=4),
            nn.Dropout(0.5),

            # Convolution2 ----- output: torch.Size([1, 20, 13])
            nn.Conv1d(in_channels=20, out_channels=20, kernel_size=9),
            # nn.BatchNorm1d(20),
            nn.ReLU(True),
            nn.MaxPool1d(kernel_size=4),
            nn.Dropout(0.5)
        )

        self.fc1 = nn.Linear(20 * 13, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 2)

    def forward(self, x):
        x = self.conv1ds(x)
        x = x.view(x.size(0), -1)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class CNN_LSTM(nn.Module):
    def __init__(self):
        super(CNN_LSTM, self).__init__()

        # two 1-D convolutional layers
        self.conv1ds = nn.Sequential(
            # Convolution1 ----- output: torch.Size([1, 20, 60])
            nn.Conv1d(in_channels=1, out_channels=20, kernel_size=9),

            nn.ReLU(True),
            nn.MaxPool1d(kernel_size=4),
            nn.Dropout(0.5),

            # Convolution2 ----- output: torch.Size([1, 20, 13]) -> 1 batch, 13 time steps, 20 features per step
            nn.Conv1d(in_channels=20, out_channels=20, kernel_size=9),

            nn.ReLU(True),
            nn.MaxPool1d(kernel_size=4),
            nn.Dropout(0.5)
        )

        # two LSTMs: 64 cells and 128 cells, tanh activation
        self.lstm1 = nn.LSTM(input_size=20, hidden_size=64, num_layers=1, batch_first=True, dropout=0.1)
        self.lstm2 = nn.LSTM(input_size=64, hidden_size=128, num_layers=1, batch_first=True, dropout=0.1)

        # fully connected layer
        self.fc = nn.Linear(128, 2)

    def forward(self, x):
        x = self.conv1ds(x)
        x = x.view(-1, 13, 20)  # reshape the conv output to (batch, seq_len=13, features=20) for the LSTM
        out1, (h1, c1) = self.lstm1(x)
        out2, (h2, c2) = self.lstm2(out1)

        h2 = h2.view(h2.size(1), -1)
        y = self.fc(h2)
        return y


class CNN_LSTM_SDBP(nn.Module):
    def __init__(self):
        super(CNN_LSTM_SDBP, self).__init__()

        # two 1-D convolutional layers
        self.conv1ds = nn.Sequential(
            # Convolution1 ----- output: torch.Size([1, 20, 60])
            nn.Conv1d(in_channels=1, out_channels=20, kernel_size=9),

            nn.ReLU(True),
            nn.MaxPool1d(kernel_size=4),
            nn.Dropout(0.5),

            # Convolution2 ----- output: torch.Size([1, 20, 13]) -> 1 batch, 13 time steps, 20 features per step
            nn.Conv1d(in_channels=20, out_channels=20, kernel_size=9),

            nn.ReLU(True),
            nn.MaxPool1d(kernel_size=4),
            nn.Dropout(0.5)
        )

        # two LSTMs: 64 cells and 128 cells, tanh activation
        self.lstm1 = nn.LSTM(input_size=20, hidden_size=64, num_layers=1, batch_first=True, dropout=0.1)
        self.lstm2 = nn.LSTM(input_size=64, hidden_size=128, num_layers=1, batch_first=True, dropout=0.1)

        # fully connected layer (single output: SBP or DBP)
        self.fc = nn.Linear(128, 1)

    def forward(self, x):
        x = self.conv1ds(x)
        x = x.view(-1, 13, 20)  # reshape the conv output to (batch, seq_len=13, features=20) for the LSTM
        out1, (h1, c1) = self.lstm1(x)
        out2, (h2, c2) = self.lstm2(out1)

        h2 = h2.view(h2.size(1), -1)
        y = self.fc(h2)
        return y



# net = CNN_LSTM_SDBP()
# x = torch.randn(100, 1, 250)  # (batch of 100, 250 time steps, each sample 1-dimensional)
# ht = net(x)
#
# print(ht.size())
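
# Shape bookkeeping for the conv stack above (how 20 * 13 and the (13, 20) view arise),
# assuming the 250-sample windows produced by Pre-Processing.py:
#   250 -> Conv1d(k=9): 250 - 9 + 1 = 242 -> MaxPool1d(4): floor(242 / 4) = 60
#   60  -> Conv1d(k=9): 60 - 9 + 1  = 52  -> MaxPool1d(4): floor(52 / 4)  = 13
# so each window leaves the conv stack as a (batch, 20, 13) tensor. Quick check:
# with torch.no_grad():
#     print(CNN_LSTM_SDBP().conv1ds(torch.randn(1, 1, 250)).shape)  # torch.Size([1, 20, 13])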
--------------------------------------------------------------------------------
/ppnet_utils.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from scipy import signal
import torch
from sklearn.metrics import mean_squared_error, mean_absolute_error



def split_train_test(read_path, datas_name, labels_name, test_rate):
    datas = np.array(pd.read_csv(read_path + datas_name))
    labels = np.array(pd.read_csv(read_path + labels_name))

    print(datas.shape, labels.shape)

    assert (datas.shape[0] == labels.shape[0])
    total_len = datas.shape[0]  # total number of samples

    test_len = int(total_len * test_rate)  # number of test samples
    test_index = np.random.choice(np.arange(total_len), test_len, replace=False)
    train_index = np.delete(np.arange(total_len), test_index)

    # save the test split
    test_data = []
    test_label = []
    for i in test_index:
        test_data.append(datas[i, :])
        test_label.append(labels[i, :])
    test_data = np.vstack(test_data)
    test_label = np.vstack(test_label)
    print(test_data.shape, test_label.shape)

    test_data = pd.DataFrame(test_data)
    test_label = pd.DataFrame(test_label)
    test_data.to_csv('../PPNet/val_set/test_data.csv')
    test_label.to_csv('../PPNet/val_set/test_label.csv')

    # save the training split
    train_data = []
    train_label = []
    for i in train_index:
        train_data.append(datas[i, :])
        train_label.append(labels[i, :])
    train_data = np.vstack(train_data)
    train_label = np.vstack(train_label)
    print(train_data.shape, train_label.shape)
    train_data = pd.DataFrame(train_data)
    train_data.to_csv('../PPNet/train_set/train_data.csv')
    train_label = pd.DataFrame(train_label)
    train_label.to_csv('../PPNet/train_set/train_label.csv')



'''
Min-max normalization used for the 10-fold cross-validation
'''
def min_max_Normalize(data):
    min_features = np.min(data, axis=0)
    max_features = np.max(data, axis=0)

    nor = (data - min_features) / (max_features - min_features)

    return nor, (min_features, max_features)

def min_max_Unnormalized(nor_data, min_data, max_data):
    return nor_data * (max_data - min_data) + min_data



'''
Normalization with fixed minimum and maximum values
'''
def correct_Normalize(data, feature):
    dict_norm = {'PPG': [0, 4], 'SBP': [80, 180], 'DBP': [60, 130]}

    min_features = dict_norm[feature][0]
    max_features = dict_norm[feature][1]

    nor = np.round((data - min_features) / (max_features - min_features), 4)
    nor[nor < 0] = 0
    nor[nor > 1] = 1

    return nor


def correct_Unnormalized(nor_data, feature):

    dict_norm = {'PPG': [0, 4], 'SBP': [80, 180], 'DBP': [60, 130]}
    min_features = dict_norm[feature][0]
    max_features = dict_norm[feature][1]
    return nor_data * (max_features - min_features) + min_features

# mean absolute error (MAE)
def cal_MAE(y, y_pred):
    return torch.mean(torch.abs(y - y_pred))

# root mean squared error (RMSE)
def cal_RMSE(y, y_pred):
    return torch.sqrt(torch.mean(torch.pow(y - y_pred, 2)))



# standard deviation (STD) of the error
def cal_STD(y, y_pred):
    return torch.std(abs(y_pred - y))

# mean error (ME)
def cal_ME(y, y_pred):
    return torch.mean(y_pred - y)


# correlation coefficient
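# A minimal torch-style sketch in the spirit of the metrics above (cal_Corr is only a
# suggested name; np.corrcoef or scipy.stats.pearsonr would work just as well):
def cal_Corr(y, y_pred):
    y_c = y - torch.mean(y)
    p_c = y_pred - torch.mean(y_pred)
    return torch.sum(y_c * p_c) / (torch.sqrt(torch.sum(y_c ** 2)) * torch.sqrt(torch.sum(p_c ** 2)))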


# if __name__ == '__main__':
#
#     features = pd.read_csv('D:/DeepLearning/BP-estimation/BP-python/PPNet/Datasets/train_data.csv')  # training data location
#     features = pd.DataFrame(features)
#     labels = pd.read_csv('D:/DeepLearning/BP-estimation/BP-python/PPNet/Datasets/labels.csv')  # labels location
#     labels = pd.DataFrame(labels)
#
#     features = np.array(features)[:, 1:]
#     labels = np.array(labels)[:, 1:]
#
#     # normalize the training data
#     nor1 = min_max_Normalize(features)
#     min_max_Nor = pd.DataFrame(nor1)
#     min_max_Nor.to_csv('D:/DeepLearning/BP-estimation/BP-python/PPNet/Dataset/min_max_Nor_PPG.csv')
#     nor2 = zero_score_Normalize(features)
#     nor2 = pd.DataFrame(nor2)
#     nor2.to_csv('D:/DeepLearning/BP-estimation/BP-python/PPNet/Dataset/zero_score_PPG.csv')
#
#     # normalize the labels
#     nor1_labels = min_max_Normalize(labels)
#     print(nor1_labels.shape)
#     nor1_labels = pd.DataFrame(nor1_labels)
#     nor1_labels.to_csv('D:/DeepLearning/BP-estimation/BP-python/PPNet/Dataset/labels_min_max.csv')
#     nor2_labels = zero_score_Normalize(labels)
#     print(nor2_labels.shape)
#     nor2_labels = pd.DataFrame(nor2_labels)
#     nor2_labels.to_csv('D:/DeepLearning/BP-estimation/BP-python/PPNet/Dataset/labels_zero_score.csv')

#     min_BPs = np.min(labels, axis=0)
#     max_BPs = np.max(labels, axis=0)
#     print(min_BPs, max_BPs)  # [63.7708 50.] [199.9093 154.9724]

--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import torch
from model import *
from k_cross_fold_train import get_kfold_data
from torch.utils.data import DataLoader, Dataset, TensorDataset
import pandas as pd
import numpy as np
from ppnet_utils import *
from train import GetLoader
from sklearn.metrics import mean_absolute_error

def test_model(BP_choice, check_point):


    # read the test data
    X_val = np.array(pd.read_csv('../PPNet/val_set/test_data.csv'))[:, 2:]
    Y_val = np.array(pd.read_csv('../PPNet/val_set/test_label.csv'))[:, 2:]

    if BP_choice == 'SBP':
        Y_val = Y_val[:, 0]
    elif BP_choice == 'DBP':
        Y_val = Y_val[:, 1]
    else:
        print('choose right BP!')
        return
    print('shape of x,y:', X_val.shape, Y_val.shape)

    # normalize the data
    test_data_nor = correct_Normalize(X_val, 'PPG')
    test_label_nor = correct_Normalize(Y_val, BP_choice)


    # wrap in a DataLoader
    test_data_nor = test_data_nor.reshape(-1, 1, 250)
    test_label_nor = test_label_nor.reshape(-1, 1)
    print(test_data_nor.shape, test_label_nor.shape)
    torch_data = GetLoader(test_data_nor, test_label_nor)
    val_loader = DataLoader(torch_data, batch_size=100, shuffle=True)

    net = CNN_LSTM_SDBP()
    checkpoint = check_point
    net.load_state_dict(checkpoint['model'])
    net.eval()  # switch off dropout for evaluation

    nmae_arr_BP = []
    nrmse_arr_BP = []

    mae_arr_BP = []
    me_arr_BP = []
    std_arr_BP = []

    with torch.no_grad():
        for data in val_loader:
            ppg_data, labels = data
            ppg_data = ppg_data.float()
            labels = labels.type(torch.FloatTensor)

            y_pred = net(ppg_data)
            # print(y_pred.size(), labels.size())
            nmae_arr_BP.append(cal_MAE(y_pred, labels))
            nrmse_arr_BP.append(cal_RMSE(y_pred, labels))


            y_pred_unor = correct_Unnormalized(y_pred, BP_choice)
            labels_unor = correct_Unnormalized(labels, BP_choice)

            mae_arr_BP.append(cal_MAE(y_pred_unor, labels_unor))
            me_arr_BP.append(cal_ME(y_pred_unor, labels_unor))
            std_arr_BP.append(cal_STD(y_pred_unor, labels_unor))


    # print(len(me_arr_BP))
    print('NMAE', sum(nmae_arr_BP) / len(nmae_arr_BP))
    print('NRMSE', sum(nrmse_arr_BP) / len(nrmse_arr_BP))
    print('ME=', sum(me_arr_BP) / len(me_arr_BP))
    print('MAE', sum(mae_arr_BP) / len(mae_arr_BP))
    print('STD=', sum(std_arr_BP) / len(std_arr_BP))


if __name__ == "__main__":

    checkpoint = torch.load("../PPNet/checkpoint/CNN_LSTM_DBP_lr_0_epoch_100_batch_100_droput0.1_v2.ckpt")
    test_model(BP_choice='DBP', check_point=checkpoint)
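    # How the mmHg-scale metrics relate to the normalized ones (worked example using the fixed
    # [60, 130] DBP range in correct_Unnormalized; the 0.37 value is only illustrative):
    #   a normalized prediction of 0.37 maps back to 0.37 * (130 - 60) + 60 = 85.9 mmHg,
    #   and a normalized error of 0.01 corresponds to 0.01 * 70 = 0.7 mmHg for DBP.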
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import pandas as pd
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import DataLoader
from model import *
import os
import matplotlib.pyplot as plt
from ppnet_utils import *


# GetLoader subclasses Dataset and overrides __getitem__() and __len__()
class GetLoader(torch.utils.data.Dataset):
    # store the data and labels
    def __init__(self, data_root, data_label):
        self.data = data_root
        self.label = data_label
    # index is produced by the DataLoader's batching; return the sample together with its label
    def __getitem__(self, index):
        data = self.data[index]
        labels = self.label[index]
        return data, labels
    # return the dataset length so the DataLoader knows how to split it into batches
    def __len__(self):
        return len(self.data)

def train_model(BP_choice, batch_size, total_epoches, is_plot=True):

    # check whether a GPU is available
    use_gpu = torch.cuda.is_available()
    print('Use GPU:', use_gpu)

    # read the training data
    X_train = np.array(pd.read_csv('../PPNet/train_set/train_data.csv'))[:, 2:]
    Y_train = np.array(pd.read_csv('../PPNet/train_set/train_label.csv'))[:, 2:]

    if BP_choice == 'SBP':
        Y_train = Y_train[:, 0]
    elif BP_choice == 'DBP':
        Y_train = Y_train[:, 1]
    else:
        print('choose right BP!')
        return
    print('shape of x,y:', X_train.shape, Y_train.shape)

    # normalize the data
    train_data_nor = correct_Normalize(X_train, 'PPG')
    train_label_nor = correct_Normalize(Y_train, BP_choice)

    # wrap in a DataLoader
    train_data_nor = train_data_nor.reshape(-1, 1, 250)
    train_label_nor = train_label_nor.reshape(-1, 1)
    print(train_data_nor.shape, train_label_nor.shape)

    torch_data = GetLoader(train_data_nor, train_label_nor)
    train_loader = DataLoader(torch_data, batch_size=batch_size, shuffle=True)

    """
    Start training
    """

    # model and mean squared error loss
    model = CNN_LSTM_SDBP()
    criterion = nn.MSELoss()
    if use_gpu:
        model = model.cuda()
        criterion = criterion.cuda()

    # Adam optimizer
    optimizer = torch.optim.Adam(params=model.parameters())

    train_loss = []
    for epoch in range(total_epoches):
        for i, (ppg_data, labels) in enumerate(train_loader):

            ppg_data = ppg_data.float()
            labels = labels.type(torch.FloatTensor)

            if use_gpu:
                ppg_data = ppg_data.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()  # reset gradients
            outputs = model(ppg_data)
            # print(labels.size())
            # print(outputs.size())

            # compute the loss
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                train_loss.append(loss.item())

        # save the final model
        if epoch == total_epoches - 1:
            print('saving epoch%d model' % (epoch + 1))
            state = {
                'model': model.state_dict(),
                'epoch': epoch + 1
            }  # the optimizer state could also be saved here
            if not os.path.isdir('checkpoint'):
                os.mkdir('checkpoint')
            torch.save(state, './checkpoint/CNN_LSTM_%s__epoch_%d_batch_%d_droput0.1_v2.ckpt' % (BP_choice, epoch + 1, batch_size))
    if is_plot:
        plt.figure()
        plt.plot(train_loss)
        plt.savefig("CNNLSTM_loss{}__epoch{}_batch_{}.jpg".format(BP_choice, total_epoches, batch_size))

if __name__ == "__main__":

    train_model(BP_choice='SBP', batch_size=100, total_epoches=100)
    train_model(BP_choice='DBP', batch_size=100, total_epoches=100)

--------------------------------------------------------------------------------