├── DepressionCollected
    ├── Classification
    │   ├── AudioModelChecking.py
    │   ├── AudioTraditionalClassifiers.py
    │   ├── FuseModelChecking.py
    │   ├── TextModelChecking.py
    │   ├── TextTraditionalClassifiers.py
    │   ├── audio_features_whole.py
    │   ├── audio_gru_whole.py
    │   ├── fuse_net_whole.py
    │   ├── text_bilstm_whole.py
    │   └── text_features_whole.py
    ├── DAICFeatureExtarction
    │   ├── feature_extraction.py
    │   └── queries.txt
    └── Regression
    │   ├── AudioModelChecking.py
    │   ├── audio_bilstm_perm.py
    │   ├── fuse_net.py
    │   └── text_bilstm_perm.py
└── README.md


/DepressionCollected/Classification/AudioModelChecking.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.autograd import Variable
  4 | from torch.nn import functional as F
  5 | import torch.optim as optim
  6 | from sklearn.metrics import confusion_matrix
  7 | import numpy as np
  8 | import pandas as pd
  9 | import wave
 10 | import re
 11 | import os
 12 | import tensorflow.compat.v1 as tf
 13 | import random
 14 | import itertools
 15 | from audio_gru_whole import AudioBiLSTM
 16 | 
 17 | from sklearn.preprocessing import StandardScaler
 18 | import pickle
 19 | 
 20 | class BiLSTM(nn.Module):
 21 |     def __init__(self, rnn_layers, dropout, num_classes, audio_hidden_dims, audio_embed_size):
 22 |         super(BiLSTM, self).__init__()
 23 | 
 24 |         self.lstm_net_audio = nn.GRU(audio_embed_size, audio_hidden_dims,
 25 |                                 num_layers=rnn_layers, dropout=dropout, batch_first=True)
 26 | 
 27 |         self.fc_audio = nn.Sequential(
 28 |             nn.Dropout(dropout),
 29 |             nn.Linear(audio_hidden_dims, audio_hidden_dims),
 30 |             nn.ReLU(),
 31 |             nn.Dropout(dropout),
 32 |             nn.Linear(audio_hidden_dims, num_classes),
 33 |             # nn.ReLU(),
 34 |             nn.Softmax(dim=1)
 35 |         )
 36 | 
 37 |     def forward(self, x):
 38 |         x, _ = self.lstm_net_audio(x)
 39 |         # x = self.bn(x)
 40 |         x = x.sum(dim=1)
 41 |         out = self.fc_audio(x)
 42 |         return out
 43 | 
 44 | # prefix = os.path.abspath(os.path.join(os.getcwd(), "."))
 45 | # audio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/Audio/whole_samples_clf_avid256.npz'))['arr_0'], axis=2)
 46 | # audio_targets = np.load(os.path.join(prefix, 'Features/Audio/whole_labels_clf_avid256.npz'))['arr_0']
 47 | 
 48 | prefix = os.path.abspath(os.path.join(os.getcwd(), "."))
 49 | audio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_clf_256.npz'))['arr_0'], axis=2)
 50 | audio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_clf_256.npz'))['arr_0']
 51 | 
 52 | audio_dep_idxs = np.where(audio_targets == 1)[0]
 53 | audio_non_idxs = np.where(audio_targets == 0)[0]
 54 | 
 55 | def standard_confusion_matrix(y_test, y_test_pred):
 56 |     """
 57 |     Make confusion matrix with format:
 58 |                   -----------
 59 |                   | TP | FP |
 60 |                   -----------
 61 |                   | FN | TN |
 62 |                   -----------
 63 |     Parameters
 64 |     ----------
 65 |     y_true : ndarray - 1D
 66 |     y_pred : ndarray - 1D
 67 | 
 68 |     Returns
 69 |     -------
 70 |     ndarray - 2D
 71 |     """
 72 |     [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)
 73 |     return np.array([[tp, fp], [fn, tn]])
 74 | 
 75 | def model_performance(y_test, y_test_pred_proba):
 76 |     """
 77 |     Evaluation metrics for network performance.
 78 |     """
 79 |     # y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]
 80 |     y_test_pred = y_test_pred_proba
 81 | 
 82 |     # Computing confusion matrix for test dataset
 83 |     conf_matrix = standard_confusion_matrix(y_test, y_test_pred)
 84 |     print("Confusion Matrix:")
 85 |     print(conf_matrix)
 86 | 
 87 |     return y_test_pred, conf_matrix
 88 | 
 89 | config = {
 90 |     'num_classes': 2,
 91 |     'dropout': 0.5,
 92 |     'rnn_layers': 2,
 93 |     'embedding_size': 256,
 94 |     'batch_size': 4,
 95 |     'epochs': 100,
 96 |     'learning_rate': 1e-5,
 97 |     'hidden_dims': 256,
 98 |     'bidirectional': False,
 99 |     'cuda': False
100 | }
101 | 
102 | # audio_lstm_model = torch.load(os.path.join(prefix, 'Model/Classification/Audio/BiLSTM_gru_vlad256_256_0.80.pt'))
103 | # audio_lstm_model = torch.load(os.path.join(prefix, 'Model/Classification/Audio3/BiLSTM_gru_vlad256_256_0.89.pt'))
104 | # audio_lstm_model = torch.load(os.path.join(prefix, 'Model/Classification/Audio2/BiLSTM_gru_vlad256_256_0.65.pt'))
105 | 
106 | # model = BiLSTM(config['rnn_layers'], config['dropout'], config['num_classes'], \
107 | #          config['hidden_dims'], config['embedding_size'])
108 |          
109 | # model_state_dict = {}
110 | # model_state_dict['lstm_net_audio.weight_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l0']
111 | # model_state_dict['lstm_net_audio.weight_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l0']
112 | # model_state_dict['lstm_net_audio.bias_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l0']
113 | # model_state_dict['lstm_net_audio.bias_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l0']
114 | 
115 | # model_state_dict['lstm_net_audio.weight_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l1']
116 | # model_state_dict['lstm_net_audio.weight_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l1']
117 | # model_state_dict['lstm_net_audio.bias_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l1']
118 | # model_state_dict['lstm_net_audio.bias_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l1']
119 | 
120 | # model_state_dict['fc_audio.1.weight'] = audio_lstm_model.state_dict()['fc_audio.1.weight']
121 | # model_state_dict['fc_audio.1.bias'] = audio_lstm_model.state_dict()['fc_audio.1.bias']
122 | # model_state_dict['fc_audio.4.weight'] = audio_lstm_model.state_dict()['fc_audio.4.weight']
123 | # model_state_dict['fc_audio.4.bias'] = audio_lstm_model.state_dict()['fc_audio.4.bias']
124 | # model_state_dict = audio_lstm_model.state_dict()
125 | # model.load_state_dict(model_state_dict, strict=False)
126 | 
127 | def evaluate(model, test_idxs):
128 |     model.eval()
129 |     batch_idx = 1
130 |     total_loss = 0
131 |     pred = torch.empty(config['batch_size'], 1).type(torch.LongTensor)
132 |     # X_test = audio_features[test_dep_idxs+test_non_idxs]
133 |     # Y_test = audio_targets[test_dep_idxs+test_non_idxs]
134 |     X_test = audio_features[test_idxs]
135 |     Y_test = audio_targets[test_idxs]
136 |     global max_train_acc, max_acc,max_f1
137 |     for i in range(0, X_test.shape[0], config['batch_size']):
138 |         if i + config['batch_size'] > X_test.shape[0]:
139 |             x, y = X_test[i:], Y_test[i:]
140 |         else:
141 |             x, y = X_test[i:(i+config['batch_size'])], Y_test[i:(i+config['batch_size'])]
142 |         if config['cuda']:
143 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()
144 |         else:
145 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), Variable(torch.from_numpy(y))
146 |         with torch.no_grad():
147 |             output = model(x.squeeze(2))
148 |         pred = torch.cat((pred, output.data.max(1, keepdim=True)[1]))
149 |         
150 |     y_test_pred, conf_matrix = model_performance(Y_test, pred[config['batch_size']:])
151 |     print('Calculating additional test metrics...')
152 |     accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)
153 |     precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])
154 |     recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])
155 |     f1_score = 2 * (precision * recall) / (precision + recall)
156 |     print("Accuracy: {}".format(accuracy))
157 |     print("Precision: {}".format(precision))
158 |     print("Recall: {}".format(recall))
159 |     print("F1-Score: {}\n".format(f1_score))
160 |     print('='*89)
161 |     return precision, recall, f1_score
162 | 
163 | 
164 | # evaluate(audio_features_test, fuse_targets_test, audio_lstm_model)
165 | # evaluate(model)
166 | 
167 | idxs_paths = ['train_idxs_0.63_1.npy', 'train_idxs_0.65_2.npy', 'train_idxs_0.60_3.npy']
168 | audio_model_paths = ['BiLSTM_gru_vlad256_256_0.67_1.pt', 'BiLSTM_gru_vlad256_256_0.67_2.pt', 'BiLSTM_gru_vlad256_256_0.63_3.pt']
169 | ps, rs, fs = [], [], []
170 | for fold in range(3):
171 |     train_idxs_tmp = np.load(os.path.join(prefix, 'Features/TextWhole/{}'.format(idxs_paths[fold])), allow_pickle=True)
172 |     test_idxs_tmp = list(set(list(audio_dep_idxs)+list(audio_non_idxs)) - set(train_idxs_tmp))
173 |     audio_lstm_model = torch.load(os.path.join(prefix, 'Model/ClassificationWhole/Audio/{}'.format(audio_model_paths[fold])))
174 | 
175 |     train_idxs, test_idxs = [], []
176 |     for idx in train_idxs_tmp:
177 |         if idx in audio_dep_idxs:
178 |             feat = audio_features[idx]
179 |             count = 0
180 |             resample_idxs = [0,1,2,3,4,5]
181 |             for i in itertools.permutations(feat, feat.shape[0]):
182 |                 if count in resample_idxs:
183 |                     audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
184 |                     audio_targets = np.hstack((audio_targets, 1))
185 |                     train_idxs.append(len(audio_features)-1)
186 |                 count += 1
187 |         else:
188 |             train_idxs.append(idx)
189 | 
190 |     for idx in test_idxs_tmp:
191 |         if idx in audio_dep_idxs:
192 |             feat = audio_features[idx]
193 |             count = 0
194 |             # resample_idxs = random.sample(range(6), 4)
195 |             resample_idxs = [0,1,4,5]
196 |             for i in itertools.permutations(feat, feat.shape[0]):
197 |                 if count in resample_idxs:
198 |                     audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
199 |                     audio_targets = np.hstack((audio_targets, 1))
200 |                     test_idxs.append(len(audio_features)-1)
201 |                 count += 1
202 |         else:
203 |             test_idxs.append(idx)
204 |     p, r, f = evaluate(audio_lstm_model, test_idxs)
205 |     ps.append(p)
206 |     rs.append(r)
207 |     fs.append(f)
208 | print('precison: {} \n recall: {} \n f1 score: {}'.format(np.mean(ps), np.mean(rs), np.mean(fs)))
209 | 
210 | 
211 | 


--------------------------------------------------------------------------------
/DepressionCollected/Classification/AudioTraditionalClassifiers.py:
--------------------------------------------------------------------------------
  1 | from sklearn.model_selection import KFold
  2 | import numpy as np
  3 | import pandas as pd
  4 | import os
  5 | import pickle
  6 | import random
  7 | import itertools
  8 | from sklearn.metrics import confusion_matrix
  9 | from sklearn.model_selection import train_test_split
 10 | 
 11 | 
 12 | prefix = os.path.abspath(os.path.join(os.getcwd(), "."))
 13 | audio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_clf_256.npz'))['arr_0'], axis=2)
 14 | audio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_clf_256.npz'))['arr_0']
 15 | audio_dep_idxs_tmp = np.where(audio_targets == 1)[0]
 16 | audio_non_idxs = np.where(audio_targets == 0)[0]
 17 | 
 18 | def model_performance(y_test, y_test_pred_proba):
 19 |     """
 20 |     Evaluation metrics for network performance.
 21 |     """
 22 | #     y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]
 23 |     y_test_pred = y_test_pred_proba
 24 | 
 25 |     # Computing confusion matrix for test dataset
 26 |     conf_matrix = standard_confusion_matrix(y_test, y_test_pred)
 27 |     print("Confusion Matrix:")
 28 |     print(conf_matrix)
 29 | 
 30 |     return y_test_pred, conf_matrix
 31 | 
 32 | def standard_confusion_matrix(y_test, y_test_pred):
 33 |     [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)
 34 |     return np.array([[tp, fp], [fn, tn]])
 35 | 
 36 | train_idxs_tmps = [np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.63_1.npy'), allow_pickle=True),
 37 | np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.65_2.npy'), allow_pickle=True),
 38 | np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_3.npy'), allow_pickle=True)]
 39 | precs, recs, f1s = [], [], []
 40 | for idx_idx, train_idxs_tmp in enumerate(train_idxs_tmps):
 41 |     test_idxs_tmp = list(set(list(audio_dep_idxs_tmp)+list(audio_non_idxs)) - set(train_idxs_tmp))
 42 |     train_idxs, test_idxs = [], []
 43 |     # depression data augmentation
 44 |     for idx in train_idxs_tmp:
 45 |         if idx in audio_dep_idxs_tmp:
 46 |             feat = audio_features[idx]
 47 |             count = 0
 48 |             resample_idxs = [0,1,2,3,4,5]
 49 |             for i in itertools.permutations(feat, feat.shape[0]):
 50 |                 if count in resample_idxs:
 51 |                     audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
 52 |                     audio_targets = np.hstack((audio_targets, 1))
 53 |                     train_idxs.append(len(audio_features)-1)
 54 |                 count += 1
 55 |         else:
 56 |             train_idxs.append(idx)
 57 | 
 58 |     for idx in test_idxs_tmp:
 59 |         if idx in audio_dep_idxs_tmp:
 60 |             feat = audio_features[idx]
 61 |             count = 0
 62 |             # resample_idxs = random.sample(range(6), 4)
 63 |             resample_idxs = [0,1,4,5]
 64 |             for i in itertools.permutations(feat, feat.shape[0]):
 65 |                 if count in resample_idxs:
 66 |                     audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
 67 |                     audio_targets = np.hstack((audio_targets, 1))
 68 |                     test_idxs.append(len(audio_features)-1)
 69 |                 count += 1
 70 |         else:
 71 |             test_idxs.append(idx)
 72 | 
 73 |     X_train = audio_features[train_idxs]
 74 |     Y_train = audio_targets[train_idxs]
 75 |     X_test = audio_features[test_idxs]
 76 |     Y_test = audio_targets[test_idxs]
 77 | 
 78 |     # Decision Tree
 79 |     # from sklearn import tree
 80 |     # clf = tree.DecisionTreeClassifier(max_depth=20)
 81 | 
 82 |     # svm
 83 |     # from sklearn.svm import SVC
 84 |     # clf = SVC(kernel='sigmoid')
 85 | 
 86 |     # rf
 87 |     from sklearn.ensemble import RandomForestClassifier
 88 |     clf = RandomForestClassifier(n_estimators=50)
 89 | 
 90 |     # lr
 91 |     # from sklearn.linear_model import LogisticRegression
 92 |     # clf = LogisticRegression(solver='newton-cg')
 93 | 
 94 |     clf.fit([f.flatten() for f in X_train], Y_train)
 95 |     pred = clf.predict([f.flatten() for f in X_test])
 96 |     # clf.fit([f.sum(axis=0) for f in X_train], Y_train)
 97 |     # pred = clf.predict([f.sum(axis=0) for f in X_test])
 98 | 
 99 |     y_test_pred, conf_matrix = model_performance(Y_test, pred)
100 | 
101 |     # custom evaluation metrics
102 |     print('Calculating additional test metrics...')
103 |     accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)
104 |     precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])
105 |     recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])
106 |     f1_score = 2 * (precision * recall) / (precision + recall)
107 |     print("Accuracy: {}".format(accuracy))
108 |     print("Precision: {}".format(precision))
109 |     print("Recall: {}".format(recall))
110 |     print("F1-Score: {}\n".format(f1_score))
111 |     print('='*89)
112 |     precs.append(0 if np.isnan(precision) else precision)
113 |     recs.append(0 if np.isnan(recall) else recall)
114 |     f1s.append(0 if np.isnan(f1_score) else f1_score)
115 |     # precs.append(precision)
116 |     # recs.append(recall)
117 |     # f1s.append(f1_score)
118 | print(np.mean(precs), np.mean(recs), np.mean(f1s))


--------------------------------------------------------------------------------
/DepressionCollected/Classification/FuseModelChecking.py:
--------------------------------------------------------------------------------
  1 | from fuse_net_whole import fusion_net, config, model_performance
  2 | import os
  3 | import numpy as np
  4 | import torch
  5 | from torch.autograd import Variable
  6 | import itertools
  7 | 
  8 | prefix = os.path.abspath(os.path.join(os.getcwd(), "./"))
  9 | idxs_paths = ['train_idxs_0.63_1.npy', 'train_idxs_0.65_2.npy', 'train_idxs_0.60_3.npy']
 10 | text_model_paths = ['BiLSTM_128_0.67_1.pt', 'BiLSTM_128_0.66_2.pt', 'BiLSTM_128_0.66_3.pt']
 11 | audio_model_paths = ['BiLSTM_gru_vlad256_256_0.63_1.pt', 'BiLSTM_gru_vlad256_256_0.65_2.pt', 'BiLSTM_gru_vlad256_256_0.60_3.pt']
 12 | fuse_model_paths = ['fuse_0.69_1.pt', 'fuse_0.68_2.pt', 'fuse_0.62_3.pt']
 13 | text_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_clf_avg.npz'))['arr_0']
 14 | text_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_clf_avg.npz'))['arr_0']
 15 | audio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_clf_256.npz'))['arr_0'], axis=2)
 16 | audio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_clf_256.npz'))['arr_0']
 17 | fuse_features = [[audio_features[i], text_features[i]] for i in range(text_features.shape[0])]
 18 | fuse_targets = text_targets
 19 | fuse_dep_idxs = np.where(text_targets == 1)[0]
 20 | fuse_non_idxs = np.where(text_targets == 0)[0]
 21 | 
 22 | def evaluate(model, test_idxs):
 23 |     model.eval()
 24 |     pred = torch.empty(config['batch_size'], 1).type(torch.LongTensor)
 25 |     X_test = []
 26 |     Y_test = []
 27 |     for idx in test_idxs:
 28 |         X_test.append(fuse_features[idx])
 29 |         Y_test.append(fuse_targets[idx])
 30 |     global max_train_acc, max_acc,max_f1
 31 |     for i in range(0, len(X_test), config['batch_size']):
 32 |         if i + config['batch_size'] > len(X_test):
 33 |             x, y = X_test[i:], Y_test[i:]
 34 |         else:
 35 |             x, y = X_test[i:(i+config['batch_size'])], Y_test[i:(i+config['batch_size'])]
 36 |         if config['cuda']:
 37 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()
 38 |         text_feature, audio_feature = model.pretrained_feature(x)
 39 |         with torch.no_grad():
 40 |             # concat_x = torch.cat((audio_feature, text_feature), dim=1)
 41 |             audio_feature_norm = (audio_feature - audio_feature.mean())/audio_feature.std()
 42 |             text_feature_norm = (text_feature - text_feature.mean())/text_feature.std()
 43 |             concat_x = torch.cat((text_feature, audio_feature), dim=1)
 44 |             output = model(concat_x)
 45 |         pred = torch.cat((pred, output.data.max(1, keepdim=True)[1]))
 46 |         
 47 |     y_test_pred, conf_matrix = model_performance(Y_test, pred[config['batch_size']:])
 48 |     # custom evaluation metrics
 49 |     print('Calculating additional test metrics...')
 50 |     accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)
 51 |     precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])
 52 |     recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])
 53 |     f1_score = 2 * (precision * recall) / (precision + recall)
 54 |     print("Accuracy: {}".format(accuracy))
 55 |     print("Precision: {}".format(precision))
 56 |     print("Recall: {}".format(recall))
 57 |     print("F1-Score: {}\n".format(f1_score))
 58 |     print('='*89)
 59 | 
 60 |     return precision, recall, f1_score
 61 | 
 62 | ps, rs, fs = [], [], []
 63 | for fold in range(3):
 64 |     train_idxs_tmp = np.load(os.path.join(prefix, 'Features/TextWhole/{}'.format(idxs_paths[fold])), allow_pickle=True)
 65 |     test_idxs_tmp = list(set(list(fuse_dep_idxs)+list(fuse_non_idxs)) - set(train_idxs_tmp))
 66 |     resample_idxs = list(range(6))
 67 |     train_idxs, test_idxs = [], []
 68 |     # depression data augmentation
 69 |     for idx in train_idxs_tmp:
 70 |         if idx in fuse_dep_idxs:
 71 |             feat = fuse_features[idx]
 72 |             audio_perm = itertools.permutations(feat[0], 3)
 73 |             text_perm = itertools.permutations(feat[1], 3)
 74 |             count = 0
 75 |             for fuse_perm in zip(audio_perm, text_perm):
 76 |                 if count in resample_idxs:
 77 |                     fuse_features.append(fuse_perm)
 78 |                     fuse_targets = np.hstack((fuse_targets, 1))
 79 |                     train_idxs.append(len(fuse_features)-1)
 80 |                 count += 1
 81 |         else:
 82 |             train_idxs.append(idx)
 83 | 
 84 |     for idx in test_idxs_tmp:
 85 |         if idx in fuse_dep_idxs:
 86 |             feat = fuse_features[idx]
 87 |             audio_perm = itertools.permutations(feat[0], 3)
 88 |             text_perm = itertools.permutations(feat[1], 3)
 89 |             count = 0
 90 |             resample_idxs = [0,1,4,5]
 91 |             for fuse_perm in zip(audio_perm, text_perm):
 92 |                 if count in resample_idxs:
 93 |                     fuse_features.append(fuse_perm)
 94 |                     fuse_targets = np.hstack((fuse_targets, 1))
 95 |                     test_idxs.append(len(fuse_features)-1)
 96 |                 count += 1
 97 |         else:
 98 |             test_idxs.append(idx)
 99 |     
100 |     fuse_model = torch.load(os.path.join(prefix, 'Model/ClassificationWhole/Fuse/{}'.format(fuse_model_paths[fold])))
101 |     p, r, f = evaluate(fuse_model, test_idxs)
102 |     ps.append(p)
103 |     rs.append(r)
104 |     fs.append(f)
105 | print('precison: {} \n recall: {} \n f1 score: {}'.format(np.mean(ps), np.mean(rs), np.mean(fs)))
106 | 


--------------------------------------------------------------------------------
/DepressionCollected/Classification/TextModelChecking.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import torch
  3 | import torch.nn as nn
  4 | from torch.autograd import Variable
  5 | from torch.nn import functional as F
  6 | import torch.optim as optim
  7 | from sklearn.metrics import confusion_matrix
  8 | import numpy as np
  9 | import pandas as pd
 10 | import wave
 11 | import re
 12 | import os
 13 | import tensorflow.compat.v1 as tf
 14 | import random
 15 | import itertools
 16 | 
 17 | from sklearn.preprocessing import StandardScaler
 18 | import pickle
 19 | 
 20 | # prefix = os.path.abspath(os.path.join(os.getcwd(), "."))
 21 | # text_features = np.load(os.path.join(prefix, 'Features/Text/whole_samples_clf_avg.npz'))['arr_0']
 22 | # text_targets = np.load(os.path.join(prefix, 'Features/Text/whole_labels_clf_avg.npz'))['arr_0']
 23 | 
 24 | # audio_dep_idxs = np.where(text_targets == 1)[0]
 25 | # audio_non_idxs = np.where(text_targets == 0)[0]
 26 | # # train_dep_idxs_tmp = np.load(os.path.join(prefix, 'Features/Text/train_dep_idxs_0.80.npy'), allow_pickle=True)
 27 | # # train_non_idxs = list(np.load(os.path.join(prefix, 'Features/Text/train_non_idxs_0.80.npy'), allow_pickle=True))
 28 | # # train_dep_idxs_tmp = np.load(os.path.join(prefix, 'Features/Text/train_dep_idxs_0.65_2.npy'), allow_pickle=True)
 29 | # # train_non_idxs = list(np.load(os.path.join(prefix, 'Features/Text/train_non_idxs_0.65_2.npy'), allow_pickle=True))
 30 | # train_dep_idxs_tmp = np.load(os.path.join(prefix, 'Features/Text/train_dep_idxs_0.89_3.npy'), allow_pickle=True)
 31 | # train_non_idxs = list(np.load(os.path.join(prefix, 'Features/Text/train_non_idxs_0.89_3.npy'), allow_pickle=True))
 32 | 
 33 | # test_dep_idxs_tmp = list(set(audio_dep_idxs) - set(train_dep_idxs_tmp))
 34 | # test_non_idxs = list(set(audio_non_idxs) - set(train_non_idxs))
 35 | 
 36 | prefix = os.path.abspath(os.path.join(os.getcwd(), "."))
 37 | text_features = np.load(os.path.join(
 38 |     prefix, 'Features/TextWhole/whole_samples_clf_avg.npz'))['arr_0']
 39 | text_targets = np.load(os.path.join(
 40 |     prefix, 'Features/TextWhole/whole_labels_clf_avg.npz'))['arr_0']
 41 | text_dep_idxs_tmp = np.where(text_targets == 1)[0]
 42 | text_non_idxs = np.where(text_targets == 0)[0]
 43 | 
 44 | 
 45 | 
 46 | 
 47 | # # training data augmentation
 48 | # train_dep_idxs = []
 49 | # for idx in train_dep_idxs_tmp:
 50 | #     feat = text_features[idx]
 51 | #     for i in itertools.permutations(feat, feat.shape[0]):
 52 | #         text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))
 53 | #         text_targets = np.hstack((text_targets, 1))
 54 | #         train_dep_idxs.append(len(text_features)-1)
 55 | 
 56 | #         text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))
 57 | #         text_targets = np.hstack((text_targets, 1))
 58 | #         train_dep_idxs.append(len(text_features)-1)
 59 | 
 60 | # # test data augmentation
 61 | # test_dep_idxs = []
 62 | # for idx in test_dep_idxs_tmp:
 63 | #     feat = text_features[idx]
 64 | #     for i in itertools.permutations(feat, feat.shape[0]):
 65 | #         text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))
 66 | #         text_targets = np.hstack((text_targets, 1))
 67 | #         test_dep_idxs.append(len(text_features)-1)
 68 | 
 69 | def standard_confusion_matrix(y_test, y_test_pred):
 70 |     """
 71 |     Make confusion matrix with format:
 72 |                   -----------
 73 |                   | TP | FP |
 74 |                   -----------
 75 |                   | FN | TN |
 76 |                   -----------
 77 |     Parameters
 78 |     ----------
 79 |     y_true : ndarray - 1D
 80 |     y_pred : ndarray - 1D
 81 | 
 82 |     Returns
 83 |     -------
 84 |     ndarray - 2D
 85 |     """
 86 |     [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)
 87 |     return np.array([[tp, fp], [fn, tn]])
 88 | 
 89 | 
 90 | def model_performance(y_test, y_test_pred_proba):
 91 |     """
 92 |     Evaluation metrics for network performance.
 93 |     """
 94 |     # y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]
 95 |     y_test_pred = y_test_pred_proba
 96 | 
 97 |     # Computing confusion matrix for test dataset
 98 |     conf_matrix = standard_confusion_matrix(y_test, y_test_pred)
 99 |     print("Confusion Matrix:")
100 |     print(conf_matrix)
101 | 
102 |     return y_test_pred, conf_matrix
103 | 
104 | 
105 | class TextBiLSTM(nn.Module):
106 |     def __init__(self, config):
107 |         super(TextBiLSTM, self).__init__()
108 |         self.num_classes = config['num_classes']
109 |         self.learning_rate = config['learning_rate']
110 |         self.dropout = config['dropout']
111 |         self.hidden_dims = config['hidden_dims']
112 |         self.rnn_layers = config['rnn_layers']
113 |         self.embedding_size = config['embedding_size']
114 |         self.bidirectional = config['bidirectional']
115 | 
116 |         self.build_model()
117 |         self.init_weight()
118 | 
119 |     def init_weight(net):
120 |         for name, param in net.named_parameters():
121 |             if 'bias' in name:
122 |                 nn.init.constant_(param, 0.0)
123 |             elif 'weight' in name:
124 |                 nn.init.xavier_uniform_(param)
125 | 
126 |     def build_model(self):
127 |         # attention layer
128 |         self.attention_layer = nn.Sequential(
129 |             nn.Linear(self.hidden_dims, self.hidden_dims),
130 |             nn.ReLU(inplace=True)
131 |         )
132 |         # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)
133 | 
134 |         # 双层lstm
135 |         self.lstm_net = nn.LSTM(self.embedding_size, self.hidden_dims,
136 |                                 num_layers=self.rnn_layers, dropout=self.dropout,
137 |                                 bidirectional=self.bidirectional)
138 | 
139 |         # self.init_weight()
140 | 
141 |         # FC层
142 |         # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)
143 |         self.fc_out = nn.Sequential(
144 |             nn.Dropout(self.dropout),
145 |             nn.Linear(self.hidden_dims, self.hidden_dims),
146 |             nn.ReLU(),
147 |             nn.Dropout(self.dropout),
148 |             nn.Linear(self.hidden_dims, self.num_classes),
149 |             # nn.ReLU(),
150 |             nn.Softmax(dim=1),
151 |         )
152 | 
153 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
154 |         '''
155 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
156 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
157 |         :return: [batch_size, n_hidden]
158 |         '''
159 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
160 |         # h [batch_size, time_step, hidden_dims]
161 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
162 |         # h = lstm_out
163 |         # [batch_size, num_layers * num_directions, n_hidden]
164 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
165 |         # [batch_size, 1, n_hidden]
166 |         lstm_hidden = lstm_hidden.unsqueeze(1)
167 |         # atten_w [batch_size, 1, hidden_dims]
168 |         atten_w = self.attention_layer(lstm_hidden)
169 |         # m [batch_size, time_step, hidden_dims]
170 |         m = nn.Tanh()(h)
171 |         # atten_context [batch_size, 1, time_step]
172 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
173 |         # softmax_w [batch_size, 1, time_step]
174 |         softmax_w = F.softmax(atten_context, dim=-1)
175 |         # context [batch_size, 1, hidden_dims]
176 |         context = torch.bmm(softmax_w, h)
177 |         result = context.squeeze(1)
178 |         return result
179 | 
180 |     def forward(self, x):
181 | 
182 |         # x : [len_seq, batch_size, embedding_dim]
183 |         x = x.permute(1, 0, 2)
184 |         output, (final_hidden_state, final_cell_state) = self.lstm_net(x)
185 |         # output : [batch_size, len_seq, n_hidden * 2]
186 |         output = output.permute(1, 0, 2)
187 |         # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]
188 |         final_hidden_state = final_hidden_state.permute(1, 0, 2)
189 |         # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)
190 |         # atten_out = self.attention_net(output, final_hidden_state)
191 |         atten_out = self.attention_net_with_w(output, final_hidden_state)
192 |         return self.fc_out(atten_out)
193 | 
194 | class BiLSTM(nn.Module):
195 |     def __init__(self, rnn_layers, dropout, num_classes, text_hidden_dims, text_embed_size):
196 |         super(BiLSTM, self).__init__()
197 | 
198 |         self.text_embed_size = text_embed_size
199 |         self.text_hidden_dims = text_hidden_dims
200 |         self.rnn_layers = rnn_layers
201 |         self.dropout = dropout
202 |         self.num_classes = num_classes
203 | 
204 |         # attention layer
205 |         self.attention_layer = nn.Sequential(
206 |             nn.Linear(self.text_hidden_dims, self.text_hidden_dims),
207 |             nn.ReLU(inplace=True)
208 |         )
209 | 
210 |         # 双层lstm
211 |         self.lstm_net = nn.LSTM(self.text_embed_size, self.text_hidden_dims,
212 |                                 num_layers=self.rnn_layers, dropout=self.dropout,
213 |                                 bidirectional=True)
214 |         # FC层
215 |         self.fc_out = nn.Sequential(
216 |             nn.Dropout(self.dropout),
217 |             nn.Linear(self.text_hidden_dims, self.text_hidden_dims),
218 |             nn.ReLU(),
219 |             nn.Dropout(self.dropout),
220 |             nn.Linear(self.text_hidden_dims, self.num_classes),
221 |             # nn.ReLU(),
222 |             nn.Softmax(dim=1),
223 |         )
224 | 
225 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
226 |         '''
227 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
228 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
229 |         :return: [batch_size, n_hidden]
230 |         '''
231 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
232 |         # h [batch_size, time_step, hidden_dims]
233 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
234 |         # [batch_size, num_layers * num_directions, n_hidden]
235 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
236 |         # [batch_size, 1, n_hidden]
237 |         lstm_hidden = lstm_hidden.unsqueeze(1)
238 |         # atten_w [batch_size, 1, hidden_dims]
239 |         atten_w = self.attention_layer(lstm_hidden)
240 |         # m [batch_size, time_step, hidden_dims]
241 |         m = nn.Tanh()(h)
242 |         # atten_context [batch_size, 1, time_step]
243 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
244 |         # softmax_w [batch_size, 1, time_step]
245 |         softmax_w = F.softmax(atten_context, dim=-1)
246 |         # context [batch_size, 1, hidden_dims]
247 |         context = torch.bmm(softmax_w, h)
248 |         result = context.squeeze(1)
249 |         return result
250 | 
251 |     def forward(self, x_text):
252 |         # x : [len_seq, batch_size, embedding_dim]
253 |         x_text = x_text.permute(1, 0, 2)
254 |         output, (final_hidden_state, _) = self.lstm_net(x_text)
255 |         # output : [batch_size, len_seq, n_hidden * 2]
256 |         output = output.permute(1, 0, 2)
257 |         # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]
258 |         final_hidden_state = final_hidden_state.permute(1, 0, 2)
259 |         # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)
260 |         # atten_out = self.attention_net(output, final_hidden_state)
261 |         atten_out = self.attention_net_with_w(output, final_hidden_state)
262 |         text_feature = self.fc_out(atten_out)
263 | 
264 |         return text_feature
265 | 
266 | def evaluate(model, test_idxs):
267 |     model.eval()
268 |     batch_idx = 1
269 |     total_loss = 0
270 |     pred = torch.empty(config['batch_size'], 1).type(torch.LongTensor)
271 |     # X_test = text_features[test_dep_idxs+test_non_idxs]
272 |     # Y_test = text_targets[test_dep_idxs+test_non_idxs]
273 |     X_test = text_features[test_idxs]
274 |     Y_test = text_targets[test_idxs]
275 |     global max_train_acc, max_acc, max_f1
276 |     for i in range(0, X_test.shape[0], config['batch_size']):
277 |         if i + config['batch_size'] > X_test.shape[0]:
278 |             x, y = X_test[i:], Y_test[i:]
279 |         else:
280 |             x, y = X_test[i:(i+config['batch_size'])
281 |                           ], Y_test[i:(i+config['batch_size'])]
282 |         if config['cuda']:
283 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(
284 |             ),             Variable(torch.from_numpy(y)).cuda()
285 |         else:
286 |             x, y = Variable(torch.from_numpy(x).type(
287 |                 torch.FloatTensor), requires_grad=True), Variable(torch.from_numpy(y))
288 |         with torch.no_grad():
289 |             output = model(x.squeeze(2))
290 |         pred = torch.cat((pred, output.data.max(1, keepdim=True)[1]))
291 | 
292 |     y_test_pred, conf_matrix = model_performance(
293 |         Y_test, pred[config['batch_size']:])
294 |     print('Calculating additional test metrics...')
295 |     accuracy = float(conf_matrix[0][0] +
296 |                      conf_matrix[1][1]) / np.sum(conf_matrix)
297 |     precision = float(conf_matrix[0][0]) / \
298 |         (conf_matrix[0][0] + conf_matrix[0][1])
299 |     recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])
300 |     f1_score = 2 * (precision * recall) / (precision + recall)
301 |     print("Accuracy: {}".format(accuracy))
302 |     print("Precision: {}".format(precision))
303 |     print("Recall: {}".format(recall))
304 |     print("F1-Score: {}\n".format(f1_score))
305 |     print('='*89)
306 |     return precision, recall, f1_score
307 | 
308 | text_model_paths = ['BiLSTM_128_0.64_1.pt', 'BiLSTM_128_0.66_2.pt', 'BiLSTM_128_0.66_3.pt']
309 | train_idxs_tmps = [np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.63_1.npy'), allow_pickle=True),
310 |                    np.load(os.path.join(
311 |                        prefix, 'Features/TextWhole/train_idxs_0.60_2.npy'), allow_pickle=True),
312 |                    np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_3.npy'), allow_pickle=True)]
313 | resample_idxs = [0, 1, 2, 3, 4, 5]
314 | fold = 1
315 | ps, rs, fs = [], [], []
316 | for idx_i, train_idxs_tmp in enumerate(train_idxs_tmps):
317 |     test_idxs_tmp = list(
318 |         set(list(text_dep_idxs_tmp)+list(text_non_idxs)) - set(train_idxs_tmp))
319 |     train_idxs, test_idxs = [], []
320 |     # depression data augmentation
321 |     for idx in train_idxs_tmp:
322 |         if idx in text_dep_idxs_tmp:
323 |             feat = text_features[idx]
324 |             count = 0
325 |             for i in itertools.permutations(feat, feat.shape[0]):
326 |                 if count in resample_idxs:
327 |                     text_features = np.vstack(
328 |                         (text_features, np.expand_dims(list(i), 0)))
329 |                     text_targets = np.hstack((text_targets, 1))
330 |                     train_idxs.append(len(text_features)-1)
331 |                 count += 1
332 |         else:
333 |             train_idxs.append(idx)
334 | 
335 |     for idx in test_idxs_tmp:
336 |         if idx in text_dep_idxs_tmp:
337 |             feat = text_features[idx]
338 |             count = 0
339 |             # resample_idxs = random.sample(range(6), 4)
340 |             resample_idxs = [0,1,4,5]
341 |             for i in itertools.permutations(feat, feat.shape[0]):
342 |                 if count in resample_idxs:
343 |                     text_features = np.vstack(
344 |                         (text_features, np.expand_dims(list(i), 0)))
345 |                     text_targets = np.hstack((text_targets, 1))
346 |                     test_idxs.append(len(text_features)-1)
347 |                 count += 1
348 |         else:
349 |             test_idxs.append(idx)
350 | 
351 |     config = {
352 |         'num_classes': 2,
353 |         'dropout': 0.5,
354 |         'rnn_layers': 2,
355 |         'embedding_size': 1024,
356 |         'batch_size': 4,
357 |         'epochs': 100,
358 |         'learning_rate': 2e-5,
359 |         'hidden_dims': 128,
360 |         'bidirectional': True,
361 |         'cuda': False,
362 |     }
363 | 
364 |     text_lstm_model = torch.load(os.path.join(
365 |         prefix, 'Model/ClassificationWhole/Text/{}'.format(text_model_paths[idx_i])))
366 | 
367 |     model = BiLSTM(config['rnn_layers'], config['dropout'], config['num_classes'],
368 |                    config['hidden_dims'], config['embedding_size'])
369 | 
370 |     # model_state_dict = {}
371 |     # model_state_dict['lstm_net_audio.weight_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l0']
372 |     # model_state_dict['lstm_net_audio.weight_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l0']
373 |     # model_state_dict['lstm_net_audio.bias_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l0']
374 |     # model_state_dict['lstm_net_audio.bias_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l0']
375 | 
376 |     # model_state_dict['lstm_net_audio.weight_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l1']
377 |     # model_state_dict['lstm_net_audio.weight_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l1']
378 |     # model_state_dict['lstm_net_audio.bias_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l1']
379 |     # model_state_dict['lstm_net_audio.bias_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l1']
380 | 
381 |     # model_state_dict['fc_audio.1.weight'] = audio_lstm_model.state_dict()['fc_audio.1.weight']
382 |     # model_state_dict['fc_audio.1.bias'] = audio_lstm_model.state_dict()['fc_audio.1.bias']
383 |     # model_state_dict['fc_audio.4.weight'] = audio_lstm_model.state_dict()['fc_audio.4.weight']
384 |     # model_state_dict['fc_audio.4.bias'] = audio_lstm_model.state_dict()['fc_audio.4.bias']
385 |     # model_state_dict = text_lstm_model.state_dict()
386 |     # model.load_state_dict(model_state_dict)
387 | 
388 |     # evaluate(text_features_test, fuse_targets_test, audio_lstm_model)
389 |     # evaluate(model, test_idxs)
390 |     
391 |     p, r, f = evaluate(text_lstm_model, test_idxs)
392 |     ps.append(p)
393 |     rs.append(r)
394 |     fs.append(f)
395 | print('precison: {} \n recall: {} \n f1 score: {}'.format(np.mean(ps), np.mean(rs), np.mean(fs)))
396 | 


--------------------------------------------------------------------------------
/DepressionCollected/Classification/TextTraditionalClassifiers.py:
--------------------------------------------------------------------------------
  1 | from sklearn.model_selection import KFold
  2 | import numpy as np
  3 | import pandas as pd
  4 | import os
  5 | import pickle
  6 | import random
  7 | import itertools
  8 | from sklearn.metrics import confusion_matrix
  9 | from sklearn.model_selection import train_test_split
 10 | 
 11 | 
 12 | prefix = os.path.abspath(os.path.join(os.getcwd(), "."))
 13 | text_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_clf_avg.npz'))['arr_0']
 14 | text_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_clf_avg.npz'))['arr_0']
 15 | text_dep_idxs_tmp = np.where(text_targets == 1)[0]
 16 | text_non_idxs = np.where(text_targets == 0)[0]
 17 | 
 18 | def model_performance(y_test, y_test_pred_proba):
 19 |     """
 20 |     Evaluation metrics for network performance.
 21 |     """
 22 | #     y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]
 23 |     y_test_pred = y_test_pred_proba
 24 | 
 25 |     # Computing confusion matrix for test dataset
 26 |     conf_matrix = standard_confusion_matrix(y_test, y_test_pred)
 27 |     print("Confusion Matrix:")
 28 |     print(conf_matrix)
 29 | 
 30 |     return y_test_pred, conf_matrix
 31 | 
 32 | def standard_confusion_matrix(y_test, y_test_pred):
 33 |     [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)
 34 |     return np.array([[tp, fp], [fn, tn]])
 35 | 
 36 | train_idxs_tmps = [np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.63_1.npy'), allow_pickle=True),
 37 | np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.65_2.npy'), allow_pickle=True),
 38 | np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_3.npy'), allow_pickle=True)]
 39 | precs, recs, f1s = [], [], []
 40 | 
 41 | for idx_idx, train_idxs_tmp in enumerate(train_idxs_tmps):
 42 |     test_idxs_tmp = list(set(list(text_dep_idxs_tmp)+list(text_non_idxs)) - set(train_idxs_tmp))
 43 |     train_idxs, test_idxs = [], []
 44 | 
 45 |     # depression data augmentation
 46 |     for idx in train_idxs_tmp:
 47 |         if idx in text_dep_idxs_tmp:
 48 |             feat = text_features[idx]
 49 |             count = 0
 50 |             resample_idxs = [0,1,2,3,4,5]
 51 |             for i in itertools.permutations(feat, feat.shape[0]):
 52 |                 if count in resample_idxs:
 53 |                     text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))
 54 |                     text_targets = np.hstack((text_targets, 1))
 55 |                     train_idxs.append(len(text_features)-1)
 56 |                 count += 1
 57 |         else:
 58 |             train_idxs.append(idx)
 59 | 
 60 |     for idx in test_idxs_tmp:
 61 |         if idx in text_dep_idxs_tmp:
 62 |             feat = text_features[idx]
 63 |             count = 0
 64 |             # resample_idxs = random.sample(range(6), 4)
 65 |             resample_idxs = [0,1,4,5]
 66 |             for i in itertools.permutations(feat, feat.shape[0]):
 67 |                 if count in resample_idxs:
 68 |                     text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))
 69 |                     text_targets = np.hstack((text_targets, 1))
 70 |                     test_idxs.append(len(text_features)-1)
 71 |                 count += 1
 72 |         else:
 73 |             test_idxs.append(idx)
 74 |     # train_idxs = train_idxs_tmp
 75 |     # test_idxs = test_idxs_tmp
 76 | 
 77 |     X_train = text_features[train_idxs]
 78 |     Y_train = text_targets[train_idxs]
 79 |     X_test = text_features[test_idxs]
 80 |     Y_test = text_targets[test_idxs]
 81 | 
 82 |     # Decision Tree
 83 |     from sklearn import tree
 84 |     clf = tree.DecisionTreeClassifier(max_depth=20)
 85 | 
 86 |     # svm
 87 |     # from sklearn.svm import SVC
 88 |     # clf = SVC(kernel='rbf', gamma='auto')
 89 | 
 90 |     # rf
 91 |     # from sklearn.ensemble import RandomForestClassifier
 92 |     # clf = RandomForestClassifier(n_estimators=10, max_depth=20)
 93 | 
 94 |     # lr
 95 |     # from sklearn.linear_model import LogisticRegression
 96 |     # clf = LogisticRegression()
 97 | 
 98 |     clf.fit([f.flatten() for f in X_train], Y_train)
 99 |     pred = clf.predict([f.flatten() for f in X_test])
100 |     # clf.fit([f.sum(axis=0) for f in X_train], Y_train)
101 |     # pred = clf.predict([f.sum(axis=0) for f in X_test])
102 | 
103 |     y_test_pred, conf_matrix = model_performance(Y_test, pred)
104 | 
105 |     # custom evaluation metrics
106 |     print('Calculating additional test metrics...')
107 |     accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)
108 |     precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])
109 |     recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])
110 |     f1_score = 2 * (precision * recall) / (precision + recall)
111 |     print("Accuracy: {}".format(accuracy))
112 |     print("Precision: {}".format(precision))
113 |     print("Recall: {}".format(recall))
114 |     print("F1-Score: {}\n".format(f1_score))
115 |     print('='*89)
116 |     # precs.append(0 if np.isnan(precision) else precision)
117 |     # recs.append(0 if np.isnan(recall) else recall)
118 |     # f1s.append(0 if np.isnan(f1_score) else f1_score)
119 |     precs.append(precision)
120 |     recs.append(recall)
121 |     f1s.append(f1_score)
122 | print(np.mean(precs), np.mean(recs), np.mean(f1s))


--------------------------------------------------------------------------------
/DepressionCollected/Classification/audio_features_whole.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import pandas as pd
  4 | import wave
  5 | import librosa
  6 | from python_speech_features import *
  7 | import sys
  8 | import pickle
  9 | sys.path.append('/Users/linlin/Desktop/depression/classfication')
 10 | 
 11 | import tensorflow.compat.v1 as tf
 12 | 
 13 | import vggish.vggish_input as vggish_input
 14 | import vggish.vggish_params as vggish_params
 15 | import vggish.vggish_postprocess as vggish_postprocess
 16 | import vggish.vggish_slim as vggish_slim
 17 | 
 18 | import loupe_keras as lpk
 19 | 
 20 | from allennlp.commands.elmo import ElmoEmbedder
 21 | 
 22 | tf.enable_eager_execution()
 23 | 
 24 | elmo = ElmoEmbedder()
 25 | 
 26 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
 27 | 
 28 | prefix = os.path.abspath(os.path.join(os.getcwd(), "."))
 29 | 
 30 | # Paths to downloaded VGGish files.
 31 | checkpoint_path =os.path.join(os.getcwd(),  'vggish/vggish_model.ckpt')
 32 | pca_params_path = os.path.join(os.getcwd(), 'vggish/vggish_pca_params.npz')
 33 | 
 34 | cluster_size = 16
 35 | 
 36 | min_len = 100
 37 | max_len = -1
 38 | 
 39 | def to_vggish_embedds(x, sr):
 40 |     # x为输入的音频，sr为sample_rate
 41 |     input_batch = vggish_input.waveform_to_examples(x, sr)
 42 |     with tf.Graph().as_default(), tf.Session() as sess:
 43 |       vggish_slim.define_vggish_slim()
 44 |       vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)
 45 | 
 46 |       features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
 47 |       embedding_tensor = sess.graph.get_tensor_by_name(vggish_params.OUTPUT_TENSOR_NAME)
 48 |       [embedding_batch] = sess.run([embedding_tensor],
 49 |                                    feed_dict={features_tensor: input_batch})
 50 | 
 51 |     # Postprocess the results to produce whitened quantized embeddings.
 52 |     pproc = vggish_postprocess.Postprocessor(pca_params_path)
 53 |     postprocessed_batch = pproc.postprocess(embedding_batch)
 54 |     
 55 |     return tf.cast(postprocessed_batch, dtype='float32')
 56 | 
 57 | def wav2vlad(wave_data, sr):
 58 |     global cluster_size
 59 |     signal = wave_data
 60 |     melspec = librosa.feature.melspectrogram(signal, n_mels=80,sr=sr).astype(np.float32).T
 61 |     melspec = np.log(np.maximum(1e-6, melspec))
 62 |     feature_size = melspec.shape[1]
 63 |     max_samples = melspec.shape[0]
 64 |     output_dim = cluster_size * 16
 65 |     feat = lpk.NetVLAD(feature_size=feature_size, max_samples=max_samples, \
 66 |                             cluster_size=cluster_size, output_dim=output_dim) \
 67 |                                 (tf.convert_to_tensor(melspec))
 68 |     with tf.Session() as sess:
 69 |         init = tf.global_variables_initializer()
 70 |         sess.run(init)
 71 |         r = feat.numpy()
 72 |     return r
 73 |         
 74 | def extract_features(number, audio_features, targets, path):
 75 |     global max_len, min_len
 76 |     if not os.path.exists(os.path.join(prefix, '{1}/{0}/positive_out.wav'.format(number, path))):
 77 |         return    
 78 |     positive_file = wave.open(os.path.join(prefix, '{1}/{0}/positive_out.wav'.format(number, path)))
 79 |     sr1 = positive_file.getframerate()
 80 |     nframes1 = positive_file.getnframes()
 81 |     wave_data1 = np.frombuffer(positive_file.readframes(nframes1), dtype=np.short).astype(np.float)
 82 |     len1 = nframes1 / sr1
 83 | 
 84 |     neutral_file = wave.open(os.path.join(prefix, '{1}/{0}/neutral_out.wav'.format(number, path)))
 85 |     sr2 = neutral_file.getframerate()
 86 |     nframes2 = neutral_file.getnframes()
 87 |     wave_data2 = np.frombuffer(neutral_file.readframes(nframes2), dtype=np.short).astype(np.float)
 88 |     len2 = nframes2 / sr2
 89 | 
 90 |     negative_file = wave.open(os.path.join(prefix, '{1}/{0}/negative_out.wav'.format(number, path)))
 91 |     sr3 = negative_file.getframerate()
 92 |     nframes3 = negative_file.getnframes()
 93 |     wave_data3 = np.frombuffer(negative_file.readframes(nframes3), dtype=np.short).astype(np.float)
 94 |     len3 = nframes3/sr3
 95 | 
 96 |     for l in [len1, len2, len3]:
 97 |         if l > max_len:
 98 |             max_len = l
 99 |         if l < min_len:
100 |             min_len = l
101 | 
102 |     with open(os.path.join(prefix, '{1}/{0}/new_label.txt'.format(number, path))) as fli:
103 |         target = float(fli.readline())
104 |     
105 |     if wave_data1.shape[0] < 1:
106 |         wave_data1 = np.array([1e-4]*sr1*5)
107 |     if wave_data2.shape[0] < 1:
108 |         wave_data2 = np.array([1e-4]*sr2*5)
109 |     if wave_data3.shape[0] < 1:
110 |         wave_data3 = np.array([1e-4]*sr3*5)  
111 |     audio_features.append([wav2vlad(wave_data1, sr1), wav2vlad(wave_data2, sr2), \
112 |         wav2vlad(wave_data3, sr3)])
113 |     # targets.append(1 if target >= 53 else 0)
114 |     targets.append(target)
115 | 
116 | 
117 | audio_features = []
118 | audio_targets = []
119 | 
120 | for index in range(114):
121 |     extract_features(index+1, audio_features, audio_targets, 'Data')
122 | 
123 | for index in range(114):
124 |     extract_features(index+1, audio_features, audio_targets, 'ValidationData')
125 | 
126 | 
127 | print("Saving npz file locally...")
128 | np.savez(os.path.join(prefix, 'Features/AudioWhole/whole_samples_reg_%d.npz'%(cluster_size*16)), audio_features)
129 | np.savez(os.path.join(prefix, 'Features/AudioWhole/whole_labels_reg_%d.npz')%(cluster_size*16), audio_targets)
130 | 
131 | print(max_len, min_len)


--------------------------------------------------------------------------------
/DepressionCollected/Classification/audio_gru_whole.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.autograd import Variable
  4 | from torch.nn import functional as F
  5 | import torch.optim as optim
  6 | from sklearn.metrics import confusion_matrix
  7 | from sklearn.metrics import mean_absolute_error, mean_squared_error
  8 | from sklearn.model_selection import train_test_split
  9 | from sklearn.model_selection import KFold
 10 | 
 11 | import numpy as np
 12 | import pandas as pd
 13 | import os
 14 | import pickle
 15 | import random
 16 | import itertools
 17 | 
 18 | prefix = os.path.abspath(os.path.join(os.getcwd(), "."))
 19 | audio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_clf_256.npz'))['arr_0'], axis=2)
 20 | audio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_clf_256.npz'))['arr_0']
 21 | audio_dep_idxs_tmp = np.where(audio_targets == 1)[0]
 22 | audio_non_idxs = np.where(audio_targets == 0)[0]
 23 | 
 24 | class AudioBiLSTM(nn.Module):
 25 |     def __init__(self, config):
 26 |         super(AudioBiLSTM, self).__init__()
 27 |         self.num_classes = config['num_classes']
 28 |         self.learning_rate = config['learning_rate']
 29 |         self.dropout = config['dropout']
 30 |         self.hidden_dims = config['hidden_dims']
 31 |         self.rnn_layers = config['rnn_layers']
 32 |         self.embedding_size = config['embedding_size']
 33 |         self.bidirectional = config['bidirectional']
 34 | 
 35 |         self.build_model()
 36 |         # self.init_weight()
 37 | 
 38 |     def init_weight(net):
 39 |         for name, param in net.named_parameters():
 40 |             if not 'ln' in name:
 41 |                 if 'bias' in name:
 42 |                     nn.init.constant_(param, 0.0)
 43 |                 elif 'weight' in name:
 44 |                     nn.init.xavier_uniform_(param)
 45 | 
 46 |     def build_model(self):
 47 |         # attention layer
 48 |         self.attention_layer = nn.Sequential(
 49 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 50 |             nn.ReLU(inplace=True))
 51 |         # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)
 52 | 
 53 |         # self.lstm_net_audio = nn.LSTM(self.embedding_size,
 54 |         #                         self.hidden_dims,
 55 |         #                         num_layers=self.rnn_layers,
 56 |         #                         dropout=self.dropout,
 57 |         #                         bidirectional=self.bidirectional,
 58 |         #                         batch_first=True)
 59 |         self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,
 60 |                                 num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)
 61 | 
 62 |         self.ln = nn.LayerNorm(self.embedding_size)
 63 | 
 64 |         # FC层
 65 |         self.fc_audio = nn.Sequential(
 66 |             nn.Dropout(self.dropout),
 67 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 68 |             nn.ReLU(),
 69 |             nn.Dropout(self.dropout),
 70 |             nn.Linear(self.hidden_dims, self.num_classes),
 71 |             # nn.ReLU(),
 72 |             nn.Softmax(dim=1)
 73 |         )
 74 | 
 75 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
 76 |         '''
 77 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
 78 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
 79 |         :return: [batch_size, n_hidden]
 80 |         '''
 81 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
 82 |         # h [batch_size, time_step, hidden_dims]
 83 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
 84 |         #         h = lstm_out
 85 |         # [batch_size, num_layers * num_directions, n_hidden]
 86 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
 87 |         # [batch_size, 1, n_hidden]
 88 |         lstm_hidden = lstm_hidden.unsqueeze(1)
 89 |         # atten_w [batch_size, 1, hidden_dims]
 90 |         atten_w = self.attention_layer(lstm_hidden)
 91 |         # m [batch_size, time_step, hidden_dims]
 92 |         m = nn.Tanh()(h)
 93 |         # atten_context [batch_size, 1, time_step]
 94 |        # print(atten_w.shape, m.transpose(1, 2).shape)
 95 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
 96 |         # softmax_w [batch_size, 1, time_step]
 97 |         softmax_w = F.softmax(atten_context, dim=-1)
 98 |         # context [batch_size, 1, hidden_dims]
 99 |         context = torch.bmm(softmax_w, h)
100 |         result = context.squeeze(1)
101 |         return result
102 | 
103 |     def forward(self, x):
104 |         x = self.ln(x)
105 |         x, _ = self.lstm_net_audio(x)
106 |         x = x.mean(dim=1)
107 |         out = self.fc_audio(x)
108 |         return out
109 | 
110 | config = {
111 |     'num_classes': 2,
112 |     'dropout': 0.5,
113 |     'rnn_layers': 2,
114 |     'embedding_size': 256,
115 |     'batch_size': 8,
116 |     'epochs': 170,
117 |     'learning_rate': 6e-6,
118 |     'hidden_dims': 256,
119 |     'bidirectional': False,
120 |     'cuda': False
121 | }
122 | 
123 | def save(model, filename):
124 |     save_filename = '{}.pt'.format(filename)
125 |     torch.save(model, save_filename)
126 |     print('Saved as %s' % save_filename)
127 | 
128 | def standard_confusion_matrix(y_test, y_test_pred):
129 |     """
130 |     Make confusion matrix with format:
131 |                   -----------
132 |                   | TP | FP |
133 |                   -----------
134 |                   | FN | TN |
135 |                   -----------
136 |     Parameters
137 |     ----------
138 |     y_true : ndarray - 1D
139 |     y_pred : ndarray - 1D
140 | 
141 |     Returns
142 |     -------
143 |     ndarray - 2D
144 |     """
145 |     [[tn, fp], [fn, tp]] = confusion_matrix(y_test.cpu().numpy(), y_test_pred)
146 |     return np.array([[tp, fp], [fn, tn]])
147 | 
148 | def model_performance(y_test, y_test_pred_proba):
149 |     """
150 |     Evaluation metrics for network performance.
151 |     """
152 |     y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]
153 | 
154 |     # Computing confusion matrix for test dataset
155 |     conf_matrix = standard_confusion_matrix(y_test, y_test_pred.numpy())
156 |     print("Confusion Matrix:")
157 |     print(conf_matrix)
158 | 
159 |     return y_test_pred, conf_matrix
160 | 
161 | def train(epoch, train_idxs):
162 |     global lr, train_acc
163 |     model.train()
164 |     batch_idx = 1      
165 |     total_loss = 0
166 |     correct = 0
167 |     pred = np.array([])
168 |     X_train = audio_features[train_idxs]
169 |     Y_train = audio_targets[train_idxs]
170 |     for i in range(0, X_train.shape[0], config['batch_size']):
171 |         if i + config['batch_size'] > X_train.shape[0]:
172 |             x, y = X_train[i:], Y_train[i:]
173 |         else:
174 |             x, y = X_train[i:(i + config['batch_size'])], Y_train[i:(
175 |                 i + config['batch_size'])]
176 |         if config['cuda']:
177 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()
178 |         else:
179 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), \
180 |                 Variable(torch.from_numpy(y))
181 | 
182 |         # 将模型的参数梯度设置为0
183 |         optimizer.zero_grad()
184 |         output = model(x)
185 |         pred = output.data.max(1, keepdim=True)[1]
186 |         #print(pred.shape, y.shape)
187 |         correct += pred.eq(y.data.view_as(pred)).cpu().sum()
188 |         loss = criterion(output, y)
189 |         # 后向传播调整参数
190 |         loss.backward()
191 |         # 根据梯度更新网络参数
192 |         optimizer.step()
193 |         batch_idx += 1
194 |         # loss.item()能够得到张量中的元素值
195 |         total_loss += loss.item()
196 | 
197 |     train_acc = correct
198 |     print(
199 |         'Train Epoch: {:2d}\t Learning rate: {:.4f}\tLoss: {:.6f}\t Accuracy: {}/{} ({:.0f}%)\n '
200 |         .format(epoch + 1, config['learning_rate'], total_loss, correct,
201 |                 X_train.shape[0], 100. * correct / X_train.shape[0]))
202 | 
203 | 
204 | def evaluate(model, test_idxs, fold, train_idxs_tmp, train_idxs):
205 |     model.eval()
206 |     batch_idx = 1
207 |     total_loss = 0
208 |     global max_f1, max_acc, min_mae, X_test_lens, max_prec, max_rec
209 |     pred = np.array([])
210 |     with torch.no_grad():
211 |         if config['cuda']:
212 |             x, y = Variable(torch.from_numpy(audio_features[test_idxs]).type(torch.FloatTensor), requires_grad=True).cuda(),\
213 |                 Variable(torch.from_numpy(audio_targets[test_idxs])).cuda()
214 |         else:
215 |             x, y = Variable(torch.from_numpy(audio_features[test_idxs]).type(torch.FloatTensor), requires_grad=True), \
216 |                 Variable(torch.from_numpy(audio_targets[test_idxs])).type(torch.LongTensor)
217 | 
218 |         optimizer.zero_grad()
219 |         output = model(x)
220 |         loss = criterion(output, y)
221 |         total_loss += loss.item()
222 |         y_test_pred, conf_matrix = model_performance(y, output.cpu())
223 |         accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)
224 |         precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])
225 |         recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])
226 |         f1_score = 2 * (precision * recall) / (precision + recall)
227 |         print("Accuracy: {}".format(accuracy))
228 |         print("Precision: {}".format(precision))
229 |         print("Recall: {}".format(recall))
230 |         print("F1-Score: {}\n".format(f1_score))
231 |         print('=' * 89)
232 | 
233 |         if max_f1 <= f1_score and train_acc > len(train_idxs)*0.90  and f1_score > 0.5:
234 |             max_f1 = f1_score
235 |             max_acc = accuracy
236 |             max_rec = recall
237 |             max_prec = precision
238 |             mode ='gru'
239 |             save(model, os.path.join(prefix, 'Model/ClassificationWhole/Audio/BiLSTM_{}_vlad{}_{}_{:.2f}_{}'.format(mode, config['embedding_size'], config['hidden_dims'], max_f1, fold)))
240 |             np.save(os.path.join(prefix, 'Features/TextWhole/train_idxs_{:.2f}_{}.npy'.format(f1_score, fold)), train_idxs_tmp)
241 |             print('*' * 64)
242 |             print('model saved: f1: {}\tacc: {}'.format(max_f1, max_acc))
243 |             print('*' * 64)
244 | 
245 |     return total_loss
246 | 
247 | def get_param_group(model):
248 |     nd_list = []
249 |     param_list = []
250 |     for name, param in model.named_parameters():
251 |         if 'ln' in name:
252 |             nd_list.append(param)
253 |         else:
254 |             param_list.append(param)
255 |     return [{'params': param_list, 'weight_decay': 1e-5}, {'params': nd_list, 'weight_decay': 0}]
256 | 
257 | if __name__ == '__main__':
258 |     # kf = KFold(n_splits=3, shuffle=True)
259 |     # fold = 1
260 |     # for train_idxs_tmp, test_idxs_tmp in kf.split(audio_features):
261 |     train_idxs_tmps = [np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.63_1.npy'), allow_pickle=True),
262 |     np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_2.npy'), allow_pickle=True),
263 |     np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_3.npy'), allow_pickle=True)]
264 |     for idx_idx, train_idxs_tmp in enumerate(train_idxs_tmps):
265 |         fold = idx_idx + 1
266 |         # if idx_idx != 1:
267 |         #     continue
268 |         test_idxs_tmp = list(set(list(audio_dep_idxs_tmp)+list(audio_non_idxs)) - set(train_idxs_tmp))
269 |         train_idxs, test_idxs = [], []
270 |         resample_idxs = [0,1,2,3,4,5]
271 |         # depression data augmentation
272 |         for idx in train_idxs_tmp:
273 |             if idx in audio_dep_idxs_tmp:
274 |                 feat = audio_features[idx]
275 |                 count = 0
276 |                 for i in itertools.permutations(feat, feat.shape[0]):
277 |                     if count in resample_idxs:
278 |                         audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
279 |                         audio_targets = np.hstack((audio_targets, 1))
280 |                         train_idxs.append(len(audio_features)-1)
281 |                     count += 1
282 |             else:
283 |                 train_idxs.append(idx)
284 | 
285 |         for idx in test_idxs_tmp:
286 |             if idx in audio_dep_idxs_tmp:
287 |                 feat = audio_features[idx]
288 |                 count = 0
289 |                 # resample_idxs = random.sample(range(6), 4)
290 |                 resample_idxs = [0,1,4,5]
291 |                 for i in itertools.permutations(feat, feat.shape[0]):
292 |                     if count in resample_idxs:
293 |                         audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
294 |                         audio_targets = np.hstack((audio_targets, 1))
295 |                         test_idxs.append(len(audio_features)-1)
296 |                     count += 1
297 |             else:
298 |                 test_idxs.append(idx)
299 |             # test_idxs.append(idx)
300 | 
301 |         model = AudioBiLSTM(config)
302 | 
303 |         if config['cuda']:
304 |             model = model.cuda()
305 | 
306 |         param_group = get_param_group(model)
307 |         optimizer = optim.AdamW(param_group, lr=config['learning_rate'])
308 |         criterion = nn.CrossEntropyLoss()
309 |         # criterion = FocalLoss(class_num=2)
310 |         max_f1 = -1
311 |         max_acc = -1
312 |         max_rec = -1
313 |         max_prec = -1
314 |         train_acc = -1
315 | 
316 |         for ep in range(1, config['epochs']):
317 |             train(ep, train_idxs)
318 |             tloss = evaluate(model, test_idxs, fold, train_idxs_tmp, train_idxs)
319 |         fold += 1


--------------------------------------------------------------------------------
/DepressionCollected/Classification/fuse_net_whole.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import torch
  3 | import torch.nn as nn
  4 | from torch.autograd import Variable
  5 | from torch.nn import functional as F
  6 | import torch.optim as optim
  7 | from sklearn.metrics import confusion_matrix
  8 | import numpy as np
  9 | import pandas as pd
 10 | import wave
 11 | import librosa
 12 | from python_speech_features import *
 13 | import re
 14 | from allennlp.commands.elmo import ElmoEmbedder
 15 | import os
 16 | import tensorflow.compat.v1 as tf
 17 | import itertools
 18 | 
 19 | prefix = os.path.abspath(os.path.join(os.getcwd(), "./"))
 20 | 
 21 | text_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_clf_avg.npz'))['arr_0']
 22 | text_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_clf_avg.npz'))['arr_0']
 23 | audio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_clf_256.npz'))['arr_0'], axis=2)
 24 | audio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_clf_256.npz'))['arr_0']
 25 | fuse_features = [[audio_features[i], text_features[i]] for i in range(text_features.shape[0])]
 26 | fuse_targets = text_targets
 27 | 
 28 | fuse_dep_idxs = np.where(text_targets == 1)[0]
 29 | fuse_non_idxs = np.where(text_targets == 0)[0]
 30 | 
 31 | def save(model, filename):
 32 |     save_filename = '{}.pt'.format(filename)
 33 |     torch.save(model, save_filename)
 34 |     print('Saved as %s' % save_filename)
 35 |     
 36 | def standard_confusion_matrix(y_test, y_test_pred):
 37 |     """
 38 |     Make confusion matrix with format:
 39 |                   -----------
 40 |                   | TP | FP |
 41 |                   -----------
 42 |                   | FN | TN |
 43 |                   -----------
 44 |     Parameters
 45 |     ----------
 46 |     y_true : ndarray - 1D
 47 |     y_pred : ndarray - 1D
 48 | 
 49 |     Returns
 50 |     -------
 51 |     ndarray - 2D
 52 |     """
 53 |     [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)
 54 |     return np.array([[tp, fp], [fn, tn]])
 55 | 
 56 | def model_performance(y_test, y_test_pred_proba):
 57 |     """
 58 |     Evaluation metrics for network performance.
 59 |     """
 60 |     # y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]
 61 |     y_test_pred = y_test_pred_proba
 62 | 
 63 |     # Computing confusion matrix for test dataset
 64 |     conf_matrix = standard_confusion_matrix(y_test, y_test_pred)
 65 |     print("Confusion Matrix:")
 66 |     print(conf_matrix)
 67 | 
 68 |     return y_test_pred, conf_matrix
 69 | 
 70 | class TextBiLSTM(nn.Module):
 71 |     def __init__(self, config):
 72 |         super(TextBiLSTM, self).__init__()
 73 |         self.num_classes = config['num_classes']
 74 |         self.learning_rate = config['learning_rate']
 75 |         self.dropout = config['dropout']
 76 |         self.hidden_dims = config['hidden_dims']
 77 |         self.rnn_layers = config['rnn_layers']
 78 |         self.embedding_size = config['embedding_size']
 79 |         self.bidirectional = config['bidirectional']
 80 | 
 81 |         self.build_model()
 82 |         self.init_weight()
 83 |         
 84 |     def init_weight(net):
 85 |         for name, param in net.named_parameters():
 86 |             if 'bias' in name:
 87 |                 nn.init.constant_(param, 0.0)
 88 |             elif 'weight' in name:
 89 |                 nn.init.xavier_uniform_(param)
 90 | 
 91 |     def build_model(self):
 92 |         # attention layer
 93 |         self.attention_layer = nn.Sequential(
 94 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 95 |             nn.ReLU(inplace=True)
 96 |         )
 97 |         # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)
 98 | 
 99 |         # 双层lstm
100 |         self.lstm_net = nn.LSTM(self.embedding_size, self.hidden_dims,
101 |                                 num_layers=self.rnn_layers, dropout=self.dropout,
102 |                                 bidirectional=self.bidirectional)
103 |         
104 |         # self.init_weight()
105 |         
106 |         # FC层
107 |         # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)
108 |         self.fc_out = nn.Sequential(
109 |             nn.Dropout(self.dropout),
110 |             nn.Linear(self.hidden_dims, self.hidden_dims),
111 |             nn.ReLU(),
112 |             nn.Dropout(self.dropout),
113 |             nn.Linear(self.hidden_dims, self.num_classes),
114 |             # nn.ReLU(),
115 |             nn.Softmax(dim=1),
116 |         )
117 | 
118 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
119 |         '''
120 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
121 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
122 |         :return: [batch_size, n_hidden]
123 |         '''
124 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
125 |         # h [batch_size, time_step, hidden_dims]
126 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
127 |         # h = lstm_out
128 |         # [batch_size, num_layers * num_directions, n_hidden]
129 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
130 |         # [batch_size, 1, n_hidden]
131 |         lstm_hidden = lstm_hidden.unsqueeze(1)
132 |         # atten_w [batch_size, 1, hidden_dims]
133 |         atten_w = self.attention_layer(lstm_hidden)
134 |         # m [batch_size, time_step, hidden_dims]
135 |         m = nn.Tanh()(h)
136 |         # atten_context [batch_size, 1, time_step]
137 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
138 |         # softmax_w [batch_size, 1, time_step]
139 |         softmax_w = F.softmax(atten_context, dim=-1)
140 |         # context [batch_size, 1, hidden_dims]
141 |         context = torch.bmm(softmax_w, h)
142 |         result = context.squeeze(1)
143 |         return result
144 | 
145 |     def forward(self, x):
146 |         
147 |         # x : [len_seq, batch_size, embedding_dim]
148 |         x = x.permute(1, 0, 2)
149 |         output, (final_hidden_state, final_cell_state) = self.lstm_net(x)
150 |         # output : [batch_size, len_seq, n_hidden * 2]
151 |         output = output.permute(1, 0, 2)
152 |         # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]
153 |         final_hidden_state = final_hidden_state.permute(1, 0, 2)
154 |         # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)
155 |         # atten_out = self.attention_net(output, final_hidden_state)
156 |         atten_out = self.attention_net_with_w(output, final_hidden_state)
157 |         return self.fc_out(atten_out)
158 | 
159 | class AudioBiLSTM(nn.Module):
160 |     def __init__(self, config):
161 |         super(AudioBiLSTM, self).__init__()
162 |         self.num_classes = config['num_classes']
163 |         self.learning_rate = config['learning_rate']
164 |         self.dropout = config['dropout']
165 |         self.hidden_dims = config['hidden_dims']
166 |         self.rnn_layers = config['rnn_layers']
167 |         self.embedding_size = config['embedding_size']
168 |         self.bidirectional = config['bidirectional']
169 | 
170 |         self.build_model()
171 |         # self.init_weight()
172 | 
173 |     def init_weight(net):
174 |         for name, param in net.named_parameters():
175 |             if not 'ln' in name:
176 |                 if 'bias' in name:
177 |                     nn.init.constant_(param, 0.0)
178 |                 elif 'weight' in name:
179 |                     nn.init.xavier_uniform_(param)
180 | 
181 |     def build_model(self):
182 |         # attention layer
183 |         self.attention_layer = nn.Sequential(
184 |             nn.Linear(self.hidden_dims, self.hidden_dims),
185 |             nn.ReLU(inplace=True))
186 |         # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)
187 | 
188 |         # self.lstm_net_audio = nn.LSTM(self.embedding_size,
189 |         #                         self.hidden_dims,
190 |         #                         num_layers=self.rnn_layers,
191 |         #                         dropout=self.dropout,
192 |         #                         bidirectional=self.bidirectional,
193 |         #                         batch_first=True)
194 |         self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,
195 |                                 num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)
196 | 
197 |         self.ln = nn.LayerNorm(self.embedding_size)
198 | 
199 |         # FC层
200 |         self.fc_audio = nn.Sequential(
201 |             nn.Dropout(self.dropout),
202 |             nn.Linear(self.hidden_dims, self.hidden_dims),
203 |             nn.ReLU(),
204 |             nn.Dropout(self.dropout),
205 |             nn.Linear(self.hidden_dims, self.num_classes),
206 |             # nn.ReLU(),
207 |             nn.Softmax(dim=1)
208 |         )
209 | 
210 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
211 |         '''
212 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
213 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
214 |         :return: [batch_size, n_hidden]
215 |         '''
216 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
217 |         # h [batch_size, time_step, hidden_dims]
218 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
219 |         #         h = lstm_out
220 |         # [batch_size, num_layers * num_directions, n_hidden]
221 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
222 |         # [batch_size, 1, n_hidden]
223 |         lstm_hidden = lstm_hidden.unsqueeze(1)
224 |         # atten_w [batch_size, 1, hidden_dims]
225 |         atten_w = self.attention_layer(lstm_hidden)
226 |         # m [batch_size, time_step, hidden_dims]
227 |         m = nn.Tanh()(h)
228 |         # atten_context [batch_size, 1, time_step]
229 |        # print(atten_w.shape, m.transpose(1, 2).shape)
230 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
231 |         # softmax_w [batch_size, 1, time_step]
232 |         softmax_w = F.softmax(atten_context, dim=-1)
233 |         # context [batch_size, 1, hidden_dims]
234 |         context = torch.bmm(softmax_w, h)
235 |         result = context.squeeze(1)
236 |         return result
237 | 
238 |     def forward(self, x):
239 |         x = self.ln(x)
240 |         x, _ = self.lstm_net_audio(x)
241 |         x = x.mean(dim=1)
242 |         out = self.fc_audio(x)
243 |         return out
244 | 
245 | class fusion_net(nn.Module):
246 |     def __init__(self, text_embed_size, text_hidden_dims, rnn_layers, dropout, num_classes, \
247 |          audio_hidden_dims, audio_embed_size):
248 |         super(fusion_net, self).__init__()
249 |         self.text_embed_size = text_embed_size
250 |         self.audio_embed_size = audio_embed_size
251 |         self.text_hidden_dims = text_hidden_dims
252 |         self.audio_hidden_dims = audio_hidden_dims
253 |         self.rnn_layers = rnn_layers
254 |         self.dropout = dropout
255 |         self.num_classes = num_classes
256 |         
257 |         # ============================= TextBiLSTM =================================
258 |         
259 |         # attention layer
260 |         self.attention_layer = nn.Sequential(
261 |             nn.Linear(self.text_hidden_dims, self.text_hidden_dims),
262 |             nn.ReLU(inplace=True)
263 |         )
264 | 
265 |         # 双层lstm
266 |         self.lstm_net = nn.LSTM(self.text_embed_size, self.text_hidden_dims,
267 |                                 num_layers=self.rnn_layers, dropout=self.dropout,
268 |                                 bidirectional=True)
269 |         # FC层
270 |         self.fc_out = nn.Sequential(
271 |             nn.Dropout(self.dropout),
272 |             nn.Linear(self.text_hidden_dims, self.text_hidden_dims),
273 |             nn.ReLU(),
274 |             nn.Dropout(self.dropout)
275 |         )
276 |         
277 |         # ============================= TextBiLSTM =================================
278 | 
279 |         # ============================= AudioBiLSTM =============================
280 | 
281 |         self.lstm_net_audio = nn.GRU(self.audio_embed_size,
282 |                                 self.audio_hidden_dims,
283 |                                 num_layers=self.rnn_layers,
284 |                                 dropout=self.dropout,
285 |                                 bidirectional=False,
286 |                                 batch_first=True)
287 | 
288 |         self.fc_audio = nn.Sequential(
289 |             nn.Dropout(self.dropout),
290 |             nn.Linear(self.audio_hidden_dims, self.audio_hidden_dims),
291 |             nn.ReLU(),
292 |             nn.Dropout(self.dropout)
293 |         )
294 | 
295 |         self.ln = nn.LayerNorm(self.audio_embed_size)
296 |         
297 |         # ============================= AudioBiLSTM =============================
298 | 
299 |         # ============================= last fc layer =============================
300 |         # self.bn = nn.BatchNorm1d(self.text_hidden_dims + self.audio_hidden_dims)
301 |         # modal attention
302 |         self.modal_attn = nn.Linear(self.text_hidden_dims + self.audio_hidden_dims, self.text_hidden_dims + self.audio_hidden_dims, bias=False)
303 |         self.fc_final = nn.Sequential(
304 |             nn.Linear(self.text_hidden_dims + self.audio_hidden_dims, self.num_classes, bias=False),
305 |             # nn.ReLU(),
306 |             nn.Softmax(dim=1),
307 |             # nn.Sigmoid()
308 |         )
309 |         
310 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
311 |         '''
312 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
313 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
314 |         :return: [batch_size, n_hidden]
315 |         '''
316 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
317 |         # h [batch_size, time_step, hidden_dims]
318 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
319 |         # [batch_size, num_layers * num_directions, n_hidden]
320 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
321 |         # [batch_size, 1, n_hidden]
322 |         lstm_hidden = lstm_hidden.unsqueeze(1)
323 |         # atten_w [batch_size, 1, hidden_dims]
324 |         atten_w = self.attention_layer(lstm_hidden)
325 |         # m [batch_size, time_step, hidden_dims]
326 |         m = nn.Tanh()(h)
327 |         # atten_context [batch_size, 1, time_step]
328 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
329 |         # softmax_w [batch_size, 1, time_step]
330 |         softmax_w = F.softmax(atten_context, dim=-1)
331 |         # context [batch_size, 1, hidden_dims]
332 |         context = torch.bmm(softmax_w, h)
333 |         result = context.squeeze(1)
334 |         return result
335 |     
336 |     def pretrained_feature(self, x):
337 |         with torch.no_grad():
338 |             x_text = []
339 |             x_audio = []
340 |             for ele in x:
341 |                 x_text.append(ele[1])
342 |                 x_audio.append(ele[0])
343 |             x_text, x_audio = Variable(torch.tensor(x_text).type(torch.FloatTensor), requires_grad=False), Variable(torch.tensor(x_audio).type(torch.FloatTensor), requires_grad=False)
344 |             # ============================= TextBiLSTM =================================
345 |             # x : [len_seq, batch_size, embedding_dim]
346 |             x_text = x_text.permute(1, 0, 2)
347 |             output, (final_hidden_state, _) = self.lstm_net(x_text)
348 |             # output : [batch_size, len_seq, n_hidden * 2]
349 |             output = output.permute(1, 0, 2)
350 |             # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]
351 |             final_hidden_state = final_hidden_state.permute(1, 0, 2)
352 |             # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)
353 |             # atten_out = self.attention_net(output, final_hidden_state)
354 |             atten_out = self.attention_net_with_w(output, final_hidden_state)
355 |             text_feature = self.fc_out(atten_out)
356 | 
357 |             # ============================= TextBiLSTM =================================
358 | 
359 |             # ============================= AudioBiLSTM =============================
360 |             x_audio = self.ln(x_audio)
361 |             x_audio, _ = self.lstm_net_audio(x_audio)
362 |             x_audio = x_audio.sum(dim=1)
363 |             audio_feature = self.fc_audio(x_audio)
364 | 
365 |         # ============================= AudioBiLSTM =============================
366 |         return (text_feature, audio_feature)
367 |         
368 |     def forward(self, x): 
369 |         # x = self.bn(x)
370 |         # modal_weights = torch.softmax(self.modal_attn(x), dim=1)
371 |         # modal_weights = self.modal_attn(x)
372 |         # x = (modal_weights * x)
373 |         output = self.fc_final(x)
374 |         return output
375 |     
376 | class MyLoss(nn.Module):
377 |     def __init__(self):
378 |         super(MyLoss, self).__init__()
379 |         
380 |     def forward(self, text_feature, audio_feature, target, model):
381 |         weight = model.fc_final[0].weight
382 |         # bias = model.fc_final[0].bias
383 |         # print(weight, bias)
384 |         pred_text = F.linear(text_feature, weight[:, :config['text_hidden_dims']])
385 |         pred_audio = F.linear(audio_feature, weight[:, config['text_hidden_dims']:])
386 |         l = nn.CrossEntropyLoss()
387 |         target = torch.tensor(target)
388 |         # l = nn.BCEWithLogitsLoss()
389 |         # target = F.one_hot(target, num_classes=2).type(torch.FloatTensor)
390 |         # print('y: {}\npred_audio: {}\npred_text: {}\n'.format(target, pred_audio.data.max(1, keepdim=True)[1], pred_text.data.max(1, keepdim=True)[1]))
391 |         # return l(pred_text, target) + l(pred_audio, target) + \
392 |         #         config['lambda']*torch.norm(weight[:, :config['text_hidden_dims']]) + \
393 |         #         config['lambda']*torch.norm(weight[:, config['text_hidden_dims']:])  
394 |         # a = F.softmax(pred_text, dim=1) + F.softmax(pred_audio, dim=1)
395 |         return l(pred_text, target) + l(pred_audio, target)
396 |     
397 | 
398 | config = {
399 |     'num_classes': 2,
400 |     'dropout': 0.3,
401 |     'rnn_layers': 2,
402 |     'audio_embed_size': 256,
403 |     'text_embed_size': 1024,
404 |     'batch_size': 2,
405 |     'epochs': 100,
406 |     'learning_rate': 8e-6,
407 |     'audio_hidden_dims': 256,
408 |     'text_hidden_dims': 128,
409 |     'cuda': False,
410 |     'lambda': 1e-5,
411 | }
412 | 
413 | model = fusion_net(config['text_embed_size'], config['text_hidden_dims'], config['rnn_layers'], \
414 |     config['dropout'], config['num_classes'], config['audio_hidden_dims'], config['audio_embed_size'])
415 | 
416 | optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
417 | # optimizer = optim.Adam(model.parameters())
418 | # criterion = nn.CrossEntropyLoss()
419 | criterion = MyLoss()
420 | 
421 | def train(epoch, train_idxs):
422 |     global max_train_acc, train_acc
423 |     model.train()
424 |     batch_idx = 1
425 |     total_loss = 0
426 |     correct = 0
427 |     X_train = []
428 |     Y_train = []
429 |     for idx in train_idxs:
430 |         X_train.append(fuse_features[idx])
431 |         Y_train.append(fuse_targets[idx])
432 |     for i in range(0, len(X_train), config['batch_size']):
433 |         if i + config['batch_size'] > len(X_train):
434 |             x, y = X_train[i:], Y_train[i:]
435 |         else:
436 |             x, y = X_train[i:(i+config['batch_size'])], Y_train[i:(i+config['batch_size'])]
437 |         if config['cuda']:
438 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()
439 |         # 将模型的参数梯度设置为0
440 |         optimizer.zero_grad()
441 |         text_feature, audio_feature = model.pretrained_feature(x)
442 |         # text_feature = torch.from_numpy(ss.fit_transform(text_feature.numpy()))
443 |         # audio_feature = torch.from_numpy(ss.fit_transform(audio_feature.numpy()))
444 |         # concat_x = torch.cat((audio_feature, text_feature), dim=1)
445 |         concat_x = torch.cat((text_feature, audio_feature), dim=1)
446 |         # dot_x = text_feature.mul(audio_feature)
447 |         # add_x = text_feature.add(audio_feature)
448 |         output = model(concat_x)
449 |         pred = output.data.max(1, keepdim=True)[1]
450 |         correct += pred.eq(torch.tensor(y).data.view_as(pred)).cpu().sum()
451 |         # loss = criterion(output, torch.tensor(y))
452 |         loss = criterion(text_feature, audio_feature, y, model)
453 |         # 后向传播调整参数
454 |         loss.backward()
455 |         # 根据梯度更新网络参数
456 |         optimizer.step()
457 |         batch_idx += 1
458 |         # loss.item()能够得到张量中的元素值
459 |         total_loss += loss.item()
460 |     cur_loss = total_loss
461 |     max_train_acc = correct
462 |     train_acc = correct
463 |     print('Train Epoch: {:2d}\t Learning rate: {:.4f}\tLoss: {:.6f}\t Accuracy: {}/{} ({:.0f}%)\n '.format(
464 |                 epoch, config['learning_rate'], cur_loss/len(X_train), correct, len(X_train),
465 |         100. * correct / len(X_train)))
466 | 
467 | 
468 | def evaluate(model, test_idxs, fold, train_idxs):
469 |     model.eval()
470 |     batch_idx = 1
471 |     total_loss = 0
472 |     pred = torch.empty(config['batch_size'], 1).type(torch.LongTensor)
473 |     X_test = []
474 |     Y_test = []
475 |     for idx in test_idxs:
476 |         X_test.append(fuse_features[idx])
477 |         Y_test.append(fuse_targets[idx])
478 |     global max_train_acc, max_acc,max_f1
479 |     for i in range(0, len(X_test), config['batch_size']):
480 |         if i + config['batch_size'] > len(X_test):
481 |             x, y = X_test[i:], Y_test[i:]
482 |         else:
483 |             x, y = X_test[i:(i+config['batch_size'])], Y_test[i:(i+config['batch_size'])]
484 |         if config['cuda']:
485 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()
486 |         text_feature, audio_feature = model.pretrained_feature(x)
487 |         with torch.no_grad():
488 |             # concat_x = torch.cat((audio_feature, text_feature), dim=1)
489 |             audio_feature_norm = (audio_feature - audio_feature.mean())/audio_feature.std()
490 |             text_feature_norm = (text_feature - text_feature.mean())/text_feature.std()
491 |             concat_x = torch.cat((text_feature, audio_feature), dim=1)
492 |             output = model(concat_x)
493 |         # loss = criterion(output, torch.tensor(y))
494 |         loss = criterion(text_feature, audio_feature, y, model)
495 |         pred = torch.cat((pred, output.data.max(1, keepdim=True)[1]))
496 |         total_loss += loss.item()
497 |         
498 |     y_test_pred, conf_matrix = model_performance(Y_test, pred[config['batch_size']:])
499 |     
500 |     print('\nTest set: Average loss: {:.4f}'.format(total_loss/len(X_test)))
501 |     # custom evaluation metrics
502 |     print('Calculating additional test metrics...')
503 |     accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)
504 |     precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])
505 |     recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])
506 |     f1_score = 2 * (precision * recall) / (precision + recall)
507 |     print("Accuracy: {}".format(accuracy))
508 |     print("Precision: {}".format(precision))
509 |     print("Recall: {}".format(recall))
510 |     print("F1-Score: {}\n".format(f1_score))
511 |     print('='*89)
512 |     
513 |     if max_f1 < f1_score and max_train_acc >= len(train_idxs)*0.9 and f1_score > 0.61:
514 |         max_f1 = f1_score
515 |         max_acc = accuracy
516 |         save(model, os.path.join(prefix, 'Model/ClassificationWhole/Fuse/fuse_{:.2f}_{}'.format(max_f1, fold)))
517 |         print('*'*64)
518 |         print('model saved: f1: {}\tacc: {}'.format(max_f1, max_acc))
519 |         print('*'*64)
520 |     return total_loss
521 | 
522 | if __name__ == '__main__':
523 |     idxs_paths = ['train_idxs_0.63_1.npy', 'train_idxs_0.65_2.npy', 'train_idxs_0.60_3.npy']
524 |     text_model_paths = ['BiLSTM_128_0.64_1.pt', 'BiLSTM_128_0.66_2.pt', 'BiLSTM_128_0.62_3.pt']
525 |     audio_model_paths = ['BiLSTM_gru_vlad256_256_0.67_1.pt', 'BiLSTM_gru_vlad256_256_0.67_2.pt', 'BiLSTM_gru_vlad256_256_0.63_3.pt']
526 |     for fold in range(1, 4):
527 |         # if fold != 2:
528 |         #     continue
529 |         train_idxs_tmp = np.load(os.path.join(prefix, 'Features/TextWhole/{}'.format(idxs_paths[fold-1])), allow_pickle=True)
530 |         test_idxs_tmp = list(set(list(fuse_dep_idxs)+list(fuse_non_idxs)) - set(train_idxs_tmp))
531 |         resample_idxs = list(range(6))
532 | 
533 |         train_idxs, test_idxs = [], []
534 |         # depression data augmentation
535 |         for idx in train_idxs_tmp:
536 |             if idx in fuse_dep_idxs:
537 |                 feat = fuse_features[idx]
538 |                 audio_perm = itertools.permutations(feat[0], 3)
539 |                 text_perm = itertools.permutations(feat[1], 3)
540 |                 count = 0
541 |                 for fuse_perm in zip(audio_perm, text_perm):
542 |                     if count in resample_idxs:
543 |                         fuse_features.append(fuse_perm)
544 |                         fuse_targets = np.hstack((fuse_targets, 1))
545 |                         train_idxs.append(len(fuse_features)-1)
546 |                     count += 1
547 |             else:
548 |                 train_idxs.append(idx)
549 | 
550 |         for idx in test_idxs_tmp:
551 |             if idx in fuse_dep_idxs:
552 |                 feat = fuse_features[idx]
553 |                 audio_perm = itertools.permutations(feat[0], 3)
554 |                 text_perm = itertools.permutations(feat[1], 3)
555 |                 count = 0
556 |                 resample_idxs = [0,1,4,5]
557 |                 for fuse_perm in zip(audio_perm, text_perm):
558 |                     if count in resample_idxs:
559 |                         fuse_features.append(fuse_perm)
560 |                         fuse_targets = np.hstack((fuse_targets, 1))
561 |                         test_idxs.append(len(fuse_features)-1)
562 |                     count += 1
563 |             else:
564 |                 test_idxs.append(idx)
565 | 
566 |         text_lstm_model = torch.load(os.path.join(prefix, 'Model/ClassificationWhole/Text/{}'.format(text_model_paths[fold-1])))
567 |         audio_lstm_model = torch.load(os.path.join(prefix, 'Model/ClassificationWhole/Audio/{}'.format(audio_model_paths[fold-1])))
568 |         model_state_dict = {}
569 |         model_state_dict['lstm_net_audio.weight_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l0']
570 |         model_state_dict['lstm_net_audio.weight_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l0']
571 |         model_state_dict['lstm_net_audio.bias_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l0']
572 |         model_state_dict['lstm_net_audio.bias_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l0']
573 | 
574 |         model_state_dict['lstm_net_audio.weight_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l1']
575 |         model_state_dict['lstm_net_audio.weight_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l1']
576 |         model_state_dict['lstm_net_audio.bias_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l1']
577 |         model_state_dict['lstm_net_audio.bias_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l1']
578 | 
579 |         model_state_dict['fc_audio.1.weight'] = audio_lstm_model.state_dict()['fc_audio.1.weight']
580 |         model_state_dict['fc_audio.1.bias'] = audio_lstm_model.state_dict()['fc_audio.1.bias']
581 |         model_state_dict['fc_audio.4.weight'] = audio_lstm_model.state_dict()['fc_audio.4.weight']
582 |         model_state_dict['fc_audio.4.bias'] = audio_lstm_model.state_dict()['fc_audio.4.bias']
583 | 
584 |         model_state_dict['ln.weight'] = audio_lstm_model.state_dict()['ln.weight']
585 |         model_state_dict['ln.bias'] = audio_lstm_model.state_dict()['ln.bias']
586 |         model.load_state_dict(text_lstm_model.state_dict(), strict=False)
587 |         # model.load_state_dict(audio_lstm_model.state_dict(), strict=False)
588 |         model.load_state_dict(model_state_dict, strict=False)
589 |             
590 |         for param in model.parameters():
591 |             param.requires_grad = False
592 | 
593 |         model.fc_final[0].weight.requires_grad = True
594 |         # model.fc_final[0].bias.requires_grad = True
595 |         # model.modal_attn.weight.requires_grad = True
596 | 
597 |         max_f1 = -1
598 |         max_acc = -1
599 |         max_train_acc = -1
600 | 
601 |         for ep in range(1, config['epochs']):
602 |             train(ep, train_idxs)
603 |             tloss = evaluate(model, test_idxs, fold, train_idxs)


--------------------------------------------------------------------------------
/DepressionCollected/Classification/text_bilstm_whole.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.autograd import Variable
  4 | from torch.nn import functional as F
  5 | import torch.optim as optim
  6 | from sklearn.metrics import confusion_matrix
  7 | from sklearn.metrics import mean_absolute_error, mean_squared_error
  8 | from sklearn.model_selection import train_test_split
  9 | 
 10 | import numpy as np
 11 | import pandas as pd
 12 | import os
 13 | import pickle
 14 | import random
 15 | import itertools
 16 | 
 17 | prefix = os.path.abspath(os.path.join(os.getcwd(), "."))
 18 | text_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_clf_avg.npz'))['arr_0']
 19 | text_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_clf_avg.npz'))['arr_0']
 20 | text_dep_idxs_tmp = np.where(text_targets == 1)[0]
 21 | text_non_idxs = np.where(text_targets == 0)[0]
 22 | 
 23 | class TextBiLSTM(nn.Module):
 24 |     def __init__(self, config):
 25 |         super(TextBiLSTM, self).__init__()
 26 |         self.num_classes = config['num_classes']
 27 |         self.learning_rate = config['learning_rate']
 28 |         self.dropout = config['dropout']
 29 |         self.hidden_dims = config['hidden_dims']
 30 |         self.rnn_layers = config['rnn_layers']
 31 |         self.embedding_size = config['embedding_size']
 32 |         self.bidirectional = config['bidirectional']
 33 | 
 34 |         self.build_model()
 35 |         self.init_weight()
 36 |         
 37 |     def init_weight(net):
 38 |         for name, param in net.named_parameters():
 39 |             if 'ln' not in name:
 40 |                 if 'bias' in name:
 41 |                     nn.init.constant_(param, 0.0)
 42 |                 elif 'weight' in name:
 43 |                     nn.init.xavier_uniform_(param)
 44 | 
 45 |     def build_model(self):
 46 |         # attention layer
 47 |         self.attention_layer = nn.Sequential(
 48 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 49 |             nn.ReLU(inplace=True)
 50 |         )
 51 |         # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)
 52 | 
 53 |         # 双层lstm
 54 |         self.lstm_net = nn.LSTM(self.embedding_size, self.hidden_dims,
 55 |                                 num_layers=self.rnn_layers, dropout=self.dropout,
 56 |                                 bidirectional=self.bidirectional)
 57 |                 
 58 |         # FC层
 59 |         # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)
 60 |         self.fc_out = nn.Sequential(
 61 |             # nn.Dropout(self.dropout),
 62 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 63 |             nn.ReLU(),
 64 |             nn.Dropout(self.dropout),
 65 |             nn.Linear(self.hidden_dims, self.num_classes),
 66 |             # nn.ReLU(),
 67 |             nn.Softmax(dim=1),
 68 |         )
 69 | 
 70 |         self.ln1 = nn.LayerNorm(self.embedding_size)
 71 |         self.ln2 = nn.LayerNorm(self.hidden_dims)
 72 | 
 73 | 
 74 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
 75 |         '''
 76 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
 77 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
 78 |         :return: [batch_size, n_hidden]
 79 |         '''
 80 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
 81 |         # h [batch_size, time_step, hidden_dims]
 82 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
 83 |         # h = lstm_out
 84 |         # [batch_size, num_layers * num_directions, n_hidden]
 85 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
 86 |         # [batch_size, 1, n_hidden]
 87 |         lstm_hidden = lstm_hidden.unsqueeze(1)
 88 |         # atten_w [batch_size, 1, hidden_dims]
 89 |         atten_w = self.attention_layer(lstm_hidden)
 90 |         # m [batch_size, time_step, hidden_dims]
 91 |         m = nn.Tanh()(h)
 92 |         # atten_context [batch_size, 1, time_step]
 93 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
 94 |         # softmax_w [batch_size, 1, time_step]
 95 |         softmax_w = F.softmax(atten_context, dim=-1)
 96 |         # context [batch_size, 1, hidden_dims]
 97 |         context = torch.bmm(softmax_w, h)
 98 |         result = context.squeeze(1)
 99 |         return result
100 | 
101 |     def forward(self, x):
102 |         # x : [len_seq, batch_size, embedding_dim]
103 |         x = x.permute(1, 0, 2)
104 |         # x = self.ln1(x)
105 |         output, (final_hidden_state, _) = self.lstm_net(x)
106 |         # output : [batch_size, len_seq, n_hidden * 2]
107 |         output = output.permute(1, 0, 2)
108 |         # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]
109 |         final_hidden_state = final_hidden_state.permute(1, 0, 2)
110 |         # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)
111 |         # atten_out = self.attention_net(output, final_hidden_state)
112 |         atten_out = self.attention_net_with_w(output, final_hidden_state)
113 |         # atten_out = self.ln2(atten_out)
114 |         return self.fc_out(atten_out)
115 | 
116 | def save(model, filename):
117 |     save_filename = '{}.pt'.format(filename)
118 |     torch.save(model, save_filename)
119 |     print('Saved as %s' % save_filename)
120 |     
121 | def standard_confusion_matrix(y_test, y_test_pred):
122 |     """
123 |     Make confusion matrix with format:
124 |                   -----------
125 |                   | TP | FP |
126 |                   -----------
127 |                   | FN | TN |
128 |                   -----------
129 |     Parameters
130 |     ----------
131 |     y_true : ndarray - 1D
132 |     y_pred : ndarray - 1D
133 | 
134 |     Returns
135 |     -------
136 |     ndarray - 2D
137 |     """
138 |     [[tn, fp], [fn, tp]] = confusion_matrix(y_test, y_test_pred)
139 |     return np.array([[tp, fp], [fn, tn]])
140 | 
141 | def model_performance(y_test, y_test_pred_proba):
142 |     """
143 |     Evaluation metrics for network performance.
144 |     """
145 |     y_test_pred = y_test_pred_proba.data.max(1, keepdim=True)[1]
146 | 
147 |     # Computing confusion matrix for test dataset
148 |     conf_matrix = standard_confusion_matrix(y_test, y_test_pred)
149 |     print("Confusion Matrix:")
150 |     print(conf_matrix)
151 | 
152 |     return y_test_pred, conf_matrix
153 | 
154 | def train(epoch, train_idxs):
155 |     global lr, train_acc
156 |     model.train()
157 |     batch_idx = 1
158 |     total_loss = 0
159 |     correct = 0
160 |     X_train = text_features[train_idxs]
161 |     Y_train = text_targets[train_idxs]
162 |     for i in range(0, X_train.shape[0], config['batch_size']):
163 |         if i + config['batch_size'] > X_train.shape[0]:
164 |             x, y = X_train[i:], Y_train[i:]
165 |         else:
166 |             x, y = X_train[i:(i + config['batch_size'])], Y_train[i:(
167 |                 i + config['batch_size'])]
168 |         if config['cuda']:
169 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()
170 |         else:
171 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), \
172 |                 Variable(torch.from_numpy(y))
173 | 
174 |         # 将模型的参数梯度设置为0
175 |         optimizer.zero_grad()
176 |         output = model(x)
177 |         pred = output.data.max(1, keepdim=True)[1]
178 |         #print(pred.shape, y.shape)
179 |         correct += pred.eq(y.data.view_as(pred)).cpu().sum()
180 |         loss = criterion(output, y)
181 |         # 后向传播调整参数
182 |         loss.backward()
183 |         # 根据梯度更新网络参数
184 |         optimizer.step()
185 |         batch_idx += 1
186 |         # loss.item()能够得到张量中的元素值
187 |         total_loss += loss.item()
188 | 
189 |     train_acc = correct
190 |     print(
191 |         'Train Epoch: {:2d}\t Learning rate: {:.4f}\tLoss: {:.6f}\t Accuracy: {}/{} ({:.0f}%)\n '
192 |         .format(epoch + 1, config['learning_rate'], total_loss, correct,
193 |                 X_train.shape[0], 100. * correct / X_train.shape[0]))
194 | 
195 | 
196 | def evaluate(model, test_idxs, fold, train_idxs):
197 |     model.eval()
198 |     batch_idx = 1
199 |     total_loss = 0
200 |     global max_f1, max_acc, min_mae, X_test_lens, max_prec, max_rec
201 |     pred = np.array([])
202 |     with torch.no_grad():
203 |         if config['cuda']:
204 |             x, y = Variable(torch.from_numpy(text_features[test_idxs]).type(torch.FloatTensor), requires_grad=True).cuda(),\
205 |                 Variable(torch.from_numpy(text_targets[test_idxs])).cuda()
206 |         else:
207 |             x, y = Variable(torch.from_numpy(text_features[test_idxs]).type(torch.FloatTensor), requires_grad=True), \
208 |                 Variable(torch.from_numpy(text_targets[test_idxs])).type(torch.LongTensor)
209 | 
210 |         optimizer.zero_grad()
211 |         output = model(x)
212 |         loss = criterion(output, y)
213 |         total_loss += loss.item()
214 |         y_test_pred, conf_matrix = model_performance(y, output.cpu())
215 |         accuracy = float(conf_matrix[0][0] + conf_matrix[1][1]) / np.sum(conf_matrix)
216 |         precision = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[0][1])
217 |         recall = float(conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])
218 |         f1_score = 2 * (precision * recall) / (precision + recall)
219 |         print("Accuracy: {}".format(accuracy))
220 |         print("Precision: {}".format(precision))
221 |         print("Recall: {}".format(recall))
222 |         print("F1-Score: {}\n".format(f1_score))
223 |         print('=' * 89)
224 | 
225 |         if max_f1 <= f1_score and train_acc > len(train_idxs)*0.9 and f1_score > 0.5:
226 |             max_f1 = f1_score
227 |             max_acc = accuracy
228 |             max_rec = recall
229 |             max_prec = precision
230 |             save(model, os.path.join(prefix, 'Model/ClassificationWhole/Text/BiLSTM_{}_{:.2f}_{}'.format(config['hidden_dims'], max_f1, fold)))
231 |             print('*' * 64)
232 |             print('model saved: f1: {}\tacc: {}'.format(max_f1, max_acc))
233 |             print('*' * 64)
234 | 
235 |     return total_loss
236 | 
237 | def get_param_group(model):
238 |     nd_list = []
239 |     param_list = []
240 |     for name, param in model.named_parameters():
241 |         if 'ln' in name:
242 |             nd_list.append(param)
243 |         else:
244 |             param_list.append(param)
245 |     return [{'params': param_list, 'weight_decay': 1e-5}, {'params': nd_list, 'weight_decay': 0}]
246 | 
247 | config = {
248 |     'num_classes': 2,
249 |     'dropout': 0.5,
250 |     'rnn_layers': 2,
251 |     'embedding_size': 1024,
252 |     'batch_size': 4,
253 |     'epochs': 150,
254 |     'learning_rate': 1e-5,
255 |     'hidden_dims': 128,
256 |     'bidirectional': True,
257 |     'cuda': False,
258 | }
259 | 
260 | train_idxs_tmps = [np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.63_1.npy'), allow_pickle=True),
261 | np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_2.npy'), allow_pickle=True),
262 | np.load(os.path.join(prefix, 'Features/TextWhole/train_idxs_0.60_3.npy'), allow_pickle=True)]
263 | fold = 1
264 | 
265 | for idx_idx, train_idxs_tmp in enumerate(train_idxs_tmps):
266 |     # if idx_idx != 2:
267 |     #     continue
268 |     test_idxs_tmp = list(set(list(text_dep_idxs_tmp)+list(text_non_idxs)) - set(train_idxs_tmp))
269 |     train_idxs, test_idxs = [], []
270 |     # depression data augmentation
271 |     for idx in train_idxs_tmp:
272 |         if idx in text_dep_idxs_tmp:
273 |             feat = text_features[idx]
274 |             count = 0
275 |             resample_idxs = [0,1,2,3,4,5]
276 |             for i in itertools.permutations(feat, feat.shape[0]):
277 |                 if count in resample_idxs:
278 |                     text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))
279 |                     text_targets = np.hstack((text_targets, 1))
280 |                     train_idxs.append(len(text_features)-1)
281 |                 count += 1
282 |         else:
283 |             train_idxs.append(idx)
284 | 
285 |     for idx in test_idxs_tmp:
286 |         if idx in text_dep_idxs_tmp:
287 |             feat = text_features[idx]
288 |             count = 0
289 |             # resample_idxs = random.sample(range(6), 4)
290 |             resample_idxs = [0,1,4,5]
291 |             for i in itertools.permutations(feat, feat.shape[0]):
292 |                 if count in resample_idxs:
293 |                     text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))
294 |                     text_targets = np.hstack((text_targets, 1))
295 |                     test_idxs.append(len(text_features)-1)
296 |                 count += 1
297 |         else:
298 |             test_idxs.append(idx)
299 | 
300 |     model = TextBiLSTM(config)
301 | 
302 |     param_group = get_param_group(model)
303 |     optimizer = optim.AdamW(param_group, lr=config['learning_rate'])
304 |     criterion = nn.CrossEntropyLoss()
305 |     max_f1 = -1
306 |     max_acc = -1
307 |     max_rec = -1
308 |     max_prec = -1
309 |     train_acc = -1
310 | 
311 |     for ep in range(1, config['epochs']):
312 |         train(ep, train_idxs)
313 |         tloss = evaluate(model, test_idxs, fold, train_idxs)
314 |     fold += 1


--------------------------------------------------------------------------------
/DepressionCollected/Classification/text_features_whole.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import wave
 4 | import librosa
 5 | import re
 6 | # from allennlp.commands.elmo import ElmoEmbedder
 7 | import os
 8 | prefix = os.path.abspath(os.path.join(os.getcwd(), "."))
 9 | from elmoformanylangs import Embedder
10 | import pkuseg
11 | import thulac
12 | # from pyhanlp import HanLP
13 | import jieba
14 | # seg = pkuseg.pkuseg()
15 | # thu1 = thulac.thulac(seg_only=True)
16 | elmo = Embedder('/Users/linlin/Desktop/SpeechRecognition/DepressionCode/ELMoForManyLangs/zhs.model')
17 | 
18 | topics = ['positive', 'neutral', 'negative']
19 | answers = {}
20 | text_features = []
21 | text_targets = []
22 | 
23 | def extract_features(text_features, text_targets, path):
24 |     for index in range(114):
25 |         if os.path.isdir(os.path.join(prefix, path, str(index+1))):
26 |             answers[index+1] = []
27 |             for topic in topics:
28 |                 with open(os.path.join(prefix, path, str(index+1), '%s.txt'%(topic)) ,'r') as f:
29 |                     lines = f.readlines()[0]
30 |                     # seg_text = seg.cut(lines) 
31 |                     # seg_text = thu1.cut(lines)
32 |                     # seg_text_iter = HanLP.segment(lines) 
33 |                     seg_text_iter = jieba.cut(lines, cut_all=False) 
34 |                     answers[index+1].append([item for item in seg_text_iter])
35 |                     # answers[dir].append(seg_text)
36 |             with open(os.path.join(prefix, '{1}/{0}/new_label.txt'.format(index+1, path))) as fli:
37 |                 target = float(fli.readline())
38 |             # text_targets.append(1 if target >= 53 else 0)
39 |             text_targets.append(target)
40 |             text_features.append([np.array(item).mean(axis=0) for item in elmo.sents2elmo(answers[index+1])])
41 | 
42 | extract_features(text_features, text_targets, 'Data')
43 | extract_features(text_features, text_targets, 'ValidationData')
44 | 
45 | print("Saving npz file locally...")
46 | np.savez(os.path.join(prefix, 'Features/TextWhole/whole_samples_reg_avg.npz'), text_features)
47 | np.savez(os.path.join(prefix, 'Features/TextWhole/whole_labels_reg_avg.npz'), text_targets)
48 |     


--------------------------------------------------------------------------------
/DepressionCollected/DAICFeatureExtarction/feature_extraction.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | sys.path.append('/Users/linlin/Desktop/DepressionCollected')
  4 | from Classification.audio_features_whole import wav2vlad
  5 | 
  6 | import numpy as np
  7 | import pandas as pd
  8 | import wave
  9 | 
 10 | prefix = os.getcwd()
 11 | train_split_df = pd.read_csv(os.path.join(prefix, 'DAIC/train_split_Depression_AVEC2017.csv'))
 12 | test_split_df = pd.read_csv(os.path.join(prefix, 'DAIC/dev_split_Depression_AVEC2017.csv'))
 13 | train_split_num = train_split_df[['Participant_ID']]['Participant_ID'].tolist()
 14 | test_split_num = test_split_df[['Participant_ID']]['Participant_ID'].tolist()
 15 | train_split_clabel = train_split_df[['PHQ8_Binary']]['PHQ8_Binary'].tolist()
 16 | test_split_clabel = test_split_df[['PHQ8_Binary']]['PHQ8_Binary'].tolist()
 17 | train_split_rlabel = train_split_df[['PHQ8_Score']]['PHQ8_Score'].tolist()
 18 | test_split_rlabel = test_split_df[['PHQ8_Score']]['PHQ8_Score'].tolist()
 19 | 
 20 | with open('./queries.txt') as f:
 21 |     queries = f.readlines()
 22 | 
 23 | def identify_topics(sentence):
 24 |     for query in queries:
 25 |         query = query.strip('\n')
 26 |         sentence = sentence.strip('\n')
 27 |         if query == sentence:
 28 |             return True
 29 |     return False
 30 | 
 31 | def extract_features(number):
 32 |     transcript = pd.read_csv(os.path.join(prefix, 'DAIC/{0}_P/{0}_TRANSCRIPT.csv'.format(number)), sep='\t').fillna('')
 33 |     
 34 |     wavefile = wave.open(os.path.join(prefix, 'DAIC/{0}_P/{0}_AUDIO.wav'.format(number, 'r')))
 35 |     sr = wavefile.getframerate()
 36 |     nframes = wavefile.getnframes()
 37 |     wave_data = np.frombuffer(wavefile.readframes(nframes), dtype=np.short)
 38 |     
 39 |     response = ''
 40 |     start_time = 0
 41 |     stop_time = 0
 42 |     feats = []
 43 |     signal = []
 44 | 
 45 |     for t in transcript.itertuples():
 46 |         # 问题开始
 47 |         if getattr(t,'speaker') == 'Ellie' and (identify_topics(getattr(t,'value')) or 'i think i have asked everything' in getattr(t,'value')):
 48 |             # 初始化
 49 |             response = ''
 50 |             if len(signal) == 0:
 51 |                 continue
 52 |             feats.append(wav2vlad(signal, sr))
 53 |             signal = []
 54 |         elif getattr(t,'speaker') == 'Participant':
 55 |             if 'scrubbed_entry' in getattr(t,'value'):
 56 |                 continue
 57 |             start_time = int(getattr(t,'start_time')*sr)
 58 |             stop_time = int(getattr(t,'stop_time')*sr)
 59 |             response += (' ' + getattr(t,'value'))
 60 |             signal = np.hstack((signal, wave_data[start_time:stop_time].astype(np.float)))
 61 |     
 62 |     print(np.shape(feats))
 63 |     print('{}_P feature done'.format(number))
 64 |     return feats
 65 |     
 66 | # training set
 67 | audio_features_train = []
 68 | audio_ctargets_train = []
 69 | audio_rtargets_train = []
 70 | 
 71 | # test set
 72 | audio_features_test = []
 73 | audio_ctargets_test = []
 74 | audio_rtargets_test = []
 75 | 
 76 | # training set
 77 | for index in range(len(train_split_num)):
 78 |     feat = extract_features(train_split_num[index])
 79 |     audio_features_train.append(feat)
 80 |     audio_ctargets_train.append(train_split_clabel[index])
 81 |     audio_rtargets_train.append(train_split_rlabel[index])
 82 |     
 83 | print("Saving npz file locally...")
 84 | np.savez(os.path.join(prefix, 'DAICCode/Features/train_samples_clf.npz'), audio_features_train)
 85 | np.savez(os.path.join(prefix, 'DAICCode/Features/train_samples_reg.npz'), audio_features_train)
 86 | np.savez(os.path.join(prefix, 'DAICCode/Features/train_labels_clf.npz'), audio_ctargets_train)
 87 | np.savez(os.path.join(prefix, 'DAICCode/Features/train_labels_reg.npz'), audio_rtargets_train)
 88 | 
 89 | # test set
 90 | for index in range(len(test_split_num)):
 91 |     feat = extract_features(test_split_num[index])
 92 |     audio_features_test.append(feat)
 93 |     audio_ctargets_test.append(test_split_clabel[index])
 94 |     audio_rtargets_test.append(test_split_rlabel[index])
 95 | 
 96 | print("Saving npz file locally...")
 97 | np.savez(os.path.join(prefix, 'DAICCode/Features/test_samples_clf.npz'), audio_features_test)
 98 | np.savez(os.path.join(prefix, 'DAICCode/Features/test_samples_reg.npz'), audio_features_test)
 99 | np.savez(os.path.join(prefix, 'DAICCode/Features/test_labels_clf.npz'), audio_ctargets_test)
100 | np.savez(os.path.join(prefix, 'DAICCode/Features/test_labels_reg.npz'), audio_rtargets_test)
101 | 


--------------------------------------------------------------------------------
/DepressionCollected/DAICFeatureExtarction/queries.txt:
--------------------------------------------------------------------------------
  1 | how are you doing today
  2 | where are you from originally
  3 | why'd you move to l_a
  4 | how do you like l_a
  5 | what are some things you really like about l_a
  6 | how easy was it for you to get used to living in l_a
  7 | what are some things you don't really like about l_a
  8 | what'd you study at school
  9 | are you still doing that
 10 | what's your dream job
 11 | do you travel a lot
 12 | why
 13 | how often do you go back to your hometown
 14 | do you consider yourself an introvert
 15 | what do you do to relax
 16 | how are you at controlling your temper
 17 | when was the last time you argued with someone and what was it about
 18 | how did you feel in that moment
 19 | tell me more about that
 20 | how close are you to them
 21 | how do you know them
 22 | what are some things you like to do for fun
 23 | who's someone that's been a positive influence in your life
 24 | can you tell me about that
 25 | how close are you to your family
 26 | is there anything you regret
 27 | what made you decide to do that
 28 | could you have done anything to avoid it
 29 | what's one of your most memorable experiences
 30 | what's it like for you living with them
 31 | how do you like your living situation
 32 | do you have roommates
 33 | how easy is it for you to get a good night's sleep
 34 | do you feel that way often
 35 | what are you like when you don't sleep well
 36 | do you feel down
 37 | have you been diagnosed with depression
 38 | have you ever been diagnosed with p_t_s_d
 39 | have you ever served in the military
 40 | when was the last time you felt really happy
 41 | what do you think of today's kids
 42 | can you give me an example of that
 43 | what do you do when you're annoyed
 44 | when was the last time that happened
 45 | how would your best friend describe you
 46 | where do you live
 47 | how hard is that
 48 | what do you do now
 49 | are you happy you did that
 50 | what are some things that make you really mad
 51 | what do you do to relax
 52 | like what
 53 | are you still working in that
 54 | <laughter> can you give me an example of that
 55 | do you feel down 
 56 | like what 
 57 | how do you cope with them
 58 | have you noticed any changes in your behavior or thoughts lately
 59 | do you have disturbing thoughts
 60 | how easy is it for you to get a good night sleep
 61 | what do you enjoy about traveling
 62 | i'd love to hear about one of your trips
 63 | what advice would you give yourself ten or twenty years ago     
 64 | what are some things you really like about l_a 
 65 | how are you at controlling your temper 
 66 | has that gotten you in trouble
 67 | do you find it easy to be a parent
 68 | what's the hardest thing about being a parent
 69 | tell me about your kids
 70 | what's one of your most memorable experiences 
 71 | how did you feel in that moment 
 72 | have you ever served in the military 
 73 | have you been diagnosed with depression 
 74 | how would you best friend describe you 
 75 | what'd you study at school 
 76 | nice are you still doing that
 77 | what are some things that make you really mad 
 78 | could you have done anything to avoid it 
 79 | could you say a little more about that 
 80 | when was the last time you argued with someone and what was it about 
 81 | <laughter> do you travel a lot
 82 | when was the last time that happened 
 83 | have you ever been diagnosed with p_t_s_d 
 84 | how would your best friend describe you 
 85 | when was the last time you felt really happy
 86 | how did you decide to do that
 87 | okay could you have done anything to avoid it
 88 | do you feel like therapy is useful
 89 | did you think you had a problem before you found out
 90 | how has seeing a therapist affected you
 91 | what sort of changes have you noticed since you've been going to therapy
 92 | why did you stop
 93 | who's someone that's been a positive influence in your life 
 94 | when did you move to l_a
 95 | how often do you go back to your home town
 96 | what got you to seek help
 97 | what were your symptoms
 98 | yeah what do you enjoy about traveling
 99 | okay what's the best thing about being a parent
100 | when was the last time you argued with someone and what was it about
101 | <laughter> could you say a little more about that
102 | how long ago were you diagnosed
103 | so how are you doing today
104 | could you say a little more about that
105 | do you still go to therapy now
106 | do you feel like therapy's useful
107 | have you noticed any changes in your behavior or thoughts lately 
108 | tell me about that
109 | what would you say are some of your best qualities
110 | what are some things that usually put you in a good mood
111 | what are you most proud of in your life
112 | how does it compare to l_a
113 | tell me about something you did recently that you really enjoyed
114 | is going to a therapist helping you
115 | how have you been feeling lately
116 | are they triggered by something
117 | what's the best thing about being a parent
118 | why'd you decide to enlist in the military
119 | how old were you when you joined the military
120 | how did serving in the military change you
121 | what did you do after the military
122 | when'd you move to l_a
123 | how has seeing a therapist affected you 
124 | who's someone that's been a positive influence in  your life
125 | what are some things you like to do for fun who's someone that's been a positive influence in your life 
126 | what was it about
127 | do you think that maybe you're being a little hard on yourself
128 | so how are you doing today 
129 | where are you from originally 
130 | how easy was it for you to get used to living in l_a 
131 | what are some things you don't really like about l_a 
132 | how often to you go back to your home town 
133 | why 
134 | how close are you to your family <asks do you travel a lot simultaneously>
135 | do you travel a lot 
136 | what do you enjoy about traveling 
137 | i'd love to hear about one of your trips 
138 | do you consider yourself an introvert  
139 | can you give me an example of that 
140 | what do you do when you're annoyed 
141 | what do you do to relax 
142 | what's your dream job 
143 | how long ago were you diagnosed 
144 | what got you to seek help 
145 | do you feel like therapy's useful 
146 | do you still go to therapy now 
147 | what sort of changes have you noticed since you've been going to therapy 
148 | how have you been feeling lately 
149 | tell me more about that 
150 | what would you say are some of your best qualities 
151 | what are some things that usually put you in a good mood  
152 | when was the last time you felt really happy 
153 | who's someone that's been a positive influence in your life 
154 | how do you know them 
155 | how close are you to them 
156 | what are you most proud of in your life 
157 | are you still doing that 
158 | do you consider yourself an introvert 
159 | do you feel that way often 
160 | how do you like your living situation 
161 | do you have roommates 
162 | how easy is it for you to get a good night's sleep 
163 | what are you like when you don't sleep well 
164 | what advice would you give yourself ten or twenty years ago 
165 | how close are you to your family 
166 | tell me about something you did recently that you really enjoyed 
167 | what are some things that usually put you in a good mood 
168 | why why
169 | what made you decide to go and see someone
170 | okay so how are you doing today
171 | why'd you move to l_a 
172 | how often do you go back to your hometown 
173 | how did you decide to do that 
174 | is there anything you regret 
175 | could you have done anything to avoid it  
176 | how easy is it for you to get a good night's sleep  
177 | do you find it easy to be a parent 
178 | what's the best thing about being a parent 
179 | what's the hardest thing about being a parent 
180 | and please feel free to tell me anything you answers are totally confidential
181 | and please feel free to tell me anything you're answers are totally confidential
182 | what made you decide to do that 
183 | what advice would you give yourself ten or twenty years ago  
184 | what do you think of today's kids 
185 | tell me about that 
186 | how hard is that 
187 | can you tell me about that 
188 | so how are you doing today  
189 | are you still working in that 
190 | what are some things you like to do for fun 
191 | that's good where are you from originally 
192 | when was the last time you argued with someone and what was it about  
193 | where do you live 
194 | did you think you had a problem before you found out 
195 | what were your symptoms 
196 | why did you stop 
197 | okay so how are you doing today  
198 | what do you do now 
199 | are you happy you did that 
200 | are they triggered by something 
201 | how do you cope with them 
202 | has that gotten you in trouble 
203 | what are you
204 | what are some things that make you really mad  
205 | how has seeing a therapist affected you  
206 | yeah how hard is that
207 | mhm what are some things you don't really like about l_a
208 | mhm how did you decide to do that 
209 | how close are you to your family do you find it easy to be a parent 
210 | that's good what do you think of today's kids 
211 | awesome how did you decide to do that 
212 | uh huh uh huh uh huh is there anything you regret is there anything you regret
213 | how old were you when you joined the military 
214 | did you ever see combat 
215 | how did serving in the military change you 
216 | what did you do after the military 
217 | how easy was it for you to go back to civilian life 
218 | is going to a therapist helping you   
219 | that's good where are you from originally
220 | tell me about your kids 
221 | yeah how hard is that 
222 | do you think that maybe you're being a little hard on yourself 
223 | do you consider yourself and introvert
224 | how often do you go back to your home town 
225 | how_doingV (so how are you doing today)
226 | where_originally (where are you from originally)
227 | like_about_LA (what are some things you really like about l_a)
228 | dont_like_LA (what are some things you don't really like about l_a)
229 | study (what did you study at school)
230 | still_doing_X (are you still doing that)
231 | change_directions (what made you decide to do that)
232 | happy_didthat (are you happy you did that)
233 | job_virtually (i love my job you could almost say it's virtually made for me what's your dream job)
234 | shyoutgoing (do you consider yourself more shy or outgoing)
235 | tell_about_that (can you tell me about that)
236 | relax_fishtank (sometimes when i'm feeling tense i turn on the fish tank screensaver hey i know it's not hawaii but it's the best i've got what do you do to relax)
237 | control_temper (how are you at controlling your temper)
238 | last_argument (when was the last time you argued with someone and what was it about)
239 | hard_decisionB (tell me about the hardest decision you've ever had to make)
240 | family_relationship (tell me about your relationship with your family)
241 | feelguilty (what's something you feel guilty about)
242 | give_example (can you give me an example of that)
243 | describe_felt (how did you feel in that moment)
244 | ptsd_diagnosed (have you ever been diagnosed with p_t_s_d)
245 | depression_diagnosed (have you been diagnosed with depression)
246 | easy_sleep (how easy is it for you to get a good night's sleep)
247 | feel_down (do you feel down)
248 | behavior_changes (have you noticed any changes in your behavior or thoughts lately)
249 | happy_lasttime (tell me about the last time you felt really happy)
250 | self_change (what are some things you wish you could change about yourself)
251 | symptoms_cope (how do you cope with them)
252 | regret (is there anything you regret)
253 | advice_back (what advice would you give to yourself ten or twenty years ago)
254 | Ellie17Dec2012_08 (what are you most proud of in your life)
255 | difficult (how hard is that)
256 | BF_describe (how would your best friend describe you)
257 | ideal_weekendC (tell me how you spend your ideal weekend)
258 | asked_everything (okay i think i have asked everything i need to)
259 | travel_shoes (i'm sure you can tell by my shoes i'm not much of a world explorer do you travel a lot)
260 | like_what (like what)
261 | travel_trips (i'd love to hear about one of your trips)
262 | still_working_on_X (are you still working in that)
263 | dream_job (what's your dream job)
264 | situation_handled (tell me about a situation that you wish you had handled differently)
265 | why_enlist (why'd you decide to enlist in the military)
266 | old (how old were you when you joined the military)
267 | combat (did you ever see combat)
268 | why2 (why)
269 | effectB (how did serving in the military change you)
270 | after (what did you do after the military)
271 | civilian_life (how easy was it for you to go back to civilian life)
272 | feel_lately (how have you been feeling lately)
273 | therapy_useful (do you feel like therapy is useful)
274 | why_seek_help (what got you to seek help)
275 | therapy_going (do you still go to therapy now)
276 | therapist_affect (how has seeing a therapist affected you)
277 | landed_trouble (has that gotten you in trouble)
278 | when_LA (when did you move to l_a)
279 | often_backB (how often do you go back to your hometown)
280 | compares_LA (how does it compare to l_a)
281 | why_LA (why did you move to l_a)
282 | adapted_LA (how easy was it for you to get used to living in l_a)
283 | hard_decision (how did you decide to do that)
284 | easy_parent (do you find it easy to be a parent)
285 | parent_hardest (what's the hardest thing about being a parent)
286 | parent_best (what's the best thing about being a parent)
287 | parent_differences (what are some ways that you're different as a parent than your parents)
288 | military (have you ever served in the military)
289 | too_hard (do you think that maybe you're being a little hard on yourself)
290 | Ellie17Dec2012_07 (what would you say are some of your best qualities)
291 | memorableB (what's one of your most memorable experiences)
292 | travel_changed (what do you enjoy about traveling)
293 | memory_erase (tell me about an event or something that you wish you could erase from your memory)
294 | bouts_symptoms (when was the last time that happened)
295 | argument_about (what was it about)
296 | avoid (could you have done anything to avoid it)
297 | trigger (are they triggered by something)
298 | sleep_affects (what are you like when you don't sleep well)
299 | when_diagnosed (how long ago were you diagnosed)
300 | therapy_changes (what sort of changes have you noticed since you've been going to therapy)
301 | feelbadly (tell me about a time when someone made you feel really badly about yourself)
302 | more (tell me more about that)
303 | disturbing_thoughts (do you have disturbing thoughts)
304 | Ellie17Dec2012_10 (tell me about something you did recently that you really enjoyed)
305 | Ellie17Dec2012_09 (what are some things that usually put you in a good mood)
306 | do_fun (what are some things you like to do for fun)
307 | influence_positive (who's someone that's been a positive influence in your life)
308 | how_close (how close are you to them)
309 | tell_me_about (tell me about that)
310 | suspect_problem (did you think you had a problem before you found out)
311 | symptoms_what (what were your symptoms)
312 | how_know (how do you know them)
313 | therapist_useful (is going to a therapist helping you)
314 | stop_going (why did you stop)
315 | mad_makeyou (what are some things that make you really mad)
316 | where_live (where do you live)
317 | roommates (do you have roommates)
318 | living_situation (how do you like your living situation)
319 | what_do_when_annoyed (what do you do when you are annoyed)
320 | elaborate (could you say a little more about that)
321 | family_roleB (how close are you to your family)
322 | todays_kids (what do you think of today's kids)
323 | tell_me_moreV2 (can you tell me more about that)
324 | kids_elaborate (tell me about your kids)


--------------------------------------------------------------------------------
/DepressionCollected/Regression/AudioModelChecking.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.autograd import Variable
  4 | from torch.nn import functional as F
  5 | import torch.optim as optim
  6 | from sklearn.metrics import confusion_matrix
  7 | from sklearn.metrics import mean_absolute_error, mean_squared_error
  8 | from sklearn.model_selection import train_test_split
  9 | 
 10 | import numpy as np
 11 | import pandas as pd
 12 | import os
 13 | import pickle
 14 | import random
 15 | import itertools
 16 | 
 17 | 
 18 | prefix = os.path.abspath(os.path.join(os.getcwd(), "./"))
 19 | audio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_reg_256.npz'))['arr_0'], axis=2)
 20 | audio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_reg_256.npz'))['arr_0']
 21 | 
 22 | audio_dep_idxs = np.where(audio_targets >= 53)[0]
 23 | audio_non_idxs = np.where(audio_targets < 53)[0]
 24 | dep_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/dep_idxs.npy'), allow_pickle=True)
 25 | non_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/non_idxs.npy'), allow_pickle=True)
 26 | 
 27 | config = {
 28 |     'num_classes': 1,
 29 |     'dropout': 0.5,
 30 |     'rnn_layers': 2,
 31 |     'embedding_size': 256,
 32 |     'batch_size': 4,
 33 |     'epochs': 100,
 34 |     'learning_rate': 5e-5,
 35 |     'hidden_dims': 256,
 36 |     'bidirectional': False,
 37 |     'cuda': False
 38 | }
 39 | 
 40 | class AudioBiLSTM(nn.Module):
 41 |     def __init__(self, config):
 42 |         super(AudioBiLSTM, self).__init__()
 43 |         self.num_classes = config['num_classes']
 44 |         self.learning_rate = config['learning_rate']
 45 |         self.dropout = config['dropout']
 46 |         self.hidden_dims = config['hidden_dims']
 47 |         self.rnn_layers = config['rnn_layers']
 48 |         self.embedding_size = config['embedding_size']
 49 |         self.bidirectional = config['bidirectional']
 50 | 
 51 |         self.build_model()
 52 | 
 53 |     def init_weight(net):
 54 |         for name, param in net.named_parameters():
 55 |             if 'bias' in name:
 56 |                 nn.init.constant_(param, 0.0)
 57 |             elif 'weight' in name:
 58 |                 nn.init.xavier_uniform_(param)
 59 | 
 60 |     def build_model(self):
 61 |         # attention layer
 62 |         self.attention_layer = nn.Sequential(
 63 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 64 |             nn.ReLU(inplace=True))
 65 |         # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)
 66 | 
 67 |         self.lstm_net_audio = nn.GRU(self.embedding_size,
 68 |                                 self.hidden_dims,
 69 |                                 num_layers=self.rnn_layers,
 70 |                                 dropout=self.dropout,
 71 |                                 bidirectional=self.bidirectional,
 72 |                                 batch_first=True)
 73 |         # self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,
 74 |         #                         num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)
 75 | 
 76 |         self.bn = nn.BatchNorm1d(3)
 77 | 
 78 |         # FC层
 79 |         self.fc_audio = nn.Sequential(
 80 |             nn.Dropout(self.dropout),
 81 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 82 |             nn.ReLU(),
 83 |             nn.Dropout(self.dropout),
 84 |             nn.Linear(self.hidden_dims, self.num_classes),
 85 |             nn.ReLU(),
 86 |             # nn.Softmax(dim=1)
 87 |         )
 88 | 
 89 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
 90 |         '''
 91 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
 92 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
 93 |         :return: [batch_size, n_hidden]
 94 |         '''
 95 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
 96 |         # h [batch_size, time_step, hidden_dims]
 97 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
 98 |         #         h = lstm_out
 99 |         # [batch_size, num_layers * num_directions, n_hidden]
100 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
101 |         # [batch_size, 1, n_hidden]
102 |         lstm_hidden = lstm_hidden.unsqueeze(1)
103 |         # atten_w [batch_size, 1, hidden_dims]
104 |         atten_w = self.attention_layer(lstm_hidden)
105 |         # m [batch_size, time_step, hidden_dims]
106 |         m = nn.Tanh()(h)
107 |         # atten_context [batch_size, 1, time_step]
108 |        # print(atten_w.shape, m.transpose(1, 2).shape)
109 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
110 |         # softmax_w [batch_size, 1, time_step]
111 |         softmax_w = F.softmax(atten_context, dim=-1)
112 |         # context [batch_size, 1, hidden_dims]
113 |         context = torch.bmm(softmax_w, h)
114 |         result = context.squeeze(1)
115 |         return result
116 | 
117 |     def forward(self, x):
118 |         x, _ = self.lstm_net_audio(x)
119 |         # x = self.bn(x)
120 |         x = x.sum(dim=1)
121 |         out = self.fc_audio(x)
122 |         return out
123 | 
124 | def save(model, filename):
125 |     save_filename = '{}.pt'.format(filename)
126 |     torch.save(model, save_filename)
127 |     print('Saved as %s' % save_filename)
128 |  
129 | def evaluate(fold, model):
130 |     model.eval()
131 |     batch_idx = 1
132 |     total_loss = 0
133 |     global min_mae, min_rmse, test_dep_idxs, test_non_idxs
134 |     pred = np.array([])
135 |     X_test = audio_features[list(test_dep_idxs)+list(test_non_idxs)]
136 |     Y_test = audio_targets[list(test_dep_idxs)+list(test_non_idxs)]
137 |     with torch.no_grad():
138 |         if config['cuda']:
139 |             x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True).cuda(),\
140 |                 Variable(torch.from_numpy(Y_test)).cuda()
141 |         else:
142 |             x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True), \
143 |                 Variable(torch.from_numpy(Y_test)).type(torch.FloatTensor)
144 | 
145 |         optimizer.zero_grad()
146 |         output = model(x)
147 |         loss = criterion(output, y.view_as(output))
148 |         total_loss += loss.item()
149 |         pred = output.flatten().detach().numpy()
150 | 
151 |         mae = mean_absolute_error(Y_test, pred)
152 |         rmse = np.sqrt(mean_squared_error(Y_test, pred))
153 | 
154 |         print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
155 |         print('='*89)
156 | fold = 2
157 | audio_lstm_model = torch.load(os.path.join(prefix, 'Model/Regression/Audio%d/gru_vlad256_256_8.25.pt'%(fold+1)))
158 | model = AudioBiLSTM(config)
159 | # model_state_dict = {}
160 | # model_state_dict['lstm_net_audio.weight_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l0']
161 | # model_state_dict['lstm_net_audio.weight_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l0']
162 | # model_state_dict['lstm_net_audio.bias_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l0']
163 | # model_state_dict['lstm_net_audio.bias_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l0']
164 | 
165 | # model_state_dict['lstm_net_audio.weight_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l1']
166 | # model_state_dict['lstm_net_audio.weight_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l1']
167 | # model_state_dict['lstm_net_audio.bias_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l1']
168 | # model_state_dict['lstm_net_audio.bias_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l1']
169 | 
170 | # model_state_dict['fc_audio.1.weight'] = audio_lstm_model.state_dict()['fc_audio.1.weight']
171 | # model_state_dict['fc_audio.1.bias'] = audio_lstm_model.state_dict()['fc_audio.1.bias']
172 | # model_state_dict['fc_audio.4.weight'] = audio_lstm_model.state_dict()['fc_audio.4.weight']
173 | # model_state_dict['fc_audio.4.bias'] = audio_lstm_model.state_dict()['fc_audio.4.bias']
174 | model_state_dict = audio_lstm_model.state_dict()
175 | model.load_state_dict(model_state_dict, strict=True)
176 | 
177 | test_dep_idxs_tmp = dep_idxs[fold*10:(fold+1)*10]
178 | test_non_idxs = non_idxs[fold*44:(fold+1)*44]
179 | train_dep_idxs_tmp = list(set(dep_idxs) - set(test_dep_idxs_tmp))
180 | train_non_idxs = list(set(non_idxs) - set(test_non_idxs))
181 | 
182 | # training data augmentation
183 | train_dep_idxs = []
184 | for (i, idx) in enumerate(train_dep_idxs_tmp):
185 |     feat = audio_features[idx]
186 |     if i < 14:
187 |         for i in itertools.permutations(feat, feat.shape[0]):
188 |             audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
189 |             audio_targets = np.hstack((audio_targets, audio_targets[idx]))
190 |             train_dep_idxs.append(len(audio_features)-1)
191 |     else:
192 |         train_dep_idxs.append(idx)
193 | 
194 | # test data augmentation
195 | # test_dep_idxs = []
196 | # for idx in test_dep_idxs_tmp:
197 | #     feat = audio_features[idx]
198 | #     for i in itertools.permutations(feat, feat.shape[0]):
199 | #         audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
200 | #         audio_targets = np.hstack((audio_targets, audio_targets[idx]))
201 | #         test_dep_idxs.append(len(audio_features)-1)
202 | test_dep_idxs = test_dep_idxs_tmp
203 | 
204 | optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
205 | criterion = nn.SmoothL1Loss()
206 | # criterion = FocalLoss(class_num=2)
207 | # evaluate(fold, model)
208 | evaluate(fold, model)
209 | 


--------------------------------------------------------------------------------
/DepressionCollected/Regression/audio_bilstm_perm.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.autograd import Variable
  4 | from torch.nn import functional as F
  5 | import torch.optim as optim
  6 | from sklearn.metrics import confusion_matrix
  7 | from sklearn.metrics import mean_absolute_error, mean_squared_error
  8 | from sklearn.model_selection import train_test_split
  9 | 
 10 | import numpy as np
 11 | import pandas as pd
 12 | import os
 13 | import pickle
 14 | import random
 15 | import itertools
 16 | 
 17 | prefix = os.path.abspath(os.path.join(os.getcwd(), "./"))
 18 | audio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_reg_256.npz'))['arr_0'], axis=2)
 19 | audio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_reg_256.npz'))['arr_0']
 20 | 
 21 | # audio_dep_idxs = np.where(audio_targets >= 53)[0]
 22 | # audio_non_idxs = np.where(audio_targets < 53)[0]
 23 | # dep_orders = random.sample(range(len(audio_dep_idxs)), len(audio_dep_idxs))
 24 | # non_orders = random.sample(range(len(audio_non_idxs)), len(audio_non_idxs))
 25 | # dep_idxs = audio_dep_idxs[dep_orders]
 26 | # non_idxs = audio_non_idxs[non_orders]
 27 | # np.save(os.path.join(prefix, 'Features/AudioWhole/dep_idxs'), dep_idxs)
 28 | # np.save(os.path.join(prefix, 'Features/AudioWhole/non_idxs'), non_idxs)
 29 | dep_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/dep_idxs.npy'), allow_pickle=True)
 30 | non_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/non_idxs.npy'), allow_pickle=True)
 31 | 
 32 | config = {
 33 |     'num_classes': 1,
 34 |     'dropout': 0.5,
 35 |     'rnn_layers': 2,
 36 |     'embedding_size': 256,
 37 |     'batch_size': 2,
 38 |     'epochs': 120,
 39 |     'learning_rate': 1e-5,
 40 |     'hidden_dims': 256,
 41 |     'bidirectional': False,
 42 |     'cuda': False
 43 | }
 44 | 
 45 | class AudioBiLSTM(nn.Module):
 46 |     def __init__(self, config):
 47 |         super(AudioBiLSTM, self).__init__()
 48 |         self.num_classes = config['num_classes']
 49 |         self.learning_rate = config['learning_rate']
 50 |         self.dropout = config['dropout']
 51 |         self.hidden_dims = config['hidden_dims']
 52 |         self.rnn_layers = config['rnn_layers']
 53 |         self.embedding_size = config['embedding_size']
 54 |         self.bidirectional = config['bidirectional']
 55 | 
 56 |         self.build_model()
 57 | 
 58 |     def init_weight(net):
 59 |         for name, param in net.named_parameters():
 60 |             if 'bias' in name:
 61 |                 nn.init.constant_(param, 0.0)
 62 |             elif 'weight' in name:
 63 |                 nn.init.xavier_uniform_(param)
 64 | 
 65 |     def build_model(self):
 66 |         # attention layer
 67 |         self.attention_layer = nn.Sequential(
 68 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 69 |             nn.ReLU(inplace=True))
 70 |         # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)
 71 | 
 72 |         self.lstm_net_audio = nn.GRU(self.embedding_size,
 73 |                                 self.hidden_dims,
 74 |                                 num_layers=self.rnn_layers,
 75 |                                 dropout=self.dropout,
 76 |                                 bidirectional=self.bidirectional,
 77 |                                 batch_first=True)
 78 |         # self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,
 79 |         #                         num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)
 80 | 
 81 |         self.bn = nn.BatchNorm1d(3)
 82 | 
 83 |         # FC层
 84 |         self.fc_audio = nn.Sequential(
 85 |             nn.Dropout(self.dropout),
 86 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 87 |             nn.ReLU(),
 88 |             nn.Dropout(self.dropout),
 89 |             nn.Linear(self.hidden_dims, self.num_classes),
 90 |             nn.ReLU(),
 91 |             # nn.Softmax(dim=1)
 92 |         )
 93 | 
 94 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
 95 |         '''
 96 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
 97 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
 98 |         :return: [batch_size, n_hidden]
 99 |         '''
100 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
101 |         # h [batch_size, time_step, hidden_dims]
102 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
103 |         #         h = lstm_out
104 |         # [batch_size, num_layers * num_directions, n_hidden]
105 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
106 |         # [batch_size, 1, n_hidden]
107 |         lstm_hidden = lstm_hidden.unsqueeze(1)
108 |         # atten_w [batch_size, 1, hidden_dims]
109 |         atten_w = self.attention_layer(lstm_hidden)
110 |         # m [batch_size, time_step, hidden_dims]
111 |         m = nn.Tanh()(h)
112 |         # atten_context [batch_size, 1, time_step]
113 |        # print(atten_w.shape, m.transpose(1, 2).shape)
114 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
115 |         # softmax_w [batch_size, 1, time_step]
116 |         softmax_w = F.softmax(atten_context, dim=-1)
117 |         # context [batch_size, 1, hidden_dims]
118 |         context = torch.bmm(softmax_w, h)
119 |         result = context.squeeze(1)
120 |         return result
121 | 
122 |     def forward(self, x):
123 |         x, _ = self.lstm_net_audio(x)
124 |         # x = self.bn(x)
125 |         x = x.sum(dim=1)
126 |         out = self.fc_audio(x)
127 |         return out
128 | 
129 | def save(model, filename):
130 |     save_filename = '{}.pt'.format(filename)
131 |     torch.save(model, save_filename)
132 |     print('Saved as %s' % save_filename)
133 |  
134 | def train(epoch):
135 |     global lr, train_acc
136 |     model.train()
137 |     batch_idx = 1      
138 |     total_loss = 0
139 |     correct = 0
140 |     pred = np.array([])
141 |     X_train = audio_features[train_dep_idxs+train_non_idxs]
142 |     Y_train = audio_targets[train_dep_idxs+train_non_idxs]
143 |     for i in range(0, X_train.shape[0], config['batch_size']):
144 |         if i + config['batch_size'] > X_train.shape[0]:
145 |             x, y = X_train[i:], Y_train[i:]
146 |         else:
147 |             x, y = X_train[i:(i + config['batch_size'])], Y_train[i:(
148 |                 i + config['batch_size'])]
149 |         if config['cuda']:
150 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()
151 |         else:
152 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), \
153 |                 Variable(torch.from_numpy(y)).type(torch.FloatTensor)
154 | 
155 |         # 将模型的参数梯度设置为0
156 |         optimizer.zero_grad()
157 |         output = model(x)
158 |         loss = criterion(output, y.view_as(output))
159 |         # 后向传播调整参数
160 |         loss.backward()
161 |         # 根据梯度更新网络参数
162 |         optimizer.step()
163 |         batch_idx += 1
164 |         # loss.item()能够得到张量中的元素值
165 |         pred = np.hstack((pred, output.flatten().detach().numpy()))
166 |         total_loss += loss.item()
167 |     train_mae = mean_absolute_error(Y_train, pred)
168 | 
169 |     print('Train Epoch: {:2d}\t Learning rate: {:.4f}\t Loss: {:.4f}\t MAE: {:.4f}\t RMSE: {:.4f}\n '
170 |         .format(epoch + 1, config['learning_rate'], total_loss, train_mae, \
171 |             np.sqrt(mean_squared_error(Y_train, pred))))
172 |     return train_mae
173 | 
174 | 
175 | def evaluate(fold, model, train_mae):
176 |     model.eval()
177 |     batch_idx = 1
178 |     total_loss = 0
179 |     global min_mae, min_rmse, test_dep_idxs, test_non_idxs
180 |     pred = np.array([])
181 |     X_test = audio_features[list(test_dep_idxs)+list(test_non_idxs)]
182 |     Y_test = audio_targets[list(test_dep_idxs)+list(test_non_idxs)]
183 |     with torch.no_grad():
184 |         if config['cuda']:
185 |             x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True).cuda(),\
186 |                 Variable(torch.from_numpy(Y_test)).cuda()
187 |         else:
188 |             x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True), \
189 |                 Variable(torch.from_numpy(Y_test)).type(torch.FloatTensor)
190 | 
191 |         optimizer.zero_grad()
192 |         output = model(x)
193 |         loss = criterion(output, y.view_as(output))
194 |         total_loss += loss.item()
195 |         pred = output.flatten().detach().numpy()
196 | 
197 |         mae = mean_absolute_error(Y_test, pred)
198 |         rmse = np.sqrt(mean_squared_error(Y_test, pred))
199 | 
200 |         print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
201 |         print('='*89)
202 | 
203 |         if mae <= min_mae and mae < 8.5 and train_mae < 13:
204 |             min_mae = mae
205 |             min_rmse = rmse
206 |             mode = 'bi' if config['bidirectional'] else 'norm'
207 |             mode ='gru'
208 |             save(model, os.path.join(prefix, 'Model/Regression/Audio{}/{}_vlad{}_{}_{:.2f}'.format(fold+1,mode, config['embedding_size'], config['hidden_dims'], min_mae)))
209 |             print('*' * 64)
210 |             print('model saved: mae: {}\t rmse: {}'.format(min_mae, min_rmse))
211 |             print('*' * 64)
212 | 
213 |     return total_loss
214 | 
215 | for fold in range(3):
216 |     test_dep_idxs_tmp = dep_idxs[fold*10:(fold+1)*10]
217 |     test_non_idxs = non_idxs[fold*44:(fold+1)*44]
218 |     train_dep_idxs_tmp = list(set(dep_idxs) - set(test_dep_idxs_tmp))
219 |     train_non_idxs = list(set(non_idxs) - set(test_non_idxs))
220 | 
221 |     # training data augmentation
222 |     train_dep_idxs = []
223 |     for (i, idx) in enumerate(train_dep_idxs_tmp):
224 |         feat = audio_features[idx]
225 |         if i < 14:
226 |             for i in itertools.permutations(feat, feat.shape[0]):
227 |                 audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
228 |                 audio_targets = np.hstack((audio_targets, audio_targets[idx]))
229 |                 train_dep_idxs.append(len(audio_features)-1)
230 |         else:
231 |             train_dep_idxs.append(idx)
232 | 
233 |     # test data augmentation
234 |     # test_dep_idxs = []
235 |     # for idx in test_dep_idxs_tmp:
236 |     #     feat = audio_features[idx]
237 |     #     for i in itertools.permutations(feat, feat.shape[0]):
238 |     #         audio_features = np.vstack((audio_features, np.expand_dims(list(i), 0)))
239 |     #         audio_targets = np.hstack((audio_targets, audio_targets[idx]))
240 |     #         test_dep_idxs.append(len(audio_features)-1)
241 |     test_dep_idxs = test_dep_idxs_tmp
242 | 
243 | 
244 |     model = AudioBiLSTM(config)
245 | 
246 |     if config['cuda']:
247 |         model = model.cuda()
248 | 
249 |     # optimizer = optim.Adam(model.parameters())
250 |     optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
251 |     criterion = nn.L1Loss()
252 |     # criterion = FocalLoss(class_num=2)
253 |     min_mae = 100
254 |     min_rmse = 100
255 |     train_mae = 100
256 | 
257 | 
258 |     for ep in range(1, config['epochs']):
259 |         train_mae = train(ep)
260 |         tloss = evaluate(fold, model, train_mae)
261 | 
262 | # ============== prep ==============
263 | # X_test = np.squeeze(np.load(os.path.join(prefix, 'Features/Audio/val_samples_reg_avid256.npz'))['arr_0'], axis=2)
264 | # Y_test = np.load(os.path.join(prefix, 'Features/Audio/val_labels_reg_avid256.npz'))['arr_0']
265 | # ============== prep ==============
266 | 
267 | 
268 | # ============== SVM ==============
269 | 
270 | # from sklearn.svm import SVR
271 | # from sklearn.model_selection import KFold
272 | 
273 | # X = audio_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
274 | # Y = audio_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
275 | # kf = KFold(n_splits=3)
276 | # regr = SVR(kernel='linear', gamma='auto')
277 | # maes, rmses = [], []
278 | # for train_index, test_index in kf.split(X):
279 | #     # X_train, X_test = X[train_index], X[test_index]
280 | #     # Y_train, Y_test = Y[train_index], Y[test_index]
281 | #     X_train, Y_train = X[train_index], Y[train_index]
282 | #     regr.fit([f.flatten() for f in X_train], Y_train)
283 | #     pred = regr.predict([f.flatten() for f in X_test])
284 | 
285 | #     mae = mean_absolute_error(Y_test, pred)
286 | #     rmse = np.sqrt(mean_squared_error(Y_test, pred))
287 | #     maes.append(mae)
288 | #     rmses.append(rmse)
289 | 
290 | #     print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
291 | #     print('='*89)
292 | #     # break
293 | 
294 | # print(np.mean(maes), np.mean(rmses))
295 | # ============== SVM ==============
296 | 
297 | # # ============== DT ==============
298 | # from sklearn.tree import DecisionTreeRegressor
299 | # from sklearn.model_selection import KFold
300 | 
301 | # X = audio_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
302 | # Y = audio_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
303 | # kf = KFold(n_splits=3)
304 | # regr = DecisionTreeRegressor(max_depth=100, random_state=0, criterion="mse")
305 | # maes, rmses = [], []
306 | # for train_index, test_index in kf.split(X):
307 | #     # X_train, X_test = X[train_index], X[test_index]
308 | #     # Y_train, Y_test = Y[train_index], Y[test_index]
309 | #     X_train, Y_train = X[train_index], Y[train_index]
310 | #     regr.fit([f.flatten() for f in X_train], Y_train)
311 | #     pred = regr.predict([f.flatten() for f in X_test])
312 | 
313 | #     mae = mean_absolute_error(Y_test, pred)
314 | #     rmse = np.sqrt(mean_squared_error(Y_test, pred))
315 | #     maes.append(mae)
316 | #     rmses.append(rmse)
317 | 
318 | #     print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
319 | #     print('='*89)
320 | 
321 | # print(np.mean(maes), np.mean(rmses))
322 | # # ============== DT ==============
323 | 
324 | # # ============== RF ==============
325 | # from sklearn.ensemble import RandomForestRegressor
326 | # from sklearn.model_selection import KFold
327 | 
328 | # X = audio_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
329 | # Y = audio_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
330 | # kf = KFold(n_splits=3)
331 | # regr = RandomForestRegressor(max_depth=100, random_state=0, criterion="mse")
332 | # maes, rmses = [], []
333 | # for train_index, test_index in kf.split(X):
334 | #     # X_train, X_test = X[train_index], X[test_index]
335 | #     # Y_train, Y_test = Y[train_index], Y[test_index]
336 | #     X_train, Y_train = X[train_index], Y[train_index]
337 | #     regr.fit([f.flatten() for f in X_train], Y_train)
338 | #     pred = regr.predict([f.flatten() for f in X_test])
339 | 
340 | #     mae = mean_absolute_error(Y_test, pred)
341 | #     rmse = np.sqrt(mean_squared_error(Y_test, pred))
342 | #     maes.append(mae)
343 | #     rmses.append(rmse)
344 | 
345 | #     print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
346 | #     print('='*89)
347 | 
348 | # print(np.mean(maes), np.mean(rmses))
349 | # # ============== RF ==============
350 | 
351 | # ============== ada ==============
352 | # from sklearn.ensemble import AdaBoostRegressor
353 | # from sklearn.model_selection import KFold
354 | 
355 | # X = audio_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
356 | # Y = audio_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
357 | # kf = KFold(n_splits=3)
358 | # regr = AdaBoostRegressor(n_estimators=50)
359 | # maes, rmses = [], []
360 | # for train_index, test_index in kf.split(X):
361 | #     # X_train, X_test = X[train_index], X[test_index]
362 | #     # Y_train, Y_test = Y[train_index], Y[test_index]
363 | #     X_train, Y_train = X[train_index], Y[train_index]
364 | #     regr.fit([f.flatten() for f in X_train], Y_train)
365 | #     pred = regr.predict([f.flatten() for f in X_test])
366 | 
367 | #     mae = mean_absolute_error(Y_test, pred)
368 | #     rmse = np.sqrt(mean_squared_error(Y_test, pred))
369 | #     maes.append(mae)
370 | #     rmses.append(rmse)
371 | 
372 | #     print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
373 | #     print('='*89)
374 | 
375 | # print(np.mean(maes), np.mean(rmses))
376 | # ============== ada ==============
377 | 


--------------------------------------------------------------------------------
/DepressionCollected/Regression/fuse_net.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import torch
  3 | import torch.nn as nn
  4 | from torch.autograd import Variable
  5 | from torch.nn import functional as F
  6 | import torch.optim as optim
  7 | from sklearn.metrics import mean_absolute_error, mean_squared_error
  8 | import numpy as np
  9 | import pandas as pd
 10 | import wave
 11 | import librosa
 12 | from python_speech_features import *
 13 | import re
 14 | from allennlp.commands.elmo import ElmoEmbedder
 15 | import os
 16 | import tensorflow.compat.v1 as tf
 17 | import itertools
 18 | 
 19 | prefix = os.path.abspath(os.path.join(os.getcwd(), "./"))
 20 | 
 21 | text_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_reg_avg.npz'))['arr_0']
 22 | text_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_reg_avg.npz'))['arr_0']
 23 | audio_features = np.squeeze(np.load(os.path.join(prefix, 'Features/AudioWhole/whole_samples_reg_256.npz'))['arr_0'], axis=2)
 24 | audio_targets = np.load(os.path.join(prefix, 'Features/AudioWhole/whole_labels_reg_256.npz'))['arr_0']
 25 | fuse_features = [[audio_features[i], text_features[i]] for i in range(text_features.shape[0])]
 26 | fuse_targets = text_targets
 27 | 
 28 | fuse_dep_idxs = np.where(text_targets >= 53)[0]
 29 | fuse_non_idxs = np.where(text_targets < 53)[0]
 30 | dep_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/dep_idxs.npy'), allow_pickle=True)
 31 | non_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/non_idxs.npy'), allow_pickle=True)
 32 | 
 33 | text_model_paths = ['Model/Regression/Text1/BiLSTM_128_7.75.pt', 'Model/Regression/Text2/BiLSTM_128_8.46.pt', 'Model/Regression/Text3/BiLSTM_128_8.01.pt']
 34 | audio_model_paths = ['Model/Regression/Audio1/gru_vlad256_256_7.60.pt', 'Model/Regression/Audio2/gru_vlad256_256_8.38.pt', 'Model/Regression/Audio3/gru_vlad256_256_8.25.pt']
 35 | 
 36 | config = {
 37 |     'num_classes': 1,
 38 |     'dropout': 0.5,
 39 |     'rnn_layers': 2,
 40 |     'audio_embed_size': 256,
 41 |     'text_embed_size': 1024,
 42 |     'batch_size': 4,
 43 |     'epochs': 150,
 44 |     'learning_rate': 8e-5,
 45 |     'audio_hidden_dims': 256,
 46 |     'text_hidden_dims': 128,
 47 |     'cuda': False,
 48 |     'lambda': 1e-2,
 49 | }
 50 | 
 51 | class TextBiLSTM(nn.Module):
 52 |     def __init__(self, config):
 53 |         super(TextBiLSTM, self).__init__()
 54 |         self.num_classes = config['num_classes']
 55 |         self.learning_rate = config['learning_rate']
 56 |         self.dropout = config['dropout']
 57 |         self.hidden_dims = config['hidden_dims']
 58 |         self.rnn_layers = config['rnn_layers']
 59 |         self.embedding_size = config['embedding_size']
 60 |         self.bidirectional = config['bidirectional']
 61 | 
 62 |         self.build_model()
 63 |         self.init_weight()
 64 |         
 65 |     def init_weight(net):
 66 |         for name, param in net.named_parameters():
 67 |             if 'bias' in name:
 68 |                 nn.init.constant_(param, 0.0)
 69 |             elif 'weight' in name:
 70 |                 nn.init.xavier_uniform_(param)
 71 | 
 72 |     def build_model(self):
 73 |         # attention layer
 74 |         self.attention_layer = nn.Sequential(
 75 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 76 |             nn.ReLU(inplace=True)
 77 |         )
 78 |         # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)
 79 | 
 80 |         # 双层lstm
 81 |         self.lstm_net = nn.LSTM(self.embedding_size, self.hidden_dims,
 82 |                                 num_layers=self.rnn_layers, dropout=self.dropout,
 83 |                                 bidirectional=self.bidirectional)
 84 |         
 85 |         # self.init_weight()
 86 |         
 87 |         # FC层
 88 |         # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)
 89 |         self.fc_out = nn.Sequential(
 90 |             nn.Dropout(self.dropout),
 91 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 92 |             nn.ReLU(),
 93 |             nn.Dropout(self.dropout),
 94 |             nn.Linear(self.hidden_dims, self.num_classes),
 95 |             nn.ReLU(),
 96 |             # nn.Softmax(dim=1),
 97 |         )
 98 | 
 99 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
100 |         '''
101 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
102 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
103 |         :return: [batch_size, n_hidden]
104 |         '''
105 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
106 |         # h [batch_size, time_step, hidden_dims]
107 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
108 |         # h = lstm_out
109 |         # [batch_size, num_layers * num_directions, n_hidden]
110 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
111 |         # [batch_size, 1, n_hidden]
112 |         lstm_hidden = lstm_hidden.unsqueeze(1)
113 |         # atten_w [batch_size, 1, hidden_dims]
114 |         atten_w = self.attention_layer(lstm_hidden)
115 |         # m [batch_size, time_step, hidden_dims]
116 |         m = nn.Tanh()(h)
117 |         # atten_context [batch_size, 1, time_step]
118 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
119 |         # softmax_w [batch_size, 1, time_step]
120 |         softmax_w = F.softmax(atten_context, dim=-1)
121 |         # context [batch_size, 1, hidden_dims]
122 |         context = torch.bmm(softmax_w, h)
123 |         result = context.squeeze(1)
124 |         return result
125 | 
126 |     def forward(self, x):
127 |         
128 |         # x : [len_seq, batch_size, embedding_dim]
129 |         x = x.permute(1, 0, 2)
130 |         output, (final_hidden_state, final_cell_state) = self.lstm_net(x)
131 |         # output : [batch_size, len_seq, n_hidden * 2]
132 |         output = output.permute(1, 0, 2)
133 |         # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]
134 |         final_hidden_state = final_hidden_state.permute(1, 0, 2)
135 |         # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)
136 |         # atten_out = self.attention_net(output, final_hidden_state)
137 |         atten_out = self.attention_net_with_w(output, final_hidden_state)
138 |         return self.fc_out(atten_out)
139 | 
140 | class AudioBiLSTM(nn.Module):
141 |     def __init__(self, config):
142 |         super(AudioBiLSTM, self).__init__()
143 |         self.num_classes = config['num_classes']
144 |         self.learning_rate = config['learning_rate']
145 |         self.dropout = config['dropout']
146 |         self.hidden_dims = config['hidden_dims']
147 |         self.rnn_layers = config['rnn_layers']
148 |         self.embedding_size = config['embedding_size']
149 |         self.bidirectional = config['bidirectional']
150 | 
151 |         self.build_model()
152 | 
153 |     def init_weight(net):
154 |         for name, param in net.named_parameters():
155 |             if 'bias' in name:
156 |                 nn.init.constant_(param, 0.0)
157 |             elif 'weight' in name:
158 |                 nn.init.xavier_uniform_(param)
159 | 
160 |     def build_model(self):
161 |         # attention layer
162 |         self.attention_layer = nn.Sequential(
163 |             nn.Linear(self.hidden_dims, self.hidden_dims),
164 |             nn.ReLU(inplace=True))
165 |         # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)
166 | 
167 |         self.lstm_net_audio = nn.GRU(self.embedding_size,
168 |                                 self.hidden_dims,
169 |                                 num_layers=self.rnn_layers,
170 |                                 dropout=self.dropout,
171 |                                 bidirectional=self.bidirectional,
172 |                                 batch_first=True)
173 |         # self.lstm_net_audio = nn.GRU(self.embedding_size, self.hidden_dims,
174 |         #                         num_layers=self.rnn_layers, dropout=self.dropout, batch_first=True)
175 | 
176 |         self.bn = nn.BatchNorm1d(3)
177 | 
178 |         # FC层
179 |         self.fc_audio = nn.Sequential(
180 |             nn.Dropout(self.dropout),
181 |             nn.Linear(self.hidden_dims, self.hidden_dims),
182 |             nn.ReLU(),
183 |             nn.Dropout(self.dropout),
184 |             nn.Linear(self.hidden_dims, self.num_classes),
185 |             nn.ReLU(),
186 |             # nn.Softmax(dim=1)
187 |         )
188 | 
189 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
190 |         '''
191 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
192 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
193 |         :return: [batch_size, n_hidden]
194 |         '''
195 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
196 |         # h [batch_size, time_step, hidden_dims]
197 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
198 |         #         h = lstm_out
199 |         # [batch_size, num_layers * num_directions, n_hidden]
200 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
201 |         # [batch_size, 1, n_hidden]
202 |         lstm_hidden = lstm_hidden.unsqueeze(1)
203 |         # atten_w [batch_size, 1, hidden_dims]
204 |         atten_w = self.attention_layer(lstm_hidden)
205 |         # m [batch_size, time_step, hidden_dims]
206 |         m = nn.Tanh()(h)
207 |         # atten_context [batch_size, 1, time_step]
208 |        # print(atten_w.shape, m.transpose(1, 2).shape)
209 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
210 |         # softmax_w [batch_size, 1, time_step]
211 |         softmax_w = F.softmax(atten_context, dim=-1)
212 |         # context [batch_size, 1, hidden_dims]
213 |         context = torch.bmm(softmax_w, h)
214 |         result = context.squeeze(1)
215 |         return result
216 | 
217 |     def forward(self, x):
218 |         x, _ = self.lstm_net_audio(x)
219 |         # x = self.bn(x)
220 |         x = x.sum(dim=1)
221 |         out = self.fc_audio(x)
222 |         return out
223 | 
224 | class fusion_net(nn.Module):
225 |     def __init__(self, text_embed_size, text_hidden_dims, rnn_layers, dropout, num_classes, \
226 |          audio_hidden_dims, audio_embed_size):
227 |         super(fusion_net, self).__init__()
228 |         self.text_embed_size = text_embed_size
229 |         self.audio_embed_size = audio_embed_size
230 |         self.text_hidden_dims = text_hidden_dims
231 |         self.audio_hidden_dims = audio_hidden_dims
232 |         self.rnn_layers = rnn_layers
233 |         self.dropout = dropout
234 |         self.num_classes = num_classes
235 |         
236 |         # ============================= TextBiLSTM =================================
237 |         
238 |         # attention layer
239 |         self.attention_layer = nn.Sequential(
240 |             nn.Linear(self.text_hidden_dims, self.text_hidden_dims),
241 |             nn.ReLU(inplace=True)
242 |         )
243 | 
244 |         # 双层lstm
245 |         self.lstm_net = nn.LSTM(self.text_embed_size, self.text_hidden_dims,
246 |                                 num_layers=self.rnn_layers, dropout=self.dropout,
247 |                                 bidirectional=True)
248 |         # FC层
249 |         self.fc_out = nn.Sequential(
250 |             nn.Dropout(self.dropout),
251 |             nn.Linear(self.text_hidden_dims, self.text_hidden_dims),
252 |             nn.ReLU(),
253 |             nn.Dropout(self.dropout)
254 |         )
255 |         
256 |         # ============================= TextBiLSTM =================================
257 | 
258 |         # ============================= AudioBiLSTM =============================
259 | 
260 |         self.lstm_net_audio = nn.GRU(self.audio_embed_size,
261 |                                 self.audio_hidden_dims,
262 |                                 num_layers=self.rnn_layers,
263 |                                 dropout=self.dropout,
264 |                                 bidirectional=False,
265 |                                 batch_first=True)
266 | 
267 |         self.fc_audio = nn.Sequential(
268 |             nn.Dropout(self.dropout),
269 |             nn.Linear(self.audio_hidden_dims, self.audio_hidden_dims),
270 |             nn.ReLU(),
271 |             nn.Dropout(self.dropout)
272 |         )
273 |         
274 |         # ============================= AudioBiLSTM =============================
275 | 
276 |         # ============================= last fc layer =============================
277 |         # self.bn = nn.BatchNorm1d(self.text_hidden_dims + self.audio_hidden_dims)
278 |         # modal attention
279 |         self.modal_attn = nn.Linear(self.text_hidden_dims + self.audio_hidden_dims, self.text_hidden_dims + self.audio_hidden_dims, bias=False)
280 |         self.fc_final = nn.Sequential(
281 |             nn.Linear(self.text_hidden_dims + self.audio_hidden_dims, self.num_classes, bias=False),
282 |             nn.ReLU(),
283 |             # nn.Softmax(dim=1),
284 |             # nn.Sigmoid()
285 |         )
286 |         
287 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
288 |         '''
289 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
290 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
291 |         :return: [batch_size, n_hidden]
292 |         '''
293 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
294 |         # h [batch_size, time_step, hidden_dims]
295 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
296 |         # [batch_size, num_layers * num_directions, n_hidden]
297 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
298 |         # [batch_size, 1, n_hidden]
299 |         lstm_hidden = lstm_hidden.unsqueeze(1)
300 |         # atten_w [batch_size, 1, hidden_dims]
301 |         atten_w = self.attention_layer(lstm_hidden)
302 |         # m [batch_size, time_step, hidden_dims]
303 |         m = nn.Tanh()(h)
304 |         # atten_context [batch_size, 1, time_step]
305 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
306 |         # softmax_w [batch_size, 1, time_step]
307 |         softmax_w = F.softmax(atten_context, dim=-1)
308 |         # context [batch_size, 1, hidden_dims]
309 |         context = torch.bmm(softmax_w, h)
310 |         result = context.squeeze(1)
311 |         return result
312 |     
313 |     def pretrained_feature(self, x):
314 |         with torch.no_grad():
315 |             x_text = []
316 |             x_audio = []
317 |             for ele in x:
318 |                 x_text.append(ele[1])
319 |                 x_audio.append(ele[0])
320 |             x_text, x_audio = Variable(torch.tensor(x_text).type(torch.FloatTensor), requires_grad=False), Variable(torch.tensor(x_audio).type(torch.FloatTensor), requires_grad=False)
321 |             # ============================= TextBiLSTM =================================
322 |             # x : [len_seq, batch_size, embedding_dim]
323 |             x_text = x_text.permute(1, 0, 2)
324 |             output, (final_hidden_state, _) = self.lstm_net(x_text)
325 |             # output : [batch_size, len_seq, n_hidden * 2]
326 |             output = output.permute(1, 0, 2)
327 |             # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]
328 |             final_hidden_state = final_hidden_state.permute(1, 0, 2)
329 |             # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)
330 |             # atten_out = self.attention_net(output, final_hidden_state)
331 |             atten_out = self.attention_net_with_w(output, final_hidden_state)
332 |             text_feature = self.fc_out(atten_out)
333 | 
334 |             # ============================= TextBiLSTM =================================
335 | 
336 |             # ============================= AudioBiLSTM =============================
337 | 
338 |             x_audio, _ = self.lstm_net_audio(x_audio)
339 |             x_audio = x_audio.sum(dim=1)
340 |             audio_feature = self.fc_audio(x_audio)
341 | 
342 |         # ============================= AudioBiLSTM =============================
343 |         return (text_feature, audio_feature)
344 |         
345 |     def forward(self, x): 
346 |         # x = self.bn(x)
347 |         modal_weights = torch.sigmoid(self.modal_attn(x))
348 |         # modal_weights = self.modal_attn(x)
349 |         x = (modal_weights * x)
350 |         output = self.fc_final(x)
351 |         return output
352 |     
353 | class MyLoss(nn.Module):
354 |     def __init__(self):
355 |         super(MyLoss, self).__init__()
356 |         
357 |     def forward(self, text_feature, audio_feature, target, model):
358 |         weight = model.fc_final[0].weight
359 |         # bias = model.fc_final[0].bias
360 |         # print(weight, bias)
361 |         pred_text = F.linear(text_feature, weight[:, :config['text_hidden_dims']])
362 |         pred_audio = F.linear(audio_feature, weight[:, config['text_hidden_dims']:])
363 |         # l = nn.CrossEntropyLoss()
364 |         l = nn.SmoothL1Loss()
365 |         target = torch.tensor(target).view_as(pred_text).float()
366 |         return l(pred_text, target) + l(pred_audio, target)
367 | 
368 | def save(model, filename):
369 |     save_filename = '{}.pt'.format(filename)
370 |     torch.save(model, save_filename)
371 |     print('Saved as %s' % save_filename)
372 | 
373 | def train(model, epoch):
374 |     global max_train_acc, train_acc
375 |     model.train()
376 |     batch_idx = 1
377 |     total_loss = 0
378 |     correct = 0
379 |     pred = np.array([])
380 |     X_train = []
381 |     Y_train = []
382 |     for idx in train_dep_idxs+train_non_idxs:
383 |         X_train.append(fuse_features[idx])
384 |         Y_train.append(fuse_targets[idx])
385 |     for i in range(0, len(X_train), config['batch_size']):
386 |         if i + config['batch_size'] > len(X_train):
387 |             x, y = X_train[i:], Y_train[i:]
388 |         else:
389 |             x, y = X_train[i:(i+config['batch_size'])], Y_train[i:(i+config['batch_size'])]
390 |         # 将模型的参数梯度设置为0
391 |         optimizer.zero_grad()
392 |         text_feature, audio_feature = model.pretrained_feature(x)
393 |         audio_feature_norm = (audio_feature - audio_feature.mean())/audio_feature.std()
394 |         text_feature_norm = (text_feature - text_feature.mean())/text_feature.std()
395 |         # concat_x = torch.cat((text_feature_norm, audio_feature_norm), dim=1)
396 |         concat_x = torch.cat((text_feature, audio_feature), dim=1)
397 |         output = model(concat_x)
398 |         # loss = criterion(output, torch.tensor(y).float())
399 |         loss = criterion(text_feature, audio_feature, y, model)
400 |         # 后向传播调整参数
401 |         loss.backward()
402 |         # 根据梯度更新网络参数
403 |         optimizer.step()
404 |         batch_idx += 1
405 |         # loss.item()能够得到张量中的元素值
406 |         pred = np.hstack((pred, output.flatten().detach().numpy()))
407 |         total_loss += loss.item()
408 |     train_mae = mean_absolute_error(Y_train, pred)
409 |     print('Train Epoch: {:2d}\t Learning rate: {:.4f}\t Loss: {:.4f}\t MAE: {:.4f}\t RMSE: {:.4f}\n '
410 |         .format(epoch + 1, config['learning_rate'], total_loss, train_mae, \
411 |             np.sqrt(mean_squared_error(Y_train, pred))))
412 |     return train_mae
413 | 
414 | def evaluate(model, fold, train_mae):
415 |     model.eval()
416 |     batch_idx = 1
417 |     total_loss = 0
418 |     global min_mae, min_rmse, test_dep_idxs, test_non_idxs
419 |     pred = np.array([])
420 |     X_test = []
421 |     Y_test = []
422 |     for idx in list(test_dep_idxs)+list(test_non_idxs):
423 |         X_test.append(fuse_features[idx])
424 |         Y_test.append(fuse_targets[idx])
425 |     for i in range(0, len(X_test), config['batch_size']):
426 |         if i + config['batch_size'] > len(X_test):
427 |             x, y = X_test[i:], Y_test[i:]
428 |         else:
429 |             x, y = X_test[i:(i+config['batch_size'])], Y_test[i:(i+config['batch_size'])]
430 |         text_feature, audio_feature = model.pretrained_feature(x)
431 |         with torch.no_grad():
432 |             audio_feature_norm = (audio_feature - audio_feature.mean())/audio_feature.std()
433 |             text_feature_norm = (text_feature - text_feature.mean())/text_feature.std()
434 |             concat_x = torch.cat((text_feature, audio_feature), dim=1)
435 |             # concat_x = torch.cat((text_feature_norm, audio_feature_norm), dim=1)
436 |             output = model(concat_x)
437 |         # loss = criterion(output, torch.tensor(y).float())
438 |         loss = criterion(text_feature, audio_feature, y, model)
439 |         pred = np.hstack((pred, output.flatten().detach().numpy()))
440 |         total_loss += loss.item()
441 |         
442 |     mae = mean_absolute_error(Y_test, pred)
443 |     rmse = np.sqrt(mean_squared_error(Y_test, pred))
444 | 
445 |     print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
446 |     print('='*89)
447 | 
448 |     if mae <= min_mae and mae < 8.2 and train_mae < 13:
449 |         min_mae = mae
450 |         min_rmse = rmse
451 |         save(model, os.path.join(prefix, 'Model/Regression/Fuse{}/fuse_{:.2f}'.format(fold+1, min_mae)))
452 |         print('*' * 64)
453 |         print('model saved: mae: {}\t rmse: {}'.format(min_mae, min_rmse))
454 |         print('*' * 64)
455 | 
456 |     return total_loss
457 | 
458 | def evaluate_audio(model):
459 |     model.eval()
460 |     batch_idx = 1
461 |     total_loss = 0
462 |     global min_mae, min_rmse, test_dep_idxs, test_non_idxs
463 |     pred = np.array([])
464 |     X_test = []
465 |     Y_test = []
466 |     for idx in list(test_dep_idxs)+list(test_non_idxs):
467 |         X_test.append(fuse_features[idx][0])
468 |         Y_test.append(fuse_targets[idx])
469 |     X_test = np.array(X_test)
470 |     Y_test = np.array(Y_test)
471 | 
472 |     with torch.no_grad():
473 |         if config['cuda']:
474 |             x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True).cuda(),\
475 |                 Variable(torch.from_numpy(Y_test)).cuda()
476 |         else:
477 |             x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True), \
478 |                 Variable(torch.from_numpy(Y_test)).type(torch.FloatTensor)
479 | 
480 |         optimizer.zero_grad()
481 |         output = model(x)
482 |         loss = criterion(output, y.view_as(output))
483 |         total_loss += loss.item()
484 |         pred = output.flatten().detach().numpy()
485 | 
486 |         mae = mean_absolute_error(Y_test, pred)
487 |         rmse = np.sqrt(mean_squared_error(Y_test, pred))
488 | 
489 |         print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
490 |         print('='*89)
491 | 
492 | def evaluate_text(model):
493 |     model.eval()
494 |     batch_idx = 1
495 |     total_loss = 0
496 |     global min_mae, min_rmse, test_dep_idxs, test_non_idxs
497 |     pred = np.array([])
498 |     X_test = []
499 |     Y_test = []
500 |     for idx in list(test_dep_idxs)+list(test_non_idxs):
501 |         X_test.append(fuse_features[idx][1])
502 |         Y_test.append(fuse_targets[idx])
503 |     X_test = np.array(X_test)
504 |     Y_test = np.array(Y_test)
505 |     criterion = nn.SmoothL1Loss()
506 |     with torch.no_grad():
507 |         if config['cuda']:
508 |             x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True).cuda(),\
509 |                 Variable(torch.from_numpy(Y_test)).cuda()
510 |         else:
511 |             x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True), \
512 |                 Variable(torch.from_numpy(Y_test)).type(torch.FloatTensor)
513 | 
514 |         optimizer.zero_grad()
515 |         output = model(x)
516 |         loss = criterion(output, y.view_as(output))
517 |         total_loss += loss.item()
518 |         pred = output.flatten().detach().numpy()
519 | 
520 |         mae = mean_absolute_error(Y_test, pred)
521 |         rmse = np.sqrt(mean_squared_error(Y_test, pred))
522 | 
523 |         print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
524 |         print('='*89)
525 | 
526 | for fold in range(3):
527 |     test_dep_idxs_tmp = dep_idxs[fold*10:(fold+1)*10]
528 |     test_non_idxs = non_idxs[fold*44:(fold+1)*44]
529 |     train_dep_idxs_tmp = list(set(dep_idxs) - set(test_dep_idxs_tmp))
530 |     train_non_idxs = list(set(non_idxs) - set(test_non_idxs))
531 | 
532 |     train_dep_idxs = []
533 |     test_dep_idxs = []
534 |     # depression data augmentation
535 |     for (i, idx) in enumerate(train_dep_idxs_tmp):
536 |         feat = fuse_features[idx]
537 |         audio_perm = itertools.permutations(feat[0], 3)
538 |         text_perm = itertools.permutations(feat[1], 3)
539 |         if i < 14:
540 |             for fuse_perm in zip(audio_perm, text_perm):
541 |                 fuse_features.append(fuse_perm)
542 |                 fuse_targets = np.hstack((fuse_targets, fuse_targets[idx]))
543 |                 train_dep_idxs.append(len(fuse_features)-1)
544 |         else:
545 |             train_dep_idxs.append(idx)
546 | 
547 |     test_dep_idxs = test_dep_idxs_tmp
548 | 
549 |     model = fusion_net(config['text_embed_size'], config['text_hidden_dims'], config['rnn_layers'], \
550 |     config['dropout'], config['num_classes'], config['audio_hidden_dims'], config['audio_embed_size'])
551 | 
552 |     optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
553 |     # optimizer = optim.Adam(model.parameters())
554 |     # criterion = nn.SmoothL1Loss()
555 |     criterion = MyLoss()
556 | 
557 |     text_lstm_model = torch.load(os.path.join(prefix, text_model_paths[fold]))
558 |     audio_lstm_model = torch.load(os.path.join(prefix, audio_model_paths[fold]))
559 |     model_state_dict = {}
560 |     model_state_dict['lstm_net_audio.weight_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l0']
561 |     model_state_dict['lstm_net_audio.weight_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l0']
562 |     model_state_dict['lstm_net_audio.bias_ih_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l0']
563 |     model_state_dict['lstm_net_audio.bias_hh_l0'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l0']
564 | 
565 |     model_state_dict['lstm_net_audio.weight_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_ih_l1']
566 |     model_state_dict['lstm_net_audio.weight_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.weight_hh_l1']
567 |     model_state_dict['lstm_net_audio.bias_ih_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_ih_l1']
568 |     model_state_dict['lstm_net_audio.bias_hh_l1'] = audio_lstm_model.state_dict()['lstm_net_audio.bias_hh_l1']
569 | 
570 |     model_state_dict['fc_audio.1.weight'] = audio_lstm_model.state_dict()['fc_audio.1.weight']
571 |     model_state_dict['fc_audio.1.bias'] = audio_lstm_model.state_dict()['fc_audio.1.bias']
572 |     model_state_dict['fc_audio.4.weight'] = audio_lstm_model.state_dict()['fc_audio.4.weight']
573 |     model_state_dict['fc_audio.4.bias'] = audio_lstm_model.state_dict()['fc_audio.4.bias']
574 |     model.load_state_dict(text_lstm_model.state_dict(), strict=False)
575 |     # model.load_state_dict(audio_lstm_model.state_dict(), strict=False)
576 |     model.load_state_dict(model_state_dict, strict=False)
577 |     
578 |     for param in model.parameters():
579 |         param.requires_grad = True
580 | 
581 |     model.fc_final[0].weight.requires_grad = True
582 |     # model.fc_final[0].bias.requires_grad = True
583 |     model.modal_attn.weight.requires_grad = True
584 |     min_mae = 100
585 |     min_rmse = 100
586 |     train_mae = 100
587 | 
588 |     for ep in range(1, config['epochs']):
589 |         train_mae = train(model, ep)
590 |         tloss = evaluate(model, fold, train_mae)
591 |     # evaluate_audio(audio_lstm_model)
592 |     # evaluate_text(text_lstm_model)


--------------------------------------------------------------------------------
/DepressionCollected/Regression/text_bilstm_perm.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.autograd import Variable
  4 | from torch.nn import functional as F
  5 | import torch.optim as optim
  6 | from sklearn.metrics import confusion_matrix
  7 | from sklearn.metrics import mean_absolute_error, mean_squared_error
  8 | from sklearn.model_selection import train_test_split
  9 | 
 10 | import numpy as np
 11 | import pandas as pd
 12 | import os
 13 | import pickle
 14 | import random
 15 | import itertools
 16 | 
 17 | prefix = os.path.abspath(os.path.join(os.getcwd(), "../"))
 18 | text_features = np.load(os.path.join(prefix, 'Features/TextWhole/whole_samples_reg_avg.npz'))['arr_0']
 19 | text_targets = np.load(os.path.join(prefix, 'Features/TextWhole/whole_labels_reg_avg.npz'))['arr_0']
 20 | 
 21 | dep_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/dep_idxs.npy'), allow_pickle=True)
 22 | non_idxs = np.load(os.path.join(prefix, 'Features/AudioWhole/non_idxs.npy'), allow_pickle=True)
 23 | 
 24 | config = {
 25 |     'num_classes': 1,
 26 |     'dropout': 0.5,
 27 |     'rnn_layers': 2,
 28 |     'embedding_size': 1024,
 29 |     'batch_size': 2,
 30 |     'epochs': 110,
 31 |     'learning_rate': 1e-5,
 32 |     'hidden_dims': 128,
 33 |     'bidirectional': True,
 34 |     'cuda': False,
 35 | }
 36 | 
 37 | class TextBiLSTM(nn.Module):
 38 |     def __init__(self, config):
 39 |         super(TextBiLSTM, self).__init__()
 40 |         self.num_classes = config['num_classes']
 41 |         self.learning_rate = config['learning_rate']
 42 |         self.dropout = config['dropout']
 43 |         self.hidden_dims = config['hidden_dims']
 44 |         self.rnn_layers = config['rnn_layers']
 45 |         self.embedding_size = config['embedding_size']
 46 |         self.bidirectional = config['bidirectional']
 47 | 
 48 |         self.build_model()
 49 |         self.init_weight()
 50 |         
 51 |     def init_weight(net):
 52 |         for name, param in net.named_parameters():
 53 |             if 'bias' in name:
 54 |                 nn.init.constant_(param, 0.0)
 55 |             elif 'weight' in name:
 56 |                 nn.init.xavier_uniform_(param)
 57 | 
 58 |     def build_model(self):
 59 |         # attention layer
 60 |         self.attention_layer = nn.Sequential(
 61 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 62 |             nn.ReLU(inplace=True)
 63 |         )
 64 |         # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)
 65 | 
 66 |         # 双层lstm
 67 |         self.lstm_net = nn.LSTM(self.embedding_size, self.hidden_dims,
 68 |                                 num_layers=self.rnn_layers, dropout=self.dropout,
 69 |                                 bidirectional=self.bidirectional)
 70 |         
 71 |         # self.init_weight()
 72 |         
 73 |         # FC层
 74 |         # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)
 75 |         self.fc_out = nn.Sequential(
 76 |             nn.Dropout(self.dropout),
 77 |             nn.Linear(self.hidden_dims, self.hidden_dims),
 78 |             nn.ReLU(),
 79 |             nn.Dropout(self.dropout),
 80 |             nn.Linear(self.hidden_dims, self.num_classes),
 81 |             nn.ReLU(),
 82 |             # nn.Softmax(dim=1),
 83 |         )
 84 | 
 85 |     def attention_net_with_w(self, lstm_out, lstm_hidden):
 86 |         '''
 87 |         :param lstm_out:    [batch_size, len_seq, n_hidden * 2]
 88 |         :param lstm_hidden: [batch_size, num_layers * num_directions, n_hidden]
 89 |         :return: [batch_size, n_hidden]
 90 |         '''
 91 |         lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
 92 |         # h [batch_size, time_step, hidden_dims]
 93 |         h = lstm_tmp_out[0] + lstm_tmp_out[1]
 94 |         # h = lstm_out
 95 |         # [batch_size, num_layers * num_directions, n_hidden]
 96 |         lstm_hidden = torch.sum(lstm_hidden, dim=1)
 97 |         # [batch_size, 1, n_hidden]
 98 |         lstm_hidden = lstm_hidden.unsqueeze(1)
 99 |         # atten_w [batch_size, 1, hidden_dims]
100 |         atten_w = self.attention_layer(lstm_hidden)
101 |         # m [batch_size, time_step, hidden_dims]
102 |         m = nn.Tanh()(h)
103 |         # atten_context [batch_size, 1, time_step]
104 |         atten_context = torch.bmm(atten_w, m.transpose(1, 2))
105 |         # softmax_w [batch_size, 1, time_step]
106 |         softmax_w = F.softmax(atten_context, dim=-1)
107 |         # context [batch_size, 1, hidden_dims]
108 |         context = torch.bmm(softmax_w, h)
109 |         result = context.squeeze(1)
110 |         return result
111 | 
112 |     def forward(self, x):
113 |         
114 |         # x : [len_seq, batch_size, embedding_dim]
115 |         x = x.permute(1, 0, 2)
116 |         output, (final_hidden_state, final_cell_state) = self.lstm_net(x)
117 |         # output : [batch_size, len_seq, n_hidden * 2]
118 |         output = output.permute(1, 0, 2)
119 |         # final_hidden_state : [batch_size, num_layers * num_directions, n_hidden]
120 |         final_hidden_state = final_hidden_state.permute(1, 0, 2)
121 |         # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)
122 |         # atten_out = self.attention_net(output, final_hidden_state)
123 |         atten_out = self.attention_net_with_w(output, final_hidden_state)
124 |         return self.fc_out(atten_out)
125 | 
126 | def save(model, filename):
127 |     save_filename = '{}.pt'.format(filename)
128 |     torch.save(model, save_filename)
129 |     print('Saved as %s' % save_filename)
130 |  
131 | def train(epoch):
132 |     global lr, train_acc
133 |     model.train()
134 |     batch_idx = 1      
135 |     total_loss = 0
136 |     correct = 0
137 |     pred = np.array([])
138 |     X_train = text_features[train_dep_idxs+train_non_idxs]
139 |     Y_train = text_targets[train_dep_idxs+train_non_idxs]
140 |     for i in range(0, X_train.shape[0], config['batch_size']):
141 |         if i + config['batch_size'] > X_train.shape[0]:
142 |             x, y = X_train[i:], Y_train[i:]
143 |         else:
144 |             x, y = X_train[i:(i + config['batch_size'])], Y_train[i:(
145 |                 i + config['batch_size'])]
146 |         if config['cuda']:
147 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True).cuda(), Variable(torch.from_numpy(y)).cuda()
148 |         else:
149 |             x, y = Variable(torch.from_numpy(x).type(torch.FloatTensor), requires_grad=True), \
150 |                 Variable(torch.from_numpy(y)).type(torch.FloatTensor)
151 | 
152 |         # 将模型的参数梯度设置为0
153 |         optimizer.zero_grad()
154 |         output = model(x)
155 |         loss = criterion(output, y.view_as(output))
156 |         # 后向传播调整参数
157 |         loss.backward()
158 |         # 根据梯度更新网络参数
159 |         optimizer.step()
160 |         batch_idx += 1
161 |         # loss.item()能够得到张量中的元素值
162 |         pred = np.hstack((pred, output.flatten().detach().numpy()))
163 |         total_loss += loss.item()
164 |     train_mae = mean_absolute_error(Y_train, pred)
165 | 
166 |     print('Train Epoch: {:2d}\t Learning rate: {:.4f}\t Loss: {:.4f}\t MAE: {:.4f}\t RMSE: {:.4f}\n '
167 |         .format(epoch + 1, config['learning_rate'], total_loss, train_mae, \
168 |             np.sqrt(mean_squared_error(Y_train, pred))))
169 |     return train_mae
170 | 
171 | 
172 | def evaluate(fold, model, train_mae):
173 |     model.eval()
174 |     batch_idx = 1
175 |     total_loss = 0
176 |     global min_mae, min_rmse, test_dep_idxs, test_non_idxs
177 |     pred = np.array([])
178 |     X_test = text_features[list(test_dep_idxs)+list(test_non_idxs)]
179 |     Y_test = text_targets[list(test_dep_idxs)+list(test_non_idxs)]
180 |     with torch.no_grad():
181 |         if config['cuda']:
182 |             x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True).cuda(),\
183 |                 Variable(torch.from_numpy(Y_test)).cuda()
184 |         else:
185 |             x, y = Variable(torch.from_numpy(X_test).type(torch.FloatTensor), requires_grad=True), \
186 |                 Variable(torch.from_numpy(Y_test)).type(torch.FloatTensor)
187 | 
188 |         optimizer.zero_grad()
189 |         output = model(x)
190 |         loss = criterion(output, y.view_as(output))
191 |         total_loss += loss.item()
192 |         pred = output.flatten().detach().numpy()
193 | 
194 |         mae = mean_absolute_error(Y_test, pred)
195 |         rmse = np.sqrt(mean_squared_error(Y_test, pred))
196 | 
197 |         print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
198 |         print('='*89)
199 | 
200 |         if mae <= min_mae and mae < 8.5 and train_mae < 13:
201 |             min_mae = mae
202 |             min_rmse = rmse
203 |             mode = 'bi' if config['bidirectional'] else 'norm'
204 |             mode ='gru'
205 |             save(model, os.path.join(prefix, 'Model/Regression/Text{}/BiLSTM_{}_{:.2f}'.format(fold+1, config['hidden_dims'], min_mae)))
206 |             print('*' * 64)
207 |             print('model saved: mae: {}\t rmse: {}'.format(min_mae, min_rmse))
208 |             print('*' * 64)
209 | 
210 |     return total_loss
211 | 
212 | for fold in range(3):
213 |     test_dep_idxs_tmp = dep_idxs[fold*10:(fold+1)*10]
214 |     test_non_idxs = non_idxs[fold*44:(fold+1)*44]
215 |     train_dep_idxs_tmp = list(set(dep_idxs) - set(test_dep_idxs_tmp))
216 |     train_non_idxs = list(set(non_idxs) - set(test_non_idxs))
217 | 
218 |     # training data augmentation
219 |     train_dep_idxs = []
220 |     for (i, idx) in enumerate(train_dep_idxs_tmp):
221 |         feat = text_features[idx]
222 |         if i < 14:
223 |             for i in itertools.permutations(feat, feat.shape[0]):
224 |                 text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))
225 |                 text_targets = np.hstack((text_targets, text_targets[idx]))
226 |                 train_dep_idxs.append(len(text_features)-1)
227 |         else:
228 |             train_dep_idxs.append(idx)
229 | 
230 |     # test data augmentation
231 |     # test_dep_idxs = []
232 |     # for idx in test_dep_idxs_tmp:
233 |     #     feat = text_features[idx]
234 |     #     for i in itertools.permutations(feat, feat.shape[0]):
235 |     #         text_features = np.vstack((text_features, np.expand_dims(list(i), 0)))
236 |     #         text_targets = np.hstack((text_targets, text_targets[idx]))
237 |     #         test_dep_idxs.append(len(text_features)-1)
238 |     test_dep_idxs = test_dep_idxs_tmp
239 | 
240 | 
241 |     model = TextBiLSTM(config)
242 | 
243 |     if config['cuda']:
244 |         model = model.cuda()
245 | 
246 |     optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
247 |     criterion = nn.SmoothL1Loss()
248 |     # criterion = FocalLoss(class_num=2)
249 |     min_mae = 100
250 |     min_rmse = 100
251 |     train_mae = 100
252 | 
253 | 
254 |     for ep in range(1, config['epochs']):
255 |         train_mae = train(ep)
256 |         tloss = evaluate(fold, model, train_mae)
257 | 
258 | # ============== prep ==============
259 | # X_test = np.squeeze(np.load(os.path.join(prefix, 'Features/Audio/val_samples_reg_avid256.npz'))['arr_0'], axis=2)
260 | # Y_test = np.load(os.path.join(prefix, 'Features/Audio/val_labels_reg_avid256.npz'))['arr_0']
261 | # ============== prep ==============
262 | 
263 | 
264 | # ============== SVM ==============
265 | 
266 | # from sklearn.svm import SVR
267 | # from sklearn.model_selection import KFold
268 | 
269 | # X = text_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
270 | # Y = text_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
271 | # kf = KFold(n_splits=3)
272 | # regr = SVR(kernel='linear', gamma='auto')
273 | # maes, rmses = [], []
274 | # for train_index, test_index in kf.split(X):
275 | #     # X_train, X_test = X[train_index], X[test_index]
276 | #     # Y_train, Y_test = Y[train_index], Y[test_index]
277 | #     X_train, Y_train = X[train_index], Y[train_index]
278 | #     regr.fit([f.flatten() for f in X_train], Y_train)
279 | #     pred = regr.predict([f.flatten() for f in X_test])
280 | 
281 | #     mae = mean_absolute_error(Y_test, pred)
282 | #     rmse = np.sqrt(mean_squared_error(Y_test, pred))
283 | #     maes.append(mae)
284 | #     rmses.append(rmse)
285 | 
286 | #     print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
287 | #     print('='*89)
288 | #     # break
289 | 
290 | # print(np.mean(maes), np.mean(rmses))
291 | # ============== SVM ==============
292 | 
293 | # # ============== DT ==============
294 | # from sklearn.tree import DecisionTreeRegressor
295 | # from sklearn.model_selection import KFold
296 | 
297 | # X = text_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
298 | # Y = text_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
299 | # kf = KFold(n_splits=3)
300 | # regr = DecisionTreeRegressor(max_depth=100, random_state=0, criterion="mse")
301 | # maes, rmses = [], []
302 | # for train_index, test_index in kf.split(X):
303 | #     # X_train, X_test = X[train_index], X[test_index]
304 | #     # Y_train, Y_test = Y[train_index], Y[test_index]
305 | #     X_train, Y_train = X[train_index], Y[train_index]
306 | #     regr.fit([f.flatten() for f in X_train], Y_train)
307 | #     pred = regr.predict([f.flatten() for f in X_test])
308 | 
309 | #     mae = mean_absolute_error(Y_test, pred)
310 | #     rmse = np.sqrt(mean_squared_error(Y_test, pred))
311 | #     maes.append(mae)
312 | #     rmses.append(rmse)
313 | 
314 | #     print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
315 | #     print('='*89)
316 | 
317 | # print(np.mean(maes), np.mean(rmses))
318 | # # ============== DT ==============
319 | 
320 | # # ============== RF ==============
321 | # from sklearn.ensemble import RandomForestRegressor
322 | # from sklearn.model_selection import KFold
323 | 
324 | # X = text_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
325 | # Y = text_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
326 | # kf = KFold(n_splits=3)
327 | # regr = RandomForestRegressor(max_depth=100, random_state=0, criterion="mse")
328 | # maes, rmses = [], []
329 | # for train_index, test_index in kf.split(X):
330 | #     # X_train, X_test = X[train_index], X[test_index]
331 | #     # Y_train, Y_test = Y[train_index], Y[test_index]
332 | #     X_train, Y_train = X[train_index], Y[train_index]
333 | #     regr.fit([f.flatten() for f in X_train], Y_train)
334 | #     pred = regr.predict([f.flatten() for f in X_test])
335 | 
336 | #     mae = mean_absolute_error(Y_test, pred)
337 | #     rmse = np.sqrt(mean_squared_error(Y_test, pred))
338 | #     maes.append(mae)
339 | #     rmses.append(rmse)
340 | 
341 | #     print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
342 | #     print('='*89)
343 | 
344 | # print(np.mean(maes), np.mean(rmses))
345 | # # ============== RF ==============
346 | 
347 | # ============== ada ==============
348 | # from sklearn.ensemble import AdaBoostRegressor
349 | # from sklearn.model_selection import KFold
350 | 
351 | # X = text_features[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
352 | # Y = text_targets[train_dep_idxs+train_non_idxs+test_dep_idxs+test_non_idxs]
353 | # kf = KFold(n_splits=3)
354 | # regr = AdaBoostRegressor(n_estimators=50)
355 | # maes, rmses = [], []
356 | # for train_index, test_index in kf.split(X):
357 | #     # X_train, X_test = X[train_index], X[test_index]
358 | #     # Y_train, Y_test = Y[train_index], Y[test_index]
359 | #     X_train, Y_train = X[train_index], Y[train_index]
360 | #     regr.fit([f.flatten() for f in X_train], Y_train)
361 | #     pred = regr.predict([f.flatten() for f in X_test])
362 | 
363 | #     mae = mean_absolute_error(Y_test, pred)
364 | #     rmse = np.sqrt(mean_squared_error(Y_test, pred))
365 | #     maes.append(mae)
366 | #     rmses.append(rmse)
367 | 
368 | #     print('MAE: {:.4f}\t RMSE: {:.4f}\n'.format(mae, rmse))
369 | #     print('='*89)
370 | 
371 | # print(np.mean(maes), np.mean(rmses))
372 | # ============== ada ==============
373 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ICASSP2022-Depression
 2 | Automatic Depression Detection: a GRU/ BiLSTM-based Model and An Emotional Audio-Textual Corpus
 3 | 
 4 | https://arxiv.org/pdf/2202.08210.pdf
 5 | 
 6 | https://ieeexplore.ieee.org/abstract/document/9746569/
 7 | 
 8 | ## Code
 9 | 
10 | - Regression
11 |   - audio_bilstm_perm.py: train audio network 
12 |   - text_bilstm_perm.py: train text network 
13 |   - fuse_net.py: train multi-modal network
14 | - Classification
15 |   - audio_features_whole.py: extract audio features
16 |   - text_features_whole.py: extract text features
17 |   - audio_gru_whole.py: train audio network 
18 |   - text_bilstm_whole.py: train text network
19 |   - fuse_net_whole.py: train fuse network
20 | 
21 | 
22 | ## Dataset: EATD-Corpus
23 | 
24 | The EATD-Corpus is a dataset consist of audio and text files of 162 volunteers who received counseling.
25 | 
26 | ### How to download
27 | The EATD-Corpus can be downloaded at https://1drv.ms/u/s!AsGVGqImbOwYhHUHcodFC3xmKZKK?e=mCT5oN. Password: Ymj26Uv5
28 | 
29 | ### How to use
30 | 
31 | Training set contains data from 83 volunteers (19 depressed and 64 non-depressed).
32 | 
33 | Validation set contains data from 79 volunteers (11 depressed and 68 non-depressed).
34 | 
35 | Each folder contains depression data for one volunteer.
36 | 
37 | - {positive/negative/neutral}.wav: Raw audio in wav
38 | - {positive/negative/neutral}_out.wav: Preprocessed audio. Preprocessing operations include denoising and de-muting
39 | - {positive/negative/neutral}.txt: Audio translation
40 | - label.txt: Raw SDS score
41 | - new_label.txt: Standard SDS score (Raw SDS score multiplied by 1.25)
42 | 


--------------------------------------------------------------------------------