├── README.md
├── dataset.py
├── loss_function.py
├── main.py
├── net.py
├── params.py
├── train_validate.py
└── utils.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CAFNet

This repository contains the code for the paper [Dynamic Confidence-Aware Multi-Modal Emotion Recognition](https://ieeexplore.ieee.org/abstract/document/10349925). The DEAP dataset can be downloaded from https://www.eecs.qmul.ac.uk/mmv/datasets/deap/download.html.

## Requirements
* Python 3
* PyTorch 1.8.0
* scikit-learn
* numpy

## Files
* dataset.py
* loss_function.py
* main.py
* net.py
* params.py
* train_validate.py
* utils.py

## Usage

Run `main.py` to train the model and obtain the classification results.
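For example, using the command-line flags defined in `params.py` (the paths below are the defaults; point them at wherever you placed the preprocessed feature files):

```
python main.py --task Arousal --path_eeg data/eeg.npy --path_face data/face.npy --path_label data/label2.npy
```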

## Citation
If you find our work useful, please consider citing our paper:
```
@ARTICLE{10349925,
  author   = {Zhu, Qi and Zheng, Chuhang and Zhang, Zheng and Shao, Wei and Zhang, Daoqiang},
  journal  = {IEEE Transactions on Affective Computing},
  title    = {Dynamic Confidence-Aware Multi-Modal Emotion Recognition},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {1-13},
  keywords = {Uncertainty;Emotion recognition;Multi-modal fusion},
  doi      = {10.1109/TAFFC.2023.3340924}
}
```

--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
import torch
import numpy as np
from torch.utils.data.dataset import Dataset
from sklearn.preprocessing import StandardScaler


def normalize(x):
    # Standardize each feature to zero mean and unit variance.
    scaler = StandardScaler()
    return scaler.fit_transform(x)


class DeapDataset(Dataset):
    def __init__(self, path_view1, path_view2, path_label):
        super(DeapDataset, self).__init__()
        self.path_view1 = path_view1
        self.path_view2 = path_view2
        self.path_label = path_label

        # View 1: EEG features, reshaped to (720 trials, 20 time steps, 160 features).
        self.view1 = np.load(self.path_view1)
        self.view1 = normalize(self.view1)
        self.view1 = self.view1.reshape((720, 20, 160))
        self.view1 = torch.from_numpy(self.view1).float()

        # View 2: facial features, reshaped to (720 trials, 20 time steps, 29 features).
        self.view2 = np.load(self.path_view2)
        self.view2 = normalize(self.view2)
        self.view2 = self.view2.reshape((720, 20, 29))
        self.view2 = torch.from_numpy(self.view2).float()

        # Binarize the continuous ratings: ratings <= 5 become class 0, > 5 class 1.
        self.label = np.load(self.path_label)
        self.label[self.label <= 5] = 0
        self.label[self.label > 5] = 1
        self.label = torch.from_numpy(self.label).int()

    def __getitem__(self, index):
        v = [self.view1[index], self.view2[index]]
        label = self.label[index]
        return v, label, index

    def __len__(self):
        return len(self.label)
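
# A minimal smoke test (a sketch: it assumes the default feature files from
# params.py exist and hold 720 DEAP trials in the shapes expected above):
if __name__ == '__main__':
    dataset = DeapDataset('data/eeg.npy', 'data/face.npy', 'data/label2.npy')
    v, label, index = dataset[0]
    print(v[0].shape)  # torch.Size([20, 160])
    print(v[1].shape)  # torch.Size([20, 29])
    print(label, index)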

--------------------------------------------------------------------------------
/loss_function.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
from torch import Tensor


class MySpl_loss(nn.CrossEntropyLoss):
    """Self-paced cross-entropy loss with confidence and distillation terms."""

    def __init__(self, *args, n_samples=0, alpha, beta, spl_lambda, spl_gamma, **kwargs):
        super(MySpl_loss, self).__init__(*args, **kwargs)
        self.spl_lambda = spl_lambda  # current easiness threshold
        self.spl_gamma = spl_gamma    # growth factor applied once per epoch
        self.alpha = alpha            # weight of the confidence loss
        self.beta = beta              # weight of the distillation loss
        # One binary selection weight per training sample, indexed by dataset index.
        self.v = torch.zeros(n_samples).int()

    def forward(self, input: Tensor, target: Tensor, index: Tensor,
                c_loss: Tensor, kd_loss: Tensor) -> Tensor:
        super_loss = nn.functional.cross_entropy(input, target, reduction='none') \
            + self.alpha * c_loss + self.beta * kd_loss
        v = self.spl_loss(super_loss)
        self.v[index] = v.cpu()
        return (super_loss * v).mean()

    def increase_threshold(self):
        # Relax the threshold so that harder samples enter training over time.
        self.spl_lambda *= self.spl_gamma

    def spl_loss(self, super_loss):
        # Hard selection: weight 1 for samples below the threshold, 0 otherwise.
        v = super_loss < self.spl_lambda
        return v.int()
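
# A toy illustration of the self-paced gating (a sketch, not part of training):
# samples whose combined loss is below spl_lambda get weight 1, the rest get 0,
# and increase_threshold() relaxes the cut-off by spl_gamma after each epoch.
if __name__ == '__main__':
    criterion = MySpl_loss(n_samples=4, alpha=0.5, beta=0.4, spl_lambda=1.0, spl_gamma=1.15)
    logits = torch.tensor([[2.0, 0.1], [0.1, 2.0], [0.0, 0.0], [3.0, -3.0]])
    target = torch.tensor([0, 0, 1, 1])
    zero = torch.tensor(0.0)  # stand-ins for c_loss and kd_loss
    loss = criterion(logits, target, torch.arange(4), zero, zero)
    print(criterion.v)  # tensor([1, 0, 1, 0]): only the two easy samples are kept
    print(loss.item())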

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import torch
import statistics
import torch.nn as nn
from params import Config
from utils import SetSeeds
from net import net
from dataset import DeapDataset
from train_validate import train
from loss_function import MySpl_loss
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold

if __name__ == '__main__':

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    seed = 1
    print('seed is {}'.format(seed))
    print('training on:', device)
    SetSeeds(seed)

    args = Config().parse()

    dataset = DeapDataset(path_view1=args.path_eeg, path_view2=args.path_face, path_label=args.path_label)

    # 18-fold cross-validation: with 720 trials and shuffle=False,
    # each fold holds 40 consecutive trials.
    K = 18
    KF = KFold(n_splits=K, shuffle=False)

    predict_acc, predict_f1 = [], []

    fold = 1
    for train_idx, test_idx in KF.split(dataset):
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
        val_subsampler = torch.utils.data.SubsetRandomSampler(test_idx)
        train_loader = DataLoader(dataset, batch_size=args.batch_size, sampler=train_subsampler)
        val_loader = DataLoader(dataset, batch_size=args.batch_size, sampler=val_subsampler)

        model = net(eeg_dim=160, face_dim=29,
                    hidden_size=256, num_layers=1,
                    dim=256, heads=1, dim_head=256,
                    mlp_dim=512, num_classes=2, dropout=0.5).to(device)

        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

        criterion1 = nn.CrossEntropyLoss()
        criterion2 = MySpl_loss(n_samples=len(train_loader.dataset), alpha=args.alpha, beta=args.beta,
                                spl_lambda=args.spl_lambda, spl_gamma=args.spl_gamma)
        criterions = [criterion1, criterion2]

        acc, f1, train_acces, train_losses, valid_acces, valid_losses = train(
            model, criterions, optimizer, scheduler, train_loader, val_loader,
            device, args.epochs, args.alpha, args.beta, fold, args.task)

        predict_acc.append(acc)
        predict_f1.append(f1)

        print('Best of Fold {}: acc: {}, f1: {}'.format(fold, acc, f1))

        fold += 1

    print('Accuracy: {}'.format(sum(predict_acc) / K))
    print('F1: {}'.format(sum(predict_f1) / K))

    print('Acc per fold: {}'.format(predict_acc))
    print('F1 per fold: {}'.format(predict_f1))
    print('Acc std: {}'.format(statistics.stdev(predict_acc)))
    print('F1 std: {}'.format(statistics.stdev(predict_f1)))

--------------------------------------------------------------------------------
/net.py:
--------------------------------------------------------------------------------
""" Components of the model
"""
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
        )
        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        x = self.net(x)
        x = self.norm(x)
        return x


class MCLSTM(nn.Module):
    """Projects both modalities to a common size and encodes them with a shared LSTM."""

    def __init__(self, input_size1, input_size2, hidden_size, num_layers, dropout):
        super(MCLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.linear1 = nn.Linear(input_size1, hidden_size)
        self.linear2 = nn.Linear(input_size2, hidden_size)
        self.shared_lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, eeg, face):
        eeg = self.linear1(eeg)
        face = self.linear2(face)

        # Shared initial hidden state; separate cell states per modality.
        h = torch.zeros((self.num_layers, eeg.size(0), self.hidden_size), dtype=eeg.dtype, device=eeg.device)
        c_e = torch.zeros((self.num_layers, eeg.size(0), self.hidden_size), dtype=eeg.dtype, device=eeg.device)
        c_f = torch.zeros((self.num_layers, face.size(0), self.hidden_size), dtype=face.dtype, device=face.device)

        eeg, _ = self.shared_lstm(eeg, (h, c_e))
        face, _ = self.shared_lstm(face, (h, c_f))

        eeg = self.dropout(eeg)
        face = self.dropout(face)
        return eeg, face


class Attention(nn.Module):
    def __init__(self, dim, dim_head, heads):
        super(Attention, self).__init__()
        self.to_Q = nn.Linear(dim, dim_head * heads, bias=False)
        self.to_K = nn.Linear(dim, dim_head * heads, bias=False)
        self.to_V = nn.Linear(dim, dim_head * heads, bias=False)
        self.norm = nn.LayerNorm(dim)

    def attention(self, Q, K, V):
        # Scaled dot-product attention over the time dimension; the attended
        # sequence is summed over time to give one vector per trial.
        d_k = K.size(-1)
        scores = torch.matmul(Q, K.transpose(1, 2)) / math.sqrt(d_k)
        alpha_n = F.softmax(scores, dim=-1)
        output = torch.matmul(alpha_n, V)
        output = output.sum(1)
        return output, alpha_n

    def forward(self, x):
        Q = self.to_Q(x)
        K = self.to_K(x)
        V = self.to_V(x)
        out, _ = self.attention(Q, K, V)
        out = self.norm(out)
        return out


class LinearLayer(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.clf = nn.Sequential(nn.Linear(in_dim, out_dim))

    def forward(self, x):
        x = self.clf(x)
        return x


class EegSubNet(nn.Module):
    def __init__(self, dropout, dim, heads, dim_head, mlp_dim):
        super(EegSubNet, self).__init__()
        self.SelfAttention = Attention(dim, dim_head, heads)
        self.FeedForward = FeedForward(dim, mlp_dim, dropout)

    def forward(self, x):
        x = self.SelfAttention(x)
        x = self.FeedForward(x)
        return x


class FaceSubNet(nn.Module):
    def __init__(self, dropout, dim, heads, dim_head, mlp_dim):
        super(FaceSubNet, self).__init__()
        self.SelfAttention = Attention(dim, dim_head, heads)
        self.FeedForward = FeedForward(dim, mlp_dim, dropout)

    def forward(self, x):
        x = self.SelfAttention(x)
        x = self.FeedForward(x)
        return x


class RegressionSubNetwork(nn.Module):
    """Predicts a scalar confidence score for a modality feature vector."""

    def __init__(self, mlp_dim):
        super(RegressionSubNetwork, self).__init__()
        self.layers = nn.ModuleList([LinearLayer(mlp_dim, 1)])

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x


class ClassificationSubNetwork(nn.Module):
    """Per-modality classifier whose softmax output defines the TCP target."""

    def __init__(self, mlp_dim, num_classes):
        super(ClassificationSubNetwork, self).__init__()
        self.layers = nn.ModuleList([LinearLayer(mlp_dim, num_classes)])

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x


class net(nn.Module):
    def __init__(self, eeg_dim, face_dim, hidden_size, num_layers, dim, heads,
                 dim_head, mlp_dim, num_classes, dropout):
        super().__init__()
        self.num_classes = num_classes
        self.mc_lstm = MCLSTM(eeg_dim, face_dim, hidden_size, num_layers, dropout)
        self.eeg_subnet = EegSubNet(dropout, dim, heads, dim_head, mlp_dim)
        self.face_subnet = FaceSubNet(dropout, dim, heads, dim_head, mlp_dim)
        self.Regression = RegressionSubNetwork(dim)
        self.Classification = ClassificationSubNetwork(dim, num_classes)
        self.FeedForward = FeedForward(mlp_dim, dim, dropout)
        self.fc = nn.Linear(mlp_dim, num_classes)

    def confidence_loss(self, TCPLogit, TCPConfidence, label):
        # Regress the predicted confidence onto the true class probability (TCP),
        # i.e. the softmax probability assigned to the ground-truth class.
        pred = F.softmax(TCPLogit, dim=1)
        p_target = torch.gather(input=pred, dim=1, index=label.unsqueeze(dim=1).type(torch.int64)).view(-1)
        c_loss = torch.mean(F.mse_loss(TCPConfidence.view(-1), p_target, reduction='none'))
        return c_loss

    def KD_loss(self, TCPLogit_eeg, TCPLogit_face):
        # Symmetric KL divergence so the two modality classifiers teach each other.
        loss1 = nn.KLDivLoss(reduction='batchmean')(F.log_softmax(TCPLogit_eeg, dim=1), F.softmax(TCPLogit_face, dim=1))
        loss2 = nn.KLDivLoss(reduction='batchmean')(F.log_softmax(TCPLogit_face, dim=1), F.softmax(TCPLogit_eeg, dim=1))
        return (loss1 + loss2) / 2

    def forward(self, v, label):
        eeg, face = v
        eeg, face = self.mc_lstm(eeg, face)
        eeg = self.eeg_subnet(eeg)
        face = self.face_subnet(face)

        TCPConfidence_eeg = self.Regression(eeg)
        TCPConfidence_face = self.Regression(face)
        TCPLogit_eeg = self.Classification(eeg)
        TCPLogit_face = self.Classification(face)

        # Scale each modality by its predicted confidence before fusion.
        eeg = eeg * TCPConfidence_eeg
        face = face * TCPConfidence_face

        feature = torch.cat([eeg, face], dim=1)
        feature = self.FeedForward(feature)
        Logit = self.fc(feature)

        c_loss_eeg = self.confidence_loss(TCPLogit_eeg, TCPConfidence_eeg, label)
        c_loss_face = self.confidence_loss(TCPLogit_face, TCPConfidence_face, label)
        c_loss = c_loss_eeg + c_loss_face
        kd_loss = self.KD_loss(TCPLogit_eeg, TCPLogit_face)
        return Logit, c_loss, kd_loss
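
# A quick shape check with random inputs (a sketch using the same configuration
# as main.py: batches of 40 trials, 20 time steps, 160 EEG / 29 face features):
if __name__ == '__main__':
    model = net(eeg_dim=160, face_dim=29, hidden_size=256, num_layers=1,
                dim=256, heads=1, dim_head=256, mlp_dim=512,
                num_classes=2, dropout=0.5)
    v = [torch.randn(40, 20, 160), torch.randn(40, 20, 29)]
    label = torch.randint(0, 2, (40,))
    Logit, c_loss, kd_loss = model(v, label)
    print(Logit.shape)  # torch.Size([40, 2])
    print(c_loss.item(), kd_loss.item())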

--------------------------------------------------------------------------------
/params.py:
--------------------------------------------------------------------------------
import argparse


class Config():
    def __init__(self):
        self.parser = argparse.ArgumentParser()
        self.initialized = False

    def initialize(self):

        self.parser.add_argument('--task', type=str, default='Arousal', help='the type of classification task')
        self.parser.add_argument('--path_eeg', type=str, default='data/eeg.npy', help='path of the EEG modality data')
        self.parser.add_argument('--path_face', type=str, default='data/face.npy', help='path of the face modality data')
        self.parser.add_argument('--path_label', type=str, default='data/label2.npy', help='path of the labels (Valence: label1.npy; Arousal: label2.npy)')
        self.parser.add_argument('--batch-size', type=int, default=40, metavar='N', help='input batch size for training [default: 40]')
        self.parser.add_argument('--epochs', type=int, default=200, metavar='N', help='number of epochs to train [default: 200]')
        self.parser.add_argument('--lr', type=float, default=0.001, metavar='LR', help='learning rate')
        self.parser.add_argument('--alpha', type=float, default=0.5, help='alpha value of the loss function')
        self.parser.add_argument('--beta', type=float, default=0.4, help='beta value of the loss function')
        self.parser.add_argument('--spl_lambda', type=float, default=1, help='lambda value of the self-paced learning process')
        self.parser.add_argument('--spl_gamma', type=float, default=1.15, help='gamma value of the self-paced learning process')

        self.initialized = True

    def parse(self):
        if not self.initialized:
            self.initialize()
        self.opt = self.parser.parse_args()
        return self.opt

--------------------------------------------------------------------------------
/train_validate.py:
--------------------------------------------------------------------------------
import os
import time
import torch
import torch.nn as nn
from utils import Metrics


def train(model, criterions, optimizer, scheduler, train_loader, valid_loader, device, num_epoch, alpha, beta, fold, task, init=False):
    def init_kaiming(m):
        if type(m) == nn.Linear:
            nn.init.kaiming_normal_(m.weight.data)

    if init:
        model.apply(init_kaiming)

    model.to(device)
    os.makedirs('checkpoint/' + task, exist_ok=True)

    best_acc, f1 = 0.0, 0.0
    train_acces, train_losses, valid_acces, valid_losses = [], [], [], []

    for epoch in range(num_epoch):
        start = time.time()
        model.train()
        train_epoch_loss = 0
        valid_epoch_loss = 0

        # train
        pre, y_true_train = None, None
        for i, (v, labels, index) in enumerate(train_loader):
            v[0] = v[0].to(device)
            v[1] = v[1].to(device)
            label = labels.view(-1).to(device)

            predict_train, c_loss, kd_loss = model(v, label)

            # The self-paced loss needs the dataset indices of the current batch
            # to update its per-sample selection weights.
            loss = criterions[1](predict_train, label.long(), index, c_loss, kd_loss)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_epoch_loss += loss.item()

            predict_train = predict_train.max(1)[1]

            if i == 0:
                pre = predict_train
                y_true_train = label
            else:
                pre = torch.hstack((pre, predict_train))
                y_true_train = torch.hstack((y_true_train, label))

        scheduler.step()
        criterions[1].increase_threshold()
        train_acc, _ = Metrics(y_true_train.cpu(), pre.cpu())

        # valid
        pre, y_true = None, None
        model.eval()
        with torch.no_grad():
            for i, (v, labels, index) in enumerate(valid_loader):
                v[0] = v[0].to(device)
                v[1] = v[1].to(device)
                label = labels.view(-1).to(device)

                predict, c_loss, kd_loss = model(v, label)

                loss = criterions[0](predict, label.long()) + alpha * c_loss + beta * kd_loss

                valid_epoch_loss += loss.item()

                predict = predict.max(1)[1]

                if i == 0:
                    pre = predict
                    y_true = label
                else:
                    pre = torch.hstack((pre, predict))
                    y_true = torch.hstack((y_true, label))

        valid_acc, valid_f1 = Metrics(y_true.cpu(), pre.cpu())

        if valid_acc > best_acc:
            best_acc = valid_acc
            f1 = valid_f1
            torch.save(model.state_dict(), 'checkpoint/' + task + '/Fold_' + str(fold) + '_best_acc.pth')
        train_epoch_loss = train_epoch_loss / len(train_loader)
        valid_epoch_loss = valid_epoch_loss / len(valid_loader)

        end = time.time() - start

        train_acces.append(train_acc)
        train_losses.append(train_epoch_loss)
        valid_acces.append(valid_acc)
        valid_losses.append(valid_epoch_loss)
        print("< Fold{} {:.0f}% {}/{} {:.3f}s >".format(fold, (epoch + 1) / num_epoch * 100, epoch + 1, num_epoch, end), end=" ")
        print('train_loss =', '{:.5f}'.format(train_epoch_loss), end=" ")
        print('train_acc =', '{:.5f}'.format(train_acc), end=" ")
        print('valid_loss =', '{:.5f}'.format(valid_epoch_loss), end=" ")
        print('valid_acc =', '{:.4f}'.format(valid_acc))

    return best_acc, f1, train_acces, train_losses, valid_acces, valid_losses

--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import torch
import numpy as np
from sklearn.metrics import accuracy_score, f1_score


def SetSeeds(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.set_printoptions(precision=8)
    np.random.seed(seed)


def Metrics(y_true, pre):
    acc, f1 = 0.0, 0.0
    try:
        acc = accuracy_score(y_true, pre)
        f1 = f1_score(y_true, pre)
    except ValueError as ve:
        print(ve)

    return acc, f1

--------------------------------------------------------------------------------