├── LICENSE
├── NegPearsonLoss.py
├── PhysNetED_BMVC.py
└── README.md

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Zitong Yu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/NegPearsonLoss.py:
--------------------------------------------------------------------------------
'''
Code of 'Remote Photoplethysmograph Signal Measurement from Facial Videos Using Spatio-Temporal Networks'
By Zitong Yu, 2019/05/05

If you use the code, please cite:
@inproceedings{yu2019remote,
  title={Remote Photoplethysmograph Signal Measurement from Facial Videos Using Spatio-Temporal Networks},
  author={Yu, Zitong and Li, Xiaobai and Zhao, Guoying},
  booktitle={British Machine Vision Conference (BMVC)},
  year={2019}
}

For research purposes only; commercial use is not allowed.

MIT License
Copyright (c) 2019
'''

########################################
''' How to use it
#1. Run inference with the model
rPPG, x_visual, x_visual3232, x_visual1616 = model(inputs)

#2. Normalize the predicted rPPG signal and the ground-truth BVP signal
rPPG = (rPPG - torch.mean(rPPG)) / torch.std(rPPG)                      # zero mean, unit variance
BVP_label = (BVP_label - torch.mean(BVP_label)) / torch.std(BVP_label)  # zero mean, unit variance

#3. Calculate the loss
criterion = Neg_Pearson()
loss_ecg = criterion(rPPG, BVP_label)
'''
########################################


import torch
import torch.nn as nn


class Neg_Pearson(nn.Module):
    # Pearson correlation r lies in [-1, 1]; the loss 1 - r is 0 for perfectly
    # correlated signals and 2 for perfectly anti-correlated ones.
    def __init__(self):
        super(Neg_Pearson, self).__init__()

    def forward(self, preds, labels):  # tensors of shape [Batch, Temporal]
        loss = 0
        for i in range(preds.shape[0]):
            # Per-sample Pearson correlation via the computational formula:
            # r = (N*sum(xy) - sum(x)*sum(y)) / sqrt((N*sum(x^2) - sum(x)^2) * (N*sum(y^2) - sum(y)^2))
            sum_x = torch.sum(preds[i])                  # sum(x)
            sum_y = torch.sum(labels[i])                 # sum(y)
            sum_xy = torch.sum(preds[i] * labels[i])     # sum(xy)
            sum_x2 = torch.sum(torch.pow(preds[i], 2))   # sum(x^2)
            sum_y2 = torch.sum(torch.pow(labels[i], 2))  # sum(y^2)
            N = preds.shape[1]
            pearson = (N * sum_xy - sum_x * sum_y) / (torch.sqrt((N * sum_x2 - torch.pow(sum_x, 2)) * (N * sum_y2 - torch.pow(sum_y, 2))))

            # Alternative (disabled): penalize only the magnitude of negative
            # correlations.
            # if (pearson >= 0).data.cpu().numpy():  # torch.cuda.ByteTensor --> numpy
            #     loss += 1 - pearson
            # else:
            #     loss += 1 - torch.abs(pearson)

            loss += 1 - pearson

        loss = loss / preds.shape[0]  # average over the batch
        return loss
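
# --- Not part of the original file: a minimal sanity check with toy signals
# --- of shape [Batch, Temporal]. A signal compared with itself should give a
# --- loss near 0 (r = 1); compared with its negation, a loss near 2 (r = -1).
if __name__ == '__main__':
    criterion = Neg_Pearson()
    t = torch.linspace(0, 6.28, 128).unsqueeze(0)  # [1, 128] toy waveform
    bvp = torch.sin(t)
    print(criterion(bvp, bvp).item())    # ~0.0
    print(criterion(bvp, -bvp).item())   # ~2.0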
--------------------------------------------------------------------------------

/PhysNetED_BMVC.py:
--------------------------------------------------------------------------------
'''
Code of 'Remote Photoplethysmograph Signal Measurement from Facial Videos Using Spatio-Temporal Networks'
By Zitong Yu, 2019/05/05

If you use the code, please cite:
@inproceedings{yu2019remote,
  title={Remote Photoplethysmograph Signal Measurement from Facial Videos Using Spatio-Temporal Networks},
  author={Yu, Zitong and Li, Xiaobai and Zhao, Guoying},
  booktitle={British Machine Vision Conference (BMVC)},
  year={2019}
}

For research purposes only; commercial use is not allowed.

MIT License
Copyright (c) 2019
'''


import torch
import torch.nn as nn


class PhysNet_padding_Encoder_Decoder_MAX(nn.Module):
    def __init__(self, frames=128):
        super(PhysNet_padding_Encoder_Decoder_MAX, self).__init__()

        self.ConvBlock1 = nn.Sequential(
            nn.Conv3d(3, 16, [1, 5, 5], stride=1, padding=[0, 2, 2]),
            nn.BatchNorm3d(16),
            nn.ReLU(inplace=True),
        )
        self.ConvBlock2 = nn.Sequential(
            nn.Conv3d(16, 32, [3, 3, 3], stride=1, padding=1),
            nn.BatchNorm3d(32),
            nn.ReLU(inplace=True),
        )
        self.ConvBlock3 = nn.Sequential(
            nn.Conv3d(32, 64, [3, 3, 3], stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True),
        )
        self.ConvBlock4 = nn.Sequential(
            nn.Conv3d(64, 64, [3, 3, 3], stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True),
        )
        self.ConvBlock5 = nn.Sequential(
            nn.Conv3d(64, 64, [3, 3, 3], stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True),
        )
        self.ConvBlock6 = nn.Sequential(
            nn.Conv3d(64, 64, [3, 3, 3], stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True),
        )
        self.ConvBlock7 = nn.Sequential(
            nn.Conv3d(64, 64, [3, 3, 3], stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True),
        )
        self.ConvBlock8 = nn.Sequential(
            nn.Conv3d(64, 64, [3, 3, 3], stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True),
        )
        self.ConvBlock9 = nn.Sequential(
            nn.Conv3d(64, 64, [3, 3, 3], stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True),
        )

        # Each transposed convolution doubles the temporal length: T/4 -> T/2 -> T
        self.upsample = nn.Sequential(
            nn.ConvTranspose3d(in_channels=64, out_channels=64, kernel_size=[4, 1, 1], stride=[2, 1, 1], padding=[1, 0, 0]),
            nn.BatchNorm3d(64),
            nn.ELU(),
        )
        self.upsample2 = nn.Sequential(
            nn.ConvTranspose3d(in_channels=64, out_channels=64, kernel_size=[4, 1, 1], stride=[2, 1, 1], padding=[1, 0, 0]),
            nn.BatchNorm3d(64),
            nn.ELU(),
        )

        self.ConvBlock10 = nn.Conv3d(64, 1, [1, 1, 1], stride=1, padding=0)

        self.MaxpoolSpa = nn.MaxPool3d((1, 2, 2), stride=(1, 2, 2))
        self.MaxpoolSpaTem = nn.MaxPool3d((2, 2, 2), stride=2)

        # self.poolspa = nn.AdaptiveMaxPool3d((frames, 1, 1))  # pool only the spatial dimensions
        self.poolspa = nn.AdaptiveAvgPool3d((frames, 1, 1))

    def forward(self, x):  # x [3, T, 128, 128]
        x_visual = x
        [batch, channel, length, width, height] = x.shape

        x = self.ConvBlock1(x)                # x [16, T, 128, 128]
        x = self.MaxpoolSpa(x)                # x [16, T, 64, 64]

        x = self.ConvBlock2(x)                # x [32, T, 64, 64]
        x_visual6464 = self.ConvBlock3(x)     # x [64, T, 64, 64]
        x = self.MaxpoolSpaTem(x_visual6464)  # x [64, T/2, 32, 32]  temporal halving

        x = self.ConvBlock4(x)                # x [64, T/2, 32, 32]
        x_visual3232 = self.ConvBlock5(x)     # x [64, T/2, 32, 32]
        x = self.MaxpoolSpaTem(x_visual3232)  # x [64, T/4, 16, 16]

        x = self.ConvBlock6(x)                # x [64, T/4, 16, 16]
        x_visual1616 = self.ConvBlock7(x)     # x [64, T/4, 16, 16]
        x = self.MaxpoolSpa(x_visual1616)     # x [64, T/4, 8, 8]

        x = self.ConvBlock8(x)                # x [64, T/4, 8, 8]
        x = self.ConvBlock9(x)                # x [64, T/4, 8, 8]
        x = self.upsample(x)                  # x [64, T/2, 8, 8]
        x = self.upsample2(x)                 # x [64, T, 8, 8]

        x = self.poolspa(x)                   # x [64, T, 1, 1]  global spatial pooling
        x = self.ConvBlock10(x)               # x [1, T, 1, 1]

        rPPG = x.view(-1, length)

        return rPPG, x_visual, x_visual3232, x_visual1616
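
# --- Not part of the original file: a minimal shape smoke test, assuming a
# --- random clip of 128 RGB frames at 128x128 resolution. The temporal length
# --- of the predicted rPPG signal should match the input frame count.
if __name__ == '__main__':
    model = PhysNet_padding_Encoder_Decoder_MAX(frames=128)
    with torch.no_grad():
        clip = torch.randn(1, 3, 128, 128, 128)  # [B, C, T, H, W]
        rPPG, _, _, _ = model(clip)
    print(rPPG.shape)  # expected: torch.Size([1, 128])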
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# PhysNet
Main code of the **BMVC 2019 paper 'Remote Photoplethysmograph Signal Measurement from Facial Videos Using Spatio-Temporal Networks'**

How to train it?

#1. Run inference with the model

    model = PhysNet_padding_Encoder_Decoder_MAX(frames=128)
    rPPG, x_visual, x_visual3232, x_visual1616 = model(inputs)

#2. Normalize the predicted rPPG signal and the ground-truth BVP signal

    rPPG = (rPPG - torch.mean(rPPG)) / torch.std(rPPG)                      # zero mean, unit variance
    BVP_label = (BVP_label - torch.mean(BVP_label)) / torch.std(BVP_label)  # zero mean, unit variance

#3. Calculate the loss

    criterion = Neg_Pearson()
    loss_ecg = criterion(rPPG, BVP_label)

A minimal training-loop sketch combining these steps is given at the end of this README.

This code is for **research purposes only**; commercial use is not allowed.

If you use PhysNet, please cite:

>@inproceedings{yu2019remote,
>    title={Remote Photoplethysmograph Signal Measurement from Facial Videos Using Spatio-Temporal Networks},
>    author={Yu, Zitong and Li, Xiaobai and Zhao, Guoying},
>    booktitle={Proc. BMVC},
>    year={2019}
>}
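
A minimal training-loop sketch (not from the original repo; the `dataloader` below is hypothetical and assumed to yield `face_frames` of shape [B, 3, 128, 128, 128] with a matching `BVP_label` of shape [B, 128]; the optimizer and learning rate are illustrative choices):

    import torch
    from PhysNetED_BMVC import PhysNet_padding_Encoder_Decoder_MAX
    from NegPearsonLoss import Neg_Pearson

    model = PhysNet_padding_Encoder_Decoder_MAX(frames=128)
    criterion = Neg_Pearson()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # illustrative

    for face_frames, BVP_label in dataloader:  # hypothetical loader
        rPPG, _, _, _ = model(face_frames)
        # Normalize both signals before the Pearson-based loss
        rPPG = (rPPG - torch.mean(rPPG)) / torch.std(rPPG)
        BVP_label = (BVP_label - torch.mean(BVP_label)) / torch.std(BVP_label)
        loss_ecg = criterion(rPPG, BVP_label)
        optimizer.zero_grad()
        loss_ecg.backward()
        optimizer.step()

--------------------------------------------------------------------------------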