├── .gitignore ├── DemoNotebook.ipynb ├── LICENSE ├── README.md ├── data └── read.txt ├── requirements.txt ├── results └── train-test_loss.png └── src ├── __init__.py ├── config.py ├── engine.py ├── engine_vipl.py ├── loss_func ├── __init__.py ├── custom_loss.py └── rhythmnet_loss.py ├── main.py ├── models ├── __init__.py ├── lenet.py ├── resnet.py ├── rhythmNet.py └── simpleCNN.py └── utils ├── __init__.py ├── dataset.py ├── generate_fold_csv.py ├── model_utils.py ├── plot_scripts.py ├── signal_utils.py └── video2st_maps.py /.gitignore: -------------------------------------------------------------------------------- 1 | /data/ 2 | /venv/ -------------------------------------------------------------------------------- /DemoNotebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "melanomaDetection.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "metadata": { 31 | "colab": { 32 | "base_uri": "https://localhost:8080/" 33 | }, 34 | "id": "QMRaimMFMErb", 35 | "outputId": "1fec65ad-be6c-4b63-b80e-98b1e41e4ac3" 36 | }, 37 | "source": [ 38 | "# Mount Google Drive\n", 39 | "from google.colab import drive # import drive from google colab\n", 40 | "\n", 41 | "ROOT = \"/content/drive\" # default location for the drive\n", 42 | "print(ROOT) # print content of ROOT (Optional)\n", 43 | "\n", 44 | "drive.mount(ROOT) # we mount the google drive at /content/drive" 45 | ], 46 | "execution_count": 13, 47 | "outputs": [ 48 | { 49 | "output_type": "stream", 50 | "text": [ 51 | "/content/drive\n", 52 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 53 | ], 54 | "name": "stdout" 55 | } 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "metadata": { 61 | "colab": { 62 | "base_uri": "https://localhost:8080/" 63 | }, 64 | "id": "mFWquqGJMXs0", 65 | "outputId": "45551f54-8b6c-4749-d21d-6f548a0c4ef7" 66 | }, 67 | "source": [ 68 | "# Clone github repository setup\n", 69 | "from os.path import join \n", 70 | "\n", 71 | "# path to your project on Google Drive\n", 72 | "MY_GOOGLE_DRIVE_PATH = 'My Drive/MyDrive/' \n", 73 | "\n", 74 | "# your Github username \n", 75 | "GIT_USERNAME = \"{GITHUB USERNAME}\" \n", 76 | "# GitHub access token\n", 77 | "GIT_TOKEN = \"{GITHUB TOKEN}\" \n", 78 | "# Replace with your github repository\n", 79 | "GIT_REPOSITORY = \"DL_boilerplate\" \n", 80 | "\n", 81 | "PROJECT_PATH = join(ROOT, MY_GOOGLE_DRIVE_PATH)\n", 82 | "\n", 83 | "print(\"PROJECT_PATH: \", PROJECT_PATH) \n", 84 | "\n", 85 | "# # In case we haven't created the folder already; we will create a folder in the project path \n", 86 | "# !mkdir \"{PROJECT_PATH}\" \n", 87 | "\n", 88 | "GIT_PATH = f\"https://{GIT_TOKEN}@github.com/{GIT_USERNAME}/{GIT_REPOSITORY}.git\"\n", 89 | "print(\"GIT_PATH: \", GIT_PATH)\n", 90 | "GIT_BRANCH = \"main\"" 91 | ], 92 | "execution_count": 14, 93 | "outputs": [ 94 | { 95 | "output_type": "stream", 96 | "text": [ 97 | "PROJECT_PATH: /content/drive/My Drive/MyDrive/\n", 98 | "GIT_PATH: 
https://{a675a49eff6b8a3df6aad1c1c2cea844c3fdeeb2}@github.com/anweshcr7/DL_boilerplate.git\n" 99 | ], 100 | "name": "stdout" 101 | } 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "metadata": { 107 | "colab": { 108 | "base_uri": "https://localhost:8080/" 109 | }, 110 | "id": "aYKVgctbOAnp", 111 | "outputId": "1a99f760-80c0-46b6-aca7-37798e3193ad" 112 | }, 113 | "source": [ 114 | "%rm -r /content/DL_boilerplate/\n", 115 | "# EXEC_COMMAND = f\"-b {GIT_BRANCH} {GIT_PATH}\"\n", 116 | "!git clone -b \"{GIT_BRANCH}\" \"{GIT_PATH}\"" 117 | ], 118 | "execution_count": 16, 119 | "outputs": [ 120 | { 121 | "output_type": "stream", 122 | "text": [ 123 | "Cloning into 'DL_boilerplate'...\n", 124 | "remote: Enumerating objects: 30, done.\u001b[K\n", 125 | "remote: Counting objects: 100% (30/30), done.\u001b[K\n", 126 | "remote: Compressing objects: 100% (25/25), done.\u001b[K\n", 127 | "remote: Total 30 (delta 8), reused 20 (delta 4), pack-reused 0\u001b[K\n", 128 | "Unpacking objects: 100% (30/30), done.\n" 129 | ], 130 | "name": "stdout" 131 | } 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "metadata": { 137 | "colab": { 138 | "base_uri": "https://localhost:8080/" 139 | }, 140 | "id": "UVkBhRqEQK9L", 141 | "outputId": "cfae1561-bd91-4322-b565-1853207e631a" 142 | }, 143 | "source": [ 144 | "!python3 /content/DL_boilerplate/src/main.py " 145 | ], 146 | "execution_count": 18, 147 | "outputs": [ 148 | { 149 | "output_type": "stream", 150 | "text": [ 151 | "GPU available... using GPU\n", 152 | "100% 1875/1875 [00:17<00:00, 105.36it/s]\n", 153 | "Saved!\n", 154 | "100% 313/313 [00:02<00:00, 124.59it/s]\n", 155 | "Epoch 0 => Training Loss: 0.19766141367604334, Val Loss: 0.05979700740503025\n", 156 | "100% 1875/1875 [00:17<00:00, 104.25it/s]\n", 157 | "Saved!\n", 158 | "100% 313/313 [00:02<00:00, 114.15it/s]\n", 159 | "Epoch 1 => Training Loss: 0.05563460199572146, Val Loss: 0.039060163388883073\n", 160 | "100% 1875/1875 [00:18<00:00, 99.60it/s]\n", 161 | "Saved!\n", 162 | "100% 313/313 [00:02<00:00, 121.64it/s]\n", 163 | "Epoch 2 => Training Loss: 0.03707047849864078, Val Loss: 0.0323324040974794\n", 164 | "100% 1875/1875 [00:18<00:00, 100.07it/s]\n", 165 | "Saved!\n", 166 | "100% 313/313 [00:02<00:00, 120.85it/s]\n", 167 | "Epoch 3 => Training Loss: 0.027940944086847594, Val Loss: 0.02903405395759931\n", 168 | "100% 1875/1875 [00:18<00:00, 101.86it/s]\n", 169 | "Saved!\n", 170 | "100% 313/313 [00:02<00:00, 134.51it/s]\n", 171 | "Epoch 4 => Training Loss: 0.020424689519958336, Val Loss: 0.03015474373615215\n", 172 | "100% 1875/1875 [00:17<00:00, 105.84it/s]\n", 173 | "Saved!\n", 174 | "100% 313/313 [00:02<00:00, 133.46it/s]\n", 175 | "Epoch 5 => Training Loss: 0.01611058505279458, Val Loss: 0.026433935853666494\n", 176 | "100% 1875/1875 [00:18<00:00, 102.68it/s]\n", 177 | "Saved!\n", 178 | "100% 313/313 [00:02<00:00, 129.28it/s]\n", 179 | "Epoch 6 => Training Loss: 0.013548866433653165, Val Loss: 0.027602487027936874\n", 180 | "100% 1875/1875 [00:17<00:00, 104.42it/s]\n", 181 | "Saved!\n", 182 | "100% 313/313 [00:02<00:00, 133.54it/s]\n", 183 | "Epoch 7 => Training Loss: 0.01068355406346988, Val Loss: 0.030769745317247334\n", 184 | "100% 1875/1875 [00:18<00:00, 101.77it/s]\n", 185 | "Saved!\n", 186 | "100% 313/313 [00:02<00:00, 128.57it/s]\n", 187 | "Epoch 8 => Training Loss: 0.007954271178089523, Val Loss: 0.04250151148297651\n", 188 | "100% 1875/1875 [00:17<00:00, 104.32it/s]\n", 189 | "Saved!\n", 190 | "100% 313/313 [00:02<00:00, 116.59it/s]\n", 191 | "Epoch 9 => Training Loss: 
0.007675014509890358, Val Loss: 0.031255505094597145\n", 192 | "
\n", 193 | "done\n" 194 | ], 195 | "name": "stdout" 196 | } 197 | ] 198 | } 199 | ] 200 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Anwesh Marwade 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RhythmNet: End-to-end Heart Rate Estimation from Face via Spatial-temporal Representation 2 | A reproduction of the RhythmNet model. [Paper link](https://arxiv.org/abs/1910.11515) 3 | 4 | #### Dataset: 5 | VIPL-HR dataset 6 | 7 | ## Experiments 8 | Shared parameters: 9 | ``` 10 | batch size: 32 11 | Dataset: VIPL 12 | Model: RhythmNet 13 | initial learning rate: 1e-3 14 | epochs: 50 15 | window_size = 300 frames with stride of 0.5 seconds 16 | ``` 17 | 18 | **Dataset split**: 5-fold cross-validation 19 | ### Experiment for 1-Fold without GRU layer 20 | 21 | | Set | Loss | MAE (bpm) | RMSE (bpm) | 22 | |----------|:-----:|:----------:|:----------:| 23 | | Training | 3.096 | 1.817 | 2.834 | 24 | | Eval | 15.91 | 9.255 | 11.787 | 25 | 26 | ### Experiment for 1-Fold with GRU layer 27 | | Set | Loss | MAE (bpm) | RMSE (bpm) | 28 | |----------|:-----:|:----------:|:----------:| 29 | | Training | 3.925 | 2.423 | 4.16 | 30 | | Eval | 14.25 | 13.992 | 17.019 | 31 | 32 | -------------------------------------------------------------------------------- /data/read.txt: -------------------------------------------------------------------------------- 1 | YOUR DATA WILL BE DOWNLOADED HERE, or added here -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cached-property==1.5.2 2 | cycler==0.10.0 3 | dataclasses==0.6 4 | future==0.18.2 5 | h5py==3.1.0 6 | heartpy==1.2.6 7 | joblib==0.17.0 8 | kiwisolver==1.3.1 9 | matplotlib==3.3.3 10 | mne==0.21.2 11 | numpy==1.19.5 12 | opencv-python==4.4.0.46 13 | pandas==1.2.0 14 | Pillow==9.0.0 15 | pyparsing==2.4.7 16 | python-dateutil==2.8.1 17 | pytz==2020.5 18 | scikit-learn==0.23.2 19 | scipy==1.5.4 20 | six==1.15.0 21 | threadpoolctl==2.1.0 22 | torch==1.7.0 23 | torchvision==0.8.1 24 | tqdm==4.53.0 25 | typing-extensions==3.7.4.3 26 |
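The MAE and RMSE figures reported in the README tables above are errors between ground-truth and estimated heart rate, averaged over videos. Below is a minimal sketch of that metric computation, mirroring the `mae`, `rmse` and `compute_criteria` helpers in `src/main.py` further down; the HR values in the example are made up purely for illustration.

```python
import numpy as np

def mae(target_hr, predicted_hr):
    # mean absolute error between ground-truth and estimated HR (bpm)
    return np.mean(np.abs(np.asarray(target_hr) - np.asarray(predicted_hr)))

def rmse(target_hr, predicted_hr):
    # root mean squared error between ground-truth and estimated HR (bpm)
    return np.sqrt(np.mean((np.asarray(target_hr) - np.asarray(predicted_hr)) ** 2))

# illustrative per-video HR values (bpm); not actual results
target_hr = [72.0, 80.5, 65.2, 90.0]
predicted_hr = [70.1, 83.0, 66.0, 86.5]
print({"MAE": mae(target_hr, predicted_hr), "RMSE": rmse(target_hr, predicted_hr)})
```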
-------------------------------------------------------------------------------- /results/train-test_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnweshCR7/RhythmNet/aa4b336a249af64d0c5e7f516863d6f1f6c285c1/results/train-test_loss.png -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnweshCR7/RhythmNet/aa4b336a249af64d0c5e7f516863d6f1f6c285c1/src/__init__.py -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | # FACE_DATA_DIR = "/content/drive/MyDrive/data/deep_phys/face_videos/" 2 | # DATA_PATH = "/content/drive/MyDrive/data/rhythmnet/st_maps/" 3 | # TARGET_SIGNAL_DIR = "/content/drive/MyDrive/data/deep_phys/data_preprocessed/" 4 | # SAVE_CSV_PATH = "/content/drive/MyDrive/data/rhythmnet/kfold.csv" 5 | # ST_MAPS_PATH = "/content/drive/MyDrive/data/rhythmnet/st_maps/" 6 | # CHECKPOINT_PATH = "/content/drive/MyDrive/data/rhythmnet/checkpoint" 7 | # PLOT_PATH = "/content/drive/MyDrive/data/rhythmnet/plots" 8 | # NUM_WORKERS = 2 9 | # DEVICE = "cuda" 10 | # BATCH_SIZE = 10 11 | # EPOCHS = 50 12 | # lr = 1e-3 13 | # CLIP_SIZE = 300 14 | 15 | # For INSY server 16 | 17 | # FACE_DATA_DIR = "/content/drive/MyDrive/data/deep_phys/face_videos/" 18 | # HOME_DIR = "/tudelft.net/staff-bulk/ewi/insy/VisionLab/students/amarwade/" 19 | # HR_DATA_PATH = HOME_DIR + "data/DEAP/hr_csv/" 20 | # DATA_PATH = HOME_DIR + "data/DEAP/st_maps/" 21 | # TARGET_SIGNAL_DIR = HOME_DIR + "data/DEAP/data_preprocessed/" 22 | # SAVE_CSV_PATH = HOME_DIR + "RhythmNet/subject_exclusive_folds.csv" 23 | # ST_MAPS_PATH = HOME_DIR + "data/DEAP/st_maps/" 24 | # CHECKPOINT_PATH = HOME_DIR + "checkpoints/RhythmNet" 25 | # PLOT_PATH = HOME_DIR + "plots/RhythmNet" 26 | # NUM_WORKERS = 2 27 | # DEVICE = "cuda" 28 | # BATCH_SIZE = 16 29 | # EPOCHS = 20 30 | # lr = 1e-3 31 | # CLIP_SIZE = 300 32 | # TENSORBOARD_PATH = HOME_DIR + "/runs/" 33 | # GRU_TEMPORAL_WINDOW = 6 34 | 35 | haarcascade_url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_alt2.xml" 36 | eye_cascade_url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_eye.xml" 37 | # FACE_DATA_DIR = "../data/face_video/" 38 | # HR_DATA_PATH = "../data/VIPL_hr_csv/" 39 | HR_DATA_PATH = "../data/VIPL_hr_csv/" 40 | FACE_DATA_DIR = "/Volumes/T7/vipl_videos/" 41 | TARGET_SIGNAL_DIR = "/Users/anweshcr7/Downloads/CleanerPPG/VIPL-HR/Cleaned/" 42 | # SAVE_CSV_PATH = "subject_exclusive_folds.csv" 43 | SAVE_CSV_PATH = "VIPL_npy.csv" 44 | ST_MAPS_PATH = "../data/vipl_st_maps/" 45 | # ST_MAPS_PATH = "/Volumes/Backup Plus/vision/DEAP_emotion/st_maps/" 46 | CHECKPOINT_PATH = "../checkpoint" 47 | DATA_PATH = "../data/" 48 | PLOT_PATH = "../plots" 49 | BATCH_SIZE = 16 50 | EPOCHS = 10 51 | EPOCHS_TEST = 1 52 | CLIP_SIZE = 300 53 | lr = 1e-3 54 | IMAGE_WIDTH = 300 55 | IMAGE_HEIGHT = 75 56 | NUM_WORKERS = 0 57 | DEVICE = "cpu" 58 | GRU_TEMPORAL_WINDOW = 6 -------------------------------------------------------------------------------- /src/engine.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import torch 3 | import config 4 | from utils.model_utils import save_model_checkpoint 5 | 6 
| 7 | def train_fn(model, data_loader, optimizer, loss_fn): 8 | model.train() 9 | fin_loss = 0 10 | loss = 0.0 11 | 12 | target_hr_list = [] 13 | predicted_hr_list = [] 14 | tk_iterator = tqdm(data_loader, total=len(data_loader)) 15 | for data in tk_iterator: 16 | # an item of the data is available as a dictionary 17 | for (key, value) in data.items(): 18 | data[key] = value.to(config.DEVICE) 19 | 20 | optimizer.zero_grad() 21 | with torch.set_grad_enabled(True): 22 | outputs = model(**data) 23 | # w/o GRU 24 | loss = loss_fn(outputs.squeeze(2), data["target"]) 25 | # loss = loss_fn(outputs, data["target"]) 26 | loss.backward() 27 | optimizer.step() 28 | # "For each face video, the avg of all HR (bpm) of individual clips are computed as the final HR result 29 | target_hr_batch = list(data["target"].mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy()) 30 | target_hr_list.extend(target_hr_batch) 31 | 32 | predicted_hr_batch = list(outputs.squeeze(2).mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy()) 33 | predicted_hr_list.extend(predicted_hr_batch) 34 | fin_loss += loss.item() 35 | 36 | return target_hr_list, predicted_hr_list, fin_loss / len(data_loader) 37 | 38 | 39 | def eval_fn(model, data_loader, loss_fn): 40 | model.eval() 41 | fin_loss = 0 42 | target_hr_list = [] 43 | predicted_list = [] 44 | with torch.no_grad(): 45 | tk_iterator = tqdm(data_loader, total=len(data_loader)) 46 | for data in tk_iterator: 47 | for (key, value) in data.items(): 48 | data[key] = value.to(config.DEVICE) 49 | 50 | # with torch.set_grad_enabled(False): 51 | outputs = model(**data) 52 | # _, _, out = model(**data) 53 | loss = loss_fn(outputs.squeeze(2), data["target"]) 54 | # _, batch_preds = torch.max(out.data, 1) 55 | fin_loss += loss.item() 56 | target_hr_batch = list(data["target"].mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy()) 57 | target_hr_list.extend(target_hr_batch) 58 | 59 | predicted_hr_batch = list(outputs.squeeze(2).mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy()) 60 | predicted_list.extend(predicted_hr_batch) 61 | 62 | 63 | return target_hr_list, predicted_list, fin_loss / len(data_loader) 64 | -------------------------------------------------------------------------------- /src/engine_vipl.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import torch 3 | import config 4 | import numpy as np 5 | from utils.model_utils import save_model_checkpoint 6 | 7 | 8 | def train_fn(model, data_loader, optimizer, loss_fn): 9 | model.train() 10 | fin_loss = 0 11 | loss = 0.0 12 | 13 | target_hr_list = [] 14 | predicted_hr_list = [] 15 | tk_iterator = tqdm(data_loader, total=len(data_loader)) 16 | batched_data = [] 17 | for batch in tk_iterator: 18 | for data in batch: 19 | # an item of the data is available as a dictionary 20 | for (key, value) in data.items(): 21 | data[key] = value.to(config.DEVICE) 22 | 23 | optimizer.zero_grad() 24 | with torch.set_grad_enabled(True): 25 | outputs, gru_outputs = model(**data) 26 | # w/o GRU 27 | # loss = loss_fn(outputs.squeeze(0), data["target"]) 28 | loss = loss_fn(outputs.squeeze(0), gru_outputs, data["target"]) 29 | loss.backward() 30 | optimizer.step() 31 | # "For each face video, the avg of all HR (bpm) of individual clips are computed as the final HR result 32 | # target_hr_batch = list(data["target"].mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy()) 33 | target_hr_list.append(data["target"].mean().item()) 34 | 35 | # predicted_hr_batch = 
list(outputs.squeeze(2).mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy()) 36 | predicted_hr_list.append(outputs.squeeze(0).mean().item()) 37 | fin_loss += loss.item() 38 | 39 | return target_hr_list, predicted_hr_list, fin_loss / (len(data_loader)*config.BATCH_SIZE) 40 | 41 | 42 | def eval_fn(model, data_loader, loss_fn): 43 | model.eval() 44 | fin_loss = 0 45 | target_hr_list = [] 46 | predicted_hr_list = [] 47 | with torch.no_grad(): 48 | tk_iterator = tqdm(data_loader, total=len(data_loader)) 49 | for batch in tk_iterator: 50 | for data in batch: 51 | for (key, value) in data.items(): 52 | data[key] = value.to(config.DEVICE) 53 | 54 | # with torch.set_grad_enabled(False): 55 | outputs, gru_outputs = model(**data) 56 | # loss w/o GRU 57 | # loss = loss_fn(outputs.squeeze(0), data["target"]) 58 | # loss with GRU 59 | loss = loss_fn(outputs.squeeze(0), gru_outputs, data["target"]) 60 | fin_loss += loss.item() 61 | # target_hr_batch = list(data["target"].mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy()) 62 | target_hr_list.append(data["target"].mean().item()) 63 | 64 | # predicted_hr_batch = list(outputs.squeeze(2).mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy()) 65 | predicted_hr_list.append(outputs.squeeze(0).mean().item()) 66 | 67 | return target_hr_list, predicted_hr_list, fin_loss / (len(data_loader)*config.BATCH_SIZE) 68 | -------------------------------------------------------------------------------- /src/loss_func/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnweshCR7/RhythmNet/aa4b336a249af64d0c5e7f516863d6f1f6c285c1/src/loss_func/__init__.py -------------------------------------------------------------------------------- /src/loss_func/custom_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import config as config 3 | 4 | class MyLoss(torch.autograd.Function): 5 | """ 6 | We can implement our own custom autograd Functions by subclassing 7 | torch.autograd.Function and implementing the forward and backward passes 8 | which operate on Tensors. 9 | """ 10 | 11 | @staticmethod 12 | def forward(ctx, hr_t, hr_outs, T): 13 | """ 14 | In the forward pass we receive a Tensor containing the input and return 15 | a Tensor containing the output. ctx is a context object that can be used 16 | to stash information for backward computation. You can cache arbitrary 17 | objects for use in the backward pass using the ctx.save_for_backward method. 18 | """ 19 | ctx.hr_outs = hr_outs 20 | ctx.hr_mean = hr_outs.mean() 21 | ctx.T = T 22 | ctx.save_for_backward(hr_t) 23 | # pdb.set_trace() 24 | # hr_t, hr_mean, T = input 25 | 26 | if hr_t > ctx.hr_mean: 27 | loss = hr_t - ctx.hr_mean 28 | else: 29 | loss = ctx.hr_mean - hr_t 30 | 31 | return loss 32 | # return input.clamp(min=0) 33 | 34 | @staticmethod 35 | def backward(ctx, grad_output): 36 | """ 37 | In the backward pass we receive a Tensor containing the gradient of the loss 38 | with respect to the output, and we need to compute the gradient of the loss 39 | with respect to the input. 
40 | """ 41 | output = torch.zeros(1).to(config.DEVICE) 42 | 43 | hr_t, = ctx.saved_tensors 44 | hr_outs = ctx.hr_outs 45 | 46 | # create a list of hr_outs without hr_t 47 | 48 | for hr in hr_outs: 49 | if hr == hr_t: 50 | pass 51 | else: 52 | output = output + (1/ctx.T)*torch.sign(ctx.hr_mean - hr) 53 | 54 | output = (1/ctx.T - 1)*torch.sign(ctx.hr_mean - hr_t) + output 55 | 56 | return output, None, None 57 | 58 | 59 | # if __name__ == '__main__': 60 | # 61 | # dtype = torch.float 62 | # device = torch.device("cpu") 63 | # # device = torch.device("cuda:0") # Uncomment this to run on GPU 64 | # # torch.backends.cuda.matmul.allow_tf32 = False # Uncomment this to run on GPU 65 | # 66 | # # The above line disables TensorFloat32. This a feature that allows 67 | # # networks to run at a much faster speed while sacrificing precision. 68 | # # Although TensorFloat32 works well on most real models, for our toy model 69 | # # in this tutorial, the sacrificed precision causes convergence issue. 70 | # # For more information, see: 71 | # # https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices 72 | # 73 | # # N is batch size; D_in is input dimension; 74 | # # H is hidden dimension; D_out is output dimension. 75 | # N, D_in, H, D_out = 64, 1000, 100, 10 76 | # # tensor([[0.4178, 0.8199, 0.1713, -0.8368, 0.2154, -0.4960, 0.4925, -0.7679, 77 | # # -0.1096, 0.7345]], grad_fn= < SqueezeBackward1 >) 78 | # # Create random Tensors to hold input and outputs. 79 | # with torch.set_grad_enabled(True): 80 | # # hr_outs = torch.tensor([0.4178, 0.8199, 0.1713, -0.8368, 0.2154, -0.4960, 0.4925, -0.7679, -0.1096, 0.7345], 81 | # # device=device, dtype=dtype) 82 | # hr_outs = torch.autograd.Variable(torch.randn(3), requires_grad=True) 83 | # hr_mean = hr_outs.mean() 84 | # # y = torch.tensor(0., device=device, dtype=dtype) 85 | # 86 | # # Create random Tensors for weights. 87 | # # w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True) 88 | # # w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True) 89 | # 90 | # learning_rate = 1e-6 91 | # smooth_loss = torch.autograd.Variable(torch.zeros(1), requires_grad=True) 92 | # for i in range(hr_outs.shape[0]): 93 | # # To apply our Function, we use Function.apply method. We alias this as 'relu'. 
94 | # custom_loss = MyLoss.apply 95 | # smooth_loss = smooth_loss + custom_loss(hr_outs[i], hr_outs, hr_outs.shape[0]) 96 | # 97 | # smooth_loss.backward() 98 | # 99 | # print("done") 100 | -------------------------------------------------------------------------------- /src/loss_func/rhythmnet_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import numpy as np 3 | import torch 4 | import config as config 5 | from loss_func.custom_loss import MyLoss 6 | 7 | 8 | class RhythmNetLoss(nn.Module): 9 | def __init__(self, weight=100.0): 10 | super(RhythmNetLoss, self).__init__() 11 | self.l1_loss = nn.L1Loss() 12 | self.lambd = weight 13 | self.gru_outputs_considered = None 14 | self.custom_loss = MyLoss() 15 | self.device = config.DEVICE 16 | 17 | def forward(self, resnet_outputs, gru_outputs, target): 18 | frame_rate = 25.0 19 | # resnet_outputs, gru_outputs, _ = outputs 20 | # target_array = target.repeat(1, resnet_outputs.shape[1]) 21 | l1_loss = self.l1_loss(resnet_outputs, target) 22 | smooth_loss_component = self.smooth_loss(gru_outputs) 23 | 24 | loss = l1_loss + self.lambd*smooth_loss_component 25 | return loss 26 | 27 | # Need to write backward pass for this loss function 28 | def smooth_loss(self, gru_outputs): 29 | smooth_loss = torch.zeros(1).to(device=self.device) 30 | self.gru_outputs_considered = gru_outputs.flatten() 31 | # hr_mean = self.gru_outputs_considered.mean() 32 | for hr_t in self.gru_outputs_considered: 33 | # custom_fn = MyLoss.apply 34 | smooth_loss = smooth_loss + self.custom_loss.apply(torch.autograd.Variable(hr_t, requires_grad=True), 35 | self.gru_outputs_considered, 36 | self.gru_outputs_considered.shape[0]) 37 | return smooth_loss / self.gru_outputs_considered.shape[0] 38 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import torch 4 | import numpy as np 5 | import pandas as pd 6 | import torch.nn as nn 7 | from tqdm import tqdm 8 | import engine 9 | import engine_vipl 10 | import config 11 | from torch.utils.tensorboard import SummaryWriter 12 | from utils.dataset import DataLoaderRhythmNet 13 | from utils.plot_scripts import plot_train_test_curves, bland_altman_plot, gt_vs_est, create_plot_for_tensorboard 14 | from utils.model_utils import plot_loss, load_model_if_checkpointed, save_model_checkpoint 15 | from models.simpleCNN import SimpleCNN 16 | from models.lenet import LeNet 17 | from models.rhythmNet import RhythmNet 18 | from loss_func.rhythmnet_loss import RhythmNetLoss 19 | from scipy.stats.stats import pearsonr 20 | 21 | 22 | # Needed in VIPL dataset where each data item has a different number of frames/maps 23 | def collate_fn(batch): 24 | batched_st_map, batched_targets = [], [] 25 | # for data in batch: 26 | # batched_st_map.append(data["st_maps"]) 27 | # batched_targets.append(data["target"]) 28 | # # torch.stack(batched_output_per_clip, dim=0).transpose_(0, 1) 29 | return batch 30 | 31 | 32 | def rmse(l1, l2): 33 | 34 | return np.sqrt(np.mean((l1-l2)**2)) 35 | 36 | 37 | def mae(l1, l2): 38 | 39 | return np.mean([abs(item1-item2)for item1, item2 in zip(l1, l2)]) 40 | 41 | 42 | def compute_criteria(target_hr_list, predicted_hr_list): 43 | pearson_per_signal = [] 44 | HR_MAE = mae(np.array(predicted_hr_list), np.array(target_hr_list)) 45 | HR_RMSE = rmse(np.array(predicted_hr_list), np.array(target_hr_list)) 46 | 47 | 
# for (gt_signal, predicted_signal) in zip(target_hr_list, predicted_hr_list): 48 | # r, p_value = pearsonr(predicted_signal, gt_signal) 49 | # pearson_per_signal.append(r) 50 | 51 | # return {"MAE": np.mean(HR_MAE), "RMSE": HR_RMSE, "Pearson": np.mean(pearson_per_signal)} 52 | return {"MAE": np.mean(HR_MAE), "RMSE": HR_RMSE} 53 | 54 | 55 | def run_training(): 56 | 57 | # check path to checkpoint directory 58 | if config.CHECKPOINT_PATH: 59 | if not os.path.exists(config.CHECKPOINT_PATH): 60 | os.makedirs(config.CHECKPOINT_PATH) 61 | print("Output directory is created") 62 | 63 | # -------------------------------------- 64 | # Initialize Model 65 | # -------------------------------------- 66 | 67 | model = RhythmNet() 68 | 69 | if torch.cuda.is_available(): 70 | print('GPU available... using GPU') 71 | torch.cuda.manual_seed_all(42) 72 | else: 73 | print("GPU not available, using CPU") 74 | 75 | if config.CHECKPOINT_PATH: 76 | checkpoint_path = os.path.join(os.getcwd(), config.CHECKPOINT_PATH) 77 | if not os.path.exists(checkpoint_path): 78 | os.makedirs(checkpoint_path) 79 | print("Output directory is created") 80 | 81 | # device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu') 82 | 83 | model.to(config.DEVICE) 84 | 85 | optimizer = torch.optim.Adam(model.parameters(), lr=config.lr) 86 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( 87 | optimizer, factor=0.8, patience=5, verbose=True 88 | ) 89 | # loss_fn = nn.L1Loss() 90 | loss_fn = RhythmNetLoss() 91 | 92 | testset = trainset = None 93 | 94 | # Initialize SummaryWriter object 95 | writer = SummaryWriter() 96 | 97 | # Read from a pre-made csv file that contains data divided into folds for cross validation 98 | folds_df = pd.read_csv(config.SAVE_CSV_PATH) 99 | 100 | # Loop for enumerating through folds. 
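# Note: the fold CSV read from config.SAVE_CSV_PATH is expected to provide 'video' (st-map path), 'set' ('T' for train / 'V' for validation) and 'iteration' (fold index) columns, which the filtering below relies on; utils/generate_fold_csv.py holds the scripts used to build such per-fold CSVs.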
101 | print(f"Details: {len(folds_df['iteration'].unique())} fold training for {config.EPOCHS} Epochs (each video)") 102 | # for k in folds_df['iteration'].unique(): 103 | for k in [1]: 104 | # Filter DF 105 | video_files_test = folds_df.loc[(folds_df['iteration'] == k) & (folds_df['set'] == 'V')] 106 | video_files_train = folds_df.loc[(folds_df['iteration'] == k) & (folds_df['set'] == 'T')] 107 | 108 | # Get paths from filtered DF VIPL 109 | video_files_test = [os.path.join(config.ST_MAPS_PATH, video_path.split('/')[-1]) for video_path in 110 | video_files_test["video"].values] 111 | video_files_train = [os.path.join(config.ST_MAPS_PATH, video_path.split('/')[-1]) for video_path in 112 | video_files_train["video"].values] 113 | 114 | # video_files_test = [os.path.join(config.ST_MAPS_PATH, video_path) for video_path in 115 | # video_files_test["video"].values] 116 | # video_files_train = [os.path.join(config.ST_MAPS_PATH, video_path) for video_path in 117 | # video_files_train["video"].values] 118 | 119 | # video_files_train = video_files_train[:32] 120 | # video_files_test = video_files_test[:32] 121 | 122 | # print(f"Reading Current File: {video_files_train[0]}") 123 | 124 | # -------------------------------------- 125 | # Build Dataloaders 126 | # -------------------------------------- 127 | 128 | train_set = DataLoaderRhythmNet(st_maps_path=video_files_train, target_signal_path=config.TARGET_SIGNAL_DIR) 129 | 130 | train_loader = torch.utils.data.DataLoader( 131 | dataset=train_set, 132 | batch_size=config.BATCH_SIZE, 133 | num_workers=config.NUM_WORKERS, 134 | shuffle=False, 135 | collate_fn=collate_fn 136 | ) 137 | print('\nTrain DataLoader constructed successfully!') 138 | 139 | # Code to use multiple GPUs (if available) 140 | if torch.cuda.device_count() > 1: 141 | print("Let's use", torch.cuda.device_count(), "GPUs!") 142 | model = torch.nn.DataParallel(model) 143 | 144 | # -------------------------------------- 145 | # Load checkpointed model (if present) 146 | # -------------------------------------- 147 | if config.DEVICE == "cpu": 148 | load_on_cpu = True 149 | else: 150 | load_on_cpu = False 151 | model, optimizer, checkpointed_loss, checkpoint_flag = load_model_if_checkpointed(model, optimizer, checkpoint_path, load_on_cpu=load_on_cpu) 152 | if checkpoint_flag: 153 | print(f"Checkpoint Found! Loading from checkpoint :: LOSS={checkpointed_loss}") 154 | else: 155 | print("Checkpoint Not Found! Training from beginning") 156 | 157 | # ----------------------------- 158 | # Start training 159 | # ----------------------------- 160 | 161 | train_loss_per_epoch = [] 162 | for epoch in range(config.EPOCHS): 163 | # short-circuit for evaluation 164 | if k == 1: 165 | break 166 | target_hr_list, predicted_hr_list, train_loss = engine_vipl.train_fn(model, train_loader, optimizer, loss_fn) 167 | 168 | # Save model with final train loss (script to save the best weights?) 
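# If a checkpoint was loaded, it is overwritten only when the current train loss improves on the checkpointed loss; otherwise the model is saved on the first epoch and afterwards only when the loss improves on the best value seen so far in this run.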
169 | if checkpointed_loss != 0.0: 170 | if train_loss < checkpointed_loss: 171 | save_model_checkpoint(model, optimizer, train_loss, checkpoint_path) 172 | checkpointed_loss = train_loss 173 | else: 174 | pass 175 | else: 176 | if len(train_loss_per_epoch) > 0: 177 | if train_loss < min(train_loss_per_epoch): 178 | save_model_checkpoint(model, optimizer, train_loss, checkpoint_path) 179 | else: 180 | save_model_checkpoint(model, optimizer, train_loss, checkpoint_path) 181 | 182 | metrics = compute_criteria(target_hr_list, predicted_hr_list) 183 | 184 | for metric in metrics.keys(): 185 | writer.add_scalar(f"Train/{metric}", metrics[metric], epoch) 186 | 187 | print(f"\nFinished [Epoch: {epoch + 1}/{config.EPOCHS}]", 188 | "\nTraining Loss: {:.3f} |".format(train_loss), 189 | "HR_MAE : {:.3f} |".format(metrics["MAE"]), 190 | "HR_RMSE : {:.3f} |".format(metrics["RMSE"]),) 191 | # "Pearsonr : {:.3f} |".format(metrics["Pearson"]), ) 192 | 193 | train_loss_per_epoch.append(train_loss) 194 | writer.add_scalar("Loss/train", train_loss, epoch+1) 195 | 196 | # Plots on tensorboard 197 | ba_plot_image = create_plot_for_tensorboard('bland_altman', target_hr_list, predicted_hr_list) 198 | gtvsest_plot_image = create_plot_for_tensorboard('gt_vs_est', target_hr_list, predicted_hr_list) 199 | writer.add_image('BA_plot', ba_plot_image, epoch) 200 | writer.add_image('gtvsest_plot', gtvsest_plot_image, epoch) 201 | 202 | mean_loss = np.mean(train_loss_per_epoch) 203 | # Save the mean_loss value for each video instance to the writer 204 | print(f"Avg Training Loss: {np.mean(mean_loss)} for {config.EPOCHS} epochs") 205 | writer.flush() 206 | 207 | # -------------------------------------- 208 | # Load checkpointed model (if present) 209 | # -------------------------------------- 210 | if config.DEVICE == "cpu": 211 | load_on_cpu = True 212 | else: 213 | load_on_cpu = False 214 | model, optimizer, checkpointed_loss, checkpoint_flag = load_model_if_checkpointed(model, optimizer, 215 | checkpoint_path, 216 | load_on_cpu=load_on_cpu) 217 | if checkpoint_flag: 218 | print(f"Checkpoint Found! Loading from checkpoint :: LOSS={checkpointed_loss}") 219 | else: 220 | print("Checkpoint Not Found! 
Training from beginning") 221 | 222 | # ----------------------------- 223 | # Start Validation 224 | # ----------------------------- 225 | test_set = DataLoaderRhythmNet(st_maps_path=video_files_test, target_signal_path=config.TARGET_SIGNAL_DIR) 226 | test_loader = torch.utils.data.DataLoader( 227 | dataset=test_set, 228 | batch_size=config.BATCH_SIZE, 229 | num_workers=config.NUM_WORKERS, 230 | shuffle=False, 231 | collate_fn=collate_fn 232 | ) 233 | print('\nEvaluation DataLoader constructed successfully!') 234 | 235 | print(f"Finished Training, Validating {len(video_files_test)} video files for {config.EPOCHS_TEST} Epochs") 236 | 237 | eval_loss_per_epoch = [] 238 | for epoch in range(config.EPOCHS_TEST): 239 | # validation 240 | target_hr_list, predicted_hr_list, test_loss = engine_vipl.eval_fn(model, test_loader, loss_fn) 241 | 242 | # truth_hr_list.append(target) 243 | # estimated_hr_list.append(predicted) 244 | metrics = compute_criteria(target_hr_list, predicted_hr_list) 245 | for metric in metrics.keys(): 246 | writer.add_scalar(f"Test/{metric}", metrics[metric], epoch) 247 | 248 | print(f"\nFinished Test [Epoch: {epoch + 1}/{config.EPOCHS_TEST}]", 249 | "\nTest Loss: {:.3f} |".format(test_loss), 250 | "HR_MAE : {:.3f} |".format(metrics["MAE"]), 251 | "HR_RMSE : {:.3f} |".format(metrics["RMSE"]),) 252 | 253 | writer.add_scalar("Loss/test", test_loss, epoch) 254 | 255 | # Plots on tensorboard 256 | ba_plot_image = create_plot_for_tensorboard('bland_altman', target_hr_list, predicted_hr_list) 257 | gtvsest_plot_image = create_plot_for_tensorboard('gt_vs_est', target_hr_list, predicted_hr_list) 258 | writer.add_image('BA_plot', ba_plot_image, epoch) 259 | writer.add_image('gtvsest_plot', gtvsest_plot_image, epoch) 260 | 261 | 262 | # print(f"Avg Validation Loss: {mean_test_loss} for {config.EPOCHS_TEST} epochs") 263 | writer.flush() 264 | # plot_train_test_curves(train_loss_data, test_loss_data, plot_path=config.PLOT_PATH, fold_tag=k) 265 | # Plots on the local storage. 
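# gt_vs_est and bland_altman_plot (see utils/plot_scripts.py) write true_vs_est.png and bland-altman_new.png under config.PLOT_PATH.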
266 | gt_vs_est(target_hr_list, predicted_hr_list, plot_path=config.PLOT_PATH) 267 | bland_altman_plot(target_hr_list, predicted_hr_list, plot_path=config.PLOT_PATH) 268 | writer.close() 269 | print("done") 270 | 271 | 272 | if __name__ == '__main__': 273 | run_training() 274 | -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnweshCR7/RhythmNet/aa4b336a249af64d0c5e7f516863d6f1f6c285c1/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/lenet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class LeNet(nn.Module): 6 | def __init__(self): 7 | super(LeNet, self).__init__() 8 | self.conv1 = nn.Conv2d(1, 20, 5, 1) 9 | self.conv2 = nn.Conv2d(20, 50, 5, 1) 10 | self.fc1 = nn.Linear(4 * 4 * 50, 500) 11 | self.fc2 = nn.Linear(500, 10) 12 | 13 | def forward(self, x): 14 | x = F.relu(self.conv1(x)) 15 | x = F.max_pool2d(x, 2, 2) 16 | x = F.relu(self.conv2(x)) 17 | x = F.max_pool2d(x, 2, 2) 18 | x = x.view(-1, 4 * 4 * 50) 19 | x = F.relu(self.fc1(x)) 20 | x = self.fc2(x) 21 | return x 22 | 23 | def name(self): 24 | return "LeNet" -------------------------------------------------------------------------------- /src/models/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch 4 | import torch.utils.model_zoo as model_zoo 5 | 6 | 7 | def conv3x3(in_planes, out_planes, stride=1): 8 | "3x3 convolution with padding" 9 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 10 | padding=1, bias=False) 11 | 12 | 13 | class BasicBlock(nn.Module): 14 | expansion = 1 15 | 16 | def __init__(self, inplanes, planes, stride=1, downsample=None): 17 | super(BasicBlock, self).__init__() 18 | self.conv1 = conv3x3(inplanes, planes, stride) 19 | self.bn1 = nn.BatchNorm2d(planes) 20 | self.relu = nn.ReLU(inplace=True) 21 | self.conv2 = conv3x3(planes, planes) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | self.downsample = downsample 24 | self.stride = stride 25 | 26 | def forward(self, x): 27 | residual = x 28 | 29 | out = self.conv1(x) 30 | out = self.bn1(out) 31 | out = self.relu(out) 32 | 33 | out = self.conv2(out) 34 | out = self.bn2(out) 35 | 36 | if self.downsample is not None: 37 | residual = self.downsample(x) 38 | 39 | out += residual 40 | out = self.relu(out) 41 | 42 | return out 43 | 44 | 45 | class Bottleneck(nn.Module): 46 | expansion = 4 47 | 48 | def __init__(self, inplanes, planes, stride=1, downsample=None): 49 | super(Bottleneck, self).__init__() 50 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 51 | self.bn1 = nn.BatchNorm2d(planes) 52 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 53 | padding=1, bias=False) 54 | self.bn2 = nn.BatchNorm2d(planes) 55 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 56 | self.bn3 = nn.BatchNorm2d(planes * 4) 57 | self.relu = nn.ReLU(inplace=True) 58 | self.downsample = downsample 59 | self.stride = stride 60 | 61 | def forward(self, x): 62 | residual = x 63 | 64 | out = self.conv1(x) 65 | out = self.bn1(out) 66 | out = self.relu(out) 67 | 68 | out = self.conv2(out) 69 | out = self.bn2(out) 70 | out = self.relu(out) 71 | 72 | out = self.conv3(out) 73 | 
out = self.bn3(out) 74 | 75 | if self.downsample is not None: 76 | residual = self.downsample(x) 77 | 78 | out += residual 79 | out = self.relu(out) 80 | 81 | return out 82 | 83 | 84 | class ResNet(nn.Module): 85 | 86 | def __init__(self, block, layers, num_classes=1000): 87 | self.inplanes = 64 88 | super(ResNet, self).__init__() 89 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 90 | bias=False) 91 | self.bn1 = nn.BatchNorm2d(64) 92 | self.relu = nn.ReLU(inplace=True) 93 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 94 | self.layer1 = self._make_layer(block, 64, layers[0]) 95 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 96 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 97 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 98 | # Average pooling of 10x1 (which is the image dim after the last layer) 99 | self.avgpool = nn.AvgPool2d((10, 1)) 100 | # self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | # This seems forced atm 102 | # 512*batch_size 103 | self.fc = nn.Linear(512, num_classes) 104 | # self.softmax = nn.LogSoftmax(dim=1) 105 | 106 | for m in self.modules(): 107 | if isinstance(m, nn.Conv2d): 108 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 109 | m.weight.data.normal_(0, math.sqrt(2. / n)) 110 | elif isinstance(m, nn.BatchNorm2d): 111 | m.weight.data.fill_(1) 112 | m.bias.data.zero_() 113 | 114 | def _make_layer(self, block, planes, blocks, stride=1): 115 | downsample = None 116 | if stride != 1 or self.inplanes != planes * block.expansion: 117 | downsample = nn.Sequential( 118 | nn.Conv2d(self.inplanes, planes * block.expansion, 119 | kernel_size=1, stride=stride, bias=False), 120 | nn.BatchNorm2d(planes * block.expansion), 121 | ) 122 | 123 | layers = [] 124 | layers.append(block(self.inplanes, planes, stride, downsample)) 125 | self.inplanes = planes * block.expansion 126 | for i in range(1, blocks): 127 | layers.append(block(self.inplanes, planes)) 128 | 129 | return nn.Sequential(*layers) 130 | 131 | def forward(self, x): 132 | x = self.conv1(x) 133 | x = self.bn1(x) 134 | x = self.relu(x) 135 | x = self.maxpool(x) 136 | 137 | x = self.layer1(x) 138 | x = self.layer2(x) 139 | x = self.layer3(x) 140 | x = self.layer4(x) 141 | 142 | x = self.avgpool(x) 143 | x = x.view(x.size(0), -1) 144 | # x = x.flatten() 145 | x = self.fc(x) 146 | # x = self.softmax(x) 147 | 148 | return x 149 | 150 | 151 | def resnet18(pretrained=False, **kwargs): 152 | """Constructs a ResNet-18 model. 
153 | 154 | Args: 155 | pretrained (bool): If True, returns a model pre-trained on ImageNet 156 | """ 157 | model = ResNet(BasicBlock, [1, 1, 1, 1], **kwargs) 158 | # if pretrained: 159 | # model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 160 | return model 161 | 162 | 163 | if __name__ == '__main__': 164 | 165 | model = resnet18(pretrained=False) 166 | img = torch.rand(10, 3, 300, 25)*255 167 | # target = torch.randint(1, 20, (5, 5)) 168 | print(model) 169 | x = model(img) 170 | rnn = nn.GRU(input_size=x.shape[1], hidden_size=1) 171 | output, h_n = rnn(x.unsqueeze(1)) 172 | print(resnet18) -------------------------------------------------------------------------------- /src/models/rhythmNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | import torchvision.models as models 5 | import ssl 6 | import config 7 | 8 | ssl._create_default_https_context = ssl._create_stdlib_context 9 | 10 | ''' 11 | Backbone CNN for RhythmNet model is a RestNet-18 12 | ''' 13 | 14 | 15 | class RhythmNet(nn.Module): 16 | def __init__(self): 17 | super(RhythmNet, self).__init__() 18 | 19 | # resnet o/p -> bs x 1000 20 | # self.resnet18 = resnet18(pretrained=False) 21 | resnet = models.resnet18(pretrained=False) 22 | modules = list(resnet.children())[:-1] 23 | 24 | self.resnet18 = nn.Sequential(*modules) 25 | # The resnet average pool layer before fc 26 | # self.avgpool = nn.AvgPool2d((10, 1)) 27 | self.resnet_linear = nn.Linear(512, 1000) 28 | self.fc_regression = nn.Linear(1000, 1) 29 | self.gru_fc_out = nn.Linear(1000, 1) 30 | self.rnn = nn.GRU(input_size=1000, hidden_size=1000, num_layers=1) 31 | # self.fc = nn.Linear(config.GRU_TEMPORAL_WINDOW, config.GRU_TEMPORAL_WINDOW) 32 | 33 | def forward(self, st_maps, target): 34 | batched_output_per_clip = [] 35 | gru_input_per_clip = [] 36 | hr_per_clip = [] 37 | 38 | # Need to have so as to reflect a batch_size = 1 // if batched then comment out 39 | st_maps = st_maps.unsqueeze(0) 40 | for t in range(st_maps.size(1)): 41 | # with torch.no_grad(): 42 | x = self.resnet18(st_maps[:, t, :, :, :]) 43 | # collapse dimensions to BSx512 (resnet o/p) 44 | x = x.view(x.size(0), -1) 45 | # output dim: BSx1 and Squeeze sequence length after completing GRU step 46 | x = self.resnet_linear(x) 47 | # Save CNN features per clip for the GRU 48 | gru_input_per_clip.append(x.squeeze(0)) 49 | 50 | # Final regression layer for CNN features -> HR (per clip) 51 | x = self.fc_regression(x) 52 | # normalize HR by frame-rate: 25.0 for VIPL 53 | x = x * 25.0 54 | batched_output_per_clip.append(x.squeeze(0)) 55 | # input should be (seq_len, batch, input_size) 56 | 57 | # the features extracted from the backbone CNN are fed to a one-layer GRU structure. 58 | regression_output = torch.stack(batched_output_per_clip, dim=0).permute(1, 0) 59 | 60 | # Trying out GRU in addition to the regression now. 
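# The per-clip 1000-d CNN features are stacked into a (num_clips, 1, 1000) tensor, i.e. (seq_len, batch, input_size), for the one-layer GRU below; each GRU output is mapped to a per-clip HR by gru_fc_out, and only the first config.GRU_TEMPORAL_WINDOW (6) values are returned alongside the regression output.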
61 | gru_input = torch.stack(gru_input_per_clip, dim=0) 62 | gru_output, h_n = self.rnn(gru_input.unsqueeze(1)) 63 | # gru_output = gru_output.squeeze(1) 64 | for i in range(gru_output.size(0)): 65 | hr = self.gru_fc_out(gru_output[i, :, :]) 66 | hr_per_clip.append(hr.flatten()) 67 | 68 | gru_output_seq = torch.stack(hr_per_clip, dim=0).permute(1, 0) 69 | # return output_seq, gru_output.squeeze(0), fc_out 70 | return regression_output, gru_output_seq.squeeze(0)[:6] 71 | 72 | def name(self): 73 | return "RhythmNet" 74 | 75 | 76 | if __name__ == '__main__': 77 | # cm = RhythmNet() 78 | # img = torch.rand(3, 28, 28) 79 | # target = torch.randint(1, 20, (5, 5)) 80 | # x = cm(img) 81 | # print(x) 82 | resnet18 = models.resnet18(pretrained=False) 83 | print(resnet18) 84 | -------------------------------------------------------------------------------- /src/models/simpleCNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | 6 | class SimpleCNN(nn.Module): 7 | def __init__(self): 8 | super(SimpleCNN, self).__init__() 9 | 10 | self.fc1 = nn.Linear(28 * 28, 500) 11 | self.fc2 = nn.Linear(500, 256) 12 | self.fc3 = nn.Linear(256, 10) 13 | 14 | def forward(self, x): 15 | x = x.view(-1, 28 * 28) 16 | x = F.relu(self.fc1(x)) 17 | x = F.relu(self.fc2(x)) 18 | x = self.fc3(x) 19 | 20 | return x 21 | 22 | def name(self): 23 | return "SimpleCNN" 24 | 25 | 26 | if __name__ == '__main__': 27 | cm = SimpleCNN() 28 | img = torch.rand(3, 28, 28) 29 | target = torch.randint(1, 20, (5, 5)) 30 | x = cm(img) 31 | print(x) 32 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnweshCR7/RhythmNet/aa4b336a249af64d0c5e7f516863d6f1f6c285c1/src/utils/__init__.py -------------------------------------------------------------------------------- /src/utils/dataset.py: -------------------------------------------------------------------------------- 1 | # import albumentations 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | from PIL import ImageFile 6 | from torch.utils.data import Dataset 7 | from utils.signal_utils import read_target_data, calculate_hr, get_hr_data 8 | 9 | ImageFile.LOAD_TRUNCATED_IMAGES = True 10 | 11 | 12 | class DataLoaderRhythmNet(Dataset): 13 | """ 14 | Dataset class for RhythmNet 15 | """ 16 | # The data is now the SpatioTemporal Maps instead of videos 17 | 18 | def __init__(self, st_maps_path, target_signal_path): 19 | self.H = 180 20 | self.W = 180 21 | self.C = 3 22 | # self.video_path = data_path 23 | self.st_maps_path = st_maps_path 24 | # self.resize = resize 25 | self.target_path = target_signal_path 26 | self.maps = None 27 | 28 | mean = (0.485, 0.456, 0.406) 29 | std = (0.229, 0.224, 0.225) 30 | # Maybe add more augmentations 31 | # self.augmentation_pipeline = albumentations.Compose( 32 | # [ 33 | # albumentations.Normalize( 34 | # mean, std, max_pixel_value=255.0, always_apply=True 35 | # ) 36 | # ] 37 | # ) 38 | 39 | def __len__(self): 40 | return len(self.st_maps_path) 41 | 42 | def __getitem__(self, index): 43 | # identify the name of the video file so as to get the ground truth signal 44 | self.video_file_name = self.st_maps_path[index].split('/')[-1].split('.')[0] 45 | # targets, timestamps = read_target_data(self.target_path, self.video_file_name) 46 | # sampling rate is 
video fps (check) 47 | 48 | # Load the maps for video at 'index' 49 | self.maps = np.load(self.st_maps_path[index]) 50 | map_shape = self.maps.shape 51 | self.maps = self.maps.reshape((-1, map_shape[3], map_shape[1], map_shape[2])) 52 | 53 | # target_hr = calculate_hr(targets, timestamps=timestamps) 54 | # target_hr = calculate_hr_clip_wise(map_shape[0], targets, timestamps=timestamps) 55 | target_hr = get_hr_data(self.video_file_name) 56 | # To check the fact that we dont have number of targets greater than the number of maps 57 | # target_hr = target_hr[:map_shape[0]] 58 | self.maps = self.maps[:target_hr.shape[0], :, :, :] 59 | return { 60 | "st_maps": torch.tensor(self.maps, dtype=torch.float), 61 | "target": torch.tensor(target_hr, dtype=torch.float) 62 | } 63 | -------------------------------------------------------------------------------- /src/utils/generate_fold_csv.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import pandas as pd 3 | import os 4 | import cv2 5 | from sklearn import model_selection 6 | import scipy.io 7 | 8 | 9 | def preprocess_file_name(file_path): 10 | split_by_path = file_path.split('/') 11 | preprocessed_file_name = "_".join(split_by_path[-4:-1]) 12 | return os.path.join("vipl_npy", f"{preprocessed_file_name}.npy") 13 | 14 | 15 | def make_csv(fold_data_dict): 16 | # video_file_paths = glob.glob(config.ST_MAPS_PATH + "/**/*.npy") 17 | 18 | 19 | # video_file_paths = glob.glob("/Users/anweshcr7/thesis/src/data/vipl_npy/*.npy") 20 | # video_files = [] 21 | # 22 | # for path in video_file_paths: 23 | # split_by_path = path.split('/') 24 | # video_file = os.path.join(split_by_path[-2], split_by_path[-1]) 25 | # video_files.append(video_file) 26 | # 27 | # video_files = [x for x in video_files if "source4" not in x] 28 | # num_folds = 5 29 | # kf = model_selection.KFold(n_splits=num_folds) 30 | 31 | col_names = ['video', 'fold'] 32 | df = pd.DataFrame(columns=col_names) 33 | 34 | fold = 1 35 | 36 | for idx, fold in enumerate(fold_data_dict.keys()): 37 | video_files_fold = [] 38 | fold_subjects = [str(x) for x in fold_data_dict[fold].squeeze(0)] 39 | for subject in fold_subjects: 40 | video_files_fold.extend(glob.glob(f"/Volumes/Backup Plus/vision/VIPL-HR/data/*/p{subject}/*/*/*.avi")) 41 | 42 | # Don't consider NIR videos 43 | video_files_fold = [file_path for file_path in video_files_fold if "source4" not in file_path] 44 | video_files_fold = [preprocess_file_name(file_path) for file_path in video_files_fold] 45 | 46 | 47 | trainDF = pd.DataFrame(video_files_fold, columns=['video']) 48 | trainDF['fold'] = idx + 1 49 | 50 | df = pd.concat([df, trainDF]) 51 | df.to_csv("VIPL_folds_final.csv", index=False) 52 | 53 | print("done") 54 | 55 | 56 | # for train_idx, validation_idx in kf.split(video_files): 57 | # trainDF = pd.DataFrame([video_files[idx] for idx in train_idx], columns=['video']) 58 | # validateDF = pd.DataFrame([video_files[idx] for idx in validation_idx], columns=['video']) 59 | # trainDF[['set', 'iteration']] = 'T', fold 60 | # validateDF[['set', 'iteration']] = 'V', fold 61 | # fold += 1 62 | # 63 | # df = pd.concat([df, trainDF, validateDF]) 64 | # df.to_csv("VIPL_npy.csv", index=False) 65 | 66 | return 67 | 68 | 69 | def make_csv_with_frame_rate(): 70 | # video_file_paths = glob.glob(config.ST_MAPS_PATH + "/**/*.npy") 71 | video_file_paths = glob.glob("/Users/anweshcr7/thesis/src/data/vipl_npy/*.npy") 72 | video_source = "/Volumes/Backup Plus/vision/vipl_videos" 73 | video_files = [] 74 | 
fr_dict = {} 75 | 76 | for path in video_file_paths: 77 | split_by_path = path.split('/') 78 | video_file = os.path.join(split_by_path[-2], split_by_path[-1]) 79 | video_files.append(video_file) 80 | video_name = split_by_path[-1].split('.')[0] + ".avi" 81 | cap = cv2.VideoCapture(os.path.join(video_source, video_name)) 82 | frameRate = cap.get(5) 83 | fr_dict[video_file] = frameRate 84 | cap.release() 85 | 86 | 87 | video_files = [x for x in video_files if "source4" not in x] 88 | num_folds = 5 89 | kf = model_selection.KFold(n_splits=num_folds) 90 | 91 | col_names = ['video', 'set', 'iteration', 'fps'] 92 | df = pd.DataFrame(columns=col_names) 93 | 94 | fold = 1 95 | for train_idx, validation_idx in kf.split(video_files): 96 | trainDF = pd.DataFrame([video_files[idx] for idx in train_idx], columns=['video']) 97 | validateDF = pd.DataFrame([video_files[idx] for idx in validation_idx], columns=['video']) 98 | trainDF[['set', 'iteration']] = 'T', fold 99 | validateDF[['set', 'iteration']] = 'V', fold 100 | trainDF[['fps']] = [fr_dict[video_files[idx]] for idx in train_idx] 101 | validateDF[['fps']] = [fr_dict[video_files[idx]] for idx in validation_idx] 102 | fold += 1 103 | 104 | df = pd.concat([df, trainDF, validateDF]) 105 | df.to_csv("VIPL_npy_with_fps.csv", index=False) 106 | 107 | return 108 | 109 | 110 | if __name__ == '__main__': 111 | fold_data_dict = {} 112 | fold_files = glob.glob("/Volumes/Backup Plus/vision/VIPL-HR/fold/*.mat") 113 | for fold in fold_files: 114 | name = fold.split('/')[-1].split('.')[0] 115 | fold_data = scipy.io.loadmat(fold) 116 | fold_data_dict[name] = fold_data[name] 117 | make_csv(fold_data_dict) 118 | print("done") -------------------------------------------------------------------------------- /src/utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def plot_loss(train_loss_data, test_loss_data, plot_path): 8 | if not os.path.exists(plot_path): 9 | os.makedirs(plot_path) 10 | 11 | x_ax = np.arange(1, len(train_loss_data)+1) 12 | fig = plt.figure() 13 | plt.plot(x_ax, train_loss_data, label="train_loss") 14 | plt.plot(x_ax, test_loss_data, label="test_loss") 15 | plt.title('Train-Test Loss') 16 | plt.ylabel('Loss') 17 | plt.xlabel('Num Epoch') 18 | plt.legend(loc='best') 19 | plt.show() 20 | fig.savefig(plot_path+'/train-test_loss.png', dpi=fig.dpi) 21 | 22 | 23 | def save_model_checkpoint(model, optimizer, loss, checkpoint_path): 24 | save_filename = "running_model.pt" 25 | # checkpoint_path = os.path.join(checkpoint_path, save_filename) 26 | if not os.path.exists(checkpoint_path): 27 | os.makedirs(checkpoint_path) 28 | 29 | torch.save({ 30 | # 'epoch': epoch, 31 | 'model_state_dict': model.state_dict(), 32 | 'optimizer_state_dict': optimizer.state_dict(), 33 | 'loss': loss, 34 | }, os.path.join(checkpoint_path, save_filename)) 35 | print('Saved!') 36 | 37 | 38 | def load_model_if_checkpointed(model, optimizer, checkpoint_path, load_on_cpu=False): 39 | loss = 0.0 40 | checkpoint_flag = False 41 | 42 | # check if checkpoint exists 43 | if os.path.exists(os.path.join(checkpoint_path, "running_model.pt")): 44 | checkpoint_flag = True 45 | if load_on_cpu: 46 | checkpoint = torch.load(os.path.join(checkpoint_path, "running_model.pt"), map_location=torch.device('cpu')) 47 | else: 48 | checkpoint = torch.load(os.path.join(checkpoint_path, "running_model.pt")) 49 | 50 | 
model.load_state_dict(checkpoint['model_state_dict']) 51 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 52 | # epoch = checkpoint['epoch'] 53 | loss = checkpoint['loss'] 54 | 55 | return model, optimizer, loss, checkpoint_flag 56 | -------------------------------------------------------------------------------- /src/utils/plot_scripts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import io 3 | import numpy as np 4 | import PIL.Image 5 | from torchvision.transforms import ToTensor 6 | import config as config 7 | # from utils.read_data import plot_signal 8 | import matplotlib.pyplot as plt 9 | 10 | 11 | def plot_train_test_curves(train_loss_data, test_loss_data, plot_path, fold_tag=1): 12 | if not os.path.exists(plot_path): 13 | os.makedirs(plot_path) 14 | 15 | clip = min(len(train_loss_data), len(test_loss_data)) 16 | x_ax = np.arange(1, clip + 1) 17 | fig = plt.figure() 18 | plt.plot(x_ax, train_loss_data[:clip], label="train_loss") 19 | plt.plot(x_ax, test_loss_data[:clip], label="test_loss") 20 | plt.title('Train-Test Loss') 21 | plt.ylabel('Loss') 22 | plt.xlabel('Num Epoch') 23 | plt.legend(loc='best') 24 | plt.show() 25 | fig.savefig(plot_path + f'/loss_fold_{fold_tag}.png', dpi=fig.dpi) 26 | 27 | 28 | def gt_vs_est(data1, data2, plot_path=None, tb=False): 29 | data1 = np.asarray(data1) 30 | data2 = np.asarray(data2) 31 | # mean = np.mean([data1, data2], axis=0) 32 | # diff = data1 - data2 # Difference between data1 and data2 33 | # md = np.mean(diff) # Mean of the difference 34 | # sd = np.std(diff, axis=0) # Standard deviation of the difference 35 | 36 | fig = plt.figure() 37 | plt.scatter(data1, data2) 38 | plt.title('true labels vs estimated') 39 | plt.ylabel('estimated HR') 40 | plt.xlabel('true HR') 41 | # plt.axhline(md, color='gray', linestyle='--') 42 | # plt.axhline(md + 1.96*sd, color='gray', linestyle='--') 43 | # plt.axhline(md - 1.96*sd, color='gray', linestyle='--') 44 | 45 | if tb: 46 | buf = io.BytesIO() 47 | plt.savefig(buf, format='png') 48 | buf.seek(0) 49 | return buf 50 | 51 | else: 52 | # plt.show() 53 | fig.savefig(plot_path + f'/true_vs_est.png', dpi=fig.dpi) 54 | 55 | 56 | def bland_altman_plot(data1, data2, plot_path=None, tb=False): 57 | data1 = np.asarray(data1) 58 | data2 = np.asarray(data2) 59 | mean = np.mean([data1, data2], axis=0) 60 | diff = data1 - data2 # Difference between data1 and data2 61 | md = np.mean(diff) # Mean of the difference 62 | sd = np.std(diff, axis=0) # Standard deviation of the difference 63 | 64 | fig = plt.figure() 65 | plt.scatter(mean, diff) 66 | plt.axhline(md, color='gray', linestyle='--') 67 | plt.axhline(md + 1.96 * sd, color='gray', linestyle='--') 68 | plt.axhline(md - 1.96 * sd, color='gray', linestyle='--') 69 | 70 | if tb: 71 | buf = io.BytesIO() 72 | plt.savefig(buf, format='png') 73 | buf.seek(0) 74 | return buf 75 | 76 | else: 77 | # plt.show() 78 | fig.savefig(plot_path + f'/bland-altman_new.png', dpi=fig.dpi) 79 | 80 | 81 | def create_plot_for_tensorboard(plot_name, data1, data2): 82 | if plot_name == "bland_altman": 83 | fig_buf = bland_altman_plot(data1, data2, tb=True) 84 | 85 | if plot_name == "gt_vs_est": 86 | fig_buf = gt_vs_est(data1, data2, tb=True) 87 | 88 | image = PIL.Image.open(fig_buf) 89 | image = ToTensor()(image) 90 | 91 | return image 92 | 93 | # 94 | # def plot_rmse(data, plot_path, fold=0): 95 | # if not os.path.exists(plot_path): 96 | # os.makedirs(plot_path) 97 | # 98 | # x_ax = np.arange(1, len(data)+1) 99 | # fig = 
plt.figure() 100 | # plt.plot(x_ax, data, label="predicted_HR_RMSE") 101 | # plt.ylabel('RMSE_HR') 102 | # plt.xlabel('Time') 103 | # plt.show() 104 | # fig.savefig(plot_path + f'/RMSE_fold{fold}.png', dpi=fig.dpi) 105 | 106 | 107 | if __name__ == '__main__': 108 | # plot_signal('data/data_preprocessed', 's22_trial05') 109 | gt_vs_est(np.random.random(100), np.random.random(100), plot_path=config.PLOT_PATH) 110 | -------------------------------------------------------------------------------- /src/utils/signal_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | import pandas as pd 5 | import torch 6 | from scipy import signal 7 | import heartpy as hp 8 | from tqdm import tqdm 9 | import config as config 10 | import utils.video2st_maps as video2st_maps 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | # To be used for DEAP dataset where the PPG signal is data[38] 15 | def get_ppg_channel(x): 16 | # i think PPG channel is at 38 17 | return x[38] 18 | 19 | 20 | # Reads the clip-wise HR data that was computed and stored in the csv files (per video) 21 | def get_hr_data(file_name): 22 | hr_df = pd.read_csv(config.HR_DATA_PATH + f"{file_name}.csv") 23 | 24 | return hr_df["hr_bpm"].values 25 | 26 | 27 | # Read the raw signal from the ground truth csv and resample. 28 | # Not be needed during the model as we will compute the HRs first-hand and use them directly instead of raw signals 29 | def read_target_data(target_data_path, video_file_name): 30 | signal_data_file_path = os.path.join(target_data_path, f"{video_file_name} PPG.csv") 31 | signal_df = pd.read_csv(signal_data_file_path) 32 | 33 | return signal_df["Signal"].values, signal_df["Time"].values 34 | # In RhythmNet maybe we don't need to resample. CHECK 35 | return filter_and_resample_truth_signal(signal_df, resampling_size=3000) 36 | 37 | 38 | # Function allows filtering and resampling of signals. Not being used for VIPL-HR 39 | def filter_and_resample_truth_signal(signal_df, resampling_size): 40 | # Signal should be bandpass filtered to remove noise outside of expected HR frequency range. 41 | # But we are using CLEANER_PPG signals which are considered filtered. 42 | orignal_sample_rate = hp.get_samplerate_mstimer(signal_df["Time"].values) 43 | 44 | # filtered = hp.filter_signal(signal_df["Signal"].values, [0.7, 2.5], sample_rate=sample_rate, 45 | # order=3, filtertype='bandpass') 46 | resampled_signal = signal.resample(signal_df["Signal"].values, resampling_size, t=signal_df["Time"].values) 47 | 48 | # we'll need to add resampled[1] 49 | return resampled_signal[0], resampled_signal[1] 50 | 51 | 52 | # Returns index of value that is nearest to the arg:value in the arg:array 53 | def find_nearest(array, value): 54 | array = np.asarray(array) 55 | idx = (np.abs(array - value)).argmin() 56 | return idx 57 | 58 | 59 | # Controller Function to compute and store the HR values as csv (HR values measured clip-wise i.e. 
per st_map per video) 60 | def compute_hr_for_rhythmnet(): 61 | data_files = glob.glob(config.TARGET_SIGNAL_DIR + "*.csv") 62 | # for file in tqdm(data_files): 63 | for file in tqdm(data_files[:1]): 64 | file = '/Users/anweshcr7/Downloads/CleanerPPG/VIPL-HR/Cleaned/p41_v7_source2.csv' 65 | signal_df = pd.read_csv(file) 66 | signal_data, timestamps, peak_data = signal_df["Signal"].values, signal_df["Time"].values, signal_df["Peaks"].values 67 | video_path = config.FACE_DATA_DIR + f"{file.split('/')[-1].split('.')[0]}.avi" 68 | video_meta_data = video2st_maps.get_frames_and_video_meta_data(video_path, meta_data_only=True) 69 | # hr_segmentwise = hp.process_segmentwise(signal_df["Signal"].values, sample_rate=128, segment_width=10, segment_overlap=0.951) 70 | # hr_segmentwise = hr_segmentwise[1]["bpm"] 71 | # plt.plot(np.arange(len(hr_segmentwise)), hr_segmentwise) 72 | # plt.show() 73 | npy_path = f"{config.ST_MAPS_PATH}{file.split('/')[-1].split('.')[0]}.npy" 74 | if os.path.exists(npy_path): 75 | video_meta_data["num_maps"] = np.load(f"{config.ST_MAPS_PATH}{file.split('/')[-1].split('.')[0]}.npy").shape[0] 76 | else: 77 | continue 78 | hr = np.asarray(calculate_hr_clip_wise(timestamps, signal_df, video_meta_data), dtype="float32") 79 | file_name = file.split("/")[-1].split(".")[0].split(" ")[0] 80 | hr_df = pd.DataFrame(hr, columns=["hr_bpm"]) 81 | hr_df.to_csv(f"../data/hr_csv/{file_name}.csv", index=False) 82 | # print("eheee") 83 | 84 | 85 | # Function to compute and store the HR values as csv (HR values measured clip-wise i.e. per st_map per video) 86 | def calculate_hr_clip_wise(timestamps=None, signal_df=None, video_meta_data=None): 87 | 88 | sliding_window_stride = int((video_meta_data["sliding_window_stride"]/video_meta_data["frame_rate"])*1000) 89 | sliding_window_size_frame = int((config.CLIP_SIZE/video_meta_data["frame_rate"])) 90 | # convert to milliseconds 91 | sliding_window_size = sliding_window_size_frame * 1000 92 | # num_maps = int((video_meta_data["num_frames"] - config.CLIP_SIZE)/sliding_window_size_frame) + 1 93 | num_maps = video_meta_data["num_maps"] 94 | # for i in range(len(timestamps)): 95 | # print(timestamps[i+1]-timestamps[i]) 96 | count = 0 97 | hr_list = [] 98 | for start_time in range(0, int(timestamps[-1]), sliding_window_stride): 99 | if count == num_maps: 100 | break 101 | # start_index = np.where(timestamps == start_time) 102 | end_time = start_time + sliding_window_size 103 | # end_index = np.where(timestamps == end_time) 104 | start_index = np.searchsorted(timestamps, start_time, side='left') 105 | end_index = np.searchsorted(timestamps, end_time, side='left') 106 | 107 | # start_index = start_index[0][0] 108 | if end_index == 0: 109 | end_index = len(timestamps) - 1 110 | # break 111 | 112 | curr_data = signal_df.iloc[start_index:end_index] 113 | time_intervals = curr_data[curr_data["Peaks"] == 1]["Time"].values 114 | ibi_array = [time_intervals[idx + 1] - time_intervals[idx] for idx, time_val in enumerate(time_intervals[:-1])] 115 | if len(ibi_array) == 0: 116 | hr_bpm = hr_list[-1] 117 | else: 118 | hr_bpm = 1000/np.mean(ibi_array)*60 119 | hr_list.append(hr_bpm) 120 | 121 | count += 1 122 | 123 | 124 | # plt.plot(np.arange(len(hr_list)), hr_list) 125 | # plt.show() 126 | return hr_list 127 | 128 | 129 | # Function to compute HR from raw signal. 
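# --- Added note (illustration only; not part of the original module) ---
# calculate_hr_clip_wise above converts peak-to-peak intervals to BPM as
# 1000 / mean(ibi_ms) * 60, i.e. 60000 / mean_ibi_ms; for example, peaks every
# 800 ms give 60000 / 800 = 75 bpm. A minimal sketch of that conversion
# (the names are illustrative, not taken from this repository):
#
#     ibis_ms = np.diff(peak_times_ms)        # inter-beat intervals in ms
#     hr_bpm = 60000.0 / np.mean(ibis_ms)     # mean IBI -> beats per minute
#
# The function below instead hands the raw signal to heartpy (hp.process) and
# falls back to a default of 75 bpm whenever processing fails or returns NaN.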
130 | def calculate_hr(signal_data, timestamps=None): 131 | sampling_rate = 47.63 132 | if timestamps is not None: 133 | sampling_rate = hp.get_samplerate_mstimer(timestamps) 134 | try: 135 | wd, m = hp.process(signal_data, sample_rate=sampling_rate) 136 | hr_bpm = m["bpm"] 137 | except: 138 | hr_bpm = 75.0 139 | 140 | if np.isnan(hr_bpm): 141 | hr_bpm = 75.0 142 | return hr_bpm 143 | 144 | else: 145 | # We are working with predicted HR: 146 | # need to filter and do other stuff.. lets see 147 | signal_data = hp.filter_signal(signal_data, cutoff=[0.7, 2.5], sample_rate=sampling_rate, order=6, 148 | filtertype='bandpass') 149 | try: 150 | wd, m = hp.process(signal_data, sample_rate=sampling_rate, high_precision=True, clean_rr=True) 151 | hr_bpm = m["bpm"] 152 | except: 153 | print("BadSignal received (could not be filtered) using def HR value = 75bpm") 154 | hr_bpm = 75.0 155 | return hr_bpm 156 | 157 | 158 | if __name__ == '__main__': 159 | compute_hr_for_rhythmnet() 160 | 161 | files = glob.glob(config.HR_DATA_PATH+"/*.csv") 162 | for file in files: 163 | hr = get_hr_data(file.split('/')[-1].split('.')[0]) 164 | if type(hr) == np.object_: 165 | print(file) 166 | try: 167 | torch.tensor(hr, dtype=torch.float) 168 | except: 169 | print(file) -------------------------------------------------------------------------------- /src/utils/video2st_maps.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import glob 4 | import numpy as np 5 | import config as config 6 | from tqdm import tqdm 7 | import matplotlib.pyplot as plt 8 | # used for accessing url to download files 9 | import urllib.request as urlreq 10 | from sklearn import preprocessing 11 | from joblib import Parallel, delayed, parallel_backend 12 | import time 13 | 14 | # download requisite certificates 15 | import ssl; 16 | 17 | ssl._create_default_https_context = ssl._create_stdlib_context 18 | 19 | 20 | # Chunks the ROI into blocks of size 5x5 21 | def chunkify(img, block_width=5, block_height=5): 22 | shape = img.shape 23 | x_len = shape[0] // block_width 24 | y_len = shape[1] // block_height 25 | # print(x_len, y_len) 26 | 27 | chunks = [] 28 | x_indices = [i for i in range(0, shape[0] + 1, x_len)] 29 | y_indices = [i for i in range(0, shape[1] + 1, y_len)] 30 | 31 | shapes = list(zip(x_indices, y_indices)) 32 | 33 | # # for plotting purpose 34 | # implot = plt.imshow(img) 35 | # 36 | # end_x_list = [] 37 | # end_y_list = [] 38 | 39 | 40 | for i in range(len(x_indices) - 1): 41 | # try: 42 | start_x = x_indices[i] 43 | end_x = x_indices[i + 1] 44 | for j in range(len(y_indices) - 1): 45 | start_y = y_indices[j] 46 | end_y = y_indices[j+1] 47 | # end_x_list.append(end_x) 48 | # end_y_list.append(end_y) 49 | chunks.append(img[start_x:end_x, start_y:end_y]) 50 | # except IndexError: 51 | # print('End of Array') 52 | 53 | return chunks 54 | 55 | 56 | def plot_image(img): 57 | plt.axis("off") 58 | plt.imshow(img, origin='upper') 59 | plt.show() 60 | 61 | 62 | # Downloads xml file for face detection cascade 63 | def get_haarcascade(): 64 | haarcascade_url = config.haarcascade_url 65 | haarcascade_filename = haarcascade_url.split('/')[-1] 66 | # chech if file is in working directory 67 | if haarcascade_filename in os.listdir(os.curdir): 68 | # print("xml file already exists") 69 | pass 70 | else: 71 | # download file from url and save locally as haarcascade_frontalface_alt2.xml, < 1MB 72 | urlreq.urlretrieve(haarcascade_url, haarcascade_filename) 73 | print("xml file 
downloaded") 74 | 75 | return cv2.CascadeClassifier(haarcascade_filename) 76 | 77 | # Downloads xml file for eye detection cascade 78 | def get_eye_cascade(): 79 | eye_cascade_url = config.eye_cascade_url 80 | eye_cascade_filename = eye_cascade_url.split('/')[-1] 81 | # chech if file is in working directory 82 | if eye_cascade_filename in os.listdir(os.curdir): 83 | # print("xml file already exists") 84 | pass 85 | else: 86 | # download file from url and save locally as haarcascade_frontalface_alt2.xml, < 1MB 87 | urlreq.urlretrieve(eye_cascade_url, eye_cascade_filename) 88 | print("xml file downloaded") 89 | 90 | return cv2.CascadeClassifier(eye_cascade_filename) 91 | 92 | 93 | # Function to read the the video data as an array of frames and additionally return metadata like FPS, Dims etc. 94 | def get_frames_and_video_meta_data(video_path, meta_data_only=False): 95 | cap = cv2.VideoCapture(video_path) 96 | frameRate = cap.get(5) # frame rate 97 | 98 | # Frame dimensions: WxH 99 | frame_dims = (int(cap.get(3)), int(cap.get(4))) 100 | # Paper mentions a stride of 0.5 seconds = 15 frames 101 | sliding_window_stride = int(frameRate / 2) 102 | num_frames = int(cap.get(7)) 103 | if meta_data_only: 104 | return {"frame_rate": frameRate, "sliding_window_stride": sliding_window_stride, "num_frames": num_frames} 105 | 106 | # Frames from the video have shape NumFrames x H x W x C 107 | frames = np.zeros((num_frames, frame_dims[1], frame_dims[0], 3), dtype='uint8') 108 | 109 | frame_counter = 0 110 | while cap.isOpened(): 111 | # curr_frame_id = int(cap.get(1)) # current frame number 112 | ret, frame = cap.read() 113 | if not ret: 114 | break 115 | 116 | frames[frame_counter, :, :, :] = frame 117 | frame_counter += 1 118 | if frame_counter == num_frames: 119 | break 120 | 121 | cap.release() 122 | return frames, frameRate, sliding_window_stride 123 | 124 | 125 | # Threaded function for st_map generation from a single video arg:file in dataset 126 | def get_spatio_temporal_map_threaded(file): 127 | # print(f"Generating Maps for file: {file}") 128 | # maps = np.zeros((10, config.CLIP_SIZE, 25, 3)) 129 | # print(index) 130 | maps = preprocess_video_to_st_maps( 131 | video_path=file, 132 | output_shape=(180, 180), clip_size=config.CLIP_SIZE) 133 | 134 | if maps is None: 135 | return 1 136 | 137 | file_name = file.split('/')[-1].split('.')[0] 138 | folder_name = file.split('/')[-2] 139 | save_path = os.path.join(config.ST_MAPS_PATH, folder_name) 140 | if not os.path.exists(save_path): 141 | os.makedirs(save_path) 142 | save_path = os.path.join(save_path, f"{file_name}.npy") 143 | # np.save(f"{config.ST_MAPS_PATH}{file_name}.npy", maps) 144 | np.save(save_path, maps) 145 | return 1 146 | 147 | 148 | # Threaded wrapper function for st_maps from all videos that calls the threaded func in a parallel fashion 149 | def get_spatio_temporal_map_threaded_wrapper(): 150 | video_files = glob.glob(config.FACE_DATA_DIR + '*avi') 151 | # video_files = video_files[:10] 152 | less_than_ten = ['/Volumes/T7/vipl_videos/p19_v2_source2.avi', '/Volumes/T7/vipl_videos/p32_v7_source3.avi', '/Volumes/T7/vipl_videos/p32_v7_source4.avi', '/Volumes/T7/vipl_videos/p40_v7_source2.avi', '/Volumes/T7/vipl_videos/p22_v3_source1.avi'] 153 | video_files = [file for file in video_files if file not in less_than_ten] 154 | start = time.time() 155 | with parallel_backend("loky", inner_max_num_threads=4): 156 | Parallel(n_jobs=3)(delayed(get_spatio_temporal_map_threaded)(file) for file in tqdm(video_files)) 157 | end = time.time() 158 | 159 | 
print('{:.4f} s'.format(end - start)) 160 | 161 | 162 | # function for st_map generation from all videos in dataset 163 | def get_spatio_temporal_map(): 164 | video_files = glob.glob(config.FACE_DATA_DIR + '*avi') 165 | # video_files = video_files[100:110] 166 | # video_files = ['/Volumes/Backup Plus/vision/vipl_videos/p10_v1_source1.avi', '/Volumes/Backup Plus/vision/vipl_videos/p10_v1_source2.avi'] 167 | # video_files = ['/Volumes/Backup Plus/vision/DEAP_emotion/face_video/s01/s01_trial01.avi'] 168 | start = time.time() 169 | for file in tqdm(video_files): 170 | # maps = np.zeros((1, config.CLIP_SIZE, 25, 3)) 171 | # for index in range(1): 172 | # print(index) 173 | if os.path.exists(f"{config.ST_MAPS_PATH}{file.split('/')[-1].split('.')[0]}.npy"): 174 | map = np.load(f"{config.ST_MAPS_PATH}{file.split('/')[-1].split('.')[0]}.npy") 175 | if (map.shape[0]) > 125: 176 | print(f"\nFilename:{file} | num maps: {map.shape[0]}") 177 | else: 178 | continue 179 | # maps = preprocess_video_to_st_maps( 180 | # video_path=file, 181 | # output_shape=(180, 180), clip_size=config.CLIP_SIZE) 182 | # if maps is None: 183 | # continue 184 | optimized_end = time.time() 185 | # print('{:.4f} s'.format((optimized_end - start)/60)) 186 | 187 | # file_name = file.split('/')[-1].split('.')[0] 188 | # folder_name = file.split('/')[-2] 189 | # save_path = os.path.join(config.ST_MAPS_PATH, folder_name) 190 | # if not os.path.exists(save_path): 191 | # os.makedirs(save_path) 192 | # save_path = os.path.join(save_path, f"{file_name}.npy") 193 | # # np.save(f"{config.ST_MAPS_PATH}{file_name}.npy", maps) 194 | # np.save(save_path, maps) 195 | 196 | end = time.time() 197 | print('{:.4f} s'.format(end - start)) 198 | # return maps 199 | 200 | 201 | # Optimized function for converting videos to Spatio-temporal maps 202 | def preprocess_video_to_st_maps(video_path, output_shape, clip_size): 203 | frames, frameRate, sliding_window_stride = get_frames_and_video_meta_data(video_path) 204 | 205 | num_frames = frames.shape[0] 206 | output_shape = (frames.shape[1], frames.shape[2]) 207 | num_maps = int((num_frames - clip_size)/sliding_window_stride + 1) 208 | if num_maps < 0: 209 | # print(num_maps) 210 | print(video_path) 211 | return None 212 | 213 | # stacked_maps is the all the st maps for a given video (=num_maps) stacked. 
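# --- Added note (illustration only; not part of the original module) ---
# Shapes at this point: chunkify() splits each YUV frame into a 5x5 grid, so
# every frame contributes 25 ROI blocks x 3 channel means, one clip of
# clip_size consecutive frames yields a clip_size x 25 x 3 spatio-temporal
# map, and num_maps = (num_frames - clip_size) // sliding_window_stride + 1
# such maps are stacked per video. With hypothetical values of
# num_frames = 750, clip_size = 300 and the 15-frame stride mentioned above,
# that would give (750 - 300) // 15 + 1 = 31 maps.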
214 | stacked_maps = np.zeros((num_maps, config.CLIP_SIZE, 25, 3)) 215 | # processed_maps will contain all the data after processing each frame, but not yet converted into maps 216 | processed_maps = np.zeros((num_frames, 25, 3)) 217 | # processed_frames = np.zeros((num_frames, output_shape[0], output_shape[1], 3)) 218 | processed_frames = [] 219 | map_index = 0 220 | 221 | # Init scaler and detector 222 | min_max_scaler = preprocessing.MinMaxScaler() 223 | detector = get_haarcascade() 224 | eye_detector = get_eye_cascade() 225 | 226 | # First we process all the frames and then work with sliding window to save repeated processing for the same frame index 227 | for idx, frame in enumerate(frames): 228 | # spatio_temporal_map = np.zeros((fr, 25, 3)) 229 | ''' 230 | Preprocess the Image 231 | Step 1: Use cv2 face detector based on Haar cascades 232 | Step 2: Crop the frame based on the face co-ordinates (we need to do 160%) 233 | Step 3: Downsample the face cropped frame to output_shape = 36x36 234 | ''' 235 | faces = detector.detectMultiScale(frame, 1.3, 5) 236 | if len(faces) is not 0: 237 | (x, y, w, d) = faces[0] 238 | frame_cropped = frame[y:(y + d), x:(x + w)] 239 | eyes = eye_detector.detectMultiScale(frame_cropped, 1.2, 3) 240 | # if len(eyes) > 0: 241 | # # for having the same radius in both eyes 242 | # (eye_x, eye_y, eye_w, eye_h) = eyes[0] 243 | # eye_radius = (eye_w + eye_h) // 5 244 | # mask = np.ones(frame_cropped.shape[:2], dtype="uint8") 245 | # for (ex, ey, ew, eh) in eyes[:2]: 246 | # eye_center = (ex + ew // 2, ey + eh // 2) 247 | # # if eye_radius 248 | # cv2.circle(mask, eye_center, eye_radius, 0, -1) 249 | # # eh = int(0.8*eh) 250 | # # ew = int(0.8*ew) 251 | # # cv2.rectangle(mask, (ex, ey), (ex+ew, ey+eh), 0, -1) 252 | # 253 | # frame_masked = cv2.bitwise_and(frame_cropped, frame_cropped, mask=mask) 254 | # else: 255 | # frame_masked = frame_cropped 256 | # # plot_image(frame_masked) 257 | 258 | frame_masked = frame_cropped 259 | else: 260 | # The problemis that this doesn't get cropped :/ 261 | # (x, y, w, d) = (308, 189, 215, 215) 262 | # frame_masked = frame[y:(y + d), x:(x + w)] 263 | 264 | # print("face detection failed, image frame will be masked") 265 | mask = np.zeros(frame.shape[:2], dtype="uint8") 266 | frame_masked = cv2.bitwise_and(frame, frame, mask=mask) 267 | # plot_image(frame_masked) 268 | 269 | # frame_cropped = frame[y:(y + d), x:(x + w)] 270 | 271 | try: 272 | # frame_resized = cv2.resize(frame_masked, output_shape, interpolation=cv2.INTER_CUBIC) 273 | frame_resized = cv2.cvtColor(frame_masked, cv2.COLOR_BGR2YUV) 274 | 275 | except: 276 | print('\n--------- ERROR! -----------\nUsual cv empty error') 277 | print(f'Shape of img1: {frame.shape}') 278 | # print(f'bbox: {bbox}') 279 | print(f'This is at idx: {idx}') 280 | exit(666) 281 | 282 | processed_frames.append(frame_resized) 283 | # roi_blocks = chunkify(frame_resized) 284 | # for block_idx, block in enumerate(roi_blocks): 285 | # avg_pixels = cv2.mean(block) 286 | # processed_maps[idx, block_idx, 0] = avg_pixels[0] 287 | # processed_maps[idx, block_idx, 1] = avg_pixels[1] 288 | # processed_maps[idx, block_idx, 2] = avg_pixels[2] 289 | 290 | # At this point we have the processed maps from all the frames in a video and now we do the sliding window part. 
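# --- Added note (illustration only; not part of the original module) ---
# The loop below slides a clip_size-frame window over processed_frames in
# steps of sliding_window_stride, averages each of the 25 ROI blocks per frame
# (cv2.mean over a chunk), and then rescales each block's temporal signal
# channel-wise, roughly:
#
#     scaled = (x - x.min()) / (x.max() - x.min())   # MinMaxScaler per block/channel
#     stored = (scaled * 255.0).astype(np.uint8)     # stretched to 0..255
#
# Note that spatio_temporal_map is a float64 array, so the uint8 values are
# written back as floats; the saved .npy maps therefore stay floating point.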
291 | for start_frame_index in range(0, num_frames, sliding_window_stride): 292 | end_frame_index = start_frame_index + clip_size 293 | if end_frame_index > num_frames: 294 | break 295 | # # print(f"start_idx: {start_frame_index} | end_idx: {end_frame_index}") 296 | spatio_temporal_map = np.zeros((clip_size, 25, 3)) 297 | # 298 | # spatio_temporal_map = processed_maps[start_frame_index:end_frame_index, :, :] 299 | 300 | 301 | for idx, frame in enumerate(processed_frames[start_frame_index:end_frame_index]): 302 | roi_blocks = chunkify(frame) 303 | for block_idx, block in enumerate(roi_blocks): 304 | avg_pixels = cv2.mean(block) 305 | spatio_temporal_map[idx, block_idx, 0] = avg_pixels[0] 306 | spatio_temporal_map[idx, block_idx, 1] = avg_pixels[1] 307 | spatio_temporal_map[idx, block_idx, 2] = avg_pixels[2] 308 | 309 | for block_idx in range(spatio_temporal_map.shape[1]): 310 | # Not sure about uint8 311 | fn_scale_0_255 = lambda x: (x * 255.0).astype(np.uint8) 312 | scaled_channel_0 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 0].reshape(-1, 1)) 313 | spatio_temporal_map[:, block_idx, 0] = fn_scale_0_255(scaled_channel_0.flatten()) 314 | scaled_channel_1 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 1].reshape(-1, 1)) 315 | spatio_temporal_map[:, block_idx, 1] = fn_scale_0_255(scaled_channel_1.flatten()) 316 | scaled_channel_2 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 2].reshape(-1, 1)) 317 | spatio_temporal_map[:, block_idx, 2] = fn_scale_0_255(scaled_channel_2.flatten()) 318 | 319 | stacked_maps[map_index, :, :, :] = spatio_temporal_map 320 | map_index += 1 321 | 322 | return stacked_maps 323 | 324 | # UNOPTIMIZED CODE 325 | # def get_st_maps(video_path, output_shape, clip_size): 326 | # frames, frameRate, sliding_window_stride = get_frames_and_video_meta_data(video_path) 327 | # 328 | # num_frames = frames.shape[0] 329 | # num_maps = int((num_frames - clip_size)/sliding_window_stride + 1) 330 | # maps = np.zeros((num_maps, config.CLIP_SIZE, 25, 3)) 331 | # map_index = 0 332 | # 333 | # # Init scaler and detector 334 | # min_max_scaler = preprocessing.MinMaxScaler() 335 | # detector = get_haarcascade() 336 | # eye_detector = get_eye_cascade() 337 | # 338 | # for start_frame_index in tqdm(range(0, num_frames, sliding_window_stride)): 339 | # end_frame_index = start_frame_index + clip_size 340 | # if end_frame_index > 400: 341 | # break 342 | # # print(f"start_idx: {start_frame_index} | end_idx: {end_frame_index}") 343 | # spatio_temporal_map = np.zeros((clip_size, 25, 3)) 344 | # 345 | # frames_in_clip = frames[start_frame_index:end_frame_index] 346 | # 347 | # for idx, frame in enumerate(frames_in_clip): 348 | # ''' 349 | # Preprocess the Image 350 | # Step 1: Use cv2 face detector based on Haar cascades 351 | # Step 2: Crop the frame based on the face co-ordinates (we need to do 160%) 352 | # Step 3: Downsample the face cropped frame to output_shape = 36x36 353 | # ''' 354 | # faces = detector.detectMultiScale(frame, 1.3, 5) 355 | # if len(faces) is not 0: 356 | # (x, y, w, d) = faces[0] 357 | # frame_cropped = frame[y:(y + d), x:(x + w)] 358 | # eyes = eye_detector.detectMultiScale(frame_cropped, 1.2, 3) 359 | # if len(eyes) > 0: 360 | # # for having the same radius in both eyes 361 | # (eye_x, eye_y, eye_w, eye_h) = eyes[0] 362 | # eye_radius = (eye_w + eye_h) // 5 363 | # mask = np.ones(frame_cropped.shape[:2], dtype="uint8") 364 | # for (ex, ey, ew, eh) in eyes[:2]: 365 | # eye_center = (ex + ew // 2, ey + eh // 2) 366 | 
# # if eye_radius 367 | # cv2.circle(mask, eye_center, eye_radius, 0, -1) 368 | # # eh = int(0.8*eh) 369 | # # ew = int(0.8*ew) 370 | # # cv2.rectangle(mask, (ex, ey), (ex+ew, ey+eh), 0, -1) 371 | # 372 | # frame_masked = cv2.bitwise_and(frame_cropped, frame_cropped, mask=mask) 373 | # else: 374 | # frame_masked = frame_cropped 375 | # # plot_image(frame_masked) 376 | # else: 377 | # # The problemis that this doesn't get cropped :/ 378 | # # (x, y, w, d) = (308, 189, 215, 215) 379 | # # frame_masked = frame[y:(y + d), x:(x + w)] 380 | # 381 | # # print("face detection failed, image frame will be masked") 382 | # mask = np.zeros(frame.shape[:2], dtype="uint8") 383 | # frame_masked = cv2.bitwise_and(frame, frame, mask=mask) 384 | # # plot_image(frame_masked) 385 | # 386 | # # frame_cropped = frame[y:(y + d), x:(x + w)] 387 | # 388 | # try: 389 | # frame_resized = cv2.resize(frame_masked, output_shape, interpolation=cv2.INTER_CUBIC) 390 | # frame_resized = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2YUV) 391 | # 392 | # except: 393 | # print('\n--------- ERROR! -----------\nUsual cv empty error') 394 | # print(f'Shape of img1: {frame.shape}') 395 | # # print(f'bbox: {bbox}') 396 | # print(f'This is at idx: {idx}') 397 | # exit(666) 398 | # 399 | # roi_blocks = chunkify(frame_resized) 400 | # for block_idx, block in enumerate(roi_blocks): 401 | # avg_pixels = cv2.mean(block) 402 | # spatio_temporal_map[idx, block_idx, 0] = avg_pixels[0] 403 | # spatio_temporal_map[idx, block_idx, 1] = avg_pixels[1] 404 | # spatio_temporal_map[idx, block_idx, 2] = avg_pixels[2] 405 | # 406 | # print('he;;p') 407 | # 408 | # for block_idx in range(spatio_temporal_map.shape[1]): 409 | # # Not sure about uint8 410 | # fn_scale_0_255 = lambda x: (x * 255.0) 411 | # scaled_channel_0 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 0].reshape(-1, 1)) 412 | # spatio_temporal_map[:, block_idx, 0] = fn_scale_0_255(scaled_channel_0.flatten()) 413 | # scaled_channel_1 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 1].reshape(-1, 1)) 414 | # spatio_temporal_map[:, block_idx, 1] = fn_scale_0_255(scaled_channel_1.flatten()) 415 | # scaled_channel_2 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 2].reshape(-1, 1)) 416 | # spatio_temporal_map[:, block_idx, 2] = fn_scale_0_255(scaled_channel_2.flatten()) 417 | # 418 | # maps[map_index, :, :, :] = spatio_temporal_map 419 | # map_index += 1 420 | # 421 | # return maps 422 | 423 | 424 | if __name__ == '__main__': 425 | # get_frames_and_video_meta_data('/Volumes/T7/vipl_videos/p58_v4_source3.avi') 426 | # get_spatio_temporal_map() 427 | # get_spatio_temporal_map_threaded_wrapper() 428 | # video_files = glob.glob(config.FACE_DATA_DIR + '/**/*avi') 429 | # r = list(process_map(get_spatio_temporal_map_threaded, video_files[:2], max_workers=1)) 430 | # signal = read_target_data("/Users/anweshcr7/github/RhythmNet/data/data_preprocessed/", "s01_trial04") 431 | # 432 | # resampled = signal.resample(df["Signal"].values, 3000, df["Time"].values) 433 | # resampled_sample_rate = hp.get_samplerate_mstimer(resampled[1]) 434 | # print(calculate_hr(resampled[0], resampled_sample_rate)) 435 | 436 | # make_csv_with_frame_rate() 437 | print('done') --------------------------------------------------------------------------------
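A note on how these utilities are meant to be driven: each module above exposes its entry point through an if __name__ == '__main__': block rather than a single pipeline script. The sketch below is an assumed orchestration, not code from the repository: it presumes the paths in src/config.py are configured, that it is run from src/ (so the utils.* imports resolve the same way they do inside the modules), and it glosses over the fact that compute_hr_for_rhythmnet as written still iterates only over data_files[:1] and overrides the file with a hard-coded sample path, which looks like a debugging leftover.

# Assumed preprocessing sketch (both functions exist in the modules above)
import utils.signal_utils as signal_utils
import utils.video2st_maps as video2st_maps

# 1. Convert each face video into stacked spatio-temporal maps saved as .npy,
#    using the joblib-threaded wrapper (a serial variant also exists above).
video2st_maps.get_spatio_temporal_map_threaded_wrapper()

# 2. Compute clip-wise ground-truth HR CSVs from the cleaned PPG signals,
#    aligned to the number of maps generated per video.
signal_utils.compute_hr_for_rhythmnet()

Separately, generate_fold_csv.py's make_csv (shown at the top of this section) writes VIPL_npy_with_fps.csv, recording per-video fps values and 5-fold train/validation assignments. The checkpoint helpers in model_utils.py are likewise meant to be used as a pair, save_model_checkpoint during training and load_model_if_checkpointed to resume; a minimal hedged sketch with placeholder model and optimizer:

import torch
from utils.model_utils import save_model_checkpoint, load_model_if_checkpointed

model = torch.nn.Linear(10, 1)                    # placeholder model
optimizer = torch.optim.Adam(model.parameters())  # placeholder optimizer

save_model_checkpoint(model, optimizer, loss=0.0, checkpoint_path="checkpoints/")
model, optimizer, loss, found = load_model_if_checkpointed(
    model, optimizer, "checkpoints/", load_on_cpu=True)
print(f"Resumed from checkpoint: {found}, last saved loss: {loss}")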