├── .gitignore
├── DemoNotebook.ipynb
├── LICENSE
├── README.md
├── data
│   └── read.txt
├── requirements.txt
├── results
│   └── train-test_loss.png
└── src
    ├── __init__.py
    ├── config.py
    ├── engine.py
    ├── engine_vipl.py
    ├── loss_func
    │   ├── __init__.py
    │   ├── custom_loss.py
    │   └── rhythmnet_loss.py
    ├── main.py
    ├── models
    │   ├── __init__.py
    │   ├── lenet.py
    │   ├── resnet.py
    │   ├── rhythmNet.py
    │   └── simpleCNN.py
    └── utils
        ├── __init__.py
        ├── dataset.py
        ├── generate_fold_csv.py
        ├── model_utils.py
        ├── plot_scripts.py
        ├── signal_utils.py
        └── video2st_maps.py
/.gitignore:
--------------------------------------------------------------------------------
1 | /data/
2 | /venv/
--------------------------------------------------------------------------------
/DemoNotebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "melanomaDetection.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "accelerator": "GPU"
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "view-in-github",
22 | "colab_type": "text"
23 | },
24 | "source": [
25 |         ""
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "metadata": {
31 | "colab": {
32 | "base_uri": "https://localhost:8080/"
33 | },
34 | "id": "QMRaimMFMErb",
35 | "outputId": "1fec65ad-be6c-4b63-b80e-98b1e41e4ac3"
36 | },
37 | "source": [
38 | "# Mount Google Drive\n",
39 | "from google.colab import drive # import drive from google colab\n",
40 | "\n",
41 | "ROOT = \"/content/drive\" # default location for the drive\n",
42 | "print(ROOT) # print content of ROOT (Optional)\n",
43 | "\n",
44 | "drive.mount(ROOT) # we mount the google drive at /content/drive"
45 | ],
46 | "execution_count": 13,
47 | "outputs": [
48 | {
49 | "output_type": "stream",
50 | "text": [
51 | "/content/drive\n",
52 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
53 | ],
54 | "name": "stdout"
55 | }
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "metadata": {
61 | "colab": {
62 | "base_uri": "https://localhost:8080/"
63 | },
64 | "id": "mFWquqGJMXs0",
65 | "outputId": "45551f54-8b6c-4749-d21d-6f548a0c4ef7"
66 | },
67 | "source": [
68 | "# Clone github repository setup\n",
69 | "from os.path import join \n",
70 | "\n",
71 | "# path to your project on Google Drive\n",
72 | "MY_GOOGLE_DRIVE_PATH = 'My Drive/MyDrive/' \n",
73 | "\n",
74 | "# your Github username \n",
75 | "GIT_USERNAME = \"{GITHUB USERNAME}\" \n",
76 | "# GitHub access token\n",
77 | "GIT_TOKEN = \"{GITHUB TOKEN}\" \n",
78 | "# Replace with your github repository\n",
79 | "GIT_REPOSITORY = \"DL_boilerplate\" \n",
80 | "\n",
81 | "PROJECT_PATH = join(ROOT, MY_GOOGLE_DRIVE_PATH)\n",
82 | "\n",
83 | "print(\"PROJECT_PATH: \", PROJECT_PATH) \n",
84 | "\n",
85 | "# # In case we haven't created the folder already; we will create a folder in the project path \n",
86 | "# !mkdir \"{PROJECT_PATH}\" \n",
87 | "\n",
88 | "GIT_PATH = f\"https://{GIT_TOKEN}@github.com/{GIT_USERNAME}/{GIT_REPOSITORY}.git\"\n",
89 | "print(\"GIT_PATH: \", GIT_PATH)\n",
90 | "GIT_BRANCH = \"main\""
91 | ],
92 | "execution_count": 14,
93 | "outputs": [
94 | {
95 | "output_type": "stream",
96 | "text": [
97 | "PROJECT_PATH: /content/drive/My Drive/MyDrive/\n",
98 | "GIT_PATH: https://{a675a49eff6b8a3df6aad1c1c2cea844c3fdeeb2}@github.com/anweshcr7/DL_boilerplate.git\n"
99 | ],
100 | "name": "stdout"
101 | }
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "metadata": {
107 | "colab": {
108 | "base_uri": "https://localhost:8080/"
109 | },
110 | "id": "aYKVgctbOAnp",
111 | "outputId": "1a99f760-80c0-46b6-aca7-37798e3193ad"
112 | },
113 | "source": [
114 | "%rm -r /content/DL_boilerplate/\n",
115 | "# EXEC_COMMAND = f\"-b {GIT_BRANCH} {GIT_PATH}\"\n",
116 | "!git clone -b \"{GIT_BRANCH}\" \"{GIT_PATH}\""
117 | ],
118 | "execution_count": 16,
119 | "outputs": [
120 | {
121 | "output_type": "stream",
122 | "text": [
123 | "Cloning into 'DL_boilerplate'...\n",
124 | "remote: Enumerating objects: 30, done.\u001b[K\n",
125 | "remote: Counting objects: 100% (30/30), done.\u001b[K\n",
126 | "remote: Compressing objects: 100% (25/25), done.\u001b[K\n",
127 | "remote: Total 30 (delta 8), reused 20 (delta 4), pack-reused 0\u001b[K\n",
128 | "Unpacking objects: 100% (30/30), done.\n"
129 | ],
130 | "name": "stdout"
131 | }
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "metadata": {
137 | "colab": {
138 | "base_uri": "https://localhost:8080/"
139 | },
140 | "id": "UVkBhRqEQK9L",
141 | "outputId": "cfae1561-bd91-4322-b565-1853207e631a"
142 | },
143 | "source": [
144 | "!python3 /content/DL_boilerplate/src/main.py "
145 | ],
146 | "execution_count": 18,
147 | "outputs": [
148 | {
149 | "output_type": "stream",
150 | "text": [
151 | "GPU available... using GPU\n",
152 | "100% 1875/1875 [00:17<00:00, 105.36it/s]\n",
153 | "Saved!\n",
154 | "100% 313/313 [00:02<00:00, 124.59it/s]\n",
155 | "Epoch 0 => Training Loss: 0.19766141367604334, Val Loss: 0.05979700740503025\n",
156 | "100% 1875/1875 [00:17<00:00, 104.25it/s]\n",
157 | "Saved!\n",
158 | "100% 313/313 [00:02<00:00, 114.15it/s]\n",
159 | "Epoch 1 => Training Loss: 0.05563460199572146, Val Loss: 0.039060163388883073\n",
160 | "100% 1875/1875 [00:18<00:00, 99.60it/s]\n",
161 | "Saved!\n",
162 | "100% 313/313 [00:02<00:00, 121.64it/s]\n",
163 | "Epoch 2 => Training Loss: 0.03707047849864078, Val Loss: 0.0323324040974794\n",
164 | "100% 1875/1875 [00:18<00:00, 100.07it/s]\n",
165 | "Saved!\n",
166 | "100% 313/313 [00:02<00:00, 120.85it/s]\n",
167 | "Epoch 3 => Training Loss: 0.027940944086847594, Val Loss: 0.02903405395759931\n",
168 | "100% 1875/1875 [00:18<00:00, 101.86it/s]\n",
169 | "Saved!\n",
170 | "100% 313/313 [00:02<00:00, 134.51it/s]\n",
171 | "Epoch 4 => Training Loss: 0.020424689519958336, Val Loss: 0.03015474373615215\n",
172 | "100% 1875/1875 [00:17<00:00, 105.84it/s]\n",
173 | "Saved!\n",
174 | "100% 313/313 [00:02<00:00, 133.46it/s]\n",
175 | "Epoch 5 => Training Loss: 0.01611058505279458, Val Loss: 0.026433935853666494\n",
176 | "100% 1875/1875 [00:18<00:00, 102.68it/s]\n",
177 | "Saved!\n",
178 | "100% 313/313 [00:02<00:00, 129.28it/s]\n",
179 | "Epoch 6 => Training Loss: 0.013548866433653165, Val Loss: 0.027602487027936874\n",
180 | "100% 1875/1875 [00:17<00:00, 104.42it/s]\n",
181 | "Saved!\n",
182 | "100% 313/313 [00:02<00:00, 133.54it/s]\n",
183 | "Epoch 7 => Training Loss: 0.01068355406346988, Val Loss: 0.030769745317247334\n",
184 | "100% 1875/1875 [00:18<00:00, 101.77it/s]\n",
185 | "Saved!\n",
186 | "100% 313/313 [00:02<00:00, 128.57it/s]\n",
187 | "Epoch 8 => Training Loss: 0.007954271178089523, Val Loss: 0.04250151148297651\n",
188 | "100% 1875/1875 [00:17<00:00, 104.32it/s]\n",
189 | "Saved!\n",
190 | "100% 313/313 [00:02<00:00, 116.59it/s]\n",
191 | "Epoch 9 => Training Loss: 0.007675014509890358, Val Loss: 0.031255505094597145\n",
192 | "\n",
193 | "done\n"
194 | ],
195 | "name": "stdout"
196 | }
197 | ]
198 | }
199 | ]
200 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Anwesh Marwade
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RhythmNet: End-to-end Heart Rate Estimation from Face via Spatial-temporal Representation
2 | A reproduction of the RhythmNet model. [Paper link](https://arxiv.org/abs/1910.11515)
3 |
4 | #### Dataset:
5 | VIPL-HR dataset
6 |
7 | ## Experiments
8 | Shared parameters:
9 | ```
10 | batch size: 32
11 | Dataset: VIPL
12 | Model: RhythmNet
13 | initial learning rate: 1e-3
14 | epochs: 50
15 | window size: 300 frames with a stride of 0.5 seconds
16 | ```
17 |
18 | **Dataset split**: 5-fold cross-validation
19 | ### Experiment for 1-Fold without GRU layer
20 |
21 | | Set | Loss | MAE (bpm) | RMSE (bpm) |
22 | |----------|:-----:|:----------:|:----------:|
23 | | Training | 3.096 | 1.817 | 2.834 |
24 | | Eval | 15.91 | 9.255 | 11.787 |
25 |
26 | ### Experiment for 1-Fold with GRU layer
27 | | Set | Loss | MAE (bpm) | RMSE (bpm) |
28 | |----------|:-----:|:----------:|:----------:|
29 | | Training | 3.925 | 2.423 | 4.16 |
30 | | Eval | 14.25 | 13.992 | 17.019 |
31 |
32 |
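
### Running the code
Training and evaluation are driven by `src/main.py` (the DemoNotebook runs the same script on Colab). A minimal local sketch, assuming the spatio-temporal maps, per-video HR CSVs and the fold CSV referenced in `src/config.py` are available at the configured paths:

```
pip install -r requirements.txt
cd src
python main.py
```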
--------------------------------------------------------------------------------
/data/read.txt:
--------------------------------------------------------------------------------
1 | Your data will be downloaded here (or place it here manually).
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cached-property==1.5.2
2 | cycler==0.10.0
3 | dataclasses==0.6
4 | future==0.18.2
5 | h5py==3.1.0
6 | heartpy==1.2.6
7 | joblib==0.17.0
8 | kiwisolver==1.3.1
9 | matplotlib==3.3.3
10 | mne==0.21.2
11 | numpy==1.19.5
12 | opencv-python==4.4.0.46
13 | pandas==1.2.0
14 | Pillow==9.0.0
15 | pyparsing==2.4.7
16 | python-dateutil==2.8.1
17 | pytz==2020.5
18 | scikit-learn==0.23.2
19 | scipy==1.5.4
20 | six==1.15.0
21 | threadpoolctl==2.1.0
22 | torch==1.7.0
23 | torchvision==0.8.1
24 | tqdm==4.53.0
25 | typing-extensions==3.7.4.3
26 |
--------------------------------------------------------------------------------
/results/train-test_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnweshCR7/RhythmNet/aa4b336a249af64d0c5e7f516863d6f1f6c285c1/results/train-test_loss.png
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnweshCR7/RhythmNet/aa4b336a249af64d0c5e7f516863d6f1f6c285c1/src/__init__.py
--------------------------------------------------------------------------------
/src/config.py:
--------------------------------------------------------------------------------
1 | # FACE_DATA_DIR = "/content/drive/MyDrive/data/deep_phys/face_videos/"
2 | # DATA_PATH = "/content/drive/MyDrive/data/rhythmnet/st_maps/"
3 | # TARGET_SIGNAL_DIR = "/content/drive/MyDrive/data/deep_phys/data_preprocessed/"
4 | # SAVE_CSV_PATH = "/content/drive/MyDrive/data/rhythmnet/kfold.csv"
5 | # ST_MAPS_PATH = "/content/drive/MyDrive/data/rhythmnet/st_maps/"
6 | # CHECKPOINT_PATH = "/content/drive/MyDrive/data/rhythmnet/checkpoint"
7 | # PLOT_PATH = "/content/drive/MyDrive/data/rhythmnet/plots"
8 | # NUM_WORKERS = 2
9 | # DEVICE = "cuda"
10 | # BATCH_SIZE = 10
11 | # EPOCHS = 50
12 | # lr = 1e-3
13 | # CLIP_SIZE = 300
14 |
15 | # For INSY server
16 |
17 | # FACE_DATA_DIR = "/content/drive/MyDrive/data/deep_phys/face_videos/"
18 | # HOME_DIR = "/tudelft.net/staff-bulk/ewi/insy/VisionLab/students/amarwade/"
19 | # HR_DATA_PATH = HOME_DIR + "data/DEAP/hr_csv/"
20 | # DATA_PATH = HOME_DIR + "data/DEAP/st_maps/"
21 | # TARGET_SIGNAL_DIR = HOME_DIR + "data/DEAP/data_preprocessed/"
22 | # SAVE_CSV_PATH = HOME_DIR + "RhythmNet/subject_exclusive_folds.csv"
23 | # ST_MAPS_PATH = HOME_DIR + "data/DEAP/st_maps/"
24 | # CHECKPOINT_PATH = HOME_DIR + "checkpoints/RhythmNet"
25 | # PLOT_PATH = HOME_DIR + "plots/RhythmNet"
26 | # NUM_WORKERS = 2
27 | # DEVICE = "cuda"
28 | # BATCH_SIZE = 16
29 | # EPOCHS = 20
30 | # lr = 1e-3
31 | # CLIP_SIZE = 300
32 | # TENSORBOARD_PATH = HOME_DIR + "/runs/"
33 | # GRU_TEMPORAL_WINDOW = 6
34 |
35 | haarcascade_url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_alt2.xml"
36 | eye_cascade_url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_eye.xml"
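# NOTE: the paths below are machine-specific local defaults; adapt them to your own data layout before running.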
37 | # FACE_DATA_DIR = "../data/face_video/"
38 | # HR_DATA_PATH = "../data/VIPL_hr_csv/"
39 | HR_DATA_PATH = "../data/VIPL_hr_csv/"
40 | FACE_DATA_DIR = "/Volumes/T7/vipl_videos/"
41 | TARGET_SIGNAL_DIR = "/Users/anweshcr7/Downloads/CleanerPPG/VIPL-HR/Cleaned/"
42 | # SAVE_CSV_PATH = "subject_exclusive_folds.csv"
43 | SAVE_CSV_PATH = "VIPL_npy.csv"
44 | ST_MAPS_PATH = "../data/vipl_st_maps/"
45 | # ST_MAPS_PATH = "/Volumes/Backup Plus/vision/DEAP_emotion/st_maps/"
46 | CHECKPOINT_PATH = "../checkpoint"
47 | DATA_PATH = "../data/"
48 | PLOT_PATH = "../plots"
49 | BATCH_SIZE = 16
50 | EPOCHS = 10
51 | EPOCHS_TEST = 1
52 | CLIP_SIZE = 300
53 | lr = 1e-3
54 | IMAGE_WIDTH = 300
55 | IMAGE_HEIGHT = 75
56 | NUM_WORKERS = 0
57 | DEVICE = "cpu"
58 | GRU_TEMPORAL_WINDOW = 6
--------------------------------------------------------------------------------
/src/engine.py:
--------------------------------------------------------------------------------
1 | from tqdm import tqdm
2 | import torch
3 | import config
4 | from utils.model_utils import save_model_checkpoint
5 |
6 |
7 | def train_fn(model, data_loader, optimizer, loss_fn):
8 | model.train()
9 | fin_loss = 0
10 | loss = 0.0
11 |
12 | target_hr_list = []
13 | predicted_hr_list = []
14 | tk_iterator = tqdm(data_loader, total=len(data_loader))
15 | for data in tk_iterator:
16 | # an item of the data is available as a dictionary
17 | for (key, value) in data.items():
18 | data[key] = value.to(config.DEVICE)
19 |
20 | optimizer.zero_grad()
21 | with torch.set_grad_enabled(True):
22 | outputs = model(**data)
23 | # w/o GRU
24 | loss = loss_fn(outputs.squeeze(2), data["target"])
25 | # loss = loss_fn(outputs, data["target"])
26 | loss.backward()
27 | optimizer.step()
28 |             # "For each face video, the average of the HRs (bpm) of its individual clips is computed as the final HR result."
29 | target_hr_batch = list(data["target"].mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy())
30 | target_hr_list.extend(target_hr_batch)
31 |
32 | predicted_hr_batch = list(outputs.squeeze(2).mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy())
33 | predicted_hr_list.extend(predicted_hr_batch)
34 | fin_loss += loss.item()
35 |
36 | return target_hr_list, predicted_hr_list, fin_loss / len(data_loader)
37 |
38 |
39 | def eval_fn(model, data_loader, loss_fn):
40 | model.eval()
41 | fin_loss = 0
42 | target_hr_list = []
43 | predicted_list = []
44 | with torch.no_grad():
45 | tk_iterator = tqdm(data_loader, total=len(data_loader))
46 | for data in tk_iterator:
47 | for (key, value) in data.items():
48 | data[key] = value.to(config.DEVICE)
49 |
50 | # with torch.set_grad_enabled(False):
51 | outputs = model(**data)
52 | # _, _, out = model(**data)
53 | loss = loss_fn(outputs.squeeze(2), data["target"])
54 | # _, batch_preds = torch.max(out.data, 1)
55 | fin_loss += loss.item()
56 | target_hr_batch = list(data["target"].mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy())
57 | target_hr_list.extend(target_hr_batch)
58 |
59 | predicted_hr_batch = list(outputs.squeeze(2).mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy())
60 | predicted_list.extend(predicted_hr_batch)
61 |
62 |
63 | return target_hr_list, predicted_list, fin_loss / len(data_loader)
64 |
--------------------------------------------------------------------------------
/src/engine_vipl.py:
--------------------------------------------------------------------------------
1 | from tqdm import tqdm
2 | import torch
3 | import config
4 | import numpy as np
5 | from utils.model_utils import save_model_checkpoint
6 |
7 |
8 | def train_fn(model, data_loader, optimizer, loss_fn):
9 | model.train()
10 | fin_loss = 0
11 | loss = 0.0
12 |
13 | target_hr_list = []
14 | predicted_hr_list = []
15 | tk_iterator = tqdm(data_loader, total=len(data_loader))
16 | batched_data = []
17 | for batch in tk_iterator:
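        # Due to the identity collate_fn in main.py, `batch` is a list of per-video dicts,
        # so the loss and the HR statistics are accumulated per video.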
18 | for data in batch:
19 | # an item of the data is available as a dictionary
20 | for (key, value) in data.items():
21 | data[key] = value.to(config.DEVICE)
22 |
23 | optimizer.zero_grad()
24 | with torch.set_grad_enabled(True):
25 | outputs, gru_outputs = model(**data)
26 | # w/o GRU
27 | # loss = loss_fn(outputs.squeeze(0), data["target"])
28 | loss = loss_fn(outputs.squeeze(0), gru_outputs, data["target"])
29 | loss.backward()
30 | optimizer.step()
31 |                 # "For each face video, the average of the HRs (bpm) of its individual clips is computed as the final HR result."
32 | # target_hr_batch = list(data["target"].mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy())
33 | target_hr_list.append(data["target"].mean().item())
34 |
35 | # predicted_hr_batch = list(outputs.squeeze(2).mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy())
36 | predicted_hr_list.append(outputs.squeeze(0).mean().item())
37 | fin_loss += loss.item()
38 |
39 | return target_hr_list, predicted_hr_list, fin_loss / (len(data_loader)*config.BATCH_SIZE)
40 |
41 |
42 | def eval_fn(model, data_loader, loss_fn):
43 | model.eval()
44 | fin_loss = 0
45 | target_hr_list = []
46 | predicted_hr_list = []
47 | with torch.no_grad():
48 | tk_iterator = tqdm(data_loader, total=len(data_loader))
49 | for batch in tk_iterator:
50 | for data in batch:
51 | for (key, value) in data.items():
52 | data[key] = value.to(config.DEVICE)
53 |
54 | # with torch.set_grad_enabled(False):
55 | outputs, gru_outputs = model(**data)
56 | # loss w/o GRU
57 | # loss = loss_fn(outputs.squeeze(0), data["target"])
58 | # loss with GRU
59 | loss = loss_fn(outputs.squeeze(0), gru_outputs, data["target"])
60 | fin_loss += loss.item()
61 | # target_hr_batch = list(data["target"].mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy())
62 | target_hr_list.append(data["target"].mean().item())
63 |
64 | # predicted_hr_batch = list(outputs.squeeze(2).mean(dim=1, keepdim=True).squeeze(1).detach().cpu().numpy())
65 | predicted_hr_list.append(outputs.squeeze(0).mean().item())
66 |
67 | return target_hr_list, predicted_hr_list, fin_loss / (len(data_loader)*config.BATCH_SIZE)
68 |
--------------------------------------------------------------------------------
/src/loss_func/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnweshCR7/RhythmNet/aa4b336a249af64d0c5e7f516863d6f1f6c285c1/src/loss_func/__init__.py
--------------------------------------------------------------------------------
/src/loss_func/custom_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import config as config
3 |
4 | class MyLoss(torch.autograd.Function):
5 | """
6 | We can implement our own custom autograd Functions by subclassing
7 | torch.autograd.Function and implementing the forward and backward passes
8 | which operate on Tensors.
9 | """
10 |
11 | @staticmethod
12 | def forward(ctx, hr_t, hr_outs, T):
13 | """
14 | In the forward pass we receive a Tensor containing the input and return
15 | a Tensor containing the output. ctx is a context object that can be used
16 | to stash information for backward computation. You can cache arbitrary
17 | objects for use in the backward pass using the ctx.save_for_backward method.
18 | """
19 | ctx.hr_outs = hr_outs
20 | ctx.hr_mean = hr_outs.mean()
21 | ctx.T = T
22 | ctx.save_for_backward(hr_t)
23 | # pdb.set_trace()
24 | # hr_t, hr_mean, T = input
25 |
26 | if hr_t > ctx.hr_mean:
27 | loss = hr_t - ctx.hr_mean
28 | else:
29 | loss = ctx.hr_mean - hr_t
30 |
31 | return loss
32 | # return input.clamp(min=0)
33 |
34 | @staticmethod
35 | def backward(ctx, grad_output):
36 | """
37 | In the backward pass we receive a Tensor containing the gradient of the loss
38 | with respect to the output, and we need to compute the gradient of the loss
39 | with respect to the input.
40 | """
41 | output = torch.zeros(1).to(config.DEVICE)
42 |
43 | hr_t, = ctx.saved_tensors
44 | hr_outs = ctx.hr_outs
45 |
46 |         # accumulate gradient contributions from every entry of hr_outs except hr_t
47 |
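        # The returned value is d( sum_i |hr_i - hr_mean| ) / d(hr_t), using d(hr_mean)/d(hr_t) = 1/T:
        #   the i == t term contributes (1/T - 1) * sign(hr_mean - hr_t),
        #   every i != t term contributes (1/T) * sign(hr_mean - hr_i).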
48 | for hr in hr_outs:
49 | if hr == hr_t:
50 | pass
51 | else:
52 | output = output + (1/ctx.T)*torch.sign(ctx.hr_mean - hr)
53 |
54 | output = (1/ctx.T - 1)*torch.sign(ctx.hr_mean - hr_t) + output
55 |
56 | return output, None, None
57 |
58 |
59 | # if __name__ == '__main__':
60 | #
61 | # dtype = torch.float
62 | # device = torch.device("cpu")
63 | # # device = torch.device("cuda:0") # Uncomment this to run on GPU
64 | # # torch.backends.cuda.matmul.allow_tf32 = False # Uncomment this to run on GPU
65 | #
66 | # # The above line disables TensorFloat32. This a feature that allows
67 | # # networks to run at a much faster speed while sacrificing precision.
68 | # # Although TensorFloat32 works well on most real models, for our toy model
69 | # # in this tutorial, the sacrificed precision causes convergence issue.
70 | # # For more information, see:
71 | # # https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices
72 | #
73 | # # N is batch size; D_in is input dimension;
74 | # # H is hidden dimension; D_out is output dimension.
75 | # N, D_in, H, D_out = 64, 1000, 100, 10
76 | # # tensor([[0.4178, 0.8199, 0.1713, -0.8368, 0.2154, -0.4960, 0.4925, -0.7679,
77 | # # -0.1096, 0.7345]], grad_fn= < SqueezeBackward1 >)
78 | # # Create random Tensors to hold input and outputs.
79 | # with torch.set_grad_enabled(True):
80 | # # hr_outs = torch.tensor([0.4178, 0.8199, 0.1713, -0.8368, 0.2154, -0.4960, 0.4925, -0.7679, -0.1096, 0.7345],
81 | # # device=device, dtype=dtype)
82 | # hr_outs = torch.autograd.Variable(torch.randn(3), requires_grad=True)
83 | # hr_mean = hr_outs.mean()
84 | # # y = torch.tensor(0., device=device, dtype=dtype)
85 | #
86 | # # Create random Tensors for weights.
87 | # # w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
88 | # # w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)
89 | #
90 | # learning_rate = 1e-6
91 | # smooth_loss = torch.autograd.Variable(torch.zeros(1), requires_grad=True)
92 | # for i in range(hr_outs.shape[0]):
93 | # # To apply our Function, we use Function.apply method. We alias this as 'relu'.
94 | # custom_loss = MyLoss.apply
95 | # smooth_loss = smooth_loss + custom_loss(hr_outs[i], hr_outs, hr_outs.shape[0])
96 | #
97 | # smooth_loss.backward()
98 | #
99 | # print("done")
100 |
--------------------------------------------------------------------------------
/src/loss_func/rhythmnet_loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import numpy as np
3 | import torch
4 | import config as config
5 | from loss_func.custom_loss import MyLoss
6 |
7 |
8 | class RhythmNetLoss(nn.Module):
9 | def __init__(self, weight=100.0):
10 | super(RhythmNetLoss, self).__init__()
11 | self.l1_loss = nn.L1Loss()
12 | self.lambd = weight
13 | self.gru_outputs_considered = None
14 | self.custom_loss = MyLoss()
15 | self.device = config.DEVICE
16 |
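    # Total loss: L = L1(resnet_outputs, target) + lambda * L_smooth(gru_outputs),
    # where lambda is the `weight` passed to the constructor (100.0 by default).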
17 | def forward(self, resnet_outputs, gru_outputs, target):
18 | frame_rate = 25.0
19 | # resnet_outputs, gru_outputs, _ = outputs
20 | # target_array = target.repeat(1, resnet_outputs.shape[1])
21 | l1_loss = self.l1_loss(resnet_outputs, target)
22 | smooth_loss_component = self.smooth_loss(gru_outputs)
23 |
24 | loss = l1_loss + self.lambd*smooth_loss_component
25 | return loss
26 |
27 | # Need to write backward pass for this loss function
28 | def smooth_loss(self, gru_outputs):
29 | smooth_loss = torch.zeros(1).to(device=self.device)
30 | self.gru_outputs_considered = gru_outputs.flatten()
31 | # hr_mean = self.gru_outputs_considered.mean()
32 | for hr_t in self.gru_outputs_considered:
33 | # custom_fn = MyLoss.apply
34 | smooth_loss = smooth_loss + self.custom_loss.apply(torch.autograd.Variable(hr_t, requires_grad=True),
35 | self.gru_outputs_considered,
36 | self.gru_outputs_considered.shape[0])
37 | return smooth_loss / self.gru_outputs_considered.shape[0]
38 |
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 | import torch
4 | import numpy as np
5 | import pandas as pd
6 | import torch.nn as nn
7 | from tqdm import tqdm
8 | import engine
9 | import engine_vipl
10 | import config
11 | from torch.utils.tensorboard import SummaryWriter
12 | from utils.dataset import DataLoaderRhythmNet
13 | from utils.plot_scripts import plot_train_test_curves, bland_altman_plot, gt_vs_est, create_plot_for_tensorboard
14 | from utils.model_utils import plot_loss, load_model_if_checkpointed, save_model_checkpoint
15 | from models.simpleCNN import SimpleCNN
16 | from models.lenet import LeNet
17 | from models.rhythmNet import RhythmNet
18 | from loss_func.rhythmnet_loss import RhythmNetLoss
19 | from scipy.stats import pearsonr
20 |
21 |
22 | # Needed in VIPL dataset where each data item has a different number of frames/maps
23 | def collate_fn(batch):
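    # Identity collate: the DataLoader yields each batch as a plain list of per-video dicts
    # (each with "st_maps" and "target"), which engine_vipl then iterates one video at a time.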
24 | batched_st_map, batched_targets = [], []
25 | # for data in batch:
26 | # batched_st_map.append(data["st_maps"])
27 | # batched_targets.append(data["target"])
28 | # # torch.stack(batched_output_per_clip, dim=0).transpose_(0, 1)
29 | return batch
30 |
31 |
32 | def rmse(l1, l2):
33 |
34 | return np.sqrt(np.mean((l1-l2)**2))
35 |
36 |
37 | def mae(l1, l2):
38 |
39 | return np.mean([abs(item1-item2)for item1, item2 in zip(l1, l2)])
40 |
41 |
42 | def compute_criteria(target_hr_list, predicted_hr_list):
43 | pearson_per_signal = []
44 | HR_MAE = mae(np.array(predicted_hr_list), np.array(target_hr_list))
45 | HR_RMSE = rmse(np.array(predicted_hr_list), np.array(target_hr_list))
46 |
47 | # for (gt_signal, predicted_signal) in zip(target_hr_list, predicted_hr_list):
48 | # r, p_value = pearsonr(predicted_signal, gt_signal)
49 | # pearson_per_signal.append(r)
50 |
51 | # return {"MAE": np.mean(HR_MAE), "RMSE": HR_RMSE, "Pearson": np.mean(pearson_per_signal)}
52 | return {"MAE": np.mean(HR_MAE), "RMSE": HR_RMSE}
53 |
54 |
55 | def run_training():
56 |
57 | # check path to checkpoint directory
58 | if config.CHECKPOINT_PATH:
59 | if not os.path.exists(config.CHECKPOINT_PATH):
60 | os.makedirs(config.CHECKPOINT_PATH)
61 | print("Output directory is created")
62 |
63 | # --------------------------------------
64 | # Initialize Model
65 | # --------------------------------------
66 |
67 | model = RhythmNet()
68 |
69 | if torch.cuda.is_available():
70 | print('GPU available... using GPU')
71 | torch.cuda.manual_seed_all(42)
72 | else:
73 | print("GPU not available, using CPU")
74 |
75 | if config.CHECKPOINT_PATH:
76 | checkpoint_path = os.path.join(os.getcwd(), config.CHECKPOINT_PATH)
77 | if not os.path.exists(checkpoint_path):
78 | os.makedirs(checkpoint_path)
79 | print("Output directory is created")
80 |
81 | # device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
82 |
83 | model.to(config.DEVICE)
84 |
85 | optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
86 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
87 | optimizer, factor=0.8, patience=5, verbose=True
88 | )
89 | # loss_fn = nn.L1Loss()
90 | loss_fn = RhythmNetLoss()
91 |
92 | testset = trainset = None
93 |
94 | # Initialize SummaryWriter object
95 | writer = SummaryWriter()
96 |
97 | # Read from a pre-made csv file that contains data divided into folds for cross validation
98 | folds_df = pd.read_csv(config.SAVE_CSV_PATH)
99 |
100 | # Loop for enumerating through folds.
101 | print(f"Details: {len(folds_df['iteration'].unique())} fold training for {config.EPOCHS} Epochs (each video)")
102 | # for k in folds_df['iteration'].unique():
103 | for k in [1]:
104 | # Filter DF
105 | video_files_test = folds_df.loc[(folds_df['iteration'] == k) & (folds_df['set'] == 'V')]
106 | video_files_train = folds_df.loc[(folds_df['iteration'] == k) & (folds_df['set'] == 'T')]
107 |
108 | # Get paths from filtered DF VIPL
109 | video_files_test = [os.path.join(config.ST_MAPS_PATH, video_path.split('/')[-1]) for video_path in
110 | video_files_test["video"].values]
111 | video_files_train = [os.path.join(config.ST_MAPS_PATH, video_path.split('/')[-1]) for video_path in
112 | video_files_train["video"].values]
113 |
114 | # video_files_test = [os.path.join(config.ST_MAPS_PATH, video_path) for video_path in
115 | # video_files_test["video"].values]
116 | # video_files_train = [os.path.join(config.ST_MAPS_PATH, video_path) for video_path in
117 | # video_files_train["video"].values]
118 |
119 | # video_files_train = video_files_train[:32]
120 | # video_files_test = video_files_test[:32]
121 |
122 | # print(f"Reading Current File: {video_files_train[0]}")
123 |
124 | # --------------------------------------
125 | # Build Dataloaders
126 | # --------------------------------------
127 |
128 | train_set = DataLoaderRhythmNet(st_maps_path=video_files_train, target_signal_path=config.TARGET_SIGNAL_DIR)
129 |
130 | train_loader = torch.utils.data.DataLoader(
131 | dataset=train_set,
132 | batch_size=config.BATCH_SIZE,
133 | num_workers=config.NUM_WORKERS,
134 | shuffle=False,
135 | collate_fn=collate_fn
136 | )
137 | print('\nTrain DataLoader constructed successfully!')
138 |
139 | # Code to use multiple GPUs (if available)
140 | if torch.cuda.device_count() > 1:
141 | print("Let's use", torch.cuda.device_count(), "GPUs!")
142 | model = torch.nn.DataParallel(model)
143 |
144 | # --------------------------------------
145 | # Load checkpointed model (if present)
146 | # --------------------------------------
147 | if config.DEVICE == "cpu":
148 | load_on_cpu = True
149 | else:
150 | load_on_cpu = False
151 | model, optimizer, checkpointed_loss, checkpoint_flag = load_model_if_checkpointed(model, optimizer, checkpoint_path, load_on_cpu=load_on_cpu)
152 | if checkpoint_flag:
153 | print(f"Checkpoint Found! Loading from checkpoint :: LOSS={checkpointed_loss}")
154 | else:
155 | print("Checkpoint Not Found! Training from beginning")
156 |
157 | # -----------------------------
158 | # Start training
159 | # -----------------------------
160 |
161 | train_loss_per_epoch = []
162 | for epoch in range(config.EPOCHS):
163 | # short-circuit for evaluation
164 | if k == 1:
165 | break
166 | target_hr_list, predicted_hr_list, train_loss = engine_vipl.train_fn(model, train_loader, optimizer, loss_fn)
167 |
168 | # Save model with final train loss (script to save the best weights?)
169 | if checkpointed_loss != 0.0:
170 | if train_loss < checkpointed_loss:
171 | save_model_checkpoint(model, optimizer, train_loss, checkpoint_path)
172 | checkpointed_loss = train_loss
173 | else:
174 | pass
175 | else:
176 | if len(train_loss_per_epoch) > 0:
177 | if train_loss < min(train_loss_per_epoch):
178 | save_model_checkpoint(model, optimizer, train_loss, checkpoint_path)
179 | else:
180 | save_model_checkpoint(model, optimizer, train_loss, checkpoint_path)
181 |
182 | metrics = compute_criteria(target_hr_list, predicted_hr_list)
183 |
184 | for metric in metrics.keys():
185 | writer.add_scalar(f"Train/{metric}", metrics[metric], epoch)
186 |
187 | print(f"\nFinished [Epoch: {epoch + 1}/{config.EPOCHS}]",
188 | "\nTraining Loss: {:.3f} |".format(train_loss),
189 | "HR_MAE : {:.3f} |".format(metrics["MAE"]),
190 | "HR_RMSE : {:.3f} |".format(metrics["RMSE"]),)
191 | # "Pearsonr : {:.3f} |".format(metrics["Pearson"]), )
192 |
193 | train_loss_per_epoch.append(train_loss)
194 | writer.add_scalar("Loss/train", train_loss, epoch+1)
195 |
196 | # Plots on tensorboard
197 | ba_plot_image = create_plot_for_tensorboard('bland_altman', target_hr_list, predicted_hr_list)
198 | gtvsest_plot_image = create_plot_for_tensorboard('gt_vs_est', target_hr_list, predicted_hr_list)
199 | writer.add_image('BA_plot', ba_plot_image, epoch)
200 | writer.add_image('gtvsest_plot', gtvsest_plot_image, epoch)
201 |
202 |         mean_loss = np.mean(train_loss_per_epoch)
203 |         # Log the mean training loss over all epochs for this fold
204 |         print(f"Avg Training Loss: {mean_loss} for {config.EPOCHS} epochs")
205 | writer.flush()
206 |
207 | # --------------------------------------
208 | # Load checkpointed model (if present)
209 | # --------------------------------------
210 | if config.DEVICE == "cpu":
211 | load_on_cpu = True
212 | else:
213 | load_on_cpu = False
214 | model, optimizer, checkpointed_loss, checkpoint_flag = load_model_if_checkpointed(model, optimizer,
215 | checkpoint_path,
216 | load_on_cpu=load_on_cpu)
217 | if checkpoint_flag:
218 | print(f"Checkpoint Found! Loading from checkpoint :: LOSS={checkpointed_loss}")
219 | else:
220 | print("Checkpoint Not Found! Training from beginning")
221 |
222 | # -----------------------------
223 | # Start Validation
224 | # -----------------------------
225 | test_set = DataLoaderRhythmNet(st_maps_path=video_files_test, target_signal_path=config.TARGET_SIGNAL_DIR)
226 | test_loader = torch.utils.data.DataLoader(
227 | dataset=test_set,
228 | batch_size=config.BATCH_SIZE,
229 | num_workers=config.NUM_WORKERS,
230 | shuffle=False,
231 | collate_fn=collate_fn
232 | )
233 | print('\nEvaluation DataLoader constructed successfully!')
234 |
235 | print(f"Finished Training, Validating {len(video_files_test)} video files for {config.EPOCHS_TEST} Epochs")
236 |
237 | eval_loss_per_epoch = []
238 | for epoch in range(config.EPOCHS_TEST):
239 | # validation
240 | target_hr_list, predicted_hr_list, test_loss = engine_vipl.eval_fn(model, test_loader, loss_fn)
241 |
242 | # truth_hr_list.append(target)
243 | # estimated_hr_list.append(predicted)
244 | metrics = compute_criteria(target_hr_list, predicted_hr_list)
245 | for metric in metrics.keys():
246 | writer.add_scalar(f"Test/{metric}", metrics[metric], epoch)
247 |
248 | print(f"\nFinished Test [Epoch: {epoch + 1}/{config.EPOCHS_TEST}]",
249 | "\nTest Loss: {:.3f} |".format(test_loss),
250 | "HR_MAE : {:.3f} |".format(metrics["MAE"]),
251 | "HR_RMSE : {:.3f} |".format(metrics["RMSE"]),)
252 |
253 | writer.add_scalar("Loss/test", test_loss, epoch)
254 |
255 | # Plots on tensorboard
256 | ba_plot_image = create_plot_for_tensorboard('bland_altman', target_hr_list, predicted_hr_list)
257 | gtvsest_plot_image = create_plot_for_tensorboard('gt_vs_est', target_hr_list, predicted_hr_list)
258 | writer.add_image('BA_plot', ba_plot_image, epoch)
259 | writer.add_image('gtvsest_plot', gtvsest_plot_image, epoch)
260 |
261 |
262 | # print(f"Avg Validation Loss: {mean_test_loss} for {config.EPOCHS_TEST} epochs")
263 | writer.flush()
264 | # plot_train_test_curves(train_loss_data, test_loss_data, plot_path=config.PLOT_PATH, fold_tag=k)
265 | # Plots on the local storage.
266 | gt_vs_est(target_hr_list, predicted_hr_list, plot_path=config.PLOT_PATH)
267 | bland_altman_plot(target_hr_list, predicted_hr_list, plot_path=config.PLOT_PATH)
268 | writer.close()
269 | print("done")
270 |
271 |
272 | if __name__ == '__main__':
273 | run_training()
274 |
--------------------------------------------------------------------------------
/src/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnweshCR7/RhythmNet/aa4b336a249af64d0c5e7f516863d6f1f6c285c1/src/models/__init__.py
--------------------------------------------------------------------------------
/src/models/lenet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 |
5 | class LeNet(nn.Module):
6 | def __init__(self):
7 | super(LeNet, self).__init__()
8 | self.conv1 = nn.Conv2d(1, 20, 5, 1)
9 | self.conv2 = nn.Conv2d(20, 50, 5, 1)
10 | self.fc1 = nn.Linear(4 * 4 * 50, 500)
11 | self.fc2 = nn.Linear(500, 10)
12 |
13 | def forward(self, x):
14 | x = F.relu(self.conv1(x))
15 | x = F.max_pool2d(x, 2, 2)
16 | x = F.relu(self.conv2(x))
17 | x = F.max_pool2d(x, 2, 2)
18 | x = x.view(-1, 4 * 4 * 50)
19 | x = F.relu(self.fc1(x))
20 | x = self.fc2(x)
21 | return x
22 |
23 | def name(self):
24 | return "LeNet"
--------------------------------------------------------------------------------
/src/models/resnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import math
3 | import torch
4 | import torch.utils.model_zoo as model_zoo
5 |
6 |
7 | def conv3x3(in_planes, out_planes, stride=1):
8 | "3x3 convolution with padding"
9 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
10 | padding=1, bias=False)
11 |
12 |
13 | class BasicBlock(nn.Module):
14 | expansion = 1
15 |
16 | def __init__(self, inplanes, planes, stride=1, downsample=None):
17 | super(BasicBlock, self).__init__()
18 | self.conv1 = conv3x3(inplanes, planes, stride)
19 | self.bn1 = nn.BatchNorm2d(planes)
20 | self.relu = nn.ReLU(inplace=True)
21 | self.conv2 = conv3x3(planes, planes)
22 | self.bn2 = nn.BatchNorm2d(planes)
23 | self.downsample = downsample
24 | self.stride = stride
25 |
26 | def forward(self, x):
27 | residual = x
28 |
29 | out = self.conv1(x)
30 | out = self.bn1(out)
31 | out = self.relu(out)
32 |
33 | out = self.conv2(out)
34 | out = self.bn2(out)
35 |
36 | if self.downsample is not None:
37 | residual = self.downsample(x)
38 |
39 | out += residual
40 | out = self.relu(out)
41 |
42 | return out
43 |
44 |
45 | class Bottleneck(nn.Module):
46 | expansion = 4
47 |
48 | def __init__(self, inplanes, planes, stride=1, downsample=None):
49 | super(Bottleneck, self).__init__()
50 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
51 | self.bn1 = nn.BatchNorm2d(planes)
52 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
53 | padding=1, bias=False)
54 | self.bn2 = nn.BatchNorm2d(planes)
55 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
56 | self.bn3 = nn.BatchNorm2d(planes * 4)
57 | self.relu = nn.ReLU(inplace=True)
58 | self.downsample = downsample
59 | self.stride = stride
60 |
61 | def forward(self, x):
62 | residual = x
63 |
64 | out = self.conv1(x)
65 | out = self.bn1(out)
66 | out = self.relu(out)
67 |
68 | out = self.conv2(out)
69 | out = self.bn2(out)
70 | out = self.relu(out)
71 |
72 | out = self.conv3(out)
73 | out = self.bn3(out)
74 |
75 | if self.downsample is not None:
76 | residual = self.downsample(x)
77 |
78 | out += residual
79 | out = self.relu(out)
80 |
81 | return out
82 |
83 |
84 | class ResNet(nn.Module):
85 |
86 | def __init__(self, block, layers, num_classes=1000):
87 | self.inplanes = 64
88 | super(ResNet, self).__init__()
89 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
90 | bias=False)
91 | self.bn1 = nn.BatchNorm2d(64)
92 | self.relu = nn.ReLU(inplace=True)
93 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
94 | self.layer1 = self._make_layer(block, 64, layers[0])
95 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
96 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
97 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
98 | # Average pooling of 10x1 (which is the image dim after the last layer)
99 | self.avgpool = nn.AvgPool2d((10, 1))
100 | # self.fc = nn.Linear(512 * block.expansion, num_classes)
101 | # This seems forced atm
102 | # 512*batch_size
103 | self.fc = nn.Linear(512, num_classes)
104 | # self.softmax = nn.LogSoftmax(dim=1)
105 |
106 | for m in self.modules():
107 | if isinstance(m, nn.Conv2d):
108 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
109 | m.weight.data.normal_(0, math.sqrt(2. / n))
110 | elif isinstance(m, nn.BatchNorm2d):
111 | m.weight.data.fill_(1)
112 | m.bias.data.zero_()
113 |
114 | def _make_layer(self, block, planes, blocks, stride=1):
115 | downsample = None
116 | if stride != 1 or self.inplanes != planes * block.expansion:
117 | downsample = nn.Sequential(
118 | nn.Conv2d(self.inplanes, planes * block.expansion,
119 | kernel_size=1, stride=stride, bias=False),
120 | nn.BatchNorm2d(planes * block.expansion),
121 | )
122 |
123 | layers = []
124 | layers.append(block(self.inplanes, planes, stride, downsample))
125 | self.inplanes = planes * block.expansion
126 | for i in range(1, blocks):
127 | layers.append(block(self.inplanes, planes))
128 |
129 | return nn.Sequential(*layers)
130 |
131 | def forward(self, x):
132 | x = self.conv1(x)
133 | x = self.bn1(x)
134 | x = self.relu(x)
135 | x = self.maxpool(x)
136 |
137 | x = self.layer1(x)
138 | x = self.layer2(x)
139 | x = self.layer3(x)
140 | x = self.layer4(x)
141 |
142 | x = self.avgpool(x)
143 | x = x.view(x.size(0), -1)
144 | # x = x.flatten()
145 | x = self.fc(x)
146 | # x = self.softmax(x)
147 |
148 | return x
149 |
150 |
151 | def resnet18(pretrained=False, **kwargs):
152 |     """Constructs a ResNet-18-style model (here with a single BasicBlock per stage instead of the usual two).
153 |
154 | Args:
155 | pretrained (bool): If True, returns a model pre-trained on ImageNet
156 | """
157 | model = ResNet(BasicBlock, [1, 1, 1, 1], **kwargs)
158 | # if pretrained:
159 | # model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
160 | return model
161 |
162 |
163 | if __name__ == '__main__':
164 |
165 | model = resnet18(pretrained=False)
166 | img = torch.rand(10, 3, 300, 25)*255
167 | # target = torch.randint(1, 20, (5, 5))
168 | print(model)
169 | x = model(img)
170 | rnn = nn.GRU(input_size=x.shape[1], hidden_size=1)
171 | output, h_n = rnn(x.unsqueeze(1))
172 | print(resnet18)
--------------------------------------------------------------------------------
/src/models/rhythmNet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.nn import functional as F
4 | import torchvision.models as models
5 | import ssl
6 | import config
7 |
8 | ssl._create_default_https_context = ssl._create_stdlib_context
9 |
10 | '''
11 | The backbone CNN for the RhythmNet model is a ResNet-18.
12 | '''
13 |
14 |
15 | class RhythmNet(nn.Module):
16 | def __init__(self):
17 | super(RhythmNet, self).__init__()
18 |
19 | # resnet o/p -> bs x 1000
20 | # self.resnet18 = resnet18(pretrained=False)
21 | resnet = models.resnet18(pretrained=False)
22 | modules = list(resnet.children())[:-1]
23 |
24 | self.resnet18 = nn.Sequential(*modules)
25 | # The resnet average pool layer before fc
26 | # self.avgpool = nn.AvgPool2d((10, 1))
27 | self.resnet_linear = nn.Linear(512, 1000)
28 | self.fc_regression = nn.Linear(1000, 1)
29 | self.gru_fc_out = nn.Linear(1000, 1)
30 | self.rnn = nn.GRU(input_size=1000, hidden_size=1000, num_layers=1)
31 | # self.fc = nn.Linear(config.GRU_TEMPORAL_WINDOW, config.GRU_TEMPORAL_WINDOW)
32 |
33 | def forward(self, st_maps, target):
34 | batched_output_per_clip = []
35 | gru_input_per_clip = []
36 | hr_per_clip = []
37 |
38 |         # Add a leading dimension to reflect batch_size = 1 (comment this out if the input is already batched)
39 | st_maps = st_maps.unsqueeze(0)
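        # st_maps arrives as (T, C, H, W) for one video (T = number of clips); after unsqueeze
        # it is (1, T, C, H, W), so each clip is passed through the ResNet backbone as a
        # batch of size one, yielding a 512-d feature that resnet_linear maps to 1000-d.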
40 | for t in range(st_maps.size(1)):
41 | # with torch.no_grad():
42 | x = self.resnet18(st_maps[:, t, :, :, :])
43 | # collapse dimensions to BSx512 (resnet o/p)
44 | x = x.view(x.size(0), -1)
45 | # output dim: BSx1 and Squeeze sequence length after completing GRU step
46 | x = self.resnet_linear(x)
47 | # Save CNN features per clip for the GRU
48 | gru_input_per_clip.append(x.squeeze(0))
49 |
50 | # Final regression layer for CNN features -> HR (per clip)
51 | x = self.fc_regression(x)
52 | # normalize HR by frame-rate: 25.0 for VIPL
53 | x = x * 25.0
54 | batched_output_per_clip.append(x.squeeze(0))
55 | # input should be (seq_len, batch, input_size)
56 |
57 | # the features extracted from the backbone CNN are fed to a one-layer GRU structure.
58 | regression_output = torch.stack(batched_output_per_clip, dim=0).permute(1, 0)
59 |
60 | # Trying out GRU in addition to the regression now.
61 | gru_input = torch.stack(gru_input_per_clip, dim=0)
62 | gru_output, h_n = self.rnn(gru_input.unsqueeze(1))
63 | # gru_output = gru_output.squeeze(1)
64 | for i in range(gru_output.size(0)):
65 | hr = self.gru_fc_out(gru_output[i, :, :])
66 | hr_per_clip.append(hr.flatten())
67 |
68 | gru_output_seq = torch.stack(hr_per_clip, dim=0).permute(1, 0)
69 | # return output_seq, gru_output.squeeze(0), fc_out
70 | return regression_output, gru_output_seq.squeeze(0)[:6]
71 |
72 | def name(self):
73 | return "RhythmNet"
74 |
75 |
76 | if __name__ == '__main__':
77 | # cm = RhythmNet()
78 | # img = torch.rand(3, 28, 28)
79 | # target = torch.randint(1, 20, (5, 5))
80 | # x = cm(img)
81 | # print(x)
82 | resnet18 = models.resnet18(pretrained=False)
83 | print(resnet18)
84 |
--------------------------------------------------------------------------------
/src/models/simpleCNN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.nn import functional as F
4 |
5 |
6 | class SimpleCNN(nn.Module):
7 | def __init__(self):
8 | super(SimpleCNN, self).__init__()
9 |
10 | self.fc1 = nn.Linear(28 * 28, 500)
11 | self.fc2 = nn.Linear(500, 256)
12 | self.fc3 = nn.Linear(256, 10)
13 |
14 | def forward(self, x):
15 | x = x.view(-1, 28 * 28)
16 | x = F.relu(self.fc1(x))
17 | x = F.relu(self.fc2(x))
18 | x = self.fc3(x)
19 |
20 | return x
21 |
22 | def name(self):
23 | return "SimpleCNN"
24 |
25 |
26 | if __name__ == '__main__':
27 | cm = SimpleCNN()
28 | img = torch.rand(3, 28, 28)
29 | target = torch.randint(1, 20, (5, 5))
30 | x = cm(img)
31 | print(x)
32 |
--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnweshCR7/RhythmNet/aa4b336a249af64d0c5e7f516863d6f1f6c285c1/src/utils/__init__.py
--------------------------------------------------------------------------------
/src/utils/dataset.py:
--------------------------------------------------------------------------------
1 | # import albumentations
2 | import torch
3 | import numpy as np
4 | from PIL import Image
5 | from PIL import ImageFile
6 | from torch.utils.data import Dataset
7 | from utils.signal_utils import read_target_data, calculate_hr, get_hr_data
8 |
9 | ImageFile.LOAD_TRUNCATED_IMAGES = True
10 |
11 |
12 | class DataLoaderRhythmNet(Dataset):
13 | """
14 | Dataset class for RhythmNet
15 | """
16 | # The data is now the SpatioTemporal Maps instead of videos
17 |
18 | def __init__(self, st_maps_path, target_signal_path):
19 | self.H = 180
20 | self.W = 180
21 | self.C = 3
22 | # self.video_path = data_path
23 | self.st_maps_path = st_maps_path
24 | # self.resize = resize
25 | self.target_path = target_signal_path
26 | self.maps = None
27 |
28 | mean = (0.485, 0.456, 0.406)
29 | std = (0.229, 0.224, 0.225)
30 | # Maybe add more augmentations
31 | # self.augmentation_pipeline = albumentations.Compose(
32 | # [
33 | # albumentations.Normalize(
34 | # mean, std, max_pixel_value=255.0, always_apply=True
35 | # )
36 | # ]
37 | # )
38 |
39 | def __len__(self):
40 | return len(self.st_maps_path)
41 |
42 | def __getitem__(self, index):
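        # Returns one video's data: "st_maps" as a float tensor of shape (num_clips, C, H, W)
        # and "target" as the per-clip heart rates (bpm) read from the pre-computed CSV.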
43 | # identify the name of the video file so as to get the ground truth signal
44 | self.video_file_name = self.st_maps_path[index].split('/')[-1].split('.')[0]
45 | # targets, timestamps = read_target_data(self.target_path, self.video_file_name)
46 | # sampling rate is video fps (check)
47 |
48 | # Load the maps for video at 'index'
49 | self.maps = np.load(self.st_maps_path[index])
50 | map_shape = self.maps.shape
51 | self.maps = self.maps.reshape((-1, map_shape[3], map_shape[1], map_shape[2]))
52 |
53 | # target_hr = calculate_hr(targets, timestamps=timestamps)
54 | # target_hr = calculate_hr_clip_wise(map_shape[0], targets, timestamps=timestamps)
55 | target_hr = get_hr_data(self.video_file_name)
56 |         # Make sure we don't end up with more maps than HR targets (trim maps to the number of targets)
57 | # target_hr = target_hr[:map_shape[0]]
58 | self.maps = self.maps[:target_hr.shape[0], :, :, :]
59 | return {
60 | "st_maps": torch.tensor(self.maps, dtype=torch.float),
61 | "target": torch.tensor(target_hr, dtype=torch.float)
62 | }
63 |
--------------------------------------------------------------------------------
/src/utils/generate_fold_csv.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import pandas as pd
3 | import os
4 | import cv2
5 | from sklearn import model_selection
6 | import scipy.io
7 |
8 |
9 | def preprocess_file_name(file_path):
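    # Maps a raw video path to the name of its pre-computed st-map file, e.g. (hypothetical layout)
    # ".../VIPL-HR/data/<scene>/p1/v1/source1/video.avi" -> "vipl_npy/p1_v1_source1.npy".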
10 | split_by_path = file_path.split('/')
11 | preprocessed_file_name = "_".join(split_by_path[-4:-1])
12 | return os.path.join("vipl_npy", f"{preprocessed_file_name}.npy")
13 |
14 |
15 | def make_csv(fold_data_dict):
16 | # video_file_paths = glob.glob(config.ST_MAPS_PATH + "/**/*.npy")
17 |
18 |
19 | # video_file_paths = glob.glob("/Users/anweshcr7/thesis/src/data/vipl_npy/*.npy")
20 | # video_files = []
21 | #
22 | # for path in video_file_paths:
23 | # split_by_path = path.split('/')
24 | # video_file = os.path.join(split_by_path[-2], split_by_path[-1])
25 | # video_files.append(video_file)
26 | #
27 | # video_files = [x for x in video_files if "source4" not in x]
28 | # num_folds = 5
29 | # kf = model_selection.KFold(n_splits=num_folds)
30 |
31 | col_names = ['video', 'fold']
32 | df = pd.DataFrame(columns=col_names)
33 |
34 | fold = 1
35 |
36 | for idx, fold in enumerate(fold_data_dict.keys()):
37 | video_files_fold = []
38 | fold_subjects = [str(x) for x in fold_data_dict[fold].squeeze(0)]
39 | for subject in fold_subjects:
40 | video_files_fold.extend(glob.glob(f"/Volumes/Backup Plus/vision/VIPL-HR/data/*/p{subject}/*/*/*.avi"))
41 |
42 | # Don't consider NIR videos
43 | video_files_fold = [file_path for file_path in video_files_fold if "source4" not in file_path]
44 | video_files_fold = [preprocess_file_name(file_path) for file_path in video_files_fold]
45 |
46 |
47 | trainDF = pd.DataFrame(video_files_fold, columns=['video'])
48 | trainDF['fold'] = idx + 1
49 |
50 | df = pd.concat([df, trainDF])
51 | df.to_csv("VIPL_folds_final.csv", index=False)
52 |
53 | print("done")
54 |
55 |
56 | # for train_idx, validation_idx in kf.split(video_files):
57 | # trainDF = pd.DataFrame([video_files[idx] for idx in train_idx], columns=['video'])
58 | # validateDF = pd.DataFrame([video_files[idx] for idx in validation_idx], columns=['video'])
59 | # trainDF[['set', 'iteration']] = 'T', fold
60 | # validateDF[['set', 'iteration']] = 'V', fold
61 | # fold += 1
62 | #
63 | # df = pd.concat([df, trainDF, validateDF])
64 | # df.to_csv("VIPL_npy.csv", index=False)
65 |
66 | return
67 |
68 |
69 | def make_csv_with_frame_rate():
70 | # video_file_paths = glob.glob(config.ST_MAPS_PATH + "/**/*.npy")
71 | video_file_paths = glob.glob("/Users/anweshcr7/thesis/src/data/vipl_npy/*.npy")
72 | video_source = "/Volumes/Backup Plus/vision/vipl_videos"
73 | video_files = []
74 | fr_dict = {}
75 |
76 | for path in video_file_paths:
77 | split_by_path = path.split('/')
78 | video_file = os.path.join(split_by_path[-2], split_by_path[-1])
79 | video_files.append(video_file)
80 | video_name = split_by_path[-1].split('.')[0] + ".avi"
81 | cap = cv2.VideoCapture(os.path.join(video_source, video_name))
82 | frameRate = cap.get(5)
83 | fr_dict[video_file] = frameRate
84 | cap.release()
85 |
86 |
87 | video_files = [x for x in video_files if "source4" not in x]
88 | num_folds = 5
89 | kf = model_selection.KFold(n_splits=num_folds)
90 |
91 | col_names = ['video', 'set', 'iteration', 'fps']
92 | df = pd.DataFrame(columns=col_names)
93 |
94 | fold = 1
95 | for train_idx, validation_idx in kf.split(video_files):
96 | trainDF = pd.DataFrame([video_files[idx] for idx in train_idx], columns=['video'])
97 | validateDF = pd.DataFrame([video_files[idx] for idx in validation_idx], columns=['video'])
98 | trainDF[['set', 'iteration']] = 'T', fold
99 | validateDF[['set', 'iteration']] = 'V', fold
100 | trainDF[['fps']] = [fr_dict[video_files[idx]] for idx in train_idx]
101 | validateDF[['fps']] = [fr_dict[video_files[idx]] for idx in validation_idx]
102 | fold += 1
103 |
104 | df = pd.concat([df, trainDF, validateDF])
105 | df.to_csv("VIPL_npy_with_fps.csv", index=False)
106 |
107 | return
108 |
109 |
110 | if __name__ == '__main__':
111 | fold_data_dict = {}
112 | fold_files = glob.glob("/Volumes/Backup Plus/vision/VIPL-HR/fold/*.mat")
113 | for fold in fold_files:
114 | name = fold.split('/')[-1].split('.')[0]
115 | fold_data = scipy.io.loadmat(fold)
116 | fold_data_dict[name] = fold_data[name]
117 | make_csv(fold_data_dict)
118 | print("done")
--------------------------------------------------------------------------------
/src/utils/model_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 |
6 |
7 | def plot_loss(train_loss_data, test_loss_data, plot_path):
8 | if not os.path.exists(plot_path):
9 | os.makedirs(plot_path)
10 |
11 | x_ax = np.arange(1, len(train_loss_data)+1)
12 | fig = plt.figure()
13 | plt.plot(x_ax, train_loss_data, label="train_loss")
14 | plt.plot(x_ax, test_loss_data, label="test_loss")
15 | plt.title('Train-Test Loss')
16 | plt.ylabel('Loss')
17 | plt.xlabel('Num Epoch')
18 | plt.legend(loc='best')
19 | plt.show()
20 | fig.savefig(plot_path+'/train-test_loss.png', dpi=fig.dpi)
21 |
22 |
23 | def save_model_checkpoint(model, optimizer, loss, checkpoint_path):
24 | save_filename = "running_model.pt"
25 | # checkpoint_path = os.path.join(checkpoint_path, save_filename)
26 | if not os.path.exists(checkpoint_path):
27 | os.makedirs(checkpoint_path)
28 |
29 | torch.save({
30 | # 'epoch': epoch,
31 | 'model_state_dict': model.state_dict(),
32 | 'optimizer_state_dict': optimizer.state_dict(),
33 | 'loss': loss,
34 | }, os.path.join(checkpoint_path, save_filename))
35 | print('Saved!')
36 |
37 |
38 | def load_model_if_checkpointed(model, optimizer, checkpoint_path, load_on_cpu=False):
39 | loss = 0.0
40 | checkpoint_flag = False
41 |
42 | # check if checkpoint exists
43 | if os.path.exists(os.path.join(checkpoint_path, "running_model.pt")):
44 | checkpoint_flag = True
45 | if load_on_cpu:
46 | checkpoint = torch.load(os.path.join(checkpoint_path, "running_model.pt"), map_location=torch.device('cpu'))
47 | else:
48 | checkpoint = torch.load(os.path.join(checkpoint_path, "running_model.pt"))
49 |
50 | model.load_state_dict(checkpoint['model_state_dict'])
51 | optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
52 | # epoch = checkpoint['epoch']
53 | loss = checkpoint['loss']
54 |
55 | return model, optimizer, loss, checkpoint_flag
56 |
--------------------------------------------------------------------------------
/src/utils/plot_scripts.py:
--------------------------------------------------------------------------------
1 | import os
2 | import io
3 | import numpy as np
4 | import PIL.Image
5 | from torchvision.transforms import ToTensor
6 | import config as config
7 | # from utils.read_data import plot_signal
8 | import matplotlib.pyplot as plt
9 |
10 |
11 | def plot_train_test_curves(train_loss_data, test_loss_data, plot_path, fold_tag=1):
12 | if not os.path.exists(plot_path):
13 | os.makedirs(plot_path)
14 |
15 | clip = min(len(train_loss_data), len(test_loss_data))
16 | x_ax = np.arange(1, clip + 1)
17 | fig = plt.figure()
18 | plt.plot(x_ax, train_loss_data[:clip], label="train_loss")
19 | plt.plot(x_ax, test_loss_data[:clip], label="test_loss")
20 | plt.title('Train-Test Loss')
21 | plt.ylabel('Loss')
22 | plt.xlabel('Num Epoch')
23 | plt.legend(loc='best')
24 | plt.show()
25 | fig.savefig(plot_path + f'/loss_fold_{fold_tag}.png', dpi=fig.dpi)
26 |
27 |
28 | def gt_vs_est(data1, data2, plot_path=None, tb=False):
29 | data1 = np.asarray(data1)
30 | data2 = np.asarray(data2)
31 | # mean = np.mean([data1, data2], axis=0)
32 | # diff = data1 - data2 # Difference between data1 and data2
33 | # md = np.mean(diff) # Mean of the difference
34 | # sd = np.std(diff, axis=0) # Standard deviation of the difference
35 |
36 | fig = plt.figure()
37 | plt.scatter(data1, data2)
38 | plt.title('true labels vs estimated')
39 | plt.ylabel('estimated HR')
40 | plt.xlabel('true HR')
41 | # plt.axhline(md, color='gray', linestyle='--')
42 | # plt.axhline(md + 1.96*sd, color='gray', linestyle='--')
43 | # plt.axhline(md - 1.96*sd, color='gray', linestyle='--')
44 |
45 | if tb:
46 | buf = io.BytesIO()
47 | plt.savefig(buf, format='png')
48 | buf.seek(0)
49 | return buf
50 |
51 | else:
52 | # plt.show()
53 | fig.savefig(plot_path + f'/true_vs_est.png', dpi=fig.dpi)
54 |
55 |
56 | def bland_altman_plot(data1, data2, plot_path=None, tb=False):
57 | data1 = np.asarray(data1)
58 | data2 = np.asarray(data2)
59 | mean = np.mean([data1, data2], axis=0)
60 | diff = data1 - data2 # Difference between data1 and data2
61 | md = np.mean(diff) # Mean of the difference
62 | sd = np.std(diff, axis=0) # Standard deviation of the difference
63 |
64 | fig = plt.figure()
65 | plt.scatter(mean, diff)
66 | plt.axhline(md, color='gray', linestyle='--')
67 | plt.axhline(md + 1.96 * sd, color='gray', linestyle='--')
68 | plt.axhline(md - 1.96 * sd, color='gray', linestyle='--')
69 |
70 | if tb:
71 | buf = io.BytesIO()
72 | plt.savefig(buf, format='png')
73 | buf.seek(0)
74 | return buf
75 |
76 | else:
77 | # plt.show()
78 | fig.savefig(plot_path + f'/bland-altman_new.png', dpi=fig.dpi)
79 |
80 |
81 | def create_plot_for_tensorboard(plot_name, data1, data2):
82 | if plot_name == "bland_altman":
83 | fig_buf = bland_altman_plot(data1, data2, tb=True)
84 |
85 | if plot_name == "gt_vs_est":
86 | fig_buf = gt_vs_est(data1, data2, tb=True)
87 |
88 | image = PIL.Image.open(fig_buf)
89 | image = ToTensor()(image)
90 |
91 | return image
92 |
93 | #
94 | # def plot_rmse(data, plot_path, fold=0):
95 | # if not os.path.exists(plot_path):
96 | # os.makedirs(plot_path)
97 | #
98 | # x_ax = np.arange(1, len(data)+1)
99 | # fig = plt.figure()
100 | # plt.plot(x_ax, data, label="predicted_HR_RMSE")
101 | # plt.ylabel('RMSE_HR')
102 | # plt.xlabel('Time')
103 | # plt.show()
104 | # fig.savefig(plot_path + f'/RMSE_fold{fold}.png', dpi=fig.dpi)
105 |
106 |
107 | if __name__ == '__main__':
108 | # plot_signal('data/data_preprocessed', 's22_trial05')
109 | gt_vs_est(np.random.random(100), np.random.random(100), plot_path=config.PLOT_PATH)
110 |
--------------------------------------------------------------------------------
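A short sketch of how create_plot_for_tensorboard from plot_scripts.py above can be logged with a TensorBoard writer; the log directory and random HR values are placeholders:

import numpy as np
from torch.utils.tensorboard import SummaryWriter
from utils.plot_scripts import create_plot_for_tensorboard

writer = SummaryWriter(log_dir="runs/demo")          # placeholder log dir
hr_true = np.random.uniform(60, 100, size=50)        # dummy ground-truth HRs
hr_est = hr_true + np.random.normal(0, 3, size=50)   # dummy estimated HRs

# The helper returns a CHW image tensor, which is the default layout add_image expects.
image = create_plot_for_tensorboard("bland_altman", hr_true, hr_est)
writer.add_image("bland_altman", image, global_step=0)
writer.close()
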
/src/utils/signal_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 | import numpy as np
4 | import pandas as pd
5 | import torch
6 | from scipy import signal
7 | import heartpy as hp
8 | from tqdm import tqdm
9 | import config as config
10 | import utils.video2st_maps as video2st_maps
11 | import matplotlib.pyplot as plt
12 |
13 |
14 | # To be used for DEAP dataset where the PPG signal is data[38]
15 | def get_ppg_channel(x):
16 |     # the PPG channel is at index 38
17 | return x[38]
18 |
19 |
20 | # Reads the clip-wise HR data that was computed and stored in the csv files (per video)
21 | def get_hr_data(file_name):
22 | hr_df = pd.read_csv(config.HR_DATA_PATH + f"{file_name}.csv")
23 |
24 | return hr_df["hr_bpm"].values
25 |
26 |
27 | # Read the raw signal from the ground truth csv and resample.
28 | # Not needed during training, as we compute the HRs beforehand and use them directly instead of the raw signals
29 | def read_target_data(target_data_path, video_file_name):
30 | signal_data_file_path = os.path.join(target_data_path, f"{video_file_name} PPG.csv")
31 | signal_df = pd.read_csv(signal_data_file_path)
32 |
33 | return signal_df["Signal"].values, signal_df["Time"].values
34 |     # In RhythmNet we likely don't need to resample; the call below is kept for reference only.
35 |     # return filter_and_resample_truth_signal(signal_df, resampling_size=3000)
36 |
37 |
38 | # Function allows filtering and resampling of signals. Not being used for VIPL-HR
39 | def filter_and_resample_truth_signal(signal_df, resampling_size):
40 | # Signal should be bandpass filtered to remove noise outside of expected HR frequency range.
41 | # But we are using CLEANER_PPG signals which are considered filtered.
42 |     original_sample_rate = hp.get_samplerate_mstimer(signal_df["Time"].values)
43 |
44 | # filtered = hp.filter_signal(signal_df["Signal"].values, [0.7, 2.5], sample_rate=sample_rate,
45 | # order=3, filtertype='bandpass')
46 | resampled_signal = signal.resample(signal_df["Signal"].values, resampling_size, t=signal_df["Time"].values)
47 |
48 |     # return both the resampled signal and its resampled time axis
49 | return resampled_signal[0], resampled_signal[1]
50 |
51 |
52 | # Returns index of value that is nearest to the arg:value in the arg:array
53 | def find_nearest(array, value):
54 | array = np.asarray(array)
55 | idx = (np.abs(array - value)).argmin()
56 | return idx
57 |
58 |
59 | # Controller Function to compute and store the HR values as csv (HR values measured clip-wise i.e. per st_map per video)
60 | def compute_hr_for_rhythmnet():
61 | data_files = glob.glob(config.TARGET_SIGNAL_DIR + "*.csv")
62 |     for file in tqdm(data_files):
63 |         # debugging override for a single file:
64 |         # file = '/Users/anweshcr7/Downloads/CleanerPPG/VIPL-HR/Cleaned/p41_v7_source2.csv'
65 | signal_df = pd.read_csv(file)
66 | signal_data, timestamps, peak_data = signal_df["Signal"].values, signal_df["Time"].values, signal_df["Peaks"].values
67 | video_path = config.FACE_DATA_DIR + f"{file.split('/')[-1].split('.')[0]}.avi"
68 | video_meta_data = video2st_maps.get_frames_and_video_meta_data(video_path, meta_data_only=True)
69 | # hr_segmentwise = hp.process_segmentwise(signal_df["Signal"].values, sample_rate=128, segment_width=10, segment_overlap=0.951)
70 | # hr_segmentwise = hr_segmentwise[1]["bpm"]
71 | # plt.plot(np.arange(len(hr_segmentwise)), hr_segmentwise)
72 | # plt.show()
73 | npy_path = f"{config.ST_MAPS_PATH}{file.split('/')[-1].split('.')[0]}.npy"
74 | if os.path.exists(npy_path):
75 |             video_meta_data["num_maps"] = np.load(npy_path).shape[0]
76 | else:
77 | continue
78 | hr = np.asarray(calculate_hr_clip_wise(timestamps, signal_df, video_meta_data), dtype="float32")
79 | file_name = file.split("/")[-1].split(".")[0].split(" ")[0]
80 | hr_df = pd.DataFrame(hr, columns=["hr_bpm"])
81 | hr_df.to_csv(f"../data/hr_csv/{file_name}.csv", index=False)
82 | # print("eheee")
83 |
84 |
85 | # Function to compute and store the HR values as csv (HR values measured clip-wise i.e. per st_map per video)
86 | def calculate_hr_clip_wise(timestamps=None, signal_df=None, video_meta_data=None):
87 |
88 | sliding_window_stride = int((video_meta_data["sliding_window_stride"]/video_meta_data["frame_rate"])*1000)
89 |     sliding_window_size_frame = int((config.CLIP_SIZE/video_meta_data["frame_rate"]))  # clip duration in whole seconds
90 | # convert to milliseconds
91 | sliding_window_size = sliding_window_size_frame * 1000
92 | # num_maps = int((video_meta_data["num_frames"] - config.CLIP_SIZE)/sliding_window_size_frame) + 1
93 | num_maps = video_meta_data["num_maps"]
94 | # for i in range(len(timestamps)):
95 | # print(timestamps[i+1]-timestamps[i])
96 | count = 0
97 | hr_list = []
98 | for start_time in range(0, int(timestamps[-1]), sliding_window_stride):
99 | if count == num_maps:
100 | break
101 | # start_index = np.where(timestamps == start_time)
102 | end_time = start_time + sliding_window_size
103 | # end_index = np.where(timestamps == end_time)
104 | start_index = np.searchsorted(timestamps, start_time, side='left')
105 | end_index = np.searchsorted(timestamps, end_time, side='left')
106 |
107 | # start_index = start_index[0][0]
108 | if end_index == 0:
109 | end_index = len(timestamps) - 1
110 | # break
111 |
112 | curr_data = signal_df.iloc[start_index:end_index]
113 | time_intervals = curr_data[curr_data["Peaks"] == 1]["Time"].values
114 |         ibi_array = np.diff(time_intervals)  # inter-beat intervals in milliseconds
115 |         if len(ibi_array) == 0:
116 |             hr_bpm = hr_list[-1] if hr_list else 75.0  # reuse the previous clip's HR; fall back to 75 bpm if none exists yet
117 | else:
118 | hr_bpm = 1000/np.mean(ibi_array)*60
119 | hr_list.append(hr_bpm)
120 |
121 | count += 1
122 |
123 |
124 | # plt.plot(np.arange(len(hr_list)), hr_list)
125 | # plt.show()
126 | return hr_list
127 |
128 |
129 | # Function to compute HR from raw signal.
130 | def calculate_hr(signal_data, timestamps=None):
131 | sampling_rate = 47.63
132 | if timestamps is not None:
133 | sampling_rate = hp.get_samplerate_mstimer(timestamps)
134 | try:
135 | wd, m = hp.process(signal_data, sample_rate=sampling_rate)
136 | hr_bpm = m["bpm"]
137 | except:
138 | hr_bpm = 75.0
139 |
140 | if np.isnan(hr_bpm):
141 | hr_bpm = 75.0
142 | return hr_bpm
143 |
144 | else:
145 | # We are working with predicted HR:
146 | # need to filter and do other stuff.. lets see
147 | signal_data = hp.filter_signal(signal_data, cutoff=[0.7, 2.5], sample_rate=sampling_rate, order=6,
148 | filtertype='bandpass')
149 | try:
150 | wd, m = hp.process(signal_data, sample_rate=sampling_rate, high_precision=True, clean_rr=True)
151 | hr_bpm = m["bpm"]
152 | except:
153 |             print("Bad signal received (could not be processed); using default HR value = 75 bpm")
154 | hr_bpm = 75.0
155 | return hr_bpm
156 |
157 |
158 | if __name__ == '__main__':
159 | compute_hr_for_rhythmnet()
160 |
161 | files = glob.glob(config.HR_DATA_PATH+"/*.csv")
162 | for file in files:
163 | hr = get_hr_data(file.split('/')[-1].split('.')[0])
164 |         if hr.dtype == np.object_:
165 | print(file)
166 | try:
167 | torch.tensor(hr, dtype=torch.float)
168 | except:
169 | print(file)
--------------------------------------------------------------------------------
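The clip-wise HR in calculate_hr_clip_wise above is derived from the mean inter-beat interval (in milliseconds): hr_bpm = 1000 / mean(IBI) * 60, i.e. 60000 / mean IBI. A quick worked example with made-up peak times:

import numpy as np

peak_times_ms = np.array([0.0, 790.0, 1600.0, 2400.0])  # hypothetical peak timestamps
ibi_ms = np.diff(peak_times_ms)                          # inter-beat intervals: 790, 810, 800 ms
hr_bpm = 1000 / np.mean(ibi_ms) * 60                     # 60000 / 800 = 75.0 bpm
print(hr_bpm)
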
/src/utils/video2st_maps.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | import glob
4 | import numpy as np
5 | import config as config
6 | from tqdm import tqdm
7 | import matplotlib.pyplot as plt
8 | # used for accessing url to download files
9 | import urllib.request as urlreq
10 | from sklearn import preprocessing
11 | from joblib import Parallel, delayed, parallel_backend
12 | import time
13 |
14 | # use the stdlib SSL context so the cascade xml files can be downloaded over https
15 | import ssl
16 |
17 | ssl._create_default_https_context = ssl._create_stdlib_context
18 |
19 |
20 | # Chunks the ROI into a 5x5 grid of blocks (25 blocks per frame)
21 | def chunkify(img, block_width=5, block_height=5):
22 | shape = img.shape
23 | x_len = shape[0] // block_width
24 | y_len = shape[1] // block_height
25 | # print(x_len, y_len)
26 |
27 | chunks = []
28 | x_indices = [i for i in range(0, shape[0] + 1, x_len)]
29 | y_indices = [i for i in range(0, shape[1] + 1, y_len)]
30 |
31 | shapes = list(zip(x_indices, y_indices))
32 |
33 | # # for plotting purpose
34 | # implot = plt.imshow(img)
35 | #
36 | # end_x_list = []
37 | # end_y_list = []
38 |
39 |
40 | for i in range(len(x_indices) - 1):
41 | # try:
42 | start_x = x_indices[i]
43 | end_x = x_indices[i + 1]
44 | for j in range(len(y_indices) - 1):
45 | start_y = y_indices[j]
46 | end_y = y_indices[j+1]
47 | # end_x_list.append(end_x)
48 | # end_y_list.append(end_y)
49 | chunks.append(img[start_x:end_x, start_y:end_y])
50 | # except IndexError:
51 | # print('End of Array')
52 |
53 | return chunks
54 |
55 |
56 | def plot_image(img):
57 | plt.axis("off")
58 | plt.imshow(img, origin='upper')
59 | plt.show()
60 |
61 |
62 | # Downloads xml file for face detection cascade
63 | def get_haarcascade():
64 | haarcascade_url = config.haarcascade_url
65 | haarcascade_filename = haarcascade_url.split('/')[-1]
66 |     # check if the file is in the working directory
67 | if haarcascade_filename in os.listdir(os.curdir):
68 | # print("xml file already exists")
69 | pass
70 | else:
71 | # download file from url and save locally as haarcascade_frontalface_alt2.xml, < 1MB
72 | urlreq.urlretrieve(haarcascade_url, haarcascade_filename)
73 | print("xml file downloaded")
74 |
75 | return cv2.CascadeClassifier(haarcascade_filename)
76 |
77 | # Downloads xml file for eye detection cascade
78 | def get_eye_cascade():
79 | eye_cascade_url = config.eye_cascade_url
80 | eye_cascade_filename = eye_cascade_url.split('/')[-1]
81 |     # check if the file is in the working directory
82 | if eye_cascade_filename in os.listdir(os.curdir):
83 | # print("xml file already exists")
84 | pass
85 | else:
86 |         # download the eye-cascade xml from the url and save it locally, < 1MB
87 | urlreq.urlretrieve(eye_cascade_url, eye_cascade_filename)
88 | print("xml file downloaded")
89 |
90 | return cv2.CascadeClassifier(eye_cascade_filename)
91 |
92 |
93 | # Function to read the video data as an array of frames and additionally return metadata like FPS, dims etc.
94 | def get_frames_and_video_meta_data(video_path, meta_data_only=False):
95 | cap = cv2.VideoCapture(video_path)
96 |     frameRate = cap.get(cv2.CAP_PROP_FPS)  # frame rate
97 |
98 | # Frame dimensions: WxH
99 |     frame_dims = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
100 | # Paper mentions a stride of 0.5 seconds = 15 frames
101 | sliding_window_stride = int(frameRate / 2)
102 |     num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
103 | if meta_data_only:
104 | return {"frame_rate": frameRate, "sliding_window_stride": sliding_window_stride, "num_frames": num_frames}
105 |
106 | # Frames from the video have shape NumFrames x H x W x C
107 | frames = np.zeros((num_frames, frame_dims[1], frame_dims[0], 3), dtype='uint8')
108 |
109 | frame_counter = 0
110 | while cap.isOpened():
111 | # curr_frame_id = int(cap.get(1)) # current frame number
112 | ret, frame = cap.read()
113 | if not ret:
114 | break
115 |
116 | frames[frame_counter, :, :, :] = frame
117 | frame_counter += 1
118 | if frame_counter == num_frames:
119 | break
120 |
121 | cap.release()
122 | return frames, frameRate, sliding_window_stride
123 |
124 |
125 | # Threaded function for st_map generation from a single video arg:file in dataset
126 | def get_spatio_temporal_map_threaded(file):
127 | # print(f"Generating Maps for file: {file}")
128 | # maps = np.zeros((10, config.CLIP_SIZE, 25, 3))
129 | # print(index)
130 | maps = preprocess_video_to_st_maps(
131 | video_path=file,
132 | output_shape=(180, 180), clip_size=config.CLIP_SIZE)
133 |
134 | if maps is None:
135 | return 1
136 |
137 | file_name = file.split('/')[-1].split('.')[0]
138 | folder_name = file.split('/')[-2]
139 | save_path = os.path.join(config.ST_MAPS_PATH, folder_name)
140 | if not os.path.exists(save_path):
141 | os.makedirs(save_path)
142 | save_path = os.path.join(save_path, f"{file_name}.npy")
143 | # np.save(f"{config.ST_MAPS_PATH}{file_name}.npy", maps)
144 | np.save(save_path, maps)
145 | return 1
146 |
147 |
148 | # Threaded wrapper function for st_maps from all videos that calls the threaded func in a parallel fashion
149 | def get_spatio_temporal_map_threaded_wrapper():
150 | video_files = glob.glob(config.FACE_DATA_DIR + '*avi')
151 | # video_files = video_files[:10]
152 | less_than_ten = ['/Volumes/T7/vipl_videos/p19_v2_source2.avi', '/Volumes/T7/vipl_videos/p32_v7_source3.avi', '/Volumes/T7/vipl_videos/p32_v7_source4.avi', '/Volumes/T7/vipl_videos/p40_v7_source2.avi', '/Volumes/T7/vipl_videos/p22_v3_source1.avi']
153 | video_files = [file for file in video_files if file not in less_than_ten]
154 | start = time.time()
155 | with parallel_backend("loky", inner_max_num_threads=4):
156 | Parallel(n_jobs=3)(delayed(get_spatio_temporal_map_threaded)(file) for file in tqdm(video_files))
157 | end = time.time()
158 |
159 | print('{:.4f} s'.format(end - start))
160 |
161 |
162 | # function for st_map generation from all videos in dataset
163 | def get_spatio_temporal_map():
164 | video_files = glob.glob(config.FACE_DATA_DIR + '*avi')
165 | # video_files = video_files[100:110]
166 | # video_files = ['/Volumes/Backup Plus/vision/vipl_videos/p10_v1_source1.avi', '/Volumes/Backup Plus/vision/vipl_videos/p10_v1_source2.avi']
167 | # video_files = ['/Volumes/Backup Plus/vision/DEAP_emotion/face_video/s01/s01_trial01.avi']
168 | start = time.time()
169 | for file in tqdm(video_files):
170 | # maps = np.zeros((1, config.CLIP_SIZE, 25, 3))
171 | # for index in range(1):
172 | # print(index)
173 | if os.path.exists(f"{config.ST_MAPS_PATH}{file.split('/')[-1].split('.')[0]}.npy"):
174 | map = np.load(f"{config.ST_MAPS_PATH}{file.split('/')[-1].split('.')[0]}.npy")
175 | if (map.shape[0]) > 125:
176 | print(f"\nFilename:{file} | num maps: {map.shape[0]}")
177 | else:
178 | continue
179 | # maps = preprocess_video_to_st_maps(
180 | # video_path=file,
181 | # output_shape=(180, 180), clip_size=config.CLIP_SIZE)
182 | # if maps is None:
183 | # continue
184 | optimized_end = time.time()
185 | # print('{:.4f} s'.format((optimized_end - start)/60))
186 |
187 | # file_name = file.split('/')[-1].split('.')[0]
188 | # folder_name = file.split('/')[-2]
189 | # save_path = os.path.join(config.ST_MAPS_PATH, folder_name)
190 | # if not os.path.exists(save_path):
191 | # os.makedirs(save_path)
192 | # save_path = os.path.join(save_path, f"{file_name}.npy")
193 | # # np.save(f"{config.ST_MAPS_PATH}{file_name}.npy", maps)
194 | # np.save(save_path, maps)
195 |
196 | end = time.time()
197 | print('{:.4f} s'.format(end - start))
198 | # return maps
199 |
200 |
201 | # Optimized function for converting videos to Spatio-temporal maps
202 | def preprocess_video_to_st_maps(video_path, output_shape, clip_size):
203 | frames, frameRate, sliding_window_stride = get_frames_and_video_meta_data(video_path)
204 |
205 | num_frames = frames.shape[0]
206 | output_shape = (frames.shape[1], frames.shape[2])
207 | num_maps = int((num_frames - clip_size)/sliding_window_stride + 1)
208 |     if num_maps < 1:  # video is shorter than a single clip
209 | # print(num_maps)
210 | print(video_path)
211 | return None
212 |
213 | # stacked_maps is the all the st maps for a given video (=num_maps) stacked.
214 | stacked_maps = np.zeros((num_maps, config.CLIP_SIZE, 25, 3))
215 | # processed_maps will contain all the data after processing each frame, but not yet converted into maps
216 | processed_maps = np.zeros((num_frames, 25, 3))
217 | # processed_frames = np.zeros((num_frames, output_shape[0], output_shape[1], 3))
218 | processed_frames = []
219 | map_index = 0
220 |
221 | # Init scaler and detector
222 | min_max_scaler = preprocessing.MinMaxScaler()
223 | detector = get_haarcascade()
224 | eye_detector = get_eye_cascade()
225 |
226 | # First we process all the frames and then work with sliding window to save repeated processing for the same frame index
227 | for idx, frame in enumerate(frames):
228 | # spatio_temporal_map = np.zeros((fr, 25, 3))
229 | '''
230 | Preprocess the Image
231 | Step 1: Use cv2 face detector based on Haar cascades
232 | Step 2: Crop the frame based on the face co-ordinates (we need to do 160%)
233 | Step 3: Downsample the face cropped frame to output_shape = 36x36
234 | '''
235 | faces = detector.detectMultiScale(frame, 1.3, 5)
236 |         if len(faces) != 0:
237 | (x, y, w, d) = faces[0]
238 | frame_cropped = frame[y:(y + d), x:(x + w)]
239 | eyes = eye_detector.detectMultiScale(frame_cropped, 1.2, 3)
240 | # if len(eyes) > 0:
241 | # # for having the same radius in both eyes
242 | # (eye_x, eye_y, eye_w, eye_h) = eyes[0]
243 | # eye_radius = (eye_w + eye_h) // 5
244 | # mask = np.ones(frame_cropped.shape[:2], dtype="uint8")
245 | # for (ex, ey, ew, eh) in eyes[:2]:
246 | # eye_center = (ex + ew // 2, ey + eh // 2)
247 | # # if eye_radius
248 | # cv2.circle(mask, eye_center, eye_radius, 0, -1)
249 | # # eh = int(0.8*eh)
250 | # # ew = int(0.8*ew)
251 | # # cv2.rectangle(mask, (ex, ey), (ex+ew, ey+eh), 0, -1)
252 | #
253 | # frame_masked = cv2.bitwise_and(frame_cropped, frame_cropped, mask=mask)
254 | # else:
255 | # frame_masked = frame_cropped
256 | # # plot_image(frame_masked)
257 |
258 | frame_masked = frame_cropped
259 | else:
260 |             # The problem is that this frame doesn't get cropped :/
261 | # (x, y, w, d) = (308, 189, 215, 215)
262 | # frame_masked = frame[y:(y + d), x:(x + w)]
263 |
264 | # print("face detection failed, image frame will be masked")
265 | mask = np.zeros(frame.shape[:2], dtype="uint8")
266 | frame_masked = cv2.bitwise_and(frame, frame, mask=mask)
267 | # plot_image(frame_masked)
268 |
269 | # frame_cropped = frame[y:(y + d), x:(x + w)]
270 |
271 | try:
272 | # frame_resized = cv2.resize(frame_masked, output_shape, interpolation=cv2.INTER_CUBIC)
273 | frame_resized = cv2.cvtColor(frame_masked, cv2.COLOR_BGR2YUV)
274 |
275 | except:
276 | print('\n--------- ERROR! -----------\nUsual cv empty error')
277 | print(f'Shape of img1: {frame.shape}')
278 | # print(f'bbox: {bbox}')
279 | print(f'This is at idx: {idx}')
280 | exit(666)
281 |
282 | processed_frames.append(frame_resized)
283 | # roi_blocks = chunkify(frame_resized)
284 | # for block_idx, block in enumerate(roi_blocks):
285 | # avg_pixels = cv2.mean(block)
286 | # processed_maps[idx, block_idx, 0] = avg_pixels[0]
287 | # processed_maps[idx, block_idx, 1] = avg_pixels[1]
288 | # processed_maps[idx, block_idx, 2] = avg_pixels[2]
289 |
290 | # At this point we have the processed maps from all the frames in a video and now we do the sliding window part.
291 | for start_frame_index in range(0, num_frames, sliding_window_stride):
292 | end_frame_index = start_frame_index + clip_size
293 | if end_frame_index > num_frames:
294 | break
295 | # # print(f"start_idx: {start_frame_index} | end_idx: {end_frame_index}")
296 | spatio_temporal_map = np.zeros((clip_size, 25, 3))
297 | #
298 | # spatio_temporal_map = processed_maps[start_frame_index:end_frame_index, :, :]
299 |
300 |
301 | for idx, frame in enumerate(processed_frames[start_frame_index:end_frame_index]):
302 | roi_blocks = chunkify(frame)
303 | for block_idx, block in enumerate(roi_blocks):
304 | avg_pixels = cv2.mean(block)
305 | spatio_temporal_map[idx, block_idx, 0] = avg_pixels[0]
306 | spatio_temporal_map[idx, block_idx, 1] = avg_pixels[1]
307 | spatio_temporal_map[idx, block_idx, 2] = avg_pixels[2]
308 |
309 | for block_idx in range(spatio_temporal_map.shape[1]):
310 | # Not sure about uint8
311 | fn_scale_0_255 = lambda x: (x * 255.0).astype(np.uint8)
312 | scaled_channel_0 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 0].reshape(-1, 1))
313 | spatio_temporal_map[:, block_idx, 0] = fn_scale_0_255(scaled_channel_0.flatten())
314 | scaled_channel_1 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 1].reshape(-1, 1))
315 | spatio_temporal_map[:, block_idx, 1] = fn_scale_0_255(scaled_channel_1.flatten())
316 | scaled_channel_2 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 2].reshape(-1, 1))
317 | spatio_temporal_map[:, block_idx, 2] = fn_scale_0_255(scaled_channel_2.flatten())
318 |
319 | stacked_maps[map_index, :, :, :] = spatio_temporal_map
320 | map_index += 1
321 |
322 | return stacked_maps
323 |
324 | # UNOPTIMIZED CODE
325 | # def get_st_maps(video_path, output_shape, clip_size):
326 | # frames, frameRate, sliding_window_stride = get_frames_and_video_meta_data(video_path)
327 | #
328 | # num_frames = frames.shape[0]
329 | # num_maps = int((num_frames - clip_size)/sliding_window_stride + 1)
330 | # maps = np.zeros((num_maps, config.CLIP_SIZE, 25, 3))
331 | # map_index = 0
332 | #
333 | # # Init scaler and detector
334 | # min_max_scaler = preprocessing.MinMaxScaler()
335 | # detector = get_haarcascade()
336 | # eye_detector = get_eye_cascade()
337 | #
338 | # for start_frame_index in tqdm(range(0, num_frames, sliding_window_stride)):
339 | # end_frame_index = start_frame_index + clip_size
340 | # if end_frame_index > 400:
341 | # break
342 | # # print(f"start_idx: {start_frame_index} | end_idx: {end_frame_index}")
343 | # spatio_temporal_map = np.zeros((clip_size, 25, 3))
344 | #
345 | # frames_in_clip = frames[start_frame_index:end_frame_index]
346 | #
347 | # for idx, frame in enumerate(frames_in_clip):
348 | # '''
349 | # Preprocess the Image
350 | # Step 1: Use cv2 face detector based on Haar cascades
351 | # Step 2: Crop the frame based on the face co-ordinates (we need to do 160%)
352 | # Step 3: Downsample the face cropped frame to output_shape = 36x36
353 | # '''
354 | # faces = detector.detectMultiScale(frame, 1.3, 5)
355 | # if len(faces) is not 0:
356 | # (x, y, w, d) = faces[0]
357 | # frame_cropped = frame[y:(y + d), x:(x + w)]
358 | # eyes = eye_detector.detectMultiScale(frame_cropped, 1.2, 3)
359 | # if len(eyes) > 0:
360 | # # for having the same radius in both eyes
361 | # (eye_x, eye_y, eye_w, eye_h) = eyes[0]
362 | # eye_radius = (eye_w + eye_h) // 5
363 | # mask = np.ones(frame_cropped.shape[:2], dtype="uint8")
364 | # for (ex, ey, ew, eh) in eyes[:2]:
365 | # eye_center = (ex + ew // 2, ey + eh // 2)
366 | # # if eye_radius
367 | # cv2.circle(mask, eye_center, eye_radius, 0, -1)
368 | # # eh = int(0.8*eh)
369 | # # ew = int(0.8*ew)
370 | # # cv2.rectangle(mask, (ex, ey), (ex+ew, ey+eh), 0, -1)
371 | #
372 | # frame_masked = cv2.bitwise_and(frame_cropped, frame_cropped, mask=mask)
373 | # else:
374 | # frame_masked = frame_cropped
375 | # # plot_image(frame_masked)
376 | # else:
377 | # # The problemis that this doesn't get cropped :/
378 | # # (x, y, w, d) = (308, 189, 215, 215)
379 | # # frame_masked = frame[y:(y + d), x:(x + w)]
380 | #
381 | # # print("face detection failed, image frame will be masked")
382 | # mask = np.zeros(frame.shape[:2], dtype="uint8")
383 | # frame_masked = cv2.bitwise_and(frame, frame, mask=mask)
384 | # # plot_image(frame_masked)
385 | #
386 | # # frame_cropped = frame[y:(y + d), x:(x + w)]
387 | #
388 | # try:
389 | # frame_resized = cv2.resize(frame_masked, output_shape, interpolation=cv2.INTER_CUBIC)
390 | # frame_resized = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2YUV)
391 | #
392 | # except:
393 | # print('\n--------- ERROR! -----------\nUsual cv empty error')
394 | # print(f'Shape of img1: {frame.shape}')
395 | # # print(f'bbox: {bbox}')
396 | # print(f'This is at idx: {idx}')
397 | # exit(666)
398 | #
399 | # roi_blocks = chunkify(frame_resized)
400 | # for block_idx, block in enumerate(roi_blocks):
401 | # avg_pixels = cv2.mean(block)
402 | # spatio_temporal_map[idx, block_idx, 0] = avg_pixels[0]
403 | # spatio_temporal_map[idx, block_idx, 1] = avg_pixels[1]
404 | # spatio_temporal_map[idx, block_idx, 2] = avg_pixels[2]
405 | #
406 | # print('he;;p')
407 | #
408 | # for block_idx in range(spatio_temporal_map.shape[1]):
409 | # # Not sure about uint8
410 | # fn_scale_0_255 = lambda x: (x * 255.0)
411 | # scaled_channel_0 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 0].reshape(-1, 1))
412 | # spatio_temporal_map[:, block_idx, 0] = fn_scale_0_255(scaled_channel_0.flatten())
413 | # scaled_channel_1 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 1].reshape(-1, 1))
414 | # spatio_temporal_map[:, block_idx, 1] = fn_scale_0_255(scaled_channel_1.flatten())
415 | # scaled_channel_2 = min_max_scaler.fit_transform(spatio_temporal_map[:, block_idx, 2].reshape(-1, 1))
416 | # spatio_temporal_map[:, block_idx, 2] = fn_scale_0_255(scaled_channel_2.flatten())
417 | #
418 | # maps[map_index, :, :, :] = spatio_temporal_map
419 | # map_index += 1
420 | #
421 | # return maps
422 |
423 |
424 | if __name__ == '__main__':
425 | # get_frames_and_video_meta_data('/Volumes/T7/vipl_videos/p58_v4_source3.avi')
426 | # get_spatio_temporal_map()
427 | # get_spatio_temporal_map_threaded_wrapper()
428 | # video_files = glob.glob(config.FACE_DATA_DIR + '/**/*avi')
429 | # r = list(process_map(get_spatio_temporal_map_threaded, video_files[:2], max_workers=1))
430 | # signal = read_target_data("/Users/anweshcr7/github/RhythmNet/data/data_preprocessed/", "s01_trial04")
431 | #
432 | # resampled = signal.resample(df["Signal"].values, 3000, df["Time"].values)
433 | # resampled_sample_rate = hp.get_samplerate_mstimer(resampled[1])
434 | # print(calculate_hr(resampled[0], resampled_sample_rate))
435 |
436 | # make_csv_with_frame_rate()
437 | print('done')
--------------------------------------------------------------------------------
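A minimal sketch (using a synthetic frame) of the per-frame step inside preprocess_video_to_st_maps above: chunkify splits a 180x180 YUV frame into a 5x5 grid of 36x36 blocks, and each block's per-channel mean becomes one of the 25 entries of that frame's row in the spatio-temporal map:

import cv2
import numpy as np
from utils.video2st_maps import chunkify

frame_yuv = np.random.randint(0, 256, (180, 180, 3), dtype=np.uint8)  # synthetic YUV frame
blocks = chunkify(frame_yuv)                  # 25 blocks of shape 36x36x3
row = np.zeros((25, 3))
for block_idx, block in enumerate(blocks):
    row[block_idx] = cv2.mean(block)[:3]      # per-channel mean of the block
print(row.shape)                              # (25, 3): one time step of the ST map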