├── pose-estimation ├── datasets │ ├── x │ ├── coco.py │ └── transformations.py ├── modules │ ├── loss.py │ ├── get_parameters.py │ ├── conv.py │ ├── load_state.py │ ├── one_euro_filter.py │ ├── pose.py │ └── keypoints.py ├── readme.md ├── scripts │ ├── convert_to_onnx.py │ ├── make_val_subset.py │ └── prepare_train_labels.py ├── models │ └── with_mobilenet.py ├── demo.py ├── val.py └── train.py ├── emotion-detection ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── vgg.cpython-37.pyc │ │ ├── resnet.cpython-37.pyc │ │ └── __init__.cpython-37.pyc │ ├── vgg.py │ └── resnet.py ├── images │ ├── emojis │ │ ├── Sad.png │ │ ├── Angry.png │ │ ├── Fear.png │ │ ├── Happy.png │ │ ├── Disgust.png │ │ ├── Neutral.png │ │ └── Surprise.png │ └── results │ │ ├── Happy-Result.png │ │ └── Neutral-Result.png ├── BlazeFace_PyTorch │ ├── anchors.npy │ ├── blazeface.pth │ ├── __pycache__ │ │ └── blazeface.cpython-37.pyc │ └── blazeface.py ├── emotion_taker.py ├── emotion_detection.py ├── visualize.py ├── functional.py └── transforms.py ├── Classroom.png ├── Pose-Detection.png ├── finger-print-sensor.png ├── BlockDiagram-Classroom.png ├── LICENSE ├── README.md ├── audio_to_text.py └── attendance-code.txt /pose-estimation/datasets/x: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /emotion-detection/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .vgg import * 2 | from .resnet import * -------------------------------------------------------------------------------- /Classroom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/Classroom.png -------------------------------------------------------------------------------- /Pose-Detection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/Pose-Detection.png -------------------------------------------------------------------------------- /finger-print-sensor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/finger-print-sensor.png -------------------------------------------------------------------------------- /BlockDiagram-Classroom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/BlockDiagram-Classroom.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Sad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Sad.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Angry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Angry.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Fear.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Fear.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Happy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Happy.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Disgust.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Disgust.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Neutral.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Neutral.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Surprise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Surprise.png -------------------------------------------------------------------------------- /emotion-detection/BlazeFace_PyTorch/anchors.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/BlazeFace_PyTorch/anchors.npy -------------------------------------------------------------------------------- /emotion-detection/BlazeFace_PyTorch/blazeface.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/BlazeFace_PyTorch/blazeface.pth -------------------------------------------------------------------------------- /emotion-detection/images/results/Happy-Result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/results/Happy-Result.png -------------------------------------------------------------------------------- /emotion-detection/images/results/Neutral-Result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/results/Neutral-Result.png -------------------------------------------------------------------------------- /emotion-detection/models/__pycache__/vgg.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/models/__pycache__/vgg.cpython-37.pyc -------------------------------------------------------------------------------- /emotion-detection/models/__pycache__/resnet.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/models/__pycache__/resnet.cpython-37.pyc -------------------------------------------------------------------------------- /emotion-detection/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /pose-estimation/modules/loss.py: -------------------------------------------------------------------------------- 1 | def l2_loss(input, target, mask, batch_size): 2 | loss = (input - target) * mask 3 | loss = (loss * loss) / 2 / batch_size 4 | 5 | return loss.sum() 6 | -------------------------------------------------------------------------------- /emotion-detection/BlazeFace_PyTorch/__pycache__/blazeface.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/BlazeFace_PyTorch/__pycache__/blazeface.cpython-37.pyc -------------------------------------------------------------------------------- /pose-estimation/readme.md: -------------------------------------------------------------------------------- 1 | The pretrained weights for this model are available at this [link](https://drive.google.com/drive/folders/1GQYH4M5X5gclQo5k9pBID9n0j7XvMdrZ?usp=sharing). 2 | To run the code successfully, make sure you download them and place them here, keeping the same file name. 3 | -------------------------------------------------------------------------------- /emotion-detection/emotion_taker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from skimage import io 5 | import visualize 6 | 7 | # same 7 emotion classes as in visualize.py 8 | class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'] 9 | 10 | # classify the captured face images and convert the per-class counts to percentages 11 | emotion_array = visualize.visualizer() 12 | emotion_array = np.array(emotion_array) 13 | emotion_array = (emotion_array / sum(emotion_array)) * 100 14 | 15 | plt.rcParams['figure.figsize'] = (13.5, 5.5) 16 | for i in range(len(emotion_array)): 17 | axes = plt.subplot(2, 4, i + 1)  # subplot indices start at 1 18 | emojis_img = io.imread('images/emojis/%s.png' % str(class_names[i])) 19 | plt.imshow(emojis_img) 20 | plt.xlabel(str(emotion_array[i]), fontsize=16) 21 | axes.set_xticks([]) 22 | axes.set_yticks([]) 23 | plt.tight_layout() 24 | plt.savefig(os.path.join('images/results/{}.png'.format(i + 1))) 25 | plt.close() 26 | -------------------------------------------------------------------------------- /pose-estimation/modules/get_parameters.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | def get_parameters(model, predicate): 5 | for module in model.modules(): 6 | for param_name, param in module.named_parameters(): 7 | if predicate(module, param_name): 8 | yield param 9 | 10 | 11 | def get_parameters_conv(model, name): 12 | return get_parameters(model, lambda m, p: isinstance(m, nn.Conv2d) and m.groups == 1 and p == name) 13 | 14 | 15 | def get_parameters_conv_depthwise(model, name): 16 | return get_parameters(model, lambda m, p: isinstance(m, nn.Conv2d) 17 | and m.groups == m.in_channels 18 | and m.in_channels == m.out_channels 19 | and p == name) 20 | 21 | 22 | def get_parameters_bn(model, name): 23 | return get_parameters(model, lambda m, p: isinstance(m, nn.BatchNorm2d) and p == name) 24 | -------------------------------------------------------------------------------- /LICENSE:
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Vineeth Raj B 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /pose-estimation/scripts/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | 5 | from models.with_mobilenet import PoseEstimationWithMobileNet 6 | from modules.load_state import load_state 7 | 8 | 9 | def convert_to_onnx(net, output_name): 10 | input = torch.randn(1, 3, 256, 456) 11 | input_names = ['data'] 12 | output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs', 13 | 'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs'] 14 | 15 | torch.onnx.export(net, input, output_name, verbose=True, input_names=input_names, output_names=output_names) 16 | 17 | 18 | if __name__ == '__main__': 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint') 21 | parser.add_argument('--output-name', type=str, default='human-pose-estimation.onnx', 22 | help='name of output model in ONNX format') 23 | args = parser.parse_args() 24 | 25 | net = PoseEstimationWithMobileNet() 26 | checkpoint = torch.load(args.checkpoint_path) 27 | load_state(net, checkpoint) 28 | 29 | convert_to_onnx(net, args.output_name) 30 | -------------------------------------------------------------------------------- /pose-estimation/modules/conv.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | def conv(in_channels, out_channels, kernel_size=3, padding=1, bn=True, dilation=1, stride=1, relu=True, bias=True): 5 | modules = [nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)] 6 | if bn: 7 | modules.append(nn.BatchNorm2d(out_channels)) 8 | if relu: 9 | modules.append(nn.ReLU(inplace=True)) 10 | return nn.Sequential(*modules) 11 | 12 | 13 | def conv_dw(in_channels, out_channels, kernel_size=3, padding=1, stride=1, dilation=1): 14 | return nn.Sequential( 15 | nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation=dilation, groups=in_channels, bias=False), 16 | nn.BatchNorm2d(in_channels), 17 | nn.ReLU(inplace=True), 18 | 19 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 20 | nn.BatchNorm2d(out_channels), 21 | 
nn.ReLU(inplace=True), 22 | ) 23 | 24 | 25 | def conv_dw_no_bn(in_channels, out_channels, kernel_size=3, padding=1, stride=1, dilation=1): 26 | return nn.Sequential( 27 | nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation=dilation, groups=in_channels, bias=False), 28 | nn.ELU(inplace=True), 29 | 30 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 31 | nn.ELU(inplace=True), 32 | ) 33 | -------------------------------------------------------------------------------- /pose-estimation/modules/load_state.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | 4 | def load_state(net, checkpoint): 5 | source_state = checkpoint['state_dict'] 6 | target_state = net.state_dict() 7 | new_target_state = collections.OrderedDict() 8 | for target_key, target_value in target_state.items(): 9 | if target_key in source_state and source_state[target_key].size() == target_state[target_key].size(): 10 | new_target_state[target_key] = source_state[target_key] 11 | else: 12 | new_target_state[target_key] = target_state[target_key] 13 | print('[WARNING] Not found pre-trained parameters for {}'.format(target_key)) 14 | 15 | net.load_state_dict(new_target_state) 16 | 17 | 18 | def load_from_mobilenet(net, checkpoint): 19 | source_state = checkpoint['state_dict'] 20 | target_state = net.state_dict() 21 | new_target_state = collections.OrderedDict() 22 | for target_key, target_value in target_state.items(): 23 | k = target_key 24 | if k.find('model') != -1: 25 | k = k.replace('model', 'module.model') 26 | if k in source_state and source_state[k].size() == target_state[target_key].size(): 27 | new_target_state[target_key] = source_state[k] 28 | else: 29 | new_target_state[target_key] = target_state[target_key] 30 | print('[WARNING] Not found pre-trained parameters for {}'.format(target_key)) 31 | 32 | net.load_state_dict(new_target_state) 33 | -------------------------------------------------------------------------------- /emotion-detection/models/vgg.py: -------------------------------------------------------------------------------- 1 | '''VGG11/13/16/19 in Pytorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | 7 | 8 | cfg = { 9 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 10 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 11 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 12 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 13 | } 14 | 15 | 16 | class VGG(nn.Module): 17 | def __init__(self, vgg_name): 18 | super(VGG, self).__init__() 19 | self.features = self._make_layers(cfg[vgg_name]) 20 | self.classifier = nn.Linear(512, 7) 21 | 22 | def forward(self, x): 23 | out = self.features(x) 24 | out = out.view(out.size(0), -1) 25 | out = F.dropout(out, p=0.5, training=self.training) 26 | out = self.classifier(out) 27 | return out 28 | 29 | def _make_layers(self, cfg): 30 | layers = [] 31 | in_channels = 3 32 | for x in cfg: 33 | if x == 'M': 34 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 35 | else: 36 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1), 37 | nn.BatchNorm2d(x), 38 | nn.ReLU(inplace=True)] 39 | in_channels = x 40 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 41 | return nn.Sequential(*layers) 42 | 
-------------------------------------------------------------------------------- /pose-estimation/modules/one_euro_filter.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def get_alpha(rate=30, cutoff=1): 5 | tau = 1 / (2 * math.pi * cutoff) 6 | te = 1 / rate 7 | return 1 / (1 + tau / te) 8 | 9 | 10 | class LowPassFilter: 11 | def __init__(self): 12 | self.x_previous = None 13 | 14 | def __call__(self, x, alpha=0.5): 15 | if self.x_previous is None: 16 | self.x_previous = x 17 | return x 18 | x_filtered = alpha * x + (1 - alpha) * self.x_previous 19 | self.x_previous = x_filtered 20 | return x_filtered 21 | 22 | 23 | class OneEuroFilter: 24 | def __init__(self, freq=15, mincutoff=1, beta=0.05, dcutoff=1): 25 | self.freq = freq 26 | self.mincutoff = mincutoff 27 | self.beta = beta 28 | self.dcutoff = dcutoff 29 | self.filter_x = LowPassFilter() 30 | self.filter_dx = LowPassFilter() 31 | self.x_previous = None 32 | self.dx = None 33 | 34 | def __call__(self, x): 35 | if self.dx is None: 36 | self.dx = 0 37 | else: 38 | self.dx = (x - self.x_previous) * self.freq 39 | dx_smoothed = self.filter_dx(self.dx, get_alpha(self.freq, self.dcutoff)) 40 | cutoff = self.mincutoff + self.beta * abs(dx_smoothed) 41 | x_filtered = self.filter_x(x, get_alpha(self.freq, cutoff)) 42 | self.x_previous = x 43 | return x_filtered 44 | 45 | 46 | if __name__ == '__main__': 47 | filter = OneEuroFilter(freq=15, beta=0.1) 48 | for val in range(10): 49 | x = val + (-1)**(val % 2) 50 | x_filtered = filter(x) 51 | print(x_filtered, x) 52 | -------------------------------------------------------------------------------- /pose-estimation/scripts/make_val_subset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import random 4 | 5 | 6 | if __name__ == '__main__': 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--labels', type=str, required=True, help='path to json with keypoints val labels') 9 | parser.add_argument('--output-name', type=str, default='val_subset.json', 10 | help='name of output file with subset of val labels') 11 | parser.add_argument('--num-images', type=int, default=250, help='number of images in subset') 12 | args = parser.parse_args() 13 | 14 | with open(args.labels, 'r') as f: 15 | data = json.load(f) 16 | 17 | random.seed(0) 18 | total_val_images = 5000 19 | idxs = list(range(total_val_images)) 20 | random.shuffle(idxs) 21 | 22 | images_by_id = {} 23 | for idx in idxs[:args.num_images]: 24 | images_by_id[data['images'][idx]['id']] = data['images'][idx] 25 | 26 | annotations_by_image_id = {} 27 | for annotation in data['annotations']: 28 | if annotation['image_id'] in images_by_id: 29 | if not annotation['image_id'] in annotations_by_image_id: 30 | annotations_by_image_id[annotation['image_id']] = [] 31 | annotations_by_image_id[annotation['image_id']].append(annotation) 32 | 33 | subset = { 34 | 'info': data['info'], 35 | 'licenses': data['licenses'], 36 | 'images': [], 37 | 'annotations': [], 38 | 'categories': data['categories'] 39 | } 40 | for image_id, image in images_by_id.items(): 41 | subset['images'].append(image) 42 | if image_id in annotations_by_image_id: # image has at least 1 annotation 43 | subset['annotations'].extend(annotations_by_image_id[image_id]) 44 | 45 | with open(args.output_name, 'w') as f: 46 | json.dump(subset, f, indent=4) 47 | 48 | -------------------------------------------------------------------------------- 
/emotion-detection/emotion_detection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[21]: 5 | 6 | 7 | import torch 8 | from torch import nn 9 | import torchvision 10 | from BlazeFace_PyTorch import blazeface 11 | 12 | gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 13 | net = blazeface.BlazeFace().to(gpu) 14 | net.load_weights("BlazeFace_PyTorch/blazeface.pth") 15 | net.load_anchors("BlazeFace_PyTorch/anchors.npy") 16 | 17 | 18 | # In[39]: 19 | 20 | 21 | import cv2 22 | import matplotlib.pyplot as plt 23 | import numpy as np 24 | get_ipython().run_line_magic('matplotlib', 'inline') 25 | 26 | video_capture = cv2.VideoCapture(0) 27 | 28 | while True: 29 | # Capture frame-by-frame 30 | ret, frame = video_capture.read() 31 | #cv2.imshow('Video', frame) 32 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 33 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 34 | #frame = cv2.resize(frame, (128, 128)) 35 | 36 | '''faces = faceCascade.detectMultiScale( 37 | gray, 38 | scaleFactor=1.1, 39 | minNeighbors=5, 40 | minSize=(30, 30), 41 | flags=cv2.cv.CV_HAAR_SCALE_IMAGE 42 | ) 43 | 44 | # Draw a rectangle around the faces 45 | for (x, y, w, h) in faces: 46 | cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)''' 47 | 48 | # Display the resulting frame 49 | cv2.imshow('Video', frame) 50 | 51 | if cv2.waitKey(1) & 0xFF == ord('q'): 52 | break 53 | # When everything is done, release the capture 54 | frame = cv2.resize(frame, (128, 128)) 55 | detections = net.predict_on_image(frame) 56 | detections = detections.cpu().numpy() 57 | for i in range(detections.shape[0]): 58 | ymin = int(detections[0, 0] * frame.shape[0]) 59 | xmin = int(detections[0, 1] * frame.shape[1]) 60 | ymax = int(detections[0, 2] * frame.shape[0]) 61 | xmax = int(detections[0, 3] * frame.shape[1]) 62 | face = frame[xmin:xmax, ymin:ymax] 63 | face = cv2.flip(face, 1) 64 | face = cv2.resize(face, (128, 128)) 65 | sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]]) 66 | faces = cv2.filter2D(face, -1, sharpen_kernel) 67 | cv2.imwrite('images/{}.jpg'.format(i+1), faces) 68 | plt.imshow(faces) 69 | video_capture.release() 70 | cv2.destroyAllWindows() 71 | #plt.show() 72 | 73 | 74 | # In[17]: 75 | 76 | 77 | get_ipython().system('python visualize.py') 78 | 79 | 80 | # In[18]: 81 | 82 | 83 | detections.shape 84 | 85 | 86 | # In[19]: 87 | 88 | 89 | detections.ndim 90 | 91 | 92 | # In[20]: 93 | 94 | 95 | detections.shape[0] 96 | 97 | 98 | # In[ ]: 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /emotion-detection/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | visualize results for test image 3 | """ 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from PIL import Image 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import os 12 | from torch.autograd import Variable 13 | 14 | import transforms as transforms 15 | from skimage import io 16 | from skimage.transform import resize 17 | from models import * 18 | 19 | 20 | 21 | def rgb2gray(rgb): 22 | return np.dot(rgb[...,:3], [0.299, 0.587, 0.114]) 23 | def visualizer(): 24 | cut_size = 44 25 | transform_test = transforms.Compose([ 26 | transforms.TenCrop(cut_size), 27 | transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),]) 28 | x = 0 29 | for root, dirs, 
files in os.walk('images/'): 30 | for f in files: 31 | x = x+1 32 | emotion_array = [0,0,0,0,0,0,0] 33 | for i in range(x): 34 | raw_img = io.imread('images/{}.jpg'.format(i+1)) 35 | gray = rgb2gray(raw_img) 36 | gray = resize(gray, (48,48), mode='symmetric').astype(np.uint8) 37 | 38 | img = gray[:, :, np.newaxis] 39 | 40 | img = np.concatenate((img, img, img), axis=2) 41 | img = Image.fromarray(img) 42 | inputs = transform_test(img) 43 | 44 | class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'] 45 | 46 | net = VGG('VGG19') 47 | checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'), map_location=torch.device('cuda')) 48 | net.load_state_dict(checkpoint['net']) 49 | net.cuda() 50 | net.eval() 51 | 52 | ncrops, c, h, w = np.shape(inputs) 53 | 54 | inputs = inputs.view(-1, c, h, w) 55 | inputs = inputs.cuda() 56 | inputs = Variable(inputs, volatile=True) 57 | outputs = net(inputs) 58 | 59 | outputs_avg = outputs.view(ncrops, -1).mean(0) # avg over crops 60 | 61 | score = F.softmax(outputs_avg) 62 | _, predicted = torch.max(outputs_avg.data, 0) 63 | 64 | plt.rcParams['figure.figsize'] = (13.5,5.5) 65 | axes=plt.subplot(1, 3, 1) 66 | plt.imshow(raw_img) 67 | plt.xlabel('Input Image', fontsize=16) 68 | axes.set_xticks([]) 69 | axes.set_yticks([]) 70 | plt.tight_layout() 71 | 72 | 73 | plt.subplots_adjust(left=0.05, bottom=0.2, right=0.95, top=0.9, hspace=0.02, wspace=0.3) 74 | 75 | plt.subplot(1, 3, 2) 76 | ind = 0.1+0.6*np.arange(len(class_names)) # the x locations for the groups 77 | width = 0.4 # the width of the bars: can also be len(x) sequence 78 | color_list = ['red','orangered','darkorange','limegreen','darkgreen','royalblue','navy'] 79 | for i in range(len(class_names)): 80 | plt.bar(ind[i], score.data.cpu().numpy()[i], width, color=color_list[i]) 81 | plt.title("Classification results ",fontsize=20) 82 | plt.xlabel(" Expression Category ",fontsize=16) 83 | plt.ylabel(" Classification Score ",fontsize=16) 84 | plt.xticks(ind, class_names, rotation=45, fontsize=14) 85 | 86 | axes=plt.subplot(1, 3, 3) 87 | emojis_img = io.imread('images/emojis/%s.png' % str(class_names[int(predicted.cpu().numpy())])) 88 | plt.imshow(emojis_img) 89 | plt.xlabel('Emoji Expression', fontsize=16) 90 | axes.set_xticks([]) 91 | axes.set_yticks([]) 92 | plt.tight_layout() 93 | # show emojis 94 | 95 | #plt.show() 96 | plt.savefig(os.path.join('images/results/{}.png'.format(i+1))) 97 | plt.close() 98 | 99 | #print("The Expression is %s" %str(class_names[int(predicted.cpu().numpy())])) 100 | emotion_array[int(predicted.cpu().numpy())] += 1 101 | return emotion_array 102 | -------------------------------------------------------------------------------- /emotion-detection/models/resnet.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | 3 | For Pre-activation ResNet, see 'preact_resnet.py'. 4 | 5 | Reference: 6 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 7 | Deep Residual Learning for Image Recognition. 
arXiv:1512.03385 8 | ''' 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | from torch.autograd import Variable 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, in_planes, planes, stride=1): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 22 | self.bn1 = nn.BatchNorm2d(planes) 23 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 24 | self.bn2 = nn.BatchNorm2d(planes) 25 | 26 | self.shortcut = nn.Sequential() 27 | if stride != 1 or in_planes != self.expansion*planes: 28 | self.shortcut = nn.Sequential( 29 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 30 | nn.BatchNorm2d(self.expansion*planes) 31 | ) 32 | 33 | def forward(self, x): 34 | out = F.relu(self.bn1(self.conv1(x))) 35 | out = self.bn2(self.conv2(out)) 36 | out += self.shortcut(x) 37 | out = F.relu(out) 38 | return out 39 | 40 | 41 | class Bottleneck(nn.Module): 42 | expansion = 4 43 | 44 | def __init__(self, in_planes, planes, stride=1): 45 | super(Bottleneck, self).__init__() 46 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 47 | self.bn1 = nn.BatchNorm2d(planes) 48 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 49 | self.bn2 = nn.BatchNorm2d(planes) 50 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 51 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 52 | 53 | self.shortcut = nn.Sequential() 54 | if stride != 1 or in_planes != self.expansion*planes: 55 | self.shortcut = nn.Sequential( 56 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 57 | nn.BatchNorm2d(self.expansion*planes) 58 | ) 59 | 60 | def forward(self, x): 61 | out = F.relu(self.bn1(self.conv1(x))) 62 | out = F.relu(self.bn2(self.conv2(out))) 63 | out = self.bn3(self.conv3(out)) 64 | out += self.shortcut(x) 65 | out = F.relu(out) 66 | return out 67 | 68 | 69 | class ResNet(nn.Module): 70 | def __init__(self, block, num_blocks, num_classes=7): 71 | super(ResNet, self).__init__() 72 | self.in_planes = 64 73 | 74 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 75 | self.bn1 = nn.BatchNorm2d(64) 76 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 77 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 78 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 79 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 80 | self.linear = nn.Linear(512, num_classes) 81 | 82 | def _make_layer(self, block, planes, num_blocks, stride): 83 | strides = [stride] + [1]*(num_blocks-1) 84 | layers = [] 85 | for stride in strides: 86 | layers.append(block(self.in_planes, planes, stride)) 87 | self.in_planes = planes * block.expansion 88 | return nn.Sequential(*layers) 89 | 90 | def forward(self, x): 91 | out = F.relu(self.bn1(self.conv1(x))) 92 | out = self.layer1(out) 93 | out = self.layer2(out) 94 | out = self.layer3(out) 95 | out = self.layer4(out) 96 | out = F.avg_pool2d(out, 4) 97 | out = out.view(out.size(0), -1) 98 | out = F.dropout(out, p=0.5, training=self.training) 99 | out = self.linear(out) 100 | return out 101 | 102 | 103 | def ResNet18(): 104 | return ResNet(BasicBlock, [2,2,2,2]) -------------------------------------------------------------------------------- 
/pose-estimation/models/with_mobilenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from modules.conv import conv, conv_dw, conv_dw_no_bn 5 | 6 | 7 | class Cpm(nn.Module): 8 | def __init__(self, in_channels, out_channels): 9 | super().__init__() 10 | self.align = conv(in_channels, out_channels, kernel_size=1, padding=0, bn=False) 11 | self.trunk = nn.Sequential( 12 | conv_dw_no_bn(out_channels, out_channels), 13 | conv_dw_no_bn(out_channels, out_channels), 14 | conv_dw_no_bn(out_channels, out_channels) 15 | ) 16 | self.conv = conv(out_channels, out_channels, bn=False) 17 | 18 | def forward(self, x): 19 | x = self.align(x) 20 | x = self.conv(x + self.trunk(x)) 21 | return x 22 | 23 | 24 | class InitialStage(nn.Module): 25 | def __init__(self, num_channels, num_heatmaps, num_pafs): 26 | super().__init__() 27 | self.trunk = nn.Sequential( 28 | conv(num_channels, num_channels, bn=False), 29 | conv(num_channels, num_channels, bn=False), 30 | conv(num_channels, num_channels, bn=False) 31 | ) 32 | self.heatmaps = nn.Sequential( 33 | conv(num_channels, 512, kernel_size=1, padding=0, bn=False), 34 | conv(512, num_heatmaps, kernel_size=1, padding=0, bn=False, relu=False) 35 | ) 36 | self.pafs = nn.Sequential( 37 | conv(num_channels, 512, kernel_size=1, padding=0, bn=False), 38 | conv(512, num_pafs, kernel_size=1, padding=0, bn=False, relu=False) 39 | ) 40 | 41 | def forward(self, x): 42 | trunk_features = self.trunk(x) 43 | heatmaps = self.heatmaps(trunk_features) 44 | pafs = self.pafs(trunk_features) 45 | return [heatmaps, pafs] 46 | 47 | 48 | class RefinementStageBlock(nn.Module): 49 | def __init__(self, in_channels, out_channels): 50 | super().__init__() 51 | self.initial = conv(in_channels, out_channels, kernel_size=1, padding=0, bn=False) 52 | self.trunk = nn.Sequential( 53 | conv(out_channels, out_channels), 54 | conv(out_channels, out_channels, dilation=2, padding=2) 55 | ) 56 | 57 | def forward(self, x): 58 | initial_features = self.initial(x) 59 | trunk_features = self.trunk(initial_features) 60 | return initial_features + trunk_features 61 | 62 | 63 | class RefinementStage(nn.Module): 64 | def __init__(self, in_channels, out_channels, num_heatmaps, num_pafs): 65 | super().__init__() 66 | self.trunk = nn.Sequential( 67 | RefinementStageBlock(in_channels, out_channels), 68 | RefinementStageBlock(out_channels, out_channels), 69 | RefinementStageBlock(out_channels, out_channels), 70 | RefinementStageBlock(out_channels, out_channels), 71 | RefinementStageBlock(out_channels, out_channels) 72 | ) 73 | self.heatmaps = nn.Sequential( 74 | conv(out_channels, out_channels, kernel_size=1, padding=0, bn=False), 75 | conv(out_channels, num_heatmaps, kernel_size=1, padding=0, bn=False, relu=False) 76 | ) 77 | self.pafs = nn.Sequential( 78 | conv(out_channels, out_channels, kernel_size=1, padding=0, bn=False), 79 | conv(out_channels, num_pafs, kernel_size=1, padding=0, bn=False, relu=False) 80 | ) 81 | 82 | def forward(self, x): 83 | trunk_features = self.trunk(x) 84 | heatmaps = self.heatmaps(trunk_features) 85 | pafs = self.pafs(trunk_features) 86 | return [heatmaps, pafs] 87 | 88 | 89 | class PoseEstimationWithMobileNet(nn.Module): 90 | def __init__(self, num_refinement_stages=1, num_channels=128, num_heatmaps=19, num_pafs=38): 91 | super().__init__() 92 | self.model = nn.Sequential( 93 | conv( 3, 32, stride=2, bias=False), 94 | conv_dw( 32, 64), 95 | conv_dw( 64, 128, stride=2), 96 | conv_dw(128, 128), 97 | 
conv_dw(128, 256, stride=2), 98 | conv_dw(256, 256), 99 | conv_dw(256, 512), # conv4_2 100 | conv_dw(512, 512, dilation=2, padding=2), 101 | conv_dw(512, 512), 102 | conv_dw(512, 512), 103 | conv_dw(512, 512), 104 | conv_dw(512, 512) # conv5_5 105 | ) 106 | self.cpm = Cpm(512, num_channels) 107 | 108 | self.initial_stage = InitialStage(num_channels, num_heatmaps, num_pafs) 109 | self.refinement_stages = nn.ModuleList() 110 | for idx in range(num_refinement_stages): 111 | self.refinement_stages.append(RefinementStage(num_channels + num_heatmaps + num_pafs, num_channels, 112 | num_heatmaps, num_pafs)) 113 | 114 | def forward(self, x): 115 | backbone_features = self.model(x) 116 | backbone_features = self.cpm(backbone_features) 117 | 118 | stages_output = self.initial_stage(backbone_features) 119 | for refinement_stage in self.refinement_stages: 120 | stages_output.extend( 121 | refinement_stage(torch.cat([backbone_features, stages_output[-2], stages_output[-1]], dim=1))) 122 | 123 | return stages_output 124 | -------------------------------------------------------------------------------- /pose-estimation/modules/pose.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from modules.keypoints import BODY_PARTS_KPT_IDS, BODY_PARTS_PAF_IDS 5 | from modules.one_euro_filter import OneEuroFilter 6 | 7 | 8 | class Pose: 9 | num_kpts = 18 10 | kpt_names = ['nose', 'neck', 11 | 'r_sho', 'r_elb', 'r_wri', 'l_sho', 'l_elb', 'l_wri', 12 | 'r_hip', 'r_knee', 'r_ank', 'l_hip', 'l_knee', 'l_ank', 13 | 'r_eye', 'l_eye', 14 | 'r_ear', 'l_ear'] 15 | sigmas = np.array([.26, .79, .79, .72, .62, .79, .72, .62, 1.07, .87, .89, 1.07, .87, .89, .25, .25, .35, .35], 16 | dtype=np.float32) / 10.0 17 | vars = (sigmas * 2) ** 2 18 | last_id = -1 19 | color = [0, 224, 255] 20 | 21 | def __init__(self, keypoints, confidence): 22 | super().__init__() 23 | self.keypoints = keypoints 24 | self.confidence = confidence 25 | self.bbox = Pose.get_bbox(self.keypoints) 26 | self.id = None 27 | self.filters = [[OneEuroFilter(), OneEuroFilter()] for _ in range(Pose.num_kpts)] 28 | 29 | @staticmethod 30 | def get_bbox(keypoints): 31 | found_keypoints = np.zeros((np.count_nonzero(keypoints[:, 0] != -1), 2), dtype=np.int32) 32 | found_kpt_id = 0 33 | for kpt_id in range(Pose.num_kpts): 34 | if keypoints[kpt_id, 0] == -1: 35 | continue 36 | found_keypoints[found_kpt_id] = keypoints[kpt_id] 37 | found_kpt_id += 1 38 | bbox = cv2.boundingRect(found_keypoints) 39 | return bbox 40 | 41 | def update_id(self, id=None): 42 | self.id = id 43 | if self.id is None: 44 | self.id = Pose.last_id + 1 45 | Pose.last_id += 1 46 | 47 | def draw(self, img): 48 | assert self.keypoints.shape == (Pose.num_kpts, 2) 49 | 50 | for part_id in range(len(BODY_PARTS_PAF_IDS) - 2): 51 | kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] 52 | global_kpt_a_id = self.keypoints[kpt_a_id, 0] 53 | if global_kpt_a_id != -1: 54 | x_a, y_a = self.keypoints[kpt_a_id] 55 | cv2.circle(img, (int(x_a), int(y_a)), 3, Pose.color, -1) 56 | kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] 57 | global_kpt_b_id = self.keypoints[kpt_b_id, 0] 58 | if global_kpt_b_id != -1: 59 | x_b, y_b = self.keypoints[kpt_b_id] 60 | cv2.circle(img, (int(x_b), int(y_b)), 3, Pose.color, -1) 61 | if global_kpt_a_id != -1 and global_kpt_b_id != -1: 62 | cv2.line(img, (int(x_a), int(y_a)), (int(x_b), int(y_b)), Pose.color, 2) 63 | 64 | 65 | def get_similarity(a, b, threshold=0.5): 66 | num_similar_kpt = 0 67 | for kpt_id in 
range(Pose.num_kpts): 68 | if a.keypoints[kpt_id, 0] != -1 and b.keypoints[kpt_id, 0] != -1: 69 | distance = np.sum((a.keypoints[kpt_id] - b.keypoints[kpt_id]) ** 2) 70 | area = max(a.bbox[2] * a.bbox[3], b.bbox[2] * b.bbox[3]) 71 | similarity = np.exp(-distance / (2 * (area + np.spacing(1)) * Pose.vars[kpt_id])) 72 | if similarity > threshold: 73 | num_similar_kpt += 1 74 | return num_similar_kpt 75 | 76 | 77 | def track_poses(previous_poses, current_poses, threshold=3, smooth=False): 78 | """Propagate poses ids from previous frame results. Id is propagated, 79 | if there are at least `threshold` similar keypoints between pose from previous frame and current. 80 | If correspondence between pose on previous and current frame was established, pose keypoints are smoothed. 81 | 82 | :param previous_poses: poses from previous frame with ids 83 | :param current_poses: poses from current frame to assign ids 84 | :param threshold: minimal number of similar keypoints between poses 85 | :param smooth: smooth pose keypoints between frames 86 | :return: None 87 | """ 88 | current_poses = sorted(current_poses, key=lambda pose: pose.confidence, reverse=True) # match confident poses first 89 | mask = np.ones(len(previous_poses), dtype=np.int32) 90 | for current_pose in current_poses: 91 | best_matched_id = None 92 | best_matched_pose_id = None 93 | best_matched_iou = 0 94 | for id, previous_pose in enumerate(previous_poses): 95 | if not mask[id]: 96 | continue 97 | iou = get_similarity(current_pose, previous_pose) 98 | if iou > best_matched_iou: 99 | best_matched_iou = iou 100 | best_matched_pose_id = previous_pose.id 101 | best_matched_id = id 102 | if best_matched_iou >= threshold: 103 | mask[best_matched_id] = 0 104 | else: # pose not similar to any previous 105 | best_matched_pose_id = None 106 | current_pose.update_id(best_matched_pose_id) 107 | 108 | if smooth: 109 | for kpt_id in range(Pose.num_kpts): 110 | if current_pose.keypoints[kpt_id, 0] == -1: 111 | continue 112 | # reuse filter if previous pose has valid filter 113 | if (best_matched_pose_id is not None 114 | and previous_poses[best_matched_id].keypoints[kpt_id, 0] != -1): 115 | current_pose.filters[kpt_id] = previous_poses[best_matched_id].filters[kpt_id] 116 | current_pose.keypoints[kpt_id, 0] = current_pose.filters[kpt_id][0](current_pose.keypoints[kpt_id, 0]) 117 | current_pose.keypoints[kpt_id, 1] = current_pose.filters[kpt_id][1](current_pose.keypoints[kpt_id, 1]) 118 | current_pose.bbox = Pose.get_bbox(current_pose.keypoints) 119 | -------------------------------------------------------------------------------- /pose-estimation/scripts/prepare_train_labels.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import pickle 4 | 5 | 6 | def prepare_annotations(annotations_per_image, images_info, net_input_size): 7 | """Prepare labels for training. For each annotated person calculates center 8 | to perform crop around it during the training. Also converts data to the internal format. 
9 | 10 | :param annotations_per_image: all annotations for specified image id 11 | :param images_info: auxiliary information about all images 12 | :param net_input_size: network input size during training 13 | :return: list of prepared annotations 14 | """ 15 | prepared_annotations = [] 16 | for _, annotations in annotations_per_image.items(): 17 | previous_centers = [] 18 | for annotation in annotations[0]: 19 | if (annotation['num_keypoints'] < 5 20 | or annotation['area'] < 32 * 32): 21 | continue 22 | person_center = [annotation['bbox'][0] + annotation['bbox'][2] / 2, 23 | annotation['bbox'][1] + annotation['bbox'][3] / 2] 24 | is_close = False 25 | for previous_center in previous_centers: 26 | distance_to_previous = ((person_center[0] - previous_center[0]) ** 2 27 | + (person_center[1] - previous_center[1]) ** 2) ** 0.5 28 | if distance_to_previous < previous_center[2] * 0.3: 29 | is_close = True 30 | break 31 | if is_close: 32 | continue 33 | 34 | prepared_annotation = { 35 | 'img_paths': images_info[annotation['image_id']]['file_name'], 36 | 'img_width': images_info[annotation['image_id']]['width'], 37 | 'img_height': images_info[annotation['image_id']]['height'], 38 | 'objpos': person_center, 39 | 'image_id': annotation['image_id'], 40 | 'bbox': annotation['bbox'], 41 | 'segment_area': annotation['area'], 42 | 'scale_provided': annotation['bbox'][3] / net_input_size, 43 | 'num_keypoints': annotation['num_keypoints'], 44 | 'segmentations': annotations[1] 45 | } 46 | 47 | keypoints = [] 48 | for i in range(len(annotation['keypoints']) // 3): 49 | keypoint = [annotation['keypoints'][i * 3], annotation['keypoints'][i * 3 + 1], 2] 50 | if annotation['keypoints'][i * 3 + 2] == 1: 51 | keypoint[2] = 0 52 | elif annotation['keypoints'][i * 3 + 2] == 2: 53 | keypoint[2] = 1 54 | keypoints.append(keypoint) 55 | prepared_annotation['keypoints'] = keypoints 56 | 57 | prepared_other_annotations = [] 58 | for other_annotation in annotations[0]: 59 | if other_annotation == annotation: 60 | continue 61 | 62 | prepared_other_annotation = { 63 | 'objpos': [other_annotation['bbox'][0] + other_annotation['bbox'][2] / 2, 64 | other_annotation['bbox'][1] + other_annotation['bbox'][3] / 2], 65 | 'bbox': other_annotation['bbox'], 66 | 'segment_area': other_annotation['area'], 67 | 'scale_provided': other_annotation['bbox'][3] / net_input_size, 68 | 'num_keypoints': other_annotation['num_keypoints'] 69 | } 70 | 71 | keypoints = [] 72 | for i in range(len(other_annotation['keypoints']) // 3): 73 | keypoint = [other_annotation['keypoints'][i * 3], other_annotation['keypoints'][i * 3 + 1], 2] 74 | if other_annotation['keypoints'][i * 3 + 2] == 1: 75 | keypoint[2] = 0 76 | elif other_annotation['keypoints'][i * 3 + 2] == 2: 77 | keypoint[2] = 1 78 | keypoints.append(keypoint) 79 | prepared_other_annotation['keypoints'] = keypoints 80 | prepared_other_annotations.append(prepared_other_annotation) 81 | 82 | prepared_annotation['processed_other_annotations'] = prepared_other_annotations 83 | prepared_annotations.append(prepared_annotation) 84 | 85 | previous_centers.append((person_center[0], person_center[1], annotation['bbox'][2], annotation['bbox'][3])) 86 | return prepared_annotations 87 | 88 | 89 | if __name__ == '__main__': 90 | parser = argparse.ArgumentParser() 91 | parser.add_argument('--labels', type=str, required=True, help='path to json with keypoints train labels') 92 | parser.add_argument('--output-name', type=str, default='prepared_train_annotation.pkl', 93 | help='name of output file with prepared 
keypoints annotation') 94 | parser.add_argument('--net-input-size', type=int, default=368, help='network input size') 95 | args = parser.parse_args() 96 | with open(args.labels, 'r') as f: 97 | data = json.load(f) 98 | 99 | annotations_per_image_mapping = {} 100 | for annotation in data['annotations']: 101 | if annotation['num_keypoints'] != 0 and not annotation['iscrowd']: 102 | if annotation['image_id'] not in annotations_per_image_mapping: 103 | annotations_per_image_mapping[annotation['image_id']] = [[], []] 104 | annotations_per_image_mapping[annotation['image_id']][0].append(annotation) 105 | 106 | crowd_segmentations_per_image_mapping = {} 107 | for annotation in data['annotations']: 108 | if annotation['iscrowd']: 109 | if annotation['image_id'] not in crowd_segmentations_per_image_mapping: 110 | crowd_segmentations_per_image_mapping[annotation['image_id']] = [] 111 | crowd_segmentations_per_image_mapping[annotation['image_id']].append(annotation['segmentation']) 112 | 113 | for image_id, crowd_segmentations in crowd_segmentations_per_image_mapping.items(): 114 | if image_id in annotations_per_image_mapping: 115 | annotations_per_image_mapping[image_id][1] = crowd_segmentations 116 | 117 | images_info = {} 118 | for image_info in data['images']: 119 | images_info[image_info['id']] = image_info 120 | 121 | prepared_annotations = prepare_annotations(annotations_per_image_mapping, images_info, args.net_input_size) 122 | 123 | with open(args.output_name, 'wb') as f: 124 | pickle.dump(prepared_annotations, f) 125 | 126 | -------------------------------------------------------------------------------- /pose-estimation/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import numpy as np 5 | import torch 6 | 7 | from models.with_mobilenet import PoseEstimationWithMobileNet 8 | from modules.keypoints import extract_keypoints, group_keypoints 9 | from modules.load_state import load_state 10 | from modules.pose import Pose, track_poses 11 | from val import normalize, pad_width 12 | 13 | 14 | class ImageReader(object): 15 | def __init__(self, file_names): 16 | self.file_names = file_names 17 | self.max_idx = len(file_names) 18 | 19 | def __iter__(self): 20 | self.idx = 0 21 | return self 22 | 23 | def __next__(self): 24 | if self.idx == self.max_idx: 25 | raise StopIteration 26 | img = cv2.imread(self.file_names[self.idx], cv2.IMREAD_COLOR) 27 | if img.size == 0: 28 | raise IOError('Image {} cannot be read'.format(self.file_names[self.idx])) 29 | self.idx = self.idx + 1 30 | return img 31 | 32 | 33 | class VideoReader(object): 34 | def __init__(self, file_name): 35 | self.file_name = file_name 36 | try: # OpenCV needs int to read from webcam 37 | self.file_name = int(file_name) 38 | except ValueError: 39 | pass 40 | 41 | def __iter__(self): 42 | self.cap = cv2.VideoCapture(self.file_name) 43 | if not self.cap.isOpened(): 44 | raise IOError('Video {} cannot be opened'.format(self.file_name)) 45 | return self 46 | 47 | def __next__(self): 48 | was_read, img = self.cap.read() 49 | if not was_read: 50 | raise StopIteration 51 | return img 52 | 53 | 54 | def infer_fast(net, img, net_input_height_size, stride, upsample_ratio, cpu, 55 | pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256): 56 | height, width, _ = img.shape 57 | scale = net_input_height_size / height 58 | 59 | scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 60 | scaled_img = normalize(scaled_img, 
img_mean, img_scale) 61 | min_dims = [net_input_height_size, max(scaled_img.shape[1], net_input_height_size)] 62 | padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims) 63 | 64 | tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float() 65 | if not cpu: 66 | tensor_img = tensor_img.cuda() 67 | 68 | stages_output = net(tensor_img) 69 | 70 | stage2_heatmaps = stages_output[-2] 71 | heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0)) 72 | heatmaps = cv2.resize(heatmaps, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC) 73 | 74 | stage2_pafs = stages_output[-1] 75 | pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0)) 76 | pafs = cv2.resize(pafs, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC) 77 | 78 | return heatmaps, pafs, scale, pad 79 | 80 | 81 | def run_demo(net, image_provider, height_size, cpu, track, smooth): 82 | net = net.eval() 83 | if not cpu: 84 | net = net.cuda() 85 | 86 | stride = 8 87 | upsample_ratio = 4 88 | num_keypoints = Pose.num_kpts 89 | previous_poses = [] 90 | delay = 33 91 | for img in image_provider: 92 | orig_img = img.copy() 93 | heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu) 94 | 95 | total_keypoints_num = 0 96 | all_keypoints_by_type = [] 97 | for kpt_idx in range(num_keypoints): # 19th for bg 98 | total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num) 99 | 100 | pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True) 101 | for kpt_id in range(all_keypoints.shape[0]): 102 | all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale 103 | all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale 104 | current_poses = [] 105 | for n in range(len(pose_entries)): 106 | if len(pose_entries[n]) == 0: 107 | continue 108 | pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1 109 | for kpt_id in range(num_keypoints): 110 | if pose_entries[n][kpt_id] != -1.0: # keypoint was found 111 | pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0]) 112 | pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1]) 113 | pose = Pose(pose_keypoints, pose_entries[n][18]) 114 | current_poses.append(pose) 115 | 116 | if track: 117 | track_poses(previous_poses, current_poses, smooth=smooth) 118 | previous_poses = current_poses 119 | for pose in current_poses: 120 | pose.draw(img) 121 | img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0) 122 | for pose in current_poses: 123 | cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]), 124 | (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0)) 125 | if track: 126 | cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16), 127 | cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255)) 128 | cv2.imshow('Lightweight Human Pose Estimation Python Demo', img) 129 | key = cv2.waitKey(delay) 130 | if key == 27: # esc 131 | return 132 | elif key == 112: # 'p' 133 | if delay == 33: 134 | delay = 0 135 | else: 136 | delay = 33 137 | 138 | 139 | if __name__ == '__main__': 140 | parser = argparse.ArgumentParser( 141 | description='''Lightweight human pose estimation python demo. 142 | This is just for quick results preview. 
143 | Please, consider c++ demo for the best performance.''') 144 | parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint') 145 | parser.add_argument('--height-size', type=int, default=256, help='network input layer height size') 146 | parser.add_argument('--video', type=str, default='', help='path to video file or camera id') 147 | parser.add_argument('--images', nargs='+', default='', help='path to input image(s)') 148 | parser.add_argument('--cpu', action='store_true', help='run network inference on cpu') 149 | parser.add_argument('--track', type=int, default=1, help='track pose id in video') 150 | parser.add_argument('--smooth', type=int, default=1, help='smooth pose keypoints') 151 | args = parser.parse_args() 152 | 153 | if args.video == '' and args.images == '': 154 | raise ValueError('Either --video or --image has to be provided') 155 | 156 | net = PoseEstimationWithMobileNet() 157 | checkpoint = torch.load(args.checkpoint_path, map_location='cpu') 158 | load_state(net, checkpoint) 159 | 160 | frame_provider = ImageReader(args.images) 161 | if args.video != '': 162 | frame_provider = VideoReader(args.video) 163 | else: 164 | args.track = 0 165 | 166 | run_demo(net, frame_provider, args.height_size, args.cpu, args.track, args.smooth) 167 | -------------------------------------------------------------------------------- /pose-estimation/val.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import json 4 | import math 5 | import numpy as np 6 | from pycocotools.coco import COCO 7 | from pycocotools.cocoeval import COCOeval 8 | 9 | import torch 10 | 11 | from datasets.coco import CocoValDataset 12 | from models.with_mobilenet import PoseEstimationWithMobileNet 13 | from modules.keypoints import extract_keypoints, group_keypoints 14 | from modules.load_state import load_state 15 | 16 | 17 | def run_coco_eval(gt_file_path, dt_file_path): 18 | annotation_type = 'keypoints' 19 | print('Running test for {} results.'.format(annotation_type)) 20 | 21 | coco_gt = COCO(gt_file_path) 22 | coco_dt = coco_gt.loadRes(dt_file_path) 23 | 24 | result = COCOeval(coco_gt, coco_dt, annotation_type) 25 | result.evaluate() 26 | result.accumulate() 27 | result.summarize() 28 | 29 | 30 | def normalize(img, img_mean, img_scale): 31 | img = np.array(img, dtype=np.float32) 32 | img = (img - img_mean) * img_scale 33 | return img 34 | 35 | 36 | def pad_width(img, stride, pad_value, min_dims): 37 | h, w, _ = img.shape 38 | h = min(min_dims[0], h) 39 | min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride 40 | min_dims[1] = max(min_dims[1], w) 41 | min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride 42 | pad = [] 43 | pad.append(int(math.floor((min_dims[0] - h) / 2.0))) 44 | pad.append(int(math.floor((min_dims[1] - w) / 2.0))) 45 | pad.append(int(min_dims[0] - h - pad[0])) 46 | pad.append(int(min_dims[1] - w - pad[1])) 47 | padded_img = cv2.copyMakeBorder(img, pad[0], pad[2], pad[1], pad[3], 48 | cv2.BORDER_CONSTANT, value=pad_value) 49 | return padded_img, pad 50 | 51 | 52 | def convert_to_coco_format(pose_entries, all_keypoints): 53 | coco_keypoints = [] 54 | scores = [] 55 | for n in range(len(pose_entries)): 56 | if len(pose_entries[n]) == 0: 57 | continue 58 | keypoints = [0] * 17 * 3 59 | to_coco_map = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] 60 | person_score = pose_entries[n][-2] 61 | position_id = -1 62 | for keypoint_id in pose_entries[n][:-2]: 63 | 
position_id += 1 64 | if position_id == 1: # no 'neck' in COCO 65 | continue 66 | 67 | cx, cy, score, visibility = 0, 0, 0, 0 # keypoint not found 68 | if keypoint_id != -1: 69 | cx, cy, score = all_keypoints[int(keypoint_id), 0:3] 70 | cx = cx + 0.5 71 | cy = cy + 0.5 72 | visibility = 1 73 | keypoints[to_coco_map[position_id] * 3 + 0] = cx 74 | keypoints[to_coco_map[position_id] * 3 + 1] = cy 75 | keypoints[to_coco_map[position_id] * 3 + 2] = visibility 76 | coco_keypoints.append(keypoints) 77 | scores.append(person_score * max(0, (pose_entries[n][-1] - 1))) # -1 for 'neck' 78 | return coco_keypoints, scores 79 | 80 | 81 | def infer(net, img, scales, base_height, stride, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256): 82 | normed_img = normalize(img, img_mean, img_scale) 83 | height, width, _ = normed_img.shape 84 | scales_ratios = [scale * base_height / float(height) for scale in scales] 85 | avg_heatmaps = np.zeros((height, width, 19), dtype=np.float32) 86 | avg_pafs = np.zeros((height, width, 38), dtype=np.float32) 87 | 88 | for ratio in scales_ratios: 89 | scaled_img = cv2.resize(normed_img, (0, 0), fx=ratio, fy=ratio, interpolation=cv2.INTER_CUBIC) 90 | min_dims = [base_height, max(scaled_img.shape[1], base_height)] 91 | padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims) 92 | 93 | tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float().cuda() 94 | stages_output = net(tensor_img) 95 | 96 | stage2_heatmaps = stages_output[-2] 97 | heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0)) 98 | heatmaps = cv2.resize(heatmaps, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 99 | heatmaps = heatmaps[pad[0]:heatmaps.shape[0] - pad[2], pad[1]:heatmaps.shape[1] - pad[3]:, :] 100 | heatmaps = cv2.resize(heatmaps, (width, height), interpolation=cv2.INTER_CUBIC) 101 | avg_heatmaps = avg_heatmaps + heatmaps / len(scales_ratios) 102 | 103 | stage2_pafs = stages_output[-1] 104 | pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0)) 105 | pafs = cv2.resize(pafs, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 106 | pafs = pafs[pad[0]:pafs.shape[0] - pad[2], pad[1]:pafs.shape[1] - pad[3], :] 107 | pafs = cv2.resize(pafs, (width, height), interpolation=cv2.INTER_CUBIC) 108 | avg_pafs = avg_pafs + pafs / len(scales_ratios) 109 | 110 | return avg_heatmaps, avg_pafs 111 | 112 | 113 | def evaluate(labels, output_name, images_folder, net, multiscale=False, visualize=False): 114 | net = net.cuda().eval() 115 | base_height = 368 116 | scales = [1] 117 | if multiscale: 118 | scales = [0.5, 1.0, 1.5, 2.0] 119 | stride = 8 120 | 121 | dataset = CocoValDataset(labels, images_folder) 122 | coco_result = [] 123 | for sample in dataset: 124 | file_name = sample['file_name'] 125 | img = sample['img'] 126 | 127 | avg_heatmaps, avg_pafs = infer(net, img, scales, base_height, stride) 128 | 129 | total_keypoints_num = 0 130 | all_keypoints_by_type = [] 131 | for kpt_idx in range(18): # 19th for bg 132 | total_keypoints_num += extract_keypoints(avg_heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num) 133 | 134 | pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, avg_pafs) 135 | 136 | coco_keypoints, scores = convert_to_coco_format(pose_entries, all_keypoints) 137 | 138 | image_id = int(file_name[0:file_name.rfind('.')]) 139 | for idx in range(len(coco_keypoints)): 140 | coco_result.append({ 141 | 'image_id': image_id, 142 | 'category_id': 1, # person 143 | 
                'keypoints': coco_keypoints[idx],
144 |                 'score': scores[idx]
145 |             })
146 | 
147 |         if visualize:
148 |             for keypoints in coco_keypoints:
149 |                 for idx in range(len(keypoints) // 3):
150 |                     cv2.circle(img, (int(keypoints[idx * 3]), int(keypoints[idx * 3 + 1])),
151 |                                3, (255, 0, 255), -1)
152 |             cv2.imshow('keypoints', img)
153 |             key = cv2.waitKey()
154 |             if key == 27:  # esc
155 |                 return
156 | 
157 |     with open(output_name, 'w') as f:
158 |         json.dump(coco_result, f, indent=4)
159 | 
160 |     run_coco_eval(labels, output_name)
161 | 
162 | 
163 | if __name__ == '__main__':
164 |     parser = argparse.ArgumentParser()
165 |     parser.add_argument('--labels', type=str, required=True, help='path to json with keypoints val labels')
166 |     parser.add_argument('--output-name', type=str, default='detections.json',
167 |                         help='name of output json file with detected keypoints')
168 |     parser.add_argument('--images-folder', type=str, required=True, help='path to COCO val images folder')
169 |     parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint')
170 |     parser.add_argument('--multiscale', action='store_true', help='average inference results over multiple scales')
171 |     parser.add_argument('--visualize', action='store_true', help='show keypoints')
172 |     args = parser.parse_args()
173 | 
174 |     net = PoseEstimationWithMobileNet()
175 |     checkpoint = torch.load(args.checkpoint_path)
176 |     load_state(net, checkpoint)
177 | 
178 |     evaluate(args.labels, args.output_name, args.images_folder, net, args.multiscale, args.visualize)
179 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Classroom-Monitoring-System
2 | ### Making the classroom smart using deep learning and Internet of Things
3 | 
4 | ![ScreenShot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/Classroom.png)
5 | 
6 | ## Introduction
7 | As students, we sometimes feel like leaving the classroom, frustrated by boring lectures, and we have always wanted
8 | the lecturer to know how we feel. In earlier times this was hardly possible, but in the deep learning
9 | era we have made it possible. It is also important to understand that the way lectures are delivered in
10 | college or school plays a major role in shaping the careers of students. Hence we came up with a solution
11 | that lets the lecturer know not only the students' emotions but also their gestures, so that
12 | he can plan his next moves accordingly.
13 | 
14 | ## Objective
15 | The main objectives of this system are:
16 | 
17 | - Make the attendance system flexible (biometric)
18 | - Help the teachers and the principal understand the students' interest in the lectures by identifying their emotions and sitting postures.
19 | - Automatic notes taker.
20 | 
21 | ## Components Required
22 | 
23 | ### For Biometric Attendance:
24 | - Arduino UNO board (ATmega328P microcontroller)
25 | - Fingerprint sensor (R307 module)
26 | - LCD display (16x2 LCD module)
27 | - Firebase (database to store students' roll numbers)
28 | 
29 | ### For Emotion and Posture Recognition
30 | - Raspberry Pi
31 | - Camera module
32 | - Deep learning modules and models
33 | 
34 | ### For Automatic Notes Taker
35 | - Raspberry Pi 4
36 | - ReSpeaker USB Mic Array
37 | 
38 | ## Block Diagram of how our system works
39 | 
40 | ![Screenshot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/BlockDiagram-Classroom.png)
41 | 
42 | ## Description
43 | 
44 | ### Biometric Attendance
45 | We built a fingerprint-based biometric attendance system using Arduino (inspired by [this](https://circuitdigest.com/microcontroller-projects/fingerprint-attendance-system-using-arduino-uno)).
46 | 
47 | ![ScreenShot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/finger-print-sensor.png)
48 | 
49 | ### Emotion Recognition
50 | 
51 | ![ScreenShot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/emotion-detection/images/results/Happy-Result.png)
52 | 
53 | ![ScreenShot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/emotion-detection/images/results/Neutral-Result.png)
54 | 
55 | First, we detect faces from the camera using [BlazeFace-Torch](https://www.kaggle.com/humananalog/blazeface-pytorch), a lightweight face detection framework.
56 | Besides a bounding box, BlazeFace also predicts 6 keypoints for face landmarks (2x eyes, 2x ears, nose, mouth). Using the detected face, we then recognize one of 7 emotions (Angry, Disgust, Fear, Happy, Sad, Surprise, Neutral) and fetch the emoji for that emotion. The recognition is done with a VGG19 model, trained on roughly a million faces for about 4 hours on Kaggle's GPU.
57 | 
58 | ![](https://www.pyimagesearch.com/wp-content/uploads/2017/03/imagenet_vgg16.png)
59 | 
60 | ![](https://www.researchgate.net/profile/Clifford_Yang/publication/325137356/figure/fig2/AS:670371271413777@1536840374533/llustration-of-the-network-architecture-of-VGG-19-model-conv-means-convolution-FC-means.jpg)
61 | 
62 | ### Pose Estimation
63 | Here we adapt a multi-person pose estimation architecture to run on edge devices. We follow the bottom-up approach from OpenPose because of its decent quality and its robustness to the number of people inside the frame. The network has 4.1M parameters and a complexity of 9 billion floating-point operations (GFLOPs), which is only ~15% of the baseline 2-stage OpenPose with almost the same quality. It detects a skeleton (keypoints and the connections between them) to identify the pose of every person inside the image. A pose may contain up to 18 keypoints: ears, eyes, nose, neck, shoulders, elbows, wrists, hips, knees, and ankles.
64 | 
65 | ![ScreenShot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/Pose-Detection.png)
66 | 
67 | We profiled the code, removed extra memory allocations, and parallelized keypoint extraction with OpenCV's routines. This made the code significantly faster, and the remaining bottleneck was resizing the feature maps to the input image size. We tried skipping the resize step and grouping keypoints directly on the network output, but accuracy dropped significantly. So the upsampling step cannot be avoided, though it does not have to go all the way to the input image size: our experiments showed that with an upsample factor of 8 the accuracy is the same as when resizing to the input image size. We use an upsample factor of 4 for the demo.
68 | (Inspired by this [paper](https://arxiv.org/pdf/1811.12004.pdf).)
69 | 
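To make the pose pipeline above concrete, here is a minimal single-image sketch that reuses the helpers already shipped in `pose-estimation/` (the `infer`, `extract_keypoints` and `group_keypoints` functions shown later in this file). The checkpoint and image file names are placeholders, and the script is assumed to be run from inside the `pose-estimation` folder on a machine with a CUDA device, since `infer` moves tensors to the GPU.

```python
# Minimal sketch: run the lightweight OpenPose-style network on one frame.
# Assumes execution from the pose-estimation/ folder; the checkpoint and
# image paths below are placeholders.
import cv2
import torch

from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.load_state import load_state
from modules.keypoints import extract_keypoints, group_keypoints
from val import infer

net = PoseEstimationWithMobileNet()
checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')  # placeholder checkpoint
load_state(net, checkpoint)
net = net.cuda().eval()  # infer() expects the network on the GPU

img = cv2.imread('classroom_frame.jpg', cv2.IMREAD_COLOR)  # placeholder image
avg_heatmaps, avg_pafs = infer(net, img, scales=[1], base_height=368, stride=8)

total_keypoints_num = 0
all_keypoints_by_type = []
for kpt_idx in range(18):  # the 19th heatmap channel is background
    total_keypoints_num += extract_keypoints(avg_heatmaps[:, :, kpt_idx],
                                             all_keypoints_by_type, total_keypoints_num)
pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, avg_pafs)
print('people detected:', len(pose_entries))
```

Each entry in `pose_entries` holds the indices of up to 18 keypoints for one person, which is what the demo uses to draw skeletons like the one in the picture above.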
70 | ### Notes taker
71 | Here we built a simple audio-to-text converter using PyAudio and Halo. The transcribed text is mailed to the students as notes.
72 | 
73 | ## Procedure
74 | 
75 | - First, the professor logs into his account on the biometric device with his fingerprint and gets the attendance list for the subject he handles.
76 | - The device is then passed to the students to place their fingerprints, and the attendance for that day is updated along with the fingerprint timestamps.
77 | - The professor is advised to keep the device on until his hour ends, because as soon as the professor logs in, the notes-taker device starts running, and it stops when the biometric device stops.
78 | - The notes-taking device is connected to the cloud; it takes notes of the professor's words and sends them to every student's webmail ID or mail ID once the professor logs out from the biometric device (session over).
79 | - This reduces the students' effort of taking notes and lets them focus more on the professor's words.
80 | - The professor can log out by placing his fingerprint on the device a second time.
81 | - An LCD display on the biometric device shows whether the professor is logged in and whether a student's fingerprint was recognized.
82 | - Meanwhile, during the class, the students' emotions are detected from their facial expressions; the overall emotion is displayed on the smart board (if available) every 3 seconds and is sent as a message to the principal/respective head and the teacher once the session ends.
83 | - Various student poses, such as standing, raising a hand, or leaning on the bench, are also detected and can be used to gauge the students' interest in the subject.
84 | - Considering the students' privacy, we never send student faces to a server. The faces from the cameras are processed inside the board using OpenCV and blazeface-pytorch (used to detect faces), and only the resulting emotions go to the server; the same holds for poses (a minimal sketch of this on-device loop is given after the Future Scope section).
85 | 
86 | ## Final Touch
87 | We were able to create a system similar to the picture below (inspired by this [website](https://edtechchina.medium.com/schools-using-facial-recognition-system-sparks-privacy-concerns-in-china-d4f706e5cfd0)).
88 | 
89 | ![](https://miro.medium.com/max/875/1*TqeG3GUeIOaXY36Dwu8rkA.jpeg)
90 | 
91 | - This can also be used on any public speaking platform to assess the emotion of the audience and make it available to the speaker, helping him deliver the speech better.
92 | 
93 | ## Future Scope
94 | 
95 | - This can be extended to automating the distribution of corrected answer papers through the server; in fact, the correction of answer papers, or even invigilation, can be automated as well.
96 | - Lights and fans can also be switched on and off automatically, reducing power wastage; the sweeper problem can be solved by using voice-automated on/off.
97 | - We can add recording facilities to the system so that students can view the lectures whenever they wish.
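The Emotion Recognition and Procedure sections above describe the on-device loop only in prose, so here is a rough sketch of how the pieces fit together. It reuses the `BlazeFace` class from `emotion-detection/BlazeFace_PyTorch/blazeface.py` (included later in this file); `classify_emotion` and `send_to_server` are hypothetical placeholders for the VGG19 classifier in `emotion-detection/` and for the transport to the server, and the 3-second refresh follows the Procedure section.

```python
# Rough sketch of the on-device loop: detect faces with BlazeFace, classify
# each crop, and forward only the aggregated emotion label every 3 seconds.
# classify_emotion and send_to_server are placeholders, not repo functions.
import time
from collections import Counter

import cv2
import torch

from BlazeFace_PyTorch.blazeface import BlazeFace

face_net = BlazeFace()
face_net.load_weights('BlazeFace_PyTorch/blazeface.pth')
face_net.load_anchors('BlazeFace_PyTorch/anchors.npy')

def classify_emotion(face_crop):
    """Placeholder for the VGG19 emotion classifier (7 classes)."""
    return 'Neutral'  # stub so the sketch runs end to end

def send_to_server(label):
    """Placeholder: only the label leaves the device, never the image."""
    print('overall emotion:', label)

cap = cv2.VideoCapture(0)
while True:
    ok, frame = cap.read()
    if not ok:
        break
    h, w = frame.shape[:2]
    small = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (128, 128))
    detections = face_net.predict_on_image(small)  # (num_faces, 17) tensor
    emotions = []
    for det in detections:
        # detections are normalized ymin, xmin, ymax, xmax; scale to the frame
        ymin, xmin, ymax, xmax = (det[:4] * torch.tensor([h, w, h, w])).int().tolist()
        face = frame[max(ymin, 0):ymax, max(xmin, 0):xmax]
        if face.size:
            emotions.append(classify_emotion(face))
    if emotions:
        send_to_server(Counter(emotions).most_common(1)[0][0])
    time.sleep(3)  # overall emotion is refreshed every 3 seconds
```

The important property, as noted in the Procedure section, is that only the aggregated label crosses the network; frames and face crops never leave the device.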
98 | 99 | ## References 100 | 101 | - https://medium.com/@EdtechChina/schools-using-facial-recognition-system-sparks-privacy-concerns-in-china-d4f706e5cfd0 102 | - http://en.people.cn/n3/2018/0519/c90000-9461918.html 103 | 104 | ## Contributors 105 | - [Shantosh](https://www.linkedin.com/in/shanthosh-kumar-921092174/) 106 | -------------------------------------------------------------------------------- /pose-estimation/datasets/coco.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import math 4 | import os 5 | import pickle 6 | 7 | import cv2 8 | import numpy as np 9 | import pycocotools 10 | 11 | from torch.utils.data.dataset import Dataset 12 | 13 | BODY_PARTS_KPT_IDS = [[1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13], [1, 2], [2, 3], [3, 4], [2, 16], 14 | [1, 5], [5, 6], [6, 7], [5, 17], [1, 0], [0, 14], [0, 15], [14, 16], [15, 17]] 15 | 16 | 17 | def get_mask(segmentations, mask): 18 | for segmentation in segmentations: 19 | rle = pycocotools.mask.frPyObjects(segmentation, mask.shape[0], mask.shape[1]) 20 | mask[pycocotools.mask.decode(rle) > 0.5] = 0 21 | return mask 22 | 23 | 24 | class CocoTrainDataset(Dataset): 25 | def __init__(self, labels, images_folder, stride, sigma, paf_thickness, transform=None): 26 | super().__init__() 27 | self._images_folder = images_folder 28 | self._stride = stride 29 | self._sigma = sigma 30 | self._paf_thickness = paf_thickness 31 | self._transform = transform 32 | with open(labels, 'rb') as f: 33 | self._labels = pickle.load(f) 34 | 35 | def __getitem__(self, idx): 36 | label = copy.deepcopy(self._labels[idx]) # label modified in transform 37 | image = cv2.imread(os.path.join(self._images_folder, label['img_paths']), cv2.IMREAD_COLOR) 38 | mask = np.ones(shape=(label['img_height'], label['img_width']), dtype=np.float32) 39 | mask = get_mask(label['segmentations'], mask) 40 | sample = { 41 | 'label': label, 42 | 'image': image, 43 | 'mask': mask 44 | } 45 | if self._transform: 46 | sample = self._transform(sample) 47 | 48 | mask = cv2.resize(sample['mask'], dsize=None, fx=1/self._stride, fy=1/self._stride, interpolation=cv2.INTER_AREA) 49 | keypoint_maps = self._generate_keypoint_maps(sample) 50 | sample['keypoint_maps'] = keypoint_maps 51 | keypoint_mask = np.zeros(shape=keypoint_maps.shape, dtype=np.float32) 52 | for idx in range(keypoint_mask.shape[0]): 53 | keypoint_mask[idx] = mask 54 | sample['keypoint_mask'] = keypoint_mask 55 | 56 | paf_maps = self._generate_paf_maps(sample) 57 | sample['paf_maps'] = paf_maps 58 | paf_mask = np.zeros(shape=paf_maps.shape, dtype=np.float32) 59 | for idx in range(paf_mask.shape[0]): 60 | paf_mask[idx] = mask 61 | sample['paf_mask'] = paf_mask 62 | 63 | image = sample['image'].astype(np.float32) 64 | image = (image - 128) / 256 65 | sample['image'] = image.transpose((2, 0, 1)) 66 | return sample 67 | 68 | def __len__(self): 69 | return len(self._labels) 70 | 71 | def _generate_keypoint_maps(self, sample): 72 | n_keypoints = 18 73 | n_rows, n_cols, _ = sample['image'].shape 74 | keypoint_maps = np.zeros(shape=(n_keypoints + 1, 75 | n_rows // self._stride, n_cols // self._stride), dtype=np.float32) # +1 for bg 76 | 77 | label = sample['label'] 78 | for keypoint_idx in range(n_keypoints): 79 | keypoint = label['keypoints'][keypoint_idx] 80 | if keypoint[2] <= 1: 81 | self._add_gaussian(keypoint_maps[keypoint_idx], keypoint[0], keypoint[1], self._stride, self._sigma) 82 | for another_annotation in label['processed_other_annotations']: 83 | 
keypoint = another_annotation['keypoints'][keypoint_idx] 84 | if keypoint[2] <= 1: 85 | self._add_gaussian(keypoint_maps[keypoint_idx], keypoint[0], keypoint[1], self._stride, self._sigma) 86 | keypoint_maps[-1] = 1 - keypoint_maps.max(axis=0) 87 | return keypoint_maps 88 | 89 | def _add_gaussian(self, keypoint_map, x, y, stride, sigma): 90 | n_sigma = 4 91 | tl = [int(x - n_sigma * sigma), int(y - n_sigma * sigma)] 92 | tl[0] = max(tl[0], 0) 93 | tl[1] = max(tl[1], 0) 94 | 95 | br = [int(x + n_sigma * sigma), int(y + n_sigma * sigma)] 96 | map_h, map_w = keypoint_map.shape 97 | br[0] = min(br[0], map_w * stride) 98 | br[1] = min(br[1], map_h * stride) 99 | 100 | shift = stride / 2 - 0.5 101 | for map_y in range(tl[1] // stride, br[1] // stride): 102 | for map_x in range(tl[0] // stride, br[0] // stride): 103 | d2 = (map_x * stride + shift - x) * (map_x * stride + shift - x) + \ 104 | (map_y * stride + shift - y) * (map_y * stride + shift - y) 105 | exponent = d2 / 2 / sigma / sigma 106 | if exponent > 4.6052: # threshold, ln(100), ~0.01 107 | continue 108 | keypoint_map[map_y, map_x] += math.exp(-exponent) 109 | if keypoint_map[map_y, map_x] > 1: 110 | keypoint_map[map_y, map_x] = 1 111 | 112 | def _generate_paf_maps(self, sample): 113 | n_pafs = len(BODY_PARTS_KPT_IDS) 114 | n_rows, n_cols, _ = sample['image'].shape 115 | paf_maps = np.zeros(shape=(n_pafs * 2, n_rows // self._stride, n_cols // self._stride), dtype=np.float32) 116 | 117 | label = sample['label'] 118 | for paf_idx in range(n_pafs): 119 | keypoint_a = label['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][0]] 120 | keypoint_b = label['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][1]] 121 | if keypoint_a[2] <= 1 and keypoint_b[2] <= 1: 122 | self._set_paf(paf_maps[paf_idx * 2:paf_idx * 2 + 2], 123 | keypoint_a[0], keypoint_a[1], keypoint_b[0], keypoint_b[1], 124 | self._stride, self._paf_thickness) 125 | for another_annotation in label['processed_other_annotations']: 126 | keypoint_a = another_annotation['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][0]] 127 | keypoint_b = another_annotation['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][1]] 128 | if keypoint_a[2] <= 1 and keypoint_b[2] <= 1: 129 | self._set_paf(paf_maps[paf_idx * 2:paf_idx * 2 + 2], 130 | keypoint_a[0], keypoint_a[1], keypoint_b[0], keypoint_b[1], 131 | self._stride, self._paf_thickness) 132 | return paf_maps 133 | 134 | def _set_paf(self, paf_map, x_a, y_a, x_b, y_b, stride, thickness): 135 | x_a /= stride 136 | y_a /= stride 137 | x_b /= stride 138 | y_b /= stride 139 | x_ba = x_b - x_a 140 | y_ba = y_b - y_a 141 | _, h_map, w_map = paf_map.shape 142 | x_min = int(max(min(x_a, x_b) - thickness, 0)) 143 | x_max = int(min(max(x_a, x_b) + thickness, w_map)) 144 | y_min = int(max(min(y_a, y_b) - thickness, 0)) 145 | y_max = int(min(max(y_a, y_b) + thickness, h_map)) 146 | norm_ba = (x_ba * x_ba + y_ba * y_ba) ** 0.5 147 | if norm_ba < 1e-7: # Same points, no paf 148 | return 149 | x_ba /= norm_ba 150 | y_ba /= norm_ba 151 | 152 | for y in range(y_min, y_max): 153 | for x in range(x_min, x_max): 154 | x_ca = x - x_a 155 | y_ca = y - y_a 156 | d = math.fabs(x_ca * y_ba - y_ca * x_ba) 157 | if d <= thickness: 158 | paf_map[0, y, x] = x_ba 159 | paf_map[1, y, x] = y_ba 160 | 161 | 162 | class CocoValDataset(Dataset): 163 | def __init__(self, labels, images_folder): 164 | super().__init__() 165 | with open(labels, 'r') as f: 166 | self._labels = json.load(f) 167 | self._images_folder = images_folder 168 | 169 | def __getitem__(self, idx): 170 | file_name = 
self._labels['images'][idx]['file_name'] 171 | img = cv2.imread(os.path.join(self._images_folder, file_name), cv2.IMREAD_COLOR) 172 | return { 173 | 'img': img, 174 | 'file_name': file_name 175 | } 176 | 177 | def __len__(self): 178 | return len(self._labels['images']) 179 | -------------------------------------------------------------------------------- /pose-estimation/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import os 4 | 5 | import torch 6 | from torch.nn import DataParallel 7 | import torch.optim as optim 8 | from torch.utils.data import DataLoader 9 | from torchvision import transforms 10 | 11 | from datasets.coco import CocoTrainDataset 12 | from datasets.transformations import ConvertKeypoints, Scale, Rotate, CropPad, Flip 13 | from modules.get_parameters import get_parameters_conv, get_parameters_bn, get_parameters_conv_depthwise 14 | from models.with_mobilenet import PoseEstimationWithMobileNet 15 | from modules.loss import l2_loss 16 | from modules.load_state import load_state, load_from_mobilenet 17 | from val import evaluate 18 | 19 | cv2.setNumThreads(0) 20 | cv2.ocl.setUseOpenCL(False) # To prevent freeze of DataLoader 21 | 22 | 23 | def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size, batches_per_iter, 24 | num_workers, checkpoint_path, weights_only, from_mobilenet, checkpoints_folder, log_after, 25 | val_labels, val_images_folder, val_output_name, checkpoint_after, val_after): 26 | net = PoseEstimationWithMobileNet(num_refinement_stages) 27 | 28 | stride = 8 29 | sigma = 7 30 | path_thickness = 1 31 | dataset = CocoTrainDataset(prepared_train_labels, train_images_folder, 32 | stride, sigma, path_thickness, 33 | transform=transforms.Compose([ 34 | ConvertKeypoints(), 35 | Scale(), 36 | Rotate(pad=(128, 128, 128)), 37 | CropPad(pad=(128, 128, 128)), 38 | Flip()])) 39 | train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) 40 | 41 | optimizer = optim.Adam([ 42 | {'params': get_parameters_conv(net.model, 'weight')}, 43 | {'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0}, 44 | {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0}, 45 | {'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, 46 | {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr}, 47 | {'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, 48 | {'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0}, 49 | {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr}, 50 | {'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, 51 | {'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4}, 52 | {'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0}, 53 | {'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0}, 54 | {'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, 55 | ], lr=base_lr, weight_decay=5e-4) 56 | 57 | num_iter = 0 58 | current_epoch = 0 59 | drop_after_epoch = [100, 200, 260] 60 | scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333) 61 | if checkpoint_path: 62 | checkpoint = torch.load(checkpoint_path) 63 | 64 | if from_mobilenet: 65 | 
load_from_mobilenet(net, checkpoint) 66 | else: 67 | load_state(net, checkpoint) 68 | if not weights_only: 69 | optimizer.load_state_dict(checkpoint['optimizer']) 70 | scheduler.load_state_dict(checkpoint['scheduler']) 71 | num_iter = checkpoint['iter'] 72 | current_epoch = checkpoint['current_epoch'] 73 | 74 | net = DataParallel(net).cuda() 75 | net.train() 76 | for epochId in range(current_epoch, 280): 77 | scheduler.step() 78 | total_losses = [0, 0] * (num_refinement_stages + 1) # heatmaps loss, paf loss per stage 79 | batch_per_iter_idx = 0 80 | for batch_data in train_loader: 81 | if batch_per_iter_idx == 0: 82 | optimizer.zero_grad() 83 | 84 | images = batch_data['image'].cuda() 85 | keypoint_masks = batch_data['keypoint_mask'].cuda() 86 | paf_masks = batch_data['paf_mask'].cuda() 87 | keypoint_maps = batch_data['keypoint_maps'].cuda() 88 | paf_maps = batch_data['paf_maps'].cuda() 89 | 90 | stages_output = net(images) 91 | 92 | losses = [] 93 | for loss_idx in range(len(total_losses) // 2): 94 | losses.append(l2_loss(stages_output[loss_idx * 2], keypoint_maps, keypoint_masks, images.shape[0])) 95 | losses.append(l2_loss(stages_output[loss_idx * 2 + 1], paf_maps, paf_masks, images.shape[0])) 96 | total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter 97 | total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter 98 | 99 | loss = losses[0] 100 | for loss_idx in range(1, len(losses)): 101 | loss += losses[loss_idx] 102 | loss /= batches_per_iter 103 | loss.backward() 104 | batch_per_iter_idx += 1 105 | if batch_per_iter_idx == batches_per_iter: 106 | optimizer.step() 107 | batch_per_iter_idx = 0 108 | num_iter += 1 109 | else: 110 | continue 111 | 112 | if num_iter % log_after == 0: 113 | print('Iter: {}'.format(num_iter)) 114 | for loss_idx in range(len(total_losses) // 2): 115 | print('\n'.join(['stage{}_pafs_loss: {}', 'stage{}_heatmaps_loss: {}']).format( 116 | loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after, 117 | loss_idx + 1, total_losses[loss_idx * 2] / log_after)) 118 | for loss_idx in range(len(total_losses)): 119 | total_losses[loss_idx] = 0 120 | if num_iter % checkpoint_after == 0: 121 | snapshot_name = '{}/checkpoint_iter_{}.pth'.format(checkpoints_folder, num_iter) 122 | torch.save({'state_dict': net.module.state_dict(), 123 | 'optimizer': optimizer.state_dict(), 124 | 'scheduler': scheduler.state_dict(), 125 | 'iter': num_iter, 126 | 'current_epoch': epochId}, 127 | snapshot_name) 128 | if num_iter % val_after == 0: 129 | print('Validation...') 130 | evaluate(val_labels, val_output_name, val_images_folder, net) 131 | net.train() 132 | 133 | 134 | if __name__ == '__main__': 135 | parser = argparse.ArgumentParser() 136 | parser.add_argument('--prepared-train-labels', type=str, required=True, 137 | help='path to the file with prepared annotations') 138 | parser.add_argument('--train-images-folder', type=str, required=True, help='path to COCO train images folder') 139 | parser.add_argument('--num-refinement-stages', type=int, default=1, help='number of refinement stages') 140 | parser.add_argument('--base-lr', type=float, default=4e-5, help='initial learning rate') 141 | parser.add_argument('--batch-size', type=int, default=80, help='batch size') 142 | parser.add_argument('--batches-per-iter', type=int, default=1, help='number of batches to accumulate gradient from') 143 | parser.add_argument('--num-workers', type=int, default=8, help='number of workers') 144 | parser.add_argument('--checkpoint-path', type=str, required=True, help='path to 
the checkpoint to continue training from') 145 | parser.add_argument('--from-mobilenet', action='store_true', 146 | help='load weights from mobilenet feature extractor') 147 | parser.add_argument('--weights-only', action='store_true', 148 | help='just initialize layers with pre-trained weights and start training from the beginning') 149 | parser.add_argument('--experiment-name', type=str, default='default', 150 | help='experiment name to create folder for checkpoints') 151 | parser.add_argument('--log-after', type=int, default=100, help='number of iterations to print train loss') 152 | 153 | parser.add_argument('--val-labels', type=str, required=True, help='path to json with keypoints val labels') 154 | parser.add_argument('--val-images-folder', type=str, required=True, help='path to COCO val images folder') 155 | parser.add_argument('--val-output-name', type=str, default='detections.json', 156 | help='name of output json file with detected keypoints') 157 | parser.add_argument('--checkpoint-after', type=int, default=5000, 158 | help='number of iterations to save checkpoint') 159 | parser.add_argument('--val-after', type=int, default=5000, 160 | help='number of iterations to run validation') 161 | args = parser.parse_args() 162 | 163 | checkpoints_folder = '{}_checkpoints'.format(args.experiment_name) 164 | if not os.path.exists(checkpoints_folder): 165 | os.makedirs(checkpoints_folder) 166 | 167 | train(args.prepared_train_labels, args.train_images_folder, args.num_refinement_stages, args.base_lr, args.batch_size, 168 | args.batches_per_iter, args.num_workers, args.checkpoint_path, args.weights_only, args.from_mobilenet, 169 | checkpoints_folder, args.log_after, args.val_labels, args.val_images_folder, args.val_output_name, 170 | args.checkpoint_after, args.val_after) 171 | -------------------------------------------------------------------------------- /audio_to_text.py: -------------------------------------------------------------------------------- 1 | import time, logging 2 | from datetime import datetime 3 | import threading, collections, queue, os, os.path 4 | import deepspeech 5 | import numpy as np 6 | import pyaudio 7 | import wave 8 | import webrtcvad 9 | from halo import Halo 10 | from scipy import signal 11 | 12 | logging.basicConfig(level=20) 13 | 14 | class Audio(object): 15 | """Streams raw audio from microphone. 
Data is received in a separate thread, and stored in a buffer, to be read from.""" 16 | 17 | FORMAT = pyaudio.paInt16 18 | # Network/VAD rate-space 19 | RATE_PROCESS = 16000 20 | CHANNELS = 1 21 | BLOCKS_PER_SECOND = 50 22 | 23 | def __init__(self, callback=None, device=None, input_rate=RATE_PROCESS, file=None): 24 | def proxy_callback(in_data, frame_count, time_info, status): 25 | #pylint: disable=unused-argument 26 | if self.chunk is not None: 27 | in_data = self.wf.readframes(self.chunk) 28 | callback(in_data) 29 | return (None, pyaudio.paContinue) 30 | if callback is None: callback = lambda in_data: self.buffer_queue.put(in_data) 31 | self.buffer_queue = queue.Queue() 32 | self.device = device 33 | self.input_rate = input_rate 34 | self.sample_rate = self.RATE_PROCESS 35 | self.block_size = int(self.RATE_PROCESS / float(self.BLOCKS_PER_SECOND)) 36 | self.block_size_input = int(self.input_rate / float(self.BLOCKS_PER_SECOND)) 37 | self.pa = pyaudio.PyAudio() 38 | 39 | kwargs = { 40 | 'format': self.FORMAT, 41 | 'channels': self.CHANNELS, 42 | 'rate': self.input_rate, 43 | 'input': True, 44 | 'frames_per_buffer': self.block_size_input, 45 | 'stream_callback': proxy_callback, 46 | } 47 | 48 | self.chunk = None 49 | # if not default device 50 | if self.device: 51 | kwargs['input_device_index'] = self.device 52 | elif file is not None: 53 | self.chunk = 320 54 | self.wf = wave.open(file, 'rb') 55 | 56 | self.stream = self.pa.open(**kwargs) 57 | self.stream.start_stream() 58 | 59 | def resample(self, data, input_rate): 60 | """ 61 | Microphone may not support our native processing sampling rate, so 62 | resample from input_rate to RATE_PROCESS here for webrtcvad and 63 | deepspeech 64 | Args: 65 | data (binary): Input audio stream 66 | input_rate (int): Input audio rate to resample from 67 | """ 68 | data16 = np.fromstring(string=data, dtype=np.int16) 69 | resample_size = int(len(data16) / self.input_rate * self.RATE_PROCESS) 70 | resample = signal.resample(data16, resample_size) 71 | resample16 = np.array(resample, dtype=np.int16) 72 | return resample16.tostring() 73 | 74 | def read_resampled(self): 75 | """Return a block of audio data resampled to 16000hz, blocking if necessary.""" 76 | return self.resample(data=self.buffer_queue.get(), 77 | input_rate=self.input_rate) 78 | 79 | def read(self): 80 | """Return a block of audio data, blocking if necessary.""" 81 | return self.buffer_queue.get() 82 | 83 | def destroy(self): 84 | self.stream.stop_stream() 85 | self.stream.close() 86 | self.pa.terminate() 87 | 88 | frame_duration_ms = property(lambda self: 1000 * self.block_size // self.sample_rate) 89 | 90 | def write_wav(self, filename, data): 91 | logging.info("write wav %s", filename) 92 | wf = wave.open(filename, 'wb') 93 | wf.setnchannels(self.CHANNELS) 94 | # wf.setsampwidth(self.pa.get_sample_size(FORMAT)) 95 | assert self.FORMAT == pyaudio.paInt16 96 | wf.setsampwidth(2) 97 | wf.setframerate(self.sample_rate) 98 | wf.writeframes(data) 99 | wf.close() 100 | 101 | 102 | class VADAudio(Audio): 103 | """Filter & segment audio with voice activity detection.""" 104 | 105 | def __init__(self, aggressiveness=3, device=None, input_rate=None, file=None): 106 | super().__init__(device=device, input_rate=input_rate, file=file) 107 | self.vad = webrtcvad.Vad(aggressiveness) 108 | 109 | def frame_generator(self): 110 | """Generator that yields all audio frames from microphone.""" 111 | if self.input_rate == self.RATE_PROCESS: 112 | while True: 113 | yield self.read() 114 | else: 115 | while True: 
116 | yield self.read_resampled() 117 | 118 | def vad_collector(self, padding_ms=300, ratio=0.75, frames=None): 119 | """Generator that yields series of consecutive audio frames comprising each utterence, separated by yielding a single None. 120 | Determines voice activity by ratio of frames in padding_ms. Uses a buffer to include padding_ms prior to being triggered. 121 | Example: (frame, ..., frame, None, frame, ..., frame, None, ...) 122 | |---utterence---| |---utterence---| 123 | """ 124 | if frames is None: frames = self.frame_generator() 125 | num_padding_frames = padding_ms // self.frame_duration_ms 126 | ring_buffer = collections.deque(maxlen=num_padding_frames) 127 | triggered = False 128 | 129 | for frame in frames: 130 | if len(frame) < 640: 131 | return 132 | 133 | is_speech = self.vad.is_speech(frame, self.sample_rate) 134 | 135 | if not triggered: 136 | ring_buffer.append((frame, is_speech)) 137 | num_voiced = len([f for f, speech in ring_buffer if speech]) 138 | if num_voiced > ratio * ring_buffer.maxlen: 139 | triggered = True 140 | for f, s in ring_buffer: 141 | yield f 142 | ring_buffer.clear() 143 | 144 | else: 145 | yield frame 146 | ring_buffer.append((frame, is_speech)) 147 | num_unvoiced = len([f for f, speech in ring_buffer if not speech]) 148 | if num_unvoiced > ratio * ring_buffer.maxlen: 149 | triggered = False 150 | yield None 151 | ring_buffer.clear() 152 | 153 | def main(ARGS, subject): 154 | # Load DeepSpeech model 155 | if os.path.isdir(ARGS.model): 156 | model_dir = ARGS.model 157 | ARGS.model = os.path.join(model_dir, 'output_graph.pb') 158 | ARGS.scorer = os.path.join(model_dir, ARGS.scorer) 159 | 160 | print('Initializing model...') 161 | logging.info("ARGS.model: %s", ARGS.model) 162 | model = deepspeech.Model(ARGS.model) 163 | if ARGS.scorer: 164 | logging.info("ARGS.scorer: %s", ARGS.scorer) 165 | model.enableExternalScorer(ARGS.scorer) 166 | 167 | # Start audio with VAD 168 | vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness, 169 | device=ARGS.device, 170 | input_rate=ARGS.rate, 171 | file=ARGS.file) 172 | print("Listening (ctrl-C to exit)...") 173 | frames = vad_audio.vad_collector() 174 | 175 | # Stream from microphone to DeepSpeech using VAD 176 | spinner = None 177 | if not ARGS.nospinner: 178 | spinner = Halo(spinner='line') 179 | stream_context = model.createStream() 180 | wav_data = bytearray() 181 | for frame in frames: 182 | if frame is not None: 183 | if spinner: spinner.start() 184 | logging.debug("streaming frame") 185 | stream_context.feedAudioContent(np.frombuffer(frame, np.int16)) 186 | if ARGS.savewav: wav_data.extend(frame) 187 | else: 188 | if spinner: spinner.stop() 189 | logging.debug("end utterence") 190 | if ARGS.savewav: 191 | vad_audio.write_wav(os.path.join(ARGS.savewav, datetime.now().strftime("savewav_%Y-%m-%d_%H-%M-%S_%f.wav")), wav_data) 192 | wav_data = bytearray() 193 | text = stream_context.finishStream() 194 | print("Recognized: %s" % text) 195 | text_file = open(subject+'.txt','w') 196 | n = text_file.write(text) 197 | text_file.close 198 | stream_context = model.createStream() 199 | 200 | if __name__ == '__main__': 201 | DEFAULT_SAMPLE_RATE = 16000 202 | 203 | import argparse 204 | parser = argparse.ArgumentParser(description="Stream from microphone to DeepSpeech using VAD") 205 | 206 | parser.add_argument('-v', '--vad_aggressiveness', type=int, default=3, 207 | help="Set aggressiveness of VAD: an integer between 0 and 3, 0 being the least aggressive about filtering out non-speech, 3 the most aggressive. 
Default: 3") 208 | parser.add_argument('--nospinner', action='store_true', 209 | help="Disable spinner") 210 | parser.add_argument('-w', '--savewav', 211 | help="Save .wav files of utterences to given directory") 212 | parser.add_argument('-f', '--file', 213 | help="Read from .wav file instead of microphone") 214 | 215 | parser.add_argument('-m', '--model', required=True, 216 | help="Path to the model (protocol buffer binary file, or entire directory containing all standard-named files for model)") 217 | parser.add_argument('-s', '--scorer', 218 | help="Path to the external scorer file.") 219 | parser.add_argument('-d', '--device', type=int, default=None, 220 | help="Device input index (Int) as listed by pyaudio.PyAudio.get_device_info_by_index(). If not provided, falls back to PyAudio.get_default_device().") 221 | parser.add_argument('-r', '--rate', type=int, default=DEFAULT_SAMPLE_RATE, 222 | help=f"Input device sample rate. Default: {DEFAULT_SAMPLE_RATE}. Your device may require 44100.") 223 | 224 | ARGS = parser.parse_args() 225 | if ARGS.savewav: os.makedirs(ARGS.savewav, exist_ok=True) 226 | main(ARGS, subject) 227 | -------------------------------------------------------------------------------- /pose-estimation/modules/keypoints.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from operator import itemgetter 4 | 5 | BODY_PARTS_KPT_IDS = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], 6 | [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 16], [5, 17]] 7 | BODY_PARTS_PAF_IDS = ([12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3], [4, 5], 8 | [6, 7], [8, 9], [10, 11], [28, 29], [30, 31], [34, 35], [32, 33], [36, 37], [18, 19], [26, 27]) 9 | 10 | 11 | def linspace2d(start, stop, n=10): 12 | points = 1 / (n - 1) * (stop - start) 13 | return points[:, None] * np.arange(n) + start[:, None] 14 | 15 | 16 | def extract_keypoints(heatmap, all_keypoints, total_keypoint_num): 17 | heatmap[heatmap < 0.1] = 0 18 | heatmap_with_borders = np.pad(heatmap, [(2, 2), (2, 2)], mode='constant') 19 | heatmap_center = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 1:heatmap_with_borders.shape[1]-1] 20 | heatmap_left = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 2:heatmap_with_borders.shape[1]] 21 | heatmap_right = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 0:heatmap_with_borders.shape[1]-2] 22 | heatmap_up = heatmap_with_borders[2:heatmap_with_borders.shape[0], 1:heatmap_with_borders.shape[1]-1] 23 | heatmap_down = heatmap_with_borders[0:heatmap_with_borders.shape[0]-2, 1:heatmap_with_borders.shape[1]-1] 24 | 25 | heatmap_peaks = (heatmap_center > heatmap_left) &\ 26 | (heatmap_center > heatmap_right) &\ 27 | (heatmap_center > heatmap_up) &\ 28 | (heatmap_center > heatmap_down) 29 | heatmap_peaks = heatmap_peaks[1:heatmap_center.shape[0]-1, 1:heatmap_center.shape[1]-1] 30 | keypoints = list(zip(np.nonzero(heatmap_peaks)[1], np.nonzero(heatmap_peaks)[0])) # (w, h) 31 | keypoints = sorted(keypoints, key=itemgetter(0)) 32 | 33 | suppressed = np.zeros(len(keypoints), np.uint8) 34 | keypoints_with_score_and_id = [] 35 | keypoint_num = 0 36 | for i in range(len(keypoints)): 37 | if suppressed[i]: 38 | continue 39 | for j in range(i+1, len(keypoints)): 40 | if math.sqrt((keypoints[i][0] - keypoints[j][0]) ** 2 + 41 | (keypoints[i][1] - keypoints[j][1]) ** 2) < 6: 42 | suppressed[j] = 1 43 | keypoint_with_score_and_id = 
(keypoints[i][0], keypoints[i][1], heatmap[keypoints[i][1], keypoints[i][0]], 44 | total_keypoint_num + keypoint_num) 45 | keypoints_with_score_and_id.append(keypoint_with_score_and_id) 46 | keypoint_num += 1 47 | all_keypoints.append(keypoints_with_score_and_id) 48 | return keypoint_num 49 | 50 | 51 | def group_keypoints(all_keypoints_by_type, pafs, pose_entry_size=20, min_paf_score=0.05, demo=False): 52 | pose_entries = [] 53 | all_keypoints = np.array([item for sublist in all_keypoints_by_type for item in sublist]) 54 | for part_id in range(len(BODY_PARTS_PAF_IDS)): 55 | part_pafs = pafs[:, :, BODY_PARTS_PAF_IDS[part_id]] 56 | kpts_a = all_keypoints_by_type[BODY_PARTS_KPT_IDS[part_id][0]] 57 | kpts_b = all_keypoints_by_type[BODY_PARTS_KPT_IDS[part_id][1]] 58 | num_kpts_a = len(kpts_a) 59 | num_kpts_b = len(kpts_b) 60 | kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] 61 | kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] 62 | 63 | if num_kpts_a == 0 and num_kpts_b == 0: # no keypoints for such body part 64 | continue 65 | elif num_kpts_a == 0: # body part has just 'b' keypoints 66 | for i in range(num_kpts_b): 67 | num = 0 68 | for j in range(len(pose_entries)): # check if already in some pose, was added by another body part 69 | if pose_entries[j][kpt_b_id] == kpts_b[i][3]: 70 | num += 1 71 | continue 72 | if num == 0: 73 | pose_entry = np.ones(pose_entry_size) * -1 74 | pose_entry[kpt_b_id] = kpts_b[i][3] # keypoint idx 75 | pose_entry[-1] = 1 # num keypoints in pose 76 | pose_entry[-2] = kpts_b[i][2] # pose score 77 | pose_entries.append(pose_entry) 78 | continue 79 | elif num_kpts_b == 0: # body part has just 'a' keypoints 80 | for i in range(num_kpts_a): 81 | num = 0 82 | for j in range(len(pose_entries)): 83 | if pose_entries[j][kpt_a_id] == kpts_a[i][3]: 84 | num += 1 85 | continue 86 | if num == 0: 87 | pose_entry = np.ones(pose_entry_size) * -1 88 | pose_entry[kpt_a_id] = kpts_a[i][3] 89 | pose_entry[-1] = 1 90 | pose_entry[-2] = kpts_a[i][2] 91 | pose_entries.append(pose_entry) 92 | continue 93 | 94 | connections = [] 95 | for i in range(num_kpts_a): 96 | kpt_a = np.array(kpts_a[i][0:2]) 97 | for j in range(num_kpts_b): 98 | kpt_b = np.array(kpts_b[j][0:2]) 99 | mid_point = [(), ()] 100 | mid_point[0] = (int(round((kpt_a[0] + kpt_b[0]) * 0.5)), 101 | int(round((kpt_a[1] + kpt_b[1]) * 0.5))) 102 | mid_point[1] = mid_point[0] 103 | 104 | vec = [kpt_b[0] - kpt_a[0], kpt_b[1] - kpt_a[1]] 105 | vec_norm = math.sqrt(vec[0] ** 2 + vec[1] ** 2) 106 | if vec_norm == 0: 107 | continue 108 | vec[0] /= vec_norm 109 | vec[1] /= vec_norm 110 | cur_point_score = (vec[0] * part_pafs[mid_point[0][1], mid_point[0][0], 0] + 111 | vec[1] * part_pafs[mid_point[1][1], mid_point[1][0], 1]) 112 | 113 | height_n = pafs.shape[0] // 2 114 | success_ratio = 0 115 | point_num = 10 # number of points to integration over paf 116 | if cur_point_score > -100: 117 | passed_point_score = 0 118 | passed_point_num = 0 119 | x, y = linspace2d(kpt_a, kpt_b) 120 | for point_idx in range(point_num): 121 | if not demo: 122 | px = int(round(x[point_idx])) 123 | py = int(round(y[point_idx])) 124 | else: 125 | px = int(x[point_idx]) 126 | py = int(y[point_idx]) 127 | paf = part_pafs[py, px, 0:2] 128 | cur_point_score = vec[0] * paf[0] + vec[1] * paf[1] 129 | if cur_point_score > min_paf_score: 130 | passed_point_score += cur_point_score 131 | passed_point_num += 1 132 | success_ratio = passed_point_num / point_num 133 | ratio = 0 134 | if passed_point_num > 0: 135 | ratio = passed_point_score / passed_point_num 136 | ratio += 
min(height_n / vec_norm - 1, 0) 137 | if ratio > 0 and success_ratio > 0.8: 138 | score_all = ratio + kpts_a[i][2] + kpts_b[j][2] 139 | connections.append([i, j, ratio, score_all]) 140 | if len(connections) > 0: 141 | connections = sorted(connections, key=itemgetter(2), reverse=True) 142 | 143 | num_connections = min(num_kpts_a, num_kpts_b) 144 | has_kpt_a = np.zeros(num_kpts_a, dtype=np.int32) 145 | has_kpt_b = np.zeros(num_kpts_b, dtype=np.int32) 146 | filtered_connections = [] 147 | for row in range(len(connections)): 148 | if len(filtered_connections) == num_connections: 149 | break 150 | i, j, cur_point_score = connections[row][0:3] 151 | if not has_kpt_a[i] and not has_kpt_b[j]: 152 | filtered_connections.append([kpts_a[i][3], kpts_b[j][3], cur_point_score]) 153 | has_kpt_a[i] = 1 154 | has_kpt_b[j] = 1 155 | connections = filtered_connections 156 | if len(connections) == 0: 157 | continue 158 | 159 | if part_id == 0: 160 | pose_entries = [np.ones(pose_entry_size) * -1 for _ in range(len(connections))] 161 | for i in range(len(connections)): 162 | pose_entries[i][BODY_PARTS_KPT_IDS[0][0]] = connections[i][0] 163 | pose_entries[i][BODY_PARTS_KPT_IDS[0][1]] = connections[i][1] 164 | pose_entries[i][-1] = 2 165 | pose_entries[i][-2] = np.sum(all_keypoints[connections[i][0:2], 2]) + connections[i][2] 166 | elif part_id == 17 or part_id == 18: 167 | kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] 168 | kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] 169 | for i in range(len(connections)): 170 | for j in range(len(pose_entries)): 171 | if pose_entries[j][kpt_a_id] == connections[i][0] and pose_entries[j][kpt_b_id] == -1: 172 | pose_entries[j][kpt_b_id] = connections[i][1] 173 | elif pose_entries[j][kpt_b_id] == connections[i][1] and pose_entries[j][kpt_a_id] == -1: 174 | pose_entries[j][kpt_a_id] = connections[i][0] 175 | continue 176 | else: 177 | kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] 178 | kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] 179 | for i in range(len(connections)): 180 | num = 0 181 | for j in range(len(pose_entries)): 182 | if pose_entries[j][kpt_a_id] == connections[i][0]: 183 | pose_entries[j][kpt_b_id] = connections[i][1] 184 | num += 1 185 | pose_entries[j][-1] += 1 186 | pose_entries[j][-2] += all_keypoints[connections[i][1], 2] + connections[i][2] 187 | if num == 0: 188 | pose_entry = np.ones(pose_entry_size) * -1 189 | pose_entry[kpt_a_id] = connections[i][0] 190 | pose_entry[kpt_b_id] = connections[i][1] 191 | pose_entry[-1] = 2 192 | pose_entry[-2] = np.sum(all_keypoints[connections[i][0:2], 2]) + connections[i][2] 193 | pose_entries.append(pose_entry) 194 | 195 | filtered_entries = [] 196 | for i in range(len(pose_entries)): 197 | if pose_entries[i][-1] < 3 or (pose_entries[i][-2] / pose_entries[i][-1] < 0.2): 198 | continue 199 | filtered_entries.append(pose_entries[i]) 200 | pose_entries = np.asarray(filtered_entries) 201 | return pose_entries, all_keypoints 202 | -------------------------------------------------------------------------------- /pose-estimation/datasets/transformations.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | 7 | class ConvertKeypoints: 8 | def __call__(self, sample): 9 | label = sample['label'] 10 | h, w, _ = sample['image'].shape 11 | keypoints = label['keypoints'] 12 | for keypoint in keypoints: # keypoint[2] == 0: occluded, == 1: visible, == 2: not in image 13 | if keypoint[0] == keypoint[1] == 0: 14 | keypoint[2] = 2 15 | if (keypoint[0] < 0 16 | 
or keypoint[0] >= w 17 | or keypoint[1] < 0 18 | or keypoint[1] >= h): 19 | keypoint[2] = 2 20 | for other_label in label['processed_other_annotations']: 21 | keypoints = other_label['keypoints'] 22 | for keypoint in keypoints: 23 | if keypoint[0] == keypoint[1] == 0: 24 | keypoint[2] = 2 25 | if (keypoint[0] < 0 26 | or keypoint[0] >= w 27 | or keypoint[1] < 0 28 | or keypoint[1] >= h): 29 | keypoint[2] = 2 30 | label['keypoints'] = self._convert(label['keypoints'], w, h) 31 | 32 | for other_label in label['processed_other_annotations']: 33 | other_label['keypoints'] = self._convert(other_label['keypoints'], w, h) 34 | return sample 35 | 36 | def _convert(self, keypoints, w, h): 37 | # Nose, Neck, R hand, L hand, R leg, L leg, Eyes, Ears 38 | reorder_map = [1, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4] 39 | converted_keypoints = list(keypoints[i - 1] for i in reorder_map) 40 | converted_keypoints.insert(1, [(keypoints[5][0] + keypoints[6][0]) / 2, 41 | (keypoints[5][1] + keypoints[6][1]) / 2, 0]) # Add neck as a mean of shoulders 42 | if keypoints[5][2] == 2 or keypoints[6][2] == 2: 43 | converted_keypoints[1][2] = 2 44 | elif keypoints[5][2] == 1 and keypoints[6][2] == 1: 45 | converted_keypoints[1][2] = 1 46 | if (converted_keypoints[1][0] < 0 47 | or converted_keypoints[1][0] >= w 48 | or converted_keypoints[1][1] < 0 49 | or converted_keypoints[1][1] >= h): 50 | converted_keypoints[1][2] = 2 51 | return converted_keypoints 52 | 53 | 54 | class Scale: 55 | def __init__(self, prob=1, min_scale=0.5, max_scale=1.1, target_dist=0.6): 56 | self._prob = prob 57 | self._min_scale = min_scale 58 | self._max_scale = max_scale 59 | self._target_dist = target_dist 60 | 61 | def __call__(self, sample): 62 | prob = random.random() 63 | scale_multiplier = 1 64 | if prob <= self._prob: 65 | prob = random.random() 66 | scale_multiplier = (self._max_scale - self._min_scale) * prob + self._min_scale 67 | label = sample['label'] 68 | scale_abs = self._target_dist / label['scale_provided'] 69 | scale = scale_abs * scale_multiplier 70 | sample['image'] = cv2.resize(sample['image'], dsize=(0, 0), fx=scale, fy=scale) 71 | label['img_height'], label['img_width'], _ = sample['image'].shape 72 | sample['mask'] = cv2.resize(sample['mask'], dsize=(0, 0), fx=scale, fy=scale) 73 | 74 | label['objpos'][0] *= scale 75 | label['objpos'][1] *= scale 76 | for keypoint in sample['label']['keypoints']: 77 | keypoint[0] *= scale 78 | keypoint[1] *= scale 79 | for other_annotation in sample['label']['processed_other_annotations']: 80 | other_annotation['objpos'][0] *= scale 81 | other_annotation['objpos'][1] *= scale 82 | for keypoint in other_annotation['keypoints']: 83 | keypoint[0] *= scale 84 | keypoint[1] *= scale 85 | return sample 86 | 87 | 88 | class Rotate: 89 | def __init__(self, pad, max_rotate_degree=40): 90 | self._pad = pad 91 | self._max_rotate_degree = max_rotate_degree 92 | 93 | def __call__(self, sample): 94 | prob = random.random() 95 | degree = (prob - 0.5) * 2 * self._max_rotate_degree 96 | h, w, _ = sample['image'].shape 97 | img_center = (w / 2, h / 2) 98 | R = cv2.getRotationMatrix2D(img_center, degree, 1) 99 | 100 | abs_cos = abs(R[0, 0]) 101 | abs_sin = abs(R[0, 1]) 102 | 103 | bound_w = int(h * abs_sin + w * abs_cos) 104 | bound_h = int(h * abs_cos + w * abs_sin) 105 | dsize = (bound_w, bound_h) 106 | 107 | R[0, 2] += dsize[0] / 2 - img_center[0] 108 | R[1, 2] += dsize[1] / 2 - img_center[1] 109 | sample['image'] = cv2.warpAffine(sample['image'], R, dsize=dsize, 110 | 
borderMode=cv2.BORDER_CONSTANT, borderValue=self._pad) 111 | sample['label']['img_height'], sample['label']['img_width'], _ = sample['image'].shape 112 | sample['mask'] = cv2.warpAffine(sample['mask'], R, dsize=dsize, 113 | borderMode=cv2.BORDER_CONSTANT, borderValue=(1, 1, 1)) # border is ok 114 | label = sample['label'] 115 | label['objpos'] = self._rotate(label['objpos'], R) 116 | for keypoint in label['keypoints']: 117 | point = [keypoint[0], keypoint[1]] 118 | point = self._rotate(point, R) 119 | keypoint[0], keypoint[1] = point[0], point[1] 120 | for other_annotation in label['processed_other_annotations']: 121 | for keypoint in other_annotation['keypoints']: 122 | point = [keypoint[0], keypoint[1]] 123 | point = self._rotate(point, R) 124 | keypoint[0], keypoint[1] = point[0], point[1] 125 | return sample 126 | 127 | def _rotate(self, point, R): 128 | return [R[0, 0] * point[0] + R[0, 1] * point[1] + R[0, 2], 129 | R[1, 0] * point[0] + R[1, 1] * point[1] + R[1, 2]] 130 | 131 | 132 | class CropPad: 133 | def __init__(self, pad, center_perterb_max=40, crop_x=368, crop_y=368): 134 | self._pad = pad 135 | self._center_perterb_max = center_perterb_max 136 | self._crop_x = crop_x 137 | self._crop_y = crop_y 138 | 139 | def __call__(self, sample): 140 | prob_x = random.random() 141 | prob_y = random.random() 142 | 143 | offset_x = int((prob_x - 0.5) * 2 * self._center_perterb_max) 144 | offset_y = int((prob_y - 0.5) * 2 * self._center_perterb_max) 145 | label = sample['label'] 146 | shifted_center = (label['objpos'][0] + offset_x, label['objpos'][1] + offset_y) 147 | offset_left = -int(shifted_center[0] - self._crop_x / 2) 148 | offset_up = -int(shifted_center[1] - self._crop_y / 2) 149 | 150 | cropped_image = np.empty(shape=(self._crop_y, self._crop_x, 3), dtype=np.uint8) 151 | for i in range(3): 152 | cropped_image[:, :, i].fill(self._pad[i]) 153 | cropped_mask = np.empty(shape=(self._crop_y, self._crop_x), dtype=np.uint8) 154 | cropped_mask.fill(1) 155 | 156 | image_x_start = int(shifted_center[0] - self._crop_x / 2) 157 | image_y_start = int(shifted_center[1] - self._crop_y / 2) 158 | image_x_finish = image_x_start + self._crop_x 159 | image_y_finish = image_y_start + self._crop_y 160 | crop_x_start = 0 161 | crop_y_start = 0 162 | crop_x_finish = self._crop_x 163 | crop_y_finish = self._crop_y 164 | 165 | w, h = label['img_width'], label['img_height'] 166 | should_crop = True 167 | if image_x_start < 0: # Adjust crop area 168 | crop_x_start -= image_x_start 169 | image_x_start = 0 170 | if image_x_start >= w: 171 | should_crop = False 172 | 173 | if image_y_start < 0: 174 | crop_y_start -= image_y_start 175 | image_y_start = 0 176 | if image_y_start >= w: 177 | should_crop = False 178 | 179 | if image_x_finish > w: 180 | diff = image_x_finish - w 181 | image_x_finish -= diff 182 | crop_x_finish -= diff 183 | if image_x_finish < 0: 184 | should_crop = False 185 | 186 | if image_y_finish > h: 187 | diff = image_y_finish - h 188 | image_y_finish -= diff 189 | crop_y_finish -= diff 190 | if image_y_finish < 0: 191 | should_crop = False 192 | 193 | if should_crop: 194 | cropped_image[crop_y_start:crop_y_finish, crop_x_start:crop_x_finish, :] =\ 195 | sample['image'][image_y_start:image_y_finish, image_x_start:image_x_finish, :] 196 | cropped_mask[crop_y_start:crop_y_finish, crop_x_start:crop_x_finish] =\ 197 | sample['mask'][image_y_start:image_y_finish, image_x_start:image_x_finish] 198 | 199 | sample['image'] = cropped_image 200 | sample['mask'] = cropped_mask 201 | label['img_width'] = 
self._crop_x 202 | label['img_height'] = self._crop_y 203 | 204 | label['objpos'][0] += offset_left 205 | label['objpos'][1] += offset_up 206 | for keypoint in label['keypoints']: 207 | keypoint[0] += offset_left 208 | keypoint[1] += offset_up 209 | for other_annotation in label['processed_other_annotations']: 210 | for keypoint in other_annotation['keypoints']: 211 | keypoint[0] += offset_left 212 | keypoint[1] += offset_up 213 | 214 | return sample 215 | 216 | def _inside(self, point, width, height): 217 | if point[0] < 0 or point[1] < 0: 218 | return False 219 | if point[0] >= width or point[1] >= height: 220 | return False 221 | return True 222 | 223 | 224 | class Flip: 225 | def __init__(self, prob=0.5): 226 | self._prob = prob 227 | 228 | def __call__(self, sample): 229 | prob = random.random() 230 | do_flip = prob <= self._prob 231 | if not do_flip: 232 | return sample 233 | 234 | sample['image'] = cv2.flip(sample['image'], 1) 235 | sample['mask'] = cv2.flip(sample['mask'], 1) 236 | 237 | label = sample['label'] 238 | w, h = label['img_width'], label['img_height'] 239 | label['objpos'][0] = w - 1 - label['objpos'][0] 240 | for keypoint in label['keypoints']: 241 | keypoint[0] = w - 1 - keypoint[0] 242 | label['keypoints'] = self._swap_left_right(label['keypoints']) 243 | 244 | for other_annotation in label['processed_other_annotations']: 245 | other_annotation['objpos'][0] = w - 1 - other_annotation['objpos'][0] 246 | for keypoint in other_annotation['keypoints']: 247 | keypoint[0] = w - 1 - keypoint[0] 248 | other_annotation['keypoints'] = self._swap_left_right(other_annotation['keypoints']) 249 | 250 | return sample 251 | 252 | def _swap_left_right(self, keypoints): 253 | right = [2, 3, 4, 8, 9, 10, 14, 16] 254 | left = [5, 6, 7, 11, 12, 13, 15, 17] 255 | for r, l in zip(right, left): 256 | keypoints[r], keypoints[l] = keypoints[l], keypoints[r] 257 | return keypoints 258 | -------------------------------------------------------------------------------- /emotion-detection/BlazeFace_PyTorch/blazeface.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class BlazeBlock(nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1): 9 | super(BlazeBlock, self).__init__() 10 | 11 | self.stride = stride 12 | self.channel_pad = out_channels - in_channels 13 | 14 | # TFLite uses slightly different padding than PyTorch 15 | # on the depthwise conv layer when the stride is 2. 16 | if stride == 2: 17 | self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride) 18 | padding = 0 19 | else: 20 | padding = (kernel_size - 1) // 2 21 | 22 | self.convs = nn.Sequential( 23 | nn.Conv2d(in_channels=in_channels, out_channels=in_channels, 24 | kernel_size=kernel_size, stride=stride, padding=padding, 25 | groups=in_channels, bias=True), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 27 | kernel_size=1, stride=1, padding=0, bias=True), 28 | ) 29 | 30 | self.act = nn.ReLU(inplace=True) 31 | 32 | def forward(self, x): 33 | if self.stride == 2: 34 | h = F.pad(x, (0, 2, 0, 2), "constant", 0) 35 | x = self.max_pool(x) 36 | else: 37 | h = x 38 | 39 | if self.channel_pad > 0: 40 | x = F.pad(x, (0, 0, 0, 0, 0, self.channel_pad), "constant", 0) 41 | 42 | return self.act(self.convs(h) + x) 43 | 44 | 45 | class BlazeFace(nn.Module): 46 | """The BlazeFace face detection model from MediaPipe. 
47 | 48 | The version from MediaPipe is simpler than the one in the paper; 49 | it does not use the "double" BlazeBlocks. 50 | 51 | Because we won't be training this model, it doesn't need to have 52 | batchnorm layers. These have already been "folded" into the conv 53 | weights by TFLite. 54 | 55 | The conversion to PyTorch is fairly straightforward, but there are 56 | some small differences between TFLite and PyTorch in how they handle 57 | padding on conv layers with stride 2. 58 | 59 | This version works on batches, while the MediaPipe version can only 60 | handle a single image at a time. 61 | 62 | Based on code from https://github.com/tkat0/PyTorch_BlazeFace/ and 63 | https://github.com/google/mediapipe/ 64 | """ 65 | input_size = (128, 128) 66 | 67 | def __init__(self): 68 | super(BlazeFace, self).__init__() 69 | 70 | # These are the settings from the MediaPipe example graph 71 | # mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt 72 | self.num_classes = 1 73 | self.num_anchors = 896 74 | self.num_coords = 16 75 | self.score_clipping_thresh = 100.0 76 | self.x_scale = 128.0 77 | self.y_scale = 128.0 78 | self.h_scale = 128.0 79 | self.w_scale = 128.0 80 | self.min_score_thresh = 0.75 81 | self.min_suppression_threshold = 0.3 82 | 83 | self._define_layers() 84 | 85 | def _define_layers(self): 86 | self.backbone1 = nn.Sequential( 87 | nn.Conv2d(in_channels=3, out_channels=24, kernel_size=5, stride=2, padding=0, bias=True), 88 | nn.ReLU(inplace=True), 89 | 90 | BlazeBlock(24, 24), 91 | BlazeBlock(24, 28), 92 | BlazeBlock(28, 32, stride=2), 93 | BlazeBlock(32, 36), 94 | BlazeBlock(36, 42), 95 | BlazeBlock(42, 48, stride=2), 96 | BlazeBlock(48, 56), 97 | BlazeBlock(56, 64), 98 | BlazeBlock(64, 72), 99 | BlazeBlock(72, 80), 100 | BlazeBlock(80, 88), 101 | ) 102 | 103 | self.backbone2 = nn.Sequential( 104 | BlazeBlock(88, 96, stride=2), 105 | BlazeBlock(96, 96), 106 | BlazeBlock(96, 96), 107 | BlazeBlock(96, 96), 108 | BlazeBlock(96, 96), 109 | ) 110 | 111 | self.classifier_8 = nn.Conv2d(88, 2, 1, bias=True) 112 | self.classifier_16 = nn.Conv2d(96, 6, 1, bias=True) 113 | 114 | self.regressor_8 = nn.Conv2d(88, 32, 1, bias=True) 115 | self.regressor_16 = nn.Conv2d(96, 96, 1, bias=True) 116 | 117 | def forward(self, x): 118 | # TFLite uses slightly different padding on the first conv layer 119 | # than PyTorch, so do it manually. 120 | x = F.pad(x, (1, 2, 1, 2), "constant", 0) 121 | 122 | b = x.shape[0] # batch size, needed for reshaping later 123 | 124 | x = self.backbone1(x) # (b, 88, 16, 16) 125 | h = self.backbone2(x) # (b, 96, 8, 8) 126 | 127 | # Note: Because PyTorch is NCHW but TFLite is NHWC, we need to 128 | # permute the output from the conv layers before reshaping it. 
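        # The 16x16 feature map carries 2 anchors per cell and the 8x8 map
        # carries 6, so the reshaped tensors below have 16*16*2 = 512 and
        # 8*8*6 = 384 rows, which together give the 896 anchors in self.num_anchors.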
129 | 130 | c1 = self.classifier_8(x) # (b, 2, 16, 16) 131 | c1 = c1.permute(0, 2, 3, 1) # (b, 16, 16, 2) 132 | c1 = c1.reshape(b, -1, 1) # (b, 512, 1) 133 | 134 | c2 = self.classifier_16(h) # (b, 6, 8, 8) 135 | c2 = c2.permute(0, 2, 3, 1) # (b, 8, 8, 6) 136 | c2 = c2.reshape(b, -1, 1) # (b, 384, 1) 137 | 138 | c = torch.cat((c1, c2), dim=1) # (b, 896, 1) 139 | 140 | r1 = self.regressor_8(x) # (b, 32, 16, 16) 141 | r1 = r1.permute(0, 2, 3, 1) # (b, 16, 16, 32) 142 | r1 = r1.reshape(b, -1, 16) # (b, 512, 16) 143 | 144 | r2 = self.regressor_16(h) # (b, 96, 8, 8) 145 | r2 = r2.permute(0, 2, 3, 1) # (b, 8, 8, 96) 146 | r2 = r2.reshape(b, -1, 16) # (b, 384, 16) 147 | 148 | r = torch.cat((r1, r2), dim=1) # (b, 896, 16) 149 | return [r, c] 150 | 151 | def _device(self): 152 | """Which device (CPU or GPU) is being used by this model?""" 153 | return self.classifier_8.weight.device 154 | 155 | def load_weights(self, path): 156 | self.load_state_dict(torch.load(path)) 157 | self.eval() 158 | 159 | def load_anchors(self, path): 160 | self.anchors = torch.tensor(np.load(path), dtype=torch.float32, device=self._device()) 161 | assert(self.anchors.ndimension() == 2) 162 | assert(self.anchors.shape[0] == self.num_anchors) 163 | assert(self.anchors.shape[1] == 4) 164 | 165 | def _preprocess(self, x): 166 | """Converts the image pixels to the range [-1, 1].""" 167 | return x.float() / 127.5 - 1.0 168 | 169 | def predict_on_image(self, img): 170 | """Makes a prediction on a single image. 171 | 172 | Arguments: 173 | img: a NumPy array of shape (H, W, 3) or a PyTorch tensor of 174 | shape (3, H, W). The image's height and width should be 175 | 128 pixels. 176 | 177 | Returns: 178 | A tensor with face detections. 179 | """ 180 | if isinstance(img, np.ndarray): 181 | img = torch.from_numpy(img).permute((2, 0, 1)) 182 | 183 | return self.predict_on_batch(img.unsqueeze(0))[0] 184 | 185 | def predict_on_batch(self, x, apply_nms=True): 186 | """Makes a prediction on a batch of images. 187 | 188 | Arguments: 189 | x: a NumPy array of shape (b, H, W, 3) or a PyTorch tensor of 190 | shape (b, 3, H, W). The height and width should be 128 pixels. 191 | apply_nms: pass False to not apply non-max suppression 192 | 193 | Returns: 194 | A list containing a tensor of face detections for each image in 195 | the batch. If no faces are found for an image, returns a tensor 196 | of shape (0, 17). 197 | 198 | Each face detection is a PyTorch tensor consisting of 17 numbers: 199 | - ymin, xmin, ymax, xmax 200 | - x,y-coordinates for the 6 keypoints 201 | - confidence score 202 | """ 203 | if isinstance(x, np.ndarray): 204 | x = torch.from_numpy(x).permute((0, 3, 1, 2)) 205 | 206 | assert x.shape[1] == 3 207 | assert x.shape[2] == 128 208 | assert x.shape[3] == 128 209 | 210 | # 1. Preprocess the images into tensors: 211 | x = x.to(self._device()) 212 | x = self._preprocess(x) 213 | 214 | # 2. Run the neural network: 215 | with torch.no_grad(): 216 | out = self.__call__(x) 217 | 218 | # 3. Postprocess the raw predictions: 219 | detections = self._tensors_to_detections(out[0], out[1], self.anchors) 220 | 221 | # 4. 
Non-maximum suppression to remove overlapping detections: 222 | return self.nms(detections) if apply_nms else detections 223 | 224 | def nms(self, detections): 225 | """Filters out overlapping detections.""" 226 | filtered_detections = [] 227 | for i in range(len(detections)): 228 | faces = self._weighted_non_max_suppression(detections[i]) 229 | faces = torch.stack(faces) if len(faces) > 0 else torch.zeros((0, 17), device=self._device()) 230 | filtered_detections.append(faces) 231 | 232 | return filtered_detections 233 | 234 | def _tensors_to_detections(self, raw_box_tensor, raw_score_tensor, anchors): 235 | """The output of the neural network is a tensor of shape (b, 896, 16) 236 | containing the bounding box regressor predictions, as well as a tensor 237 | of shape (b, 896, 1) with the classification confidences. 238 | 239 | This function converts these two "raw" tensors into proper detections. 240 | Returns a list of (num_detections, 17) tensors, one for each image in 241 | the batch. 242 | 243 | This is based on the source code from: 244 | mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc 245 | mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto 246 | """ 247 | assert raw_box_tensor.ndimension() == 3 248 | assert raw_box_tensor.shape[1] == self.num_anchors 249 | assert raw_box_tensor.shape[2] == self.num_coords 250 | 251 | assert raw_score_tensor.ndimension() == 3 252 | assert raw_score_tensor.shape[1] == self.num_anchors 253 | assert raw_score_tensor.shape[2] == self.num_classes 254 | 255 | assert raw_box_tensor.shape[0] == raw_score_tensor.shape[0] 256 | 257 | detection_boxes = self._decode_boxes(raw_box_tensor, anchors) 258 | 259 | thresh = self.score_clipping_thresh 260 | raw_score_tensor = raw_score_tensor.clamp(-thresh, thresh) 261 | detection_scores = raw_score_tensor.sigmoid().squeeze(dim=-1) 262 | 263 | # Note: we stripped off the last dimension from the scores tensor 264 | # because there is only has one class. Now we can simply use a mask 265 | # to filter out the boxes with too low confidence. 266 | mask = detection_scores >= self.min_score_thresh 267 | 268 | # Because each image from the batch can have a different number of 269 | # detections, process them one at a time using a loop. 270 | output_detections = [] 271 | for i in range(raw_box_tensor.shape[0]): 272 | boxes = detection_boxes[i, mask[i]] 273 | scores = detection_scores[i, mask[i]].unsqueeze(dim=-1) 274 | output_detections.append(torch.cat((boxes, scores), dim=-1)) 275 | 276 | return output_detections 277 | 278 | def _decode_boxes(self, raw_boxes, anchors): 279 | """Converts the predictions into actual coordinates using 280 | the anchor boxes. Processes the entire batch at once. 281 | """ 282 | boxes = torch.zeros_like(raw_boxes) 283 | 284 | x_center = raw_boxes[..., 0] / self.x_scale * anchors[:, 2] + anchors[:, 0] 285 | y_center = raw_boxes[..., 1] / self.y_scale * anchors[:, 3] + anchors[:, 1] 286 | 287 | w = raw_boxes[..., 2] / self.w_scale * anchors[:, 2] 288 | h = raw_boxes[..., 3] / self.h_scale * anchors[:, 3] 289 | 290 | boxes[..., 0] = y_center - h / 2. # ymin 291 | boxes[..., 1] = x_center - w / 2. # xmin 292 | boxes[..., 2] = y_center + h / 2. # ymax 293 | boxes[..., 3] = x_center + w / 2. 
# xmax 294 | 295 | for k in range(6): 296 | offset = 4 + k*2 297 | keypoint_x = raw_boxes[..., offset ] / self.x_scale * anchors[:, 2] + anchors[:, 0] 298 | keypoint_y = raw_boxes[..., offset + 1] / self.y_scale * anchors[:, 3] + anchors[:, 1] 299 | boxes[..., offset ] = keypoint_x 300 | boxes[..., offset + 1] = keypoint_y 301 | 302 | return boxes 303 | 304 | def _weighted_non_max_suppression(self, detections): 305 | """The alternative NMS method as mentioned in the BlazeFace paper: 306 | 307 | "We replace the suppression algorithm with a blending strategy that 308 | estimates the regression parameters of a bounding box as a weighted 309 | mean between the overlapping predictions." 310 | 311 | The original MediaPipe code assigns the score of the most confident 312 | detection to the weighted detection, but we take the average score 313 | of the overlapping detections. 314 | 315 | The input detections should be a Tensor of shape (count, 17). 316 | 317 | Returns a list of PyTorch tensors, one for each detected face. 318 | 319 | This is based on the source code from: 320 | mediapipe/calculators/util/non_max_suppression_calculator.cc 321 | mediapipe/calculators/util/non_max_suppression_calculator.proto 322 | """ 323 | if len(detections) == 0: return [] 324 | 325 | output_detections = [] 326 | 327 | # Sort the detections from highest to lowest score. 328 | remaining = torch.argsort(detections[:, 16], descending=True) 329 | 330 | while len(remaining) > 0: 331 | detection = detections[remaining[0]] 332 | 333 | # Compute the overlap between the first box and the other 334 | # remaining boxes. (Note that the other_boxes also include 335 | # the first_box.) 336 | first_box = detection[:4] 337 | other_boxes = detections[remaining, :4] 338 | ious = overlap_similarity(first_box, other_boxes) 339 | 340 | # If two detections don't overlap enough, they are considered 341 | # to be from different faces. 342 | mask = ious > self.min_suppression_threshold 343 | overlapping = remaining[mask] 344 | remaining = remaining[~mask] 345 | 346 | # Take an average of the coordinates from the overlapping 347 | # detections, weighted by their confidence scores. 348 | weighted_detection = detection.clone() 349 | if len(overlapping) > 1: 350 | coordinates = detections[overlapping, :16] 351 | scores = detections[overlapping, 16:17] 352 | total_score = scores.sum() 353 | weighted = (coordinates * scores).sum(dim=0) / total_score 354 | weighted_detection[:16] = weighted 355 | weighted_detection[16] = total_score / len(overlapping) 356 | 357 | output_detections.append(weighted_detection) 358 | 359 | return output_detections 360 | 361 | 362 | # IOU code from https://github.com/amdegroot/ssd.pytorch/blob/master/layers/box_utils.py 363 | 364 | def intersect(box_a, box_b): 365 | """ We resize both tensors to [A,B,2] without new malloc: 366 | [A,2] -> [A,1,2] -> [A,B,2] 367 | [B,2] -> [1,B,2] -> [A,B,2] 368 | Then we compute the area of intersect between box_a and box_b. 369 | Args: 370 | box_a: (tensor) bounding boxes, Shape: [A,4]. 371 | box_b: (tensor) bounding boxes, Shape: [B,4]. 372 | Return: 373 | (tensor) intersection area, Shape: [A,B]. 
374 | """ 375 | A = box_a.size(0) 376 | B = box_b.size(0) 377 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 378 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 379 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 380 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 381 | inter = torch.clamp((max_xy - min_xy), min=0) 382 | return inter[:, :, 0] * inter[:, :, 1] 383 | 384 | 385 | def jaccard(box_a, box_b): 386 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 387 | is simply the intersection over union of two boxes. Here we operate on 388 | ground truth boxes and default boxes. 389 | E.g.: 390 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 391 | Args: 392 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 393 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 394 | Return: 395 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 396 | """ 397 | inter = intersect(box_a, box_b) 398 | area_a = ((box_a[:, 2]-box_a[:, 0]) * 399 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 400 | area_b = ((box_b[:, 2]-box_b[:, 0]) * 401 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 402 | union = area_a + area_b - inter 403 | return inter / union # [A,B] 404 | 405 | 406 | def overlap_similarity(box, other_boxes): 407 | """Computes the IOU between a bounding box and set of other boxes.""" 408 | return jaccard(box.unsqueeze(0), other_boxes).squeeze(0) 409 | -------------------------------------------------------------------------------- /attendance-code.txt: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | LiquidCrystal lcd(13,12,11,10,9,8); 4 | #include 5 | SoftwareSerial fingerPrint(2, 3); 6 | 7 | #include 8 | #include "RTClib.h" 9 | RTC_DS1307 rtc; 10 | 11 | #include "Adafruit_Fingerprint.h" 12 | uint8_t id; 13 | Adafruit_Fingerprint finger = Adafruit_Fingerprint(&fingerPrint); 14 | 15 | #define enroll 14 16 | #define del 15 17 | #define up 16 18 | #define down 17 19 | #define match 5 20 | #define indFinger 7 21 | #define buzzer 5 22 | 23 | #define records 4 // 5 for 5 user 24 | 25 | int user1,user2,user3,user4,user5; 26 | 27 | DateTime now; 28 | 29 | void setup() 30 | { 31 | delay(1000); 32 | lcd.begin(16,2); 33 | Serial.begin(9600); 34 | pinMode(enroll, INPUT_PULLUP); 35 | pinMode(up, INPUT_PULLUP); 36 | pinMode(down, INPUT_PULLUP); 37 | pinMode(del, INPUT_PULLUP); 38 | pinMode(match, INPUT_PULLUP); 39 | pinMode(buzzer, OUTPUT); 40 | pinMode(indFinger, OUTPUT); 41 | digitalWrite(buzzer, LOW); 42 | if(digitalRead(enroll) == 0) 43 | { 44 | digitalWrite(buzzer, HIGH); 45 | delay(500); 46 | digitalWrite(buzzer, LOW); 47 | lcd.clear(); 48 | lcd.print("Please wait"); 49 | lcd.setCursor(0,1); 50 | lcd.print("Downloding Data"); 51 | 52 | Serial.println("Please wait"); 53 | Serial.println("Downloding Data.."); 54 | Serial.println(); 55 | 56 | Serial.print("S.No. 
"); 57 | for(int i=0;i"); 180 | lcd.print(now.hour(), DEC); 181 | lcd.print(':'); 182 | lcd.print(now.minute(), DEC); 183 | lcd.print(':'); 184 | lcd.print(now.second(), DEC); 185 | lcd.print(" "); 186 | lcd.setCursor(0,1); 187 | lcd.print("Date->"); 188 | lcd.print(now.day(), DEC); 189 | lcd.print('/'); 190 | lcd.print(now.month(), DEC); 191 | lcd.print('/'); 192 | lcd.print(now.year(), DEC); 193 | lcd.print(" "); 194 | delay(500); 195 | int result=getFingerprintIDez(); 196 | if(result>0) 197 | { 198 | digitalWrite(indFinger, LOW); 199 | digitalWrite(buzzer, HIGH); 200 | delay(100); 201 | digitalWrite(buzzer, LOW); 202 | lcd.clear(); 203 | lcd.print("ID:"); 204 | lcd.print(result); 205 | lcd.setCursor(0,1); 206 | lcd.print("Please Wait...."); 207 | delay(1000); 208 | attendance(result); 209 | lcd.clear(); 210 | lcd.print("Attendance "); 211 | lcd.setCursor(0,1); 212 | lcd.print("Registed"); 213 | delay(1000); 214 | digitalWrite(indFinger, HIGH); 215 | return; 216 | } 217 | checkKeys(); 218 | delay(300); 219 | } 220 | 221 | // dmyyhms - 7 bytes 222 | void attendance(int id) 223 | { 224 | int user=0,eepLoc=0; 225 | if(id == 1) 226 | { 227 | eepLoc=0; 228 | user=user1++; 229 | } 230 | else if(id == 2) 231 | { 232 | eepLoc=210; 233 | user=user2++; 234 | } 235 | else if(id == 3) 236 | { 237 | eepLoc=420; 238 | user=user3++; 239 | } 240 | else if(id == 4) 241 | { 242 | eepLoc=630; 243 | user=user4++; 244 | } 245 | /*else if(id == 5) // fifth user 246 | { 247 | eepLoc=840; 248 | user=user5++; 249 | }*/ 250 | else 251 | return; 252 | 253 | int eepIndex=(user*7)+eepLoc; 254 | EEPROM.write(eepIndex++, now.hour()); 255 | EEPROM.write(eepIndex++, now.minute()); 256 | EEPROM.write(eepIndex++, now.second()); 257 | EEPROM.write(eepIndex++, now.day()); 258 | EEPROM.write(eepIndex++, now.month()); 259 | EEPROM.write(eepIndex++, now.year()>>8 ); 260 | EEPROM.write(eepIndex++, now.year()); 261 | 262 | EEPROM.write(1000,user1); 263 | EEPROM.write(1001,user2); 264 | EEPROM.write(1002,user3); 265 | EEPROM.write(1003,user4); 266 | // EEPROM.write(4,user5); // figth user 267 | } 268 | 269 | void checkKeys() 270 | { 271 | if(digitalRead(enroll) == 0) 272 | { 273 | lcd.clear(); 274 | lcd.print("Please Wait"); 275 | delay(1000); 276 | while(digitalRead(enroll) == 0); 277 | Enroll(); 278 | } 279 | 280 | else if(digitalRead(del) == 0) 281 | { 282 | lcd.clear(); 283 | lcd.print("Please Wait"); 284 | delay(1000); 285 | delet(); 286 | } 287 | } 288 | 289 | void Enroll() 290 | { 291 | int count=1; 292 | lcd.clear(); 293 | lcd.print("Enter Finger ID:"); 294 | 295 | while(1) 296 | { 297 | lcd.setCursor(0,1); 298 | lcd.print(count); 299 | if(digitalRead(up) == 0) 300 | { 301 | count++; 302 | if(count>records) 303 | count=1; 304 | delay(500); 305 | } 306 | 307 | else if(digitalRead(down) == 0) 308 | { 309 | count--; 310 | if(count<1) 311 | count=records; 312 | delay(500); 313 | } 314 | else if(digitalRead(del) == 0) 315 | { 316 | id=count; 317 | getFingerprintEnroll(); 318 | for(int i=0;irecords) 350 | count=1; 351 | delay(500); 352 | } 353 | 354 | else if(digitalRead(down) == 0) 355 | { 356 | count--; 357 | if(count<1) 358 | count=records; 359 | delay(500); 360 | } 361 | else if(digitalRead(del) == 0) 362 | { 363 | id=count; 364 | deleteFingerprint(id); 365 | for(int i=0;i"); 622 | if(EEPROM.read(eepIndex)<10) 623 | Serial.print('0'); 624 | Serial.print(EEPROM.read(eepIndex++)); 625 | Serial.print(':'); 626 | if(EEPROM.read(eepIndex)<10) 627 | Serial.print('0'); 628 | Serial.print(EEPROM.read(eepIndex++)); 629 | 
Serial.print(':'); 630 | if(EEPROM.read(eepIndex)<10) 631 | Serial.print('0'); 632 | Serial.print(EEPROM.read(eepIndex++)); 633 | Serial.print(" D->"); 634 | if(EEPROM.read(eepIndex)<10) 635 | Serial.print('0'); 636 | Serial.print(EEPROM.read(eepIndex++)); 637 | Serial.print('/'); 638 | if(EEPROM.read(eepIndex)<10) 639 | Serial.print('0'); 640 | Serial.print(EEPROM.read(eepIndex++)); 641 | Serial.print('/'); 642 | Serial.print(EEPROM.read(eepIndex++)<<8 | EEPROM.read(eepIndex++)); 643 | } 644 | else 645 | { 646 | Serial.print("---------------------------"); 647 | } 648 | 649 | Serial.print(" "); 650 | } -------------------------------------------------------------------------------- /emotion-detection/functional.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import torch 3 | import math 4 | import random 5 | from PIL import Image, ImageOps, ImageEnhance 6 | try: 7 | import accimage 8 | except ImportError: 9 | accimage = None 10 | import numpy as np 11 | import numbers 12 | import types 13 | import collections 14 | import warnings 15 | 16 | 17 | def _is_pil_image(img): 18 | if accimage is not None: 19 | return isinstance(img, (Image.Image, accimage.Image)) 20 | else: 21 | return isinstance(img, Image.Image) 22 | 23 | 24 | def _is_tensor_image(img): 25 | return torch.is_tensor(img) and img.ndimension() == 3 26 | 27 | 28 | def _is_numpy_image(img): 29 | return isinstance(img, np.ndarray) and (img.ndim in {2, 3}) 30 | 31 | 32 | def to_tensor(pic): 33 | """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. 34 | See ``ToTensor`` for more details. 35 | Args: 36 | pic (PIL Image or numpy.ndarray): Image to be converted to tensor. 37 | Returns: 38 | Tensor: Converted image. 39 | """ 40 | if not(_is_pil_image(pic) or _is_numpy_image(pic)): 41 | raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic))) 42 | 43 | if isinstance(pic, np.ndarray): 44 | # handle numpy array 45 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 46 | # backward compatibility 47 | return img.float().div(255) 48 | 49 | if accimage is not None and isinstance(pic, accimage.Image): 50 | nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32) 51 | pic.copyto(nppic) 52 | return torch.from_numpy(nppic) 53 | 54 | # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) 61 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 62 | if pic.mode == 'YCbCr': 63 | nchannel = 3 64 | elif pic.mode == 'I;16': 65 | nchannel = 1 66 | else: 67 | nchannel = len(pic.mode) 68 | img = img.view(pic.size[1], pic.size[0], nchannel) 69 | # put it from HWC to CHW format 70 | # yikes, this transpose takes 80% of the loading time/CPU 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float().div(255) 74 | else: 75 | return img 76 | 77 | 78 | def to_pil_image(pic, mode=None): 79 | """Convert a tensor or an ndarray to PIL Image. 80 | See :class:`~torchvision.transforms.ToPIlImage` for more details. 81 | Args: 82 | pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. 83 | mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). 84 | .. 
_PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes 85 | Returns: 86 | PIL Image: Image converted to PIL Image. 87 | """ 88 | if not(_is_numpy_image(pic) or _is_tensor_image(pic)): 89 | raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic))) 90 | 91 | npimg = pic 92 | if isinstance(pic, torch.FloatTensor): 93 | pic = pic.mul(255).byte() 94 | if torch.is_tensor(pic): 95 | npimg = np.transpose(pic.numpy(), (1, 2, 0)) 96 | 97 | if not isinstance(npimg, np.ndarray): 98 | raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' + 99 | 'not {}'.format(type(npimg))) 100 | 101 | if npimg.shape[2] == 1: 102 | expected_mode = None 103 | npimg = npimg[:, :, 0] 104 | if npimg.dtype == np.uint8: 105 | expected_mode = 'L' 106 | if npimg.dtype == np.int16: 107 | expected_mode = 'I;16' 108 | if npimg.dtype == np.int32: 109 | expected_mode = 'I' 110 | elif npimg.dtype == np.float32: 111 | expected_mode = 'F' 112 | if mode is not None and mode != expected_mode: 113 | raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}" 114 | .format(mode, np.dtype, expected_mode)) 115 | mode = expected_mode 116 | 117 | elif npimg.shape[2] == 4: 118 | permitted_4_channel_modes = ['RGBA', 'CMYK'] 119 | if mode is not None and mode not in permitted_4_channel_modes: 120 | raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes)) 121 | 122 | if mode is None and npimg.dtype == np.uint8: 123 | mode = 'RGBA' 124 | else: 125 | permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV'] 126 | if mode is not None and mode not in permitted_3_channel_modes: 127 | raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes)) 128 | if mode is None and npimg.dtype == np.uint8: 129 | mode = 'RGB' 130 | 131 | if mode is None: 132 | raise TypeError('Input type {} is not supported'.format(npimg.dtype)) 133 | 134 | return Image.fromarray(npimg, mode=mode) 135 | 136 | 137 | def normalize(tensor, mean, std): 138 | """Normalize a tensor image with mean and standard deviation. 139 | See ``Normalize`` for more details. 140 | Args: 141 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 142 | mean (sequence): Sequence of means for each channel. 143 | std (sequence): Sequence of standard deviations for each channely. 144 | Returns: 145 | Tensor: Normalized Tensor image. 146 | """ 147 | if not _is_tensor_image(tensor): 148 | raise TypeError('tensor is not a torch image.') 149 | # TODO: make efficient 150 | for t, m, s in zip(tensor, mean, std): 151 | t.sub_(m).div_(s) 152 | return tensor 153 | 154 | 155 | def resize(img, size, interpolation=Image.BILINEAR): 156 | """Resize the input PIL Image to the given size. 157 | Args: 158 | img (PIL Image): Image to be resized. 159 | size (sequence or int): Desired output size. If size is a sequence like 160 | (h, w), the output size will be matched to this. If size is an int, 161 | the smaller edge of the image will be matched to this number maintaing 162 | the aspect ratio. i.e, if height > width, then image will be rescaled to 163 | (size * height / width, size) 164 | interpolation (int, optional): Desired interpolation. Default is 165 | ``PIL.Image.BILINEAR`` 166 | Returns: 167 | PIL Image: Resized image. 168 | """ 169 | if not _is_pil_image(img): 170 | raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) 171 | if not (isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2)): 172 | raise TypeError('Got inappropriate size arg: {}'.format(size)) 173 | 174 | if isinstance(size, int): 175 | w, h = img.size 176 | if (w <= h and w == size) or (h <= w and h == size): 177 | return img 178 | if w < h: 179 | ow = size 180 | oh = int(size * h / w) 181 | return img.resize((ow, oh), interpolation) 182 | else: 183 | oh = size 184 | ow = int(size * w / h) 185 | return img.resize((ow, oh), interpolation) 186 | else: 187 | return img.resize(size[::-1], interpolation) 188 | 189 | 190 | def scale(*args, **kwargs): 191 | warnings.warn("The use of the transforms.Scale transform is deprecated, " + 192 | "please use transforms.Resize instead.") 193 | return resize(*args, **kwargs) 194 | 195 | 196 | def pad(img, padding, fill=0): 197 | """Pad the given PIL Image on all sides with the given "pad" value. 198 | Args: 199 | img (PIL Image): Image to be padded. 200 | padding (int or tuple): Padding on each border. If a single int is provided this 201 | is used to pad all borders. If tuple of length 2 is provided this is the padding 202 | on left/right and top/bottom respectively. If a tuple of length 4 is provided 203 | this is the padding for the left, top, right and bottom borders 204 | respectively. 205 | fill: Pixel fill value. Default is 0. If a tuple of 206 | length 3, it is used to fill R, G, B channels respectively. 207 | Returns: 208 | PIL Image: Padded image. 209 | """ 210 | if not _is_pil_image(img): 211 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 212 | 213 | if not isinstance(padding, (numbers.Number, tuple)): 214 | raise TypeError('Got inappropriate padding arg') 215 | if not isinstance(fill, (numbers.Number, str, tuple)): 216 | raise TypeError('Got inappropriate fill arg') 217 | 218 | if isinstance(padding, collections.Sequence) and len(padding) not in [2, 4]: 219 | raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " + 220 | "{} element tuple".format(len(padding))) 221 | 222 | return ImageOps.expand(img, border=padding, fill=fill) 223 | 224 | 225 | def crop(img, i, j, h, w): 226 | """Crop the given PIL Image. 227 | Args: 228 | img (PIL Image): Image to be cropped. 229 | i: Upper pixel coordinate. 230 | j: Left pixel coordinate. 231 | h: Height of the cropped image. 232 | w: Width of the cropped image. 233 | Returns: 234 | PIL Image: Cropped image. 235 | """ 236 | if not _is_pil_image(img): 237 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 238 | 239 | return img.crop((j, i, j + w, i + h)) 240 | 241 | 242 | def center_crop(img, output_size): 243 | if isinstance(output_size, numbers.Number): 244 | output_size = (int(output_size), int(output_size)) 245 | w, h = img.size 246 | th, tw = output_size 247 | i = int(round((h - th) / 2.)) 248 | j = int(round((w - tw) / 2.)) 249 | return crop(img, i, j, th, tw) 250 | 251 | 252 | def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR): 253 | """Crop the given PIL Image and resize it to desired size. 254 | Notably used in RandomResizedCrop. 255 | Args: 256 | img (PIL Image): Image to be cropped. 257 | i: Upper pixel coordinate. 258 | j: Left pixel coordinate. 259 | h: Height of the cropped image. 260 | w: Width of the cropped image. 261 | size (sequence or int): Desired output size. Same semantics as ``scale``. 262 | interpolation (int, optional): Desired interpolation. Default is 263 | ``PIL.Image.BILINEAR``. 
264 | Returns: 265 | PIL Image: Cropped image. 266 | """ 267 | assert _is_pil_image(img), 'img should be PIL Image' 268 | img = crop(img, i, j, h, w) 269 | img = resize(img, size, interpolation) 270 | return img 271 | 272 | 273 | def hflip(img): 274 | """Horizontally flip the given PIL Image. 275 | Args: 276 | img (PIL Image): Image to be flipped. 277 | Returns: 278 | PIL Image: Horizontall flipped image. 279 | """ 280 | if not _is_pil_image(img): 281 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 282 | 283 | return img.transpose(Image.FLIP_LEFT_RIGHT) 284 | 285 | 286 | def vflip(img): 287 | """Vertically flip the given PIL Image. 288 | Args: 289 | img (PIL Image): Image to be flipped. 290 | Returns: 291 | PIL Image: Vertically flipped image. 292 | """ 293 | if not _is_pil_image(img): 294 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 295 | 296 | return img.transpose(Image.FLIP_TOP_BOTTOM) 297 | 298 | 299 | def five_crop(img, size): 300 | """Crop the given PIL Image into four corners and the central crop. 301 | .. Note:: 302 | This transform returns a tuple of images and there may be a 303 | mismatch in the number of inputs and targets your ``Dataset`` returns. 304 | Args: 305 | size (sequence or int): Desired output size of the crop. If size is an 306 | int instead of sequence like (h, w), a square crop (size, size) is 307 | made. 308 | Returns: 309 | tuple: tuple (tl, tr, bl, br, center) corresponding top left, 310 | top right, bottom left, bottom right and center crop. 311 | """ 312 | if isinstance(size, numbers.Number): 313 | size = (int(size), int(size)) 314 | else: 315 | assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 316 | 317 | w, h = img.size 318 | crop_h, crop_w = size 319 | if crop_w > w or crop_h > h: 320 | raise ValueError("Requested crop size {} is bigger than input size {}".format(size, 321 | (h, w))) 322 | tl = img.crop((0, 0, crop_w, crop_h)) 323 | tr = img.crop((w - crop_w, 0, w, crop_h)) 324 | bl = img.crop((0, h - crop_h, crop_w, h)) 325 | br = img.crop((w - crop_w, h - crop_h, w, h)) 326 | center = center_crop(img, (crop_h, crop_w)) 327 | return (tl, tr, bl, br, center) 328 | 329 | 330 | def ten_crop(img, size, vertical_flip=False): 331 | """Crop the given PIL Image into four corners and the central crop plus the 332 | flipped version of these (horizontal flipping is used by default). 333 | .. Note:: 334 | This transform returns a tuple of images and there may be a 335 | mismatch in the number of inputs and targets your ``Dataset`` returns. 336 | Args: 337 | size (sequence or int): Desired output size of the crop. If size is an 338 | int instead of sequence like (h, w), a square crop (size, size) is 339 | made. 340 | vertical_flip (bool): Use vertical flipping instead of horizontal 341 | Returns: 342 | tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, 343 | br_flip, center_flip) corresponding top left, top right, 344 | bottom left, bottom right and center crop and same for the 345 | flipped image. 346 | """ 347 | if isinstance(size, numbers.Number): 348 | size = (int(size), int(size)) 349 | else: 350 | assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 
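    # ten_crop composes five_crop with a flipped copy of the image: the result
    # is a 10-tuple of the five corner/center crops of the original followed by
    # the five crops of the flipped image.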
351 | 352 | first_five = five_crop(img, size) 353 | 354 | if vertical_flip: 355 | img = vflip(img) 356 | else: 357 | img = hflip(img) 358 | 359 | second_five = five_crop(img, size) 360 | return first_five + second_five 361 | 362 | 363 | def adjust_brightness(img, brightness_factor): 364 | """Adjust brightness of an Image. 365 | Args: 366 | img (PIL Image): PIL Image to be adjusted. 367 | brightness_factor (float): How much to adjust the brightness. Can be 368 | any non negative number. 0 gives a black image, 1 gives the 369 | original image while 2 increases the brightness by a factor of 2. 370 | Returns: 371 | PIL Image: Brightness adjusted image. 372 | """ 373 | if not _is_pil_image(img): 374 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 375 | 376 | enhancer = ImageEnhance.Brightness(img) 377 | img = enhancer.enhance(brightness_factor) 378 | return img 379 | 380 | 381 | def adjust_contrast(img, contrast_factor): 382 | """Adjust contrast of an Image. 383 | Args: 384 | img (PIL Image): PIL Image to be adjusted. 385 | contrast_factor (float): How much to adjust the contrast. Can be any 386 | non negative number. 0 gives a solid gray image, 1 gives the 387 | original image while 2 increases the contrast by a factor of 2. 388 | Returns: 389 | PIL Image: Contrast adjusted image. 390 | """ 391 | if not _is_pil_image(img): 392 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 393 | 394 | enhancer = ImageEnhance.Contrast(img) 395 | img = enhancer.enhance(contrast_factor) 396 | return img 397 | 398 | 399 | def adjust_saturation(img, saturation_factor): 400 | """Adjust color saturation of an image. 401 | Args: 402 | img (PIL Image): PIL Image to be adjusted. 403 | saturation_factor (float): How much to adjust the saturation. 0 will 404 | give a black and white image, 1 will give the original image while 405 | 2 will enhance the saturation by a factor of 2. 406 | Returns: 407 | PIL Image: Saturation adjusted image. 408 | """ 409 | if not _is_pil_image(img): 410 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 411 | 412 | enhancer = ImageEnhance.Color(img) 413 | img = enhancer.enhance(saturation_factor) 414 | return img 415 | 416 | 417 | def adjust_hue(img, hue_factor): 418 | """Adjust hue of an image. 419 | The image hue is adjusted by converting the image to HSV and 420 | cyclically shifting the intensities in the hue channel (H). 421 | The image is then converted back to original image mode. 422 | `hue_factor` is the amount of shift in H channel and must be in the 423 | interval `[-0.5, 0.5]`. 424 | See https://en.wikipedia.org/wiki/Hue for more details on Hue. 425 | Args: 426 | img (PIL Image): PIL Image to be adjusted. 427 | hue_factor (float): How much to shift the hue channel. Should be in 428 | [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in 429 | HSV space in positive and negative direction respectively. 430 | 0 means no shift. Therefore, both -0.5 and 0.5 will give an image 431 | with complementary colors while 0 gives the original image. 432 | Returns: 433 | PIL Image: Hue adjusted image. 434 | """ 435 | if not(-0.5 <= hue_factor <= 0.5): 436 | raise ValueError('hue_factor is not in [-0.5, 0.5].'.format(hue_factor)) 437 | 438 | if not _is_pil_image(img): 439 | raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) 440 | 441 | input_mode = img.mode 442 | if input_mode in {'L', '1', 'I', 'F'}: 443 | return img 444 | 445 | h, s, v = img.convert('HSV').split() 446 | 447 | np_h = np.array(h, dtype=np.uint8) 448 | # uint8 addition take cares of rotation across boundaries 449 | with np.errstate(over='ignore'): 450 | np_h += np.uint8(hue_factor * 255) 451 | h = Image.fromarray(np_h, 'L') 452 | 453 | img = Image.merge('HSV', (h, s, v)).convert(input_mode) 454 | return img 455 | 456 | 457 | def adjust_gamma(img, gamma, gain=1): 458 | """Perform gamma correction on an image. 459 | Also known as Power Law Transform. Intensities in RGB mode are adjusted 460 | based on the following equation: 461 | I_out = 255 * gain * ((I_in / 255) ** gamma) 462 | See https://en.wikipedia.org/wiki/Gamma_correction for more details. 463 | Args: 464 | img (PIL Image): PIL Image to be adjusted. 465 | gamma (float): Non negative real number. gamma larger than 1 make the 466 | shadows darker, while gamma smaller than 1 make dark regions 467 | lighter. 468 | gain (float): The constant multiplier. 469 | """ 470 | if not _is_pil_image(img): 471 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 472 | 473 | if gamma < 0: 474 | raise ValueError('Gamma should be a non-negative real number') 475 | 476 | input_mode = img.mode 477 | img = img.convert('RGB') 478 | 479 | np_img = np.array(img, dtype=np.float32) 480 | np_img = 255 * gain * ((np_img / 255) ** gamma) 481 | np_img = np.uint8(np.clip(np_img, 0, 255)) 482 | 483 | img = Image.fromarray(np_img, 'RGB').convert(input_mode) 484 | return img 485 | 486 | 487 | def rotate(img, angle, resample=False, expand=False, center=None): 488 | """Rotate the image by angle and then (optionally) translate it by (n_columns, n_rows) 489 | Args: 490 | img (PIL Image): PIL Image to be rotated. 491 | angle ({float, int}): In degrees degrees counter clockwise order. 492 | resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional): 493 | An optional resampling filter. 494 | See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters 495 | If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST. 496 | expand (bool, optional): Optional expansion flag. 497 | If true, expands the output image to make it large enough to hold the entire rotated image. 498 | If false or omitted, make the output image the same size as the input image. 499 | Note that the expand flag assumes rotation around the center and no translation. 500 | center (2-tuple, optional): Optional center of rotation. 501 | Origin is the upper left corner. 502 | Default is the center of the image. 503 | """ 504 | 505 | if not _is_pil_image(img): 506 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 507 | 508 | return img.rotate(angle, resample, expand, center) 509 | 510 | 511 | def to_grayscale(img, num_output_channels=1): 512 | """Convert image to grayscale version of image. 513 | Args: 514 | img (PIL Image): Image to be converted to grayscale. 515 | Returns: 516 | PIL Image: Grayscale version of the image. 517 | if num_output_channels == 1 : returned image is single channel 518 | if num_output_channels == 3 : returned image is 3 channel with r == g == b 519 | """ 520 | if not _is_pil_image(img): 521 | raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) 522 | 523 | if num_output_channels == 1: 524 | img = img.convert('L') 525 | elif num_output_channels == 3: 526 | img = img.convert('L') 527 | np_img = np.array(img, dtype=np.uint8) 528 | np_img = np.dstack([np_img, np_img, np_img]) 529 | img = Image.fromarray(np_img, 'RGB') 530 | else: 531 | raise ValueError('num_output_channels should be either 1 or 3') 532 | 533 | return img 534 | -------------------------------------------------------------------------------- /emotion-detection/transforms.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import torch 3 | import math 4 | import random 5 | from PIL import Image, ImageOps, ImageEnhance 6 | try: 7 | import accimage 8 | except ImportError: 9 | accimage = None 10 | import numpy as np 11 | import numbers 12 | import types 13 | import collections 14 | import warnings 15 | 16 | import functional as F 17 | 18 | __all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "Resize", "Scale", "CenterCrop", "Pad", 19 | "Lambda", "RandomCrop", "RandomHorizontalFlip", "RandomVerticalFlip", "RandomResizedCrop", 20 | "RandomSizedCrop", "FiveCrop", "TenCrop", "LinearTransformation", "ColorJitter", "RandomRotation", 21 | "Grayscale", "RandomGrayscale"] 22 | 23 | 24 | class Compose(object): 25 | """Composes several transforms together. 26 | Args: 27 | transforms (list of ``Transform`` objects): list of transforms to compose. 28 | Example: 29 | >>> transforms.Compose([ 30 | >>> transforms.CenterCrop(10), 31 | >>> transforms.ToTensor(), 32 | >>> ]) 33 | """ 34 | 35 | def __init__(self, transforms): 36 | self.transforms = transforms 37 | 38 | def __call__(self, img): 39 | for t in self.transforms: 40 | img = t(img) 41 | return img 42 | 43 | 44 | class ToTensor(object): 45 | """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. 46 | Converts a PIL Image or numpy.ndarray (H x W x C) in the range 47 | [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]. 48 | """ 49 | 50 | def __call__(self, pic): 51 | """ 52 | Args: 53 | pic (PIL Image or numpy.ndarray): Image to be converted to tensor. 54 | Returns: 55 | Tensor: Converted image. 56 | """ 57 | return F.to_tensor(pic) 58 | 59 | 60 | class ToPILImage(object): 61 | """Convert a tensor or an ndarray to PIL Image. 62 | Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape 63 | H x W x C to a PIL Image while preserving the value range. 64 | Args: 65 | mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). 66 | If ``mode`` is ``None`` (default) there are some assumptions made about the input data: 67 | 1. If the input has 3 channels, the ``mode`` is assumed to be ``RGB``. 68 | 2. If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``. 69 | 3. If the input has 1 channel, the ``mode`` is determined by the data type (i,e, 70 | ``int``, ``float``, ``short``). 71 | .. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes 72 | """ 73 | def __init__(self, mode=None): 74 | self.mode = mode 75 | 76 | def __call__(self, pic): 77 | """ 78 | Args: 79 | pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. 80 | Returns: 81 | PIL Image: Image converted to PIL Image. 82 | """ 83 | return F.to_pil_image(pic, self.mode) 84 | 85 | 86 | class Normalize(object): 87 | """Normalize an tensor image with mean and standard deviation. 
88 | Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform 89 | will normalize each channel of the input ``torch.*Tensor`` i.e. 90 | ``input[channel] = (input[channel] - mean[channel]) / std[channel]`` 91 | Args: 92 | mean (sequence): Sequence of means for each channel. 93 | std (sequence): Sequence of standard deviations for each channel. 94 | """ 95 | 96 | def __init__(self, mean, std): 97 | self.mean = mean 98 | self.std = std 99 | 100 | def __call__(self, tensor): 101 | """ 102 | Args: 103 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 104 | Returns: 105 | Tensor: Normalized Tensor image. 106 | """ 107 | return F.normalize(tensor, self.mean, self.std) 108 | 109 | 110 | class Resize(object): 111 | """Resize the input PIL Image to the given size. 112 | Args: 113 | size (sequence or int): Desired output size. If size is a sequence like 114 | (h, w), output size will be matched to this. If size is an int, 115 | smaller edge of the image will be matched to this number. 116 | i.e, if height > width, then image will be rescaled to 117 | (size * height / width, size) 118 | interpolation (int, optional): Desired interpolation. Default is 119 | ``PIL.Image.BILINEAR`` 120 | """ 121 | 122 | def __init__(self, size, interpolation=Image.BILINEAR): 123 | assert isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2) 124 | self.size = size 125 | self.interpolation = interpolation 126 | 127 | def __call__(self, img): 128 | """ 129 | Args: 130 | img (PIL Image): Image to be scaled. 131 | Returns: 132 | PIL Image: Rescaled image. 133 | """ 134 | return F.resize(img, self.size, self.interpolation) 135 | 136 | 137 | class Scale(Resize): 138 | """ 139 | Note: This transform is deprecated in favor of Resize. 140 | """ 141 | def __init__(self, *args, **kwargs): 142 | warnings.warn("The use of the transforms.Scale transform is deprecated, " + 143 | "please use transforms.Resize instead.") 144 | super(Scale, self).__init__(*args, **kwargs) 145 | 146 | 147 | class CenterCrop(object): 148 | """Crops the given PIL Image at the center. 149 | Args: 150 | size (sequence or int): Desired output size of the crop. If size is an 151 | int instead of sequence like (h, w), a square crop (size, size) is 152 | made. 153 | """ 154 | 155 | def __init__(self, size): 156 | if isinstance(size, numbers.Number): 157 | self.size = (int(size), int(size)) 158 | else: 159 | self.size = size 160 | 161 | def __call__(self, img): 162 | """ 163 | Args: 164 | img (PIL Image): Image to be cropped. 165 | Returns: 166 | PIL Image: Cropped image. 167 | """ 168 | return F.center_crop(img, self.size) 169 | 170 | 171 | class Pad(object): 172 | """Pad the given PIL Image on all sides with the given "pad" value. 173 | Args: 174 | padding (int or tuple): Padding on each border. If a single int is provided this 175 | is used to pad all borders. If tuple of length 2 is provided this is the padding 176 | on left/right and top/bottom respectively. If a tuple of length 4 is provided 177 | this is the padding for the left, top, right and bottom borders 178 | respectively. 179 | fill: Pixel fill value. Default is 0. If a tuple of 180 | length 3, it is used to fill R, G, B channels respectively. 
181 | """ 182 | 183 | def __init__(self, padding, fill=0): 184 | assert isinstance(padding, (numbers.Number, tuple)) 185 | assert isinstance(fill, (numbers.Number, str, tuple)) 186 | if isinstance(padding, collections.Sequence) and len(padding) not in [2, 4]: 187 | raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " + 188 | "{} element tuple".format(len(padding))) 189 | 190 | self.padding = padding 191 | self.fill = fill 192 | 193 | def __call__(self, img): 194 | """ 195 | Args: 196 | img (PIL Image): Image to be padded. 197 | Returns: 198 | PIL Image: Padded image. 199 | """ 200 | return F.pad(img, self.padding, self.fill) 201 | 202 | 203 | class Lambda(object): 204 | """Apply a user-defined lambda as a transform. 205 | Args: 206 | lambd (function): Lambda/function to be used for transform. 207 | """ 208 | 209 | def __init__(self, lambd): 210 | assert isinstance(lambd, types.LambdaType) 211 | self.lambd = lambd 212 | 213 | def __call__(self, img): 214 | return self.lambd(img) 215 | 216 | 217 | class RandomCrop(object): 218 | """Crop the given PIL Image at a random location. 219 | Args: 220 | size (sequence or int): Desired output size of the crop. If size is an 221 | int instead of sequence like (h, w), a square crop (size, size) is 222 | made. 223 | padding (int or sequence, optional): Optional padding on each border 224 | of the image. Default is 0, i.e no padding. If a sequence of length 225 | 4 is provided, it is used to pad left, top, right, bottom borders 226 | respectively. 227 | """ 228 | 229 | def __init__(self, size, padding=0): 230 | if isinstance(size, numbers.Number): 231 | self.size = (int(size), int(size)) 232 | else: 233 | self.size = size 234 | self.padding = padding 235 | 236 | @staticmethod 237 | def get_params(img, output_size): 238 | """Get parameters for ``crop`` for a random crop. 239 | Args: 240 | img (PIL Image): Image to be cropped. 241 | output_size (tuple): Expected output size of the crop. 242 | Returns: 243 | tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. 244 | """ 245 | w, h = img.size 246 | th, tw = output_size 247 | if w == tw and h == th: 248 | return 0, 0, h, w 249 | 250 | i = random.randint(0, h - th) 251 | j = random.randint(0, w - tw) 252 | return i, j, th, tw 253 | 254 | def __call__(self, img): 255 | """ 256 | Args: 257 | img (PIL Image): Image to be cropped. 258 | Returns: 259 | PIL Image: Cropped image. 260 | """ 261 | if self.padding > 0: 262 | img = F.pad(img, self.padding) 263 | 264 | i, j, h, w = self.get_params(img, self.size) 265 | 266 | return F.crop(img, i, j, h, w) 267 | 268 | 269 | class RandomHorizontalFlip(object): 270 | """Horizontally flip the given PIL Image randomly with a probability of 0.5.""" 271 | 272 | def __call__(self, img): 273 | """ 274 | Args: 275 | img (PIL Image): Image to be flipped. 276 | Returns: 277 | PIL Image: Randomly flipped image. 278 | """ 279 | if random.random() < 0.5: 280 | return F.hflip(img) 281 | return img 282 | 283 | 284 | class RandomVerticalFlip(object): 285 | """Vertically flip the given PIL Image randomly with a probability of 0.5.""" 286 | 287 | def __call__(self, img): 288 | """ 289 | Args: 290 | img (PIL Image): Image to be flipped. 291 | Returns: 292 | PIL Image: Randomly flipped image. 293 | """ 294 | if random.random() < 0.5: 295 | return F.vflip(img) 296 | return img 297 | 298 | 299 | class RandomResizedCrop(object): 300 | """Crop the given PIL Image to random size and aspect ratio. 
301 | A crop of random size (default: of 0.08 to 1.0) of the original size and a random 302 | aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop 303 | is finally resized to given size. 304 | This is popularly used to train the Inception networks. 305 | Args: 306 | size: expected output size of each edge 307 | scale: range of size of the origin size cropped 308 | ratio: range of aspect ratio of the origin aspect ratio cropped 309 | interpolation: Default: PIL.Image.BILINEAR 310 | """ 311 | 312 | def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR): 313 | self.size = (size, size) 314 | self.interpolation = interpolation 315 | self.scale = scale 316 | self.ratio = ratio 317 | 318 | @staticmethod 319 | def get_params(img, scale, ratio): 320 | """Get parameters for ``crop`` for a random sized crop. 321 | Args: 322 | img (PIL Image): Image to be cropped. 323 | scale (tuple): range of size of the origin size cropped 324 | ratio (tuple): range of aspect ratio of the origin aspect ratio cropped 325 | Returns: 326 | tuple: params (i, j, h, w) to be passed to ``crop`` for a random 327 | sized crop. 328 | """ 329 | for attempt in range(10): 330 | area = img.size[0] * img.size[1] 331 | target_area = random.uniform(*scale) * area 332 | aspect_ratio = random.uniform(*ratio) 333 | 334 | w = int(round(math.sqrt(target_area * aspect_ratio))) 335 | h = int(round(math.sqrt(target_area / aspect_ratio))) 336 | 337 | if random.random() < 0.5: 338 | w, h = h, w 339 | 340 | if w <= img.size[0] and h <= img.size[1]: 341 | i = random.randint(0, img.size[1] - h) 342 | j = random.randint(0, img.size[0] - w) 343 | return i, j, h, w 344 | 345 | # Fallback 346 | w = min(img.size[0], img.size[1]) 347 | i = (img.size[1] - w) // 2 348 | j = (img.size[0] - w) // 2 349 | return i, j, w, w 350 | 351 | def __call__(self, img): 352 | """ 353 | Args: 354 | img (PIL Image): Image to be flipped. 355 | Returns: 356 | PIL Image: Randomly cropped and resize image. 357 | """ 358 | i, j, h, w = self.get_params(img, self.scale, self.ratio) 359 | return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) 360 | 361 | 362 | class RandomSizedCrop(RandomResizedCrop): 363 | """ 364 | Note: This transform is deprecated in favor of RandomResizedCrop. 365 | """ 366 | def __init__(self, *args, **kwargs): 367 | warnings.warn("The use of the transforms.RandomSizedCrop transform is deprecated, " + 368 | "please use transforms.RandomResizedCrop instead.") 369 | super(RandomSizedCrop, self).__init__(*args, **kwargs) 370 | 371 | 372 | class FiveCrop(object): 373 | """Crop the given PIL Image into four corners and the central crop 374 | .. Note:: 375 | This transform returns a tuple of images and there may be a mismatch in the number of 376 | inputs and targets your Dataset returns. See below for an example of how to deal with 377 | this. 378 | Args: 379 | size (sequence or int): Desired output size of the crop. If size is an ``int`` 380 | instead of sequence like (h, w), a square crop of size (size, size) is made. 
381 | Example: 382 | >>> transform = Compose([ 383 | >>> FiveCrop(size), # this is a list of PIL Images 384 | >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor 385 | >>> ]) 386 | >>> #In your test loop you can do the following: 387 | >>> input, target = batch # input is a 5d tensor, target is 2d 388 | >>> bs, ncrops, c, h, w = input.size() 389 | >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops 390 | >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops 391 | """ 392 | 393 | def __init__(self, size): 394 | self.size = size 395 | if isinstance(size, numbers.Number): 396 | self.size = (int(size), int(size)) 397 | else: 398 | assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 399 | self.size = size 400 | 401 | def __call__(self, img): 402 | return F.five_crop(img, self.size) 403 | 404 | 405 | class TenCrop(object): 406 | """Crop the given PIL Image into four corners and the central crop plus the flipped version of 407 | these (horizontal flipping is used by default) 408 | .. Note:: 409 | This transform returns a tuple of images and there may be a mismatch in the number of 410 | inputs and targets your Dataset returns. See below for an example of how to deal with 411 | this. 412 | Args: 413 | size (sequence or int): Desired output size of the crop. If size is an 414 | int instead of sequence like (h, w), a square crop (size, size) is 415 | made. 416 | vertical_flip(bool): Use vertical flipping instead of horizontal 417 | Example: 418 | >>> transform = Compose([ 419 | >>> TenCrop(size), # this is a list of PIL Images 420 | >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor 421 | >>> ]) 422 | >>> #In your test loop you can do the following: 423 | >>> input, target = batch # input is a 5d tensor, target is 2d 424 | >>> bs, ncrops, c, h, w = input.size() 425 | >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops 426 | >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops 427 | """ 428 | 429 | def __init__(self, size, vertical_flip=False): 430 | self.size = size 431 | if isinstance(size, numbers.Number): 432 | self.size = (int(size), int(size)) 433 | else: 434 | assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 435 | self.size = size 436 | self.vertical_flip = vertical_flip 437 | 438 | def __call__(self, img): 439 | return F.ten_crop(img, self.size, self.vertical_flip) 440 | 441 | 442 | class LinearTransformation(object): 443 | """Transform a tensor image with a square transformation matrix computed 444 | offline. 445 | Given transformation_matrix, will flatten the torch.*Tensor, compute the dot 446 | product with the transformation matrix and reshape the tensor to its 447 | original shape. 448 | Applications: 449 | - whitening: zero-center the data, compute the data covariance matrix 450 | [D x D] with np.dot(X.T, X), perform SVD on this matrix and 451 | pass it as transformation_matrix. 452 | Args: 453 | transformation_matrix (Tensor): tensor [D x D], D = C x H x W 454 | """ 455 | 456 | def __init__(self, transformation_matrix): 457 | if transformation_matrix.size(0) != transformation_matrix.size(1): 458 | raise ValueError("transformation_matrix should be square. 
Got " + 459 | "[{} x {}] rectangular matrix.".format(*transformation_matrix.size())) 460 | self.transformation_matrix = transformation_matrix 461 | 462 | def __call__(self, tensor): 463 | """ 464 | Args: 465 | tensor (Tensor): Tensor image of size (C, H, W) to be whitened. 466 | Returns: 467 | Tensor: Transformed image. 468 | """ 469 | if tensor.size(0) * tensor.size(1) * tensor.size(2) != self.transformation_matrix.size(0): 470 | raise ValueError("tensor and transformation matrix have incompatible shape." + 471 | "[{} x {} x {}] != ".format(*tensor.size()) + 472 | "{}".format(self.transformation_matrix.size(0))) 473 | flat_tensor = tensor.view(1, -1) 474 | transformed_tensor = torch.mm(flat_tensor, self.transformation_matrix) 475 | tensor = transformed_tensor.view(tensor.size()) 476 | return tensor 477 | 478 | 479 | class ColorJitter(object): 480 | """Randomly change the brightness, contrast and saturation of an image. 481 | Args: 482 | brightness (float): How much to jitter brightness. brightness_factor 483 | is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]. 484 | contrast (float): How much to jitter contrast. contrast_factor 485 | is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]. 486 | saturation (float): How much to jitter saturation. saturation_factor 487 | is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]. 488 | hue(float): How much to jitter hue. hue_factor is chosen uniformly from 489 | [-hue, hue]. Should be >=0 and <= 0.5. 490 | """ 491 | def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): 492 | self.brightness = brightness 493 | self.contrast = contrast 494 | self.saturation = saturation 495 | self.hue = hue 496 | 497 | @staticmethod 498 | def get_params(brightness, contrast, saturation, hue): 499 | """Get a randomized transform to be applied on image. 500 | Arguments are same as that of __init__. 501 | Returns: 502 | Transform which randomly adjusts brightness, contrast and 503 | saturation in a random order. 504 | """ 505 | transforms = [] 506 | if brightness > 0: 507 | brightness_factor = np.random.uniform(max(0, 1 - brightness), 1 + brightness) 508 | transforms.append(Lambda(lambda img: F.adjust_brightness(img, brightness_factor))) 509 | 510 | if contrast > 0: 511 | contrast_factor = np.random.uniform(max(0, 1 - contrast), 1 + contrast) 512 | transforms.append(Lambda(lambda img: F.adjust_contrast(img, contrast_factor))) 513 | 514 | if saturation > 0: 515 | saturation_factor = np.random.uniform(max(0, 1 - saturation), 1 + saturation) 516 | transforms.append(Lambda(lambda img: F.adjust_saturation(img, saturation_factor))) 517 | 518 | if hue > 0: 519 | hue_factor = np.random.uniform(-hue, hue) 520 | transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor))) 521 | 522 | np.random.shuffle(transforms) 523 | transform = Compose(transforms) 524 | 525 | return transform 526 | 527 | def __call__(self, img): 528 | """ 529 | Args: 530 | img (PIL Image): Input image. 531 | Returns: 532 | PIL Image: Color jittered image. 533 | """ 534 | transform = self.get_params(self.brightness, self.contrast, 535 | self.saturation, self.hue) 536 | return transform(img) 537 | 538 | 539 | class RandomRotation(object): 540 | """Rotate the image by angle. 541 | Args: 542 | degrees (sequence or float or int): Range of degrees to select from. 543 | If degrees is a number instead of sequence like (min, max), the range of degrees 544 | will be (-degrees, +degrees). 
545 | resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional): 546 | An optional resampling filter. 547 | See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters 548 | If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST. 549 | expand (bool, optional): Optional expansion flag. 550 | If true, expands the output to make it large enough to hold the entire rotated image. 551 | If false or omitted, make the output image the same size as the input image. 552 | Note that the expand flag assumes rotation around the center and no translation. 553 | center (2-tuple, optional): Optional center of rotation. 554 | Origin is the upper left corner. 555 | Default is the center of the image. 556 | """ 557 | 558 | def __init__(self, degrees, resample=False, expand=False, center=None): 559 | if isinstance(degrees, numbers.Number): 560 | if degrees < 0: 561 | raise ValueError("If degrees is a single number, it must be positive.") 562 | self.degrees = (-degrees, degrees) 563 | else: 564 | if len(degrees) != 2: 565 | raise ValueError("If degrees is a sequence, it must be of len 2.") 566 | self.degrees = degrees 567 | 568 | self.resample = resample 569 | self.expand = expand 570 | self.center = center 571 | 572 | @staticmethod 573 | def get_params(degrees): 574 | """Get parameters for ``rotate`` for a random rotation. 575 | Returns: 576 | sequence: params to be passed to ``rotate`` for random rotation. 577 | """ 578 | angle = np.random.uniform(degrees[0], degrees[1]) 579 | 580 | return angle 581 | 582 | def __call__(self, img): 583 | """ 584 | img (PIL Image): Image to be rotated. 585 | Returns: 586 | PIL Image: Rotated image. 587 | """ 588 | 589 | angle = self.get_params(self.degrees) 590 | 591 | return F.rotate(img, angle, self.resample, self.expand, self.center) 592 | 593 | 594 | class Grayscale(object): 595 | """Convert image to grayscale. 596 | Args: 597 | num_output_channels (int): (1 or 3) number of channels desired for output image 598 | Returns: 599 | PIL Image: Grayscale version of the input. 600 | - If num_output_channels == 1 : returned image is single channel 601 | - If num_output_channels == 3 : returned image is 3 channel with r == g == b 602 | """ 603 | 604 | def __init__(self, num_output_channels=1): 605 | self.num_output_channels = num_output_channels 606 | 607 | def __call__(self, img): 608 | """ 609 | Args: 610 | img (PIL Image): Image to be converted to grayscale. 611 | Returns: 612 | PIL Image: Randomly grayscaled image. 613 | """ 614 | return F.to_grayscale(img, num_output_channels=self.num_output_channels) 615 | 616 | 617 | class RandomGrayscale(object): 618 | """Randomly convert image to grayscale with a probability of p (default 0.1). 619 | Args: 620 | p (float): probability that image should be converted to grayscale. 621 | Returns: 622 | PIL Image: Grayscale version of the input image with probability p and unchanged 623 | with probability (1-p). 624 | - If input image is 1 channel: grayscale version is 1 channel 625 | - If input image is 3 channel: grayscale version is 3 channel with r == g == b 626 | """ 627 | 628 | def __init__(self, p=0.1): 629 | self.p = p 630 | 631 | def __call__(self, img): 632 | """ 633 | Args: 634 | img (PIL Image): Image to be converted to grayscale. 635 | Returns: 636 | PIL Image: Randomly grayscaled image. 
637 | """ 638 | num_output_channels = 1 if img.mode == 'L' else 3 639 | if random.random() < self.p: 640 | return F.to_grayscale(img, num_output_channels=num_output_channels) 641 | return img 642 | --------------------------------------------------------------------------------