├── pose-estimation ├── datasets │ ├── x │ ├── coco.py │ └── transformations.py ├── modules │ ├── loss.py │ ├── get_parameters.py │ ├── conv.py │ ├── load_state.py │ ├── one_euro_filter.py │ ├── pose.py │ └── keypoints.py ├── readme.md ├── scripts │ ├── convert_to_onnx.py │ ├── make_val_subset.py │ └── prepare_train_labels.py ├── models │ └── with_mobilenet.py ├── demo.py ├── val.py └── train.py ├── emotion-detection ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── vgg.cpython-37.pyc │ │ ├── resnet.cpython-37.pyc │ │ └── __init__.cpython-37.pyc │ ├── vgg.py │ └── resnet.py ├── images │ ├── emojis │ │ ├── Sad.png │ │ ├── Angry.png │ │ ├── Fear.png │ │ ├── Happy.png │ │ ├── Disgust.png │ │ ├── Neutral.png │ │ └── Surprise.png │ └── results │ │ ├── Happy-Result.png │ │ └── Neutral-Result.png ├── BlazeFace_PyTorch │ ├── anchors.npy │ ├── blazeface.pth │ ├── __pycache__ │ │ └── blazeface.cpython-37.pyc │ └── blazeface.py ├── emotion_taker.py ├── emotion_detection.py ├── visualize.py ├── functional.py └── transforms.py ├── Classroom.png ├── Pose-Detection.png ├── finger-print-sensor.png ├── BlockDiagram-Classroom.png ├── LICENSE ├── README.md ├── audio_to_text.py └── attendance-code.txt /pose-estimation/datasets/x: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /emotion-detection/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .vgg import * 2 | from .resnet import * -------------------------------------------------------------------------------- /Classroom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/Classroom.png -------------------------------------------------------------------------------- /Pose-Detection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/Pose-Detection.png -------------------------------------------------------------------------------- /finger-print-sensor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/finger-print-sensor.png -------------------------------------------------------------------------------- /BlockDiagram-Classroom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/BlockDiagram-Classroom.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Sad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Sad.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Angry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Angry.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Fear.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Fear.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Happy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Happy.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Disgust.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Disgust.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Neutral.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Neutral.png -------------------------------------------------------------------------------- /emotion-detection/images/emojis/Surprise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/emojis/Surprise.png -------------------------------------------------------------------------------- /emotion-detection/BlazeFace_PyTorch/anchors.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/BlazeFace_PyTorch/anchors.npy -------------------------------------------------------------------------------- /emotion-detection/BlazeFace_PyTorch/blazeface.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/BlazeFace_PyTorch/blazeface.pth -------------------------------------------------------------------------------- /emotion-detection/images/results/Happy-Result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/results/Happy-Result.png -------------------------------------------------------------------------------- /emotion-detection/images/results/Neutral-Result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/images/results/Neutral-Result.png -------------------------------------------------------------------------------- /emotion-detection/models/__pycache__/vgg.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/models/__pycache__/vgg.cpython-37.pyc -------------------------------------------------------------------------------- /emotion-detection/models/__pycache__/resnet.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/models/__pycache__/resnet.cpython-37.pyc -------------------------------------------------------------------------------- /emotion-detection/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /pose-estimation/modules/loss.py: -------------------------------------------------------------------------------- 1 | def l2_loss(input, target, mask, batch_size): 2 | loss = (input - target) * mask 3 | loss = (loss * loss) / 2 / batch_size 4 | 5 | return loss.sum() 6 | -------------------------------------------------------------------------------- /emotion-detection/BlazeFace_PyTorch/__pycache__/blazeface.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vineeth-raj/Classroom-Monitoring-System/HEAD/emotion-detection/BlazeFace_PyTorch/__pycache__/blazeface.cpython-37.pyc -------------------------------------------------------------------------------- /pose-estimation/readme.md: -------------------------------------------------------------------------------- 1 | The pretrained weights for this model are available at this [link](https://drive.google.com/drive/folders/1GQYH4M5X5gclQo5k9pBID9n0j7XvMdrZ?usp=sharing). 2 | To run the code successfully, make sure you download them and place them here, keeping the same file name. 3 | -------------------------------------------------------------------------------- /emotion-detection/emotion_taker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from skimage import io 5 | import visualize 6 | 7 | # same 7 emotion classes as in visualize.py 8 | class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'] 9 | 10 | # classify the captured face images and convert the per-class counts to percentages 11 | emotion_array = visualize.visualizer() 12 | emotion_array = np.array(emotion_array) 13 | emotion_array = (emotion_array / sum(emotion_array)) * 100 14 | 15 | plt.rcParams['figure.figsize'] = (13.5, 5.5) 16 | for i in range(len(emotion_array)): 17 | axes = plt.subplot(2, 4, i + 1)  # subplot indices start at 1 18 | emojis_img = io.imread('images/emojis/%s.png' % str(class_names[i])) 19 | plt.imshow(emojis_img) 20 | plt.xlabel(str(emotion_array[i]), fontsize=16) 21 | axes.set_xticks([]) 22 | axes.set_yticks([]) 23 | plt.tight_layout() 24 | plt.savefig(os.path.join('images/results/{}.png'.format(i + 1))) 25 | plt.close() 26 | -------------------------------------------------------------------------------- /pose-estimation/modules/get_parameters.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | def get_parameters(model, predicate): 5 | for module in model.modules(): 6 | for param_name, param in module.named_parameters(): 7 | if predicate(module, param_name): 8 | yield param 9 | 10 | 11 | def get_parameters_conv(model, name): 12 | return get_parameters(model, lambda m, p: isinstance(m, nn.Conv2d) and m.groups == 1 and p == name) 13 | 14 | 15 | def get_parameters_conv_depthwise(model, name): 16 | return get_parameters(model, lambda m, p: isinstance(m, nn.Conv2d) 17 | and m.groups == m.in_channels 18 | and m.in_channels == m.out_channels 19 | and p == name) 20 | 21 | 22 | def get_parameters_bn(model, name): 23 | return get_parameters(model, lambda m, p: isinstance(m, nn.BatchNorm2d) and p == name) 24 | -------------------------------------------------------------------------------- /LICENSE:
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Vineeth Raj B 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /pose-estimation/scripts/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | 5 | from models.with_mobilenet import PoseEstimationWithMobileNet 6 | from modules.load_state import load_state 7 | 8 | 9 | def convert_to_onnx(net, output_name): 10 | input = torch.randn(1, 3, 256, 456) 11 | input_names = ['data'] 12 | output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs', 13 | 'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs'] 14 | 15 | torch.onnx.export(net, input, output_name, verbose=True, input_names=input_names, output_names=output_names) 16 | 17 | 18 | if __name__ == '__main__': 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint') 21 | parser.add_argument('--output-name', type=str, default='human-pose-estimation.onnx', 22 | help='name of output model in ONNX format') 23 | args = parser.parse_args() 24 | 25 | net = PoseEstimationWithMobileNet() 26 | checkpoint = torch.load(args.checkpoint_path) 27 | load_state(net, checkpoint) 28 | 29 | convert_to_onnx(net, args.output_name) 30 | -------------------------------------------------------------------------------- /pose-estimation/modules/conv.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | def conv(in_channels, out_channels, kernel_size=3, padding=1, bn=True, dilation=1, stride=1, relu=True, bias=True): 5 | modules = [nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)] 6 | if bn: 7 | modules.append(nn.BatchNorm2d(out_channels)) 8 | if relu: 9 | modules.append(nn.ReLU(inplace=True)) 10 | return nn.Sequential(*modules) 11 | 12 | 13 | def conv_dw(in_channels, out_channels, kernel_size=3, padding=1, stride=1, dilation=1): 14 | return nn.Sequential( 15 | nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation=dilation, groups=in_channels, bias=False), 16 | nn.BatchNorm2d(in_channels), 17 | nn.ReLU(inplace=True), 18 | 19 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 20 | nn.BatchNorm2d(out_channels), 21 | 
nn.ReLU(inplace=True), 22 | ) 23 | 24 | 25 | def conv_dw_no_bn(in_channels, out_channels, kernel_size=3, padding=1, stride=1, dilation=1): 26 | return nn.Sequential( 27 | nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation=dilation, groups=in_channels, bias=False), 28 | nn.ELU(inplace=True), 29 | 30 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 31 | nn.ELU(inplace=True), 32 | ) 33 | -------------------------------------------------------------------------------- /pose-estimation/modules/load_state.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | 4 | def load_state(net, checkpoint): 5 | source_state = checkpoint['state_dict'] 6 | target_state = net.state_dict() 7 | new_target_state = collections.OrderedDict() 8 | for target_key, target_value in target_state.items(): 9 | if target_key in source_state and source_state[target_key].size() == target_state[target_key].size(): 10 | new_target_state[target_key] = source_state[target_key] 11 | else: 12 | new_target_state[target_key] = target_state[target_key] 13 | print('[WARNING] Not found pre-trained parameters for {}'.format(target_key)) 14 | 15 | net.load_state_dict(new_target_state) 16 | 17 | 18 | def load_from_mobilenet(net, checkpoint): 19 | source_state = checkpoint['state_dict'] 20 | target_state = net.state_dict() 21 | new_target_state = collections.OrderedDict() 22 | for target_key, target_value in target_state.items(): 23 | k = target_key 24 | if k.find('model') != -1: 25 | k = k.replace('model', 'module.model') 26 | if k in source_state and source_state[k].size() == target_state[target_key].size(): 27 | new_target_state[target_key] = source_state[k] 28 | else: 29 | new_target_state[target_key] = target_state[target_key] 30 | print('[WARNING] Not found pre-trained parameters for {}'.format(target_key)) 31 | 32 | net.load_state_dict(new_target_state) 33 | -------------------------------------------------------------------------------- /emotion-detection/models/vgg.py: -------------------------------------------------------------------------------- 1 | '''VGG11/13/16/19 in Pytorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | 7 | 8 | cfg = { 9 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 10 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 11 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 12 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 13 | } 14 | 15 | 16 | class VGG(nn.Module): 17 | def __init__(self, vgg_name): 18 | super(VGG, self).__init__() 19 | self.features = self._make_layers(cfg[vgg_name]) 20 | self.classifier = nn.Linear(512, 7) 21 | 22 | def forward(self, x): 23 | out = self.features(x) 24 | out = out.view(out.size(0), -1) 25 | out = F.dropout(out, p=0.5, training=self.training) 26 | out = self.classifier(out) 27 | return out 28 | 29 | def _make_layers(self, cfg): 30 | layers = [] 31 | in_channels = 3 32 | for x in cfg: 33 | if x == 'M': 34 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 35 | else: 36 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1), 37 | nn.BatchNorm2d(x), 38 | nn.ReLU(inplace=True)] 39 | in_channels = x 40 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 41 | return nn.Sequential(*layers) 42 | 
-------------------------------------------------------------------------------- /pose-estimation/modules/one_euro_filter.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def get_alpha(rate=30, cutoff=1): 5 | tau = 1 / (2 * math.pi * cutoff) 6 | te = 1 / rate 7 | return 1 / (1 + tau / te) 8 | 9 | 10 | class LowPassFilter: 11 | def __init__(self): 12 | self.x_previous = None 13 | 14 | def __call__(self, x, alpha=0.5): 15 | if self.x_previous is None: 16 | self.x_previous = x 17 | return x 18 | x_filtered = alpha * x + (1 - alpha) * self.x_previous 19 | self.x_previous = x_filtered 20 | return x_filtered 21 | 22 | 23 | class OneEuroFilter: 24 | def __init__(self, freq=15, mincutoff=1, beta=0.05, dcutoff=1): 25 | self.freq = freq 26 | self.mincutoff = mincutoff 27 | self.beta = beta 28 | self.dcutoff = dcutoff 29 | self.filter_x = LowPassFilter() 30 | self.filter_dx = LowPassFilter() 31 | self.x_previous = None 32 | self.dx = None 33 | 34 | def __call__(self, x): 35 | if self.dx is None: 36 | self.dx = 0 37 | else: 38 | self.dx = (x - self.x_previous) * self.freq 39 | dx_smoothed = self.filter_dx(self.dx, get_alpha(self.freq, self.dcutoff)) 40 | cutoff = self.mincutoff + self.beta * abs(dx_smoothed) 41 | x_filtered = self.filter_x(x, get_alpha(self.freq, cutoff)) 42 | self.x_previous = x 43 | return x_filtered 44 | 45 | 46 | if __name__ == '__main__': 47 | filter = OneEuroFilter(freq=15, beta=0.1) 48 | for val in range(10): 49 | x = val + (-1)**(val % 2) 50 | x_filtered = filter(x) 51 | print(x_filtered, x) 52 | -------------------------------------------------------------------------------- /pose-estimation/scripts/make_val_subset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import random 4 | 5 | 6 | if __name__ == '__main__': 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--labels', type=str, required=True, help='path to json with keypoints val labels') 9 | parser.add_argument('--output-name', type=str, default='val_subset.json', 10 | help='name of output file with subset of val labels') 11 | parser.add_argument('--num-images', type=int, default=250, help='number of images in subset') 12 | args = parser.parse_args() 13 | 14 | with open(args.labels, 'r') as f: 15 | data = json.load(f) 16 | 17 | random.seed(0) 18 | total_val_images = 5000 19 | idxs = list(range(total_val_images)) 20 | random.shuffle(idxs) 21 | 22 | images_by_id = {} 23 | for idx in idxs[:args.num_images]: 24 | images_by_id[data['images'][idx]['id']] = data['images'][idx] 25 | 26 | annotations_by_image_id = {} 27 | for annotation in data['annotations']: 28 | if annotation['image_id'] in images_by_id: 29 | if not annotation['image_id'] in annotations_by_image_id: 30 | annotations_by_image_id[annotation['image_id']] = [] 31 | annotations_by_image_id[annotation['image_id']].append(annotation) 32 | 33 | subset = { 34 | 'info': data['info'], 35 | 'licenses': data['licenses'], 36 | 'images': [], 37 | 'annotations': [], 38 | 'categories': data['categories'] 39 | } 40 | for image_id, image in images_by_id.items(): 41 | subset['images'].append(image) 42 | if image_id in annotations_by_image_id: # image has at least 1 annotation 43 | subset['annotations'].extend(annotations_by_image_id[image_id]) 44 | 45 | with open(args.output_name, 'w') as f: 46 | json.dump(subset, f, indent=4) 47 | 48 | -------------------------------------------------------------------------------- 
/emotion-detection/emotion_detection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[21]: 5 | 6 | 7 | import torch 8 | from torch import nn 9 | import torchvision 10 | from BlazeFace_PyTorch import blazeface 11 | 12 | gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 13 | net = blazeface.BlazeFace().to(gpu) 14 | net.load_weights("BlazeFace_PyTorch/blazeface.pth") 15 | net.load_anchors("BlazeFace_PyTorch/anchors.npy") 16 | 17 | 18 | # In[39]: 19 | 20 | 21 | import cv2 22 | import matplotlib.pyplot as plt 23 | import numpy as np 24 | get_ipython().run_line_magic('matplotlib', 'inline') 25 | 26 | video_capture = cv2.VideoCapture(0) 27 | 28 | while True: 29 | # Capture frame-by-frame 30 | ret, frame = video_capture.read() 31 | #cv2.imshow('Video', frame) 32 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 33 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 34 | #frame = cv2.resize(frame, (128, 128)) 35 | 36 | '''faces = faceCascade.detectMultiScale( 37 | gray, 38 | scaleFactor=1.1, 39 | minNeighbors=5, 40 | minSize=(30, 30), 41 | flags=cv2.cv.CV_HAAR_SCALE_IMAGE 42 | ) 43 | 44 | # Draw a rectangle around the faces 45 | for (x, y, w, h) in faces: 46 | cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)''' 47 | 48 | # Display the resulting frame 49 | cv2.imshow('Video', frame) 50 | 51 | if cv2.waitKey(1) & 0xFF == ord('q'): 52 | break 53 | # When everything is done, release the capture 54 | frame = cv2.resize(frame, (128, 128)) 55 | detections = net.predict_on_image(frame) 56 | detections = detections.cpu().numpy() 57 | for i in range(detections.shape[0]): 58 | ymin = int(detections[0, 0] * frame.shape[0]) 59 | xmin = int(detections[0, 1] * frame.shape[1]) 60 | ymax = int(detections[0, 2] * frame.shape[0]) 61 | xmax = int(detections[0, 3] * frame.shape[1]) 62 | face = frame[xmin:xmax, ymin:ymax] 63 | face = cv2.flip(face, 1) 64 | face = cv2.resize(face, (128, 128)) 65 | sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]]) 66 | faces = cv2.filter2D(face, -1, sharpen_kernel) 67 | cv2.imwrite('images/{}.jpg'.format(i+1), faces) 68 | plt.imshow(faces) 69 | video_capture.release() 70 | cv2.destroyAllWindows() 71 | #plt.show() 72 | 73 | 74 | # In[17]: 75 | 76 | 77 | get_ipython().system('python visualize.py') 78 | 79 | 80 | # In[18]: 81 | 82 | 83 | detections.shape 84 | 85 | 86 | # In[19]: 87 | 88 | 89 | detections.ndim 90 | 91 | 92 | # In[20]: 93 | 94 | 95 | detections.shape[0] 96 | 97 | 98 | # In[ ]: 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /emotion-detection/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | visualize results for test image 3 | """ 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from PIL import Image 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import os 12 | from torch.autograd import Variable 13 | 14 | import transforms as transforms 15 | from skimage import io 16 | from skimage.transform import resize 17 | from models import * 18 | 19 | 20 | 21 | def rgb2gray(rgb): 22 | return np.dot(rgb[...,:3], [0.299, 0.587, 0.114]) 23 | def visualizer(): 24 | cut_size = 44 25 | transform_test = transforms.Compose([ 26 | transforms.TenCrop(cut_size), 27 | transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),]) 28 | x = 0 29 | for root, dirs, 
files in os.walk('images/'): 30 | for f in files: 31 | x = x+1 32 | emotion_array = [0,0,0,0,0,0,0] 33 | for i in range(x): 34 | raw_img = io.imread('images/{}.jpg'.format(i+1)) 35 | gray = rgb2gray(raw_img) 36 | gray = resize(gray, (48,48), mode='symmetric').astype(np.uint8) 37 | 38 | img = gray[:, :, np.newaxis] 39 | 40 | img = np.concatenate((img, img, img), axis=2) 41 | img = Image.fromarray(img) 42 | inputs = transform_test(img) 43 | 44 | class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'] 45 | 46 | net = VGG('VGG19') 47 | checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'), map_location=torch.device('cuda')) 48 | net.load_state_dict(checkpoint['net']) 49 | net.cuda() 50 | net.eval() 51 | 52 | ncrops, c, h, w = np.shape(inputs) 53 | 54 | inputs = inputs.view(-1, c, h, w) 55 | inputs = inputs.cuda() 56 | inputs = Variable(inputs, volatile=True) 57 | outputs = net(inputs) 58 | 59 | outputs_avg = outputs.view(ncrops, -1).mean(0) # avg over crops 60 | 61 | score = F.softmax(outputs_avg) 62 | _, predicted = torch.max(outputs_avg.data, 0) 63 | 64 | plt.rcParams['figure.figsize'] = (13.5,5.5) 65 | axes=plt.subplot(1, 3, 1) 66 | plt.imshow(raw_img) 67 | plt.xlabel('Input Image', fontsize=16) 68 | axes.set_xticks([]) 69 | axes.set_yticks([]) 70 | plt.tight_layout() 71 | 72 | 73 | plt.subplots_adjust(left=0.05, bottom=0.2, right=0.95, top=0.9, hspace=0.02, wspace=0.3) 74 | 75 | plt.subplot(1, 3, 2) 76 | ind = 0.1+0.6*np.arange(len(class_names)) # the x locations for the groups 77 | width = 0.4 # the width of the bars: can also be len(x) sequence 78 | color_list = ['red','orangered','darkorange','limegreen','darkgreen','royalblue','navy'] 79 | for i in range(len(class_names)): 80 | plt.bar(ind[i], score.data.cpu().numpy()[i], width, color=color_list[i]) 81 | plt.title("Classification results ",fontsize=20) 82 | plt.xlabel(" Expression Category ",fontsize=16) 83 | plt.ylabel(" Classification Score ",fontsize=16) 84 | plt.xticks(ind, class_names, rotation=45, fontsize=14) 85 | 86 | axes=plt.subplot(1, 3, 3) 87 | emojis_img = io.imread('images/emojis/%s.png' % str(class_names[int(predicted.cpu().numpy())])) 88 | plt.imshow(emojis_img) 89 | plt.xlabel('Emoji Expression', fontsize=16) 90 | axes.set_xticks([]) 91 | axes.set_yticks([]) 92 | plt.tight_layout() 93 | # show emojis 94 | 95 | #plt.show() 96 | plt.savefig(os.path.join('images/results/{}.png'.format(i+1))) 97 | plt.close() 98 | 99 | #print("The Expression is %s" %str(class_names[int(predicted.cpu().numpy())])) 100 | emotion_array[int(predicted.cpu().numpy())] += 1 101 | return emotion_array 102 | -------------------------------------------------------------------------------- /emotion-detection/models/resnet.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | 3 | For Pre-activation ResNet, see 'preact_resnet.py'. 4 | 5 | Reference: 6 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 7 | Deep Residual Learning for Image Recognition. 
arXiv:1512.03385 8 | ''' 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | from torch.autograd import Variable 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, in_planes, planes, stride=1): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 22 | self.bn1 = nn.BatchNorm2d(planes) 23 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 24 | self.bn2 = nn.BatchNorm2d(planes) 25 | 26 | self.shortcut = nn.Sequential() 27 | if stride != 1 or in_planes != self.expansion*planes: 28 | self.shortcut = nn.Sequential( 29 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 30 | nn.BatchNorm2d(self.expansion*planes) 31 | ) 32 | 33 | def forward(self, x): 34 | out = F.relu(self.bn1(self.conv1(x))) 35 | out = self.bn2(self.conv2(out)) 36 | out += self.shortcut(x) 37 | out = F.relu(out) 38 | return out 39 | 40 | 41 | class Bottleneck(nn.Module): 42 | expansion = 4 43 | 44 | def __init__(self, in_planes, planes, stride=1): 45 | super(Bottleneck, self).__init__() 46 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 47 | self.bn1 = nn.BatchNorm2d(planes) 48 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 49 | self.bn2 = nn.BatchNorm2d(planes) 50 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 51 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 52 | 53 | self.shortcut = nn.Sequential() 54 | if stride != 1 or in_planes != self.expansion*planes: 55 | self.shortcut = nn.Sequential( 56 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 57 | nn.BatchNorm2d(self.expansion*planes) 58 | ) 59 | 60 | def forward(self, x): 61 | out = F.relu(self.bn1(self.conv1(x))) 62 | out = F.relu(self.bn2(self.conv2(out))) 63 | out = self.bn3(self.conv3(out)) 64 | out += self.shortcut(x) 65 | out = F.relu(out) 66 | return out 67 | 68 | 69 | class ResNet(nn.Module): 70 | def __init__(self, block, num_blocks, num_classes=7): 71 | super(ResNet, self).__init__() 72 | self.in_planes = 64 73 | 74 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 75 | self.bn1 = nn.BatchNorm2d(64) 76 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 77 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 78 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 79 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 80 | self.linear = nn.Linear(512, num_classes) 81 | 82 | def _make_layer(self, block, planes, num_blocks, stride): 83 | strides = [stride] + [1]*(num_blocks-1) 84 | layers = [] 85 | for stride in strides: 86 | layers.append(block(self.in_planes, planes, stride)) 87 | self.in_planes = planes * block.expansion 88 | return nn.Sequential(*layers) 89 | 90 | def forward(self, x): 91 | out = F.relu(self.bn1(self.conv1(x))) 92 | out = self.layer1(out) 93 | out = self.layer2(out) 94 | out = self.layer3(out) 95 | out = self.layer4(out) 96 | out = F.avg_pool2d(out, 4) 97 | out = out.view(out.size(0), -1) 98 | out = F.dropout(out, p=0.5, training=self.training) 99 | out = self.linear(out) 100 | return out 101 | 102 | 103 | def ResNet18(): 104 | return ResNet(BasicBlock, [2,2,2,2]) -------------------------------------------------------------------------------- 
/pose-estimation/models/with_mobilenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from modules.conv import conv, conv_dw, conv_dw_no_bn 5 | 6 | 7 | class Cpm(nn.Module): 8 | def __init__(self, in_channels, out_channels): 9 | super().__init__() 10 | self.align = conv(in_channels, out_channels, kernel_size=1, padding=0, bn=False) 11 | self.trunk = nn.Sequential( 12 | conv_dw_no_bn(out_channels, out_channels), 13 | conv_dw_no_bn(out_channels, out_channels), 14 | conv_dw_no_bn(out_channels, out_channels) 15 | ) 16 | self.conv = conv(out_channels, out_channels, bn=False) 17 | 18 | def forward(self, x): 19 | x = self.align(x) 20 | x = self.conv(x + self.trunk(x)) 21 | return x 22 | 23 | 24 | class InitialStage(nn.Module): 25 | def __init__(self, num_channels, num_heatmaps, num_pafs): 26 | super().__init__() 27 | self.trunk = nn.Sequential( 28 | conv(num_channels, num_channels, bn=False), 29 | conv(num_channels, num_channels, bn=False), 30 | conv(num_channels, num_channels, bn=False) 31 | ) 32 | self.heatmaps = nn.Sequential( 33 | conv(num_channels, 512, kernel_size=1, padding=0, bn=False), 34 | conv(512, num_heatmaps, kernel_size=1, padding=0, bn=False, relu=False) 35 | ) 36 | self.pafs = nn.Sequential( 37 | conv(num_channels, 512, kernel_size=1, padding=0, bn=False), 38 | conv(512, num_pafs, kernel_size=1, padding=0, bn=False, relu=False) 39 | ) 40 | 41 | def forward(self, x): 42 | trunk_features = self.trunk(x) 43 | heatmaps = self.heatmaps(trunk_features) 44 | pafs = self.pafs(trunk_features) 45 | return [heatmaps, pafs] 46 | 47 | 48 | class RefinementStageBlock(nn.Module): 49 | def __init__(self, in_channels, out_channels): 50 | super().__init__() 51 | self.initial = conv(in_channels, out_channels, kernel_size=1, padding=0, bn=False) 52 | self.trunk = nn.Sequential( 53 | conv(out_channels, out_channels), 54 | conv(out_channels, out_channels, dilation=2, padding=2) 55 | ) 56 | 57 | def forward(self, x): 58 | initial_features = self.initial(x) 59 | trunk_features = self.trunk(initial_features) 60 | return initial_features + trunk_features 61 | 62 | 63 | class RefinementStage(nn.Module): 64 | def __init__(self, in_channels, out_channels, num_heatmaps, num_pafs): 65 | super().__init__() 66 | self.trunk = nn.Sequential( 67 | RefinementStageBlock(in_channels, out_channels), 68 | RefinementStageBlock(out_channels, out_channels), 69 | RefinementStageBlock(out_channels, out_channels), 70 | RefinementStageBlock(out_channels, out_channels), 71 | RefinementStageBlock(out_channels, out_channels) 72 | ) 73 | self.heatmaps = nn.Sequential( 74 | conv(out_channels, out_channels, kernel_size=1, padding=0, bn=False), 75 | conv(out_channels, num_heatmaps, kernel_size=1, padding=0, bn=False, relu=False) 76 | ) 77 | self.pafs = nn.Sequential( 78 | conv(out_channels, out_channels, kernel_size=1, padding=0, bn=False), 79 | conv(out_channels, num_pafs, kernel_size=1, padding=0, bn=False, relu=False) 80 | ) 81 | 82 | def forward(self, x): 83 | trunk_features = self.trunk(x) 84 | heatmaps = self.heatmaps(trunk_features) 85 | pafs = self.pafs(trunk_features) 86 | return [heatmaps, pafs] 87 | 88 | 89 | class PoseEstimationWithMobileNet(nn.Module): 90 | def __init__(self, num_refinement_stages=1, num_channels=128, num_heatmaps=19, num_pafs=38): 91 | super().__init__() 92 | self.model = nn.Sequential( 93 | conv( 3, 32, stride=2, bias=False), 94 | conv_dw( 32, 64), 95 | conv_dw( 64, 128, stride=2), 96 | conv_dw(128, 128), 97 | 
conv_dw(128, 256, stride=2), 98 | conv_dw(256, 256), 99 | conv_dw(256, 512), # conv4_2 100 | conv_dw(512, 512, dilation=2, padding=2), 101 | conv_dw(512, 512), 102 | conv_dw(512, 512), 103 | conv_dw(512, 512), 104 | conv_dw(512, 512) # conv5_5 105 | ) 106 | self.cpm = Cpm(512, num_channels) 107 | 108 | self.initial_stage = InitialStage(num_channels, num_heatmaps, num_pafs) 109 | self.refinement_stages = nn.ModuleList() 110 | for idx in range(num_refinement_stages): 111 | self.refinement_stages.append(RefinementStage(num_channels + num_heatmaps + num_pafs, num_channels, 112 | num_heatmaps, num_pafs)) 113 | 114 | def forward(self, x): 115 | backbone_features = self.model(x) 116 | backbone_features = self.cpm(backbone_features) 117 | 118 | stages_output = self.initial_stage(backbone_features) 119 | for refinement_stage in self.refinement_stages: 120 | stages_output.extend( 121 | refinement_stage(torch.cat([backbone_features, stages_output[-2], stages_output[-1]], dim=1))) 122 | 123 | return stages_output 124 | -------------------------------------------------------------------------------- /pose-estimation/modules/pose.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from modules.keypoints import BODY_PARTS_KPT_IDS, BODY_PARTS_PAF_IDS 5 | from modules.one_euro_filter import OneEuroFilter 6 | 7 | 8 | class Pose: 9 | num_kpts = 18 10 | kpt_names = ['nose', 'neck', 11 | 'r_sho', 'r_elb', 'r_wri', 'l_sho', 'l_elb', 'l_wri', 12 | 'r_hip', 'r_knee', 'r_ank', 'l_hip', 'l_knee', 'l_ank', 13 | 'r_eye', 'l_eye', 14 | 'r_ear', 'l_ear'] 15 | sigmas = np.array([.26, .79, .79, .72, .62, .79, .72, .62, 1.07, .87, .89, 1.07, .87, .89, .25, .25, .35, .35], 16 | dtype=np.float32) / 10.0 17 | vars = (sigmas * 2) ** 2 18 | last_id = -1 19 | color = [0, 224, 255] 20 | 21 | def __init__(self, keypoints, confidence): 22 | super().__init__() 23 | self.keypoints = keypoints 24 | self.confidence = confidence 25 | self.bbox = Pose.get_bbox(self.keypoints) 26 | self.id = None 27 | self.filters = [[OneEuroFilter(), OneEuroFilter()] for _ in range(Pose.num_kpts)] 28 | 29 | @staticmethod 30 | def get_bbox(keypoints): 31 | found_keypoints = np.zeros((np.count_nonzero(keypoints[:, 0] != -1), 2), dtype=np.int32) 32 | found_kpt_id = 0 33 | for kpt_id in range(Pose.num_kpts): 34 | if keypoints[kpt_id, 0] == -1: 35 | continue 36 | found_keypoints[found_kpt_id] = keypoints[kpt_id] 37 | found_kpt_id += 1 38 | bbox = cv2.boundingRect(found_keypoints) 39 | return bbox 40 | 41 | def update_id(self, id=None): 42 | self.id = id 43 | if self.id is None: 44 | self.id = Pose.last_id + 1 45 | Pose.last_id += 1 46 | 47 | def draw(self, img): 48 | assert self.keypoints.shape == (Pose.num_kpts, 2) 49 | 50 | for part_id in range(len(BODY_PARTS_PAF_IDS) - 2): 51 | kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] 52 | global_kpt_a_id = self.keypoints[kpt_a_id, 0] 53 | if global_kpt_a_id != -1: 54 | x_a, y_a = self.keypoints[kpt_a_id] 55 | cv2.circle(img, (int(x_a), int(y_a)), 3, Pose.color, -1) 56 | kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] 57 | global_kpt_b_id = self.keypoints[kpt_b_id, 0] 58 | if global_kpt_b_id != -1: 59 | x_b, y_b = self.keypoints[kpt_b_id] 60 | cv2.circle(img, (int(x_b), int(y_b)), 3, Pose.color, -1) 61 | if global_kpt_a_id != -1 and global_kpt_b_id != -1: 62 | cv2.line(img, (int(x_a), int(y_a)), (int(x_b), int(y_b)), Pose.color, 2) 63 | 64 | 65 | def get_similarity(a, b, threshold=0.5): 66 | num_similar_kpt = 0 67 | for kpt_id in 
range(Pose.num_kpts): 68 | if a.keypoints[kpt_id, 0] != -1 and b.keypoints[kpt_id, 0] != -1: 69 | distance = np.sum((a.keypoints[kpt_id] - b.keypoints[kpt_id]) ** 2) 70 | area = max(a.bbox[2] * a.bbox[3], b.bbox[2] * b.bbox[3]) 71 | similarity = np.exp(-distance / (2 * (area + np.spacing(1)) * Pose.vars[kpt_id])) 72 | if similarity > threshold: 73 | num_similar_kpt += 1 74 | return num_similar_kpt 75 | 76 | 77 | def track_poses(previous_poses, current_poses, threshold=3, smooth=False): 78 | """Propagate poses ids from previous frame results. Id is propagated, 79 | if there are at least `threshold` similar keypoints between pose from previous frame and current. 80 | If correspondence between pose on previous and current frame was established, pose keypoints are smoothed. 81 | 82 | :param previous_poses: poses from previous frame with ids 83 | :param current_poses: poses from current frame to assign ids 84 | :param threshold: minimal number of similar keypoints between poses 85 | :param smooth: smooth pose keypoints between frames 86 | :return: None 87 | """ 88 | current_poses = sorted(current_poses, key=lambda pose: pose.confidence, reverse=True) # match confident poses first 89 | mask = np.ones(len(previous_poses), dtype=np.int32) 90 | for current_pose in current_poses: 91 | best_matched_id = None 92 | best_matched_pose_id = None 93 | best_matched_iou = 0 94 | for id, previous_pose in enumerate(previous_poses): 95 | if not mask[id]: 96 | continue 97 | iou = get_similarity(current_pose, previous_pose) 98 | if iou > best_matched_iou: 99 | best_matched_iou = iou 100 | best_matched_pose_id = previous_pose.id 101 | best_matched_id = id 102 | if best_matched_iou >= threshold: 103 | mask[best_matched_id] = 0 104 | else: # pose not similar to any previous 105 | best_matched_pose_id = None 106 | current_pose.update_id(best_matched_pose_id) 107 | 108 | if smooth: 109 | for kpt_id in range(Pose.num_kpts): 110 | if current_pose.keypoints[kpt_id, 0] == -1: 111 | continue 112 | # reuse filter if previous pose has valid filter 113 | if (best_matched_pose_id is not None 114 | and previous_poses[best_matched_id].keypoints[kpt_id, 0] != -1): 115 | current_pose.filters[kpt_id] = previous_poses[best_matched_id].filters[kpt_id] 116 | current_pose.keypoints[kpt_id, 0] = current_pose.filters[kpt_id][0](current_pose.keypoints[kpt_id, 0]) 117 | current_pose.keypoints[kpt_id, 1] = current_pose.filters[kpt_id][1](current_pose.keypoints[kpt_id, 1]) 118 | current_pose.bbox = Pose.get_bbox(current_pose.keypoints) 119 | -------------------------------------------------------------------------------- /pose-estimation/scripts/prepare_train_labels.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import pickle 4 | 5 | 6 | def prepare_annotations(annotations_per_image, images_info, net_input_size): 7 | """Prepare labels for training. For each annotated person calculates center 8 | to perform crop around it during the training. Also converts data to the internal format. 
9 | 10 | :param annotations_per_image: all annotations for specified image id 11 | :param images_info: auxiliary information about all images 12 | :param net_input_size: network input size during training 13 | :return: list of prepared annotations 14 | """ 15 | prepared_annotations = [] 16 | for _, annotations in annotations_per_image.items(): 17 | previous_centers = [] 18 | for annotation in annotations[0]: 19 | if (annotation['num_keypoints'] < 5 20 | or annotation['area'] < 32 * 32): 21 | continue 22 | person_center = [annotation['bbox'][0] + annotation['bbox'][2] / 2, 23 | annotation['bbox'][1] + annotation['bbox'][3] / 2] 24 | is_close = False 25 | for previous_center in previous_centers: 26 | distance_to_previous = ((person_center[0] - previous_center[0]) ** 2 27 | + (person_center[1] - previous_center[1]) ** 2) ** 0.5 28 | if distance_to_previous < previous_center[2] * 0.3: 29 | is_close = True 30 | break 31 | if is_close: 32 | continue 33 | 34 | prepared_annotation = { 35 | 'img_paths': images_info[annotation['image_id']]['file_name'], 36 | 'img_width': images_info[annotation['image_id']]['width'], 37 | 'img_height': images_info[annotation['image_id']]['height'], 38 | 'objpos': person_center, 39 | 'image_id': annotation['image_id'], 40 | 'bbox': annotation['bbox'], 41 | 'segment_area': annotation['area'], 42 | 'scale_provided': annotation['bbox'][3] / net_input_size, 43 | 'num_keypoints': annotation['num_keypoints'], 44 | 'segmentations': annotations[1] 45 | } 46 | 47 | keypoints = [] 48 | for i in range(len(annotation['keypoints']) // 3): 49 | keypoint = [annotation['keypoints'][i * 3], annotation['keypoints'][i * 3 + 1], 2] 50 | if annotation['keypoints'][i * 3 + 2] == 1: 51 | keypoint[2] = 0 52 | elif annotation['keypoints'][i * 3 + 2] == 2: 53 | keypoint[2] = 1 54 | keypoints.append(keypoint) 55 | prepared_annotation['keypoints'] = keypoints 56 | 57 | prepared_other_annotations = [] 58 | for other_annotation in annotations[0]: 59 | if other_annotation == annotation: 60 | continue 61 | 62 | prepared_other_annotation = { 63 | 'objpos': [other_annotation['bbox'][0] + other_annotation['bbox'][2] / 2, 64 | other_annotation['bbox'][1] + other_annotation['bbox'][3] / 2], 65 | 'bbox': other_annotation['bbox'], 66 | 'segment_area': other_annotation['area'], 67 | 'scale_provided': other_annotation['bbox'][3] / net_input_size, 68 | 'num_keypoints': other_annotation['num_keypoints'] 69 | } 70 | 71 | keypoints = [] 72 | for i in range(len(other_annotation['keypoints']) // 3): 73 | keypoint = [other_annotation['keypoints'][i * 3], other_annotation['keypoints'][i * 3 + 1], 2] 74 | if other_annotation['keypoints'][i * 3 + 2] == 1: 75 | keypoint[2] = 0 76 | elif other_annotation['keypoints'][i * 3 + 2] == 2: 77 | keypoint[2] = 1 78 | keypoints.append(keypoint) 79 | prepared_other_annotation['keypoints'] = keypoints 80 | prepared_other_annotations.append(prepared_other_annotation) 81 | 82 | prepared_annotation['processed_other_annotations'] = prepared_other_annotations 83 | prepared_annotations.append(prepared_annotation) 84 | 85 | previous_centers.append((person_center[0], person_center[1], annotation['bbox'][2], annotation['bbox'][3])) 86 | return prepared_annotations 87 | 88 | 89 | if __name__ == '__main__': 90 | parser = argparse.ArgumentParser() 91 | parser.add_argument('--labels', type=str, required=True, help='path to json with keypoints train labels') 92 | parser.add_argument('--output-name', type=str, default='prepared_train_annotation.pkl', 93 | help='name of output file with prepared 
keypoints annotation') 94 | parser.add_argument('--net-input-size', type=int, default=368, help='network input size') 95 | args = parser.parse_args() 96 | with open(args.labels, 'r') as f: 97 | data = json.load(f) 98 | 99 | annotations_per_image_mapping = {} 100 | for annotation in data['annotations']: 101 | if annotation['num_keypoints'] != 0 and not annotation['iscrowd']: 102 | if annotation['image_id'] not in annotations_per_image_mapping: 103 | annotations_per_image_mapping[annotation['image_id']] = [[], []] 104 | annotations_per_image_mapping[annotation['image_id']][0].append(annotation) 105 | 106 | crowd_segmentations_per_image_mapping = {} 107 | for annotation in data['annotations']: 108 | if annotation['iscrowd']: 109 | if annotation['image_id'] not in crowd_segmentations_per_image_mapping: 110 | crowd_segmentations_per_image_mapping[annotation['image_id']] = [] 111 | crowd_segmentations_per_image_mapping[annotation['image_id']].append(annotation['segmentation']) 112 | 113 | for image_id, crowd_segmentations in crowd_segmentations_per_image_mapping.items(): 114 | if image_id in annotations_per_image_mapping: 115 | annotations_per_image_mapping[image_id][1] = crowd_segmentations 116 | 117 | images_info = {} 118 | for image_info in data['images']: 119 | images_info[image_info['id']] = image_info 120 | 121 | prepared_annotations = prepare_annotations(annotations_per_image_mapping, images_info, args.net_input_size) 122 | 123 | with open(args.output_name, 'wb') as f: 124 | pickle.dump(prepared_annotations, f) 125 | 126 | -------------------------------------------------------------------------------- /pose-estimation/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import numpy as np 5 | import torch 6 | 7 | from models.with_mobilenet import PoseEstimationWithMobileNet 8 | from modules.keypoints import extract_keypoints, group_keypoints 9 | from modules.load_state import load_state 10 | from modules.pose import Pose, track_poses 11 | from val import normalize, pad_width 12 | 13 | 14 | class ImageReader(object): 15 | def __init__(self, file_names): 16 | self.file_names = file_names 17 | self.max_idx = len(file_names) 18 | 19 | def __iter__(self): 20 | self.idx = 0 21 | return self 22 | 23 | def __next__(self): 24 | if self.idx == self.max_idx: 25 | raise StopIteration 26 | img = cv2.imread(self.file_names[self.idx], cv2.IMREAD_COLOR) 27 | if img.size == 0: 28 | raise IOError('Image {} cannot be read'.format(self.file_names[self.idx])) 29 | self.idx = self.idx + 1 30 | return img 31 | 32 | 33 | class VideoReader(object): 34 | def __init__(self, file_name): 35 | self.file_name = file_name 36 | try: # OpenCV needs int to read from webcam 37 | self.file_name = int(file_name) 38 | except ValueError: 39 | pass 40 | 41 | def __iter__(self): 42 | self.cap = cv2.VideoCapture(self.file_name) 43 | if not self.cap.isOpened(): 44 | raise IOError('Video {} cannot be opened'.format(self.file_name)) 45 | return self 46 | 47 | def __next__(self): 48 | was_read, img = self.cap.read() 49 | if not was_read: 50 | raise StopIteration 51 | return img 52 | 53 | 54 | def infer_fast(net, img, net_input_height_size, stride, upsample_ratio, cpu, 55 | pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256): 56 | height, width, _ = img.shape 57 | scale = net_input_height_size / height 58 | 59 | scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 60 | scaled_img = normalize(scaled_img, 
img_mean, img_scale) 61 | min_dims = [net_input_height_size, max(scaled_img.shape[1], net_input_height_size)] 62 | padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims) 63 | 64 | tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float() 65 | if not cpu: 66 | tensor_img = tensor_img.cuda() 67 | 68 | stages_output = net(tensor_img) 69 | 70 | stage2_heatmaps = stages_output[-2] 71 | heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0)) 72 | heatmaps = cv2.resize(heatmaps, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC) 73 | 74 | stage2_pafs = stages_output[-1] 75 | pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0)) 76 | pafs = cv2.resize(pafs, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC) 77 | 78 | return heatmaps, pafs, scale, pad 79 | 80 | 81 | def run_demo(net, image_provider, height_size, cpu, track, smooth): 82 | net = net.eval() 83 | if not cpu: 84 | net = net.cuda() 85 | 86 | stride = 8 87 | upsample_ratio = 4 88 | num_keypoints = Pose.num_kpts 89 | previous_poses = [] 90 | delay = 33 91 | for img in image_provider: 92 | orig_img = img.copy() 93 | heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu) 94 | 95 | total_keypoints_num = 0 96 | all_keypoints_by_type = [] 97 | for kpt_idx in range(num_keypoints): # 19th for bg 98 | total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num) 99 | 100 | pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True) 101 | for kpt_id in range(all_keypoints.shape[0]): 102 | all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale 103 | all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale 104 | current_poses = [] 105 | for n in range(len(pose_entries)): 106 | if len(pose_entries[n]) == 0: 107 | continue 108 | pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1 109 | for kpt_id in range(num_keypoints): 110 | if pose_entries[n][kpt_id] != -1.0: # keypoint was found 111 | pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0]) 112 | pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1]) 113 | pose = Pose(pose_keypoints, pose_entries[n][18]) 114 | current_poses.append(pose) 115 | 116 | if track: 117 | track_poses(previous_poses, current_poses, smooth=smooth) 118 | previous_poses = current_poses 119 | for pose in current_poses: 120 | pose.draw(img) 121 | img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0) 122 | for pose in current_poses: 123 | cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]), 124 | (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0)) 125 | if track: 126 | cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16), 127 | cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255)) 128 | cv2.imshow('Lightweight Human Pose Estimation Python Demo', img) 129 | key = cv2.waitKey(delay) 130 | if key == 27: # esc 131 | return 132 | elif key == 112: # 'p' 133 | if delay == 33: 134 | delay = 0 135 | else: 136 | delay = 33 137 | 138 | 139 | if __name__ == '__main__': 140 | parser = argparse.ArgumentParser( 141 | description='''Lightweight human pose estimation python demo. 142 | This is just for quick results preview. 
143 | Please, consider c++ demo for the best performance.''') 144 | parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint') 145 | parser.add_argument('--height-size', type=int, default=256, help='network input layer height size') 146 | parser.add_argument('--video', type=str, default='', help='path to video file or camera id') 147 | parser.add_argument('--images', nargs='+', default='', help='path to input image(s)') 148 | parser.add_argument('--cpu', action='store_true', help='run network inference on cpu') 149 | parser.add_argument('--track', type=int, default=1, help='track pose id in video') 150 | parser.add_argument('--smooth', type=int, default=1, help='smooth pose keypoints') 151 | args = parser.parse_args() 152 | 153 | if args.video == '' and args.images == '': 154 | raise ValueError('Either --video or --image has to be provided') 155 | 156 | net = PoseEstimationWithMobileNet() 157 | checkpoint = torch.load(args.checkpoint_path, map_location='cpu') 158 | load_state(net, checkpoint) 159 | 160 | frame_provider = ImageReader(args.images) 161 | if args.video != '': 162 | frame_provider = VideoReader(args.video) 163 | else: 164 | args.track = 0 165 | 166 | run_demo(net, frame_provider, args.height_size, args.cpu, args.track, args.smooth) 167 | -------------------------------------------------------------------------------- /pose-estimation/val.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import json 4 | import math 5 | import numpy as np 6 | from pycocotools.coco import COCO 7 | from pycocotools.cocoeval import COCOeval 8 | 9 | import torch 10 | 11 | from datasets.coco import CocoValDataset 12 | from models.with_mobilenet import PoseEstimationWithMobileNet 13 | from modules.keypoints import extract_keypoints, group_keypoints 14 | from modules.load_state import load_state 15 | 16 | 17 | def run_coco_eval(gt_file_path, dt_file_path): 18 | annotation_type = 'keypoints' 19 | print('Running test for {} results.'.format(annotation_type)) 20 | 21 | coco_gt = COCO(gt_file_path) 22 | coco_dt = coco_gt.loadRes(dt_file_path) 23 | 24 | result = COCOeval(coco_gt, coco_dt, annotation_type) 25 | result.evaluate() 26 | result.accumulate() 27 | result.summarize() 28 | 29 | 30 | def normalize(img, img_mean, img_scale): 31 | img = np.array(img, dtype=np.float32) 32 | img = (img - img_mean) * img_scale 33 | return img 34 | 35 | 36 | def pad_width(img, stride, pad_value, min_dims): 37 | h, w, _ = img.shape 38 | h = min(min_dims[0], h) 39 | min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride 40 | min_dims[1] = max(min_dims[1], w) 41 | min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride 42 | pad = [] 43 | pad.append(int(math.floor((min_dims[0] - h) / 2.0))) 44 | pad.append(int(math.floor((min_dims[1] - w) / 2.0))) 45 | pad.append(int(min_dims[0] - h - pad[0])) 46 | pad.append(int(min_dims[1] - w - pad[1])) 47 | padded_img = cv2.copyMakeBorder(img, pad[0], pad[2], pad[1], pad[3], 48 | cv2.BORDER_CONSTANT, value=pad_value) 49 | return padded_img, pad 50 | 51 | 52 | def convert_to_coco_format(pose_entries, all_keypoints): 53 | coco_keypoints = [] 54 | scores = [] 55 | for n in range(len(pose_entries)): 56 | if len(pose_entries[n]) == 0: 57 | continue 58 | keypoints = [0] * 17 * 3 59 | to_coco_map = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] 60 | person_score = pose_entries[n][-2] 61 | position_id = -1 62 | for keypoint_id in pose_entries[n][:-2]: 63 | 
position_id += 1 64 | if position_id == 1: # no 'neck' in COCO 65 | continue 66 | 67 | cx, cy, score, visibility = 0, 0, 0, 0 # keypoint not found 68 | if keypoint_id != -1: 69 | cx, cy, score = all_keypoints[int(keypoint_id), 0:3] 70 | cx = cx + 0.5 71 | cy = cy + 0.5 72 | visibility = 1 73 | keypoints[to_coco_map[position_id] * 3 + 0] = cx 74 | keypoints[to_coco_map[position_id] * 3 + 1] = cy 75 | keypoints[to_coco_map[position_id] * 3 + 2] = visibility 76 | coco_keypoints.append(keypoints) 77 | scores.append(person_score * max(0, (pose_entries[n][-1] - 1))) # -1 for 'neck' 78 | return coco_keypoints, scores 79 | 80 | 81 | def infer(net, img, scales, base_height, stride, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256): 82 | normed_img = normalize(img, img_mean, img_scale) 83 | height, width, _ = normed_img.shape 84 | scales_ratios = [scale * base_height / float(height) for scale in scales] 85 | avg_heatmaps = np.zeros((height, width, 19), dtype=np.float32) 86 | avg_pafs = np.zeros((height, width, 38), dtype=np.float32) 87 | 88 | for ratio in scales_ratios: 89 | scaled_img = cv2.resize(normed_img, (0, 0), fx=ratio, fy=ratio, interpolation=cv2.INTER_CUBIC) 90 | min_dims = [base_height, max(scaled_img.shape[1], base_height)] 91 | padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims) 92 | 93 | tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float().cuda() 94 | stages_output = net(tensor_img) 95 | 96 | stage2_heatmaps = stages_output[-2] 97 | heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0)) 98 | heatmaps = cv2.resize(heatmaps, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 99 | heatmaps = heatmaps[pad[0]:heatmaps.shape[0] - pad[2], pad[1]:heatmaps.shape[1] - pad[3]:, :] 100 | heatmaps = cv2.resize(heatmaps, (width, height), interpolation=cv2.INTER_CUBIC) 101 | avg_heatmaps = avg_heatmaps + heatmaps / len(scales_ratios) 102 | 103 | stage2_pafs = stages_output[-1] 104 | pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0)) 105 | pafs = cv2.resize(pafs, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 106 | pafs = pafs[pad[0]:pafs.shape[0] - pad[2], pad[1]:pafs.shape[1] - pad[3], :] 107 | pafs = cv2.resize(pafs, (width, height), interpolation=cv2.INTER_CUBIC) 108 | avg_pafs = avg_pafs + pafs / len(scales_ratios) 109 | 110 | return avg_heatmaps, avg_pafs 111 | 112 | 113 | def evaluate(labels, output_name, images_folder, net, multiscale=False, visualize=False): 114 | net = net.cuda().eval() 115 | base_height = 368 116 | scales = [1] 117 | if multiscale: 118 | scales = [0.5, 1.0, 1.5, 2.0] 119 | stride = 8 120 | 121 | dataset = CocoValDataset(labels, images_folder) 122 | coco_result = [] 123 | for sample in dataset: 124 | file_name = sample['file_name'] 125 | img = sample['img'] 126 | 127 | avg_heatmaps, avg_pafs = infer(net, img, scales, base_height, stride) 128 | 129 | total_keypoints_num = 0 130 | all_keypoints_by_type = [] 131 | for kpt_idx in range(18): # 19th for bg 132 | total_keypoints_num += extract_keypoints(avg_heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num) 133 | 134 | pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, avg_pafs) 135 | 136 | coco_keypoints, scores = convert_to_coco_format(pose_entries, all_keypoints) 137 | 138 | image_id = int(file_name[0:file_name.rfind('.')]) 139 | for idx in range(len(coco_keypoints)): 140 | coco_result.append({ 141 | 'image_id': image_id, 142 | 'category_id': 1, # person 143 | 
                'keypoints': coco_keypoints[idx],
144 |                 'score': scores[idx]
145 |             })
146 | 
147 |         if visualize:
148 |             for keypoints in coco_keypoints:
149 |                 for idx in range(len(keypoints) // 3):
150 |                     cv2.circle(img, (int(keypoints[idx * 3]), int(keypoints[idx * 3 + 1])),
151 |                                3, (255, 0, 255), -1)
152 |             cv2.imshow('keypoints', img)
153 |             key = cv2.waitKey()
154 |             if key == 27:  # esc
155 |                 return
156 | 
157 |     with open(output_name, 'w') as f:
158 |         json.dump(coco_result, f, indent=4)
159 | 
160 |     run_coco_eval(labels, output_name)
161 | 
162 | 
163 | if __name__ == '__main__':
164 |     parser = argparse.ArgumentParser()
165 |     parser.add_argument('--labels', type=str, required=True, help='path to json with keypoints val labels')
166 |     parser.add_argument('--output-name', type=str, default='detections.json',
167 |                         help='name of output json file with detected keypoints')
168 |     parser.add_argument('--images-folder', type=str, required=True, help='path to COCO val images folder')
169 |     parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint')
170 |     parser.add_argument('--multiscale', action='store_true', help='average inference results over multiple scales')
171 |     parser.add_argument('--visualize', action='store_true', help='show keypoints')
172 |     args = parser.parse_args()
173 | 
174 |     net = PoseEstimationWithMobileNet()
175 |     checkpoint = torch.load(args.checkpoint_path)
176 |     load_state(net, checkpoint)
177 | 
178 |     evaluate(args.labels, args.output_name, args.images_folder, net, args.multiscale, args.visualize)
179 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Classroom-Monitoring-System
2 | ### Making the classroom smart using deep learning and Internet of Things
3 | 
4 | ![ScreenShot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/Classroom.png)
5 | 
6 | ## Introduction
7 | As students, we sometimes feel like leaving the classroom, frustrated by boring lectures, and we have always wanted
8 | the lecturer to know how we feel. In earlier times this was hardly possible, but in the deep learning
9 | era we have made it possible. It is also important to understand that the way lectures are delivered in
10 | college or school plays a major role in shaping the careers of students. Hence we came up with a solution
11 | that lets the lecturer know not only the students' emotions but also their gestures, so that
12 | he can plan his next moves accordingly.
13 | 
14 | ## Objective
15 | The main objectives of this system are:
16 | 
17 | - Make the attendance system flexible (biometric)
18 | - Help the teachers and the principal understand the students' interest in the lectures by identifying their emotions and sitting postures.
19 | - Automatic notes taker.
20 | 
21 | ## Components Required
22 | 
23 | ### For Biometric Attendance:
24 | - Arduino UNO board (ATmega328P microcontroller)
25 | - Fingerprint sensor (R307 module)
26 | - LCD display (16x2 LCD module)
27 | - Firebase (database to store students' roll numbers)
28 | 
29 | ### For Emotion and Posture Recognition
30 | - Raspberry Pi
31 | - Camera module
32 | - Deep learning modules and models
33 | 
34 | ### For Automatic Notes Taker
35 | - Raspberry Pi 4
36 | - ReSpeaker USB Mic Array
37 | 
38 | ## Block Diagram of how our system works
39 | 
40 | ![Screenshot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/BlockDiagram-Classroom.png)
41 | 
42 | ## Description
43 | 
44 | ### Biometric Attendance
45 | We built a fingerprint-based biometric attendance system using Arduino (inspired by [this](https://circuitdigest.com/microcontroller-projects/fingerprint-attendance-system-using-arduino-uno)).
46 | 
47 | ![ScreenShot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/finger-print-sensor.png)
48 | 
49 | ### Emotion Recognition
50 | 
51 | ![ScreenShot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/emotion-detection/images/results/Happy-Result.png)
52 | 
53 | ![ScreenShot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/emotion-detection/images/results/Neutral-Result.png)
54 | 
55 | First, we detect faces from the camera using [BlazeFace-Torch](https://www.kaggle.com/humananalog/blazeface-pytorch), a lightweight face detection framework.
56 | Besides a bounding box, BlazeFace also predicts 6 keypoints for face landmarks (2x eyes, 2x ears, nose, mouth). Using the detected face, we then recognize one of 7 emotions (Angry, Disgust, Fear, Happy, Sad, Surprise, Neutral) and fetch the emoji for that emotion. The recognition is done with a VGG19 model, trained on roughly a million faces for about 4 hours on Kaggle's GPU.
57 | 
58 | ![](https://www.pyimagesearch.com/wp-content/uploads/2017/03/imagenet_vgg16.png)
59 | 
60 | ![](https://www.researchgate.net/profile/Clifford_Yang/publication/325137356/figure/fig2/AS:670371271413777@1536840374533/llustration-of-the-network-architecture-of-VGG-19-model-conv-means-convolution-FC-means.jpg)
61 | 
62 | ### Pose Estimation
63 | Here we adapt a multi-person pose estimation architecture to run on edge devices. We follow the bottom-up approach from OpenPose because of its decent quality and its robustness to the number of people inside the frame. The network has 4.1M parameters and a complexity of 9 billion floating-point operations (GFLOPs), which is only ~15% of the baseline 2-stage OpenPose with almost the same quality. It detects a skeleton (keypoints and the connections between them) to identify the pose of every person inside the image. A pose may contain up to 18 keypoints: ears, eyes, nose, neck, shoulders, elbows, wrists, hips, knees, and ankles.
64 | 
65 | ![ScreenShot](https://github.com/vineeth-raj/Classroom-Monitoring-System/blob/main/Pose-Detection.png)
66 | 
67 | We profiled the code, removed extra memory allocations, and parallelized keypoint extraction with OpenCV's routines. This made the code significantly faster, and the remaining bottleneck was resizing the feature maps to the input image size. We tried skipping the resize step and grouping keypoints directly on the network output, but accuracy dropped significantly. So the upsampling step cannot be avoided, though it does not have to go all the way to the input image size: our experiments showed that with an upsample factor of 8 the accuracy is the same as when resizing to the input image size. We use an upsample factor of 4 for the demo.
68 | (Inspired by this [paper](https://arxiv.org/pdf/1811.12004.pdf).)
69 | 
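To make the pose pipeline above concrete, here is a minimal single-image sketch that reuses the helpers already shipped in `pose-estimation/` (the `infer`, `extract_keypoints` and `group_keypoints` functions shown later in this file). The checkpoint and image file names are placeholders, and the script is assumed to be run from inside the `pose-estimation` folder on a machine with a CUDA device, since `infer` moves tensors to the GPU.

```python
# Minimal sketch: run the lightweight OpenPose-style network on one frame.
# Assumes execution from the pose-estimation/ folder; the checkpoint and
# image paths below are placeholders.
import cv2
import torch

from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.load_state import load_state
from modules.keypoints import extract_keypoints, group_keypoints
from val import infer

net = PoseEstimationWithMobileNet()
checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')  # placeholder checkpoint
load_state(net, checkpoint)
net = net.cuda().eval()  # infer() expects the network on the GPU

img = cv2.imread('classroom_frame.jpg', cv2.IMREAD_COLOR)  # placeholder image
avg_heatmaps, avg_pafs = infer(net, img, scales=[1], base_height=368, stride=8)

total_keypoints_num = 0
all_keypoints_by_type = []
for kpt_idx in range(18):  # the 19th heatmap channel is background
    total_keypoints_num += extract_keypoints(avg_heatmaps[:, :, kpt_idx],
                                             all_keypoints_by_type, total_keypoints_num)
pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, avg_pafs)
print('people detected:', len(pose_entries))
```

Each entry in `pose_entries` holds the indices of up to 18 keypoints for one person, which is what the demo uses to draw skeletons like the one in the picture above.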
70 | ### Notes taker
71 | Here we built a simple audio-to-text converter using PyAudio and Halo. The transcribed text is mailed to the students as notes.
72 | 
73 | ## Procedure
74 | 
75 | - First, the professor logs into his account on the biometric device with his fingerprint and gets the attendance list for the subject he handles.
76 | - The device is then passed to the students to place their fingerprints, and the attendance for that day is updated along with the fingerprint timestamps.
77 | - The professor is advised to keep the device on until his hour ends, because as soon as the professor logs in, the notes-taker device starts running, and it stops when the biometric device stops.
78 | - The notes-taking device is connected to the cloud; it takes notes of the professor's words and sends them to every student's webmail ID or mail ID once the professor logs out from the biometric device (session over).
79 | - This reduces the students' effort of taking notes and lets them focus more on the professor's words.
80 | - The professor can log out by placing his fingerprint on the device a second time.
81 | - An LCD display on the biometric device shows whether the professor is logged in and whether a student's fingerprint was recognized.
82 | - Meanwhile, during the class, the students' emotions are detected from their facial expressions; the overall emotion is displayed on the smart board (if available) every 3 seconds and is sent as a message to the principal/respective head and the teacher once the session ends.
83 | - Various student poses, such as standing, raising a hand, or leaning on the bench, are also detected and can be used to gauge the students' interest in the subject.
84 | - Considering the students' privacy, we never send student faces to a server. The faces from the cameras are processed inside the board using OpenCV and blazeface-pytorch (used to detect faces), and only the resulting emotions go to the server; the same holds for poses (a minimal sketch of this on-device loop is given after the Future Scope section).
85 | 
86 | ## Final Touch
87 | We were able to create a system similar to the picture below (inspired by this [website](https://edtechchina.medium.com/schools-using-facial-recognition-system-sparks-privacy-concerns-in-china-d4f706e5cfd0)).
88 | 
89 | ![](https://miro.medium.com/max/875/1*TqeG3GUeIOaXY36Dwu8rkA.jpeg)
90 | 
91 | - This can also be used on any public speaking platform to assess the emotion of the audience and make it available to the speaker, helping him deliver the speech better.
92 | 
93 | ## Future Scope
94 | 
95 | - This can be extended to automating the distribution of corrected answer papers through the server; in fact, the correction of answer papers, or even invigilation, can be automated as well.
96 | - Lights and fans can also be switched on and off automatically, reducing power wastage; the sweeper problem can be solved by using voice-automated on/off.
97 | - We can add recording facilities to the system so that students can view the lectures whenever they wish.
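The Emotion Recognition and Procedure sections above describe the on-device loop only in prose, so here is a rough sketch of how the pieces fit together. It reuses the `BlazeFace` class from `emotion-detection/BlazeFace_PyTorch/blazeface.py` (included later in this file); `classify_emotion` and `send_to_server` are hypothetical placeholders for the VGG19 classifier in `emotion-detection/` and for the transport to the server, and the 3-second refresh follows the Procedure section.

```python
# Rough sketch of the on-device loop: detect faces with BlazeFace, classify
# each crop, and forward only the aggregated emotion label every 3 seconds.
# classify_emotion and send_to_server are placeholders, not repo functions.
import time
from collections import Counter

import cv2
import torch

from BlazeFace_PyTorch.blazeface import BlazeFace

face_net = BlazeFace()
face_net.load_weights('BlazeFace_PyTorch/blazeface.pth')
face_net.load_anchors('BlazeFace_PyTorch/anchors.npy')

def classify_emotion(face_crop):
    """Placeholder for the VGG19 emotion classifier (7 classes)."""
    return 'Neutral'  # stub so the sketch runs end to end

def send_to_server(label):
    """Placeholder: only the label leaves the device, never the image."""
    print('overall emotion:', label)

cap = cv2.VideoCapture(0)
while True:
    ok, frame = cap.read()
    if not ok:
        break
    h, w = frame.shape[:2]
    small = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (128, 128))
    detections = face_net.predict_on_image(small)  # (num_faces, 17) tensor
    emotions = []
    for det in detections:
        # detections are normalized ymin, xmin, ymax, xmax; scale to the frame
        ymin, xmin, ymax, xmax = (det[:4] * torch.tensor([h, w, h, w])).int().tolist()
        face = frame[max(ymin, 0):ymax, max(xmin, 0):xmax]
        if face.size:
            emotions.append(classify_emotion(face))
    if emotions:
        send_to_server(Counter(emotions).most_common(1)[0][0])
    time.sleep(3)  # overall emotion is refreshed every 3 seconds
```

The important property, as noted in the Procedure section, is that only the aggregated label crosses the network; frames and face crops never leave the device.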
98 | 99 | ## References 100 | 101 | - https://medium.com/@EdtechChina/schools-using-facial-recognition-system-sparks-privacy-concerns-in-china-d4f706e5cfd0 102 | - http://en.people.cn/n3/2018/0519/c90000-9461918.html 103 | 104 | ## Contributors 105 | - [Shantosh](https://www.linkedin.com/in/shanthosh-kumar-921092174/) 106 | -------------------------------------------------------------------------------- /pose-estimation/datasets/coco.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import math 4 | import os 5 | import pickle 6 | 7 | import cv2 8 | import numpy as np 9 | import pycocotools 10 | 11 | from torch.utils.data.dataset import Dataset 12 | 13 | BODY_PARTS_KPT_IDS = [[1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13], [1, 2], [2, 3], [3, 4], [2, 16], 14 | [1, 5], [5, 6], [6, 7], [5, 17], [1, 0], [0, 14], [0, 15], [14, 16], [15, 17]] 15 | 16 | 17 | def get_mask(segmentations, mask): 18 | for segmentation in segmentations: 19 | rle = pycocotools.mask.frPyObjects(segmentation, mask.shape[0], mask.shape[1]) 20 | mask[pycocotools.mask.decode(rle) > 0.5] = 0 21 | return mask 22 | 23 | 24 | class CocoTrainDataset(Dataset): 25 | def __init__(self, labels, images_folder, stride, sigma, paf_thickness, transform=None): 26 | super().__init__() 27 | self._images_folder = images_folder 28 | self._stride = stride 29 | self._sigma = sigma 30 | self._paf_thickness = paf_thickness 31 | self._transform = transform 32 | with open(labels, 'rb') as f: 33 | self._labels = pickle.load(f) 34 | 35 | def __getitem__(self, idx): 36 | label = copy.deepcopy(self._labels[idx]) # label modified in transform 37 | image = cv2.imread(os.path.join(self._images_folder, label['img_paths']), cv2.IMREAD_COLOR) 38 | mask = np.ones(shape=(label['img_height'], label['img_width']), dtype=np.float32) 39 | mask = get_mask(label['segmentations'], mask) 40 | sample = { 41 | 'label': label, 42 | 'image': image, 43 | 'mask': mask 44 | } 45 | if self._transform: 46 | sample = self._transform(sample) 47 | 48 | mask = cv2.resize(sample['mask'], dsize=None, fx=1/self._stride, fy=1/self._stride, interpolation=cv2.INTER_AREA) 49 | keypoint_maps = self._generate_keypoint_maps(sample) 50 | sample['keypoint_maps'] = keypoint_maps 51 | keypoint_mask = np.zeros(shape=keypoint_maps.shape, dtype=np.float32) 52 | for idx in range(keypoint_mask.shape[0]): 53 | keypoint_mask[idx] = mask 54 | sample['keypoint_mask'] = keypoint_mask 55 | 56 | paf_maps = self._generate_paf_maps(sample) 57 | sample['paf_maps'] = paf_maps 58 | paf_mask = np.zeros(shape=paf_maps.shape, dtype=np.float32) 59 | for idx in range(paf_mask.shape[0]): 60 | paf_mask[idx] = mask 61 | sample['paf_mask'] = paf_mask 62 | 63 | image = sample['image'].astype(np.float32) 64 | image = (image - 128) / 256 65 | sample['image'] = image.transpose((2, 0, 1)) 66 | return sample 67 | 68 | def __len__(self): 69 | return len(self._labels) 70 | 71 | def _generate_keypoint_maps(self, sample): 72 | n_keypoints = 18 73 | n_rows, n_cols, _ = sample['image'].shape 74 | keypoint_maps = np.zeros(shape=(n_keypoints + 1, 75 | n_rows // self._stride, n_cols // self._stride), dtype=np.float32) # +1 for bg 76 | 77 | label = sample['label'] 78 | for keypoint_idx in range(n_keypoints): 79 | keypoint = label['keypoints'][keypoint_idx] 80 | if keypoint[2] <= 1: 81 | self._add_gaussian(keypoint_maps[keypoint_idx], keypoint[0], keypoint[1], self._stride, self._sigma) 82 | for another_annotation in label['processed_other_annotations']: 83 | 
keypoint = another_annotation['keypoints'][keypoint_idx] 84 | if keypoint[2] <= 1: 85 | self._add_gaussian(keypoint_maps[keypoint_idx], keypoint[0], keypoint[1], self._stride, self._sigma) 86 | keypoint_maps[-1] = 1 - keypoint_maps.max(axis=0) 87 | return keypoint_maps 88 | 89 | def _add_gaussian(self, keypoint_map, x, y, stride, sigma): 90 | n_sigma = 4 91 | tl = [int(x - n_sigma * sigma), int(y - n_sigma * sigma)] 92 | tl[0] = max(tl[0], 0) 93 | tl[1] = max(tl[1], 0) 94 | 95 | br = [int(x + n_sigma * sigma), int(y + n_sigma * sigma)] 96 | map_h, map_w = keypoint_map.shape 97 | br[0] = min(br[0], map_w * stride) 98 | br[1] = min(br[1], map_h * stride) 99 | 100 | shift = stride / 2 - 0.5 101 | for map_y in range(tl[1] // stride, br[1] // stride): 102 | for map_x in range(tl[0] // stride, br[0] // stride): 103 | d2 = (map_x * stride + shift - x) * (map_x * stride + shift - x) + \ 104 | (map_y * stride + shift - y) * (map_y * stride + shift - y) 105 | exponent = d2 / 2 / sigma / sigma 106 | if exponent > 4.6052: # threshold, ln(100), ~0.01 107 | continue 108 | keypoint_map[map_y, map_x] += math.exp(-exponent) 109 | if keypoint_map[map_y, map_x] > 1: 110 | keypoint_map[map_y, map_x] = 1 111 | 112 | def _generate_paf_maps(self, sample): 113 | n_pafs = len(BODY_PARTS_KPT_IDS) 114 | n_rows, n_cols, _ = sample['image'].shape 115 | paf_maps = np.zeros(shape=(n_pafs * 2, n_rows // self._stride, n_cols // self._stride), dtype=np.float32) 116 | 117 | label = sample['label'] 118 | for paf_idx in range(n_pafs): 119 | keypoint_a = label['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][0]] 120 | keypoint_b = label['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][1]] 121 | if keypoint_a[2] <= 1 and keypoint_b[2] <= 1: 122 | self._set_paf(paf_maps[paf_idx * 2:paf_idx * 2 + 2], 123 | keypoint_a[0], keypoint_a[1], keypoint_b[0], keypoint_b[1], 124 | self._stride, self._paf_thickness) 125 | for another_annotation in label['processed_other_annotations']: 126 | keypoint_a = another_annotation['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][0]] 127 | keypoint_b = another_annotation['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][1]] 128 | if keypoint_a[2] <= 1 and keypoint_b[2] <= 1: 129 | self._set_paf(paf_maps[paf_idx * 2:paf_idx * 2 + 2], 130 | keypoint_a[0], keypoint_a[1], keypoint_b[0], keypoint_b[1], 131 | self._stride, self._paf_thickness) 132 | return paf_maps 133 | 134 | def _set_paf(self, paf_map, x_a, y_a, x_b, y_b, stride, thickness): 135 | x_a /= stride 136 | y_a /= stride 137 | x_b /= stride 138 | y_b /= stride 139 | x_ba = x_b - x_a 140 | y_ba = y_b - y_a 141 | _, h_map, w_map = paf_map.shape 142 | x_min = int(max(min(x_a, x_b) - thickness, 0)) 143 | x_max = int(min(max(x_a, x_b) + thickness, w_map)) 144 | y_min = int(max(min(y_a, y_b) - thickness, 0)) 145 | y_max = int(min(max(y_a, y_b) + thickness, h_map)) 146 | norm_ba = (x_ba * x_ba + y_ba * y_ba) ** 0.5 147 | if norm_ba < 1e-7: # Same points, no paf 148 | return 149 | x_ba /= norm_ba 150 | y_ba /= norm_ba 151 | 152 | for y in range(y_min, y_max): 153 | for x in range(x_min, x_max): 154 | x_ca = x - x_a 155 | y_ca = y - y_a 156 | d = math.fabs(x_ca * y_ba - y_ca * x_ba) 157 | if d <= thickness: 158 | paf_map[0, y, x] = x_ba 159 | paf_map[1, y, x] = y_ba 160 | 161 | 162 | class CocoValDataset(Dataset): 163 | def __init__(self, labels, images_folder): 164 | super().__init__() 165 | with open(labels, 'r') as f: 166 | self._labels = json.load(f) 167 | self._images_folder = images_folder 168 | 169 | def __getitem__(self, idx): 170 | file_name = 
self._labels['images'][idx]['file_name'] 171 | img = cv2.imread(os.path.join(self._images_folder, file_name), cv2.IMREAD_COLOR) 172 | return { 173 | 'img': img, 174 | 'file_name': file_name 175 | } 176 | 177 | def __len__(self): 178 | return len(self._labels['images']) 179 | -------------------------------------------------------------------------------- /pose-estimation/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import os 4 | 5 | import torch 6 | from torch.nn import DataParallel 7 | import torch.optim as optim 8 | from torch.utils.data import DataLoader 9 | from torchvision import transforms 10 | 11 | from datasets.coco import CocoTrainDataset 12 | from datasets.transformations import ConvertKeypoints, Scale, Rotate, CropPad, Flip 13 | from modules.get_parameters import get_parameters_conv, get_parameters_bn, get_parameters_conv_depthwise 14 | from models.with_mobilenet import PoseEstimationWithMobileNet 15 | from modules.loss import l2_loss 16 | from modules.load_state import load_state, load_from_mobilenet 17 | from val import evaluate 18 | 19 | cv2.setNumThreads(0) 20 | cv2.ocl.setUseOpenCL(False) # To prevent freeze of DataLoader 21 | 22 | 23 | def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size, batches_per_iter, 24 | num_workers, checkpoint_path, weights_only, from_mobilenet, checkpoints_folder, log_after, 25 | val_labels, val_images_folder, val_output_name, checkpoint_after, val_after): 26 | net = PoseEstimationWithMobileNet(num_refinement_stages) 27 | 28 | stride = 8 29 | sigma = 7 30 | path_thickness = 1 31 | dataset = CocoTrainDataset(prepared_train_labels, train_images_folder, 32 | stride, sigma, path_thickness, 33 | transform=transforms.Compose([ 34 | ConvertKeypoints(), 35 | Scale(), 36 | Rotate(pad=(128, 128, 128)), 37 | CropPad(pad=(128, 128, 128)), 38 | Flip()])) 39 | train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) 40 | 41 | optimizer = optim.Adam([ 42 | {'params': get_parameters_conv(net.model, 'weight')}, 43 | {'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0}, 44 | {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0}, 45 | {'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, 46 | {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr}, 47 | {'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, 48 | {'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0}, 49 | {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr}, 50 | {'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, 51 | {'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4}, 52 | {'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0}, 53 | {'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0}, 54 | {'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, 55 | ], lr=base_lr, weight_decay=5e-4) 56 | 57 | num_iter = 0 58 | current_epoch = 0 59 | drop_after_epoch = [100, 200, 260] 60 | scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333) 61 | if checkpoint_path: 62 | checkpoint = torch.load(checkpoint_path) 63 | 64 | if from_mobilenet: 65 | 
load_from_mobilenet(net, checkpoint) 66 | else: 67 | load_state(net, checkpoint) 68 | if not weights_only: 69 | optimizer.load_state_dict(checkpoint['optimizer']) 70 | scheduler.load_state_dict(checkpoint['scheduler']) 71 | num_iter = checkpoint['iter'] 72 | current_epoch = checkpoint['current_epoch'] 73 | 74 | net = DataParallel(net).cuda() 75 | net.train() 76 | for epochId in range(current_epoch, 280): 77 | scheduler.step() 78 | total_losses = [0, 0] * (num_refinement_stages + 1) # heatmaps loss, paf loss per stage 79 | batch_per_iter_idx = 0 80 | for batch_data in train_loader: 81 | if batch_per_iter_idx == 0: 82 | optimizer.zero_grad() 83 | 84 | images = batch_data['image'].cuda() 85 | keypoint_masks = batch_data['keypoint_mask'].cuda() 86 | paf_masks = batch_data['paf_mask'].cuda() 87 | keypoint_maps = batch_data['keypoint_maps'].cuda() 88 | paf_maps = batch_data['paf_maps'].cuda() 89 | 90 | stages_output = net(images) 91 | 92 | losses = [] 93 | for loss_idx in range(len(total_losses) // 2): 94 | losses.append(l2_loss(stages_output[loss_idx * 2], keypoint_maps, keypoint_masks, images.shape[0])) 95 | losses.append(l2_loss(stages_output[loss_idx * 2 + 1], paf_maps, paf_masks, images.shape[0])) 96 | total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter 97 | total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter 98 | 99 | loss = losses[0] 100 | for loss_idx in range(1, len(losses)): 101 | loss += losses[loss_idx] 102 | loss /= batches_per_iter 103 | loss.backward() 104 | batch_per_iter_idx += 1 105 | if batch_per_iter_idx == batches_per_iter: 106 | optimizer.step() 107 | batch_per_iter_idx = 0 108 | num_iter += 1 109 | else: 110 | continue 111 | 112 | if num_iter % log_after == 0: 113 | print('Iter: {}'.format(num_iter)) 114 | for loss_idx in range(len(total_losses) // 2): 115 | print('\n'.join(['stage{}_pafs_loss: {}', 'stage{}_heatmaps_loss: {}']).format( 116 | loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after, 117 | loss_idx + 1, total_losses[loss_idx * 2] / log_after)) 118 | for loss_idx in range(len(total_losses)): 119 | total_losses[loss_idx] = 0 120 | if num_iter % checkpoint_after == 0: 121 | snapshot_name = '{}/checkpoint_iter_{}.pth'.format(checkpoints_folder, num_iter) 122 | torch.save({'state_dict': net.module.state_dict(), 123 | 'optimizer': optimizer.state_dict(), 124 | 'scheduler': scheduler.state_dict(), 125 | 'iter': num_iter, 126 | 'current_epoch': epochId}, 127 | snapshot_name) 128 | if num_iter % val_after == 0: 129 | print('Validation...') 130 | evaluate(val_labels, val_output_name, val_images_folder, net) 131 | net.train() 132 | 133 | 134 | if __name__ == '__main__': 135 | parser = argparse.ArgumentParser() 136 | parser.add_argument('--prepared-train-labels', type=str, required=True, 137 | help='path to the file with prepared annotations') 138 | parser.add_argument('--train-images-folder', type=str, required=True, help='path to COCO train images folder') 139 | parser.add_argument('--num-refinement-stages', type=int, default=1, help='number of refinement stages') 140 | parser.add_argument('--base-lr', type=float, default=4e-5, help='initial learning rate') 141 | parser.add_argument('--batch-size', type=int, default=80, help='batch size') 142 | parser.add_argument('--batches-per-iter', type=int, default=1, help='number of batches to accumulate gradient from') 143 | parser.add_argument('--num-workers', type=int, default=8, help='number of workers') 144 | parser.add_argument('--checkpoint-path', type=str, required=True, help='path to 
the checkpoint to continue training from') 145 | parser.add_argument('--from-mobilenet', action='store_true', 146 | help='load weights from mobilenet feature extractor') 147 | parser.add_argument('--weights-only', action='store_true', 148 | help='just initialize layers with pre-trained weights and start training from the beginning') 149 | parser.add_argument('--experiment-name', type=str, default='default', 150 | help='experiment name to create folder for checkpoints') 151 | parser.add_argument('--log-after', type=int, default=100, help='number of iterations to print train loss') 152 | 153 | parser.add_argument('--val-labels', type=str, required=True, help='path to json with keypoints val labels') 154 | parser.add_argument('--val-images-folder', type=str, required=True, help='path to COCO val images folder') 155 | parser.add_argument('--val-output-name', type=str, default='detections.json', 156 | help='name of output json file with detected keypoints') 157 | parser.add_argument('--checkpoint-after', type=int, default=5000, 158 | help='number of iterations to save checkpoint') 159 | parser.add_argument('--val-after', type=int, default=5000, 160 | help='number of iterations to run validation') 161 | args = parser.parse_args() 162 | 163 | checkpoints_folder = '{}_checkpoints'.format(args.experiment_name) 164 | if not os.path.exists(checkpoints_folder): 165 | os.makedirs(checkpoints_folder) 166 | 167 | train(args.prepared_train_labels, args.train_images_folder, args.num_refinement_stages, args.base_lr, args.batch_size, 168 | args.batches_per_iter, args.num_workers, args.checkpoint_path, args.weights_only, args.from_mobilenet, 169 | checkpoints_folder, args.log_after, args.val_labels, args.val_images_folder, args.val_output_name, 170 | args.checkpoint_after, args.val_after) 171 | -------------------------------------------------------------------------------- /audio_to_text.py: -------------------------------------------------------------------------------- 1 | import time, logging 2 | from datetime import datetime 3 | import threading, collections, queue, os, os.path 4 | import deepspeech 5 | import numpy as np 6 | import pyaudio 7 | import wave 8 | import webrtcvad 9 | from halo import Halo 10 | from scipy import signal 11 | 12 | logging.basicConfig(level=20) 13 | 14 | class Audio(object): 15 | """Streams raw audio from microphone. 
Data is received in a separate thread, and stored in a buffer, to be read from.""" 16 | 17 | FORMAT = pyaudio.paInt16 18 | # Network/VAD rate-space 19 | RATE_PROCESS = 16000 20 | CHANNELS = 1 21 | BLOCKS_PER_SECOND = 50 22 | 23 | def __init__(self, callback=None, device=None, input_rate=RATE_PROCESS, file=None): 24 | def proxy_callback(in_data, frame_count, time_info, status): 25 | #pylint: disable=unused-argument 26 | if self.chunk is not None: 27 | in_data = self.wf.readframes(self.chunk) 28 | callback(in_data) 29 | return (None, pyaudio.paContinue) 30 | if callback is None: callback = lambda in_data: self.buffer_queue.put(in_data) 31 | self.buffer_queue = queue.Queue() 32 | self.device = device 33 | self.input_rate = input_rate 34 | self.sample_rate = self.RATE_PROCESS 35 | self.block_size = int(self.RATE_PROCESS / float(self.BLOCKS_PER_SECOND)) 36 | self.block_size_input = int(self.input_rate / float(self.BLOCKS_PER_SECOND)) 37 | self.pa = pyaudio.PyAudio() 38 | 39 | kwargs = { 40 | 'format': self.FORMAT, 41 | 'channels': self.CHANNELS, 42 | 'rate': self.input_rate, 43 | 'input': True, 44 | 'frames_per_buffer': self.block_size_input, 45 | 'stream_callback': proxy_callback, 46 | } 47 | 48 | self.chunk = None 49 | # if not default device 50 | if self.device: 51 | kwargs['input_device_index'] = self.device 52 | elif file is not None: 53 | self.chunk = 320 54 | self.wf = wave.open(file, 'rb') 55 | 56 | self.stream = self.pa.open(**kwargs) 57 | self.stream.start_stream() 58 | 59 | def resample(self, data, input_rate): 60 | """ 61 | Microphone may not support our native processing sampling rate, so 62 | resample from input_rate to RATE_PROCESS here for webrtcvad and 63 | deepspeech 64 | Args: 65 | data (binary): Input audio stream 66 | input_rate (int): Input audio rate to resample from 67 | """ 68 | data16 = np.fromstring(string=data, dtype=np.int16) 69 | resample_size = int(len(data16) / self.input_rate * self.RATE_PROCESS) 70 | resample = signal.resample(data16, resample_size) 71 | resample16 = np.array(resample, dtype=np.int16) 72 | return resample16.tostring() 73 | 74 | def read_resampled(self): 75 | """Return a block of audio data resampled to 16000hz, blocking if necessary.""" 76 | return self.resample(data=self.buffer_queue.get(), 77 | input_rate=self.input_rate) 78 | 79 | def read(self): 80 | """Return a block of audio data, blocking if necessary.""" 81 | return self.buffer_queue.get() 82 | 83 | def destroy(self): 84 | self.stream.stop_stream() 85 | self.stream.close() 86 | self.pa.terminate() 87 | 88 | frame_duration_ms = property(lambda self: 1000 * self.block_size // self.sample_rate) 89 | 90 | def write_wav(self, filename, data): 91 | logging.info("write wav %s", filename) 92 | wf = wave.open(filename, 'wb') 93 | wf.setnchannels(self.CHANNELS) 94 | # wf.setsampwidth(self.pa.get_sample_size(FORMAT)) 95 | assert self.FORMAT == pyaudio.paInt16 96 | wf.setsampwidth(2) 97 | wf.setframerate(self.sample_rate) 98 | wf.writeframes(data) 99 | wf.close() 100 | 101 | 102 | class VADAudio(Audio): 103 | """Filter & segment audio with voice activity detection.""" 104 | 105 | def __init__(self, aggressiveness=3, device=None, input_rate=None, file=None): 106 | super().__init__(device=device, input_rate=input_rate, file=file) 107 | self.vad = webrtcvad.Vad(aggressiveness) 108 | 109 | def frame_generator(self): 110 | """Generator that yields all audio frames from microphone.""" 111 | if self.input_rate == self.RATE_PROCESS: 112 | while True: 113 | yield self.read() 114 | else: 115 | while True: 
116 | yield self.read_resampled() 117 | 118 | def vad_collector(self, padding_ms=300, ratio=0.75, frames=None): 119 | """Generator that yields series of consecutive audio frames comprising each utterence, separated by yielding a single None. 120 | Determines voice activity by ratio of frames in padding_ms. Uses a buffer to include padding_ms prior to being triggered. 121 | Example: (frame, ..., frame, None, frame, ..., frame, None, ...) 122 | |---utterence---| |---utterence---| 123 | """ 124 | if frames is None: frames = self.frame_generator() 125 | num_padding_frames = padding_ms // self.frame_duration_ms 126 | ring_buffer = collections.deque(maxlen=num_padding_frames) 127 | triggered = False 128 | 129 | for frame in frames: 130 | if len(frame) < 640: 131 | return 132 | 133 | is_speech = self.vad.is_speech(frame, self.sample_rate) 134 | 135 | if not triggered: 136 | ring_buffer.append((frame, is_speech)) 137 | num_voiced = len([f for f, speech in ring_buffer if speech]) 138 | if num_voiced > ratio * ring_buffer.maxlen: 139 | triggered = True 140 | for f, s in ring_buffer: 141 | yield f 142 | ring_buffer.clear() 143 | 144 | else: 145 | yield frame 146 | ring_buffer.append((frame, is_speech)) 147 | num_unvoiced = len([f for f, speech in ring_buffer if not speech]) 148 | if num_unvoiced > ratio * ring_buffer.maxlen: 149 | triggered = False 150 | yield None 151 | ring_buffer.clear() 152 | 153 | def main(ARGS, subject): 154 | # Load DeepSpeech model 155 | if os.path.isdir(ARGS.model): 156 | model_dir = ARGS.model 157 | ARGS.model = os.path.join(model_dir, 'output_graph.pb') 158 | ARGS.scorer = os.path.join(model_dir, ARGS.scorer) 159 | 160 | print('Initializing model...') 161 | logging.info("ARGS.model: %s", ARGS.model) 162 | model = deepspeech.Model(ARGS.model) 163 | if ARGS.scorer: 164 | logging.info("ARGS.scorer: %s", ARGS.scorer) 165 | model.enableExternalScorer(ARGS.scorer) 166 | 167 | # Start audio with VAD 168 | vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness, 169 | device=ARGS.device, 170 | input_rate=ARGS.rate, 171 | file=ARGS.file) 172 | print("Listening (ctrl-C to exit)...") 173 | frames = vad_audio.vad_collector() 174 | 175 | # Stream from microphone to DeepSpeech using VAD 176 | spinner = None 177 | if not ARGS.nospinner: 178 | spinner = Halo(spinner='line') 179 | stream_context = model.createStream() 180 | wav_data = bytearray() 181 | for frame in frames: 182 | if frame is not None: 183 | if spinner: spinner.start() 184 | logging.debug("streaming frame") 185 | stream_context.feedAudioContent(np.frombuffer(frame, np.int16)) 186 | if ARGS.savewav: wav_data.extend(frame) 187 | else: 188 | if spinner: spinner.stop() 189 | logging.debug("end utterence") 190 | if ARGS.savewav: 191 | vad_audio.write_wav(os.path.join(ARGS.savewav, datetime.now().strftime("savewav_%Y-%m-%d_%H-%M-%S_%f.wav")), wav_data) 192 | wav_data = bytearray() 193 | text = stream_context.finishStream() 194 | print("Recognized: %s" % text) 195 | text_file = open(subject+'.txt','w') 196 | n = text_file.write(text) 197 | text_file.close 198 | stream_context = model.createStream() 199 | 200 | if __name__ == '__main__': 201 | DEFAULT_SAMPLE_RATE = 16000 202 | 203 | import argparse 204 | parser = argparse.ArgumentParser(description="Stream from microphone to DeepSpeech using VAD") 205 | 206 | parser.add_argument('-v', '--vad_aggressiveness', type=int, default=3, 207 | help="Set aggressiveness of VAD: an integer between 0 and 3, 0 being the least aggressive about filtering out non-speech, 3 the most aggressive. 
Default: 3") 208 | parser.add_argument('--nospinner', action='store_true', 209 | help="Disable spinner") 210 | parser.add_argument('-w', '--savewav', 211 | help="Save .wav files of utterences to given directory") 212 | parser.add_argument('-f', '--file', 213 | help="Read from .wav file instead of microphone") 214 | 215 | parser.add_argument('-m', '--model', required=True, 216 | help="Path to the model (protocol buffer binary file, or entire directory containing all standard-named files for model)") 217 | parser.add_argument('-s', '--scorer', 218 | help="Path to the external scorer file.") 219 | parser.add_argument('-d', '--device', type=int, default=None, 220 | help="Device input index (Int) as listed by pyaudio.PyAudio.get_device_info_by_index(). If not provided, falls back to PyAudio.get_default_device().") 221 | parser.add_argument('-r', '--rate', type=int, default=DEFAULT_SAMPLE_RATE, 222 | help=f"Input device sample rate. Default: {DEFAULT_SAMPLE_RATE}. Your device may require 44100.") 223 | 224 | ARGS = parser.parse_args() 225 | if ARGS.savewav: os.makedirs(ARGS.savewav, exist_ok=True) 226 | main(ARGS, subject) 227 | -------------------------------------------------------------------------------- /pose-estimation/modules/keypoints.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from operator import itemgetter 4 | 5 | BODY_PARTS_KPT_IDS = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], 6 | [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 16], [5, 17]] 7 | BODY_PARTS_PAF_IDS = ([12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3], [4, 5], 8 | [6, 7], [8, 9], [10, 11], [28, 29], [30, 31], [34, 35], [32, 33], [36, 37], [18, 19], [26, 27]) 9 | 10 | 11 | def linspace2d(start, stop, n=10): 12 | points = 1 / (n - 1) * (stop - start) 13 | return points[:, None] * np.arange(n) + start[:, None] 14 | 15 | 16 | def extract_keypoints(heatmap, all_keypoints, total_keypoint_num): 17 | heatmap[heatmap < 0.1] = 0 18 | heatmap_with_borders = np.pad(heatmap, [(2, 2), (2, 2)], mode='constant') 19 | heatmap_center = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 1:heatmap_with_borders.shape[1]-1] 20 | heatmap_left = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 2:heatmap_with_borders.shape[1]] 21 | heatmap_right = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 0:heatmap_with_borders.shape[1]-2] 22 | heatmap_up = heatmap_with_borders[2:heatmap_with_borders.shape[0], 1:heatmap_with_borders.shape[1]-1] 23 | heatmap_down = heatmap_with_borders[0:heatmap_with_borders.shape[0]-2, 1:heatmap_with_borders.shape[1]-1] 24 | 25 | heatmap_peaks = (heatmap_center > heatmap_left) &\ 26 | (heatmap_center > heatmap_right) &\ 27 | (heatmap_center > heatmap_up) &\ 28 | (heatmap_center > heatmap_down) 29 | heatmap_peaks = heatmap_peaks[1:heatmap_center.shape[0]-1, 1:heatmap_center.shape[1]-1] 30 | keypoints = list(zip(np.nonzero(heatmap_peaks)[1], np.nonzero(heatmap_peaks)[0])) # (w, h) 31 | keypoints = sorted(keypoints, key=itemgetter(0)) 32 | 33 | suppressed = np.zeros(len(keypoints), np.uint8) 34 | keypoints_with_score_and_id = [] 35 | keypoint_num = 0 36 | for i in range(len(keypoints)): 37 | if suppressed[i]: 38 | continue 39 | for j in range(i+1, len(keypoints)): 40 | if math.sqrt((keypoints[i][0] - keypoints[j][0]) ** 2 + 41 | (keypoints[i][1] - keypoints[j][1]) ** 2) < 6: 42 | suppressed[j] = 1 43 | keypoint_with_score_and_id = 
(keypoints[i][0], keypoints[i][1], heatmap[keypoints[i][1], keypoints[i][0]], 44 | total_keypoint_num + keypoint_num) 45 | keypoints_with_score_and_id.append(keypoint_with_score_and_id) 46 | keypoint_num += 1 47 | all_keypoints.append(keypoints_with_score_and_id) 48 | return keypoint_num 49 | 50 | 51 | def group_keypoints(all_keypoints_by_type, pafs, pose_entry_size=20, min_paf_score=0.05, demo=False): 52 | pose_entries = [] 53 | all_keypoints = np.array([item for sublist in all_keypoints_by_type for item in sublist]) 54 | for part_id in range(len(BODY_PARTS_PAF_IDS)): 55 | part_pafs = pafs[:, :, BODY_PARTS_PAF_IDS[part_id]] 56 | kpts_a = all_keypoints_by_type[BODY_PARTS_KPT_IDS[part_id][0]] 57 | kpts_b = all_keypoints_by_type[BODY_PARTS_KPT_IDS[part_id][1]] 58 | num_kpts_a = len(kpts_a) 59 | num_kpts_b = len(kpts_b) 60 | kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] 61 | kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] 62 | 63 | if num_kpts_a == 0 and num_kpts_b == 0: # no keypoints for such body part 64 | continue 65 | elif num_kpts_a == 0: # body part has just 'b' keypoints 66 | for i in range(num_kpts_b): 67 | num = 0 68 | for j in range(len(pose_entries)): # check if already in some pose, was added by another body part 69 | if pose_entries[j][kpt_b_id] == kpts_b[i][3]: 70 | num += 1 71 | continue 72 | if num == 0: 73 | pose_entry = np.ones(pose_entry_size) * -1 74 | pose_entry[kpt_b_id] = kpts_b[i][3] # keypoint idx 75 | pose_entry[-1] = 1 # num keypoints in pose 76 | pose_entry[-2] = kpts_b[i][2] # pose score 77 | pose_entries.append(pose_entry) 78 | continue 79 | elif num_kpts_b == 0: # body part has just 'a' keypoints 80 | for i in range(num_kpts_a): 81 | num = 0 82 | for j in range(len(pose_entries)): 83 | if pose_entries[j][kpt_a_id] == kpts_a[i][3]: 84 | num += 1 85 | continue 86 | if num == 0: 87 | pose_entry = np.ones(pose_entry_size) * -1 88 | pose_entry[kpt_a_id] = kpts_a[i][3] 89 | pose_entry[-1] = 1 90 | pose_entry[-2] = kpts_a[i][2] 91 | pose_entries.append(pose_entry) 92 | continue 93 | 94 | connections = [] 95 | for i in range(num_kpts_a): 96 | kpt_a = np.array(kpts_a[i][0:2]) 97 | for j in range(num_kpts_b): 98 | kpt_b = np.array(kpts_b[j][0:2]) 99 | mid_point = [(), ()] 100 | mid_point[0] = (int(round((kpt_a[0] + kpt_b[0]) * 0.5)), 101 | int(round((kpt_a[1] + kpt_b[1]) * 0.5))) 102 | mid_point[1] = mid_point[0] 103 | 104 | vec = [kpt_b[0] - kpt_a[0], kpt_b[1] - kpt_a[1]] 105 | vec_norm = math.sqrt(vec[0] ** 2 + vec[1] ** 2) 106 | if vec_norm == 0: 107 | continue 108 | vec[0] /= vec_norm 109 | vec[1] /= vec_norm 110 | cur_point_score = (vec[0] * part_pafs[mid_point[0][1], mid_point[0][0], 0] + 111 | vec[1] * part_pafs[mid_point[1][1], mid_point[1][0], 1]) 112 | 113 | height_n = pafs.shape[0] // 2 114 | success_ratio = 0 115 | point_num = 10 # number of points to integration over paf 116 | if cur_point_score > -100: 117 | passed_point_score = 0 118 | passed_point_num = 0 119 | x, y = linspace2d(kpt_a, kpt_b) 120 | for point_idx in range(point_num): 121 | if not demo: 122 | px = int(round(x[point_idx])) 123 | py = int(round(y[point_idx])) 124 | else: 125 | px = int(x[point_idx]) 126 | py = int(y[point_idx]) 127 | paf = part_pafs[py, px, 0:2] 128 | cur_point_score = vec[0] * paf[0] + vec[1] * paf[1] 129 | if cur_point_score > min_paf_score: 130 | passed_point_score += cur_point_score 131 | passed_point_num += 1 132 | success_ratio = passed_point_num / point_num 133 | ratio = 0 134 | if passed_point_num > 0: 135 | ratio = passed_point_score / passed_point_num 136 | ratio += 
min(height_n / vec_norm - 1, 0) 137 | if ratio > 0 and success_ratio > 0.8: 138 | score_all = ratio + kpts_a[i][2] + kpts_b[j][2] 139 | connections.append([i, j, ratio, score_all]) 140 | if len(connections) > 0: 141 | connections = sorted(connections, key=itemgetter(2), reverse=True) 142 | 143 | num_connections = min(num_kpts_a, num_kpts_b) 144 | has_kpt_a = np.zeros(num_kpts_a, dtype=np.int32) 145 | has_kpt_b = np.zeros(num_kpts_b, dtype=np.int32) 146 | filtered_connections = [] 147 | for row in range(len(connections)): 148 | if len(filtered_connections) == num_connections: 149 | break 150 | i, j, cur_point_score = connections[row][0:3] 151 | if not has_kpt_a[i] and not has_kpt_b[j]: 152 | filtered_connections.append([kpts_a[i][3], kpts_b[j][3], cur_point_score]) 153 | has_kpt_a[i] = 1 154 | has_kpt_b[j] = 1 155 | connections = filtered_connections 156 | if len(connections) == 0: 157 | continue 158 | 159 | if part_id == 0: 160 | pose_entries = [np.ones(pose_entry_size) * -1 for _ in range(len(connections))] 161 | for i in range(len(connections)): 162 | pose_entries[i][BODY_PARTS_KPT_IDS[0][0]] = connections[i][0] 163 | pose_entries[i][BODY_PARTS_KPT_IDS[0][1]] = connections[i][1] 164 | pose_entries[i][-1] = 2 165 | pose_entries[i][-2] = np.sum(all_keypoints[connections[i][0:2], 2]) + connections[i][2] 166 | elif part_id == 17 or part_id == 18: 167 | kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] 168 | kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] 169 | for i in range(len(connections)): 170 | for j in range(len(pose_entries)): 171 | if pose_entries[j][kpt_a_id] == connections[i][0] and pose_entries[j][kpt_b_id] == -1: 172 | pose_entries[j][kpt_b_id] = connections[i][1] 173 | elif pose_entries[j][kpt_b_id] == connections[i][1] and pose_entries[j][kpt_a_id] == -1: 174 | pose_entries[j][kpt_a_id] = connections[i][0] 175 | continue 176 | else: 177 | kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] 178 | kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] 179 | for i in range(len(connections)): 180 | num = 0 181 | for j in range(len(pose_entries)): 182 | if pose_entries[j][kpt_a_id] == connections[i][0]: 183 | pose_entries[j][kpt_b_id] = connections[i][1] 184 | num += 1 185 | pose_entries[j][-1] += 1 186 | pose_entries[j][-2] += all_keypoints[connections[i][1], 2] + connections[i][2] 187 | if num == 0: 188 | pose_entry = np.ones(pose_entry_size) * -1 189 | pose_entry[kpt_a_id] = connections[i][0] 190 | pose_entry[kpt_b_id] = connections[i][1] 191 | pose_entry[-1] = 2 192 | pose_entry[-2] = np.sum(all_keypoints[connections[i][0:2], 2]) + connections[i][2] 193 | pose_entries.append(pose_entry) 194 | 195 | filtered_entries = [] 196 | for i in range(len(pose_entries)): 197 | if pose_entries[i][-1] < 3 or (pose_entries[i][-2] / pose_entries[i][-1] < 0.2): 198 | continue 199 | filtered_entries.append(pose_entries[i]) 200 | pose_entries = np.asarray(filtered_entries) 201 | return pose_entries, all_keypoints 202 | -------------------------------------------------------------------------------- /pose-estimation/datasets/transformations.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | 7 | class ConvertKeypoints: 8 | def __call__(self, sample): 9 | label = sample['label'] 10 | h, w, _ = sample['image'].shape 11 | keypoints = label['keypoints'] 12 | for keypoint in keypoints: # keypoint[2] == 0: occluded, == 1: visible, == 2: not in image 13 | if keypoint[0] == keypoint[1] == 0: 14 | keypoint[2] = 2 15 | if (keypoint[0] < 0 16 | 
or keypoint[0] >= w 17 | or keypoint[1] < 0 18 | or keypoint[1] >= h): 19 | keypoint[2] = 2 20 | for other_label in label['processed_other_annotations']: 21 | keypoints = other_label['keypoints'] 22 | for keypoint in keypoints: 23 | if keypoint[0] == keypoint[1] == 0: 24 | keypoint[2] = 2 25 | if (keypoint[0] < 0 26 | or keypoint[0] >= w 27 | or keypoint[1] < 0 28 | or keypoint[1] >= h): 29 | keypoint[2] = 2 30 | label['keypoints'] = self._convert(label['keypoints'], w, h) 31 | 32 | for other_label in label['processed_other_annotations']: 33 | other_label['keypoints'] = self._convert(other_label['keypoints'], w, h) 34 | return sample 35 | 36 | def _convert(self, keypoints, w, h): 37 | # Nose, Neck, R hand, L hand, R leg, L leg, Eyes, Ears 38 | reorder_map = [1, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4] 39 | converted_keypoints = list(keypoints[i - 1] for i in reorder_map) 40 | converted_keypoints.insert(1, [(keypoints[5][0] + keypoints[6][0]) / 2, 41 | (keypoints[5][1] + keypoints[6][1]) / 2, 0]) # Add neck as a mean of shoulders 42 | if keypoints[5][2] == 2 or keypoints[6][2] == 2: 43 | converted_keypoints[1][2] = 2 44 | elif keypoints[5][2] == 1 and keypoints[6][2] == 1: 45 | converted_keypoints[1][2] = 1 46 | if (converted_keypoints[1][0] < 0 47 | or converted_keypoints[1][0] >= w 48 | or converted_keypoints[1][1] < 0 49 | or converted_keypoints[1][1] >= h): 50 | converted_keypoints[1][2] = 2 51 | return converted_keypoints 52 | 53 | 54 | class Scale: 55 | def __init__(self, prob=1, min_scale=0.5, max_scale=1.1, target_dist=0.6): 56 | self._prob = prob 57 | self._min_scale = min_scale 58 | self._max_scale = max_scale 59 | self._target_dist = target_dist 60 | 61 | def __call__(self, sample): 62 | prob = random.random() 63 | scale_multiplier = 1 64 | if prob <= self._prob: 65 | prob = random.random() 66 | scale_multiplier = (self._max_scale - self._min_scale) * prob + self._min_scale 67 | label = sample['label'] 68 | scale_abs = self._target_dist / label['scale_provided'] 69 | scale = scale_abs * scale_multiplier 70 | sample['image'] = cv2.resize(sample['image'], dsize=(0, 0), fx=scale, fy=scale) 71 | label['img_height'], label['img_width'], _ = sample['image'].shape 72 | sample['mask'] = cv2.resize(sample['mask'], dsize=(0, 0), fx=scale, fy=scale) 73 | 74 | label['objpos'][0] *= scale 75 | label['objpos'][1] *= scale 76 | for keypoint in sample['label']['keypoints']: 77 | keypoint[0] *= scale 78 | keypoint[1] *= scale 79 | for other_annotation in sample['label']['processed_other_annotations']: 80 | other_annotation['objpos'][0] *= scale 81 | other_annotation['objpos'][1] *= scale 82 | for keypoint in other_annotation['keypoints']: 83 | keypoint[0] *= scale 84 | keypoint[1] *= scale 85 | return sample 86 | 87 | 88 | class Rotate: 89 | def __init__(self, pad, max_rotate_degree=40): 90 | self._pad = pad 91 | self._max_rotate_degree = max_rotate_degree 92 | 93 | def __call__(self, sample): 94 | prob = random.random() 95 | degree = (prob - 0.5) * 2 * self._max_rotate_degree 96 | h, w, _ = sample['image'].shape 97 | img_center = (w / 2, h / 2) 98 | R = cv2.getRotationMatrix2D(img_center, degree, 1) 99 | 100 | abs_cos = abs(R[0, 0]) 101 | abs_sin = abs(R[0, 1]) 102 | 103 | bound_w = int(h * abs_sin + w * abs_cos) 104 | bound_h = int(h * abs_cos + w * abs_sin) 105 | dsize = (bound_w, bound_h) 106 | 107 | R[0, 2] += dsize[0] / 2 - img_center[0] 108 | R[1, 2] += dsize[1] / 2 - img_center[1] 109 | sample['image'] = cv2.warpAffine(sample['image'], R, dsize=dsize, 110 | 
borderMode=cv2.BORDER_CONSTANT, borderValue=self._pad) 111 | sample['label']['img_height'], sample['label']['img_width'], _ = sample['image'].shape 112 | sample['mask'] = cv2.warpAffine(sample['mask'], R, dsize=dsize, 113 | borderMode=cv2.BORDER_CONSTANT, borderValue=(1, 1, 1)) # border is ok 114 | label = sample['label'] 115 | label['objpos'] = self._rotate(label['objpos'], R) 116 | for keypoint in label['keypoints']: 117 | point = [keypoint[0], keypoint[1]] 118 | point = self._rotate(point, R) 119 | keypoint[0], keypoint[1] = point[0], point[1] 120 | for other_annotation in label['processed_other_annotations']: 121 | for keypoint in other_annotation['keypoints']: 122 | point = [keypoint[0], keypoint[1]] 123 | point = self._rotate(point, R) 124 | keypoint[0], keypoint[1] = point[0], point[1] 125 | return sample 126 | 127 | def _rotate(self, point, R): 128 | return [R[0, 0] * point[0] + R[0, 1] * point[1] + R[0, 2], 129 | R[1, 0] * point[0] + R[1, 1] * point[1] + R[1, 2]] 130 | 131 | 132 | class CropPad: 133 | def __init__(self, pad, center_perterb_max=40, crop_x=368, crop_y=368): 134 | self._pad = pad 135 | self._center_perterb_max = center_perterb_max 136 | self._crop_x = crop_x 137 | self._crop_y = crop_y 138 | 139 | def __call__(self, sample): 140 | prob_x = random.random() 141 | prob_y = random.random() 142 | 143 | offset_x = int((prob_x - 0.5) * 2 * self._center_perterb_max) 144 | offset_y = int((prob_y - 0.5) * 2 * self._center_perterb_max) 145 | label = sample['label'] 146 | shifted_center = (label['objpos'][0] + offset_x, label['objpos'][1] + offset_y) 147 | offset_left = -int(shifted_center[0] - self._crop_x / 2) 148 | offset_up = -int(shifted_center[1] - self._crop_y / 2) 149 | 150 | cropped_image = np.empty(shape=(self._crop_y, self._crop_x, 3), dtype=np.uint8) 151 | for i in range(3): 152 | cropped_image[:, :, i].fill(self._pad[i]) 153 | cropped_mask = np.empty(shape=(self._crop_y, self._crop_x), dtype=np.uint8) 154 | cropped_mask.fill(1) 155 | 156 | image_x_start = int(shifted_center[0] - self._crop_x / 2) 157 | image_y_start = int(shifted_center[1] - self._crop_y / 2) 158 | image_x_finish = image_x_start + self._crop_x 159 | image_y_finish = image_y_start + self._crop_y 160 | crop_x_start = 0 161 | crop_y_start = 0 162 | crop_x_finish = self._crop_x 163 | crop_y_finish = self._crop_y 164 | 165 | w, h = label['img_width'], label['img_height'] 166 | should_crop = True 167 | if image_x_start < 0: # Adjust crop area 168 | crop_x_start -= image_x_start 169 | image_x_start = 0 170 | if image_x_start >= w: 171 | should_crop = False 172 | 173 | if image_y_start < 0: 174 | crop_y_start -= image_y_start 175 | image_y_start = 0 176 | if image_y_start >= w: 177 | should_crop = False 178 | 179 | if image_x_finish > w: 180 | diff = image_x_finish - w 181 | image_x_finish -= diff 182 | crop_x_finish -= diff 183 | if image_x_finish < 0: 184 | should_crop = False 185 | 186 | if image_y_finish > h: 187 | diff = image_y_finish - h 188 | image_y_finish -= diff 189 | crop_y_finish -= diff 190 | if image_y_finish < 0: 191 | should_crop = False 192 | 193 | if should_crop: 194 | cropped_image[crop_y_start:crop_y_finish, crop_x_start:crop_x_finish, :] =\ 195 | sample['image'][image_y_start:image_y_finish, image_x_start:image_x_finish, :] 196 | cropped_mask[crop_y_start:crop_y_finish, crop_x_start:crop_x_finish] =\ 197 | sample['mask'][image_y_start:image_y_finish, image_x_start:image_x_finish] 198 | 199 | sample['image'] = cropped_image 200 | sample['mask'] = cropped_mask 201 | label['img_width'] = 
self._crop_x 202 | label['img_height'] = self._crop_y 203 | 204 | label['objpos'][0] += offset_left 205 | label['objpos'][1] += offset_up 206 | for keypoint in label['keypoints']: 207 | keypoint[0] += offset_left 208 | keypoint[1] += offset_up 209 | for other_annotation in label['processed_other_annotations']: 210 | for keypoint in other_annotation['keypoints']: 211 | keypoint[0] += offset_left 212 | keypoint[1] += offset_up 213 | 214 | return sample 215 | 216 | def _inside(self, point, width, height): 217 | if point[0] < 0 or point[1] < 0: 218 | return False 219 | if point[0] >= width or point[1] >= height: 220 | return False 221 | return True 222 | 223 | 224 | class Flip: 225 | def __init__(self, prob=0.5): 226 | self._prob = prob 227 | 228 | def __call__(self, sample): 229 | prob = random.random() 230 | do_flip = prob <= self._prob 231 | if not do_flip: 232 | return sample 233 | 234 | sample['image'] = cv2.flip(sample['image'], 1) 235 | sample['mask'] = cv2.flip(sample['mask'], 1) 236 | 237 | label = sample['label'] 238 | w, h = label['img_width'], label['img_height'] 239 | label['objpos'][0] = w - 1 - label['objpos'][0] 240 | for keypoint in label['keypoints']: 241 | keypoint[0] = w - 1 - keypoint[0] 242 | label['keypoints'] = self._swap_left_right(label['keypoints']) 243 | 244 | for other_annotation in label['processed_other_annotations']: 245 | other_annotation['objpos'][0] = w - 1 - other_annotation['objpos'][0] 246 | for keypoint in other_annotation['keypoints']: 247 | keypoint[0] = w - 1 - keypoint[0] 248 | other_annotation['keypoints'] = self._swap_left_right(other_annotation['keypoints']) 249 | 250 | return sample 251 | 252 | def _swap_left_right(self, keypoints): 253 | right = [2, 3, 4, 8, 9, 10, 14, 16] 254 | left = [5, 6, 7, 11, 12, 13, 15, 17] 255 | for r, l in zip(right, left): 256 | keypoints[r], keypoints[l] = keypoints[l], keypoints[r] 257 | return keypoints 258 | -------------------------------------------------------------------------------- /emotion-detection/BlazeFace_PyTorch/blazeface.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class BlazeBlock(nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1): 9 | super(BlazeBlock, self).__init__() 10 | 11 | self.stride = stride 12 | self.channel_pad = out_channels - in_channels 13 | 14 | # TFLite uses slightly different padding than PyTorch 15 | # on the depthwise conv layer when the stride is 2. 16 | if stride == 2: 17 | self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride) 18 | padding = 0 19 | else: 20 | padding = (kernel_size - 1) // 2 21 | 22 | self.convs = nn.Sequential( 23 | nn.Conv2d(in_channels=in_channels, out_channels=in_channels, 24 | kernel_size=kernel_size, stride=stride, padding=padding, 25 | groups=in_channels, bias=True), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 27 | kernel_size=1, stride=1, padding=0, bias=True), 28 | ) 29 | 30 | self.act = nn.ReLU(inplace=True) 31 | 32 | def forward(self, x): 33 | if self.stride == 2: 34 | h = F.pad(x, (0, 2, 0, 2), "constant", 0) 35 | x = self.max_pool(x) 36 | else: 37 | h = x 38 | 39 | if self.channel_pad > 0: 40 | x = F.pad(x, (0, 0, 0, 0, 0, self.channel_pad), "constant", 0) 41 | 42 | return self.act(self.convs(h) + x) 43 | 44 | 45 | class BlazeFace(nn.Module): 46 | """The BlazeFace face detection model from MediaPipe. 
47 | 48 | The version from MediaPipe is simpler than the one in the paper; 49 | it does not use the "double" BlazeBlocks. 50 | 51 | Because we won't be training this model, it doesn't need to have 52 | batchnorm layers. These have already been "folded" into the conv 53 | weights by TFLite. 54 | 55 | The conversion to PyTorch is fairly straightforward, but there are 56 | some small differences between TFLite and PyTorch in how they handle 57 | padding on conv layers with stride 2. 58 | 59 | This version works on batches, while the MediaPipe version can only 60 | handle a single image at a time. 61 | 62 | Based on code from https://github.com/tkat0/PyTorch_BlazeFace/ and 63 | https://github.com/google/mediapipe/ 64 | """ 65 | input_size = (128, 128) 66 | 67 | def __init__(self): 68 | super(BlazeFace, self).__init__() 69 | 70 | # These are the settings from the MediaPipe example graph 71 | # mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt 72 | self.num_classes = 1 73 | self.num_anchors = 896 74 | self.num_coords = 16 75 | self.score_clipping_thresh = 100.0 76 | self.x_scale = 128.0 77 | self.y_scale = 128.0 78 | self.h_scale = 128.0 79 | self.w_scale = 128.0 80 | self.min_score_thresh = 0.75 81 | self.min_suppression_threshold = 0.3 82 | 83 | self._define_layers() 84 | 85 | def _define_layers(self): 86 | self.backbone1 = nn.Sequential( 87 | nn.Conv2d(in_channels=3, out_channels=24, kernel_size=5, stride=2, padding=0, bias=True), 88 | nn.ReLU(inplace=True), 89 | 90 | BlazeBlock(24, 24), 91 | BlazeBlock(24, 28), 92 | BlazeBlock(28, 32, stride=2), 93 | BlazeBlock(32, 36), 94 | BlazeBlock(36, 42), 95 | BlazeBlock(42, 48, stride=2), 96 | BlazeBlock(48, 56), 97 | BlazeBlock(56, 64), 98 | BlazeBlock(64, 72), 99 | BlazeBlock(72, 80), 100 | BlazeBlock(80, 88), 101 | ) 102 | 103 | self.backbone2 = nn.Sequential( 104 | BlazeBlock(88, 96, stride=2), 105 | BlazeBlock(96, 96), 106 | BlazeBlock(96, 96), 107 | BlazeBlock(96, 96), 108 | BlazeBlock(96, 96), 109 | ) 110 | 111 | self.classifier_8 = nn.Conv2d(88, 2, 1, bias=True) 112 | self.classifier_16 = nn.Conv2d(96, 6, 1, bias=True) 113 | 114 | self.regressor_8 = nn.Conv2d(88, 32, 1, bias=True) 115 | self.regressor_16 = nn.Conv2d(96, 96, 1, bias=True) 116 | 117 | def forward(self, x): 118 | # TFLite uses slightly different padding on the first conv layer 119 | # than PyTorch, so do it manually. 120 | x = F.pad(x, (1, 2, 1, 2), "constant", 0) 121 | 122 | b = x.shape[0] # batch size, needed for reshaping later 123 | 124 | x = self.backbone1(x) # (b, 88, 16, 16) 125 | h = self.backbone2(x) # (b, 96, 8, 8) 126 | 127 | # Note: Because PyTorch is NCHW but TFLite is NHWC, we need to 128 | # permute the output from the conv layers before reshaping it. 
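        # The 16x16 feature map carries 2 anchors per cell and the 8x8 map
        # carries 6, so the reshaped tensors below have 16*16*2 = 512 and
        # 8*8*6 = 384 rows, which together give the 896 anchors in self.num_anchors.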
129 | 130 | c1 = self.classifier_8(x) # (b, 2, 16, 16) 131 | c1 = c1.permute(0, 2, 3, 1) # (b, 16, 16, 2) 132 | c1 = c1.reshape(b, -1, 1) # (b, 512, 1) 133 | 134 | c2 = self.classifier_16(h) # (b, 6, 8, 8) 135 | c2 = c2.permute(0, 2, 3, 1) # (b, 8, 8, 6) 136 | c2 = c2.reshape(b, -1, 1) # (b, 384, 1) 137 | 138 | c = torch.cat((c1, c2), dim=1) # (b, 896, 1) 139 | 140 | r1 = self.regressor_8(x) # (b, 32, 16, 16) 141 | r1 = r1.permute(0, 2, 3, 1) # (b, 16, 16, 32) 142 | r1 = r1.reshape(b, -1, 16) # (b, 512, 16) 143 | 144 | r2 = self.regressor_16(h) # (b, 96, 8, 8) 145 | r2 = r2.permute(0, 2, 3, 1) # (b, 8, 8, 96) 146 | r2 = r2.reshape(b, -1, 16) # (b, 384, 16) 147 | 148 | r = torch.cat((r1, r2), dim=1) # (b, 896, 16) 149 | return [r, c] 150 | 151 | def _device(self): 152 | """Which device (CPU or GPU) is being used by this model?""" 153 | return self.classifier_8.weight.device 154 | 155 | def load_weights(self, path): 156 | self.load_state_dict(torch.load(path)) 157 | self.eval() 158 | 159 | def load_anchors(self, path): 160 | self.anchors = torch.tensor(np.load(path), dtype=torch.float32, device=self._device()) 161 | assert(self.anchors.ndimension() == 2) 162 | assert(self.anchors.shape[0] == self.num_anchors) 163 | assert(self.anchors.shape[1] == 4) 164 | 165 | def _preprocess(self, x): 166 | """Converts the image pixels to the range [-1, 1].""" 167 | return x.float() / 127.5 - 1.0 168 | 169 | def predict_on_image(self, img): 170 | """Makes a prediction on a single image. 171 | 172 | Arguments: 173 | img: a NumPy array of shape (H, W, 3) or a PyTorch tensor of 174 | shape (3, H, W). The image's height and width should be 175 | 128 pixels. 176 | 177 | Returns: 178 | A tensor with face detections. 179 | """ 180 | if isinstance(img, np.ndarray): 181 | img = torch.from_numpy(img).permute((2, 0, 1)) 182 | 183 | return self.predict_on_batch(img.unsqueeze(0))[0] 184 | 185 | def predict_on_batch(self, x, apply_nms=True): 186 | """Makes a prediction on a batch of images. 187 | 188 | Arguments: 189 | x: a NumPy array of shape (b, H, W, 3) or a PyTorch tensor of 190 | shape (b, 3, H, W). The height and width should be 128 pixels. 191 | apply_nms: pass False to not apply non-max suppression 192 | 193 | Returns: 194 | A list containing a tensor of face detections for each image in 195 | the batch. If no faces are found for an image, returns a tensor 196 | of shape (0, 17). 197 | 198 | Each face detection is a PyTorch tensor consisting of 17 numbers: 199 | - ymin, xmin, ymax, xmax 200 | - x,y-coordinates for the 6 keypoints 201 | - confidence score 202 | """ 203 | if isinstance(x, np.ndarray): 204 | x = torch.from_numpy(x).permute((0, 3, 1, 2)) 205 | 206 | assert x.shape[1] == 3 207 | assert x.shape[2] == 128 208 | assert x.shape[3] == 128 209 | 210 | # 1. Preprocess the images into tensors: 211 | x = x.to(self._device()) 212 | x = self._preprocess(x) 213 | 214 | # 2. Run the neural network: 215 | with torch.no_grad(): 216 | out = self.__call__(x) 217 | 218 | # 3. Postprocess the raw predictions: 219 | detections = self._tensors_to_detections(out[0], out[1], self.anchors) 220 | 221 | # 4. 
Non-maximum suppression to remove overlapping detections: 222 | return self.nms(detections) if apply_nms else detections 223 | 224 | def nms(self, detections): 225 | """Filters out overlapping detections.""" 226 | filtered_detections = [] 227 | for i in range(len(detections)): 228 | faces = self._weighted_non_max_suppression(detections[i]) 229 | faces = torch.stack(faces) if len(faces) > 0 else torch.zeros((0, 17), device=self._device()) 230 | filtered_detections.append(faces) 231 | 232 | return filtered_detections 233 | 234 | def _tensors_to_detections(self, raw_box_tensor, raw_score_tensor, anchors): 235 | """The output of the neural network is a tensor of shape (b, 896, 16) 236 | containing the bounding box regressor predictions, as well as a tensor 237 | of shape (b, 896, 1) with the classification confidences. 238 | 239 | This function converts these two "raw" tensors into proper detections. 240 | Returns a list of (num_detections, 17) tensors, one for each image in 241 | the batch. 242 | 243 | This is based on the source code from: 244 | mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc 245 | mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto 246 | """ 247 | assert raw_box_tensor.ndimension() == 3 248 | assert raw_box_tensor.shape[1] == self.num_anchors 249 | assert raw_box_tensor.shape[2] == self.num_coords 250 | 251 | assert raw_score_tensor.ndimension() == 3 252 | assert raw_score_tensor.shape[1] == self.num_anchors 253 | assert raw_score_tensor.shape[2] == self.num_classes 254 | 255 | assert raw_box_tensor.shape[0] == raw_score_tensor.shape[0] 256 | 257 | detection_boxes = self._decode_boxes(raw_box_tensor, anchors) 258 | 259 | thresh = self.score_clipping_thresh 260 | raw_score_tensor = raw_score_tensor.clamp(-thresh, thresh) 261 | detection_scores = raw_score_tensor.sigmoid().squeeze(dim=-1) 262 | 263 | # Note: we stripped off the last dimension from the scores tensor 264 | # because there is only has one class. Now we can simply use a mask 265 | # to filter out the boxes with too low confidence. 266 | mask = detection_scores >= self.min_score_thresh 267 | 268 | # Because each image from the batch can have a different number of 269 | # detections, process them one at a time using a loop. 270 | output_detections = [] 271 | for i in range(raw_box_tensor.shape[0]): 272 | boxes = detection_boxes[i, mask[i]] 273 | scores = detection_scores[i, mask[i]].unsqueeze(dim=-1) 274 | output_detections.append(torch.cat((boxes, scores), dim=-1)) 275 | 276 | return output_detections 277 | 278 | def _decode_boxes(self, raw_boxes, anchors): 279 | """Converts the predictions into actual coordinates using 280 | the anchor boxes. Processes the entire batch at once. 281 | """ 282 | boxes = torch.zeros_like(raw_boxes) 283 | 284 | x_center = raw_boxes[..., 0] / self.x_scale * anchors[:, 2] + anchors[:, 0] 285 | y_center = raw_boxes[..., 1] / self.y_scale * anchors[:, 3] + anchors[:, 1] 286 | 287 | w = raw_boxes[..., 2] / self.w_scale * anchors[:, 2] 288 | h = raw_boxes[..., 3] / self.h_scale * anchors[:, 3] 289 | 290 | boxes[..., 0] = y_center - h / 2. # ymin 291 | boxes[..., 1] = x_center - w / 2. # xmin 292 | boxes[..., 2] = y_center + h / 2. # ymax 293 | boxes[..., 3] = x_center + w / 2. 
# xmax 294 | 295 | for k in range(6): 296 | offset = 4 + k*2 297 | keypoint_x = raw_boxes[..., offset ] / self.x_scale * anchors[:, 2] + anchors[:, 0] 298 | keypoint_y = raw_boxes[..., offset + 1] / self.y_scale * anchors[:, 3] + anchors[:, 1] 299 | boxes[..., offset ] = keypoint_x 300 | boxes[..., offset + 1] = keypoint_y 301 | 302 | return boxes 303 | 304 | def _weighted_non_max_suppression(self, detections): 305 | """The alternative NMS method as mentioned in the BlazeFace paper: 306 | 307 | "We replace the suppression algorithm with a blending strategy that 308 | estimates the regression parameters of a bounding box as a weighted 309 | mean between the overlapping predictions." 310 | 311 | The original MediaPipe code assigns the score of the most confident 312 | detection to the weighted detection, but we take the average score 313 | of the overlapping detections. 314 | 315 | The input detections should be a Tensor of shape (count, 17). 316 | 317 | Returns a list of PyTorch tensors, one for each detected face. 318 | 319 | This is based on the source code from: 320 | mediapipe/calculators/util/non_max_suppression_calculator.cc 321 | mediapipe/calculators/util/non_max_suppression_calculator.proto 322 | """ 323 | if len(detections) == 0: return [] 324 | 325 | output_detections = [] 326 | 327 | # Sort the detections from highest to lowest score. 328 | remaining = torch.argsort(detections[:, 16], descending=True) 329 | 330 | while len(remaining) > 0: 331 | detection = detections[remaining[0]] 332 | 333 | # Compute the overlap between the first box and the other 334 | # remaining boxes. (Note that the other_boxes also include 335 | # the first_box.) 336 | first_box = detection[:4] 337 | other_boxes = detections[remaining, :4] 338 | ious = overlap_similarity(first_box, other_boxes) 339 | 340 | # If two detections don't overlap enough, they are considered 341 | # to be from different faces. 342 | mask = ious > self.min_suppression_threshold 343 | overlapping = remaining[mask] 344 | remaining = remaining[~mask] 345 | 346 | # Take an average of the coordinates from the overlapping 347 | # detections, weighted by their confidence scores. 348 | weighted_detection = detection.clone() 349 | if len(overlapping) > 1: 350 | coordinates = detections[overlapping, :16] 351 | scores = detections[overlapping, 16:17] 352 | total_score = scores.sum() 353 | weighted = (coordinates * scores).sum(dim=0) / total_score 354 | weighted_detection[:16] = weighted 355 | weighted_detection[16] = total_score / len(overlapping) 356 | 357 | output_detections.append(weighted_detection) 358 | 359 | return output_detections 360 | 361 | 362 | # IOU code from https://github.com/amdegroot/ssd.pytorch/blob/master/layers/box_utils.py 363 | 364 | def intersect(box_a, box_b): 365 | """ We resize both tensors to [A,B,2] without new malloc: 366 | [A,2] -> [A,1,2] -> [A,B,2] 367 | [B,2] -> [1,B,2] -> [A,B,2] 368 | Then we compute the area of intersect between box_a and box_b. 369 | Args: 370 | box_a: (tensor) bounding boxes, Shape: [A,4]. 371 | box_b: (tensor) bounding boxes, Shape: [B,4]. 372 | Return: 373 | (tensor) intersection area, Shape: [A,B]. 
374 | """ 375 | A = box_a.size(0) 376 | B = box_b.size(0) 377 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 378 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 379 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 380 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 381 | inter = torch.clamp((max_xy - min_xy), min=0) 382 | return inter[:, :, 0] * inter[:, :, 1] 383 | 384 | 385 | def jaccard(box_a, box_b): 386 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 387 | is simply the intersection over union of two boxes. Here we operate on 388 | ground truth boxes and default boxes. 389 | E.g.: 390 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 391 | Args: 392 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 393 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 394 | Return: 395 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 396 | """ 397 | inter = intersect(box_a, box_b) 398 | area_a = ((box_a[:, 2]-box_a[:, 0]) * 399 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 400 | area_b = ((box_b[:, 2]-box_b[:, 0]) * 401 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 402 | union = area_a + area_b - inter 403 | return inter / union # [A,B] 404 | 405 | 406 | def overlap_similarity(box, other_boxes): 407 | """Computes the IOU between a bounding box and set of other boxes.""" 408 | return jaccard(box.unsqueeze(0), other_boxes).squeeze(0) 409 | -------------------------------------------------------------------------------- /attendance-code.txt: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | LiquidCrystal lcd(13,12,11,10,9,8); 4 | #include 5 | SoftwareSerial fingerPrint(2, 3); 6 | 7 | #include 8 | #include "RTClib.h" 9 | RTC_DS1307 rtc; 10 | 11 | #include "Adafruit_Fingerprint.h" 12 | uint8_t id; 13 | Adafruit_Fingerprint finger = Adafruit_Fingerprint(&fingerPrint); 14 | 15 | #define enroll 14 16 | #define del 15 17 | #define up 16 18 | #define down 17 19 | #define match 5 20 | #define indFinger 7 21 | #define buzzer 5 22 | 23 | #define records 4 // 5 for 5 user 24 | 25 | int user1,user2,user3,user4,user5; 26 | 27 | DateTime now; 28 | 29 | void setup() 30 | { 31 | delay(1000); 32 | lcd.begin(16,2); 33 | Serial.begin(9600); 34 | pinMode(enroll, INPUT_PULLUP); 35 | pinMode(up, INPUT_PULLUP); 36 | pinMode(down, INPUT_PULLUP); 37 | pinMode(del, INPUT_PULLUP); 38 | pinMode(match, INPUT_PULLUP); 39 | pinMode(buzzer, OUTPUT); 40 | pinMode(indFinger, OUTPUT); 41 | digitalWrite(buzzer, LOW); 42 | if(digitalRead(enroll) == 0) 43 | { 44 | digitalWrite(buzzer, HIGH); 45 | delay(500); 46 | digitalWrite(buzzer, LOW); 47 | lcd.clear(); 48 | lcd.print("Please wait"); 49 | lcd.setCursor(0,1); 50 | lcd.print("Downloding Data"); 51 | 52 | Serial.println("Please wait"); 53 | Serial.println("Downloding Data.."); 54 | Serial.println(); 55 | 56 | Serial.print("S.No. 
"); 57 | for(int i=0;i"); 180 | lcd.print(now.hour(), DEC); 181 | lcd.print(':'); 182 | lcd.print(now.minute(), DEC); 183 | lcd.print(':'); 184 | lcd.print(now.second(), DEC); 185 | lcd.print(" "); 186 | lcd.setCursor(0,1); 187 | lcd.print("Date->"); 188 | lcd.print(now.day(), DEC); 189 | lcd.print('/'); 190 | lcd.print(now.month(), DEC); 191 | lcd.print('/'); 192 | lcd.print(now.year(), DEC); 193 | lcd.print(" "); 194 | delay(500); 195 | int result=getFingerprintIDez(); 196 | if(result>0) 197 | { 198 | digitalWrite(indFinger, LOW); 199 | digitalWrite(buzzer, HIGH); 200 | delay(100); 201 | digitalWrite(buzzer, LOW); 202 | lcd.clear(); 203 | lcd.print("ID:"); 204 | lcd.print(result); 205 | lcd.setCursor(0,1); 206 | lcd.print("Please Wait...."); 207 | delay(1000); 208 | attendance(result); 209 | lcd.clear(); 210 | lcd.print("Attendance "); 211 | lcd.setCursor(0,1); 212 | lcd.print("Registed"); 213 | delay(1000); 214 | digitalWrite(indFinger, HIGH); 215 | return; 216 | } 217 | checkKeys(); 218 | delay(300); 219 | } 220 | 221 | // dmyyhms - 7 bytes 222 | void attendance(int id) 223 | { 224 | int user=0,eepLoc=0; 225 | if(id == 1) 226 | { 227 | eepLoc=0; 228 | user=user1++; 229 | } 230 | else if(id == 2) 231 | { 232 | eepLoc=210; 233 | user=user2++; 234 | } 235 | else if(id == 3) 236 | { 237 | eepLoc=420; 238 | user=user3++; 239 | } 240 | else if(id == 4) 241 | { 242 | eepLoc=630; 243 | user=user4++; 244 | } 245 | /*else if(id == 5) // fifth user 246 | { 247 | eepLoc=840; 248 | user=user5++; 249 | }*/ 250 | else 251 | return; 252 | 253 | int eepIndex=(user*7)+eepLoc; 254 | EEPROM.write(eepIndex++, now.hour()); 255 | EEPROM.write(eepIndex++, now.minute()); 256 | EEPROM.write(eepIndex++, now.second()); 257 | EEPROM.write(eepIndex++, now.day()); 258 | EEPROM.write(eepIndex++, now.month()); 259 | EEPROM.write(eepIndex++, now.year()>>8 ); 260 | EEPROM.write(eepIndex++, now.year()); 261 | 262 | EEPROM.write(1000,user1); 263 | EEPROM.write(1001,user2); 264 | EEPROM.write(1002,user3); 265 | EEPROM.write(1003,user4); 266 | // EEPROM.write(4,user5); // figth user 267 | } 268 | 269 | void checkKeys() 270 | { 271 | if(digitalRead(enroll) == 0) 272 | { 273 | lcd.clear(); 274 | lcd.print("Please Wait"); 275 | delay(1000); 276 | while(digitalRead(enroll) == 0); 277 | Enroll(); 278 | } 279 | 280 | else if(digitalRead(del) == 0) 281 | { 282 | lcd.clear(); 283 | lcd.print("Please Wait"); 284 | delay(1000); 285 | delet(); 286 | } 287 | } 288 | 289 | void Enroll() 290 | { 291 | int count=1; 292 | lcd.clear(); 293 | lcd.print("Enter Finger ID:"); 294 | 295 | while(1) 296 | { 297 | lcd.setCursor(0,1); 298 | lcd.print(count); 299 | if(digitalRead(up) == 0) 300 | { 301 | count++; 302 | if(count>records) 303 | count=1; 304 | delay(500); 305 | } 306 | 307 | else if(digitalRead(down) == 0) 308 | { 309 | count--; 310 | if(count<1) 311 | count=records; 312 | delay(500); 313 | } 314 | else if(digitalRead(del) == 0) 315 | { 316 | id=count; 317 | getFingerprintEnroll(); 318 | for(int i=0;irecords) 350 | count=1; 351 | delay(500); 352 | } 353 | 354 | else if(digitalRead(down) == 0) 355 | { 356 | count--; 357 | if(count<1) 358 | count=records; 359 | delay(500); 360 | } 361 | else if(digitalRead(del) == 0) 362 | { 363 | id=count; 364 | deleteFingerprint(id); 365 | for(int i=0;i"); 622 | if(EEPROM.read(eepIndex)<10) 623 | Serial.print('0'); 624 | Serial.print(EEPROM.read(eepIndex++)); 625 | Serial.print(':'); 626 | if(EEPROM.read(eepIndex)<10) 627 | Serial.print('0'); 628 | Serial.print(EEPROM.read(eepIndex++)); 629 | 
Serial.print(':'); 630 | if(EEPROM.read(eepIndex)<10) 631 | Serial.print('0'); 632 | Serial.print(EEPROM.read(eepIndex++)); 633 | Serial.print(" D->"); 634 | if(EEPROM.read(eepIndex)<10) 635 | Serial.print('0'); 636 | Serial.print(EEPROM.read(eepIndex++)); 637 | Serial.print('/'); 638 | if(EEPROM.read(eepIndex)<10) 639 | Serial.print('0'); 640 | Serial.print(EEPROM.read(eepIndex++)); 641 | Serial.print('/'); 642 | Serial.print(EEPROM.read(eepIndex++)<<8 | EEPROM.read(eepIndex++)); 643 | } 644 | else 645 | { 646 | Serial.print("---------------------------"); 647 | } 648 | 649 | Serial.print(" "); 650 | } -------------------------------------------------------------------------------- /emotion-detection/functional.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import torch 3 | import math 4 | import random 5 | from PIL import Image, ImageOps, ImageEnhance 6 | try: 7 | import accimage 8 | except ImportError: 9 | accimage = None 10 | import numpy as np 11 | import numbers 12 | import types 13 | import collections 14 | import warnings 15 | 16 | 17 | def _is_pil_image(img): 18 | if accimage is not None: 19 | return isinstance(img, (Image.Image, accimage.Image)) 20 | else: 21 | return isinstance(img, Image.Image) 22 | 23 | 24 | def _is_tensor_image(img): 25 | return torch.is_tensor(img) and img.ndimension() == 3 26 | 27 | 28 | def _is_numpy_image(img): 29 | return isinstance(img, np.ndarray) and (img.ndim in {2, 3}) 30 | 31 | 32 | def to_tensor(pic): 33 | """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. 34 | See ``ToTensor`` for more details. 35 | Args: 36 | pic (PIL Image or numpy.ndarray): Image to be converted to tensor. 37 | Returns: 38 | Tensor: Converted image. 39 | """ 40 | if not(_is_pil_image(pic) or _is_numpy_image(pic)): 41 | raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic))) 42 | 43 | if isinstance(pic, np.ndarray): 44 | # handle numpy array 45 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 46 | # backward compatibility 47 | return img.float().div(255) 48 | 49 | if accimage is not None and isinstance(pic, accimage.Image): 50 | nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32) 51 | pic.copyto(nppic) 52 | return torch.from_numpy(nppic) 53 | 54 | # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) 61 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 62 | if pic.mode == 'YCbCr': 63 | nchannel = 3 64 | elif pic.mode == 'I;16': 65 | nchannel = 1 66 | else: 67 | nchannel = len(pic.mode) 68 | img = img.view(pic.size[1], pic.size[0], nchannel) 69 | # put it from HWC to CHW format 70 | # yikes, this transpose takes 80% of the loading time/CPU 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float().div(255) 74 | else: 75 | return img 76 | 77 | 78 | def to_pil_image(pic, mode=None): 79 | """Convert a tensor or an ndarray to PIL Image. 80 | See :class:`~torchvision.transforms.ToPIlImage` for more details. 81 | Args: 82 | pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. 83 | mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). 84 | .. 
_PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes 85 | Returns: 86 | PIL Image: Image converted to PIL Image. 87 | """ 88 | if not(_is_numpy_image(pic) or _is_tensor_image(pic)): 89 | raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic))) 90 | 91 | npimg = pic 92 | if isinstance(pic, torch.FloatTensor): 93 | pic = pic.mul(255).byte() 94 | if torch.is_tensor(pic): 95 | npimg = np.transpose(pic.numpy(), (1, 2, 0)) 96 | 97 | if not isinstance(npimg, np.ndarray): 98 | raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' + 99 | 'not {}'.format(type(npimg))) 100 | 101 | if npimg.shape[2] == 1: 102 | expected_mode = None 103 | npimg = npimg[:, :, 0] 104 | if npimg.dtype == np.uint8: 105 | expected_mode = 'L' 106 | if npimg.dtype == np.int16: 107 | expected_mode = 'I;16' 108 | if npimg.dtype == np.int32: 109 | expected_mode = 'I' 110 | elif npimg.dtype == np.float32: 111 | expected_mode = 'F' 112 | if mode is not None and mode != expected_mode: 113 | raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}" 114 | .format(mode, np.dtype, expected_mode)) 115 | mode = expected_mode 116 | 117 | elif npimg.shape[2] == 4: 118 | permitted_4_channel_modes = ['RGBA', 'CMYK'] 119 | if mode is not None and mode not in permitted_4_channel_modes: 120 | raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes)) 121 | 122 | if mode is None and npimg.dtype == np.uint8: 123 | mode = 'RGBA' 124 | else: 125 | permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV'] 126 | if mode is not None and mode not in permitted_3_channel_modes: 127 | raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes)) 128 | if mode is None and npimg.dtype == np.uint8: 129 | mode = 'RGB' 130 | 131 | if mode is None: 132 | raise TypeError('Input type {} is not supported'.format(npimg.dtype)) 133 | 134 | return Image.fromarray(npimg, mode=mode) 135 | 136 | 137 | def normalize(tensor, mean, std): 138 | """Normalize a tensor image with mean and standard deviation. 139 | See ``Normalize`` for more details. 140 | Args: 141 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 142 | mean (sequence): Sequence of means for each channel. 143 | std (sequence): Sequence of standard deviations for each channely. 144 | Returns: 145 | Tensor: Normalized Tensor image. 146 | """ 147 | if not _is_tensor_image(tensor): 148 | raise TypeError('tensor is not a torch image.') 149 | # TODO: make efficient 150 | for t, m, s in zip(tensor, mean, std): 151 | t.sub_(m).div_(s) 152 | return tensor 153 | 154 | 155 | def resize(img, size, interpolation=Image.BILINEAR): 156 | """Resize the input PIL Image to the given size. 157 | Args: 158 | img (PIL Image): Image to be resized. 159 | size (sequence or int): Desired output size. If size is a sequence like 160 | (h, w), the output size will be matched to this. If size is an int, 161 | the smaller edge of the image will be matched to this number maintaing 162 | the aspect ratio. i.e, if height > width, then image will be rescaled to 163 | (size * height / width, size) 164 | interpolation (int, optional): Desired interpolation. Default is 165 | ``PIL.Image.BILINEAR`` 166 | Returns: 167 | PIL Image: Resized image. 168 | """ 169 | if not _is_pil_image(img): 170 | raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) 171 | if not (isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2)): 172 | raise TypeError('Got inappropriate size arg: {}'.format(size)) 173 | 174 | if isinstance(size, int): 175 | w, h = img.size 176 | if (w <= h and w == size) or (h <= w and h == size): 177 | return img 178 | if w < h: 179 | ow = size 180 | oh = int(size * h / w) 181 | return img.resize((ow, oh), interpolation) 182 | else: 183 | oh = size 184 | ow = int(size * w / h) 185 | return img.resize((ow, oh), interpolation) 186 | else: 187 | return img.resize(size[::-1], interpolation) 188 | 189 | 190 | def scale(*args, **kwargs): 191 | warnings.warn("The use of the transforms.Scale transform is deprecated, " + 192 | "please use transforms.Resize instead.") 193 | return resize(*args, **kwargs) 194 | 195 | 196 | def pad(img, padding, fill=0): 197 | """Pad the given PIL Image on all sides with the given "pad" value. 198 | Args: 199 | img (PIL Image): Image to be padded. 200 | padding (int or tuple): Padding on each border. If a single int is provided this 201 | is used to pad all borders. If tuple of length 2 is provided this is the padding 202 | on left/right and top/bottom respectively. If a tuple of length 4 is provided 203 | this is the padding for the left, top, right and bottom borders 204 | respectively. 205 | fill: Pixel fill value. Default is 0. If a tuple of 206 | length 3, it is used to fill R, G, B channels respectively. 207 | Returns: 208 | PIL Image: Padded image. 209 | """ 210 | if not _is_pil_image(img): 211 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 212 | 213 | if not isinstance(padding, (numbers.Number, tuple)): 214 | raise TypeError('Got inappropriate padding arg') 215 | if not isinstance(fill, (numbers.Number, str, tuple)): 216 | raise TypeError('Got inappropriate fill arg') 217 | 218 | if isinstance(padding, collections.Sequence) and len(padding) not in [2, 4]: 219 | raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " + 220 | "{} element tuple".format(len(padding))) 221 | 222 | return ImageOps.expand(img, border=padding, fill=fill) 223 | 224 | 225 | def crop(img, i, j, h, w): 226 | """Crop the given PIL Image. 227 | Args: 228 | img (PIL Image): Image to be cropped. 229 | i: Upper pixel coordinate. 230 | j: Left pixel coordinate. 231 | h: Height of the cropped image. 232 | w: Width of the cropped image. 233 | Returns: 234 | PIL Image: Cropped image. 235 | """ 236 | if not _is_pil_image(img): 237 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 238 | 239 | return img.crop((j, i, j + w, i + h)) 240 | 241 | 242 | def center_crop(img, output_size): 243 | if isinstance(output_size, numbers.Number): 244 | output_size = (int(output_size), int(output_size)) 245 | w, h = img.size 246 | th, tw = output_size 247 | i = int(round((h - th) / 2.)) 248 | j = int(round((w - tw) / 2.)) 249 | return crop(img, i, j, th, tw) 250 | 251 | 252 | def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR): 253 | """Crop the given PIL Image and resize it to desired size. 254 | Notably used in RandomResizedCrop. 255 | Args: 256 | img (PIL Image): Image to be cropped. 257 | i: Upper pixel coordinate. 258 | j: Left pixel coordinate. 259 | h: Height of the cropped image. 260 | w: Width of the cropped image. 261 | size (sequence or int): Desired output size. Same semantics as ``scale``. 262 | interpolation (int, optional): Desired interpolation. Default is 263 | ``PIL.Image.BILINEAR``. 
264 | Returns: 265 | PIL Image: Cropped image. 266 | """ 267 | assert _is_pil_image(img), 'img should be PIL Image' 268 | img = crop(img, i, j, h, w) 269 | img = resize(img, size, interpolation) 270 | return img 271 | 272 | 273 | def hflip(img): 274 | """Horizontally flip the given PIL Image. 275 | Args: 276 | img (PIL Image): Image to be flipped. 277 | Returns: 278 | PIL Image: Horizontall flipped image. 279 | """ 280 | if not _is_pil_image(img): 281 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 282 | 283 | return img.transpose(Image.FLIP_LEFT_RIGHT) 284 | 285 | 286 | def vflip(img): 287 | """Vertically flip the given PIL Image. 288 | Args: 289 | img (PIL Image): Image to be flipped. 290 | Returns: 291 | PIL Image: Vertically flipped image. 292 | """ 293 | if not _is_pil_image(img): 294 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 295 | 296 | return img.transpose(Image.FLIP_TOP_BOTTOM) 297 | 298 | 299 | def five_crop(img, size): 300 | """Crop the given PIL Image into four corners and the central crop. 301 | .. Note:: 302 | This transform returns a tuple of images and there may be a 303 | mismatch in the number of inputs and targets your ``Dataset`` returns. 304 | Args: 305 | size (sequence or int): Desired output size of the crop. If size is an 306 | int instead of sequence like (h, w), a square crop (size, size) is 307 | made. 308 | Returns: 309 | tuple: tuple (tl, tr, bl, br, center) corresponding top left, 310 | top right, bottom left, bottom right and center crop. 311 | """ 312 | if isinstance(size, numbers.Number): 313 | size = (int(size), int(size)) 314 | else: 315 | assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 316 | 317 | w, h = img.size 318 | crop_h, crop_w = size 319 | if crop_w > w or crop_h > h: 320 | raise ValueError("Requested crop size {} is bigger than input size {}".format(size, 321 | (h, w))) 322 | tl = img.crop((0, 0, crop_w, crop_h)) 323 | tr = img.crop((w - crop_w, 0, w, crop_h)) 324 | bl = img.crop((0, h - crop_h, crop_w, h)) 325 | br = img.crop((w - crop_w, h - crop_h, w, h)) 326 | center = center_crop(img, (crop_h, crop_w)) 327 | return (tl, tr, bl, br, center) 328 | 329 | 330 | def ten_crop(img, size, vertical_flip=False): 331 | """Crop the given PIL Image into four corners and the central crop plus the 332 | flipped version of these (horizontal flipping is used by default). 333 | .. Note:: 334 | This transform returns a tuple of images and there may be a 335 | mismatch in the number of inputs and targets your ``Dataset`` returns. 336 | Args: 337 | size (sequence or int): Desired output size of the crop. If size is an 338 | int instead of sequence like (h, w), a square crop (size, size) is 339 | made. 340 | vertical_flip (bool): Use vertical flipping instead of horizontal 341 | Returns: 342 | tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, 343 | br_flip, center_flip) corresponding top left, top right, 344 | bottom left, bottom right and center crop and same for the 345 | flipped image. 346 | """ 347 | if isinstance(size, numbers.Number): 348 | size = (int(size), int(size)) 349 | else: 350 | assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 
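    # ten_crop composes five_crop with a flipped copy of the image: the result
    # is a 10-tuple of the five corner/center crops of the original followed by
    # the five crops of the flipped image.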
351 | 352 | first_five = five_crop(img, size) 353 | 354 | if vertical_flip: 355 | img = vflip(img) 356 | else: 357 | img = hflip(img) 358 | 359 | second_five = five_crop(img, size) 360 | return first_five + second_five 361 | 362 | 363 | def adjust_brightness(img, brightness_factor): 364 | """Adjust brightness of an Image. 365 | Args: 366 | img (PIL Image): PIL Image to be adjusted. 367 | brightness_factor (float): How much to adjust the brightness. Can be 368 | any non negative number. 0 gives a black image, 1 gives the 369 | original image while 2 increases the brightness by a factor of 2. 370 | Returns: 371 | PIL Image: Brightness adjusted image. 372 | """ 373 | if not _is_pil_image(img): 374 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 375 | 376 | enhancer = ImageEnhance.Brightness(img) 377 | img = enhancer.enhance(brightness_factor) 378 | return img 379 | 380 | 381 | def adjust_contrast(img, contrast_factor): 382 | """Adjust contrast of an Image. 383 | Args: 384 | img (PIL Image): PIL Image to be adjusted. 385 | contrast_factor (float): How much to adjust the contrast. Can be any 386 | non negative number. 0 gives a solid gray image, 1 gives the 387 | original image while 2 increases the contrast by a factor of 2. 388 | Returns: 389 | PIL Image: Contrast adjusted image. 390 | """ 391 | if not _is_pil_image(img): 392 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 393 | 394 | enhancer = ImageEnhance.Contrast(img) 395 | img = enhancer.enhance(contrast_factor) 396 | return img 397 | 398 | 399 | def adjust_saturation(img, saturation_factor): 400 | """Adjust color saturation of an image. 401 | Args: 402 | img (PIL Image): PIL Image to be adjusted. 403 | saturation_factor (float): How much to adjust the saturation. 0 will 404 | give a black and white image, 1 will give the original image while 405 | 2 will enhance the saturation by a factor of 2. 406 | Returns: 407 | PIL Image: Saturation adjusted image. 408 | """ 409 | if not _is_pil_image(img): 410 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 411 | 412 | enhancer = ImageEnhance.Color(img) 413 | img = enhancer.enhance(saturation_factor) 414 | return img 415 | 416 | 417 | def adjust_hue(img, hue_factor): 418 | """Adjust hue of an image. 419 | The image hue is adjusted by converting the image to HSV and 420 | cyclically shifting the intensities in the hue channel (H). 421 | The image is then converted back to original image mode. 422 | `hue_factor` is the amount of shift in H channel and must be in the 423 | interval `[-0.5, 0.5]`. 424 | See https://en.wikipedia.org/wiki/Hue for more details on Hue. 425 | Args: 426 | img (PIL Image): PIL Image to be adjusted. 427 | hue_factor (float): How much to shift the hue channel. Should be in 428 | [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in 429 | HSV space in positive and negative direction respectively. 430 | 0 means no shift. Therefore, both -0.5 and 0.5 will give an image 431 | with complementary colors while 0 gives the original image. 432 | Returns: 433 | PIL Image: Hue adjusted image. 434 | """ 435 | if not(-0.5 <= hue_factor <= 0.5): 436 | raise ValueError('hue_factor is not in [-0.5, 0.5].'.format(hue_factor)) 437 | 438 | if not _is_pil_image(img): 439 | raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) 440 | 441 | input_mode = img.mode 442 | if input_mode in {'L', '1', 'I', 'F'}: 443 | return img 444 | 445 | h, s, v = img.convert('HSV').split() 446 | 447 | np_h = np.array(h, dtype=np.uint8) 448 | # uint8 addition take cares of rotation across boundaries 449 | with np.errstate(over='ignore'): 450 | np_h += np.uint8(hue_factor * 255) 451 | h = Image.fromarray(np_h, 'L') 452 | 453 | img = Image.merge('HSV', (h, s, v)).convert(input_mode) 454 | return img 455 | 456 | 457 | def adjust_gamma(img, gamma, gain=1): 458 | """Perform gamma correction on an image. 459 | Also known as Power Law Transform. Intensities in RGB mode are adjusted 460 | based on the following equation: 461 | I_out = 255 * gain * ((I_in / 255) ** gamma) 462 | See https://en.wikipedia.org/wiki/Gamma_correction for more details. 463 | Args: 464 | img (PIL Image): PIL Image to be adjusted. 465 | gamma (float): Non negative real number. gamma larger than 1 make the 466 | shadows darker, while gamma smaller than 1 make dark regions 467 | lighter. 468 | gain (float): The constant multiplier. 469 | """ 470 | if not _is_pil_image(img): 471 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 472 | 473 | if gamma < 0: 474 | raise ValueError('Gamma should be a non-negative real number') 475 | 476 | input_mode = img.mode 477 | img = img.convert('RGB') 478 | 479 | np_img = np.array(img, dtype=np.float32) 480 | np_img = 255 * gain * ((np_img / 255) ** gamma) 481 | np_img = np.uint8(np.clip(np_img, 0, 255)) 482 | 483 | img = Image.fromarray(np_img, 'RGB').convert(input_mode) 484 | return img 485 | 486 | 487 | def rotate(img, angle, resample=False, expand=False, center=None): 488 | """Rotate the image by angle and then (optionally) translate it by (n_columns, n_rows) 489 | Args: 490 | img (PIL Image): PIL Image to be rotated. 491 | angle ({float, int}): In degrees degrees counter clockwise order. 492 | resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional): 493 | An optional resampling filter. 494 | See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters 495 | If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST. 496 | expand (bool, optional): Optional expansion flag. 497 | If true, expands the output image to make it large enough to hold the entire rotated image. 498 | If false or omitted, make the output image the same size as the input image. 499 | Note that the expand flag assumes rotation around the center and no translation. 500 | center (2-tuple, optional): Optional center of rotation. 501 | Origin is the upper left corner. 502 | Default is the center of the image. 503 | """ 504 | 505 | if not _is_pil_image(img): 506 | raise TypeError('img should be PIL Image. Got {}'.format(type(img))) 507 | 508 | return img.rotate(angle, resample, expand, center) 509 | 510 | 511 | def to_grayscale(img, num_output_channels=1): 512 | """Convert image to grayscale version of image. 513 | Args: 514 | img (PIL Image): Image to be converted to grayscale. 515 | Returns: 516 | PIL Image: Grayscale version of the image. 517 | if num_output_channels == 1 : returned image is single channel 518 | if num_output_channels == 3 : returned image is 3 channel with r == g == b 519 | """ 520 | if not _is_pil_image(img): 521 | raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) 522 | 523 | if num_output_channels == 1: 524 | img = img.convert('L') 525 | elif num_output_channels == 3: 526 | img = img.convert('L') 527 | np_img = np.array(img, dtype=np.uint8) 528 | np_img = np.dstack([np_img, np_img, np_img]) 529 | img = Image.fromarray(np_img, 'RGB') 530 | else: 531 | raise ValueError('num_output_channels should be either 1 or 3') 532 | 533 | return img 534 | -------------------------------------------------------------------------------- /emotion-detection/transforms.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import torch 3 | import math 4 | import random 5 | from PIL import Image, ImageOps, ImageEnhance 6 | try: 7 | import accimage 8 | except ImportError: 9 | accimage = None 10 | import numpy as np 11 | import numbers 12 | import types 13 | import collections 14 | import warnings 15 | 16 | import functional as F 17 | 18 | __all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "Resize", "Scale", "CenterCrop", "Pad", 19 | "Lambda", "RandomCrop", "RandomHorizontalFlip", "RandomVerticalFlip", "RandomResizedCrop", 20 | "RandomSizedCrop", "FiveCrop", "TenCrop", "LinearTransformation", "ColorJitter", "RandomRotation", 21 | "Grayscale", "RandomGrayscale"] 22 | 23 | 24 | class Compose(object): 25 | """Composes several transforms together. 26 | Args: 27 | transforms (list of ``Transform`` objects): list of transforms to compose. 28 | Example: 29 | >>> transforms.Compose([ 30 | >>> transforms.CenterCrop(10), 31 | >>> transforms.ToTensor(), 32 | >>> ]) 33 | """ 34 | 35 | def __init__(self, transforms): 36 | self.transforms = transforms 37 | 38 | def __call__(self, img): 39 | for t in self.transforms: 40 | img = t(img) 41 | return img 42 | 43 | 44 | class ToTensor(object): 45 | """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. 46 | Converts a PIL Image or numpy.ndarray (H x W x C) in the range 47 | [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]. 48 | """ 49 | 50 | def __call__(self, pic): 51 | """ 52 | Args: 53 | pic (PIL Image or numpy.ndarray): Image to be converted to tensor. 54 | Returns: 55 | Tensor: Converted image. 56 | """ 57 | return F.to_tensor(pic) 58 | 59 | 60 | class ToPILImage(object): 61 | """Convert a tensor or an ndarray to PIL Image. 62 | Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape 63 | H x W x C to a PIL Image while preserving the value range. 64 | Args: 65 | mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). 66 | If ``mode`` is ``None`` (default) there are some assumptions made about the input data: 67 | 1. If the input has 3 channels, the ``mode`` is assumed to be ``RGB``. 68 | 2. If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``. 69 | 3. If the input has 1 channel, the ``mode`` is determined by the data type (i,e, 70 | ``int``, ``float``, ``short``). 71 | .. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes 72 | """ 73 | def __init__(self, mode=None): 74 | self.mode = mode 75 | 76 | def __call__(self, pic): 77 | """ 78 | Args: 79 | pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. 80 | Returns: 81 | PIL Image: Image converted to PIL Image. 82 | """ 83 | return F.to_pil_image(pic, self.mode) 84 | 85 | 86 | class Normalize(object): 87 | """Normalize an tensor image with mean and standard deviation. 
88 | Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform 89 | will normalize each channel of the input ``torch.*Tensor`` i.e. 90 | ``input[channel] = (input[channel] - mean[channel]) / std[channel]`` 91 | Args: 92 | mean (sequence): Sequence of means for each channel. 93 | std (sequence): Sequence of standard deviations for each channel. 94 | """ 95 | 96 | def __init__(self, mean, std): 97 | self.mean = mean 98 | self.std = std 99 | 100 | def __call__(self, tensor): 101 | """ 102 | Args: 103 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 104 | Returns: 105 | Tensor: Normalized Tensor image. 106 | """ 107 | return F.normalize(tensor, self.mean, self.std) 108 | 109 | 110 | class Resize(object): 111 | """Resize the input PIL Image to the given size. 112 | Args: 113 | size (sequence or int): Desired output size. If size is a sequence like 114 | (h, w), output size will be matched to this. If size is an int, 115 | smaller edge of the image will be matched to this number. 116 | i.e, if height > width, then image will be rescaled to 117 | (size * height / width, size) 118 | interpolation (int, optional): Desired interpolation. Default is 119 | ``PIL.Image.BILINEAR`` 120 | """ 121 | 122 | def __init__(self, size, interpolation=Image.BILINEAR): 123 | assert isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2) 124 | self.size = size 125 | self.interpolation = interpolation 126 | 127 | def __call__(self, img): 128 | """ 129 | Args: 130 | img (PIL Image): Image to be scaled. 131 | Returns: 132 | PIL Image: Rescaled image. 133 | """ 134 | return F.resize(img, self.size, self.interpolation) 135 | 136 | 137 | class Scale(Resize): 138 | """ 139 | Note: This transform is deprecated in favor of Resize. 140 | """ 141 | def __init__(self, *args, **kwargs): 142 | warnings.warn("The use of the transforms.Scale transform is deprecated, " + 143 | "please use transforms.Resize instead.") 144 | super(Scale, self).__init__(*args, **kwargs) 145 | 146 | 147 | class CenterCrop(object): 148 | """Crops the given PIL Image at the center. 149 | Args: 150 | size (sequence or int): Desired output size of the crop. If size is an 151 | int instead of sequence like (h, w), a square crop (size, size) is 152 | made. 153 | """ 154 | 155 | def __init__(self, size): 156 | if isinstance(size, numbers.Number): 157 | self.size = (int(size), int(size)) 158 | else: 159 | self.size = size 160 | 161 | def __call__(self, img): 162 | """ 163 | Args: 164 | img (PIL Image): Image to be cropped. 165 | Returns: 166 | PIL Image: Cropped image. 167 | """ 168 | return F.center_crop(img, self.size) 169 | 170 | 171 | class Pad(object): 172 | """Pad the given PIL Image on all sides with the given "pad" value. 173 | Args: 174 | padding (int or tuple): Padding on each border. If a single int is provided this 175 | is used to pad all borders. If tuple of length 2 is provided this is the padding 176 | on left/right and top/bottom respectively. If a tuple of length 4 is provided 177 | this is the padding for the left, top, right and bottom borders 178 | respectively. 179 | fill: Pixel fill value. Default is 0. If a tuple of 180 | length 3, it is used to fill R, G, B channels respectively. 
181 | """ 182 | 183 | def __init__(self, padding, fill=0): 184 | assert isinstance(padding, (numbers.Number, tuple)) 185 | assert isinstance(fill, (numbers.Number, str, tuple)) 186 | if isinstance(padding, collections.Sequence) and len(padding) not in [2, 4]: 187 | raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " + 188 | "{} element tuple".format(len(padding))) 189 | 190 | self.padding = padding 191 | self.fill = fill 192 | 193 | def __call__(self, img): 194 | """ 195 | Args: 196 | img (PIL Image): Image to be padded. 197 | Returns: 198 | PIL Image: Padded image. 199 | """ 200 | return F.pad(img, self.padding, self.fill) 201 | 202 | 203 | class Lambda(object): 204 | """Apply a user-defined lambda as a transform. 205 | Args: 206 | lambd (function): Lambda/function to be used for transform. 207 | """ 208 | 209 | def __init__(self, lambd): 210 | assert isinstance(lambd, types.LambdaType) 211 | self.lambd = lambd 212 | 213 | def __call__(self, img): 214 | return self.lambd(img) 215 | 216 | 217 | class RandomCrop(object): 218 | """Crop the given PIL Image at a random location. 219 | Args: 220 | size (sequence or int): Desired output size of the crop. If size is an 221 | int instead of sequence like (h, w), a square crop (size, size) is 222 | made. 223 | padding (int or sequence, optional): Optional padding on each border 224 | of the image. Default is 0, i.e no padding. If a sequence of length 225 | 4 is provided, it is used to pad left, top, right, bottom borders 226 | respectively. 227 | """ 228 | 229 | def __init__(self, size, padding=0): 230 | if isinstance(size, numbers.Number): 231 | self.size = (int(size), int(size)) 232 | else: 233 | self.size = size 234 | self.padding = padding 235 | 236 | @staticmethod 237 | def get_params(img, output_size): 238 | """Get parameters for ``crop`` for a random crop. 239 | Args: 240 | img (PIL Image): Image to be cropped. 241 | output_size (tuple): Expected output size of the crop. 242 | Returns: 243 | tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. 244 | """ 245 | w, h = img.size 246 | th, tw = output_size 247 | if w == tw and h == th: 248 | return 0, 0, h, w 249 | 250 | i = random.randint(0, h - th) 251 | j = random.randint(0, w - tw) 252 | return i, j, th, tw 253 | 254 | def __call__(self, img): 255 | """ 256 | Args: 257 | img (PIL Image): Image to be cropped. 258 | Returns: 259 | PIL Image: Cropped image. 260 | """ 261 | if self.padding > 0: 262 | img = F.pad(img, self.padding) 263 | 264 | i, j, h, w = self.get_params(img, self.size) 265 | 266 | return F.crop(img, i, j, h, w) 267 | 268 | 269 | class RandomHorizontalFlip(object): 270 | """Horizontally flip the given PIL Image randomly with a probability of 0.5.""" 271 | 272 | def __call__(self, img): 273 | """ 274 | Args: 275 | img (PIL Image): Image to be flipped. 276 | Returns: 277 | PIL Image: Randomly flipped image. 278 | """ 279 | if random.random() < 0.5: 280 | return F.hflip(img) 281 | return img 282 | 283 | 284 | class RandomVerticalFlip(object): 285 | """Vertically flip the given PIL Image randomly with a probability of 0.5.""" 286 | 287 | def __call__(self, img): 288 | """ 289 | Args: 290 | img (PIL Image): Image to be flipped. 291 | Returns: 292 | PIL Image: Randomly flipped image. 293 | """ 294 | if random.random() < 0.5: 295 | return F.vflip(img) 296 | return img 297 | 298 | 299 | class RandomResizedCrop(object): 300 | """Crop the given PIL Image to random size and aspect ratio. 
301 | A crop of random size (default: of 0.08 to 1.0) of the original size and a random 302 | aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop 303 | is finally resized to given size. 304 | This is popularly used to train the Inception networks. 305 | Args: 306 | size: expected output size of each edge 307 | scale: range of size of the origin size cropped 308 | ratio: range of aspect ratio of the origin aspect ratio cropped 309 | interpolation: Default: PIL.Image.BILINEAR 310 | """ 311 | 312 | def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR): 313 | self.size = (size, size) 314 | self.interpolation = interpolation 315 | self.scale = scale 316 | self.ratio = ratio 317 | 318 | @staticmethod 319 | def get_params(img, scale, ratio): 320 | """Get parameters for ``crop`` for a random sized crop. 321 | Args: 322 | img (PIL Image): Image to be cropped. 323 | scale (tuple): range of size of the origin size cropped 324 | ratio (tuple): range of aspect ratio of the origin aspect ratio cropped 325 | Returns: 326 | tuple: params (i, j, h, w) to be passed to ``crop`` for a random 327 | sized crop. 328 | """ 329 | for attempt in range(10): 330 | area = img.size[0] * img.size[1] 331 | target_area = random.uniform(*scale) * area 332 | aspect_ratio = random.uniform(*ratio) 333 | 334 | w = int(round(math.sqrt(target_area * aspect_ratio))) 335 | h = int(round(math.sqrt(target_area / aspect_ratio))) 336 | 337 | if random.random() < 0.5: 338 | w, h = h, w 339 | 340 | if w <= img.size[0] and h <= img.size[1]: 341 | i = random.randint(0, img.size[1] - h) 342 | j = random.randint(0, img.size[0] - w) 343 | return i, j, h, w 344 | 345 | # Fallback 346 | w = min(img.size[0], img.size[1]) 347 | i = (img.size[1] - w) // 2 348 | j = (img.size[0] - w) // 2 349 | return i, j, w, w 350 | 351 | def __call__(self, img): 352 | """ 353 | Args: 354 | img (PIL Image): Image to be flipped. 355 | Returns: 356 | PIL Image: Randomly cropped and resize image. 357 | """ 358 | i, j, h, w = self.get_params(img, self.scale, self.ratio) 359 | return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) 360 | 361 | 362 | class RandomSizedCrop(RandomResizedCrop): 363 | """ 364 | Note: This transform is deprecated in favor of RandomResizedCrop. 365 | """ 366 | def __init__(self, *args, **kwargs): 367 | warnings.warn("The use of the transforms.RandomSizedCrop transform is deprecated, " + 368 | "please use transforms.RandomResizedCrop instead.") 369 | super(RandomSizedCrop, self).__init__(*args, **kwargs) 370 | 371 | 372 | class FiveCrop(object): 373 | """Crop the given PIL Image into four corners and the central crop 374 | .. Note:: 375 | This transform returns a tuple of images and there may be a mismatch in the number of 376 | inputs and targets your Dataset returns. See below for an example of how to deal with 377 | this. 378 | Args: 379 | size (sequence or int): Desired output size of the crop. If size is an ``int`` 380 | instead of sequence like (h, w), a square crop of size (size, size) is made. 
381 | Example: 382 | >>> transform = Compose([ 383 | >>> FiveCrop(size), # this is a list of PIL Images 384 | >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor 385 | >>> ]) 386 | >>> #In your test loop you can do the following: 387 | >>> input, target = batch # input is a 5d tensor, target is 2d 388 | >>> bs, ncrops, c, h, w = input.size() 389 | >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops 390 | >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops 391 | """ 392 | 393 | def __init__(self, size): 394 | self.size = size 395 | if isinstance(size, numbers.Number): 396 | self.size = (int(size), int(size)) 397 | else: 398 | assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 399 | self.size = size 400 | 401 | def __call__(self, img): 402 | return F.five_crop(img, self.size) 403 | 404 | 405 | class TenCrop(object): 406 | """Crop the given PIL Image into four corners and the central crop plus the flipped version of 407 | these (horizontal flipping is used by default) 408 | .. Note:: 409 | This transform returns a tuple of images and there may be a mismatch in the number of 410 | inputs and targets your Dataset returns. See below for an example of how to deal with 411 | this. 412 | Args: 413 | size (sequence or int): Desired output size of the crop. If size is an 414 | int instead of sequence like (h, w), a square crop (size, size) is 415 | made. 416 | vertical_flip(bool): Use vertical flipping instead of horizontal 417 | Example: 418 | >>> transform = Compose([ 419 | >>> TenCrop(size), # this is a list of PIL Images 420 | >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor 421 | >>> ]) 422 | >>> #In your test loop you can do the following: 423 | >>> input, target = batch # input is a 5d tensor, target is 2d 424 | >>> bs, ncrops, c, h, w = input.size() 425 | >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops 426 | >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops 427 | """ 428 | 429 | def __init__(self, size, vertical_flip=False): 430 | self.size = size 431 | if isinstance(size, numbers.Number): 432 | self.size = (int(size), int(size)) 433 | else: 434 | assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 435 | self.size = size 436 | self.vertical_flip = vertical_flip 437 | 438 | def __call__(self, img): 439 | return F.ten_crop(img, self.size, self.vertical_flip) 440 | 441 | 442 | class LinearTransformation(object): 443 | """Transform a tensor image with a square transformation matrix computed 444 | offline. 445 | Given transformation_matrix, will flatten the torch.*Tensor, compute the dot 446 | product with the transformation matrix and reshape the tensor to its 447 | original shape. 448 | Applications: 449 | - whitening: zero-center the data, compute the data covariance matrix 450 | [D x D] with np.dot(X.T, X), perform SVD on this matrix and 451 | pass it as transformation_matrix. 452 | Args: 453 | transformation_matrix (Tensor): tensor [D x D], D = C x H x W 454 | """ 455 | 456 | def __init__(self, transformation_matrix): 457 | if transformation_matrix.size(0) != transformation_matrix.size(1): 458 | raise ValueError("transformation_matrix should be square. 
Got " + 459 | "[{} x {}] rectangular matrix.".format(*transformation_matrix.size())) 460 | self.transformation_matrix = transformation_matrix 461 | 462 | def __call__(self, tensor): 463 | """ 464 | Args: 465 | tensor (Tensor): Tensor image of size (C, H, W) to be whitened. 466 | Returns: 467 | Tensor: Transformed image. 468 | """ 469 | if tensor.size(0) * tensor.size(1) * tensor.size(2) != self.transformation_matrix.size(0): 470 | raise ValueError("tensor and transformation matrix have incompatible shape." + 471 | "[{} x {} x {}] != ".format(*tensor.size()) + 472 | "{}".format(self.transformation_matrix.size(0))) 473 | flat_tensor = tensor.view(1, -1) 474 | transformed_tensor = torch.mm(flat_tensor, self.transformation_matrix) 475 | tensor = transformed_tensor.view(tensor.size()) 476 | return tensor 477 | 478 | 479 | class ColorJitter(object): 480 | """Randomly change the brightness, contrast and saturation of an image. 481 | Args: 482 | brightness (float): How much to jitter brightness. brightness_factor 483 | is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]. 484 | contrast (float): How much to jitter contrast. contrast_factor 485 | is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]. 486 | saturation (float): How much to jitter saturation. saturation_factor 487 | is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]. 488 | hue(float): How much to jitter hue. hue_factor is chosen uniformly from 489 | [-hue, hue]. Should be >=0 and <= 0.5. 490 | """ 491 | def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): 492 | self.brightness = brightness 493 | self.contrast = contrast 494 | self.saturation = saturation 495 | self.hue = hue 496 | 497 | @staticmethod 498 | def get_params(brightness, contrast, saturation, hue): 499 | """Get a randomized transform to be applied on image. 500 | Arguments are same as that of __init__. 501 | Returns: 502 | Transform which randomly adjusts brightness, contrast and 503 | saturation in a random order. 504 | """ 505 | transforms = [] 506 | if brightness > 0: 507 | brightness_factor = np.random.uniform(max(0, 1 - brightness), 1 + brightness) 508 | transforms.append(Lambda(lambda img: F.adjust_brightness(img, brightness_factor))) 509 | 510 | if contrast > 0: 511 | contrast_factor = np.random.uniform(max(0, 1 - contrast), 1 + contrast) 512 | transforms.append(Lambda(lambda img: F.adjust_contrast(img, contrast_factor))) 513 | 514 | if saturation > 0: 515 | saturation_factor = np.random.uniform(max(0, 1 - saturation), 1 + saturation) 516 | transforms.append(Lambda(lambda img: F.adjust_saturation(img, saturation_factor))) 517 | 518 | if hue > 0: 519 | hue_factor = np.random.uniform(-hue, hue) 520 | transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor))) 521 | 522 | np.random.shuffle(transforms) 523 | transform = Compose(transforms) 524 | 525 | return transform 526 | 527 | def __call__(self, img): 528 | """ 529 | Args: 530 | img (PIL Image): Input image. 531 | Returns: 532 | PIL Image: Color jittered image. 533 | """ 534 | transform = self.get_params(self.brightness, self.contrast, 535 | self.saturation, self.hue) 536 | return transform(img) 537 | 538 | 539 | class RandomRotation(object): 540 | """Rotate the image by angle. 541 | Args: 542 | degrees (sequence or float or int): Range of degrees to select from. 543 | If degrees is a number instead of sequence like (min, max), the range of degrees 544 | will be (-degrees, +degrees). 
545 | resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional): 546 | An optional resampling filter. 547 | See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters 548 | If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST. 549 | expand (bool, optional): Optional expansion flag. 550 | If true, expands the output to make it large enough to hold the entire rotated image. 551 | If false or omitted, make the output image the same size as the input image. 552 | Note that the expand flag assumes rotation around the center and no translation. 553 | center (2-tuple, optional): Optional center of rotation. 554 | Origin is the upper left corner. 555 | Default is the center of the image. 556 | """ 557 | 558 | def __init__(self, degrees, resample=False, expand=False, center=None): 559 | if isinstance(degrees, numbers.Number): 560 | if degrees < 0: 561 | raise ValueError("If degrees is a single number, it must be positive.") 562 | self.degrees = (-degrees, degrees) 563 | else: 564 | if len(degrees) != 2: 565 | raise ValueError("If degrees is a sequence, it must be of len 2.") 566 | self.degrees = degrees 567 | 568 | self.resample = resample 569 | self.expand = expand 570 | self.center = center 571 | 572 | @staticmethod 573 | def get_params(degrees): 574 | """Get parameters for ``rotate`` for a random rotation. 575 | Returns: 576 | sequence: params to be passed to ``rotate`` for random rotation. 577 | """ 578 | angle = np.random.uniform(degrees[0], degrees[1]) 579 | 580 | return angle 581 | 582 | def __call__(self, img): 583 | """ 584 | img (PIL Image): Image to be rotated. 585 | Returns: 586 | PIL Image: Rotated image. 587 | """ 588 | 589 | angle = self.get_params(self.degrees) 590 | 591 | return F.rotate(img, angle, self.resample, self.expand, self.center) 592 | 593 | 594 | class Grayscale(object): 595 | """Convert image to grayscale. 596 | Args: 597 | num_output_channels (int): (1 or 3) number of channels desired for output image 598 | Returns: 599 | PIL Image: Grayscale version of the input. 600 | - If num_output_channels == 1 : returned image is single channel 601 | - If num_output_channels == 3 : returned image is 3 channel with r == g == b 602 | """ 603 | 604 | def __init__(self, num_output_channels=1): 605 | self.num_output_channels = num_output_channels 606 | 607 | def __call__(self, img): 608 | """ 609 | Args: 610 | img (PIL Image): Image to be converted to grayscale. 611 | Returns: 612 | PIL Image: Randomly grayscaled image. 613 | """ 614 | return F.to_grayscale(img, num_output_channels=self.num_output_channels) 615 | 616 | 617 | class RandomGrayscale(object): 618 | """Randomly convert image to grayscale with a probability of p (default 0.1). 619 | Args: 620 | p (float): probability that image should be converted to grayscale. 621 | Returns: 622 | PIL Image: Grayscale version of the input image with probability p and unchanged 623 | with probability (1-p). 624 | - If input image is 1 channel: grayscale version is 1 channel 625 | - If input image is 3 channel: grayscale version is 3 channel with r == g == b 626 | """ 627 | 628 | def __init__(self, p=0.1): 629 | self.p = p 630 | 631 | def __call__(self, img): 632 | """ 633 | Args: 634 | img (PIL Image): Image to be converted to grayscale. 635 | Returns: 636 | PIL Image: Randomly grayscaled image. 
637 | """ 638 | num_output_channels = 1 if img.mode == 'L' else 3 639 | if random.random() < self.p: 640 | return F.to_grayscale(img, num_output_channels=num_output_channels) 641 | return img 642 | --------------------------------------------------------------------------------