├── .gitignore
├── LICENSE
├── README.md
├── analysis.py
├── config.py
├── src
│   ├── __init__.py
│   ├── dataset.py
│   ├── model.py
│   └── prediction.py
└── train.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 hackiey

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# keypoints
A PyTorch implementation of Google's paper "Towards Accurate Multi-person Pose Estimation in the Wild".

# dataset
## labels example
```
[[x1,y1,v1,x2,y2,v2,...],[...]]
```
Each inner list holds the keypoints of one cropped person as flat (x, y, v) triples, where v = 1 means the keypoint is visible and v = 2 means it is labeled but not visible.
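
With `NUM_CLASSES = 14` (see `config.py`), each inner list has 14 × 3 = 42 numbers. For illustration only (the coordinate values below are invented), an annotation for one cropped person might look like:

```python
labels = [
    [120, 60, 1,   # keypoint 0 at (x=120, y=60), visible
     131, 95, 2,   # keypoint 1 at (x=131, y=95), labeled but not visible
     ...]          # ...and 12 more (x, y, v) triples
]
```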
--------------------------------------------------------------------------------
/analysis.py:
--------------------------------------------------------------------------------
import torch
from torchvision import transforms
from config import NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, IMG_SMALL_HEIGHT, IMG_SMALL_WIDTH
from src.model import Keypoints
from src.prediction import Prediction
from PIL import Image
import numpy as np

# model
keypoints = Keypoints(NUM_CLASSES, img_height=IMG_HEIGHT, img_width=IMG_WIDTH)
keypoints.load_state_dict(torch.load('../character_checkpoints/model_1_18_4.pth'))

# cuda
use_cuda = torch.cuda.is_available()
# use_cuda = False
if use_cuda:
    torch.cuda.set_device(2)
    keypoints = keypoints.cuda()

prediction = Prediction(keypoints, NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, IMG_SMALL_HEIGHT, IMG_SMALL_WIDTH, use_cuda)
transform = transforms.Compose([
    transforms.ToTensor()
])

img = Image.open('../data/test_cropped_humans/0.jpg')
img = np.array(img)
img_t = transform(img)
if use_cuda:
    img_t = img_t.cuda()
result, pred_keypoints = prediction.predict(img_t)

pred_keypoints = pred_keypoints.cpu().numpy()
prediction.plot(img, result, pred_keypoints[0])

--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
NUM_CLASSES = 14        # number of keypoint classes
IMG_HEIGHT = 353        # network input height
IMG_WIDTH = 257         # network input width

IMG_SMALL_HEIGHT = 120  # height of the pre-upsampling prediction grid
IMG_SMALL_WIDTH = 96    # width of the pre-upsampling prediction grid

RADIUS = 25             # disk radius (in pixels) for the heatmap targets

epochs = 10
batch_size = 64

--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hackiey/keypoints/7224d5870b81284d650f988ee7ddd1d0437407ca/src/__init__.py

--------------------------------------------------------------------------------
/src/dataset.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import numpy as np
import os

transform = transforms.Compose([transforms.ToTensor()])

class KeypointsDataset(Dataset):
    def __init__(self, img_folder, labels, num_classes, img_height, img_width, radius, transform):
        self.num_classes = num_classes
        self.img_height = img_height
        self.img_width = img_width
        self.radius = radius
        self.transform = transform

        self.imgs = []
        self.labels = labels

        for i in range(len(self.labels)):
            self.imgs.append(os.path.join(img_folder, str(i) + '.jpg'))

        # Precomputed templates, each twice the image size and centered at
        # (img_width, img_height): the distance to the center, and the x/y
        # components of the vector pointing back at the center. Slicing an
        # (img_height, img_width) window out of these evaluates the distance
        # and offset fields around an arbitrary keypoint location.
        self.map_value = np.array([[np.linalg.norm([self.img_width - _x, self.img_height - _y])
                                    for _x in range(img_width * 2)] for _y in range(img_height * 2)])

        self.offsets_x_value = np.array([[self.img_width - _x for _x in range(self.img_width * 2)]
                                         for _y in range(self.img_height * 2)])
        self.offsets_y_value = np.array([[self.img_height - _y for _x in range(self.img_width * 2)]
                                         for _y in range(self.img_height * 2)])

    def __getitem__(self, index):
        img = self.transform(Image.open(self.imgs[index]))
        labels = self.labels[index]

        visible = np.zeros(self.num_classes)
        keypoints = np.zeros((self.num_classes, 2))

        maps = np.zeros((self.num_classes, self.img_height, self.img_width), dtype='float32')
        offsets_x = np.zeros((self.num_classes, self.img_height, self.img_width), dtype='float32')
        offsets_y = np.zeros((self.num_classes, self.img_height, self.img_width), dtype='float32')

        # labels is a flat [x, y, v, x, y, v, ...] list, one triple per keypoint class.
        for i in range(0, self.num_classes * 3, 3):
            x = labels[i]
            y = labels[i + 1]

            _i = i // 3

            visible[_i] = 1 if labels[i + 2] > 0 else 0

            keypoints[_i][0] = x
            keypoints[_i][1] = y

            # (0, 0) marks an unannotated keypoint: leave its targets all-zero.
            if x == 0 and y == 0:
                continue
            if self.img_height - y < 0 or self.img_width - x < 0:
                continue

            # Slice the precomputed fields so they are centered on (x, y), then
            # binarize the distance map into a disk of the configured radius.
            maps[_i] = self.map_value[self.img_height - y : self.img_height * 2 - y,
                                      self.img_width - x : self.img_width * 2 - x]
            maps[_i][maps[_i] <= self.radius] = 1
            maps[_i][maps[_i] > self.radius] = 0
            offsets_x[_i] = self.offsets_x_value[self.img_height - y : self.img_height * 2 - y,
                                                 self.img_width - x : self.img_width * 2 - x]
            offsets_y[_i] = self.offsets_y_value[self.img_height - y : self.img_height * 2 - y,
                                                 self.img_width - x : self.img_width * 2 - x]
        return img, (maps, offsets_x, offsets_y), (visible, keypoints)

    def __len__(self):
        return len(self.labels)
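
A minimal sketch of what `__getitem__` returns, assuming a folder of 353×257 crops laid out as in `train.py` (the label values here are invented for illustration):

```python
from src.dataset import KeypointsDataset, transform

labels = [[120, 60, 1] * 14]  # one image; all 14 triples identical, purely for illustration
dataset = KeypointsDataset('../data/train_cropped_humans/', labels, 14, 353, 257, 25, transform)

img, (maps, offsets_x, offsets_y), (visible, keypoints) = dataset[0]
# img:                 (3, 353, 257) float tensor
# maps:                (14, 353, 257) binary disks of radius 25 around each keypoint
# offsets_x/offsets_y: (14, 353, 257) per-pixel offsets pointing back at the keypoint
# visible: (14,), keypoints: (14, 2)
```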
--------------------------------------------------------------------------------
/src/model.py:
--------------------------------------------------------------------------------
import torch
import torchvision
import torch.nn as nn

class Keypoints(nn.Module):
    def __init__(self, num_classes, img_height=353, img_width=257, resnet=18):
        super(Keypoints, self).__init__()

        self.num_classes = num_classes
        # Three output channels per keypoint class: heatmap, x offset, y offset.
        self.num_outputs = num_classes * 3
        self.img_height = img_height
        self.img_width = img_width

        if resnet == 18:
            self.resnet = torchvision.models.resnet18()
            self.conv1by1 = nn.Conv2d(512, self.num_outputs, (1, 1))
        elif resnet == 101:
            self.resnet = torchvision.models.resnet101()
            self.conv1by1 = nn.Conv2d(2048, self.num_outputs, (1, 1))

        # Drop the average-pooling and fully-connected head; keep only the
        # convolutional backbone.
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-2])

        # With 353x257 inputs the ResNet-18 backbone yields 12x9 features; this
        # transposed convolution maps them to the 120x96 grid
        # (IMG_SMALL_HEIGHT x IMG_SMALL_WIDTH) consumed by Prediction.
        self.conv_transpose = nn.ConvTranspose2d(self.num_outputs, self.num_outputs, kernel_size=32, stride=8)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.resnet(x)
        x = self.conv1by1(x)
        x = self.conv_transpose(x)
        output = nn.Upsample(size=(self.img_height, self.img_width), mode='bilinear')(x)

        # Full-resolution predictions, used by the training loss.
        maps = self.sigmoid(output[:, :self.num_classes, :, :])
        offsets_x = output[:, self.num_classes:2 * self.num_classes, :, :]
        offsets_y = output[:, 2 * self.num_classes:3 * self.num_classes, :, :]

        # Low-resolution (pre-upsampling) predictions, used at inference time.
        maps_pred = self.sigmoid(x[:, :self.num_classes, :, :])
        offsets_x_pred = x[:, self.num_classes:2 * self.num_classes, :, :]
        offsets_y_pred = x[:, 2 * self.num_classes:3 * self.num_classes, :, :]

        return (maps, offsets_x, offsets_y), (maps_pred, offsets_x_pred, offsets_y_pred)
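
A quick shape check of the two prediction scales (a sketch; on PyTorch versions as old as the rest of this code, wrap the input in a `Variable` first):

```python
import torch
from src.model import Keypoints

model = Keypoints(num_classes=14)
x = torch.randn(1, 3, 353, 257)
(maps, ox, oy), (maps_small, ox_small, oy_small) = model(x)
# maps:       (1, 14, 353, 257) -- upsampled to the input resolution, for the loss
# maps_small: (1, 14, 120, 96)  -- the pre-upsampling grid consumed by Prediction
```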
--------------------------------------------------------------------------------
/src/prediction.py:
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np

class Prediction:
    def __init__(self, model, num_classes, img_height, img_width, img_small_height, img_small_width, use_cuda):
        self.model = model

        self.num_classes = num_classes
        self.img_height = img_height
        self.img_width = img_width

        self.img_small_height = img_small_height
        self.img_small_width = img_small_width

        self.use_cuda = use_cuda

        # Column and row index grids over the small prediction grid,
        # shaped (1, 1, small_height, small_width).
        self.offset_x_ij = torch.arange(0, self.img_small_width) \
            .repeat(self.img_small_height).view(1, 1, self.img_small_height, self.img_small_width)
        self.offset_y_ij = torch.arange(0, self.img_small_height) \
            .repeat(self.img_small_width).view(self.img_small_width, self.img_small_height).t().contiguous() \
            .view(1, 1, self.img_small_height, self.img_small_width)

        if self.use_cuda:
            self.offset_x_ij = self.offset_x_ij.cuda()
            self.offset_y_ij = self.offset_y_ij.cuda()

        # Broadcasting the grids against their negated transposes yields, for
        # every pair of grid cells, the displacement between them, scaled to
        # full-image pixel units.
        self.offset_x_add = (0 - self.offset_x_ij).view(self.img_small_height, self.img_small_width, 1, 1)
        self.offset_y_add = (0 - self.offset_y_ij).view(self.img_small_height, self.img_small_width, 1, 1)

        self.offset_x_ij = (self.offset_x_ij + self.offset_x_add) * self.img_width / self.img_small_width
        self.offset_y_ij = (self.offset_y_ij + self.offset_y_add) * self.img_height / self.img_small_height

    def predict(self, imgs):
        # imgs: torch.Tensor(3, height, width) or a batch (N, 3, height, width)
        if len(imgs.shape) == 4:
            imgs = imgs.view(-1, imgs.shape[1], imgs.shape[2], imgs.shape[3])
        elif len(imgs.shape) == 3:
            imgs = imgs.view(-1, imgs.shape[0], imgs.shape[1], imgs.shape[2])

        result, (maps_pred, offsets_x_pred, offsets_y_pred) = self.model.forward(Variable(imgs))
        maps_pred = maps_pred.data
        offsets_x_pred = offsets_x_pred.data
        offsets_y_pred = offsets_y_pred.data

        keypoints = torch.zeros(imgs.shape[0], self.num_classes, 2, 1)
        keypoints = keypoints.type(torch.cuda.LongTensor if self.use_cuda else torch.LongTensor)

        for i in range(imgs.shape[0]):
            for k in range(self.num_classes):
                # Each grid cell votes for the location its predicted offset
                # points at; votes are weighted by the cell's heatmap score.
                offsets_x_ij = self.offset_x_ij + offsets_x_pred[i][k]
                offsets_y_ij = self.offset_y_ij + offsets_y_pred[i][k]
                distances_ij = torch.sqrt(offsets_x_ij * offsets_x_ij + offsets_y_ij * offsets_y_ij)

                # Votes farther than one pixel contribute nothing.
                distances_ij[distances_ij > 1] = 1
                distances_ij = 1 - distances_ij
                score_ij = (distances_ij * maps_pred[i][k]).sum(3).sum(2)

                # Argmax over the 2D score map.
                v1, index_y = score_ij.max(0)
                v2, index_x = v1.max(0)

                keypoints[i][k][0] = index_y[index_x]
                keypoints[i][k][1] = index_x

        keypoints = keypoints.view(imgs.shape[0], self.num_classes, 2)

        maps_array = result[0]
        offsets_x_array = result[1]
        offsets_y_array = result[2]

        return (maps_array, offsets_x_array, offsets_y_array), keypoints

    def plot(self, plt_img, result, keypoints):
        maps_array = result[0]
        offsets_x_array = result[1]
        offsets_y_array = result[2]
        if self.use_cuda:
            maps_array = maps_array.cpu().data.numpy()
            offsets_x_array = offsets_x_array.cpu().data.numpy()
            offsets_y_array = offsets_y_array.cpu().data.numpy()
        else:
            maps_array = maps_array.data.numpy()
            offsets_x_array = offsets_x_array.data.numpy()
            offsets_y_array = offsets_y_array.data.numpy()

        plt.imshow(plt_img)

        for i in range(self.num_classes):
            heatmap = plt_img.copy()
            plt.figure(figsize=(12, 9))

            # heatmap
            plt.subplot(1, 4, 1)
            plt.title(str(i))
            indexes = maps_array[0][i] > 0.5

            heatmap[indexes] = maps_array[0][i].repeat(3).reshape((self.img_height, self.img_width, 3))[indexes]
            plt.imshow(heatmap)

            # offset magnitudes
            offsets = np.sqrt(offsets_x_array[0][i] * offsets_x_array[0][i] + offsets_y_array[0][i] * offsets_y_array[0][i])
            offsets_repeated = offsets.repeat(3)

            plt.subplot(1, 4, 2)
            plt.title(str(i))
            offsets_array = offsets_repeated.reshape((self.img_height, self.img_width, 3))
            offsets_array = offsets_array / offsets_array.max()
            plt.imshow(offsets_array)

            # offsets restricted to the heatmap disk
            plt.subplot(1, 4, 3)
            plt.title(str(i))
            offsets_array = np.zeros((self.img_height, self.img_width, 3))
            offsets_array[indexes] = offsets_repeated.reshape((self.img_height, self.img_width, 3))[indexes]
            offsets_array = offsets_array / offsets_array.max()
            plt.imshow(offsets_array)

            # final result: scale grid indices back to image coordinates
            # (keypoints[i] is (row, col) on the small grid)
            plt.subplot(1, 4, 4)
            plt.imshow(plt_img)
            plt.scatter(keypoints[i][1] * self.img_width / self.img_small_width,
                        keypoints[i][0] * self.img_height / self.img_small_height)
            plt.show()
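
For reference, the nested loop in `predict` above implements a Hough-style voting decode on the small grid: every cell p votes for the location its predicted offset F_k(p) points at, weighted by its heatmap activation h_k(p), with votes decaying linearly to zero beyond one pixel (in full-image units). The score the loop assigns to a candidate cell x is

```
\mathrm{score}_k(x) = \sum_{p} h_k(p)\,\max\bigl(0,\; 1 - \lVert p + F_k(p) - x \rVert\bigr)
```

and the predicted keypoint is the argmax of this score map, scaled back to image coordinates in `plot`. This mirrors the heatmap-plus-offsets decoding of the paper, with a one-pixel linear kernel standing in for the paper's bilinear kernel.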
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from config import NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, RADIUS, batch_size
from src.model import Keypoints
from src.dataset import KeypointsDataset, transform

def custom_loss(predictions_maps, maps, predictions_offsets_x, offsets_x, predictions_offsets_y, offsets_y):
    # Heatmap loss: binary cross-entropy over every pixel.
    loss_h = bceLoss(predictions_maps, maps)

    # Offset loss: smooth L1 on the offset error, evaluated only inside the
    # ground-truth disks (maps == 1).
    distance_x = predictions_offsets_x[maps == 1] - offsets_x[maps == 1]
    distance_y = predictions_offsets_y[maps == 1] - offsets_y[maps == 1]
    distances = torch.sqrt(distance_x * distance_x + distance_y * distance_y)
    zero_distances = Variable(
        torch.zeros(distance_x.shape).cuda() if use_cuda else torch.zeros(distance_x.shape))
    loss_o = smoothL1Loss(distances, zero_distances)
    loss = 4 * loss_h + loss_o
    return loss

def forward(sample_batched, model):
    X = sample_batched[0]
    maps, offsets_x, offsets_y = sample_batched[1]

    maps = Variable(maps.cuda() if use_cuda else maps)
    offsets_x = Variable(offsets_x.cuda() if use_cuda else offsets_x)
    offsets_y = Variable(offsets_y.cuda() if use_cuda else offsets_y)

    # forward
    X = Variable(X.cuda() if use_cuda else X)
    (predictions_maps, predictions_offsets_x, predictions_offsets_y), pred = model.forward(X)

    return custom_loss(predictions_maps, maps, predictions_offsets_x, offsets_x,
                       predictions_offsets_y, offsets_y)

def fit(train_data, test_data, model, epochs, checkpoint_path=''):
    for epoch in range(epochs):
        # training
        train_loss = 0.0
        for i_batch, sample_batched in enumerate(train_data):
            optimizer.zero_grad()

            loss = forward(sample_batched, model)

            loss.backward()
            optimizer.step()

            train_loss += loss.data[0]

            print('[%d, %5d] loss: %.3f' % (epoch + 1, i_batch + 1, loss.data[0]), end='')
            print('\r', end='')
        print('train loss:', train_loss / (i_batch + 1))

        # evaluation
        test_loss = 0.0
        for i_batch, sample_batched in enumerate(test_data):
            loss = forward(sample_batched, model)
            test_loss += loss.data[0]
        print('test loss:', test_loss / (i_batch + 1))

        torch.save(model.state_dict(), checkpoint_path + 'model_2_1_' + str(epoch) + '.pth')

# dataset
with open('../data/annotation/annotation_train_cropped_humans.pkl', 'rb') as f:
    train_labels = pickle.load(f)
train_dataset = KeypointsDataset('../data/train_cropped_humans/',
    train_labels, NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, RADIUS, transform=transform)
train_data = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=32)

with open('../data/annotation/annotation_test_cropped_humans.pkl', 'rb') as f:
    test_labels = pickle.load(f)
test_dataset = KeypointsDataset('../data/test_cropped_humans/',
    test_labels, NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, RADIUS, transform=transform)
test_data = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=32)

use_cuda = torch.cuda.is_available()
# use_cuda = False
if use_cuda:
    torch.cuda.set_device(2)

# loss
smoothL1Loss = nn.SmoothL1Loss()
bceLoss = nn.BCELoss()
# model
keypoints = Keypoints(NUM_CLASSES)
keypoints = keypoints.cuda() if use_cuda else keypoints
# optimizer
optimizer = optim.Adam(keypoints.parameters(), lr=0.0001)

fit(train_data, test_data, keypoints, epochs=200, checkpoint_path='../checkpoints/')
--------------------------------------------------------------------------------