├── .gitignore
├── LICENSE
├── README.md
├── analysis.py
├── config.py
├── src
│   ├── __init__.py
│   ├── dataset.py
│   ├── model.py
│   └── prediction.py
└── train.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 hackiey

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# keypoints
A PyTorch implementation of Google's paper "Towards Accurate Multi-person Pose Estimation in the Wild".

# dataset
## labels example
```
[[x1,y1,v1,x2,y2,v2,...],[...]]
```
Each inner list holds the keypoints of one cropped person as flat (x, y, v) triples, where v = 1 means the keypoint is visible and v = 2 means it is labeled but not visible.
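
With `NUM_CLASSES = 14` (see `config.py`), each inner list has 14 × 3 = 42 numbers. For illustration only (the coordinate values below are invented), an annotation for one cropped person might look like:

```python
labels = [
    [120, 60, 1,   # keypoint 0 at (x=120, y=60), visible
     131, 95, 2,   # keypoint 1 at (x=131, y=95), labeled but not visible
     ...]          # ...and 12 more (x, y, v) triples
]
```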
--------------------------------------------------------------------------------
/analysis.py:
--------------------------------------------------------------------------------
import torch
from torchvision import transforms
from config import NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, IMG_SMALL_HEIGHT, IMG_SMALL_WIDTH
from src.model import Keypoints
from src.prediction import Prediction
from PIL import Image
import numpy as np

# model
keypoints = Keypoints(NUM_CLASSES, img_height=IMG_HEIGHT, img_width=IMG_WIDTH)
keypoints.load_state_dict(torch.load('../character_checkpoints/model_1_18_4.pth'))

# cuda
use_cuda = torch.cuda.is_available()
# use_cuda = False
if use_cuda:
    torch.cuda.set_device(2)
    keypoints = keypoints.cuda()

prediction = Prediction(keypoints, NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, IMG_SMALL_HEIGHT, IMG_SMALL_WIDTH, use_cuda)
transform = transforms.Compose([
    transforms.ToTensor()
])

img = Image.open('../data/test_cropped_humans/0.jpg')
img = np.array(img)
img_t = transform(img)
if use_cuda:
    img_t = img_t.cuda()
result, pred_keypoints = prediction.predict(img_t)

pred_keypoints = pred_keypoints.cpu().numpy()
prediction.plot(img, result, pred_keypoints[0])

--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
NUM_CLASSES = 14        # number of keypoint classes
IMG_HEIGHT = 353        # network input height
IMG_WIDTH = 257         # network input width

IMG_SMALL_HEIGHT = 120  # height of the pre-upsampling prediction grid
IMG_SMALL_WIDTH = 96    # width of the pre-upsampling prediction grid

RADIUS = 25             # disk radius (in pixels) for the heatmap targets

epochs = 10
batch_size = 64

--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hackiey/keypoints/7224d5870b81284d650f988ee7ddd1d0437407ca/src/__init__.py

--------------------------------------------------------------------------------
/src/dataset.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import numpy as np
import os

transform = transforms.Compose([transforms.ToTensor()])

class KeypointsDataset(Dataset):
    def __init__(self, img_folder, labels, num_classes, img_height, img_width, radius, transform):
        self.num_classes = num_classes
        self.img_height = img_height
        self.img_width = img_width
        self.radius = radius
        self.transform = transform

        self.imgs = []
        self.labels = labels

        for i in range(len(self.labels)):
            self.imgs.append(os.path.join(img_folder, str(i) + '.jpg'))

        # Precomputed templates, each twice the image size and centered at
        # (img_width, img_height): the distance to the center, and the x/y
        # components of the vector pointing back at the center. Slicing an
        # (img_height, img_width) window out of these evaluates the distance
        # and offset fields around an arbitrary keypoint location.
        self.map_value = np.array([[np.linalg.norm([self.img_width - _x, self.img_height - _y])
                                    for _x in range(img_width * 2)] for _y in range(img_height * 2)])

        self.offsets_x_value = np.array([[self.img_width - _x for _x in range(self.img_width * 2)]
                                         for _y in range(self.img_height * 2)])
        self.offsets_y_value = np.array([[self.img_height - _y for _x in range(self.img_width * 2)]
                                         for _y in range(self.img_height * 2)])

    def __getitem__(self, index):
        img = self.transform(Image.open(self.imgs[index]))
        labels = self.labels[index]

        visible = np.zeros(self.num_classes)
        keypoints = np.zeros((self.num_classes, 2))

        maps = np.zeros((self.num_classes, self.img_height, self.img_width), dtype='float32')
        offsets_x = np.zeros((self.num_classes, self.img_height, self.img_width), dtype='float32')
        offsets_y = np.zeros((self.num_classes, self.img_height, self.img_width), dtype='float32')

        # labels is a flat [x, y, v, x, y, v, ...] list, one triple per keypoint class.
        for i in range(0, self.num_classes * 3, 3):
            x = labels[i]
            y = labels[i + 1]

            _i = i // 3

            visible[_i] = 1 if labels[i + 2] > 0 else 0

            keypoints[_i][0] = x
            keypoints[_i][1] = y

            # (0, 0) marks an unannotated keypoint: leave its targets all-zero.
            if x == 0 and y == 0:
                continue
            if self.img_height - y < 0 or self.img_width - x < 0:
                continue

            # Slice the precomputed fields so they are centered on (x, y), then
            # binarize the distance map into a disk of the configured radius.
            maps[_i] = self.map_value[self.img_height - y : self.img_height * 2 - y,
                                      self.img_width - x : self.img_width * 2 - x]
            maps[_i][maps[_i] <= self.radius] = 1
            maps[_i][maps[_i] > self.radius] = 0
            offsets_x[_i] = self.offsets_x_value[self.img_height - y : self.img_height * 2 - y,
                                                 self.img_width - x : self.img_width * 2 - x]
            offsets_y[_i] = self.offsets_y_value[self.img_height - y : self.img_height * 2 - y,
                                                 self.img_width - x : self.img_width * 2 - x]
        return img, (maps, offsets_x, offsets_y), (visible, keypoints)

    def __len__(self):
        return len(self.labels)
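
A minimal sketch of what `__getitem__` returns, assuming a folder of 353×257 crops laid out as in `train.py` (the label values here are invented for illustration):

```python
from src.dataset import KeypointsDataset, transform

labels = [[120, 60, 1] * 14]  # one image; all 14 triples identical, purely for illustration
dataset = KeypointsDataset('../data/train_cropped_humans/', labels, 14, 353, 257, 25, transform)

img, (maps, offsets_x, offsets_y), (visible, keypoints) = dataset[0]
# img:                 (3, 353, 257) float tensor
# maps:                (14, 353, 257) binary disks of radius 25 around each keypoint
# offsets_x/offsets_y: (14, 353, 257) per-pixel offsets pointing back at the keypoint
# visible: (14,), keypoints: (14, 2)
```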
--------------------------------------------------------------------------------
/src/model.py:
--------------------------------------------------------------------------------
import torch
import torchvision
import torch.nn as nn

class Keypoints(nn.Module):
    def __init__(self, num_classes, img_height=353, img_width=257, resnet=18):
        super(Keypoints, self).__init__()

        self.num_classes = num_classes
        # Three output channels per keypoint class: heatmap, x offset, y offset.
        self.num_outputs = num_classes * 3
        self.img_height = img_height
        self.img_width = img_width

        if resnet == 18:
            self.resnet = torchvision.models.resnet18()
            self.conv1by1 = nn.Conv2d(512, self.num_outputs, (1, 1))
        elif resnet == 101:
            self.resnet = torchvision.models.resnet101()
            self.conv1by1 = nn.Conv2d(2048, self.num_outputs, (1, 1))

        # Drop the average-pooling and fully-connected head; keep only the
        # convolutional backbone.
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-2])

        # With 353x257 inputs the ResNet-18 backbone yields 12x9 features; this
        # transposed convolution maps them to the 120x96 grid
        # (IMG_SMALL_HEIGHT x IMG_SMALL_WIDTH) consumed by Prediction.
        self.conv_transpose = nn.ConvTranspose2d(self.num_outputs, self.num_outputs, kernel_size=32, stride=8)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.resnet(x)
        x = self.conv1by1(x)
        x = self.conv_transpose(x)
        output = nn.Upsample(size=(self.img_height, self.img_width), mode='bilinear')(x)

        # Full-resolution predictions, used by the training loss.
        maps = self.sigmoid(output[:, :self.num_classes, :, :])
        offsets_x = output[:, self.num_classes:2 * self.num_classes, :, :]
        offsets_y = output[:, 2 * self.num_classes:3 * self.num_classes, :, :]

        # Low-resolution (pre-upsampling) predictions, used at inference time.
        maps_pred = self.sigmoid(x[:, :self.num_classes, :, :])
        offsets_x_pred = x[:, self.num_classes:2 * self.num_classes, :, :]
        offsets_y_pred = x[:, 2 * self.num_classes:3 * self.num_classes, :, :]

        return (maps, offsets_x, offsets_y), (maps_pred, offsets_x_pred, offsets_y_pred)
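
A quick shape check of the two prediction scales (a sketch; on PyTorch versions as old as the rest of this code, wrap the input in a `Variable` first):

```python
import torch
from src.model import Keypoints

model = Keypoints(num_classes=14)
x = torch.randn(1, 3, 353, 257)
(maps, ox, oy), (maps_small, ox_small, oy_small) = model(x)
# maps:       (1, 14, 353, 257) -- upsampled to the input resolution, for the loss
# maps_small: (1, 14, 120, 96)  -- the pre-upsampling grid consumed by Prediction
```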
--------------------------------------------------------------------------------
/src/prediction.py:
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np

class Prediction:
    def __init__(self, model, num_classes, img_height, img_width, img_small_height, img_small_width, use_cuda):
        self.model = model

        self.num_classes = num_classes
        self.img_height = img_height
        self.img_width = img_width

        self.img_small_height = img_small_height
        self.img_small_width = img_small_width

        self.use_cuda = use_cuda

        # Column and row index grids over the small prediction grid,
        # shaped (1, 1, small_height, small_width).
        self.offset_x_ij = torch.arange(0, self.img_small_width) \
            .repeat(self.img_small_height).view(1, 1, self.img_small_height, self.img_small_width)
        self.offset_y_ij = torch.arange(0, self.img_small_height) \
            .repeat(self.img_small_width).view(self.img_small_width, self.img_small_height).t().contiguous() \
            .view(1, 1, self.img_small_height, self.img_small_width)

        if self.use_cuda:
            self.offset_x_ij = self.offset_x_ij.cuda()
            self.offset_y_ij = self.offset_y_ij.cuda()

        # Broadcasting the grids against their negated transposes yields, for
        # every pair of grid cells, the displacement between them, scaled to
        # full-image pixel units.
        self.offset_x_add = (0 - self.offset_x_ij).view(self.img_small_height, self.img_small_width, 1, 1)
        self.offset_y_add = (0 - self.offset_y_ij).view(self.img_small_height, self.img_small_width, 1, 1)

        self.offset_x_ij = (self.offset_x_ij + self.offset_x_add) * self.img_width / self.img_small_width
        self.offset_y_ij = (self.offset_y_ij + self.offset_y_add) * self.img_height / self.img_small_height

    def predict(self, imgs):
        # imgs: torch.Tensor(3, height, width) or a batch (N, 3, height, width)
        if len(imgs.shape) == 4:
            imgs = imgs.view(-1, imgs.shape[1], imgs.shape[2], imgs.shape[3])
        elif len(imgs.shape) == 3:
            imgs = imgs.view(-1, imgs.shape[0], imgs.shape[1], imgs.shape[2])

        result, (maps_pred, offsets_x_pred, offsets_y_pred) = self.model.forward(Variable(imgs))
        maps_pred = maps_pred.data
        offsets_x_pred = offsets_x_pred.data
        offsets_y_pred = offsets_y_pred.data

        keypoints = torch.zeros(imgs.shape[0], self.num_classes, 2, 1)
        keypoints = keypoints.type(torch.cuda.LongTensor if self.use_cuda else torch.LongTensor)

        for i in range(imgs.shape[0]):
            for k in range(self.num_classes):
                # Each grid cell votes for the location its predicted offset
                # points at; votes are weighted by the cell's heatmap score.
                offsets_x_ij = self.offset_x_ij + offsets_x_pred[i][k]
                offsets_y_ij = self.offset_y_ij + offsets_y_pred[i][k]
                distances_ij = torch.sqrt(offsets_x_ij * offsets_x_ij + offsets_y_ij * offsets_y_ij)

                # Votes farther than one pixel contribute nothing.
                distances_ij[distances_ij > 1] = 1
                distances_ij = 1 - distances_ij
                score_ij = (distances_ij * maps_pred[i][k]).sum(3).sum(2)

                # Argmax over the 2D score map.
                v1, index_y = score_ij.max(0)
                v2, index_x = v1.max(0)

                keypoints[i][k][0] = index_y[index_x]
                keypoints[i][k][1] = index_x

        keypoints = keypoints.view(imgs.shape[0], self.num_classes, 2)

        maps_array = result[0]
        offsets_x_array = result[1]
        offsets_y_array = result[2]

        return (maps_array, offsets_x_array, offsets_y_array), keypoints

    def plot(self, plt_img, result, keypoints):
        maps_array = result[0]
        offsets_x_array = result[1]
        offsets_y_array = result[2]
        if self.use_cuda:
            maps_array = maps_array.cpu().data.numpy()
            offsets_x_array = offsets_x_array.cpu().data.numpy()
            offsets_y_array = offsets_y_array.cpu().data.numpy()
        else:
            maps_array = maps_array.data.numpy()
            offsets_x_array = offsets_x_array.data.numpy()
            offsets_y_array = offsets_y_array.data.numpy()

        plt.imshow(plt_img)

        for i in range(self.num_classes):
            heatmap = plt_img.copy()
            plt.figure(figsize=(12, 9))

            # heatmap
            plt.subplot(1, 4, 1)
            plt.title(str(i))
            indexes = maps_array[0][i] > 0.5

            heatmap[indexes] = maps_array[0][i].repeat(3).reshape((self.img_height, self.img_width, 3))[indexes]
            plt.imshow(heatmap)

            # offset magnitudes
            offsets = np.sqrt(offsets_x_array[0][i] * offsets_x_array[0][i] + offsets_y_array[0][i] * offsets_y_array[0][i])
            offsets_repeated = offsets.repeat(3)

            plt.subplot(1, 4, 2)
            plt.title(str(i))
            offsets_array = offsets_repeated.reshape((self.img_height, self.img_width, 3))
            offsets_array = offsets_array / offsets_array.max()
            plt.imshow(offsets_array)

            # offsets restricted to the heatmap disk
            plt.subplot(1, 4, 3)
            plt.title(str(i))
            offsets_array = np.zeros((self.img_height, self.img_width, 3))
            offsets_array[indexes] = offsets_repeated.reshape((self.img_height, self.img_width, 3))[indexes]
            offsets_array = offsets_array / offsets_array.max()
            plt.imshow(offsets_array)

            # final result: scale grid indices back to image coordinates
            # (keypoints[i] is (row, col) on the small grid)
            plt.subplot(1, 4, 4)
            plt.imshow(plt_img)
            plt.scatter(keypoints[i][1] * self.img_width / self.img_small_width,
                        keypoints[i][0] * self.img_height / self.img_small_height)
            plt.show()
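
For reference, the nested loop in `predict` above implements a Hough-style voting decode on the small grid: every cell p votes for the location its predicted offset F_k(p) points at, weighted by its heatmap activation h_k(p), with votes decaying linearly to zero beyond one pixel (in full-image units). The score the loop assigns to a candidate cell x is

```
\mathrm{score}_k(x) = \sum_{p} h_k(p)\,\max\bigl(0,\; 1 - \lVert p + F_k(p) - x \rVert\bigr)
```

and the predicted keypoint is the argmax of this score map, scaled back to image coordinates in `plot`. This mirrors the heatmap-plus-offsets decoding of the paper, with a one-pixel linear kernel standing in for the paper's bilinear kernel.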
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from config import NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, RADIUS, batch_size
from src.model import Keypoints
from src.dataset import KeypointsDataset, transform

def custom_loss(predictions_maps, maps, predictions_offsets_x, offsets_x, predictions_offsets_y, offsets_y):
    # Heatmap loss: binary cross-entropy over every pixel.
    loss_h = bceLoss(predictions_maps, maps)

    # Offset loss: smooth L1 on the offset error, evaluated only inside the
    # ground-truth disks (maps == 1).
    distance_x = predictions_offsets_x[maps == 1] - offsets_x[maps == 1]
    distance_y = predictions_offsets_y[maps == 1] - offsets_y[maps == 1]
    distances = torch.sqrt(distance_x * distance_x + distance_y * distance_y)
    zero_distances = Variable(
        torch.zeros(distance_x.shape).cuda() if use_cuda else torch.zeros(distance_x.shape))
    loss_o = smoothL1Loss(distances, zero_distances)
    loss = 4 * loss_h + loss_o
    return loss

def forward(sample_batched, model):
    X = sample_batched[0]
    maps, offsets_x, offsets_y = sample_batched[1]

    maps = Variable(maps.cuda() if use_cuda else maps)
    offsets_x = Variable(offsets_x.cuda() if use_cuda else offsets_x)
    offsets_y = Variable(offsets_y.cuda() if use_cuda else offsets_y)

    # forward
    X = Variable(X.cuda() if use_cuda else X)
    (predictions_maps, predictions_offsets_x, predictions_offsets_y), pred = model.forward(X)

    return custom_loss(predictions_maps, maps, predictions_offsets_x, offsets_x,
                       predictions_offsets_y, offsets_y)

def fit(train_data, test_data, model, epochs, checkpoint_path=''):
    for epoch in range(epochs):
        # training
        train_loss = 0.0
        for i_batch, sample_batched in enumerate(train_data):
            optimizer.zero_grad()

            loss = forward(sample_batched, model)

            loss.backward()
            optimizer.step()

            train_loss += loss.data[0]

            print('[%d, %5d] loss: %.3f' % (epoch + 1, i_batch + 1, loss.data[0]), end='')
            print('\r', end='')
        print('train loss:', train_loss / (i_batch + 1))

        # evaluation
        test_loss = 0.0
        for i_batch, sample_batched in enumerate(test_data):
            loss = forward(sample_batched, model)
            test_loss += loss.data[0]
        print('test loss:', test_loss / (i_batch + 1))

        torch.save(model.state_dict(), checkpoint_path + 'model_2_1_' + str(epoch) + '.pth')

# dataset
with open('../data/annotation/annotation_train_cropped_humans.pkl', 'rb') as f:
    train_labels = pickle.load(f)
train_dataset = KeypointsDataset('../data/train_cropped_humans/',
    train_labels, NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, RADIUS, transform=transform)
train_data = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=32)

with open('../data/annotation/annotation_test_cropped_humans.pkl', 'rb') as f:
    test_labels = pickle.load(f)
test_dataset = KeypointsDataset('../data/test_cropped_humans/',
    test_labels, NUM_CLASSES, IMG_HEIGHT, IMG_WIDTH, RADIUS, transform=transform)
test_data = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=32)

use_cuda = torch.cuda.is_available()
# use_cuda = False
if use_cuda:
    torch.cuda.set_device(2)

# loss
smoothL1Loss = nn.SmoothL1Loss()
bceLoss = nn.BCELoss()
# model
keypoints = Keypoints(NUM_CLASSES)
keypoints = keypoints.cuda() if use_cuda else keypoints
# optimizer
optimizer = optim.Adam(keypoints.parameters(), lr=0.0001)

fit(train_data, test_data, keypoints, epochs=200, checkpoint_path='../checkpoints/')
--------------------------------------------------------------------------------