├── Arial.ttf ├── LICENSE.txt ├── README.md ├── Sim3DR ├── Sim3DR.py ├── __init__.py ├── _init_paths.py ├── build_sim3dr.sh ├── lib │ ├── rasterize.cpp │ ├── rasterize.h │ ├── rasterize.pyx │ └── rasterize_kernel.cpp ├── lighting.py ├── readme.md ├── setup.py └── tests │ ├── CMakeLists.txt │ ├── io.cpp │ ├── io.h │ └── test.cpp ├── data ├── 300w_lp_aflw2000.yaml ├── 300w_lp_biwi.yaml ├── agora_coco.yaml ├── cmu_panoptic_coco.yaml ├── hyp-p6.yaml └── widerface_coco.yaml ├── demos ├── image.py ├── image_vis3d.py ├── image_vis3d_6DRepNet.py ├── video.py └── video_vis3d.py ├── exps ├── AGORA │ ├── agora_evaluation │ │ ├── get_joints_verts_from_dataframe.py │ │ └── projection.py │ ├── data_process_hpe.py │ ├── data_process_hpe_slim.py │ └── hpe_utils.py ├── CMU │ ├── data_split_hpe.py │ ├── data_statistic_hpe.py │ ├── data_statistic_hpe_slim.py │ ├── hpe_utils.py │ ├── panoptic-toolbox │ │ ├── getDB_panoptic_ver1_2_hdVideo.sh │ │ ├── getDB_panoptic_ver1_2_hdVideo_t1.sh │ │ ├── getDB_panoptic_ver1_2_hdVideo_t2.sh │ │ ├── getDB_panoptic_ver1_2_hdVideo_t3.sh │ │ ├── getDB_panoptic_ver1_2_hdVideo_t4.sh │ │ └── getData_hdVideo.sh │ ├── released_seqs_excel.xlsx │ └── selected_HPE_list.txt ├── compare_3ddfa.py ├── compare_3ddfa_v2.py ├── compare_6DRepNet.py ├── compare_FSANet.py ├── compare_HopeNet.py ├── compare_SynergyNet.py ├── compare_WHENetONNX.py ├── compare_dad3dnet.py ├── compare_error_analysis_v2.py ├── compare_img2pose.py ├── convert_coco_style_300wlp_aflw2000.py ├── convert_coco_style_300wlp_biwi.py ├── convert_coco_style_img2pose.py ├── gen_dataset_full_AGORA_CMU.py ├── gen_dataset_single_AGORA.py ├── gen_dataset_single_CMU.py ├── sixdrepnet.zip └── statistic_angles.py ├── materials ├── 000000002685_vis3d_res.jpg ├── 000000018380_vis3d_res.jpg ├── 000000038829_vis3d_res.jpg ├── 000000081988_vis3d_res.jpg ├── 000000161925_vis3d_res.jpg ├── 000000183648_vis3d_res.jpg ├── 000002_mpiinew_test_DirectMHP_vis3d.gif ├── 000003_mpiinew_test_DirectMHP_vis3d.gif ├── datasetexamples.png ├── full_range.png └── illustration.png ├── models ├── common.py ├── experimental.py ├── yolo.py ├── yolov5l6.yaml ├── yolov5m6.yaml └── yolov5s6.yaml ├── pose_references ├── reference_3d_5_points_trans.npy ├── reference_3d_68_points_trans.npy ├── triangles.npy └── vertices_trans.npy ├── requirements.txt ├── test_imgs ├── AGORA │ ├── agora_val_2000400001.jpg │ └── agora_val_2000400205.jpg ├── CMU │ ├── cmu_val_10400060013.jpg │ ├── cmu_val_10602142026.jpg │ └── cmu_val_11500144012.jpg └── COCO │ ├── 000000002685.jpg │ ├── 000000018380.jpg │ ├── 000000038829.jpg │ ├── 000000081988.jpg │ ├── 000000161925.jpg │ └── 000000183648.jpg ├── train.py ├── utils ├── activations.py ├── augmentations.py ├── autoanchor.py ├── callbacks.py ├── datasets.py ├── downloads.py ├── general.py ├── labels.py ├── labels_v2.py ├── loggers │ ├── __init__.py │ └── wandb │ │ ├── README.md │ │ ├── __init__.py │ │ ├── log_dataset.py │ │ ├── sweep.py │ │ ├── sweep.yaml │ │ └── wandb_utils.py ├── loss.py ├── mae.py ├── metrics.py ├── plots.py ├── renderer.py └── torch_utils.py ├── val.py └── weights └── readme.md /Arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/Arial.ttf -------------------------------------------------------------------------------- /Sim3DR/Sim3DR.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from . 
import _init_paths 4 | import numpy as np 5 | import Sim3DR_Cython 6 | 7 | 8 | def get_normal(vertices, triangles): 9 | normal = np.zeros_like(vertices, dtype=np.float32) 10 | Sim3DR_Cython.get_normal(normal, vertices, triangles, vertices.shape[0], triangles.shape[0]) 11 | return normal 12 | 13 | 14 | def rasterize(vertices, triangles, colors, bg=None, 15 | height=None, width=None, channel=None, 16 | reverse=False): 17 | if bg is not None: 18 | height, width, channel = bg.shape 19 | else: 20 | assert height is not None and width is not None and channel is not None 21 | bg = np.zeros((height, width, channel), dtype=np.uint8) 22 | 23 | buffer = np.zeros((height, width), dtype=np.float32) - 1e8 24 | 25 | if colors.dtype != np.float32: 26 | colors = colors.astype(np.float32) 27 | Sim3DR_Cython.rasterize(bg, vertices, triangles, colors, buffer, triangles.shape[0], height, width, channel, 28 | reverse=reverse) 29 | return bg 30 | -------------------------------------------------------------------------------- /Sim3DR/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from .Sim3DR import get_normal, rasterize 4 | from .lighting import RenderPipeline 5 | -------------------------------------------------------------------------------- /Sim3DR/_init_paths.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os.path as osp 4 | import sys 5 | 6 | 7 | def add_path(path): 8 | if path not in sys.path: 9 | sys.path.insert(0, path) 10 | 11 | 12 | this_dir = osp.dirname(__file__) 13 | lib_path = osp.join(this_dir, '.') 14 | add_path(lib_path) 15 | -------------------------------------------------------------------------------- /Sim3DR/build_sim3dr.sh: -------------------------------------------------------------------------------- 1 | python3 setup.py build_ext --inplace -------------------------------------------------------------------------------- /Sim3DR/lib/rasterize.h: -------------------------------------------------------------------------------- 1 | #ifndef MESH_CORE_HPP_ 2 | #define MESH_CORE_HPP_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | class Point3D { 14 | public: 15 | float x; 16 | float y; 17 | float z; 18 | 19 | public: 20 | Point3D() : x(0.f), y(0.f), z(0.f) {} 21 | Point3D(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {} 22 | 23 | void initialize(float x_, float y_, float z_){ 24 | this->x = x_; this->y = y_; this->z = z_; 25 | } 26 | 27 | Point3D cross(Point3D &p){ 28 | Point3D c; 29 | c.x = this->y * p.z - this->z * p.y; 30 | c.y = this->z * p.x - this->x * p.z; 31 | c.z = this->x * p.y - this->y * p.x; 32 | return c; 33 | } 34 | 35 | float dot(Point3D &p) { 36 | return this->x * p.x + this->y * p.y + this->z * p.z; 37 | } 38 | 39 | Point3D operator-(const Point3D &p) { 40 | Point3D np; 41 | np.x = this->x - p.x; 42 | np.y = this->y - p.y; 43 | np.z = this->z - p.z; 44 | return np; 45 | } 46 | 47 | }; 48 | 49 | class Point { 50 | public: 51 | float x; 52 | float y; 53 | 54 | public: 55 | Point() : x(0.f), y(0.f) {} 56 | Point(float x_, float y_) : x(x_), y(y_) {} 57 | float dot(Point p) { 58 | return this->x * p.x + this->y * p.y; 59 | } 60 | 61 | Point operator-(const Point &p) { 62 | Point np; 63 | np.x = this->x - p.x; 64 | np.y = this->y - p.y; 65 | return np; 66 | } 67 | 68 | Point operator+(const Point &p) { 69 | Point np; 70 | np.x = this->x + p.x; 71 | np.y = 
this->y + p.y; 72 | return np; 73 | } 74 | 75 | Point operator*(float s) { 76 | Point np; 77 | np.x = s * this->x; 78 | np.y = s * this->y; 79 | return np; 80 | } 81 | }; 82 | 83 | 84 | bool is_point_in_tri(Point p, Point p0, Point p1, Point p2); 85 | 86 | void get_point_weight(float *weight, Point p, Point p0, Point p1, Point p2); 87 | 88 | void _get_tri_normal(float *tri_normal, float *vertices, int *triangles, int ntri, bool norm_flg); 89 | 90 | void _get_ver_normal(float *ver_normal, float *tri_normal, int *triangles, int nver, int ntri); 91 | 92 | void _get_normal(float *ver_normal, float *vertices, int *triangles, int nver, int ntri); 93 | 94 | void _rasterize_triangles( 95 | float *vertices, int *triangles, float *depth_buffer, int *triangle_buffer, float *barycentric_weight, 96 | int ntri, int h, int w); 97 | 98 | void _rasterize( 99 | unsigned char *image, float *vertices, int *triangles, float *colors, 100 | float *depth_buffer, int ntri, int h, int w, int c, float alpha, bool reverse); 101 | 102 | void _render_texture_core( 103 | float *image, float *vertices, int *triangles, 104 | float *texture, float *tex_coords, int *tex_triangles, 105 | float *depth_buffer, 106 | int nver, int tex_nver, int ntri, 107 | int h, int w, int c, 108 | int tex_h, int tex_w, int tex_c, 109 | int mapping_type); 110 | 111 | void _write_obj_with_colors_texture(string filename, string mtl_name, 112 | float *vertices, int *triangles, float *colors, float *uv_coords, 113 | int nver, int ntri, int ntexver); 114 | 115 | #endif -------------------------------------------------------------------------------- /Sim3DR/lib/rasterize.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | # from libcpp.string cimport string 4 | cimport cython 5 | from libcpp cimport bool 6 | 7 | # from cpython import bool 8 | 9 | # use the Numpy-C-API from Cython 10 | np.import_array() 11 | 12 | # cdefine the signature of our c function 13 | cdef extern from "rasterize.h": 14 | void _rasterize_triangles( 15 | float*vertices, int*triangles, float*depth_buffer, int*triangle_buffer, float*barycentric_weight, 16 | int ntri, int h, int w 17 | ) 18 | 19 | void _rasterize( 20 | unsigned char*image, float*vertices, int*triangles, float*colors, float*depth_buffer, 21 | int ntri, int h, int w, int c, float alpha, bool reverse 22 | ) 23 | 24 | # void _render_texture_core( 25 | # float* image, float* vertices, int* triangles, 26 | # float* texture, float* tex_coords, int* tex_triangles, 27 | # float* depth_buffer, 28 | # int nver, int tex_nver, int ntri, 29 | # int h, int w, int c, 30 | # int tex_h, int tex_w, int tex_c, 31 | # int mapping_type) 32 | 33 | void _get_tri_normal(float *tri_normal, float *vertices, int *triangles, int nver, bool norm_flg) 34 | void _get_ver_normal(float *ver_normal, float*tri_normal, int*triangles, int nver, int ntri) 35 | void _get_normal(float *ver_normal, float *vertices, int *triangles, int nver, int ntri) 36 | 37 | 38 | # void _write_obj_with_colors_texture(string filename, string mtl_name, 39 | # float* vertices, int* triangles, float* colors, float* uv_coords, 40 | # int nver, int ntri, int ntexver) 41 | 42 | @cython.boundscheck(False) 43 | @cython.wraparound(False) 44 | def get_tri_normal(np.ndarray[float, ndim=2, mode="c"] tri_normal not None, 45 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 46 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 47 | int ntri, bool norm_flg = False): 48 | _get_tri_normal( 
np.PyArray_DATA(tri_normal), np.PyArray_DATA(vertices), 49 | np.PyArray_DATA(triangles), ntri, norm_flg) 50 | 51 | @cython.boundscheck(False) # turn off bounds-checking for entire function 52 | @cython.wraparound(False) # turn off negative index wrapping for entire function 53 | def get_ver_normal(np.ndarray[float, ndim=2, mode = "c"] ver_normal not None, 54 | np.ndarray[float, ndim=2, mode = "c"] tri_normal not None, 55 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 56 | int nver, int ntri): 57 | _get_ver_normal( 58 | np.PyArray_DATA(ver_normal), np.PyArray_DATA(tri_normal), np.PyArray_DATA(triangles), 59 | nver, ntri) 60 | 61 | @cython.boundscheck(False) # turn off bounds-checking for entire function 62 | @cython.wraparound(False) # turn off negative index wrapping for entire function 63 | def get_normal(np.ndarray[float, ndim=2, mode = "c"] ver_normal not None, 64 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 65 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 66 | int nver, int ntri): 67 | _get_normal( 68 | np.PyArray_DATA(ver_normal), np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 69 | nver, ntri) 70 | 71 | 72 | @cython.boundscheck(False) # turn off bounds-checking for entire function 73 | @cython.wraparound(False) # turn off negative index wrapping for entire function 74 | def rasterize_triangles( 75 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 76 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 77 | np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 78 | np.ndarray[int, ndim=2, mode = "c"] triangle_buffer not None, 79 | np.ndarray[float, ndim=2, mode = "c"] barycentric_weight not None, 80 | int ntri, int h, int w 81 | ): 82 | _rasterize_triangles( 83 | np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 84 | np.PyArray_DATA(depth_buffer), np.PyArray_DATA(triangle_buffer), 85 | np.PyArray_DATA(barycentric_weight), 86 | ntri, h, w) 87 | 88 | @cython.boundscheck(False) # turn off bounds-checking for entire function 89 | @cython.wraparound(False) # turn off negative index wrapping for entire function 90 | def rasterize(np.ndarray[unsigned char, ndim=3, mode = "c"] image not None, 91 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 92 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 93 | np.ndarray[float, ndim=2, mode = "c"] colors not None, 94 | np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 95 | int ntri, int h, int w, int c, float alpha = 1, bool reverse = False 96 | ): 97 | _rasterize( 98 | np.PyArray_DATA(image), np.PyArray_DATA(vertices), 99 | np.PyArray_DATA(triangles), 100 | np.PyArray_DATA(colors), 101 | np.PyArray_DATA(depth_buffer), 102 | ntri, h, w, c, alpha, reverse) 103 | 104 | # def render_texture_core(np.ndarray[float, ndim=3, mode = "c"] image not None, 105 | # np.ndarray[float, ndim=2, mode = "c"] vertices not None, 106 | # np.ndarray[int, ndim=2, mode="c"] triangles not None, 107 | # np.ndarray[float, ndim=3, mode = "c"] texture not None, 108 | # np.ndarray[float, ndim=2, mode = "c"] tex_coords not None, 109 | # np.ndarray[int, ndim=2, mode="c"] tex_triangles not None, 110 | # np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 111 | # int nver, int tex_nver, int ntri, 112 | # int h, int w, int c, 113 | # int tex_h, int tex_w, int tex_c, 114 | # int mapping_type 115 | # ): 116 | # _render_texture_core( 117 | # np.PyArray_DATA(image), np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 118 | # np.PyArray_DATA(texture), np.PyArray_DATA(tex_coords), 
np.PyArray_DATA(tex_triangles), 119 | # np.PyArray_DATA(depth_buffer), 120 | # nver, tex_nver, ntri, 121 | # h, w, c, 122 | # tex_h, tex_w, tex_c, 123 | # mapping_type) 124 | # 125 | # def write_obj_with_colors_texture_core(string filename, string mtl_name, 126 | # np.ndarray[float, ndim=2, mode = "c"] vertices not None, 127 | # np.ndarray[int, ndim=2, mode="c"] triangles not None, 128 | # np.ndarray[float, ndim=2, mode = "c"] colors not None, 129 | # np.ndarray[float, ndim=2, mode = "c"] uv_coords not None, 130 | # int nver, int ntri, int ntexver 131 | # ): 132 | # _write_obj_with_colors_texture(filename, mtl_name, 133 | # np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), np.PyArray_DATA(colors), np.PyArray_DATA(uv_coords), 134 | # nver, ntri, ntexver) 135 | -------------------------------------------------------------------------------- /Sim3DR/lighting.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | from .Sim3DR import get_normal, rasterize 5 | 6 | _norm = lambda arr: arr / np.sqrt(np.sum(arr ** 2, axis=1))[:, None] 7 | 8 | 9 | def norm_vertices(vertices): 10 | vertices -= vertices.min(0)[None, :] 11 | vertices /= vertices.max() 12 | vertices *= 2 13 | vertices -= vertices.max(0)[None, :] / 2 14 | return vertices 15 | 16 | 17 | def convert_type(obj): 18 | if isinstance(obj, tuple) or isinstance(obj, list): 19 | return np.array(obj, dtype=np.float32)[None, :] 20 | return obj 21 | 22 | 23 | class RenderPipeline(object): 24 | def __init__(self, **kwargs): 25 | self.intensity_ambient = convert_type(kwargs.get('intensity_ambient', 0.3)) 26 | self.intensity_directional = convert_type(kwargs.get('intensity_directional', 0.6)) 27 | self.intensity_specular = convert_type(kwargs.get('intensity_specular', 0.1)) 28 | self.specular_exp = kwargs.get('specular_exp', 5) 29 | self.color_ambient = convert_type(kwargs.get('color_ambient', (1, 1, 1))) 30 | self.color_directional = convert_type(kwargs.get('color_directional', (1, 1, 1))) 31 | self.light_pos = convert_type(kwargs.get('light_pos', (0, 0, 5))) 32 | self.view_pos = convert_type(kwargs.get('view_pos', (0, 0, 5))) 33 | 34 | def update_light_pos(self, light_pos): 35 | self.light_pos = convert_type(light_pos) 36 | 37 | def __call__(self, vertices, triangles, bg, texture=None): 38 | normal = get_normal(vertices, triangles) 39 | 40 | # 2. lighting 41 | light = np.zeros_like(vertices, dtype=np.float32) 42 | # ambient component 43 | if self.intensity_ambient > 0: 44 | light += self.intensity_ambient * self.color_ambient 45 | 46 | vertices_n = norm_vertices(vertices.copy()) 47 | if self.intensity_directional > 0: 48 | # diffuse component 49 | direction = _norm(self.light_pos - vertices_n) 50 | cos = np.sum(normal * direction, axis=1)[:, None] 51 | # cos = np.clip(cos, 0, 1) 52 | # todo: check below 53 | light += self.intensity_directional * (self.color_directional * np.clip(cos, 0, 1)) 54 | 55 | # specular component 56 | if self.intensity_specular > 0: 57 | v2v = _norm(self.view_pos - vertices_n) 58 | reflection = 2 * cos * normal - direction 59 | spe = np.sum((v2v * reflection) ** self.specular_exp, axis=1)[:, None] 60 | spe = np.where(cos != 0, np.clip(spe, 0, 1), np.zeros_like(spe)) 61 | light += self.intensity_specular * self.color_directional * np.clip(spe, 0, 1) 62 | light = np.clip(light, 0, 1) 63 | 64 | # 2. 
rasterization, [0, 1] 65 | if texture is None: 66 | render_img = rasterize(vertices, triangles, light, bg=bg) 67 | return render_img 68 | else: 69 | texture *= light 70 | render_img = rasterize(vertices, triangles, texture, bg=bg) 71 | return render_img 72 | 73 | 74 | def main(): 75 | pass 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /Sim3DR/readme.md: -------------------------------------------------------------------------------- 1 | ## Forked from https://github.com/cleardusk/3DDFA_V2/tree/master/Sim3DR 2 | 3 | ## Sim3DR 4 | This is a simple 3D render, written by c++ and cython. 5 | 6 | ### Build Sim3DR 7 | 8 | ```shell script 9 | python3 setup.py build_ext --inplace 10 | ``` 11 | 12 | -------------------------------------------------------------------------------- /Sim3DR/setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | python setup.py build_ext -i 3 | to compile 4 | ''' 5 | 6 | from distutils.core import setup, Extension 7 | from Cython.Build import cythonize 8 | from Cython.Distutils import build_ext 9 | import numpy 10 | 11 | setup( 12 | name='Sim3DR_Cython', # not the package name 13 | cmdclass={'build_ext': build_ext}, 14 | ext_modules=[Extension("Sim3DR_Cython", 15 | sources=["lib/rasterize.pyx", "lib/rasterize_kernel.cpp"], 16 | language='c++', 17 | include_dirs=[numpy.get_include()], 18 | extra_compile_args=["-std=c++11"])], 19 | ) 20 | -------------------------------------------------------------------------------- /Sim3DR/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | set(TARGET test) 4 | project(${TARGET}) 5 | 6 | #find_package( OpenCV REQUIRED ) 7 | #include_directories( ${OpenCV_INCLUDE_DIRS} ) 8 | 9 | #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -O3") 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -std=c++11") 11 | add_executable(${TARGET} test.cpp rasterize_kernel.cpp io.cpp) 12 | target_include_directories(${TARGET} PRIVATE ${PROJECT_SOURCE_DIR}) 13 | -------------------------------------------------------------------------------- /Sim3DR/tests/io.cpp: -------------------------------------------------------------------------------- 1 | #include "io.h" 2 | 3 | //void load_obj(const string obj_fp, float* vertices, float* colors, float* triangles){ 4 | // string line; 5 | // ifstream in(obj_fp); 6 | // 7 | // if(in.is_open()){ 8 | // while (getline(in, line)){ 9 | // stringstream ss(line); 10 | // 11 | // char t; // type: v, f 12 | // ss >> t; 13 | // if (t == 'v'){ 14 | // 15 | // } 16 | // } 17 | // } 18 | //} 19 | 20 | void load_obj(const char *obj_fp, float *vertices, float *colors, int *triangles, int nver, int ntri) { 21 | FILE *fp; 22 | fp = fopen(obj_fp, "r"); 23 | 24 | char t; // type: v or f 25 | if (fp != nullptr) { 26 | for (int i = 0; i < nver; ++i) { 27 | fscanf(fp, "%c", &t); 28 | for (int j = 0; j < 3; ++j) 29 | fscanf(fp, " %f", &vertices[3 * i + j]); 30 | for (int j = 0; j < 3; ++j) 31 | fscanf(fp, " %f", &colors[3 * i + j]); 32 | fscanf(fp, "\n"); 33 | } 34 | // fscanf(fp, "%c", &t); 35 | for (int i = 0; i < ntri; ++i) { 36 | fscanf(fp, "%c", &t); 37 | for (int j = 0; j < 3; ++j) { 38 | fscanf(fp, " %d", &triangles[3 * i + j]); 39 | triangles[3 * i + j] -= 1; 40 | } 41 | fscanf(fp, "\n"); 42 | } 43 | 44 | fclose(fp); 45 | } 46 | } 47 | 48 | void load_ply(const char *ply_fp, float *vertices, int 
*triangles, int nver, int ntri) { 49 | FILE *fp; 50 | fp = fopen(ply_fp, "r"); 51 | 52 | // char s[256]; 53 | char t; 54 | if (fp != nullptr) { 55 | // for (int i = 0; i < 9; ++i) 56 | // fscanf(fp, "%s", s); 57 | for (int i = 0; i < nver; ++i) 58 | fscanf(fp, "%f %f %f\n", &vertices[3 * i], &vertices[3 * i + 1], &vertices[3 * i + 2]); 59 | 60 | for (int i = 0; i < ntri; ++i) 61 | fscanf(fp, "%c %d %d %d\n", &t, &triangles[3 * i], &triangles[3 * i + 1], &triangles[3 * i + 2]); 62 | 63 | fclose(fp); 64 | } 65 | } 66 | 67 | void write_ppm(const char *filename, unsigned char *img, int h, int w, int c) { 68 | FILE *fp; 69 | //open file for output 70 | fp = fopen(filename, "wb"); 71 | if (!fp) { 72 | fprintf(stderr, "Unable to open file '%s'\n", filename); 73 | exit(1); 74 | } 75 | 76 | //write the header file 77 | //image format 78 | fprintf(fp, "P6\n"); 79 | 80 | //image size 81 | fprintf(fp, "%d %d\n", w, h); 82 | 83 | // rgb component depth 84 | fprintf(fp, "%d\n", MAX_PXL_VALUE); 85 | 86 | // pixel data 87 | fwrite(img, sizeof(unsigned char), size_t(h * w * c), fp); 88 | fclose(fp); 89 | } -------------------------------------------------------------------------------- /Sim3DR/tests/io.h: -------------------------------------------------------------------------------- 1 | #ifndef IO_H_ 2 | #define IO_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | #define MAX_PXL_VALUE 255 13 | 14 | void load_obj(const char* obj_fp, float* vertices, float* colors, int* triangles, int nver, int ntri); 15 | void load_ply(const char* ply_fp, float* vertices, int* triangles, int nver, int ntri); 16 | 17 | 18 | void write_ppm(const char *filename, unsigned char *img, int h, int w, int c); 19 | 20 | #endif -------------------------------------------------------------------------------- /Sim3DR/tests/test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Tesing cases 3 | */ 4 | 5 | #include 6 | #include 7 | #include "rasterize.h" 8 | #include "io.h" 9 | 10 | void test_isPointInTri() { 11 | Point p0(0, 0); 12 | Point p1(1, 0); 13 | Point p2(1, 1); 14 | 15 | Point p(0.2, 0.2); 16 | 17 | if (is_point_in_tri(p, p0, p1, p2)) 18 | std::cout << "In"; 19 | else 20 | std::cout << "Out"; 21 | std::cout << std::endl; 22 | } 23 | 24 | void test_getPointWeight() { 25 | Point p0(0, 0); 26 | Point p1(1, 0); 27 | Point p2(1, 1); 28 | 29 | Point p(0.2, 0.2); 30 | 31 | float weight[3]; 32 | get_point_weight(weight, p, p0, p1, p2); 33 | std::cout << weight[0] << " " << weight[1] << " " << weight[2] << std::endl; 34 | } 35 | 36 | void test_get_tri_normal() { 37 | float tri_normal[3]; 38 | // float vertices[9] = {1, 0, 0, 0, 0, 0, 0, 1, 0}; 39 | float vertices[9] = {1, 1.1, 0, 0, 0, 0, 0, 0.6, 0.7}; 40 | int triangles[3] = {0, 1, 2}; 41 | int ntri = 1; 42 | 43 | _get_tri_normal(tri_normal, vertices, triangles, ntri); 44 | 45 | for (int i = 0; i < 3; ++i) 46 | std::cout << tri_normal[i] << ", "; 47 | std::cout << std::endl; 48 | } 49 | 50 | void test_load_obj() { 51 | const char *fp = "../data/vd005_mesh.obj"; 52 | int nver = 35709; 53 | int ntri = 70789; 54 | 55 | auto *vertices = new float[nver]; 56 | auto *colors = new float[nver]; 57 | auto *triangles = new int[ntri]; 58 | load_obj(fp, vertices, colors, triangles, nver, ntri); 59 | 60 | delete[] vertices; 61 | delete[] colors; 62 | delete[] triangles; 63 | } 64 | 65 | void test_render() { 66 | // 1. 
loading obj 67 | // const char *fp = "/Users/gjz/gjzprojects/Sim3DR/data/vd005_mesh.obj"; 68 | const char *fp = "/Users/gjz/gjzprojects/Sim3DR/data/face1.obj"; 69 | int nver = 35709; //53215; //35709; 70 | int ntri = 70789; //105840;//70789; 71 | 72 | auto *vertices = new float[3 * nver]; 73 | auto *colors = new float[3 * nver]; 74 | auto *triangles = new int[3 * ntri]; 75 | load_obj(fp, vertices, colors, triangles, nver, ntri); 76 | 77 | // 2. rendering 78 | int h = 224, w = 224, c = 3; 79 | 80 | // enlarging 81 | int scale = 4; 82 | h *= scale; 83 | w *= scale; 84 | for (int i = 0; i < nver * 3; ++i) vertices[i] *= scale; 85 | 86 | auto *image = new unsigned char[h * w * c](); 87 | auto *depth_buffer = new float[h * w](); 88 | 89 | for (int i = 0; i < h * w; ++i) depth_buffer[i] = -999999; 90 | 91 | clock_t t; 92 | t = clock(); 93 | 94 | _rasterize(image, vertices, triangles, colors, depth_buffer, ntri, h, w, c, true); 95 | t = clock() - t; 96 | double time_taken = ((double) t) / CLOCKS_PER_SEC; // in seconds 97 | printf("Render took %f seconds to execute \n", time_taken); 98 | 99 | 100 | // auto *image_char = new u_char[h * w * c](); 101 | // for (int i = 0; i < h * w * c; ++i) 102 | // image_char[i] = u_char(255 * image[i]); 103 | write_ppm("res.ppm", image, h, w, c); 104 | 105 | // delete[] image_char; 106 | delete[] vertices; 107 | delete[] colors; 108 | delete[] triangles; 109 | delete[] image; 110 | delete[] depth_buffer; 111 | } 112 | 113 | void test_light() { 114 | // 1. loading obj 115 | const char *fp = "/Users/gjz/gjzprojects/Sim3DR/data/emma_input_0_noheader.ply"; 116 | int nver = 53215; //35709; 117 | int ntri = 105840; //70789; 118 | 119 | auto *vertices = new float[3 * nver]; 120 | auto *colors = new float[3 * nver]; 121 | auto *triangles = new int[3 * ntri]; 122 | load_ply(fp, vertices, triangles, nver, ntri); 123 | 124 | // 2. rendering 125 | // int h = 1901, w = 3913, c = 3; 126 | int h = 2000, w = 4000, c = 3; 127 | 128 | // enlarging 129 | // int scale = 1; 130 | // h *= scale; 131 | // w *= scale; 132 | // for (int i = 0; i < nver * 3; ++i) vertices[i] *= scale; 133 | 134 | auto *image = new unsigned char[h * w * c](); 135 | auto *depth_buffer = new float[h * w](); 136 | 137 | for (int i = 0; i < h * w; ++i) depth_buffer[i] = -999999; 138 | for (int i = 0; i < 3 * nver; ++i) colors[i] = 0.8; 139 | 140 | clock_t t; 141 | t = clock(); 142 | 143 | _rasterize(image, vertices, triangles, colors, depth_buffer, ntri, h, w, c, true); 144 | t = clock() - t; 145 | double time_taken = ((double) t) / CLOCKS_PER_SEC; // in seconds 146 | printf("Render took %f seconds to execute \n", time_taken); 147 | 148 | 149 | // auto *image_char = new u_char[h * w * c](); 150 | // for (int i = 0; i < h * w * c; ++i) 151 | // image_char[i] = u_char(255 * image[i]); 152 | write_ppm("emma.ppm", image, h, w, c); 153 | 154 | // delete[] image_char; 155 | delete[] vertices; 156 | delete[] colors; 157 | delete[] triangles; 158 | delete[] image; 159 | delete[] depth_buffer; 160 | } 161 | 162 | int main(int argc, char *argv[]) { 163 | // std::cout << "Hello CMake!" 
<< std::endl; 164 | 165 | // test_isPointInTri(); 166 | // test_getPointWeight(); 167 | // test_get_tri_normal(); 168 | // test_load_obj(); 169 | // test_render(); 170 | test_light(); 171 | return 0; 172 | } -------------------------------------------------------------------------------- /data/300w_lp_aflw2000.yaml: -------------------------------------------------------------------------------- 1 | 2 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 3 | path: /datasdc/zhouhuayi/dataset/headpose/HeadCube3D/ 4 | labels: yolov5_labels 5 | train: yolov5_labels/img_txt/train.txt 6 | val: yolov5_labels/img_txt/validation.txt 7 | 8 | train_annotations: annotations/train_300W_LP_coco_style.json 9 | val_annotations: annotations/val_AFLW2000_coco_style.json 10 | 11 | nc: 1 # number of classes (only one class: human head) 12 | num_angles: 3 # number of Euler angles is 3 (pitch, yaw, roll) 13 | names: [ 'person' ] # class names. We still use 'person' in json file 14 | 15 | -------------------------------------------------------------------------------- /data/300w_lp_biwi.yaml: -------------------------------------------------------------------------------- 1 | 2 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 3 | path: /datasdc/zhouhuayi/dataset/headpose/HeadCube3D/ 4 | labels: yolov5_labels 5 | #train: yolov5_labels/img_txt/train.txt 6 | val: yolov5_labels/img_txt/validation.txt 7 | 8 | #train_annotations: annotations/train_300W_LP_coco_style.json 9 | val_annotations: annotations/BIWI_test_coco_style.json 10 | 11 | nc: 1 # number of classes (only one class: human head) 12 | num_angles: 3 # number of Euler angles is 3 (pitch, yaw, roll) 13 | names: [ 'person' ] # class names. We still use 'person' in json file 14 | 15 | -------------------------------------------------------------------------------- /data/agora_coco.yaml: -------------------------------------------------------------------------------- 1 | 2 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 3 | path: /datasdc/zhouhuayi/dataset/AGORA/HPE/ 4 | labels: yolov5_labels_coco 5 | train: yolov5_labels_coco/img_txt/train.txt 6 | val: yolov5_labels_coco/img_txt/validation.txt 7 | 8 | train_annotations: annotations/coco_style_train.json 9 | val_annotations: annotations/coco_style_validation.json 10 | 11 | nc: 1 # number of classes (only one class: human head) 12 | num_angles: 3 # number of Euler angles is 3 (pitch, yaw, roll) 13 | names: [ 'person' ] # class names. 
We still use 'person' in json file 14 | 15 | # nc: 18 # number of classes (person class + 17 keypoint classes) 16 | # num_coords: 34 # number of keypoint coordinates (x, y) 17 | 18 | # names: [ 'person', 'nose', # class names 19 | # 'left_eye', 'right_eye', 20 | # 'left_ear', 'right_ear', 21 | # 'left_shoulder', 'right_shoulder', 22 | # 'left_elbow', 'right_elbow', 23 | # 'left_wrist', 'right_wrist', 24 | # 'left_hip', 'right_hip', 25 | # 'left_knee', 'right_knee', 26 | # 'left_ankle', 'right_ankle' ] 27 | 28 | # kp_flip: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] # for left-right keypoint flipping 29 | # kp_left: [1, 3, 5, 7, 9, 11, 13, 15] # left keypoints 30 | # kp_face: [0, 1, 2, 3, 4] 31 | 32 | -------------------------------------------------------------------------------- /data/cmu_panoptic_coco.yaml: -------------------------------------------------------------------------------- 1 | 2 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 3 | path: /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/ 4 | labels: yolov5_labels_coco 5 | train: yolov5_labels_coco/img_txt/train.txt 6 | val: yolov5_labels_coco/img_txt/val.txt 7 | 8 | train_annotations: annotations/coco_style_sampled_train.json 9 | val_annotations: annotations/coco_style_sampled_val.json 10 | 11 | nc: 1 # number of classes (only one class: human head) 12 | num_angles: 3 # number of Euler angles is 3 (pitch, yaw, roll) 13 | names: [ 'person' ] # class names. We still use 'person' in json file 14 | 15 | # nc: 18 # number of classes (person class + 17 keypoint classes) 16 | # num_coords: 34 # number of keypoint coordinates (x, y) 17 | 18 | 19 | # names: [ 'person', 'nose', # class names 20 | # 'left_eye', 'right_eye', 21 | # 'left_ear', 'right_ear', 22 | # 'left_shoulder', 'right_shoulder', 23 | # 'left_elbow', 'right_elbow', 24 | # 'left_wrist', 'right_wrist', 25 | # 'left_hip', 'right_hip', 26 | # 'left_knee', 'right_knee', 27 | # 'left_ankle', 'right_ankle' ] 28 | 29 | # kp_flip: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] # for left-right keypoint flipping 30 | # kp_left: [1, 3, 5, 7, 9, 11, 13, 15] # left keypoints 31 | # kp_face: [0, 1, 2, 3, 4] 32 | -------------------------------------------------------------------------------- /data/hyp-p6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters for COCO training from scratch 3 | # python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300 4 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.3 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 0.7 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | mse: 0.1 # Euler angles mse loss gain 19 | iou_t: 0.20 # IoU training threshold 20 | anchor_t: 4.0 # anchor-multiple threshold 21 | # anchors: 3 # anchors per output layer (0 to ignore) 22 | fl_gamma: 0.0 # focal loss 
gamma (efficientDet default gamma=1.5) 23 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 24 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 25 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 26 | degrees: 0.0 # image rotation (+/- deg) 27 | translate: 0.1 # image translation (+/- fraction) 28 | scale: 0.9 # image scale (+/- gain) 29 | shear: 0.0 # image shear (+/- deg) 30 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 31 | flipud: 0.0 # image flip up-down (probability) 32 | fliplr: 0.0 # image flip left-right (probability) 33 | mosaic: 1.0 # image mosaic (probability) 34 | mixup: 0.0 # image mixup (probability) 35 | copy_paste: 0.0 # segment copy-paste (probability) 36 | -------------------------------------------------------------------------------- /data/widerface_coco.yaml: -------------------------------------------------------------------------------- 1 | 2 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 3 | path: /datasdc/zhouhuayi/dataset/WiderFace/ 4 | labels: yolov5_labels_coco 5 | train: yolov5_labels_coco/img_txt/train.txt 6 | val: yolov5_labels_coco/img_txt/val.txt 7 | 8 | train_annotations: annotations/coco_style_img2pose_train.json 9 | val_annotations: annotations/coco_style_img2pose_val.json 10 | 11 | nc: 1 # number of classes (only one class: human head) 12 | num_angles: 3 # number of Euler angles is 3 (pitch, yaw, roll) 13 | names: [ 'person' ] # class names. We still use 'person' in json file 14 | 15 | 16 | -------------------------------------------------------------------------------- /demos/image.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | FILE = Path(__file__).absolute() 4 | sys.path.append(FILE.parents[1].as_posix()) 5 | 6 | import torch 7 | import argparse 8 | import yaml 9 | import cv2 10 | import math 11 | from math import cos, sin 12 | import os.path as osp 13 | import numpy as np 14 | 15 | from utils.torch_utils import select_device 16 | from utils.general import check_img_size, scale_coords, non_max_suppression 17 | from utils.datasets import LoadImages 18 | from models.experimental import attempt_load 19 | 20 | 21 | def plot_3axis_Zaxis(img, yaw, pitch, roll, tdx=None, tdy=None, size=50., limited=True, thickness=2): 22 | # Input is a cv2 image 23 | # pose_params: (pitch, yaw, roll, tdx, tdy) 24 | # Where (tdx, tdy) is the translation of the face. 
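# The endpoints (x1, y1), (x2, y2), (x3, y3) computed below are the image-plane
# projections of the head's unit X, Y and Z axes after rotation by (pitch, yaw, roll);
# `size` sets the drawn axis length and (tdx, tdy) anchors the axes at the head centre.
# The extended yellow line simply prolongs the Z axis so the facing direction is easier to see.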
25 | # For pose we have [pitch yaw roll tdx tdy tdz scale_factor] 26 | 27 | p = pitch * np.pi / 180 28 | y = -(yaw * np.pi / 180) 29 | r = roll * np.pi / 180 30 | 31 | if tdx != None and tdy != None: 32 | face_x = tdx 33 | face_y = tdy 34 | else: 35 | height, width = img.shape[:2] 36 | face_x = width / 2 37 | face_y = height / 2 38 | 39 | # X-Axis (pointing to right) drawn in red 40 | x1 = size * (cos(y) * cos(r)) + face_x 41 | y1 = size * (cos(p) * sin(r) + cos(r) * sin(p) * sin(y)) + face_y 42 | 43 | # Y-Axis (pointing to down) drawn in green 44 | x2 = size * (-cos(y) * sin(r)) + face_x 45 | y2 = size * (cos(p) * cos(r) - sin(p) * sin(y) * sin(r)) + face_y 46 | 47 | # Z-Axis (out of the screen) drawn in blue 48 | x3 = size * (sin(y)) + face_x 49 | y3 = size * (-cos(y) * sin(p)) + face_y 50 | 51 | # Plot head oritation line in black 52 | # scale_ratio = 5 53 | scale_ratio = 2 54 | base_len = math.sqrt((face_x - x3)**2 + (face_y - y3)**2) 55 | if face_x == x3: 56 | endx = tdx 57 | if face_y < y3: 58 | if limited: 59 | endy = tdy + (y3 - face_y) * scale_ratio 60 | else: 61 | endy = img.shape[0] 62 | else: 63 | if limited: 64 | endy = tdy - (face_y - y3) * scale_ratio 65 | else: 66 | endy = 0 67 | elif face_x > x3: 68 | if limited: 69 | endx = tdx - (face_x - x3) * scale_ratio 70 | endy = tdy - (face_y - y3) * scale_ratio 71 | else: 72 | endx = 0 73 | endy = tdy - (face_y - y3) / (face_x - x3) * tdx 74 | else: 75 | if limited: 76 | endx = tdx + (x3 - face_x) * scale_ratio 77 | endy = tdy + (y3 - face_y) * scale_ratio 78 | else: 79 | endx = img.shape[1] 80 | endy = tdy - (face_y - y3) / (face_x - x3) * (tdx - endx) 81 | # cv2.line(img, (int(tdx), int(tdy)), (int(endx), int(endy)), (0,0,0), 2) 82 | # cv2.line(img, (int(tdx), int(tdy)), (int(endx), int(endy)), (255,255,0), 2) 83 | cv2.line(img, (int(tdx), int(tdy)), (int(endx), int(endy)), (0,255,255), thickness) 84 | 85 | # X-Axis pointing to right. drawn in red 86 | cv2.line(img, (int(face_x), int(face_y)), (int(x1),int(y1)),(0,0,255),thickness) 87 | # Y-Axis pointing to down. drawn in green 88 | cv2.line(img, (int(face_x), int(face_y)), (int(x2),int(y2)),(0,255,0),thickness) 89 | # Z-Axis (out of the screen) drawn in blue 90 | cv2.line(img, (int(face_x), int(face_y)), (int(x3),int(y3)),(255,0,0),thickness) 91 | 92 | return img 93 | 94 | if __name__ == '__main__': 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument('-p', '--img-path', default='test_imgs/100024.jpg', help='path to image or dir') 97 | parser.add_argument('--data', type=str, default='data/agora_coco.yaml') 98 | parser.add_argument('--imgsz', type=int, default=1280) 99 | parser.add_argument('--weights', default='yolov5m6.pt') 100 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or cpu') 101 | parser.add_argument('--conf-thres', type=float, default=0.7, help='confidence threshold') 102 | parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') 103 | parser.add_argument('--scales', type=float, nargs='+', default=[1]) 104 | parser.add_argument('--thickness', type=int, default=2, help='thickness of Euler angle lines') 105 | 106 | args = parser.parse_args() 107 | 108 | with open(args.data) as f: 109 | data = yaml.safe_load(f) # load data dict 110 | 111 | device = select_device(args.device, batch_size=1) 112 | print('Using device: {}'.format(device)) 113 | 114 | model = attempt_load(args.weights, map_location=device) 115 | stride = int(model.stride.max()) # model stride 116 | imgsz = check_img_size(args.imgsz, s=stride) # check image size 117 | dataset = LoadImages(args.img_path, img_size=imgsz, stride=stride, auto=True) 118 | dataset_iter = iter(dataset) 119 | 120 | for index in range(len(dataset)): 121 | 122 | (single_path, img, im0, _) = next(dataset_iter) 123 | 124 | if '_res' in single_path: continue 125 | 126 | print(index, single_path, "\n") 127 | 128 | img = torch.from_numpy(img).to(device) 129 | img = img / 255.0 # 0 - 255 to 0.0 - 1.0 130 | if len(img.shape) == 3: 131 | img = img[None] # expand for batch dim 132 | 133 | out_ori = model(img, augment=True, scales=args.scales)[0] 134 | out = non_max_suppression(out_ori, args.conf_thres, args.iou_thres, num_angles=data['num_angles']) 135 | 136 | # predictions (Array[N, 9]), x1, y1, x2, y2, conf, class, pitch, yaw, roll 137 | bboxes = scale_coords(img.shape[2:], out[0][:, :4], im0.shape[:2]).cpu().numpy() # native-space pred 138 | scores = out[0][:, 4].cpu().numpy() 139 | pitchs_yaws_rolls = out[0][:, 6:].cpu().numpy() # N*3 140 | for i, [x1, y1, x2, y2] in enumerate(bboxes): 141 | im0 = cv2.rectangle(im0, (int(x1), int(y1)), (int(x2), int(y2)), 142 | [255,255,255], thickness=args.thickness) 143 | # im0 = cv2.putText(im0, str(round(scores[i], 3)), (int(x1), int(y1)), 144 | # cv2.FONT_HERSHEY_PLAIN, 0.7, (255,255,255), thickness=2) 145 | pitch = (pitchs_yaws_rolls[i][0] - 0.5) * 180 146 | yaw = (pitchs_yaws_rolls[i][1] - 0.5) * 360 147 | roll = (pitchs_yaws_rolls[i][2] - 0.5) * 180 148 | im0 = plot_3axis_Zaxis(im0, yaw, pitch, roll, tdx=(x1+x2)/2, tdy=(y1+y2)/2, 149 | size=max(y2-y1, x2-x1)*0.8, thickness=args.thickness) 150 | 151 | cv2.imwrite(single_path[:-4]+"_res.jpg", im0) 152 | 153 | -------------------------------------------------------------------------------- /demos/video.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | FILE = Path(__file__).absolute() 4 | sys.path.append(FILE.parents[1].as_posix()) 5 | 6 | import argparse 7 | import torch 8 | import cv2 9 | import yaml 10 | import imageio 11 | from tqdm import tqdm 12 | import os.path as osp 13 | import numpy as np 14 | 15 | from utils.torch_utils import select_device, time_sync 16 | from utils.general import check_img_size, scale_coords, non_max_suppression 17 | from utils.datasets import LoadImages 18 | from utils.plots import plot_3axis_Zaxis 19 | from models.experimental import attempt_load 20 | 21 | 22 | if __name__ == '__main__': 23 | parser = argparse.ArgumentParser() 24 | 25 | # video options 26 | parser.add_argument('-p', '--video-path', default='', help='path to video file') 27 | 28 | parser.add_argument('--data', type=str, default='data/agora_coco.yaml') 29 | parser.add_argument('--imgsz', type=int, default=1280) 30 | 
parser.add_argument('--save-size', type=int, default=1080) 31 | parser.add_argument('--weights', default='yolov5m6.pt') 32 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or cpu') 33 | parser.add_argument('--conf-thres', type=float, default=0.7, help='confidence threshold') 34 | parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') 35 | parser.add_argument('--scales', type=float, nargs='+', default=[1]) 36 | 37 | parser.add_argument('--start', type=int, default=0, help='start time (s)') 38 | parser.add_argument('--end', type=int, default=-1, help='end time (s), -1 for remainder of video') 39 | parser.add_argument('--color', type=int, nargs='+', default=[255, 255, 255], help='head bbox color') 40 | parser.add_argument('--thickness', type=int, default=2, help='thickness of Euler angle lines') 41 | parser.add_argument('--alpha', type=float, default=0.4, help='head bbox and head pose alpha') 42 | 43 | parser.add_argument('--display', action='store_true', help='display inference results') 44 | parser.add_argument('--fps-size', type=int, default=1) 45 | parser.add_argument('--gif', action='store_true', help='create gif') 46 | parser.add_argument('--gif-size', type=int, nargs='+', default=[480, 270]) 47 | 48 | args = parser.parse_args() 49 | 50 | with open(args.data) as f: 51 | data = yaml.safe_load(f) # load data dict 52 | 53 | device = select_device(args.device, batch_size=1) 54 | print('Using device: {}'.format(device)) 55 | 56 | model = attempt_load(args.weights, map_location=device) # load FP32 model 57 | stride = int(model.stride.max()) # model stride 58 | imgsz = check_img_size(args.imgsz, s=stride) # check image size 59 | dataset = LoadImages(args.video_path, img_size=imgsz, stride=stride, auto=True) 60 | 61 | if device.type != 'cpu': 62 | model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once 63 | 64 | 65 | cap = dataset.cap 66 | cap.set(cv2.CAP_PROP_POS_MSEC, args.start * 1000) 67 | fps = cap.get(cv2.CAP_PROP_FPS) 68 | if args.end == -1: 69 | n = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) - fps * args.start) 70 | else: 71 | n = int(fps * (args.end - args.start)) 72 | h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 73 | w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 74 | gif_frames = [] 75 | out_path = '{}_{}'.format(osp.splitext(args.video_path)[0], "DirectMHP") 76 | print("fps:", fps, "\t total frames:", n, "\t out_path:", out_path) 77 | 78 | write_video = not args.display and not args.gif 79 | if write_video: 80 | # writer = cv2.VideoWriter(out_path + '.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) 81 | writer = cv2.VideoWriter(out_path + '.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, 82 | (int(args.save_size*w/h), args.save_size)) 83 | 84 | dataset = tqdm(dataset, desc='Running inference', total=n) 85 | t0 = time_sync() 86 | for i, (path, img, im0, _) in enumerate(dataset): 87 | img = torch.from_numpy(img).to(device) 88 | img = img / 255.0 # 0 - 255 to 0.0 - 1.0 89 | if len(img.shape) == 3: 90 | img = img[None] # expand for batch dim 91 | 92 | out_ori = model(img, augment=True, scales=args.scales)[0] 93 | out = non_max_suppression(out_ori, args.conf_thres, args.iou_thres, num_angles=data['num_angles']) 94 | # predictions (Array[N, 9]), x1, y1, x2, y2, conf, class, pitch, yaw, roll 95 | bboxes = scale_coords(img.shape[2:], out[0][:, :4], im0.shape[:2]).cpu().numpy() # native-space pred 96 | scores = out[0][:, 4].cpu().numpy() 97 | pitchs_yaws_rolls = out[0][:, 6:].cpu().numpy() # N*3 98 | 99 | 
im0_copy = im0.copy() 100 | 101 | # draw head bboxes and pose 102 | for j, [x1, y1, x2, y2] in enumerate(bboxes): 103 | im0_copy = cv2.rectangle(im0_copy, (int(x1), int(y1)), (int(x2), int(y2)), 104 | args.color, thickness=args.thickness) 105 | # im0_copy = cv2.putText(im0_copy, str(round(scores[j], 3)), (int(x1), int(y1)), 106 | # cv2.FONT_HERSHEY_PLAIN, 0.7, (255,255,255), thickness=2) 107 | pitch = (pitchs_yaws_rolls[j][0] - 0.5) * 180 108 | yaw = (pitchs_yaws_rolls[j][1] - 0.5) * 360 109 | roll = (pitchs_yaws_rolls[j][2] - 0.5) * 180 110 | im0_copy = plot_3axis_Zaxis(im0_copy, yaw, pitch, roll, tdx=(x1+x2)/2, tdy=(y1+y2)/2, 111 | size=max(y2-y1, x2-x1)*0.8, thickness=args.thickness) 112 | 113 | im0 = cv2.addWeighted(im0, args.alpha, im0_copy, 1 - args.alpha, gamma=0) 114 | 115 | if i == 0: 116 | t = time_sync() - t0 117 | else: 118 | t = time_sync() - t1 119 | 120 | if not args.gif and args.fps_size: 121 | cv2.putText(im0, '{:.1f} FPS'.format(1 / t), (5 * args.fps_size, 25 * args.fps_size), 122 | cv2.FONT_HERSHEY_SIMPLEX, args.fps_size, (255, 255, 255), thickness=2 * args.fps_size) 123 | 124 | if args.gif: 125 | gif_img = cv2.cvtColor(cv2.resize(im0, dsize=tuple(args.gif_size)), cv2.COLOR_RGB2BGR) 126 | if args.fps_size: 127 | cv2.putText(gif_img, '{:.1f} FPS'.format(1 / t), (5 * args.fps_size, 25 * args.fps_size), 128 | cv2.FONT_HERSHEY_SIMPLEX, args.fps_size, (255, 255, 255), thickness=2 * args.fps_size) 129 | gif_frames.append(gif_img) 130 | elif write_video: 131 | im0 = cv2.resize(im0, dsize=(int(args.save_size*w/h), args.save_size)) 132 | writer.write(im0) 133 | else: 134 | cv2.imshow('', im0) 135 | cv2.waitKey(1) 136 | 137 | t1 = time_sync() 138 | if i == n - 1: 139 | break 140 | 141 | cv2.destroyAllWindows() 142 | cap.release() 143 | if write_video: 144 | writer.release() 145 | 146 | if args.gif: 147 | print('Saving GIF...') 148 | with imageio.get_writer(out_path + '.gif', mode="I", fps=fps) as writer: 149 | for idx, frame in tqdm(enumerate(gif_frames)): 150 | writer.append_data(frame) 151 | 152 | -------------------------------------------------------------------------------- /exps/AGORA/agora_evaluation/projection.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # -*- coding: utf-8 -*- 3 | 4 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 5 | # holder of all proprietary rights on this computer program. 6 | # You can only use this computer program if you have closed 7 | # a license agreement with MPG or you get the right to use the computer 8 | # program from someone who is authorized to grant you that right. 9 | # Any use of the computer program without a valid license is prohibited and 10 | # liable to prosecution. 11 | # 12 | # Copyright©2021 Max-Planck-Gesellschaft zur Förderung 13 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 14 | # for Intelligent Systems. All rights reserved. 
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | #------------------------------------------------------------------------------ 18 | import logging 19 | import math 20 | import os 21 | 22 | import cv2 23 | import numpy as np 24 | import matplotlib.pyplot as plt 25 | 26 | 27 | logging.basicConfig(level=logging.DEBUG) 28 | 29 | 30 | def focalLength_mm2px(focalLength, dslr_sens, focalPoint): 31 | focal_pixel = (focalLength / dslr_sens) * focalPoint * 2 32 | return focal_pixel 33 | 34 | 35 | def toCamCoords(j3d, camPosWorld): 36 | # transform gt to camera coordinate frame 37 | j3d = j3d - camPosWorld 38 | return j3d 39 | 40 | 41 | def unreal2cv2(points): 42 | # x --> y, y --> z, z --> x 43 | points = np.roll(points, 2, 1) 44 | # change direction of y 45 | points = points * np.array([1, -1, 1]) 46 | return points 47 | 48 | 49 | def smpl2opencv(j3d): 50 | # change sign of axis 1 and axis 2 51 | j3d = j3d * np.array([1, -1, -1]) 52 | return j3d 53 | 54 | 55 | def project_point(joint, RT, KKK): 56 | 57 | P = np.dot(KKK, RT) 58 | joints_2d = np.dot(P, joint) 59 | joints_2d = joints_2d[0:2] / joints_2d[2] 60 | 61 | return joints_2d 62 | 63 | 64 | def project_2d( 65 | args, 66 | df, 67 | i, 68 | pNum, 69 | joints3d, 70 | meanPose=False): 71 | 72 | dslr_sens_width = 36 73 | dslr_sens_height = 20.25 74 | imgWidth = args.imgWidth 75 | imgHeight = args.imgHeight 76 | debug_path = args.debug_path 77 | imgBase = args.imgFolder 78 | imgName = df.iloc[i]['imgPath'] 79 | if imgWidth == 1280 and '_1280x720.png' not in imgName: 80 | #If 1280x720 images are used then image name needs to be updated 81 | imgName = imgName.replace('.png','_1280x720.png') 82 | df.iloc[i]['imgPath']=imgName 83 | 84 | imgPath = os.path.join(imgBase, df.iloc[i]['imgPath']) 85 | if 'hdri' in imgPath: 86 | ground_plane = [0, 0, 0] 87 | scene3d = False 88 | focalLength = 50 89 | camPosWorld = [0, 0, 170] 90 | camYaw = 0 91 | camPitch = 0 92 | 93 | elif 'cam00' in imgPath: 94 | ground_plane = [0, 0, 0] 95 | scene3d = True 96 | focalLength = 18 97 | camPosWorld = [400, -275, 265] 98 | camYaw = 135 99 | camPitch = 30 100 | elif 'cam01' in imgPath: 101 | ground_plane = [0, 0, 0] 102 | scene3d = True 103 | focalLength = 18 104 | camPosWorld = [400, 225, 265] 105 | camYaw = -135 106 | camPitch = 30 107 | elif 'cam02' in imgPath: 108 | ground_plane = [0, 0, 0] 109 | scene3d = True 110 | focalLength = 18 111 | camPosWorld = [-490, 170, 265] 112 | camYaw = -45 113 | camPitch = 30 114 | elif 'cam03' in imgPath: 115 | ground_plane = [0, 0, 0] 116 | scene3d = True 117 | focalLength = 18 118 | camPosWorld = [-490, -275, 265] 119 | camYaw = 45 120 | camPitch = 30 121 | elif 'ag2' in imgPath: 122 | ground_plane = [0, 0, 0] 123 | scene3d = False 124 | focalLength = 28 125 | camPosWorld = [0, 0, 170] 126 | camYaw = 0 127 | camPitch = 15 128 | else: 129 | ground_plane = [0, -1.7, 0] 130 | scene3d = True 131 | focalLength = 28 132 | camPosWorld = [ 133 | df.iloc[i]['camX'], 134 | df.iloc[i]['camY'], 135 | df.iloc[i]['camZ']] 136 | camYaw = df.iloc[i]['camYaw'] 137 | camPitch = 0 138 | 139 | if meanPose: 140 | yawSMPL = 0 141 | trans3d = [0, 0, 0] 142 | else: 143 | yawSMPL = df.iloc[i]['Yaw'][pNum] 144 | trans3d = [df.iloc[i]['X'][pNum], 145 | df.iloc[i]['Y'][pNum], 146 | df.iloc[i]['Z'][pNum]] 147 | 148 | # gt2d, gt3d_camCoord = project2d(joints3d, focalLength=focalLength, scene3d=scene3d, 149 | gt2d, gt3d_camCoord, cam_j3d, camR, camT, camK = project2d(joints3d, focalLength=focalLength, scene3d=scene3d, 150 | trans3d=trans3d, 151 | 
dslr_sens_width=dslr_sens_width, 152 | dslr_sens_height=dslr_sens_height, 153 | camPosWorld=camPosWorld, 154 | cy=imgHeight / 2, 155 | cx=imgWidth / 2, 156 | imgPath=imgPath, 157 | yawSMPL=yawSMPL, 158 | ground_plane=ground_plane, 159 | debug_path=debug_path, 160 | debug=args.debug, 161 | ind=i, 162 | pNum=pNum, 163 | meanPose=meanPose, camPitch=camPitch, camYaw=camYaw) 164 | # return gt2d, gt3d_camCoord 165 | return gt2d, gt3d_camCoord, cam_j3d, camR, camT, camK 166 | 167 | 168 | def project2d( 169 | j3d, 170 | focalLength, 171 | scene3d, 172 | trans3d, 173 | dslr_sens_width, 174 | dslr_sens_height, 175 | camPosWorld, 176 | cy, 177 | cx, 178 | imgPath, 179 | yawSMPL, 180 | ground_plane, 181 | debug_path, 182 | debug=False, 183 | ind=-1, 184 | pNum=-1, 185 | meanPose=False, 186 | camPitch=0, 187 | camYaw=0): 188 | 189 | focalLength_x = focalLength_mm2px(focalLength, dslr_sens_width, cx) 190 | focalLength_y = focalLength_mm2px(focalLength, dslr_sens_height, cy) 191 | 192 | camMat = np.array([[focalLength_x, 0, cx], 193 | [0, focalLength_y, cy], 194 | [0, 0, 1]]) 195 | 196 | # camPosWorld and trans3d are in cm. Transform to meter 197 | trans3d = np.array(trans3d) / 100 198 | trans3d = unreal2cv2(np.reshape(trans3d, (1, 3))) 199 | camPosWorld = np.array(camPosWorld) / 100 200 | if scene3d: 201 | camPosWorld = unreal2cv2( 202 | np.reshape( 203 | camPosWorld, (1, 3))) + np.array(ground_plane) 204 | else: 205 | camPosWorld = unreal2cv2(np.reshape(camPosWorld, (1, 3))) 206 | 207 | # get points in camera coordinate system 208 | j3d = smpl2opencv(j3d) 209 | 210 | ''' newly added for Euler angles Calculation ''' 211 | cam_j3d = j3d.copy() 212 | 213 | # scans have a 90deg rotation, but for mean pose from vposer there is no 214 | # such rotation 215 | if meanPose: 216 | rotMat, _ = cv2.Rodrigues( 217 | np.array([[0, (yawSMPL) / 180 * np.pi, 0]], dtype=float)) 218 | else: 219 | rotMat, _ = cv2.Rodrigues( 220 | np.array([[0, ((yawSMPL - 90) / 180) * np.pi, 0]], dtype=float)) 221 | 222 | j3d = np.matmul(rotMat, j3d.T).T 223 | j3d = j3d + trans3d 224 | 225 | camera_rotationMatrix, _ = cv2.Rodrigues( 226 | np.array([0, ((-camYaw) / 180) * np.pi, 0]).reshape(3, 1)) 227 | camera_rotationMatrix2, _ = cv2.Rodrigues( 228 | np.array([camPitch / 180 * np.pi, 0, 0]).reshape(3, 1)) 229 | 230 | j3d_new = np.matmul(camera_rotationMatrix, j3d.T - camPosWorld.T).T 231 | j3d_new = np.matmul(camera_rotationMatrix2, j3d_new.T).T 232 | 233 | RT = np.concatenate((np.diag([1., 1., 1.]), np.zeros((3, 1))), axis=1) 234 | j2d = np.zeros((j3d_new.shape[0], 2)) 235 | for i in range(j3d_new.shape[0]): 236 | j2d[i, :] = project_point(np.concatenate( 237 | [j3d_new[i, :], np.array([1])]), RT, camMat) 238 | 239 | 240 | ''' newly added for Euler angles Calculation ''' 241 | camR = np.matmul(camera_rotationMatrix2, np.matmul(camera_rotationMatrix, rotMat)) 242 | camT = np.dot(np.matmul(camera_rotationMatrix2, camera_rotationMatrix), trans3d.T - camPosWorld.T) 243 | camK = camMat 244 | 245 | 246 | if debug: 247 | import matplotlib.cm as cm 248 | if not os.path.exists(debug_path): 249 | os.makedirs(debug_path) 250 | 251 | if len(j2d) < 200: # No rendering for verts 252 | if not (imgPath is None): 253 | img = cv2.imread(imgPath) 254 | img = img[:, :, ::-1] 255 | colors = cm.tab20c(np.linspace(0, 1, 25)) 256 | fig = plt.figure(dpi=300) 257 | ax = fig.add_subplot(111) 258 | if not (imgPath is None): 259 | ax.imshow(img) 260 | for i in range(22): 261 | ax.scatter(j2d[i, 0], j2d[i, 1], c=colors[i], s=0.1) 262 | #ax.scatter(j2d[i,0], j2d[i,1], 
c=np.array([1,0,0]), s=0.1) 263 | # ax.text(j2d[i,0], j2d[i,1], str(i)) 264 | # plt.show() 265 | 266 | if not (imgPath is None): 267 | savename = imgPath.split('/')[-1] 268 | savename = savename.replace('.pkl', '.jpg') 269 | plt.savefig( 270 | os.path.join( 271 | debug_path, 272 | 'image' + 273 | str(pNum) + 274 | savename)) 275 | plt.close('all') 276 | 277 | # return j2d, j3d_new 278 | 279 | ''' newly added for Euler angles Calculation ''' 280 | return j2d, j3d_new, cam_j3d, camR, camT, camK 281 | 282 | -------------------------------------------------------------------------------- /exps/CMU/data_split_hpe.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import json 4 | import copy 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | import shutil 9 | import matplotlib.pyplot as plt 10 | 11 | ############################################################################################ 12 | 13 | # Face keypoint orders follow Openpose keypoint output 14 | # https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/output.md 15 | # Face outline points (0-16) are unstable 16 | face_edges = np.array([ 17 | # [0,1],[1,2],[2,3],[3,4],[4,5],[5,6],[6,7],[7,8],[8,9],[9,10],[11,12],[12,13],[14,15],[15,16], #outline (ignored) 18 | [17,18],[18,19],[19,20],[20,21], #right eyebrow 19 | [22,23],[23,24],[24,25],[25,26], #left eyebrow 20 | [27,28],[28,29],[29,30], #nose upper part 21 | [31,32],[32,33],[33,34],[34,35], #nose lower part 22 | [36,37],[37,38],[38,39],[39,40],[40,41],[41,36], #right eye 23 | [42,43],[43,44],[44,45],[45,46],[46,47],[47,42], #left eye 24 | [48,49],[49,50],[50,51],[51,52],[52,53],[53,54],[54,55],[55,56],[56,57],[57,58],[58,59],[59,48], #Lip outline 25 | [60,61],[61,62],[62,63],[63,64],[64,65],[65,66],[66,67],[67,60] #Lip inner line 26 | ]) 27 | 28 | coco_dict_template = { 29 | 'info': { 30 | 'description': 'Face landmarks and Euler angles of CMU Panoptic Studio Dataset', 31 | 'url': 'http://domedb.perception.cs.cmu.edu/', 32 | 'version': '1.0', 33 | 'year': 2022, 34 | 'contributor': 'Huayi Zhou', 35 | 'date_created': '2022/02/17', 36 | }, 37 | 'licences': [{ 38 | 'url': 'http://creativecommons.org/licenses/by-nc/2.0', 39 | 'name': 'Attribution-NonCommercial License' 40 | }], 41 | 'images': [], 42 | 'annotations': [], 43 | 'categories': [{ 44 | 'supercategory': 'person', 45 | 'id': 1, 46 | 'name': 'person', 47 | 'face_edges': face_edges.tolist() 48 | }] 49 | } 50 | 51 | ############################################################################################ 52 | 53 | def sort_labels_by_image_id(labels_list): 54 | images_labels_dict = {} 55 | for i, labels_dict in enumerate(labels_list): 56 | image_id = str(labels_dict['image_id']) 57 | if 'head_bbox' in labels_dict: 58 | labels_dict['bbox'] = labels_dict['head_bbox'] # please use the default 'bbox' as key in cocoapi 59 | del labels_dict['head_bbox'] 60 | if 'area' not in labels_dict: # generate standard COCO style json file 61 | labels_dict['segmentation'] = [] # This script is not for segmentation 62 | labels_dict['area'] = round(labels_dict['bbox'][-1] * labels_dict['bbox'][-2], 4) 63 | if image_id in images_labels_dict.keys(): 64 | images_labels_dict[image_id].append(labels_dict) 65 | else: 66 | images_labels_dict[image_id] = [labels_dict] 67 | return images_labels_dict 68 | 69 | 70 | if __name__ == "__main__": 71 | 72 | sampled_anno_path = "./HPE/annotations/coco_style_sample.json" 73 | sampled_train_path = 
"./HPE/annotations/coco_style_sampled_train.json" 74 | sampled_val_path = "./HPE/annotations/coco_style_sampled_val.json" 75 | 76 | image_root_path = "./HPE/images_sampled" 77 | 78 | image_dst_path = "./HPE/images" 79 | if os.path.exists(image_dst_path): 80 | shutil.rmtree(image_dst_path) 81 | os.mkdir(image_dst_path) 82 | os.mkdir(os.path.join(image_dst_path, "train")) 83 | os.mkdir(os.path.join(image_dst_path, "val")) 84 | 85 | 86 | '''[start] do not change''' 87 | seq_names = ["171204_pose3", "171026_pose3", "170221_haggling_b3", "170221_haggling_m3", "170224_haggling_a3", "170228_haggling_b1", "170404_haggling_a1", "170407_haggling_a2", "170407_haggling_b2", "171026_cello3", "161029_piano4", "160422_ultimatum1", "160224_haggling1", "170307_dance5", "160906_ian1", "170915_office1", "160906_pizza1"] # 17 names 88 | 89 | seq_names_train = ["171204_pose3", "161029_piano4", "160422_ultimatum1", "170307_dance5", "160906_pizza1", "170221_haggling_b3", "170224_haggling_a3", "170404_haggling_a1", "170407_haggling_b2"] # 9 names, person: 1+1+7+1+5+3+3+3+3 90 | seq_names_val = ["171026_pose3", "171026_cello3", "160224_haggling1", "160906_ian1", "170915_office1", "170221_haggling_m3", "170228_haggling_b1", "170407_haggling_a2"] # 8 names, person: 1+1+3+2+1+3+3+3 91 | 92 | 93 | train_seq_num_list, val_seq_num_list = [], [] 94 | for seq_num, seq_name in enumerate(seq_names): 95 | if seq_name in seq_names_train: train_seq_num_list.append(seq_num) 96 | if seq_name in seq_names_val: val_seq_num_list.append(seq_num) 97 | 98 | with open(sampled_anno_path, "r") as json_file: 99 | annos_dict = json.load(json_file) 100 | images_list = annos_dict['images'] 101 | labels_list = annos_dict['annotations'] 102 | images_labels_dict = sort_labels_by_image_id(labels_list) 103 | 104 | coco_dict_train = copy.deepcopy(coco_dict_template) 105 | coco_dict_val = copy.deepcopy(coco_dict_template) 106 | 107 | person_instances_stat = {} 108 | euler_angles_stat = [[],[],[]] # pitch, yaw, roll 109 | 110 | for image_dict in tqdm(images_list): 111 | image_id = image_dict['id'] 112 | seq_num = (image_id - 10000000000) // 100000000 - 1 113 | if seq_num in train_seq_num_list: target_type = "train" 114 | if seq_num in val_seq_num_list: target_type = "val" 115 | 116 | labels_list = images_labels_dict[str(image_id)] 117 | anno_nums = len(labels_list) 118 | 119 | image_dict['seq'] = seq_names[seq_num] 120 | 121 | src_image_path = os.path.join(image_root_path, image_dict['file_name']) 122 | dst_image_path = os.path.join(image_dst_path, target_type, image_dict['file_name']) 123 | if os.path.exists(src_image_path): 124 | shutil.move(src_image_path, dst_image_path) 125 | 126 | if target_type == "train": 127 | coco_dict_train['images'].append(image_dict) 128 | coco_dict_train['annotations'] += labels_list 129 | if str(anno_nums) not in person_instances_stat: 130 | person_instances_stat[str(anno_nums)] = [1,0] # [1, 0] for [train, val] 131 | else: 132 | person_instances_stat[str(anno_nums)][0] += 1 133 | if target_type == "val": 134 | coco_dict_val['images'].append(image_dict) 135 | coco_dict_val['annotations'] += labels_list 136 | if str(anno_nums) not in person_instances_stat: 137 | person_instances_stat[str(anno_nums)] = [0,1] # [0, 1] for [train, val] 138 | else: 139 | person_instances_stat[str(anno_nums)][1] += 1 140 | 141 | for labels in labels_list: 142 | [pitch, yaw, roll] = labels['euler_angles'] 143 | euler_angles_stat[0].append(pitch) 144 | euler_angles_stat[1].append(yaw) 145 | euler_angles_stat[2].append(roll) 146 | 147 | 
'''[end] do not change''' 148 | 149 | print("\nperson_instances_stat:", person_instances_stat) 150 | image_cnt, person_cnt = [0,0], [0,0] 151 | for key, value in person_instances_stat.items(): 152 | image_cnt[0], image_cnt[1] = image_cnt[0] + value[0], image_cnt[1] + value[1] 153 | person_cnt[0], person_cnt[1] = person_cnt[0] + int(key)*value[0], person_cnt[1] + int(key)*value[1] 154 | print("Images number containing [%s] persons: %d, \ttrain/val = %d/%d"%(key, sum(value), value[0], value[1])) 155 | print("Perosn instances per image: %.4f, \ttrain/val = %.4f/%.4f"%( 156 | sum(person_cnt)/sum(image_cnt), person_cnt[0]/image_cnt[0], person_cnt[1]/image_cnt[1])) 157 | 158 | print("\ntrain: images --> %d, head instances --> %d"%(len(coco_dict_train['images']), len(coco_dict_train['annotations']))) 159 | with open(sampled_train_path, "w") as json_file: 160 | json.dump(coco_dict_train, json_file) 161 | print("val: images --> %d, head instances --> %d"%(len(coco_dict_val['images']), len(coco_dict_val['annotations']))) 162 | with open(sampled_val_path, "w") as json_file: 163 | json.dump(coco_dict_val, json_file) 164 | 165 | '''CMUPanoptic Euler Angels Stat''' 166 | interval = 10 # 10 or 15 is better 167 | bins = 360 // interval 168 | density = False # True or False, density=False would make counts 169 | colors = ['r', 'g', 'b'] 170 | labels = ["Pitch", "Yaw", "Roll"] 171 | plt.hist(euler_angles_stat, bins=bins, alpha=0.7, density=density, histtype='bar', label=labels, color=colors) 172 | plt.legend(prop ={'size': 10}) 173 | # plt.xlim(-180, 180) 174 | plt.xticks(range(-180,181,interval)) 175 | if density: plt.ylabel('Percentage') 176 | else: plt.ylabel('Counts') 177 | plt.xlabel('Degree') 178 | plt.show() 179 | 180 | 181 | '''final results 182 | 100%|███████████████████████████████████████████████████████████████████████████████████████| 31934/31934 [00:40<00:00, 794.51it/s] 183 | 184 | person_instances_stat: {'1': [7416, 7291], '2': [1313, 1328], '3': [4937, 7597], '4': [479, 0], '5': [567, 0], '7': [85, 0], '6': [921, 0]} 185 | Images number containing [1] persons: 14707, train/val = 7416/7291 186 | Images number containing [2] persons: 2641, train/val = 1313/1328 187 | Images number containing [3] persons: 12534, train/val = 4937/7597 188 | Images number containing [4] persons: 479, train/val = 479/0 189 | Images number containing [5] persons: 567, train/val = 567/0 190 | Images number containing [7] persons: 85, train/val = 85/0 191 | Images number containing [6] persons: 921, train/val = 921/0 192 | Perosn instances per image: 2.1439, train/val = 2.2729/2.0189 193 | 194 | train: images --> 15718, head instances --> 35725 195 | val: images --> 16216, head instances --> 32738 196 | ''' -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getDB_panoptic_ver1_2_hdVideo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This list is named as "Panoptic Studio DB Ver 1.2" 4 | 5 | curPath=$(dirname "$0") 6 | hdVideoNum=31 7 | 8 | #Range of motion sequences 9 | $curPath/getData_hdVideo.sh 171204_pose3 $hdVideoNum 10 | $curPath/getData_hdVideo.sh 171026_pose3 $hdVideoNum 11 | 12 | #Download All Haggling Sequences without downloading videos 13 | $curPath/getData_hdVideo.sh 170221_haggling_m3 $hdVideoNum 14 | $curPath/getData_hdVideo.sh 170404_haggling_a1 $hdVideoNum 15 | $curPath/getData_hdVideo.sh 170407_haggling_b2 $hdVideoNum 16 | 17 | #Musical Instruments 18 | $curPath/getData_hdVideo.sh 
171026_cello3 $hdVideoNum 19 | $curPath/getData_hdVideo.sh 161029_piano4 $hdVideoNum 20 | 21 | #SocialGame sequences 22 | $curPath/getData_hdVideo.sh 160422_ultimatum1 $hdVideoNum 23 | $curPath/getData_hdVideo.sh 160224_haggling1 $hdVideoNum 24 | 25 | #Dance sequences 26 | $curPath/getData_hdVideo.sh 170307_dance5 $hdVideoNum 27 | 28 | #Toddler sequences 29 | $curPath/getData_hdVideo.sh 160906_ian1 $hdVideoNum 30 | 31 | #Others sequences 32 | $curPath/getData_hdVideo.sh 170915_office1 $hdVideoNum 33 | $curPath/getData_hdVideo.sh 160906_pizza1 $hdVideoNum 34 | 35 | 36 | #*** 4 other more names list *** 37 | #Social Games (Haggling) 38 | $curPath/getData_hdVideo.sh 170221_haggling_b3 $hdVideoNum 39 | $curPath/getData_hdVideo.sh 170224_haggling_a3 $hdVideoNum 40 | $curPath/getData_hdVideo.sh 170228_haggling_b1 $hdVideoNum 41 | $curPath/getData_hdVideo.sh 170407_haggling_a2 $hdVideoNum 42 | -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getDB_panoptic_ver1_2_hdVideo_t1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This list is named as "Panoptic Studio DB Ver 1.2" 4 | 5 | curPath=$(dirname "$0") 6 | hdVideoNum=31 7 | 8 | #Range of motion sequences 9 | $curPath/getData_hdVideo.sh 171204_pose3 $hdVideoNum 10 | $curPath/getData_hdVideo.sh 171026_pose3 $hdVideoNum 11 | 12 | #Download All Haggling Sequences without downloading videos 13 | $curPath/getData_hdVideo.sh 170221_haggling_m3 $hdVideoNum 14 | $curPath/getData_hdVideo.sh 170404_haggling_a1 $hdVideoNum 15 | $curPath/getData_hdVideo.sh 170407_haggling_b2 $hdVideoNum 16 | -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getDB_panoptic_ver1_2_hdVideo_t2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This list is named as "Panoptic Studio DB Ver 1.2" 4 | 5 | curPath=$(dirname "$0") 6 | hdVideoNum=31 7 | 8 | #Musical Instruments 9 | $curPath/getData_hdVideo.sh 171026_cello3 $hdVideoNum 10 | $curPath/getData_hdVideo.sh 161029_piano4 $hdVideoNum 11 | 12 | #SocialGame sequences 13 | $curPath/getData_hdVideo.sh 160422_ultimatum1 $hdVideoNum 14 | $curPath/getData_hdVideo.sh 160224_haggling1 $hdVideoNum 15 | -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getDB_panoptic_ver1_2_hdVideo_t3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This list is named as "Panoptic Studio DB Ver 1.2" 4 | 5 | curPath=$(dirname "$0") 6 | hdVideoNum=31 7 | 8 | #Dance sequences 9 | $curPath/getData_hdVideo.sh 170307_dance5 $hdVideoNum 10 | 11 | #Toddler sequences 12 | $curPath/getData_hdVideo.sh 160906_ian1 $hdVideoNum 13 | 14 | #Others sequences 15 | $curPath/getData_hdVideo.sh 170915_office1 $hdVideoNum 16 | $curPath/getData_hdVideo.sh 160906_pizza1 $hdVideoNum 17 | -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getDB_panoptic_ver1_2_hdVideo_t4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This list is named as "Panoptic Studio DB Ver 1.2" 4 | 5 | curPath=$(dirname "$0") 6 | hdVideoNum=31 7 | 8 | #Social Games (Haggling) 9 | $curPath/getData_hdVideo.sh 170221_haggling_b3 $hdVideoNum 10 | $curPath/getData_hdVideo.sh 170224_haggling_a3 $hdVideoNum 11 | $curPath/getData_hdVideo.sh 
170228_haggling_b1 $hdVideoNum 12 | $curPath/getData_hdVideo.sh 170407_haggling_a2 $hdVideoNum 13 | -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getData_hdVideo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script downloads videos for a specific sequence: 4 | # ./getData.sh [sequenceName] [numHDViews] 5 | # 6 | # e.g., to download 10 VGA camera views for the "sampleData" sequence: 7 | # ./getData.sh sampleData 10 0 8 | # 9 | 10 | datasetName=${1-sampleData} 11 | numHDViews=${2-31} #Specify the number of hd views you want to donwload. Up to 31 12 | 13 | # Select wget or curl, with appropriate options 14 | if command -v wget >/dev/null 2>&1; then 15 | WGET="wget -c" 16 | mO="-O" 17 | elif command -v curl >/dev/null 2>&1; then 18 | WGET="curl -C -" 19 | mO="-o" 20 | else 21 | echo "This script requires wget or curl to download files." 22 | echo "Aborting." 23 | exit 1; 24 | fi 25 | 26 | # Each sequence gets its own subdirectory 27 | mkdir $datasetName 28 | cd $datasetName 29 | 30 | 31 | # Download calibration data 32 | $WGET $mO calibration_${datasetName}.json http://domedb.perception.cs.cmu.edu/webdata/dataset/$datasetName/calibration_${datasetName}.json || rm -v calibration_${datasetName}.json 33 | 34 | 35 | # 3D Face 36 | if [ ! -f hdFace3d.tar ]; then 37 | $WGET $mO hdFace3d.tar http://domedb.perception.cs.cmu.edu/webdata/dataset/$datasetName/hdFace3d.tar || rm -v hdFace3d.tar 38 | fi 39 | 40 | 41 | # Extract 3D Keypoints 42 | if [ -f hdFace3d.tar ]; then 43 | tar -xf hdFace3d.tar 44 | fi 45 | 46 | 47 | 48 | ##################### 49 | # Download hd videos 50 | ##################### 51 | mkdir -p hdVideos 52 | panel=0 53 | nodes=(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) 54 | for (( c=0; c<$numHDViews; c++)) 55 | do 56 | fileName=$(printf "hdVideos/hd_%02d_%02d.mp4" ${panel} ${nodes[c]}) 57 | echo $fileName; 58 | #Download and delete if the file is blank 59 | cmd=$(printf "$WGET $mO hdVideos/hd_%02d_%02d.mp4 http://domedb.perception.cs.cmu.edu/webdata/dataset/$datasetName/videos/hd_shared_crf20/hd_%02d_%02d.mp4 || rm -v $fileName" ${panel} ${nodes[c]} ${panel} ${nodes[c]}) 60 | eval $cmd 61 | done 62 | 63 | -------------------------------------------------------------------------------- /exps/CMU/released_seqs_excel.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/exps/CMU/released_seqs_excel.xlsx -------------------------------------------------------------------------------- /exps/CMU/selected_HPE_list.txt: -------------------------------------------------------------------------------- 1 | 【Totally 562 GB】 2 | 3 | ***13 selected names list*** 4 | 171204_pose3 5 | 171026_pose3 6 | 7 | 170221_haggling_m3 8 | 170404_haggling_a1 9 | 170407_haggling_b2 10 | 11 | 171026_cello3 12 | 161029_piano4 13 | 14 | 160422_ultimatum1 15 | 160224_haggling1 16 | 17 | 170307_dance5 18 | 19 | 160906_ian1 20 | 21 | 170915_office1 22 | 160906_pizza1 23 | 24 | ***4 more other selected names list*** 25 | 170221_haggling_b3 26 | 170224_haggling_a3 27 | 170228_haggling_b1 28 | 170407_haggling_a2 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /exps/compare_3ddfa.py: -------------------------------------------------------------------------------- 1 | 2 
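# NOTE (illustrative sketch, not part of the original script): the compare_*.py
# scripts below all read a prepared --json-file of the same format, a list of
# per-head entries with the matched ground-truth angles attached.  Judging from the
# keys read in the code, one entry looks roughly like this (all values invented):
'''
{
    "image_id": 1000000001,     # image is loaded from <root-imgdir>/<image_id>.jpg
    "bbox": [x0, y0, w, h],     # predicted head box, converted to [x0, y0, x1, y1] before use
    "gt_bbox": [x0, y0, w, h],  # matched ground-truth head box (used by some scripts)
    "gt_pitch": 10.5,
    "gt_yaw": -25.0,
    "gt_roll": 3.2
}
'''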
| __author__ = 'Huayi Zhou' 3 | 4 | ''' 5 | Put this file under the main folder of codes project 3DDFA https://github.com/cleardusk/3DDFA 6 | 7 | usage: 8 | python compare_3ddfa.py --root-imgdir /path/to/root/imgdir \ 9 | --json-file /path/to/prepared/json/file \ 10 | --save-file /path/to/saving/npy/file -m gpu 11 | 12 | e.g.: 13 | python compare_3ddfa.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --save-file ./agora_val_3DDFA.npy -m gpu --debug false 14 | [results] 15 | Saving all results in one file ./agora_val_3DDFA.npy ... 16 | Inference one image taking time: 0.011305771888510957 17 | face number: 3403 / 3403; MAE: 48.5867, [pitch_error, yaw_error, roll_error]: 42.5566, 39.6174, 63.5861 18 | 19 | 20 | python compare_3ddfa.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --save-file ./cmu_val_3DDFA.npy -m gpu --debug false 21 | [results] 22 | Saving all results in one file ./cmu_val_3DDFA.npy ... 23 | Inference one image taking time: 0.017735703712104287 24 | face number: 15871 / 15871; MAE: 27.1172, [pitch_error, yaw_error, roll_error]: 26.3376, 23.3927, 31.6214 25 | 26 | ''' 27 | 28 | import os 29 | import torch 30 | import torchvision.transforms as transforms 31 | import mobilenet_v1 32 | import numpy as np 33 | import cv2 34 | import argparse 35 | import torch.backends.cudnn as cudnn 36 | import time 37 | from tqdm import tqdm 38 | import json 39 | 40 | from utils.ddfa import ToTensorGjz, NormalizeGjz, str2bool 41 | from utils.inference import parse_roi_box_from_landmark, \ 42 | crop_img, predict_68pts, parse_roi_box_from_bbox, predict_dense 43 | from utils.estimate_pose import parse_pose, parse_pose_v2 44 | from utils.cv_plot import plot_pose_box 45 | 46 | STD_SIZE = 120 47 | 48 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 49 | 50 | def main(args): 51 | 52 | # 1. load pre-tained model 53 | checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar' 54 | arch = 'mobilenet_1' 55 | 56 | checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict'] 57 | model = getattr(mobilenet_v1, arch)(num_classes=62) # 62 = 12(pose) + 40(shape) +10(expression) 58 | 59 | model_dict = model.state_dict() 60 | # because the model is trained by multiple gpus, prefix module should be removed 61 | for k in checkpoint.keys(): 62 | model_dict[k.replace('module.', '')] = checkpoint[k] 63 | model.load_state_dict(model_dict) 64 | if args.mode == 'gpu': 65 | cudnn.benchmark = True 66 | model = model.cuda() 67 | model.eval() 68 | 69 | 70 | # 2. 
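# NOTE (hedged sketch, for readability only): as the comment next to the model
# construction above says, the 62 regression targets split into 12 pose + 40 shape
# + 10 expression parameters; parse_pose() later consumes the pose part of them.
# Roughly, the layout is handled inside 3DDFA's own utils like this:
'''
pose_param  = param[:12].reshape(3, 4)  # 3x4 camera/pose matrix
shape_param = param[12:52]              # 40 shape coefficients
exp_param   = param[52:62]              # 10 expression coefficients
'''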
forward 71 | transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)]) 72 | 73 | with open(args.json_file, "r") as json_f: 74 | pd_results_list = json.load(json_f) 75 | 76 | # face_imgs = [] # cropped face images collection 77 | pts_res = [] # 3d facial landmarks collection 78 | camPs = [] # Camera matrix collection 79 | pd_poses = [] # predicted pose collection 80 | gt_poses = [] # ground-truth pose collection 81 | taking_time_list = [] # how many ms per face 82 | valid_face_num = 0 83 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 84 | if args.debug and ind > 50: break # for testing 85 | 86 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 87 | img_ori = cv2.imread(img_path) 88 | 89 | bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 90 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 91 | 92 | gt_pitch = pd_results['gt_pitch'] 93 | gt_yaw = pd_results['gt_yaw'] 94 | gt_roll = pd_results['gt_roll'] 95 | 96 | ''' We do not need this enlarge operation. Or results will be super bad.''' 97 | # roi_box = parse_roi_box_from_bbox(bbox) 98 | roi_box = bbox 99 | img = crop_img(img_ori, roi_box) 100 | 101 | t1 = time.time() 102 | # forward: one step 103 | img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR) 104 | input = transform(img).unsqueeze(0) 105 | with torch.no_grad(): 106 | if args.mode == 'gpu': 107 | input = input.cuda() 108 | param = model(input) 109 | param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 110 | 111 | # 68 pts 112 | pts68 = predict_68pts(param, roi_box) 113 | 114 | ''' two-step for more accurate bbox to crop face ''' 115 | # roi_box = parse_roi_box_from_landmark(pts68) 116 | # img_step2 = crop_img(img_ori, roi_box) 117 | # img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR) 118 | # input = transform(img_step2).unsqueeze(0) 119 | # with torch.no_grad(): 120 | # if args.mode == 'gpu': 121 | # input = input.cuda() 122 | # param = model(input) 123 | # param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 124 | # pts68 = predict_68pts(param, roi_box) 125 | 126 | t2 = time.time() 127 | taking_time_list.append(t2-t1) 128 | 129 | camP, pose = parse_pose(param) 130 | # camP, pose = parse_pose_v2(param, pts68) 131 | if pose is None: 132 | continue 133 | 134 | valid_face_num += 1 135 | pts_res.append(pts68) 136 | camPs.append(camP) 137 | 138 | # the predicted order of 3DDFA is: [yaw, -pitch, -roll], and in range (-np.pi/2, np.pi/2) 139 | pd_poses.append([-pose[1]*180/np.pi, pose[0]*180/np.pi, -pose[2]*180/np.pi]) # for parse_pose() 140 | # pd_poses.append([pose[1], pose[0], pose[2]]) # for parse_pose_v2() 141 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 142 | 143 | 144 | if args.debug: 145 | save_img_path = "./tmp/"+str(ind).zfill(0)+\ 146 | "_p"+str(round(gt_pitch, 2))+"v"+str(round(-pose[1]*180/np.pi, 2))+\ 147 | "_y"+str(round(gt_yaw, 2))+"v"+str(round(pose[0]*180/np.pi, 2))+\ 148 | "_r"+str(round(gt_roll, 2))+"v"+str(round(-pose[2]*180/np.pi, 2))+".jpg" # for parse_pose() 149 | # save_img_path = "./tmp/"+str(ind).zfill(0)+\ 150 | # "_p"+str(round(gt_pitch, 2))+"v"+str(round(pose[1], 2))+\ 151 | # "_y"+str(round(gt_yaw, 2))+"v"+str(round(pose[0], 2))+\ 152 | # "_r"+str(round(gt_roll, 2))+"v"+str(round(pose[2], 2))+".jpg" # for parse_pose_v2() 153 | 154 | 155 | cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), 
(255,255,255), 2) 156 | for i in range(len(pts68[0, :])): 157 | cv2.circle(img_ori, (int(pts68[0, i]), int(pts68[1, i])), 1, (0,255,255), -1) 158 | img_ori = plot_pose_box(img_ori, [camP], [pts68]) 159 | cv2.imwrite(save_img_path, img_ori) 160 | 161 | 162 | 163 | '''print all results''' 164 | print("Saving all results in one file %s ..."%(args.save_file)) 165 | np.savez(args.save_file, camPs=np.array(camPs), 166 | pts_res=np.array(pts_res), 167 | # image=np.array(face_imgs), 168 | pd_pose=np.array(pd_poses), 169 | gt_poses=np.array(gt_poses)) 170 | # db_dict = np.load(args.save_file) 171 | # print(args.save_file, list(db_dict.keys())) 172 | 173 | 174 | print("Inference one image taking time:", sum(taking_time_list)/len(taking_time_list)) 175 | 176 | 177 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 178 | # error_list[:, 1] = np.min((error_list[:, 1], 360 - error_list[:, 1]), axis=0) # yaw range may be [-180,180] 179 | error_list = np.min((error_list, 360 - error_list), axis=0) 180 | pose_matrix = np.mean(error_list, axis=0) 181 | MAE = np.mean(pose_matrix) 182 | print("face number: %d / %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%( 183 | valid_face_num, len(taking_time_list), round(MAE, 4), 184 | round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 185 | 186 | 187 | if __name__ == '__main__': 188 | parser = argparse.ArgumentParser(description='3DDFA inference pipeline') 189 | 190 | parser.add_argument('--root-imgdir', default='', 191 | help='root path to multiple images') 192 | parser.add_argument('--json-file', default='', 193 | help='json file path that contains multiple images and their head bboxes') 194 | parser.add_argument('--save-file', default='', 195 | help='.npy file path to save all results') 196 | parser.add_argument('-m', '--mode', default='cpu', type=str, help='gpu or cpu mode') 197 | parser.add_argument('--debug', default='false', type=str2bool, help='whether set into debug mode') 198 | 199 | args = parser.parse_args() 200 | main(args) -------------------------------------------------------------------------------- /exps/compare_3ddfa_v2.py: -------------------------------------------------------------------------------- 1 | 2 | __author__ = 'Huayi Zhou' 3 | 4 | ''' 5 | 6 | pip install onnxruntime 7 | 8 | Put this file under the main folder of codes project 3DDFA_v2 https://github.com/cleardusk/3DDFA_V2 9 | or 10 | Put this file under the main folder of codes using project 3DDFA_v2 https://github.com/bubingy/HeadPoseEstimate 11 | 12 | usage: 13 | python compare_3ddfa_v2.py --root-imgdir /path/to/root/imgdir \ 14 | --json-file /path/to/prepared/json/file \ 15 | --save-file /path/to/saving/npy/file -m gpu 16 | 17 | e.g.: 18 | python compare_3ddfa_v2.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --save-file ./agora_val_3DDFA_v2.npy -m gpu --debug 19 | [results] 20 | Saving all results in one file ./agora_val_3DDFA_v2.npy ... 
21 | Inference one image taking time: 0.015800806553336474 22 | face number: 3403; MAE: 22.7539, [pitch_error, yaw_error, roll_error]: 20.5154, 28.4544, 19.2918 23 | 24 | 25 | python compare_3ddfa_v2.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --save-file ./cmu_val_3DDFA_v2.npy -m gpu --debug 26 | [results] 27 | Saving all results in one file ./cmu_val_3DDFA_v2.npy ... 28 | Inference one image taking time: 0.016364026179303746 29 | face number: 15871; MAE: 17.3448, [pitch_error, yaw_error, roll_error]: 18.6524, 17.0074, 16.3747 30 | 31 | ''' 32 | 33 | import cv2 34 | import os 35 | import time 36 | import json 37 | import argparse 38 | from tqdm import tqdm 39 | import numpy as np 40 | 41 | from model.pose import estimate_head_pose 42 | from model.plot import draw_pose 43 | 44 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 45 | 46 | def main(args): 47 | 48 | os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' 49 | os.environ['OMP_NUM_THREADS'] = '1' 50 | from model.FaceAlignment3D.TDDFA_ONNX import TDDFA_ONNX 51 | tddfa = TDDFA_ONNX() 52 | 53 | with open(args.json_file, "r") as json_f: 54 | pd_results_list = json.load(json_f) 55 | 56 | 57 | pts_res = [] # 3d facial landmarks collection 58 | pd_poses = [] # predicted pose collection 59 | gt_poses = [] # ground-truth pose collection 60 | taking_time_list = [] # how many ms per face 61 | 62 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 63 | if args.debug and ind > 50: break # for testing 64 | 65 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 66 | img_ori = cv2.imread(img_path) 67 | 68 | bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 69 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 70 | 71 | gt_pitch = pd_results['gt_pitch'] 72 | gt_yaw = pd_results['gt_yaw'] 73 | gt_roll = pd_results['gt_roll'] 74 | 75 | tic = time.time() 76 | 77 | param_lst, roi_box_lst = tddfa(img_ori, [bbox]) 78 | 79 | # calculate Euler angle 80 | ver_lst = tddfa.recon_vers(param_lst, roi_box_lst) 81 | euler_angle_lst, directions_lst, landmarks_lst = estimate_head_pose(ver_lst, True) 82 | 83 | toc = time.time() 84 | taking_time_list.append(toc-tic) 85 | 86 | pts_res.append(landmarks_lst[0]) 87 | pose = euler_angle_lst[0] 88 | 89 | # the predicted order of 3DDFA_v2 is: [-roll, -yaw, -pitch] 90 | pose[:] = -pose[:] 91 | 92 | pd_poses.append([pose[2], pose[1], pose[0]]) 93 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 94 | 95 | 96 | if args.debug: 97 | save_img_path = "./tmp/"+str(ind).zfill(0)+\ 98 | "_p"+str(round(gt_pitch, 2))+"v"+str(round(pose[2], 2))+\ 99 | "_y"+str(round(gt_yaw, 2))+"v"+str(round(pose[1], 2))+\ 100 | "_r"+str(round(gt_roll, 2))+"v"+str(round(pose[0], 2))+".jpg" 101 | 102 | # cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 103 | 104 | show_img = draw_pose(img_ori, directions_lst, np.array([bbox]), landmarks_lst, 105 | show_bbox=True, show_landmarks=True) 106 | cv2.imwrite(save_img_path, show_img) 107 | 108 | '''print all results''' 109 | print("Saving all results in one file %s ..."%(args.save_file)) 110 | np.savez(args.save_file, 111 | pts_res=np.array(pts_res), 112 | pd_pose=np.array(pd_poses), 113 | gt_poses=np.array(gt_poses)) 114 | # db_dict = np.load(args.save_file) 115 | # print(args.save_file, list(db_dict.keys())) 116 
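# NOTE (worked example, not part of the original script): the MAE computed below
# wraps angular differences, so a ground-truth yaw of 170 and a prediction of -175
# count as 15 degrees apart rather than 345:
'''
import numpy as np
err = np.abs(np.array([170.0]) - np.array([-175.0]))  # -> [345.]
err = np.min((err, 360 - err), axis=0)                # -> [15.]
'''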
| 117 | print("Inference one image taking time:", sum(taking_time_list)/len(taking_time_list)) 118 | 119 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 120 | # error_list[:, 1] = np.min((error_list[:, 1], 360 - error_list[:, 1]), axis=0) # yaw range may be [-180,180] 121 | error_list = np.min((error_list, 360 - error_list), axis=0) 122 | pose_matrix = np.mean(error_list, axis=0) 123 | MAE = np.mean(pose_matrix) 124 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%(len(taking_time_list), 125 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 126 | 127 | 128 | if __name__ == '__main__': 129 | parser = argparse.ArgumentParser(description='3DDFA inference pipeline') 130 | 131 | parser.add_argument('--root-imgdir', default='', 132 | help='root path to multiple images') 133 | parser.add_argument('--json-file', default='', 134 | help='json file path that contains multiple images and their head bboxes') 135 | parser.add_argument('--save-file', default='', 136 | help='.npy file path to save all results') 137 | parser.add_argument('-m', '--mode', default='cpu', type=str, help='gpu or cpu mode') 138 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 139 | 140 | args = parser.parse_args() 141 | main(args) -------------------------------------------------------------------------------- /exps/compare_FSANet.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | __author__ = 'Huayi Zhou' 4 | 5 | ''' 6 | 7 | 8 | Put this file under the main folder of codes project FSA-Net_pytorch 9 | git clone https://github.com/omasaht/headpose-fsanet-pytorch FSA-Net_pytorch 10 | 11 | 12 | usage: 13 | python compare_FSANet.py --root-imgdir /path/to/root/imgdir \ 14 | --json-file /path/to/prepared/json/file 15 | 16 | e.g.: 17 | python compare_FSANet.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 18 | [results] 19 | Inference one image taking time: 0.013793717896777136 20 | face number: 3403; MAE: 18.9809, [pitch_error, yaw_error, roll_error]: 18.9733, 21.6935, 16.2759 21 | 22 | 23 | python compare_FSANet.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 24 | [results] 25 | Inference one image taking time: 0.016242261164280028 26 | face number: 15871; MAE: 15.6144, [pitch_error, yaw_error, roll_error]: 16.343, 17.515, 12.9852 27 | 28 | ''' 29 | 30 | import os 31 | import argparse 32 | import time 33 | import json 34 | import cv2 35 | 36 | import onnxruntime 37 | 38 | import numpy as np 39 | from tqdm import tqdm 40 | from pathlib import Path 41 | from math import cos, sin, pi 42 | 43 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 44 | 45 | root_path = str(Path(__file__).absolute().parent.parent) 46 | 47 | 48 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100): 49 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 50 | pitch = pitch * np.pi / 180 51 | yaw = -(yaw * np.pi / 180) 52 | roll = roll * np.pi / 180 53 | 54 | if tdx != None and tdy != None: 55 | tdx = tdx 56 | tdy = tdy 57 | else: 58 | height, width = img.shape[:2] 59 | tdx = width / 2 60 | tdy = height / 
2 61 | 62 | # X-Axis pointing to right. drawn in red 63 | x1 = size * (cos(yaw) * cos(roll)) + tdx 64 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 65 | 66 | # Y-Axis | drawn in green 67 | # v 68 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 69 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 70 | 71 | # Z-Axis (out of the screen) drawn in blue 72 | x3 = size * (sin(yaw)) + tdx 73 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 74 | 75 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 76 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 77 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 78 | return img 79 | 80 | def main(args): 81 | 82 | sess1 = onnxruntime.InferenceSession(f'{root_path}/FSA-Net_pytorch/pretrained/fsanet-1x1-iter-688590.onnx') 83 | sess2 = onnxruntime.InferenceSession(f'{root_path}/FSA-Net_pytorch/pretrained/fsanet-var-iter-688590.onnx') 84 | 85 | # sess1 + sess2 --> 2.37 MB 86 | 87 | 88 | with open(args.json_file, "r") as json_f: 89 | pd_results_list = json.load(json_f) 90 | 91 | pd_poses = [] # predicted pose collection 92 | gt_poses = [] # ground-truth pose collection 93 | taking_time_list = [] # how many ms per face 94 | 95 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 96 | if args.debug and ind > 50: break # for testing 97 | 98 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 99 | img_ori = cv2.imread(img_path) 100 | 101 | bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 102 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 103 | 104 | gt_pitch = pd_results['gt_pitch'] 105 | gt_yaw = pd_results['gt_yaw'] 106 | gt_roll = pd_results['gt_roll'] 107 | 108 | 109 | t1 = time.time() 110 | [x1, y1, x2, y2] = [int(i) for i in bbox] 111 | face_roi = img_ori[y1:y2+1,x1:x2+1] 112 | 113 | # preprocess headpose model input 114 | face_roi = cv2.resize(face_roi,(64,64)) 115 | face_roi = face_roi.transpose((2,0,1)) 116 | face_roi = np.expand_dims(face_roi,axis=0) 117 | face_roi = (face_roi-127.5)/128 118 | face_roi = face_roi.astype(np.float32) 119 | 120 | # get headpose 121 | res1 = sess1.run(["output"], {"input": face_roi})[0] 122 | res2 = sess2.run(["output"], {"input": face_roi})[0] 123 | 124 | yaw, pitch, roll = np.mean(np.vstack((res1,res2)),axis=0) 125 | t2 = time.time() 126 | taking_time_list.append(t2-t1) 127 | 128 | pd_poses.append([pitch, yaw, roll]) 129 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 130 | 131 | if args.debug: 132 | save_img_path = "./tmp/"+str(ind).zfill(0)+\ 133 | "_p"+str(round(gt_pitch, 2))+"v"+str(round(pitch, 2))+\ 134 | "_y"+str(round(gt_yaw, 2))+"v"+str(round(yaw, 2))+\ 135 | "_r"+str(round(gt_roll, 2))+"v"+str(round(roll, 2))+".jpg" 136 | 137 | cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 138 | img_ori = draw_axis(img_ori, yaw, pitch, roll, 139 | tdx=(bbox[0]+bbox[2])/2, tdy=(bbox[1]+bbox[3])/2, size=100) 140 | cv2.imwrite(save_img_path, img_ori) 141 | 142 | '''print all results''' 143 | print("Inference one image taking time:", sum(taking_time_list[1:])/len(taking_time_list[1:])) 144 | 145 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 146 | error_list = np.min((error_list, 360 - error_list), axis=0) 147 | pose_matrix = np.mean(error_list, axis=0) 148 | MAE = np.mean(pose_matrix) 149 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, 
roll_error]: %s, %s, %s"%(len(taking_time_list), 150 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 151 | 152 | 153 | if __name__ == '__main__': 154 | parser = argparse.ArgumentParser(description='FAN inference pipeline') 155 | 156 | parser.add_argument('--root-imgdir', default='', 157 | help='root path to multiple images') 158 | parser.add_argument('--json-file', default='', 159 | help='json file path that contains multiple images and their head bboxes') 160 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 161 | 162 | args = parser.parse_args() 163 | main(args) -------------------------------------------------------------------------------- /exps/compare_HopeNet.py: -------------------------------------------------------------------------------- 1 | 2 | '''Too slow inference speed''' 3 | 4 | __author__ = 'Huayi Zhou' 5 | 6 | ''' 7 | 8 | git clone https://github.com/natanielruiz/deep-head-pose ./HopeNet 9 | 10 | Put this file under the main folder of codes project HopeNet 11 | https://github.com/natanielruiz/deep-head-pose 12 | 13 | usage: 14 | python compare_HopeNet.py --root-imgdir /path/to/root/imgdir \ 15 | --json-file /path/to/prepared/json/file 16 | 17 | 18 | e.g.: 19 | python compare_HopeNet.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 20 | [results] 21 | Inference one image taking time: 0.011602783960009378 22 | face number: 3403; MAE: 19.9984, [pitch_error, yaw_error, roll_error]: 19.1262, 24.0867, 16.7823 23 | 24 | 25 | python compare_HopeNet.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 26 | [results] 27 | Inference one image taking time: 0.011030971019915537 28 | face number: 15871; MAE: 17.0851, [pitch_error, yaw_error, roll_error]: 17.4948, 20.3525, 13.4079 29 | 30 | ''' 31 | 32 | import os 33 | import argparse 34 | import time 35 | import json 36 | import cv2 37 | 38 | import numpy as np 39 | from tqdm import tqdm 40 | from pathlib import Path 41 | from math import cos, sin, pi 42 | 43 | import torch 44 | import torchvision 45 | import torch.backends.cudnn as cudnn 46 | from codes import hopenet, utils 47 | from torchvision import transforms 48 | from PIL import Image 49 | from torch.autograd import Variable 50 | 51 | 52 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 53 | 54 | root_path = str(Path(__file__).absolute().parent.parent) 55 | 56 | 57 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100): 58 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 59 | pitch = pitch * np.pi / 180 60 | yaw = -(yaw * np.pi / 180) 61 | roll = roll * np.pi / 180 62 | 63 | if tdx != None and tdy != None: 64 | tdx = tdx 65 | tdy = tdy 66 | else: 67 | height, width = img.shape[:2] 68 | tdx = width / 2 69 | tdy = height / 2 70 | 71 | # X-Axis pointing to right. 
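# NOTE (hedged usage sketch, not part of the original helper): the three segments
# drawn below are the head-frame X (red), Y (green) and Z (blue) axes under the
# given pose, anchored at (tdx, tdy).  A minimal standalone call, with invented
# angle values, could look like:
'''
canvas = np.zeros((200, 200, 3), dtype=np.uint8)
canvas = draw_axis(canvas, yaw=30, pitch=10, roll=0, tdx=100, tdy=100, size=80)
cv2.imwrite("axis_check.jpg", canvas)
'''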
drawn in red 72 | x1 = size * (cos(yaw) * cos(roll)) + tdx 73 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 74 | 75 | # Y-Axis | drawn in green 76 | # v 77 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 78 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 79 | 80 | # Z-Axis (out of the screen) drawn in blue 81 | x3 = size * (sin(yaw)) + tdx 82 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 83 | 84 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 85 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 86 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 87 | return img 88 | 89 | def main(args): 90 | 91 | cudnn.enabled = True 92 | snapshot_path = "./hopenet_robust_alpha1.pkl" # 91.4 MB 93 | 94 | # ResNet50 structure 95 | model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66) 96 | 97 | # Load snapshot 98 | saved_state_dict = torch.load(snapshot_path) 99 | model.load_state_dict(saved_state_dict) 100 | model.cuda() 101 | model.eval() 102 | 103 | transformations = transforms.Compose([transforms.Scale(224), 104 | transforms.CenterCrop(224), transforms.ToTensor(), 105 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) 106 | 107 | idx_tensor = [idx for idx in range(66)] 108 | idx_tensor = torch.FloatTensor(idx_tensor).cuda() 109 | 110 | 111 | 112 | with open(args.json_file, "r") as json_f: 113 | pd_results_list = json.load(json_f) 114 | 115 | pd_poses = [] # predicted pose collection 116 | gt_poses = [] # ground-truth pose collection 117 | taking_time_list = [] # how many ms per face 118 | 119 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 120 | if args.debug and ind > 50: break # for testing 121 | 122 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 123 | img_ori = cv2.imread(img_path) 124 | 125 | bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 126 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 127 | 128 | gt_pitch = pd_results['gt_pitch'] 129 | gt_yaw = pd_results['gt_yaw'] 130 | gt_roll = pd_results['gt_roll'] 131 | 132 | 133 | t1 = time.time() 134 | [x1, y1, x2, y2] = [int(i) for i in bbox] 135 | face_roi = img_ori[y1:y2+1,x1:x2+1] 136 | 137 | # preprocess headpose model input 138 | face_roi = Image.fromarray(cv2.cvtColor(face_roi, cv2.COLOR_BGR2RGB)) # opencv --> PIL 139 | face_roi = transformations(face_roi) 140 | face_roi = face_roi.unsqueeze(0) 141 | 142 | face_roi = Variable(face_roi).cuda() 143 | 144 | # get headpose 145 | yaw_predicted, pitch_predicted, roll_predicted = model(face_roi) 146 | 147 | # Continuous predictions 148 | yaw_predicted = utils.softmax_temperature(yaw_predicted.data, 1) 149 | pitch_predicted = utils.softmax_temperature(pitch_predicted.data, 1) 150 | roll_predicted = utils.softmax_temperature(roll_predicted.data, 1) 151 | 152 | yaw_predicted = torch.sum(yaw_predicted * idx_tensor, 1).cpu() * 3 - 99 153 | pitch_predicted = torch.sum(pitch_predicted * idx_tensor, 1).cpu() * 3 - 99 154 | roll_predicted = torch.sum(roll_predicted * idx_tensor, 1).cpu() * 3 - 99 155 | 156 | yaw = yaw_predicted[0].cpu().numpy() 157 | pitch = pitch_predicted[0].cpu().numpy() 158 | roll = roll_predicted[0].cpu().numpy() 159 | 160 | t2 = time.time() 161 | taking_time_list.append(t2-t1) 162 | 163 | pd_poses.append([pitch, yaw, roll]) 164 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 165 | 166 | if args.debug: 167 | 
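# NOTE (hedged explanation of the decoding a few lines above, added for clarity):
# HopeNet predicts a distribution over 66 yaw/pitch/roll bins of 3 degrees each, and
# the continuous angle is the expectation over bin indices mapped back to degrees,
# i.e. sum_i(p_i * i) * 3 - 99, covering [-99, 96].  Quick sanity check:
'''
probs = np.zeros(66); probs[33] = 1.0                  # all mass on bin 33
angle = float(np.sum(probs * np.arange(66)) * 3 - 99)  # -> 0.0
'''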
save_img_path = "./tmp/"+str(ind).zfill(0)+\ 168 | "_p"+str(round(gt_pitch, 2))+"v"+str(np.round(pitch, 2))+\ 169 | "_y"+str(round(gt_yaw, 2))+"v"+str(np.round(yaw, 2))+\ 170 | "_r"+str(round(gt_roll, 2))+"v"+str(np.round(roll, 2))+".jpg" 171 | 172 | cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 173 | img_ori = draw_axis(img_ori, yaw, pitch, roll, 174 | tdx=(bbox[0]+bbox[2])/2, tdy=(bbox[1]+bbox[3])/2, size=100) 175 | cv2.imwrite(save_img_path, img_ori) 176 | 177 | '''print all results''' 178 | print("Inference one image taking time:", sum(taking_time_list[1:])/len(taking_time_list[1:])) 179 | 180 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 181 | error_list = np.min((error_list, 360 - error_list), axis=0) 182 | pose_matrix = np.mean(error_list, axis=0) 183 | MAE = np.mean(pose_matrix) 184 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%(len(taking_time_list), 185 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 186 | 187 | 188 | if __name__ == '__main__': 189 | parser = argparse.ArgumentParser(description='FAN inference pipeline') 190 | 191 | parser.add_argument('--root-imgdir', default='', 192 | help='root path to multiple images') 193 | parser.add_argument('--json-file', default='', 194 | help='json file path that contains multiple images and their head bboxes') 195 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 196 | 197 | args = parser.parse_args() 198 | main(args) -------------------------------------------------------------------------------- /exps/compare_SynergyNet.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | __author__ = 'Huayi Zhou' 4 | 5 | ''' 6 | 7 | 8 | Put this file under the main folder of codes project SynergyNet 9 | git clone https://github.com/choyingw/SynergyNet SynergyNet 10 | https://drive.google.com/file/d/1SQsMhvAmpD1O8Hm0yEGom0C0rXtA0qs8/view [3dmm_data] data link 11 | https://drive.google.com/file/d/1BVHbiLTfX6iTeJcNbh-jgHjWDoemfrzG/view [pretrained weight] data link 12 | 13 | usage: 14 | python compare_SynergyNet.py --root-imgdir /path/to/root/imgdir \ 15 | --json-file /path/to/prepared/json/file 16 | 17 | e.g.: 18 | python compare_SynergyNet.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 19 | [results][2023-01-15] 20 | Inference one image taking time: 0.007218158028596455 21 | face number: 3415; MAE: 42.212, [pitch_error, yaw_error, roll_error]: 35.5837, 39.5468, 51.5054 22 | 23 | python compare_SynergyNet.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 24 | [results][2023-01-15] 25 | Inference one image taking time: 0.006917836413339054 26 | face number: 15885; MAE: 24.6768, [pitch_error, yaw_error, roll_error]: 23.518, 27.5607, 22.9518 27 | ''' 28 | 29 | import os 30 | import argparse 31 | import time 32 | import json 33 | import cv2 34 | 35 | import numpy as np 36 | from tqdm import tqdm 37 | from pathlib import Path 38 | from math import cos, sin, pi 39 | 40 | import types 41 | import torch 42 | import torchvision.transforms as transforms 43 | from utils.ddfa 
import ToTensor, Normalize 44 | from utils.inference import predict_pose, predict_sparseVert, predict_denseVert, crop_img 45 | from model_building import SynergyNet 46 | 47 | 48 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 49 | 50 | root_path = str(Path(__file__).absolute().parent.parent) 51 | 52 | 53 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100): 54 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 55 | pitch = pitch * np.pi / 180 56 | yaw = -(yaw * np.pi / 180) 57 | roll = roll * np.pi / 180 58 | 59 | if tdx != None and tdy != None: 60 | tdx = tdx 61 | tdy = tdy 62 | else: 63 | height, width = img.shape[:2] 64 | tdx = width / 2 65 | tdy = height / 2 66 | 67 | # X-Axis pointing to right. drawn in red 68 | x1 = size * (cos(yaw) * cos(roll)) + tdx 69 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 70 | 71 | # Y-Axis | drawn in green 72 | # v 73 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 74 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 75 | 76 | # Z-Axis (out of the screen) drawn in blue 77 | x3 = size * (sin(yaw)) + tdx 78 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 79 | 80 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 81 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 82 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 83 | return img 84 | 85 | def main(args): 86 | 87 | IMG_SIZE = 120 # Following 3DDFA-V2, we also use 120x120 resolution 88 | transform = transforms.Compose([ToTensor(), Normalize(mean=127.5, std=128)]) 89 | 90 | # load pre-tained model 91 | checkpoint_fp = 'pretrained/best.pth.tar' 92 | checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict'] 93 | 94 | args_SynergyNet = types.SimpleNamespace() 95 | args_SynergyNet.arch = 'mobilenet_v2' 96 | args_SynergyNet.img_size = 120 97 | args_SynergyNet.devices_id = [0] 98 | 99 | model = SynergyNet(args_SynergyNet) 100 | model_dict = model.state_dict() 101 | 102 | # because the model is trained by multiple gpus, prefix 'module' should be removed 103 | for k in checkpoint.keys(): 104 | model_dict[k.replace('module.', '')] = checkpoint[k] 105 | 106 | model.load_state_dict(model_dict, strict=False) 107 | model = model.cuda() 108 | model.eval() 109 | 110 | 111 | with open(args.json_file, "r") as json_f: 112 | pd_results_list = json.load(json_f) 113 | 114 | pd_poses = [] # predicted pose collection 115 | gt_poses = [] # ground-truth pose collection 116 | taking_time_list = [] # how many ms per face 117 | 118 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 119 | if args.debug and ind > 50: break # for testing 120 | 121 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 122 | img_ori = cv2.imread(img_path) 123 | 124 | # bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 125 | bbox = pd_results['gt_bbox'] 126 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 127 | 128 | gt_pitch = pd_results['gt_pitch'] 129 | gt_yaw = pd_results['gt_yaw'] 130 | gt_roll = pd_results['gt_roll'] 131 | 132 | 133 | t1 = time.time() 134 | # [x1, y1, x2, y2] = [int(i) for i in bbox] 135 | # face_roi = img_ori[y1:y2+1,x1:x2+1] 136 | 137 | HCenter = (bbox[1] + bbox[3])/2 138 | WCenter = (bbox[0] + bbox[2])/2 139 | side_len = bbox[3]-bbox[1] 140 | margin = side_len * 0.75 // 2 # a larger bbox will result a worse MAE 141 | bbox[0], bbox[1], bbox[2], 
bbox[3] = WCenter-margin, HCenter-margin, WCenter+margin, HCenter+margin 142 | face_roi = crop_img(img_ori, bbox) 143 | 144 | img = cv2.resize(face_roi, dsize=(IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_LINEAR) 145 | input = transform(img).unsqueeze(0) 146 | with torch.no_grad(): 147 | input = input.cuda() 148 | param = model.forward_test(input) 149 | param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 150 | # inferences 151 | # lmks = predict_sparseVert(param, bbox, transform=True) 152 | # vertices = predict_denseVert(param, bbox, transform=True) 153 | angles, translation = predict_pose(param, bbox) 154 | yaw, pitch, roll = angles[0], angles[1], angles[2] 155 | 156 | t2 = time.time() 157 | taking_time_list.append(t2-t1) 158 | 159 | pd_poses.append([pitch, yaw, roll]) 160 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 161 | 162 | if args.debug: 163 | save_img_path = "./tmp/"+str(ind).zfill(0)+\ 164 | "_p"+str(round(gt_pitch, 2))+"v"+str(round(pitch, 2))+\ 165 | "_y"+str(round(gt_yaw, 2))+"v"+str(round(yaw, 2))+\ 166 | "_r"+str(round(gt_roll, 2))+"v"+str(round(roll, 2))+".jpg" 167 | 168 | cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 169 | img_ori = draw_axis(img_ori, yaw, pitch, roll, 170 | tdx=(bbox[0]+bbox[2])/2, tdy=(bbox[1]+bbox[3])/2, size=100) 171 | cv2.imwrite(save_img_path, img_ori) 172 | 173 | '''print all results''' 174 | print("Inference one image taking time:", sum(taking_time_list[1:])/len(taking_time_list[1:])) 175 | 176 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 177 | error_list = np.min((error_list, 360 - error_list), axis=0) 178 | pose_matrix = np.mean(error_list, axis=0) 179 | MAE = np.mean(pose_matrix) 180 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%(len(taking_time_list), 181 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 182 | 183 | 184 | if __name__ == '__main__': 185 | parser = argparse.ArgumentParser(description='FAN inference pipeline') 186 | 187 | parser.add_argument('--root-imgdir', default='', 188 | help='root path to multiple images') 189 | parser.add_argument('--json-file', default='', 190 | help='json file path that contains multiple images and their head bboxes') 191 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 192 | 193 | args = parser.parse_args() 194 | main(args) -------------------------------------------------------------------------------- /exps/compare_dad3dnet.py: -------------------------------------------------------------------------------- 1 | 2 | '''Too slow inference speed''' 3 | 4 | __author__ = 'Huayi Zhou' 5 | 6 | ''' 7 | 8 | usage: 9 | python compare_dad3dnet.py --root-imgdir /path/to/root/imgdir \ 10 | --json-file /path/to/prepared/json/file 11 | 12 | e.g.: 13 | python compare_dad3dnet.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_full.json --debug 14 | [results] 15 | Inference one image taking time: 0.018059632885267938 16 | frontal face number: 3741; MAE_frontal: 35.1075, [pitch_error, yaw_error, roll_error]: 41.2611, 22.4362, 41.6252 17 | face number: 7414; MAE: 80.1786, [pitch_error, yaw_error, roll_error]: 85.2124, 68.098, 87.2253 18 | [results][2023-01-14] 19 | Inference one image taking time: 0.01842204154443272 20 | frontal face number: 3413; MAE_frontal: 
32.6388, [pitch_error, yaw_error, roll_error]: 38.889, 19.987, 39.0404 21 | face number: 6715; MAE: 80.2083, [pitch_error, yaw_error, roll_error]: 86.3455, 65.9651, 88.3143 22 | 23 | python compare_dad3dnet.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_full.json --debug 24 | [results] 25 | Inference one image taking time: 0.02023921753516816 26 | frontal face number: 16396; MAE_frontal: 21.7698, [pitch_error, yaw_error, roll_error]: 26.1876, 11.4288, 27.6928 27 | face number: 32604; MAE: 80.5461, [pitch_error, yaw_error, roll_error]: 88.7443, 58.7891, 94.1048 28 | [results][2023-01-14] 29 | Inference one image taking time: 0.018940799204607884 30 | frontal face number: 15886; MAE_frontal: 18.9887, [pitch_error, yaw_error, roll_error]: 22.4626, 10.58, 23.9235 31 | face number: 31976; MAE: 79.7676, [pitch_error, yaw_error, roll_error]: 87.6178, 58.6636, 93.0214 32 | 33 | ''' 34 | 35 | import os 36 | import argparse 37 | import time 38 | import json 39 | import cv2 40 | 41 | import numpy as np 42 | from tqdm import tqdm 43 | from pathlib import Path 44 | from math import cos, sin, pi 45 | 46 | from model_training.model.flame import calculate_rpy, FlameParams, FLAME_CONSTS 47 | from pytorch_toolbelt.utils import read_rgb_image 48 | from predictor import FaceMeshPredictor 49 | predictor_dad3dnet = FaceMeshPredictor.dad_3dnet() 50 | 51 | 52 | np.set_printoptions(suppress=True) 53 | 54 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 55 | 56 | root_path = str(Path(__file__).absolute().parent.parent) 57 | 58 | 59 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100): 60 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 61 | pitch = pitch * np.pi / 180 62 | yaw = -(yaw * np.pi / 180) 63 | roll = roll * np.pi / 180 64 | 65 | if tdx != None and tdy != None: 66 | tdx = tdx 67 | tdy = tdy 68 | else: 69 | height, width = img.shape[:2] 70 | tdx = width / 2 71 | tdy = height / 2 72 | 73 | # X-Axis pointing to right. 
drawn in red 74 | x1 = size * (cos(yaw) * cos(roll)) + tdx 75 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 76 | 77 | # Y-Axis | drawn in green 78 | # v 79 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 80 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 81 | 82 | # Z-Axis (out of the screen) drawn in blue 83 | x3 = size * (sin(yaw)) + tdx 84 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 85 | 86 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 87 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 88 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 89 | return img 90 | 91 | 92 | def main(args): 93 | 94 | # with open(args.json_file, "r") as json_f: 95 | # gt_results_dict = json.load(json_f) 96 | 97 | 98 | with open(args.json_file, "r") as json_f: 99 | pd_results_list = json.load(json_f) 100 | 101 | pd_poses = [] # predicted pose collection 102 | gt_poses = [] # ground-truth pose collection 103 | pd_poses_frontal = [] # predicted pose collection of frontal face 104 | gt_poses_frontal = [] # ground-truth pose collection of frontal face 105 | taking_time_list = [] # how many ms per face 106 | 107 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 108 | if args.debug and ind > 50: break # for testing 109 | 110 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 111 | img_ori = cv2.imread(img_path) 112 | 113 | # bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 114 | bbox = pd_results['gt_bbox'] 115 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 116 | 117 | gt_pitch = pd_results['gt_pitch'] 118 | gt_yaw = pd_results['gt_yaw'] 119 | gt_roll = pd_results['gt_roll'] 120 | 121 | t1 = time.time() 122 | [x1, y1, x2, y2] = [int(i) for i in bbox] 123 | face_roi = img_ori[y1:y2+1,x1:x2+1] 124 | 125 | cropped_img_path = "./temp_cropped_img.jpg" 126 | cv2.imwrite(cropped_img_path, face_roi) 127 | image = read_rgb_image(cropped_img_path) 128 | predictions = predictor_dad3dnet(image) 129 | params_3dmm = predictions["3dmm_params"].float() 130 | flame_params = FlameParams.from_3dmm(params_3dmm, FLAME_CONSTS) 131 | rpy = calculate_rpy(flame_params) 132 | yaw, pitch, roll = rpy.yaw, rpy.pitch, rpy.roll 133 | 134 | t2 = time.time() 135 | taking_time_list.append(t2-t1) 136 | 137 | pd_poses.append([pitch, yaw, roll]) 138 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 139 | 140 | if abs(gt_yaw) < 90: 141 | pd_poses_frontal.append([pitch, yaw, roll]) 142 | gt_poses_frontal.append([gt_pitch, gt_yaw, gt_roll]) 143 | 144 | if args.debug: 145 | save_img_path = "./tmp/"+str(ind).zfill(2)+"#"+str(id).zfill(2)+\ 146 | "_p"+str(round(gt_pitch, 2))+"#"+str(np.round(pitch, 2))+\ 147 | "_y"+str(round(gt_yaw, 2))+"#"+str(np.round(yaw, 2))+\ 148 | "_r"+str(round(gt_roll, 2))+"#"+str(np.round(roll, 2))+".jpg" 149 | 150 | img_ori_copy = cv2.rectangle(img_ori.copy(), (int(bbox[0]), int(bbox[1])), 151 | (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 152 | img_ori_copy = draw_axis(img_ori_copy, yaw, pitch, roll, 153 | tdx=(bbox[0]+bbox[2])/2, tdy=(bbox[1]+bbox[3])/2, size=100) 154 | cv2.imwrite(save_img_path, img_ori_copy) 155 | 156 | ind += 1 157 | os.remove(cropped_img_path) 158 | 159 | 160 | '''print all results''' 161 | print("Inference one image taking time:", sum(taking_time_list[1:])/len(taking_time_list[1:])) 162 | 163 | error_list_frontal = np.abs(np.array(pd_poses_frontal) - np.array(gt_poses_frontal)) 164 
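# NOTE (hedged aside, not part of the original script): MAE_frontal reported below
# is computed only over heads with |gt_yaw| < 90 (collected separately in the loop
# above), while MAE uses every matched head from the full-range json.  An
# equivalent mask-based selection of the frontal subset would be:
'''
gt_arr, pd_arr = np.array(gt_poses), np.array(pd_poses)
frontal = np.abs(gt_arr[:, 1]) < 90      # column 1 is yaw in [pitch, yaw, roll]
gt_frontal, pd_frontal = gt_arr[frontal], pd_arr[frontal]
'''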
| error_list_frontal = np.min((error_list_frontal, 360 - error_list_frontal), axis=0) 165 | pose_matrix_frontal = np.mean(error_list_frontal, axis=0) 166 | MAE_frontal = np.mean(pose_matrix_frontal) 167 | print("frontal face number: %d; MAE_frontal: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%( 168 | len(error_list_frontal), round(MAE_frontal, 4), round(pose_matrix_frontal[0], 4), 169 | round(pose_matrix_frontal[1], 4), round(pose_matrix_frontal[2], 4))) 170 | 171 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 172 | error_list = np.min((error_list, 360 - error_list), axis=0) 173 | pose_matrix = np.mean(error_list, axis=0) 174 | MAE = np.mean(pose_matrix) 175 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%(len(taking_time_list), 176 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 177 | 178 | 179 | if __name__ == '__main__': 180 | parser = argparse.ArgumentParser(description='FAN inference pipeline') 181 | 182 | parser.add_argument('--root-imgdir', default='', 183 | help='root path to multiple images') 184 | parser.add_argument('--json-file', default='', 185 | help='json file path that contains multiple images and their head bboxes') 186 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 187 | 188 | args = parser.parse_args() 189 | main(args) -------------------------------------------------------------------------------- /exps/compare_img2pose.py: -------------------------------------------------------------------------------- 1 | 2 | '''Too slow inference speed''' 3 | 4 | __author__ = 'Huayi Zhou' 5 | 6 | ''' 7 | 8 | git clone https://github.com/vitoralbiero/img2pose ./img2pose 9 | 10 | Put this file under the main folder of codes project img2pose 11 | 12 | usage: 13 | python compare_img2pose.py --root-imgdir /path/to/root/imgdir \ 14 | --json-file /path/to/prepared/json/file 15 | 16 | 17 | e.g.: 18 | python compare_img2pose.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 19 | [results] 20 | Inference one image taking time: 0.019194672508227584 21 | face number: 3138; MAE: 19.9507, [pitch_error, yaw_error, roll_error]: 22.1878, 17.2238, 20.4407 22 | 23 | 24 | python compare_img2pose.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 25 | [results] 26 | Inference one image taking time: 0.019501390085946375 27 | face number: 15724; MAE: 15.0667, [pitch_error, yaw_error, roll_error]: 16.6038, 13.0171, 15.5792 28 | 29 | ''' 30 | 31 | import os 32 | import argparse 33 | import time 34 | import json 35 | import cv2 36 | 37 | import numpy as np 38 | from tqdm import tqdm 39 | from pathlib import Path 40 | from math import cos, sin, pi 41 | 42 | 43 | import torch 44 | from torchvision import transforms 45 | from PIL import Image 46 | from scipy.spatial.transform import Rotation 47 | from img2pose import img2poseModel 48 | from model_loader import load_model 49 | 50 | 51 | np.set_printoptions(suppress=True) 52 | 53 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 54 | 55 | root_path = str(Path(__file__).absolute().parent.parent) 56 | 57 | 58 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100): 
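    # Draws the head's local X/Y/Z axes on `img` from Euler angles given in degrees:
    # the X-axis (pointing right) in red, the Y-axis (pointing down) in green, and the
    # Z-axis (out of the screen) in blue. (tdx, tdy) is the drawing origin and defaults
    # to the image center; `size` is the drawn axis length in pixels.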
59 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 60 | pitch = pitch * np.pi / 180 61 | yaw = -(yaw * np.pi / 180) 62 | roll = roll * np.pi / 180 63 | 64 | if tdx != None and tdy != None: 65 | tdx = tdx 66 | tdy = tdy 67 | else: 68 | height, width = img.shape[:2] 69 | tdx = width / 2 70 | tdy = height / 2 71 | 72 | # X-Axis pointing to right. drawn in red 73 | x1 = size * (cos(yaw) * cos(roll)) + tdx 74 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 75 | 76 | # Y-Axis | drawn in green 77 | # v 78 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 79 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 80 | 81 | # Z-Axis (out of the screen) drawn in blue 82 | x3 = size * (sin(yaw)) + tdx 83 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 84 | 85 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 86 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 87 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 88 | return img 89 | 90 | 91 | def convert_to_aflw(rotvec, is_rotvec=True): 92 | if is_rotvec: 93 | rotvec = Rotation.from_rotvec(rotvec).as_matrix() 94 | rot_mat_2 = np.transpose(rotvec) 95 | angle = Rotation.from_matrix(rot_mat_2).as_euler('xyz', degrees=True) 96 | 97 | return np.array([angle[0], -angle[1], -angle[2]]) # Pitch, Yaw, Roll 98 | 99 | 100 | def main(args): 101 | 102 | transform = transforms.Compose([transforms.ToTensor()]) 103 | 104 | DEPTH = 18 105 | MAX_SIZE = 1400 106 | MIN_SIZE = 400 107 | 108 | POSE_MEAN = "./models/WIDER_train_pose_mean_v1.npy" 109 | POSE_STDDEV = "./models/WIDER_train_pose_stddev_v1.npy" 110 | # MODEL_PATH = "./models/img2pose_v1_ft_300w_lp.pth" 111 | MODEL_PATH = "./models/img2pose_v1.pth" # 161 MB 112 | 113 | threed_points = np.load('./pose_references/reference_3d_68_points_trans.npy') 114 | 115 | pose_mean = np.load(POSE_MEAN) 116 | pose_stddev = np.load(POSE_STDDEV) 117 | 118 | img2pose_model = img2poseModel( 119 | DEPTH, MIN_SIZE, MAX_SIZE, 120 | pose_mean=pose_mean, pose_stddev=pose_stddev, 121 | threed_68_points=threed_points, 122 | rpn_pre_nms_top_n_test=500, 123 | rpn_post_nms_top_n_test=10, 124 | ) 125 | load_model(img2pose_model.fpn_model, MODEL_PATH, cpu_mode=str(img2pose_model.device) == "cpu", model_only=True) 126 | img2pose_model.evaluate() 127 | 128 | 129 | total_failures = 0 130 | 131 | 132 | with open(args.json_file, "r") as json_f: 133 | pd_results_list = json.load(json_f) 134 | 135 | pd_poses = [] # predicted pose collection 136 | gt_poses = [] # ground-truth pose collection 137 | taking_time_list = [] # how many ms per face 138 | 139 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 140 | if args.debug and ind > 50: break # for testing 141 | 142 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 143 | img_ori = cv2.imread(img_path) 144 | 145 | bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 146 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 147 | 148 | gt_pitch = pd_results['gt_pitch'] 149 | gt_yaw = pd_results['gt_yaw'] 150 | gt_roll = pd_results['gt_roll'] 151 | 152 | 153 | t1 = time.time() 154 | [x1, y1, x2, y2] = [int(i) for i in bbox] 155 | face_roi = img_ori[y1:y2+1,x1:x2+1] 156 | 157 | # preprocess headpose model input 158 | face_roi = Image.fromarray(cv2.cvtColor(face_roi, cv2.COLOR_BGR2RGB)) # opencv --> PIL 159 | 160 | # get headpose 161 | res = 
img2pose_model.predict([transform(face_roi)]) 162 | 163 | res = res[0] 164 | bboxes = res["boxes"].cpu().numpy().astype('float') 165 | 166 | if len(bboxes) == 0: 167 | total_failures += 1 168 | continue 169 | 170 | max_score = 0 171 | best_index = -1 172 | for i in range(len(bboxes)): 173 | score = res["scores"][i] 174 | if score > max_score: 175 | max_score = score 176 | best_index = i 177 | 178 | pose_pred = res["dofs"].cpu().numpy()[best_index].astype('float') 179 | pose_pred = np.asarray(pose_pred.squeeze()) 180 | pose_pred[:3] = convert_to_aflw(pose_pred[:3]) 181 | 182 | [pitch, yaw, roll] = pose_pred[:3] 183 | 184 | t2 = time.time() 185 | taking_time_list.append(t2-t1) 186 | 187 | pd_poses.append([pitch, yaw, roll]) 188 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 189 | 190 | if args.debug: 191 | save_img_path = "./tmp/"+str(ind).zfill(0)+\ 192 | "_p"+str(round(gt_pitch, 2))+"v"+str(np.round(pitch, 2))+\ 193 | "_y"+str(round(gt_yaw, 2))+"v"+str(np.round(yaw, 2))+\ 194 | "_r"+str(round(gt_roll, 2))+"v"+str(np.round(roll, 2))+".jpg" 195 | 196 | cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 197 | img_ori = draw_axis(img_ori, yaw, pitch, roll, 198 | tdx=(bbox[0]+bbox[2])/2, tdy=(bbox[1]+bbox[3])/2, size=100) 199 | cv2.imwrite(save_img_path, img_ori) 200 | 201 | '''print all results''' 202 | print("Inference one image taking time:", sum(taking_time_list[1:])/len(taking_time_list[1:])) 203 | 204 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 205 | error_list = np.min((error_list, 360 - error_list), axis=0) 206 | pose_matrix = np.mean(error_list, axis=0) 207 | MAE = np.mean(pose_matrix) 208 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%(len(taking_time_list), 209 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 210 | 211 | 212 | if __name__ == '__main__': 213 | parser = argparse.ArgumentParser(description='FAN inference pipeline') 214 | 215 | parser.add_argument('--root-imgdir', default='', 216 | help='root path to multiple images') 217 | parser.add_argument('--json-file', default='', 218 | help='json file path that contains multiple images and their head bboxes') 219 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 220 | 221 | args = parser.parse_args() 222 | main(args) -------------------------------------------------------------------------------- /exps/convert_coco_style_300wlp_aflw2000.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import copy 4 | import shutil 5 | from tqdm import tqdm 6 | 7 | coco_dict_template = { 8 | 'info': { 9 | 'description': 'Face landmarks, Euler angles and 3D Cubes of 300W_LP & AFLW2000 Dataset', 10 | 'url': 'http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm', 11 | 'version': '1.0', 12 | 'year': 2022, 13 | 'contributor': 'Huayi Zhou', 14 | 'date_created': '2022/07/28', 15 | }, 16 | 'licences': [{ 17 | 'url': 'http://creativecommons.org/licenses/by-nc/2.0', 18 | 'name': 'Attribution-NonCommercial License' 19 | }], 20 | 'images': [], 21 | 'annotations': [], 22 | 'categories': [{ 23 | 'supercategory': 'person', 24 | 'id': 1, 25 | 'name': 'person' 26 | }] 27 | } 28 | 29 | def convert_to_coco_style(source_img, target_img, source_json, target_json, coco_dict): 30 | print(source_img, " --> ", target_img) 31 | print(source_json, " --> ", target_json) 32 | 33 | if 
os.path.exists(target_img): 34 | shutil.rmtree(target_img) 35 | os.mkdir(target_img) 36 | 37 | json_img_dict = json.load(open(source_json, "r")) 38 | index_id = 0 39 | for img_name in tqdm(json_img_dict.keys()): 40 | labels = json_img_dict[img_name] 41 | 42 | image_id = 1000000 + index_id # 300W_LP has about 122217 images 43 | temp_image = {'file_name': str(image_id)+".jpg", 44 | 'height': labels['height'], 'width': labels['width'], 'id': image_id} 45 | 46 | source_img_path = os.path.join(source_img, img_name) 47 | target_img_path = os.path.join(target_img, str(image_id)+".jpg") 48 | shutil.copy(source_img_path, target_img_path) 49 | 50 | # bbox: [xmin, ymin, xmax, ymax] --> [xmin, ymin, w, h] 51 | [xmin, ymin, xmax, ymax] = labels["bbox"] 52 | labels["bbox"] = [xmin, ymin, xmax-xmin, ymax-ymin] 53 | 54 | # pose: [yaw, pitch, roll] --> [pitch, yaw, roll] 55 | [yaw, pitch, roll] = labels["pose"] 56 | labels["pose"] = [pitch, yaw, roll] 57 | 58 | labels_new = { 59 | 'face2d_pts': labels["landmarks"], 60 | 'bbox': labels["bbox"], 61 | 'euler_angles': labels["pose"], 62 | 'cube': labels["cube"], 63 | 'image_id': image_id, 64 | 'id': image_id, # only one head in each image 65 | 'category_id': 1, 66 | 'iscrowd': 0, 67 | 'segmentation': [], # This script is not for segmentation 68 | 'area': round(labels["bbox"][-1] * labels["bbox"][-2], 4) 69 | } 70 | coco_dict['images'].append(temp_image) 71 | coco_dict['annotations'].append(labels_new) 72 | 73 | index_id += 1 74 | 75 | with open(target_json, "w") as dst_ann_file: 76 | json.dump(coco_dict, dst_ann_file) 77 | 78 | 79 | if __name__ == '__main__': 80 | 81 | train_image_file = "./HeadCube3D/images/300W_LP/" 82 | train_image_file_coco = "./HeadCube3D/images/train/" 83 | train_json_file = "./HeadCube3D/annotations/train_300W_LP.json" 84 | train_json_file_coco = "./HeadCube3D/annotations/train_300W_LP_coco_style.json" 85 | coco_dict_train = copy.deepcopy(coco_dict_template) 86 | convert_to_coco_style(train_image_file, train_image_file_coco, 87 | train_json_file, train_json_file_coco, coco_dict_train) 88 | 89 | 90 | val_image_file = "./HeadCube3D/images/AFLW2000/" 91 | val_image_file_coco = "./HeadCube3D/images/validation/" 92 | val_json_file = "./HeadCube3D/annotations/val_AFLW2000.json" 93 | val_json_file_coco = "./HeadCube3D/annotations/val_AFLW2000_coco_style.json" 94 | coco_dict_val = copy.deepcopy(coco_dict_template) 95 | convert_to_coco_style(val_image_file, val_image_file_coco, 96 | val_json_file, val_json_file_coco, coco_dict_val) 97 | -------------------------------------------------------------------------------- /exps/convert_coco_style_300wlp_biwi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import copy 4 | import shutil 5 | from tqdm import tqdm 6 | 7 | coco_dict_template = { 8 | 'info': { 9 | 'description': 'Face landmarks, Euler angles and 3D Cubes of 300W_LP & AFLW2000 Dataset', 10 | 'url': 'http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm', 11 | 'version': '1.0', 12 | 'year': 2022, 13 | 'contributor': 'Huayi Zhou', 14 | 'date_created': '2022/07/28', 15 | }, 16 | 'licences': [{ 17 | 'url': 'http://creativecommons.org/licenses/by-nc/2.0', 18 | 'name': 'Attribution-NonCommercial License' 19 | }], 20 | 'images': [], 21 | 'annotations': [], 22 | 'categories': [{ 23 | 'supercategory': 'person', 24 | 'id': 1, 25 | 'name': 'person' 26 | }] 27 | } 28 | 29 | def convert_to_coco_style(source_img, target_img, source_json, target_json, coco_dict): 30 | 
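    # Builds a COCO-style detection json from a {image_name: labels} dict: each image is
    # assigned a synthetic integer id and written/linked as "<id>.jpg", the bbox is rewritten
    # from [xmin, ymin, xmax, ymax] to [x, y, w, h], and the pose is reordered from
    # [yaw, pitch, roll] to [pitch, yaw, roll] before being stored under 'euler_angles'.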
print(source_img, " --> ", target_img) 31 | print(source_json, " --> ", target_json) 32 | 33 | if os.path.exists(target_img): 34 | shutil.rmtree(target_img) 35 | os.mkdir(target_img) 36 | 37 | json_img_dict = json.load(open(source_json, "r")) 38 | index_id = 0 39 | for img_name in tqdm(json_img_dict.keys()): 40 | labels = json_img_dict[img_name] 41 | 42 | image_id = 1000000 + index_id # 300W_LP has about 122217 images 43 | temp_image = {'file_name': str(image_id)+".jpg", 44 | 'height': labels['height'], 'width': labels['width'], 'id': image_id} 45 | 46 | source_img_path = os.path.join(source_img, img_name) 47 | target_img_path = os.path.join(target_img, str(image_id)+".jpg") 48 | # shutil.copy(source_img_path, target_img_path) 49 | os.system('ln -s %s %s'%(source_img_path, target_img_path)) 50 | 51 | # bbox: [xmin, ymin, xmax, ymax] --> [xmin, ymin, w, h] 52 | [xmin, ymin, xmax, ymax] = labels["bbox"] 53 | labels["bbox"] = [xmin, ymin, xmax-xmin, ymax-ymin] 54 | 55 | # pose: [yaw, pitch, roll] --> [pitch, yaw, roll] 56 | [yaw, pitch, roll] = labels["pose"] 57 | labels["pose"] = [pitch, yaw, roll] 58 | 59 | labels_new = { 60 | 'face2d_pts': labels["landmarks"], 61 | 'bbox': labels["bbox"], 62 | 'euler_angles': labels["pose"], 63 | 'cube': labels["cube"], 64 | 'image_id': image_id, 65 | 'id': image_id, # only one head in each image 66 | 'category_id': 1, 67 | 'iscrowd': 0, 68 | 'segmentation': [], # This script is not for segmentation 69 | 'area': round(labels["bbox"][-1] * labels["bbox"][-2], 4) 70 | } 71 | coco_dict['images'].append(temp_image) 72 | coco_dict['annotations'].append(labels_new) 73 | 74 | index_id += 1 75 | 76 | with open(target_json, "w") as dst_ann_file: 77 | json.dump(coco_dict, dst_ann_file) 78 | 79 | 80 | if __name__ == '__main__': 81 | 82 | # train_image_file = "/datasdc/zhouhuayi/dataset/headpose/HeadCube3D/images/300W_LP/" 83 | # train_image_file_coco = "./HeadCube3D/images/train/" 84 | # if os.path.exists(train_image_file_coco): 85 | # shutil.rmtree(train_image_file_coco) 86 | # os.mkdir(train_image_file_coco) 87 | # train_json_file = "./HeadCube3D/annotations/train_300W_LP.json" 88 | # train_json_file_coco = "./HeadCube3D/annotations/train_300W_LP_coco_style.json" 89 | # coco_dict_train = copy.deepcopy(coco_dict_template) 90 | # convert_to_coco_style(train_image_file, train_image_file_coco, 91 | # train_json_file, train_json_file_coco, coco_dict_train) 92 | 93 | 94 | val_image_file = "/datasdc/zhouhuayi/dataset/headpose/HeadCube3D/images/BIWI_test/" 95 | val_image_file_coco = "./HeadCube3D/images/validation/" 96 | if os.path.exists(val_image_file_coco): 97 | shutil.rmtree(val_image_file_coco) 98 | os.mkdir(val_image_file_coco) 99 | val_json_file = "./HeadCube3D/annotations/BIWI_test.json" 100 | val_json_file_coco = "./HeadCube3D/annotations/BIWI_test_coco_style.json" 101 | coco_dict_val = copy.deepcopy(coco_dict_template) 102 | convert_to_coco_style(val_image_file, val_image_file_coco, 103 | val_json_file, val_json_file_coco, coco_dict_val) 104 | -------------------------------------------------------------------------------- /exps/gen_dataset_full_AGORA_CMU.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import cv2 4 | import json 5 | import argparse 6 | import numpy as np 7 | 8 | from tqdm import tqdm 9 | 10 | def get_args(): 11 | parser = argparse.ArgumentParser(description="This script cleans-up noisy labels " 12 | "and creates database for training.", 13 | 
formatter_class=argparse.ArgumentDefaultsHelpFormatter) 14 | parser.add_argument("--db", type=str, default='./AGORA', 15 | help="path to database") 16 | parser.add_argument("--data_type", type=str, default='train', 17 | help="data type, train or val") 18 | parser.add_argument("--img_size", type=int, default=256, 19 | help="output image size") 20 | parser.add_argument("--plot", type=bool, default=False, 21 | help="plot image flag") 22 | 23 | parser.add_argument('--root_dir = ', 24 | dest='root_dir', 25 | help='root directory of the datasets files', 26 | default='./datasets/AGORA/', 27 | type=str) 28 | parser.add_argument('--filename', 29 | dest='filename', 30 | help='Output filename.', 31 | default='files_train.txt', 32 | type=str) 33 | 34 | args = parser.parse_args() 35 | return args 36 | 37 | def sort_labels_by_image_id(labels_list): 38 | images_labels_dict = {} 39 | for i, labels_dict in enumerate(labels_list): 40 | image_id = str(labels_dict['image_id']) 41 | if image_id in images_labels_dict.keys(): 42 | images_labels_dict[image_id].append(labels_dict) 43 | else: 44 | images_labels_dict[image_id] = [labels_dict] 45 | return images_labels_dict 46 | 47 | def main(): 48 | 49 | args = get_args() 50 | mypath = args.db 51 | data_type = args.data_type 52 | img_size = args.img_size 53 | isPlot = args.plot 54 | 55 | output_path = args.root_dir 56 | filename = args.filename 57 | 58 | if not os.path.exists(output_path): 59 | os.mkdir(output_path) 60 | 61 | if "train" == data_type: 62 | if "AGORA" in mypath: 63 | img_path = os.path.join(mypath, "images", "train") 64 | json_path = os.path.join(mypath, "annotations", "coco_style_train_v2.json") 65 | if "CMU" in mypath: 66 | img_path = os.path.join(mypath, "images", "train") 67 | json_path = os.path.join(mypath, "annotations", "coco_style_sampled_train_v2.json") 68 | if "val" == data_type: 69 | if "AGORA" in mypath: 70 | img_path = os.path.join(mypath, "images", "validation") 71 | json_path = os.path.join(mypath, "annotations", "coco_style_validation_v2.json") 72 | if "CMU" in mypath: 73 | img_path = os.path.join(mypath, "images", "val") 74 | json_path = os.path.join(mypath, "annotations", "coco_style_sampled_val_v2.json") 75 | 76 | save_img_path = os.path.join(output_path, data_type) 77 | save_filename = os.path.join(output_path, filename) 78 | 79 | if os.path.exists(save_img_path): 80 | shutil.rmtree(save_img_path) 81 | os.mkdir(save_img_path) 82 | 83 | anno_json_dict = json.load(open(json_path, "r")) 84 | imgs_dict_list = anno_json_dict["images"] 85 | imgs_labels_dict = sort_labels_by_image_id(anno_json_dict["annotations"]) 86 | 87 | print("Json file: %s\n[images number]: %d\n[head instances number]: %d"%( 88 | json_path, len(imgs_dict_list), len(anno_json_dict["annotations"]) )) 89 | 90 | out_imgs = [] 91 | out_poses = [] 92 | 93 | outfile = open(save_filename, 'w') 94 | for i, imgs_dict in enumerate(tqdm(imgs_dict_list)): 95 | img_name = imgs_dict["file_name"] 96 | img_id = str(imgs_dict["id"]) 97 | 98 | img_ori = cv2.imread(os.path.join(img_path, img_name)) 99 | 100 | img_anno_list = imgs_labels_dict[img_id] 101 | for img_anno in img_anno_list: 102 | [x, y, w, h] = img_anno["bbox"] 103 | [pitch, yaw, roll] = img_anno["euler_angles"] 104 | instance_id = img_anno["id"] 105 | 106 | # if abs(yaw) < 90: # for FSA-Net, we only focus on the head with frontal face 107 | # img_crop = img_ori[int(y):int(y+h), int(x):int(x+w)] 108 | # img_crop = cv2.resize(img_crop, (img_size, img_size)) 109 | 110 | # out_imgs.append(img_crop) 111 | # 
out_poses.append(np.array([yaw, pitch, roll])) 112 | # else: 113 | # continue 114 | 115 | 116 | # for 6DRepNet with full-range design, we focus on all the labeled heads 117 | img_crop = img_ori[int(y):int(y+h), int(x):int(x+w)] 118 | img_crop = cv2.resize(img_crop, (img_size, img_size)) 119 | 120 | save_img_path_abs = os.path.join(save_img_path, str(instance_id)+".jpg") 121 | cv2.imwrite(save_img_path_abs, img_crop) 122 | 123 | outfile.write(str(instance_id)+".jpg" + " %.4f %.4f %.4f\n"%(pitch, yaw, roll)) 124 | 125 | 126 | if i < 2: 127 | if "AGORA" in mypath: 128 | cv2.imwrite("./tmp/"+str(instance_id)+"_agora.jpg", img_crop) 129 | if "CMU" in mypath: 130 | cv2.imwrite("./tmp/"+str(instance_id)+"_cmu.jpg", img_crop) 131 | 132 | # Checking the cropped image 133 | if isPlot: 134 | cv2.imshow('check', img_crop) 135 | k=cv2.waitKey(300) 136 | 137 | outfile.close() 138 | 139 | if __name__ == "__main__": 140 | main() 141 | 142 | ''' 143 | AGORA 144 | Json file: /datasdc/zhouhuayi/dataset/AGORA/HPE/annotations/coco_style_train_v2.json 145 | [images number]: 14408 146 | [head instances number]: 105046 147 | 148 | Json file: /datasdc/zhouhuayi/dataset/AGORA/HPE/annotations/coco_style_validation_v2.json 149 | [images number]: 1070 150 | [head instances number]: 7505 151 | 152 | 153 | CMU 154 | Json file: /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/annotations/coco_style_sampled_train_v2.json 155 | [images number]: 15718 156 | [head instances number]: 35725 157 | 158 | Json file: /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/annotations/coco_style_sampled_val_v2.json 159 | [images number]: 16216 160 | [head instances number]: 32738 161 | ''' -------------------------------------------------------------------------------- /exps/gen_dataset_single_AGORA.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import cv2 4 | import json 5 | import argparse 6 | import numpy as np 7 | 8 | from tqdm import tqdm 9 | 10 | def get_args(): 11 | parser = argparse.ArgumentParser(description="This script cleans-up noisy labels " 12 | "and creates database for training.", 13 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 14 | parser.add_argument("--db", type=str, default='./AGORA', 15 | help="path to database") 16 | parser.add_argument("--output", type=str, default='./AGORA.npz', 17 | help="path to output database mat file") 18 | parser.add_argument("--img_size", type=int, default=64, 19 | help="output image size") 20 | parser.add_argument("--plot", type=bool, default=False, 21 | help="plot image flag") 22 | 23 | 24 | args = parser.parse_args() 25 | return args 26 | 27 | def sort_labels_by_image_id(labels_list): 28 | images_labels_dict = {} 29 | for i, labels_dict in enumerate(labels_list): 30 | image_id = str(labels_dict['image_id']) 31 | if image_id in images_labels_dict.keys(): 32 | images_labels_dict[image_id].append(labels_dict) 33 | else: 34 | images_labels_dict[image_id] = [labels_dict] 35 | return images_labels_dict 36 | 37 | def main(): 38 | 39 | args = get_args() 40 | mypath = args.db 41 | output_path = args.output 42 | img_size = args.img_size 43 | isPlot = args.plot 44 | 45 | if "train" in output_path: 46 | img_path = os.path.join(mypath, "images", "train") 47 | json_path = os.path.join(mypath, "annotations", "coco_style_train_v2.json") 48 | if "val" in output_path: 49 | img_path = os.path.join(mypath, "images", "validation") 50 | json_path = os.path.join(mypath, "annotations", "coco_style_validation_v2.json") 51 | 52 | 53 | 
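    # Example invocation (the dataset root below is a placeholder path):
    #   python gen_dataset_single_AGORA.py --db /path/to/AGORA/HPE --output ./AGORA_train.npz --img_size 64
    # Whether "train" or "val" appears in --output decides which coco-style json was selected above.
    # The loop below keeps only heads with |yaw| < 90 deg, crops and resizes each one to
    # img_size x img_size, and saves the stacked crops and [yaw, pitch, roll] labels into the .npz file.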
anno_json_dict = json.load(open(json_path, "r")) 54 | imgs_dict_list = anno_json_dict["images"] 55 | imgs_labels_dict = sort_labels_by_image_id(anno_json_dict["annotations"]) 56 | 57 | print("Json file: %s\n[images number]: %d\n[head instances number]: %d"%( 58 | json_path, len(imgs_dict_list), len(anno_json_dict["annotations"]) )) 59 | 60 | out_imgs = [] 61 | out_poses = [] 62 | for i, imgs_dict in enumerate(tqdm(imgs_dict_list)): 63 | img_name = imgs_dict["file_name"] 64 | img_id = str(imgs_dict["id"]) 65 | 66 | img_ori = cv2.imread(os.path.join(img_path, img_name)) 67 | 68 | img_anno_list = imgs_labels_dict[img_id] 69 | for img_anno in img_anno_list: 70 | [x, y, w, h] = img_anno["bbox"] 71 | [pitch, yaw, roll] = img_anno["euler_angles"] 72 | instance_id = img_anno["id"] 73 | 74 | if abs(yaw) < 90: # for FSA-Net, we only focus on the head with frontal face 75 | img_crop = img_ori[int(y):int(y+h), int(x):int(x+w)] 76 | img_crop = cv2.resize(img_crop, (img_size, img_size)) 77 | 78 | out_imgs.append(img_crop) 79 | out_poses.append(np.array([yaw, pitch, roll])) 80 | else: 81 | continue 82 | 83 | if i < 2: 84 | cv2.imwrite("./tmp/"+str(instance_id)+"_agora.jpg", img_crop) 85 | 86 | # Checking the cropped image 87 | if isPlot: 88 | cv2.imshow('check', img_crop) 89 | k=cv2.waitKey(300) 90 | 91 | print("[left head instances]: %d"%(len(out_imgs) )) 92 | 93 | np.savez(output_path, image=np.array(out_imgs), pose=np.array(out_poses), img_size=img_size) 94 | 95 | if __name__ == "__main__": 96 | main() 97 | 98 | ''' 99 | Json file: /datasdc/zhouhuayi/dataset/AGORA/HPE/annotations/coco_style_train_v2.json 100 | [images number]: 14408 101 | [head instances number]: 105046 102 | [left head instances]: 52639 103 | 104 | Json file: /datasdc/zhouhuayi/dataset/AGORA/HPE/annotations/coco_style_validation_v2.json 105 | [images number]: 1070 106 | [head instances number]: 7505 107 | [left head instances]: 3781 108 | ''' -------------------------------------------------------------------------------- /exps/gen_dataset_single_CMU.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import cv2 4 | import json 5 | import argparse 6 | import numpy as np 7 | 8 | from tqdm import tqdm 9 | 10 | def get_args(): 11 | parser = argparse.ArgumentParser(description="This script cleans-up noisy labels " 12 | "and creates database for training.", 13 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 14 | parser.add_argument("--db", type=str, default='./CMU', 15 | help="path to database") 16 | parser.add_argument("--output", type=str, default='./CMU.npz', 17 | help="path to output database mat file") 18 | parser.add_argument("--img_size", type=int, default=64, 19 | help="output image size") 20 | parser.add_argument("--plot", type=bool, default=False, 21 | help="plot image flag") 22 | 23 | 24 | args = parser.parse_args() 25 | return args 26 | 27 | def sort_labels_by_image_id(labels_list): 28 | images_labels_dict = {} 29 | for i, labels_dict in enumerate(labels_list): 30 | image_id = str(labels_dict['image_id']) 31 | if image_id in images_labels_dict.keys(): 32 | images_labels_dict[image_id].append(labels_dict) 33 | else: 34 | images_labels_dict[image_id] = [labels_dict] 35 | return images_labels_dict 36 | 37 | def main(): 38 | 39 | args = get_args() 40 | mypath = args.db 41 | output_path = args.output 42 | img_size = args.img_size 43 | isPlot = args.plot 44 | 45 | if "train" in output_path: 46 | img_path = os.path.join(mypath, "images", "train") 47 | json_path = 
os.path.join(mypath, "annotations", "coco_style_sampled_train_v2.json") 48 | if "val" in output_path: 49 | img_path = os.path.join(mypath, "images", "val") 50 | json_path = os.path.join(mypath, "annotations", "coco_style_sampled_val_v2.json") 51 | 52 | 53 | anno_json_dict = json.load(open(json_path, "r")) 54 | imgs_dict_list = anno_json_dict["images"] 55 | imgs_labels_dict = sort_labels_by_image_id(anno_json_dict["annotations"]) 56 | 57 | print("Json file: %s\n[images number]: %d\n[head instances number]: %d"%( 58 | json_path, len(imgs_dict_list), len(anno_json_dict["annotations"]) )) 59 | 60 | out_imgs = [] 61 | out_poses = [] 62 | for i, imgs_dict in enumerate(tqdm(imgs_dict_list)): 63 | img_name = imgs_dict["file_name"] 64 | img_id = str(imgs_dict["id"]) 65 | 66 | img_ori = cv2.imread(os.path.join(img_path, img_name)) 67 | 68 | img_anno_list = imgs_labels_dict[img_id] 69 | for img_anno in img_anno_list: 70 | [x, y, w, h] = img_anno["bbox"] 71 | [pitch, yaw, roll] = img_anno["euler_angles"] 72 | instance_id = img_anno["id"] 73 | 74 | if abs(yaw) < 90: # for FSA-Net, we only focus on the head with frontal face 75 | img_crop = img_ori[int(y):int(y+h), int(x):int(x+w)] 76 | img_crop = cv2.resize(img_crop, (img_size, img_size)) 77 | 78 | out_imgs.append(img_crop) 79 | out_poses.append(np.array([yaw, pitch, roll])) 80 | else: 81 | continue 82 | 83 | if i < 2: 84 | cv2.imwrite("./tmp/"+str(instance_id)+"_cmu.jpg", img_crop) 85 | 86 | # Checking the cropped image 87 | if isPlot: 88 | cv2.imshow('check', img_crop) 89 | k=cv2.waitKey(300) 90 | 91 | print("[left head instances]: %d"%(len(out_imgs) )) 92 | 93 | np.savez(output_path, image=np.array(out_imgs), pose=np.array(out_poses), img_size=img_size) 94 | 95 | if __name__ == "__main__": 96 | main() 97 | 98 | ''' 99 | Json file: /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/annotations/coco_style_sampled_train_v2.json 100 | [images number]: 15718 101 | [head instances number]: 35725 102 | [left head instances]: 18447 103 | 104 | Json file: /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/annotations/coco_style_sampled_val_v2.json 105 | [images number]: 16216 106 | [head instances number]: 32738 107 | [left head instances]: 16497 108 | ''' -------------------------------------------------------------------------------- /exps/sixdrepnet.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/exps/sixdrepnet.zip -------------------------------------------------------------------------------- /exps/statistic_angles.py: -------------------------------------------------------------------------------- 1 | import scipy.io as sio 2 | from os import listdir 3 | from os.path import isfile, join 4 | from tqdm import tqdm 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | def main_300W_LP(): 9 | db_name = "300W_LP" 10 | db_paths = [ 11 | "../300W_LP/AFW", "../300W_LP/AFW_Flip", 12 | "../300W_LP/HELEN", "../300W_LP/HELEN_Flip", 13 | "../300W_LP/IBUG", "../300W_LP/IBUG_Flip", 14 | "../300W_LP/LFPW", "../300W_LP/LFPW_Flip", 15 | "../300W_LP/AFW", "../300W_LP/AFW_Flip", 16 | "../AFLW2000"] # 300W_LP & AFLW2000 17 | 18 | total_num = 0 19 | euler_angles_stat = [[],[],[]] # pitch, yaw, roll 20 | 21 | for db_path in db_paths: 22 | onlyfiles_mat = [] 23 | for f in listdir(db_path): 24 | if isfile(join(db_path, f)) and join(db_path, f).endswith('.mat'): 25 | onlyfiles_mat.append(f) 26 | onlyfiles_mat.sort() 27 | print(db_path, 
"\t", len(onlyfiles_mat)) 28 | 29 | for i in tqdm(range(len(onlyfiles_mat))): 30 | mat_name = onlyfiles_mat[i] 31 | mat_contents = sio.loadmat(db_path + '/' + mat_name) 32 | pose_para = mat_contents['Pose_Para'][0] 33 | pt2d = mat_contents['pt2d'] 34 | 35 | pitch = pose_para[0] * 180 / np.pi 36 | yaw = pose_para[1] * 180 / np.pi 37 | roll = pose_para[2] * 180 / np.pi 38 | 39 | if abs(pitch)>99 or abs(yaw)>99 or abs(roll)>99: 40 | continue 41 | 42 | euler_angles_stat[0].append(pitch) 43 | euler_angles_stat[1].append(yaw) 44 | euler_angles_stat[2].append(roll) 45 | total_num += 1 46 | 47 | print("total_num:\t", total_num) 48 | 49 | '''Euler Angels Stat''' 50 | plt.figure(figsize=(10, 5), dpi=100) 51 | plt.title("300W_LP and AFLW2000") 52 | interval = 10 # 10 or 15 is better 53 | bins = 200 // interval 54 | density = True # True or False, density=False would make counts 55 | colors = ['r', 'g', 'b'] 56 | labels = ["Pitch", "Yaw", "Roll"] 57 | plt.hist(euler_angles_stat, bins=bins, alpha=0.7, density=density, histtype='bar', label=labels, color=colors) 58 | plt.legend(prop ={'size': 10}) 59 | # plt.xlim(-90, 91) 60 | plt.xticks(range(-100,101,interval)) 61 | if density: plt.ylabel('Percentage') 62 | else: plt.ylabel('Counts') 63 | plt.xlabel('Degree') 64 | plt.show() 65 | 66 | 67 | def main_BIWI(): 68 | db_path_train = "./BIWI_train.npz" 69 | db_path_test = "./BIWI_test.npz" 70 | 71 | total_num = 0 72 | euler_angles_stat = [[],[],[]] # pitch, yaw, roll 73 | 74 | for db_path in [db_path_train, db_path_test]: 75 | db_dict = np.load(db_path) 76 | print(db_path, list(db_dict.keys())) 77 | 78 | for cont_labels in tqdm(db_dict['pose']): 79 | [yaw, pitch, roll] = cont_labels 80 | 81 | if abs(pitch)>90 or abs(yaw)>90 or abs(roll)>90: 82 | continue 83 | 84 | euler_angles_stat[0].append(pitch) 85 | euler_angles_stat[1].append(yaw) 86 | euler_angles_stat[2].append(roll) 87 | total_num += 1 88 | 89 | print("total_num:\t", total_num) 90 | 91 | '''Euler Angels Stat''' 92 | plt.figure(figsize=(10, 5), dpi=100) 93 | plt.title("BIWI") 94 | interval = 10 # 10 or 15 is better 95 | bins = 180 // interval 96 | density = True # True or False, density=False would make counts 97 | colors = ['r', 'g', 'b'] 98 | labels = ["Pitch", "Yaw", "Roll"] 99 | plt.hist(euler_angles_stat, bins=bins, alpha=0.7, density=density, histtype='bar', label=labels, color=colors) 100 | plt.legend(prop ={'size': 10}) 101 | # plt.xlim(-90, 91) 102 | plt.xticks(range(-90,91,interval)) 103 | if density: plt.ylabel('Percentage') 104 | else: plt.ylabel('Counts') 105 | plt.xlabel('Degree') 106 | plt.show() 107 | 108 | 109 | if __name__ == '__main__': 110 | '''https://github.com/shamangary/FSA-Net''' 111 | # main_300W_LP() # total_num 134793 112 | main_BIWI() # total_num 15678 -------------------------------------------------------------------------------- /materials/000000002685_vis3d_res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000002685_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000000018380_vis3d_res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000018380_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000000038829_vis3d_res.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000038829_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000000081988_vis3d_res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000081988_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000000161925_vis3d_res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000161925_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000000183648_vis3d_res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000183648_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000002_mpiinew_test_DirectMHP_vis3d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000002_mpiinew_test_DirectMHP_vis3d.gif -------------------------------------------------------------------------------- /materials/000003_mpiinew_test_DirectMHP_vis3d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000003_mpiinew_test_DirectMHP_vis3d.gif -------------------------------------------------------------------------------- /materials/datasetexamples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/datasetexamples.png -------------------------------------------------------------------------------- /materials/full_range.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/full_range.png -------------------------------------------------------------------------------- /materials/illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/illustration.png -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Experimental modules 4 | """ 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | 10 | from models.common import Conv 11 | from utils.downloads import attempt_download 12 | 13 | 14 | class CrossConv(nn.Module): 15 | # Cross Convolution Downsample 16 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 17 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 18 | super().__init__() 19 | c_ = int(c2 * e) # hidden 
channels 20 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 21 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 22 | self.add = shortcut and c1 == c2 23 | 24 | def forward(self, x): 25 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 26 | 27 | 28 | class Sum(nn.Module): 29 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 30 | def __init__(self, n, weight=False): # n: number of inputs 31 | super().__init__() 32 | self.weight = weight # apply weights boolean 33 | self.iter = range(n - 1) # iter object 34 | if weight: 35 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 36 | 37 | def forward(self, x): 38 | y = x[0] # no weight 39 | if self.weight: 40 | w = torch.sigmoid(self.w) * 2 41 | for i in self.iter: 42 | y = y + x[i + 1] * w[i] 43 | else: 44 | for i in self.iter: 45 | y = y + x[i + 1] 46 | return y 47 | 48 | 49 | class MixConv2d(nn.Module): 50 | # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 51 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 52 | super().__init__() 53 | groups = len(k) 54 | if equal_ch: # equal c_ per group 55 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 56 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 57 | else: # equal weight.numel() per group 58 | b = [c2] + [0] * groups 59 | a = np.eye(groups + 1, groups, k=-1) 60 | a -= np.roll(a, 1, axis=1) 61 | a *= np.array(k) ** 2 62 | a[0] = 1 63 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 64 | 65 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 66 | self.bn = nn.BatchNorm2d(c2) 67 | self.act = nn.LeakyReLU(0.1, inplace=True) 68 | 69 | def forward(self, x): 70 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 71 | 72 | 73 | class Ensemble(nn.ModuleList): 74 | # Ensemble of models 75 | def __init__(self): 76 | super().__init__() 77 | 78 | def forward(self, x, augment=False, profile=False, visualize=False): 79 | y = [] 80 | for module in self: 81 | y.append(module(x, augment, profile, visualize)[0]) 82 | # y = torch.stack(y).max(0)[0] # max ensemble 83 | # y = torch.stack(y).mean(0) # mean ensemble 84 | y = torch.cat(y, 1) # nms ensemble 85 | return y, None # inference, train output 86 | 87 | 88 | def attempt_load(weights, map_location=None, inplace=True, fuse=True): 89 | from models.yolo import Detect, Model 90 | 91 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 92 | model = Ensemble() 93 | for w in weights if isinstance(weights, list) else [weights]: 94 | ckpt = torch.load(attempt_download(w), map_location=map_location) # load 95 | if fuse: 96 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model 97 | else: 98 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval()) # without layer fuse 99 | 100 | 101 | # Compatibility updates 102 | for m in model.modules(): 103 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]: 104 | m.inplace = inplace # pytorch 1.7.0 compatibility 105 | elif type(m) is Conv: 106 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 107 | 108 | if len(model) == 1: 109 | return model[-1] # return model 110 | else: 111 | print(f'Ensemble created with {weights}\n') 112 | for k in ['names']: 113 | setattr(model, k, getattr(model[-1], k)) 114 | model.stride = 
model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride 115 | return model # return ensemble 116 | -------------------------------------------------------------------------------- /models/yolov5l6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 9, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 1, SPP, [1024, [3, 5, 7]]], 27 | [-1, 3, C3, [1024, False]], # 11 28 | ] 29 | 30 | # YOLOv5 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] -------------------------------------------------------------------------------- /models/yolov5m6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 9, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 1, SPP, [1024, [3, 5, 7]]], 27 | [-1, 3, C3, [1024, False]], # 11 28 | ] 29 | 30 | # YOLOv5 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 
| [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] -------------------------------------------------------------------------------- /models/yolov5s6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 9, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 1, SPP, [1024, [3, 5, 7]]], 27 | [-1, 3, C3, [1024, False]], # 11 28 | ] 29 | 30 | # YOLOv5 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] -------------------------------------------------------------------------------- /pose_references/reference_3d_5_points_trans.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/pose_references/reference_3d_5_points_trans.npy -------------------------------------------------------------------------------- /pose_references/reference_3d_68_points_trans.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/pose_references/reference_3d_68_points_trans.npy -------------------------------------------------------------------------------- /pose_references/triangles.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/pose_references/triangles.npy -------------------------------------------------------------------------------- /pose_references/vertices_trans.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/pose_references/vertices_trans.npy -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple 2 | 3 | # pip3 install torch==1.10.0+cu111 torchvision==0.11.1+cu111 torchaudio==0.10.0+cu111 -f https://download.pytorch.org/whl/cu111/torch_stable.html 4 | # wget https://download.pytorch.org/whl/cu111/torch-1.10.0%2Bcu111-cp38-cp38-linux_x86_64.whl 5 | # wget https://download.pytorch.org/whl/cu111/torchvision-0.11.1%2Bcu111-cp38-cp38-linux_x86_64.whl 6 | # wget https://download.pytorch.org/whl/cu111/torchaudio-0.10.0%2Bcu111-cp38-cp38-linux_x86_64.whl 7 | 8 | # base ---------------------------------------- 9 | matplotlib>=3.2.2 10 | numpy>=1.18.5 11 | opencv-python>=4.1.2 12 | Pillow 13 | PyYAML>=5.3.1 14 | scipy>=1.4.1 15 | torch>=1.7.0 16 | torchvision>=0.8.1 17 | tqdm>=4.41.0 18 | 19 | # logging ------------------------------------- 20 | tensorboard>=2.4.1 21 | wandb 22 | 23 | # plotting ------------------------------------ 24 | seaborn>=0.11.0 25 | pandas 26 | 27 | # export -------------------------------------- 28 | # coremltools>=4.1 29 | # onnx>=1.9.0 30 | # scikit-learn==0.19.2 # for coreml quantization 31 | 32 | # extras -------------------------------------- 33 | # Cython # for pycocotools https://github.com/cocodataset/cocoapi/issues/172 34 | # pycocotools>=2.0 # COCO mAP 35 | thop # FLOPs computation -------------------------------------------------------------------------------- /test_imgs/AGORA/agora_val_2000400001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/AGORA/agora_val_2000400001.jpg -------------------------------------------------------------------------------- /test_imgs/AGORA/agora_val_2000400205.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/AGORA/agora_val_2000400205.jpg -------------------------------------------------------------------------------- /test_imgs/CMU/cmu_val_10400060013.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/CMU/cmu_val_10400060013.jpg -------------------------------------------------------------------------------- /test_imgs/CMU/cmu_val_10602142026.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/CMU/cmu_val_10602142026.jpg -------------------------------------------------------------------------------- /test_imgs/CMU/cmu_val_11500144012.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/CMU/cmu_val_11500144012.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000002685.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000002685.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000018380.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000018380.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000038829.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000038829.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000081988.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000081988.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000161925.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000161925.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000183648.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000183648.jpg -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Activation functions 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ---------------------------------------------------------------------------- 12 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 13 | @staticmethod 14 | def forward(x): 15 | return x * torch.sigmoid(x) 16 | 17 | 18 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 19 | @staticmethod 20 | def forward(x): 21 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 22 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 23 | 24 | 25 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 26 | class Mish(nn.Module): 27 | @staticmethod 28 | def forward(x): 29 | return x * F.softplus(x).tanh() 30 | 31 | 32 | class MemoryEfficientMish(nn.Module): 33 | class F(torch.autograd.Function): 34 | @staticmethod 35 | def forward(ctx, x): 36 | ctx.save_for_backward(x) 37 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 38 | 39 | @staticmethod 40 | def backward(ctx, grad_output): 41 | x = ctx.saved_tensors[0] 42 | sx = torch.sigmoid(x) 43 | fx = F.softplus(x).tanh() 44 | return grad_output * (fx + x * sx * (1 - fx * fx)) 45 | 46 | def forward(self, x): 47 | return self.F.apply(x) 48 | 49 | 50 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 51 | class FReLU(nn.Module): 52 | def __init__(self, c1, k=3): # ch_in, kernel 53 | super().__init__() 54 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 55 | self.bn = nn.BatchNorm2d(c1) 56 | 57 | def forward(self, x): 58 | return torch.max(x, self.bn(self.conv(x))) 59 | 60 | 61 | # ACON https://arxiv.org/pdf/2009.04759.pdf ---------------------------------------------------------------------------- 62 | class AconC(nn.Module): 63 | r""" ACON activation (activate or not). 64 | AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter 65 | according to "Activate or Not: Learning Customized Activation" . 66 | """ 67 | 68 | def __init__(self, c1): 69 | super().__init__() 70 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 71 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 72 | self.beta = nn.Parameter(torch.ones(1, c1, 1, 1)) 73 | 74 | def forward(self, x): 75 | dpx = (self.p1 - self.p2) * x 76 | return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x 77 | 78 | 79 | class MetaAconC(nn.Module): 80 | r""" ACON activation (activate or not). 81 | MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network 82 | according to "Activate or Not: Learning Customized Activation" . 
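    In this implementation, beta is computed per channel from a global average pool of x
    passed through the two 1x1 convolutions fc1 and fc2, followed by a sigmoid; the original
    BatchNorm layers are commented out below because of batch-size-1 instability.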
83 | """ 84 | 85 | def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r 86 | super().__init__() 87 | c2 = max(r, c1 // r) 88 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 89 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 90 | self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True) 91 | self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True) 92 | # self.bn1 = nn.BatchNorm2d(c2) 93 | # self.bn2 = nn.BatchNorm2d(c1) 94 | 95 | def forward(self, x): 96 | y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True) 97 | # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891 98 | # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable 99 | beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed 100 | dpx = (self.p1 - self.p2) * x 101 | return dpx * torch.sigmoid(beta * dpx) + self.p2 * x 102 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Auto-anchor utils 4 | """ 5 | 6 | import random 7 | 8 | import numpy as np 9 | import torch 10 | import yaml 11 | from tqdm import tqdm 12 | 13 | from utils.general import colorstr 14 | 15 | 16 | def check_anchor_order(m): 17 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 18 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 19 | da = a[-1] - a[0] # delta a 20 | ds = m.stride[-1] - m.stride[0] # delta s 21 | if da.sign() != ds.sign(): # same order 22 | print('Reversing anchor order') 23 | m.anchors[:] = m.anchors.flip(0) 24 | m.anchor_grid[:] = m.anchor_grid.flip(0) 25 | 26 | 27 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 28 | # Check anchor fit to data, recompute if necessary 29 | prefix = colorstr('autoanchor: ') 30 | print(f'\n{prefix}Analyzing anchors... ', end='') 31 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 32 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 33 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 34 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 35 | 36 | def metric(k): # compute metric 37 | r = wh[:, None] / k[None] 38 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 39 | best = x.max(1)[0] # best_x 40 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 41 | bpr = (best > 1. / thr).float().mean() # best possible recall 42 | return bpr, aat 43 | 44 | anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors 45 | bpr, aat = metric(anchors) 46 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 47 | if bpr < 0.98: # threshold to recompute 48 | print('. 
Attempting to improve anchors, please wait...') 49 | na = m.anchor_grid.numel() // 2 # number of anchors 50 | try: 51 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 52 | except Exception as e: 53 | print(f'{prefix}ERROR: {e}') 54 | new_bpr = metric(anchors)[0] 55 | if new_bpr > bpr: # replace anchors 56 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 57 | m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference 58 | m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 59 | check_anchor_order(m) 60 | print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') 61 | else: 62 | print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.') 63 | print('') # newline 64 | 65 | 66 | def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 67 | """ Creates kmeans-evolved anchors from training dataset 68 | 69 | Arguments: 70 | dataset: path to data.yaml, or a loaded dataset 71 | n: number of anchors 72 | img_size: image size used for training 73 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 74 | gen: generations to evolve anchors using genetic algorithm 75 | verbose: print all results 76 | 77 | Return: 78 | k: kmeans evolved anchors 79 | 80 | Usage: 81 | from utils.autoanchor import *; _ = kmean_anchors() 82 | """ 83 | from scipy.cluster.vq import kmeans 84 | 85 | thr = 1. / thr 86 | prefix = colorstr('autoanchor: ') 87 | 88 | def metric(k, wh): # compute metrics 89 | r = wh[:, None] / k[None] 90 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 91 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 92 | return x, x.max(1)[0] # x, best_x 93 | 94 | def anchor_fitness(k): # mutation fitness 95 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 96 | return (best * (best > thr).float()).mean() # fitness 97 | 98 | def print_results(k): 99 | k = k[np.argsort(k.prod(1))] # sort small to large 100 | x, best = metric(k, wh0) 101 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 102 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 103 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 104 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 105 | for i, x in enumerate(k): 106 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 107 | return k 108 | 109 | if isinstance(dataset, str): # *.yaml file 110 | with open(dataset, errors='ignore') as f: 111 | data_dict = yaml.safe_load(f) # model dict 112 | from utils.datasets import LoadImagesAndLabels 113 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 114 | 115 | # Get label wh 116 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 117 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 118 | 119 | # Filter 120 | i = (wh0 < 3.0).any(1).sum() 121 | if i: 122 | print(f'{prefix}WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 123 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 124 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 125 | 126 | # Kmeans calculation 127 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 128 | s = wh.std(0) # sigmas for whitening 129 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 130 | assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}') 131 | k *= s 132 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 133 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 134 | k = print_results(k) 135 | 136 | # Plot 137 | # k, d = [None] * 20, [None] * 20 138 | # for i in tqdm(range(1, 21)): 139 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 140 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 141 | # ax = ax.ravel() 142 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 143 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 144 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 145 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 146 | # fig.savefig('wh.png', dpi=200) 147 | 148 | # Evolve 149 | npr = np.random 150 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 151 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar 152 | for _ in pbar: 153 | v = np.ones(sh) 154 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 155 | v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 156 | kg = (k.copy() * v).clip(min=2.0) 157 | fg = anchor_fitness(kg) 158 | if fg > f: 159 | f, k = fg, kg.copy() 160 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 161 | if verbose: 162 | print_results(k) 163 | 164 | return print_results(k) 165 | -------------------------------------------------------------------------------- /utils/callbacks.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Callback utils 4 | """ 5 | 6 | 7 | class Callbacks: 8 | """" 9 | Handles all registered callbacks for YOLOv5 Hooks 10 | """ 11 | 12 | _callbacks = { 13 | 'on_pretrain_routine_start': [], 14 | 'on_pretrain_routine_end': [], 15 | 16 | 'on_train_start': [], 17 | 'on_train_epoch_start': [], 18 | 'on_train_batch_start': [], 19 | 'optimizer_step': [], 20 | 'on_before_zero_grad': [], 21 | 'on_train_batch_end': [], 22 | 'on_train_epoch_end': [], 23 | 24 | 'on_val_start': [], 25 | 'on_val_batch_start': [], 26 | 'on_val_image_end': [], 27 | 'on_val_batch_end': [], 28 | 'on_val_end': [], 29 | 30 | 'on_fit_epoch_end': [], # fit = train + val 31 | 'on_model_save': [], 32 | 'on_train_end': [], 33 | 34 | 'teardown': [], 35 | } 36 | 37 | def __init__(self): 38 | return 39 | 40 | def register_action(self, hook, name='', callback=None): 41 | """ 42 | Register a new action to a callback hook 43 | 44 | Args: 45 | hook The callback hook name to register the action to 46 | name The name of the action 47 | callback The callback to fire 48 | """ 49 | assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" 50 | assert callable(callback), f"callback '{callback}' is not callable" 51 | self._callbacks[hook].append({'name': name, 'callback': callback}) 52 | 53 | def get_registered_actions(self, hook=None): 54 
| """" 55 | Returns all the registered actions by callback hook 56 | 57 | Args: 58 | hook The name of the hook to check, defaults to all 59 | """ 60 | if hook: 61 | return self._callbacks[hook] 62 | else: 63 | return self._callbacks 64 | 65 | def run_callbacks(self, hook, *args, **kwargs): 66 | """ 67 | Loop through the registered actions and fire all callbacks 68 | """ 69 | for logger in self._callbacks[hook]: 70 | # print(f"Running callbacks.{logger['callback'].__name__}()") 71 | logger['callback'](*args, **kwargs) 72 | 73 | def on_pretrain_routine_start(self, *args, **kwargs): 74 | """ 75 | Fires all registered callbacks at the start of each pretraining routine 76 | """ 77 | self.run_callbacks('on_pretrain_routine_start', *args, **kwargs) 78 | 79 | def on_pretrain_routine_end(self, *args, **kwargs): 80 | """ 81 | Fires all registered callbacks at the end of each pretraining routine 82 | """ 83 | self.run_callbacks('on_pretrain_routine_end', *args, **kwargs) 84 | 85 | def on_train_start(self, *args, **kwargs): 86 | """ 87 | Fires all registered callbacks at the start of each training 88 | """ 89 | self.run_callbacks('on_train_start', *args, **kwargs) 90 | 91 | def on_train_epoch_start(self, *args, **kwargs): 92 | """ 93 | Fires all registered callbacks at the start of each training epoch 94 | """ 95 | self.run_callbacks('on_train_epoch_start', *args, **kwargs) 96 | 97 | def on_train_batch_start(self, *args, **kwargs): 98 | """ 99 | Fires all registered callbacks at the start of each training batch 100 | """ 101 | self.run_callbacks('on_train_batch_start', *args, **kwargs) 102 | 103 | def optimizer_step(self, *args, **kwargs): 104 | """ 105 | Fires all registered callbacks on each optimizer step 106 | """ 107 | self.run_callbacks('optimizer_step', *args, **kwargs) 108 | 109 | def on_before_zero_grad(self, *args, **kwargs): 110 | """ 111 | Fires all registered callbacks before zero grad 112 | """ 113 | self.run_callbacks('on_before_zero_grad', *args, **kwargs) 114 | 115 | def on_train_batch_end(self, *args, **kwargs): 116 | """ 117 | Fires all registered callbacks at the end of each training batch 118 | """ 119 | self.run_callbacks('on_train_batch_end', *args, **kwargs) 120 | 121 | def on_train_epoch_end(self, *args, **kwargs): 122 | """ 123 | Fires all registered callbacks at the end of each training epoch 124 | """ 125 | self.run_callbacks('on_train_epoch_end', *args, **kwargs) 126 | 127 | def on_val_start(self, *args, **kwargs): 128 | """ 129 | Fires all registered callbacks at the start of the validation 130 | """ 131 | self.run_callbacks('on_val_start', *args, **kwargs) 132 | 133 | def on_val_batch_start(self, *args, **kwargs): 134 | """ 135 | Fires all registered callbacks at the start of each validation batch 136 | """ 137 | self.run_callbacks('on_val_batch_start', *args, **kwargs) 138 | 139 | def on_val_image_end(self, *args, **kwargs): 140 | """ 141 | Fires all registered callbacks at the end of each val image 142 | """ 143 | self.run_callbacks('on_val_image_end', *args, **kwargs) 144 | 145 | def on_val_batch_end(self, *args, **kwargs): 146 | """ 147 | Fires all registered callbacks at the end of each validation batch 148 | """ 149 | self.run_callbacks('on_val_batch_end', *args, **kwargs) 150 | 151 | def on_val_end(self, *args, **kwargs): 152 | """ 153 | Fires all registered callbacks at the end of the validation 154 | """ 155 | self.run_callbacks('on_val_end', *args, **kwargs) 156 | 157 | def on_fit_epoch_end(self, *args, **kwargs): 158 | """ 159 | Fires all registered callbacks 
at the end of each fit (train+val) epoch 160 | """ 161 | self.run_callbacks('on_fit_epoch_end', *args, **kwargs) 162 | 163 | def on_model_save(self, *args, **kwargs): 164 | """ 165 | Fires all registered callbacks after each model save 166 | """ 167 | self.run_callbacks('on_model_save', *args, **kwargs) 168 | 169 | def on_train_end(self, *args, **kwargs): 170 | """ 171 | Fires all registered callbacks at the end of training 172 | """ 173 | self.run_callbacks('on_train_end', *args, **kwargs) 174 | 175 | def teardown(self, *args, **kwargs): 176 | """ 177 | Fires all registered callbacks before teardown 178 | """ 179 | self.run_callbacks('teardown', *args, **kwargs) 180 | -------------------------------------------------------------------------------- /utils/downloads.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Download utils 4 | """ 5 | 6 | import os 7 | import platform 8 | import subprocess 9 | import time 10 | import urllib 11 | from pathlib import Path 12 | 13 | import requests 14 | import torch 15 | 16 | 17 | def gsutil_getsize(url=''): 18 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 19 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 20 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 21 | 22 | 23 | def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): 24 | # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes 25 | file = Path(file) 26 | assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}" 27 | try: # url1 28 | print(f'Downloading {url} to {file}...') 29 | torch.hub.download_url_to_file(url, str(file)) 30 | assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check 31 | except Exception as e: # url2 32 | file.unlink(missing_ok=True) # remove partial downloads 33 | print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...') 34 | os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail 35 | finally: 36 | if not file.exists() or file.stat().st_size < min_bytes: # check 37 | file.unlink(missing_ok=True) # remove partial downloads 38 | print(f"ERROR: {assert_msg}\n{error_msg}") 39 | print('') 40 | 41 | 42 | def attempt_download(file, repo='ultralytics/yolov5'): # from utils.downloads import *; attempt_download() 43 | # Attempt file download if does not exist 44 | file = Path(str(file).strip().replace("'", '')) 45 | 46 | if not file.exists(): 47 | # URL specified 48 | name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc. 49 | if str(file).startswith(('http:/', 'https:/')): # download 50 | url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ 51 | name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth... 52 | safe_download(file=name, url=url, min_bytes=1E5) 53 | return name 54 | 55 | # GitHub assets 56 | file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required) 57 | try: 58 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api 59 | assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...] 60 | tag = response['tag_name'] # i.e. 
'v1.0' 61 | except: # fallback plan 62 | assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 63 | 'yolov5s6.pt', 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt'] 64 | try: 65 | tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1] 66 | except: 67 | tag = 'v5.0' # current release 68 | tag = 'v5.0' # download v5.0 models 69 | if name in assets: 70 | safe_download(file, 71 | url=f'https://github.com/{repo}/releases/download/{tag}/{name}', 72 | # url2=f'https://storage.googleapis.com/{repo}/ckpt/{name}', # backup url (optional) 73 | min_bytes=1E5, 74 | error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/') 75 | 76 | return str(file) 77 | 78 | 79 | def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): 80 | # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download() 81 | t = time.time() 82 | file = Path(file) 83 | cookie = Path('cookie') # gdrive cookie 84 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='') 85 | file.unlink(missing_ok=True) # remove existing file 86 | cookie.unlink(missing_ok=True) # remove existing cookie 87 | 88 | # Attempt file download 89 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 90 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 91 | if os.path.exists('cookie'): # large file 92 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 93 | else: # small file 94 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 95 | r = os.system(s) # execute, capture return 96 | cookie.unlink(missing_ok=True) # remove existing cookie 97 | 98 | # Error check 99 | if r != 0: 100 | file.unlink(missing_ok=True) # remove partial 101 | print('Download error ') # raise Exception('Download error') 102 | return r 103 | 104 | # Unzip if archive 105 | if file.suffix == '.zip': 106 | print('unzipping... 
', end='') 107 | os.system(f'unzip -q {file}') # unzip 108 | file.unlink() # remove zip to free space 109 | 110 | print(f'Done ({time.time() - t:.1f}s)') 111 | return r 112 | 113 | 114 | def get_token(cookie="./cookie"): 115 | with open(cookie) as f: 116 | for line in f: 117 | if "download" in line: 118 | return line.split()[-1] 119 | return "" 120 | 121 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries ---------------------------------------------- 122 | # 123 | # 124 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 125 | # # Uploads a file to a bucket 126 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 127 | # 128 | # storage_client = storage.Client() 129 | # bucket = storage_client.get_bucket(bucket_name) 130 | # blob = bucket.blob(destination_blob_name) 131 | # 132 | # blob.upload_from_filename(source_file_name) 133 | # 134 | # print('File {} uploaded to {}.'.format( 135 | # source_file_name, 136 | # destination_blob_name)) 137 | # 138 | # 139 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 140 | # # Uploads a blob from a bucket 141 | # storage_client = storage.Client() 142 | # bucket = storage_client.get_bucket(bucket_name) 143 | # blob = bucket.blob(source_blob_name) 144 | # 145 | # blob.download_to_filename(destination_file_name) 146 | # 147 | # print('Blob {} downloaded to {}.'.format( 148 | # source_blob_name, 149 | # destination_file_name)) 150 | -------------------------------------------------------------------------------- /utils/labels.py: -------------------------------------------------------------------------------- 1 | import os, os.path as osp 2 | import argparse 3 | import numpy as np 4 | import yaml 5 | from tqdm import tqdm 6 | 7 | from pycocotools.coco import COCO 8 | 9 | def write_yolov5_labels(data): 10 | assert not osp.isdir(osp.join(data['path'], data['labels'])), \ 11 | 'Labels already generated. Remove or choose new name for labels.' 
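    # ------------------------------------------------------------------
    # Note (added for clarity; not part of the original labels.py): for each
    # annotation this function appends one line per head to
    # <labels>/<split>/<image>.txt in the form
    #     cls xc yc w h pitch yaw roll
    # (crowd boxes are skipped for the train split). The box centre and size
    # are normalized by image width/height, and the Euler angles are mapped
    # into (0, 1): pitch and roll via angle/180 + 0.5 (assumed range -90..90
    # deg), yaw via angle/360 + 0.5 (range -180..180 deg). A minimal sketch of
    # that mapping and its inverse (hypothetical helpers, not used by this
    # script):
    #
    #     def encode_angles(pitch, yaw, roll):
    #         return pitch / 180 + 0.5, yaw / 360 + 0.5, roll / 180 + 0.5
    #
    #     def decode_angles(p, y, r):
    #         return (p - 0.5) * 180, (y - 0.5) * 360, (r - 0.5) * 180
    #
    #     # e.g. encode_angles(-45.0, 120.0, 10.0) -> (0.25, 0.833..., 0.555...)
    # ------------------------------------------------------------------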
12 | 13 | splits = [osp.splitext(osp.split(data[s])[-1])[0] for s in ['train', 'val', 'test'] if s in data] 14 | annotations = [osp.join(data['path'], data['{}_annotations'.format(s)]) for s in ['train', 'val', 'test'] if s in data] 15 | test_split = [0 if s in ['train', 'val'] else 1 for s in ['train', 'val', 'test'] if s in data] 16 | img_txt_dir = osp.join(data['path'], data['labels'], 'img_txt') 17 | os.makedirs(img_txt_dir, exist_ok=True) 18 | 19 | for split, annot, is_test in zip(splits, annotations, test_split): 20 | img_txt_path = osp.join(img_txt_dir, '{}.txt'.format(split)) 21 | labels_path = osp.join(data['path'], '{}/{}'.format(data['labels'], split)) 22 | if not is_test: 23 | os.makedirs(labels_path, exist_ok=True) 24 | coco = COCO(annot) 25 | if not is_test: 26 | pbar = tqdm(coco.anns.keys(), total=len(coco.anns.keys())) 27 | pbar.desc = 'Writing {} labels to {}'.format(split, labels_path) 28 | for id in pbar: 29 | a = coco.anns[id] 30 | 31 | if a['image_id'] not in coco.imgs: 32 | continue 33 | 34 | if 'train' in split and a['iscrowd']: 35 | continue 36 | 37 | img_info = coco.imgs[a['image_id']] 38 | img_h, img_w = img_info['height'], img_info['width'] 39 | # x, y, w, h = a['head_bbox'] 40 | x, y, w, h = a['bbox'] 41 | xc, yc = x + w / 2, y + h / 2 42 | xc /= img_w 43 | yc /= img_h 44 | w /= img_w 45 | h /= img_h 46 | 47 | [pitch, yaw, roll] = a['euler_angles'] 48 | pitch = (pitch / 180 + 0.5) # (-90,90)/180 + 0.5 --> (0,1) 49 | yaw = (yaw / 360 + 0.5) # (-180, 180)/360 + 0.5 --> (0,1) 50 | roll = (roll / 180 + 0.5) # (-90,90)/180 + 0.5 --> (0,1) 51 | 52 | yolov5_label_txt = '{}.txt'.format(osp.splitext(img_info['file_name'])[0]) 53 | with open(osp.join(labels_path, yolov5_label_txt), 'a') as f: 54 | f.write('{} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f}\n'.format( 55 | 0, xc, yc, w, h, pitch, yaw, roll)) 56 | pbar.close() 57 | 58 | with open(img_txt_path, 'w') as f: 59 | for img_info in coco.imgs.values(): 60 | f.write(osp.join(data['path'], 'images', '{}'.format(split), img_info['file_name']) + '\n') 61 | 62 | 63 | if __name__ == '__main__': 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument('--data', default='data/coco-kp.yaml') 66 | args = parser.parse_args() 67 | 68 | assert osp.isfile(args.data), 'Data config file not found at {}'.format(args.data) 69 | 70 | with open(args.data, 'rb') as f: 71 | data = yaml.safe_load(f) 72 | write_yolov5_labels(data) -------------------------------------------------------------------------------- /utils/labels_v2.py: -------------------------------------------------------------------------------- 1 | import os, os.path as osp 2 | import argparse 3 | import numpy as np 4 | import yaml 5 | import json 6 | from tqdm import tqdm 7 | 8 | def write_yolov5_labels(data): 9 | assert not osp.isdir(osp.join(data['path'], data['labels'])), \ 10 | 'Labels already generated. Remove or choose new name for labels.' 
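    # ------------------------------------------------------------------
    # Note (added for clarity; not part of the original labels_v2.py): unlike
    # utils/labels.py above, which parses a COCO-style annotation file through
    # pycocotools, this variant reads a plain JSON dict keyed by image name.
    # Each entry carries 'height'/'width', a 'bbox' given as
    # [xmin, ymin, xmax, ymax] (converted to x, y, w, h below), and a 'pose'
    # ordered [yaw, pitch, roll] rather than [pitch, yaw, roll]. The label
    # lines written out use the same "cls xc yc w h pitch yaw roll" format as
    # labels.py. A purely hypothetical entry, for illustration only:
    #
    #     {"agora_val_2000400001.jpg": {"height": 720, "width": 1280,
    #                                   "bbox": [100, 80, 180, 190],
    #                                   "pose": [30.0, -10.0, 5.0]}}
    # ------------------------------------------------------------------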
11 | 12 | splits = [osp.splitext(osp.split(data[s])[-1])[0] for s in ['train', 'val', 'test'] if s in data] 13 | annotations = [osp.join(data['path'], data['{}_annotations'.format(s)]) for s in ['train', 'val', 'test'] if s in data] 14 | test_split = [0 if s in ['train', 'val'] else 1 for s in ['train', 'val', 'test'] if s in data] 15 | img_txt_dir = osp.join(data['path'], data['labels'], 'img_txt') 16 | os.makedirs(img_txt_dir, exist_ok=True) 17 | 18 | for split, annot, is_test in zip(splits, annotations, test_split): 19 | img_txt_path = osp.join(img_txt_dir, '{}.txt'.format(split)) 20 | labels_path = osp.join(data['path'], '{}/{}'.format(data['labels'], split)) 21 | if not is_test: 22 | os.makedirs(labels_path, exist_ok=True) 23 | 24 | json_img_dict = json.load(open(annot, "r")) 25 | if not is_test: 26 | pbar = tqdm(json_img_dict.keys(), total=len(json_img_dict.keys())) 27 | pbar.desc = 'Writing {} labels to {}'.format(split, labels_path) 28 | for img_name in pbar: # the id is img_name 29 | a = json_img_dict[img_name] 30 | img_h, img_w = a['height'], a['width'] 31 | xmin, ymin, xmax, ymax = a['bbox'] 32 | x, y, w, h = xmin, ymin, xmax-xmin, ymax-ymin 33 | xc, yc = x + w / 2, y + h / 2 34 | xc /= img_w 35 | yc /= img_h 36 | w /= img_w 37 | h /= img_h 38 | 39 | [yaw, pitch, roll] = a['pose'] 40 | pitch = (pitch / 180 + 0.5) # (-90,90)/180 + 0.5 --> (0,1) 41 | yaw = (yaw / 360 + 0.5) # (-180, 180)/360 + 0.5 --> (0,1) 42 | roll = (roll / 180 + 0.5) # (-90,90)/180 + 0.5 --> (0,1) 43 | 44 | yolov5_label_txt = '{}.txt'.format(osp.splitext(img_name)[0]) 45 | with open(osp.join(labels_path, yolov5_label_txt), 'a') as f: 46 | f.write('{} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f}\n'.format( 47 | 0, xc, yc, w, h, pitch, yaw, roll)) 48 | pbar.close() 49 | 50 | with open(img_txt_path, 'w') as f: 51 | for img_name in json_img_dict.keys(): 52 | f.write(osp.join(data['path'], 'images', '{}'.format(split), img_name) + '\n') 53 | 54 | 55 | if __name__ == '__main__': 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--data', default='data/coco-kp.yaml') 58 | args = parser.parse_args() 59 | 60 | assert osp.isfile(args.data), 'Data config file not found at {}'.format(args.data) 61 | 62 | with open(args.data, 'rb') as f: 63 | data = yaml.safe_load(f) 64 | write_yolov5_labels(data) -------------------------------------------------------------------------------- /utils/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Logging utils 4 | """ 5 | 6 | import warnings 7 | from threading import Thread 8 | 9 | import torch 10 | from torch.utils.tensorboard import SummaryWriter 11 | 12 | from utils.general import colorstr, emojis 13 | from utils.loggers.wandb.wandb_utils import WandbLogger 14 | from utils.plots import plot_images, plot_results 15 | from utils.torch_utils import de_parallel 16 | 17 | LOGGERS = ('csv', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases 18 | 19 | try: 20 | import wandb 21 | 22 | assert hasattr(wandb, '__version__') # verify package import not local dir 23 | except (ImportError, AssertionError): 24 | wandb = None 25 | 26 | 27 | class Loggers(): 28 | # YOLOv5 Loggers class 29 | def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS): 30 | self.save_dir = save_dir 31 | self.weights = weights 32 | self.opt = opt 33 | self.hyp = hyp 34 | self.logger = logger # for printing results to console 35 | self.include = 
include 36 | self.keys = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', 'train/mse_loss', # train loss 37 | 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', # metrics 38 | 'val/box_loss', 'val/obj_loss', 'val/cls_loss', 'val/mse_loss', # val loss 39 | 'metrics/error_MAE', 'metrics/error_pitch', 'metrics/error_yaw', 'metrics/error_roll', # metrics error 40 | 'x/lr0', 'x/lr1', 'x/lr2'] # params 41 | for k in LOGGERS: 42 | setattr(self, k, None) # init empty logger dictionary 43 | self.csv = True # always log to csv 44 | 45 | # Message 46 | if not wandb: 47 | prefix = colorstr('Weights & Biases: ') 48 | s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)" 49 | print(emojis(s)) 50 | 51 | # TensorBoard 52 | s = self.save_dir 53 | if 'tb' in self.include and not self.opt.evolve: 54 | prefix = colorstr('TensorBoard: ') 55 | self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/") 56 | self.tb = SummaryWriter(str(s)) 57 | 58 | # W&B 59 | if wandb and 'wandb' in self.include: 60 | wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://') 61 | run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and not wandb_artifact_resume else None 62 | self.opt.hyp = self.hyp # add hyperparameters 63 | self.wandb = WandbLogger(self.opt, run_id) 64 | else: 65 | self.wandb = None 66 | 67 | def on_pretrain_routine_end(self): 68 | # Callback runs on pre-train routine end 69 | paths = self.save_dir.glob('*labels*.jpg') # training labels 70 | if self.wandb: 71 | self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]}) 72 | 73 | def on_train_batch_end(self, ni, model, imgs, targets, paths, plots, sync_bn): 74 | # Callback runs on train batch end 75 | if plots: 76 | if ni == 0: 77 | if not sync_bn: # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754 78 | with warnings.catch_warnings(): 79 | warnings.simplefilter('ignore') # suppress jit trace warning 80 | self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), []) 81 | if ni < 3: 82 | f = self.save_dir / f'train_batch{ni}.jpg' # filename 83 | Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() 84 | if self.wandb and ni == 10: 85 | files = sorted(self.save_dir.glob('train*.jpg')) 86 | self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]}) 87 | 88 | def on_train_epoch_end(self, epoch): 89 | # Callback runs on train epoch end 90 | if self.wandb: 91 | self.wandb.current_epoch = epoch + 1 92 | 93 | def on_val_image_end(self, pred, predn, path, names, im): 94 | # Callback runs on val image end 95 | if self.wandb: 96 | self.wandb.val_one_image(pred, predn, path, names, im) 97 | 98 | def on_val_end(self): 99 | # Callback runs on val end 100 | if self.wandb: 101 | files = sorted(self.save_dir.glob('val*.jpg')) 102 | self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]}) 103 | 104 | def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): 105 | # Callback runs at the end of each fit (train+val) epoch 106 | x = {k: v for k, v in zip(self.keys, vals)} # dict 107 | if self.csv: 108 | file = self.save_dir / 'results.csv' 109 | n = len(x) + 1 # number of cols 110 | s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header 111 | with open(file, 'a') as 
f: 112 | f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') 113 | 114 | if self.tb: 115 | for k, v in x.items(): 116 | self.tb.add_scalar(k, v, epoch) 117 | 118 | if self.wandb: 119 | self.wandb.log(x) 120 | self.wandb.end_epoch(best_result=best_fitness == fi) 121 | 122 | def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): 123 | # Callback runs on model save event 124 | if self.wandb: 125 | if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1: 126 | self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) 127 | 128 | def on_train_end(self, last, best, plots, epoch): 129 | # Callback runs on training end 130 | if plots: 131 | plot_results(file=self.save_dir / 'results.csv') # save results.png 132 | files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]] 133 | files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter 134 | 135 | if self.tb: 136 | import cv2 137 | for f in files: 138 | self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') 139 | 140 | if self.wandb: 141 | self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) 142 | # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model 143 | if not self.opt.evolve: 144 | wandb.log_artifact(str(best if best.exists() else last), type='model', 145 | name='run_' + self.wandb.wandb_run.id + '_model', 146 | aliases=['latest', 'best', 'stripped']) 147 | self.wandb.finish_run() 148 | else: 149 | self.wandb.finish_run() 150 | self.wandb = WandbLogger(self.opt) 151 | -------------------------------------------------------------------------------- /utils/loggers/wandb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/utils/loggers/wandb/__init__.py -------------------------------------------------------------------------------- /utils/loggers/wandb/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from wandb_utils import WandbLogger 4 | 5 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 6 | 7 | 8 | def create_dataset_artifact(opt): 9 | logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused 10 | 11 | 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') 15 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 16 | parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') 17 | parser.add_argument('--entity', default=None, help='W&B entity') 18 | parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run') 19 | 20 | opt = parser.parse_args() 21 | opt.resume = False # Explicitly disallow resume check for dataset upload job 22 | 23 | create_dataset_artifact(opt) 24 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import wandb 5 | 6 | FILE = Path(__file__).absolute() 7 | sys.path.append(FILE.parents[3].as_posix()) # add utils/ to path 8 | 9 | from train import train, 
parse_opt 10 | from utils.general import increment_path 11 | from utils.torch_utils import select_device 12 | 13 | 14 | def sweep(): 15 | wandb.init() 16 | # Get hyp dict from sweep agent 17 | hyp_dict = vars(wandb.config).get("_items") 18 | 19 | # Workaround: get necessary opt args 20 | opt = parse_opt(known=True) 21 | opt.batch_size = hyp_dict.get("batch_size") 22 | opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) 23 | opt.epochs = hyp_dict.get("epochs") 24 | opt.nosave = True 25 | opt.data = hyp_dict.get("data") 26 | device = select_device(opt.device, batch_size=opt.batch_size) 27 | 28 | # train 29 | train(hyp_dict, opt, device) 30 | 31 | 32 | if __name__ == "__main__": 33 | sweep() 34 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for training 2 | # To set range- 3 | # Provide min and max values as: 4 | # parameter: 5 | # 6 | # min: scalar 7 | # max: scalar 8 | # OR 9 | # 10 | # Set a specific list of search space- 11 | # parameter: 12 | # values: [scalar1, scalar2, scalar3...] 13 | # 14 | # You can use grid, bayesian and hyperopt search strategy 15 | # For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration 16 | 17 | program: utils/loggers/wandb/sweep.py 18 | method: random 19 | metric: 20 | name: metrics/mAP_0.5 21 | goal: maximize 22 | 23 | parameters: 24 | # hyperparameters: set either min, max range or values list 25 | data: 26 | value: "data/coco128.yaml" 27 | batch_size: 28 | values: [64] 29 | epochs: 30 | values: [10] 31 | 32 | lr0: 33 | distribution: uniform 34 | min: 1e-5 35 | max: 1e-1 36 | lrf: 37 | distribution: uniform 38 | min: 0.01 39 | max: 1.0 40 | momentum: 41 | distribution: uniform 42 | min: 0.6 43 | max: 0.98 44 | weight_decay: 45 | distribution: uniform 46 | min: 0.0 47 | max: 0.001 48 | warmup_epochs: 49 | distribution: uniform 50 | min: 0.0 51 | max: 5.0 52 | warmup_momentum: 53 | distribution: uniform 54 | min: 0.0 55 | max: 0.95 56 | warmup_bias_lr: 57 | distribution: uniform 58 | min: 0.0 59 | max: 0.2 60 | box: 61 | distribution: uniform 62 | min: 0.02 63 | max: 0.2 64 | cls: 65 | distribution: uniform 66 | min: 0.2 67 | max: 4.0 68 | cls_pw: 69 | distribution: uniform 70 | min: 0.5 71 | max: 2.0 72 | obj: 73 | distribution: uniform 74 | min: 0.2 75 | max: 4.0 76 | obj_pw: 77 | distribution: uniform 78 | min: 0.5 79 | max: 2.0 80 | iou_t: 81 | distribution: uniform 82 | min: 0.1 83 | max: 0.7 84 | anchor_t: 85 | distribution: uniform 86 | min: 2.0 87 | max: 8.0 88 | fl_gamma: 89 | distribution: uniform 90 | min: 0.0 91 | max: 0.1 92 | hsv_h: 93 | distribution: uniform 94 | min: 0.0 95 | max: 0.1 96 | hsv_s: 97 | distribution: uniform 98 | min: 0.0 99 | max: 0.9 100 | hsv_v: 101 | distribution: uniform 102 | min: 0.0 103 | max: 0.9 104 | degrees: 105 | distribution: uniform 106 | min: 0.0 107 | max: 45.0 108 | translate: 109 | distribution: uniform 110 | min: 0.0 111 | max: 0.9 112 | scale: 113 | distribution: uniform 114 | min: 0.0 115 | max: 0.9 116 | shear: 117 | distribution: uniform 118 | min: 0.0 119 | max: 10.0 120 | perspective: 121 | distribution: uniform 122 | min: 0.0 123 | max: 0.001 124 | flipud: 125 | distribution: uniform 126 | min: 0.0 127 | max: 1.0 128 | fliplr: 129 | distribution: uniform 130 | min: 0.0 131 | max: 1.0 132 | mosaic: 133 | distribution: uniform 134 | min: 0.0 135 | 
max: 1.0 136 | mixup: 137 | distribution: uniform 138 | min: 0.0 139 | max: 1.0 140 | copy_paste: 141 | distribution: uniform 142 | min: 0.0 143 | max: 1.0 144 | -------------------------------------------------------------------------------- /utils/renderer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | The code is from https://github.com/vitoralbiero/img2pose/blob/main/utils/renderer.py 3 | ''' 4 | 5 | import cv2 6 | import numpy as np 7 | from Sim3DR import RenderPipeline 8 | from scipy.spatial.transform import Rotation 9 | 10 | def transform_points(points, pose): 11 | return points.dot(Rotation.from_rotvec(pose[:3]).as_matrix().T) + pose[3:] 12 | 13 | def plot_3d_landmark(verts, campose, intrinsics): 14 | lm_3d_trans = transform_points(verts, campose) 15 | 16 | # project to image plane 17 | lms_3d_trans_proj = intrinsics.dot(lm_3d_trans.T).T 18 | lms_projected = ( 19 | lms_3d_trans_proj[:, :2] / np.tile(lms_3d_trans_proj[:, 2], (2, 1)).T 20 | ) 21 | 22 | return lms_projected, lms_3d_trans_proj 23 | 24 | 25 | def _to_ctype(arr): 26 | if not arr.flags.c_contiguous: 27 | return arr.copy(order="C") 28 | return arr 29 | 30 | 31 | def get_colors(img, ver): 32 | h, w, _ = img.shape 33 | ver[0, :] = np.minimum(np.maximum(ver[0, :], 0), w - 1) # x 34 | ver[1, :] = np.minimum(np.maximum(ver[1, :], 0), h - 1) # y 35 | ind = np.round(ver).astype(np.int32) 36 | colors = img[ind[1, :], ind[0, :], :] / 255.0 # n x 3 37 | 38 | return colors.copy() 39 | 40 | 41 | class Renderer: 42 | def __init__( 43 | self, 44 | vertices_path="../pose_references/vertices_trans.npy", 45 | triangles_path="../pose_references/triangles.npy", 46 | ): 47 | self.vertices = np.load(vertices_path) 48 | self.triangles = _to_ctype(np.load(triangles_path).T) 49 | self.vertices[:, 0] *= -1 50 | 51 | self.cfg = { 52 | "intensity_ambient": 0.3, 53 | "color_ambient": (1, 1, 1), 54 | "intensity_directional": 0.6, 55 | "color_directional": (1, 1, 1), 56 | "intensity_specular": 0.1, 57 | "specular_exp": 5, 58 | "light_pos": (0, 0, 5), 59 | "view_pos": (0, 0, 5), 60 | } 61 | 62 | self.render_app = RenderPipeline(**self.cfg) 63 | 64 | def transform_vertices(self, img, poses, global_intrinsics=None): 65 | # (w, h) = img.size # PIL 66 | (h, w, c) = img.shape # cv2 67 | if global_intrinsics is None: 68 | global_intrinsics = np.array( 69 | [[w + h, 0, w // 2], [0, w + h, h // 2], [0, 0, 1]] 70 | ) 71 | 72 | transformed_vertices = [] 73 | for pose in poses: 74 | projected_lms = np.zeros_like(self.vertices) 75 | projected_lms[:, :2], lms_3d_trans_proj = plot_3d_landmark( 76 | self.vertices, pose, global_intrinsics 77 | ) 78 | projected_lms[:, 2] = lms_3d_trans_proj[:, 2] * -1 79 | 80 | range_x = np.max(projected_lms[:, 0]) - np.min(projected_lms[:, 0]) 81 | range_y = np.max(projected_lms[:, 1]) - np.min(projected_lms[:, 1]) 82 | 83 | s = (h + w) / pose[5] 84 | projected_lms[:, 2] *= s 85 | projected_lms[:, 2] += (range_x + range_y) * 3 86 | 87 | transformed_vertices.append(projected_lms) 88 | 89 | return transformed_vertices 90 | 91 | def render(self, img, transformed_vertices, alpha=0.9, save_path=None): 92 | img = np.asarray(img) 93 | overlap = img.copy() 94 | 95 | for vertices in transformed_vertices: 96 | vertices = _to_ctype(vertices) # transpose 97 | overlap = self.render_app(vertices, self.triangles, overlap) 98 | 99 | res = cv2.addWeighted(img, 1 - alpha, overlap, alpha, 0) 100 | 101 | if save_path is not None: 102 | cv2.imwrite(save_path, res) 103 | print(f"Save visualization result to 
{save_path}") 104 | 105 | return res 106 | 107 | def save_to_obj(self, img, ver_lst, height, save_path): 108 | n_obj = len(ver_lst) # count obj 109 | 110 | if n_obj <= 0: 111 | return 112 | 113 | n_vertex = ver_lst[0].T.shape[1] 114 | n_face = self.triangles.shape[0] 115 | 116 | with open(save_path, "w") as f: 117 | for i in range(n_obj): 118 | ver = ver_lst[i].T 119 | colors = get_colors(img, ver) 120 | 121 | for j in range(n_vertex): 122 | x, y, z = ver[:, j] 123 | f.write( 124 | f"v {x:.2f} {height - y:.2f} {z:.2f} {colors[j, 2]:.2f} " 125 | f"{colors[j, 1]:.2f} {colors[j, 0]:.2f}\n" 126 | ) 127 | 128 | for i in range(n_obj): 129 | offset = i * n_vertex 130 | for j in range(n_face): 131 | idx1, idx2, idx3 = self.triangles[j] # m x 3 132 | f.write( 133 | f"f {idx3 + 1 + offset} {idx2 + 1 + offset} " 134 | f"{idx1 + 1 + offset}\n" 135 | ) 136 | 137 | print(f"Dump tp {save_path}") 138 | -------------------------------------------------------------------------------- /weights/readme.md: -------------------------------------------------------------------------------- 1 | Put the pretrained YOLOv5 model weights here. --------------------------------------------------------------------------------