├── Arial.ttf ├── LICENSE.txt ├── README.md ├── Sim3DR ├── Sim3DR.py ├── __init__.py ├── _init_paths.py ├── build_sim3dr.sh ├── lib │ ├── rasterize.cpp │ ├── rasterize.h │ ├── rasterize.pyx │ └── rasterize_kernel.cpp ├── lighting.py ├── readme.md ├── setup.py └── tests │ ├── CMakeLists.txt │ ├── io.cpp │ ├── io.h │ └── test.cpp ├── data ├── 300w_lp_aflw2000.yaml ├── 300w_lp_biwi.yaml ├── agora_coco.yaml ├── cmu_panoptic_coco.yaml ├── hyp-p6.yaml └── widerface_coco.yaml ├── demos ├── image.py ├── image_vis3d.py ├── image_vis3d_6DRepNet.py ├── video.py └── video_vis3d.py ├── exps ├── AGORA │ ├── agora_evaluation │ │ ├── get_joints_verts_from_dataframe.py │ │ └── projection.py │ ├── data_process_hpe.py │ ├── data_process_hpe_slim.py │ └── hpe_utils.py ├── CMU │ ├── data_split_hpe.py │ ├── data_statistic_hpe.py │ ├── data_statistic_hpe_slim.py │ ├── hpe_utils.py │ ├── panoptic-toolbox │ │ ├── getDB_panoptic_ver1_2_hdVideo.sh │ │ ├── getDB_panoptic_ver1_2_hdVideo_t1.sh │ │ ├── getDB_panoptic_ver1_2_hdVideo_t2.sh │ │ ├── getDB_panoptic_ver1_2_hdVideo_t3.sh │ │ ├── getDB_panoptic_ver1_2_hdVideo_t4.sh │ │ └── getData_hdVideo.sh │ ├── released_seqs_excel.xlsx │ └── selected_HPE_list.txt ├── compare_3ddfa.py ├── compare_3ddfa_v2.py ├── compare_6DRepNet.py ├── compare_FSANet.py ├── compare_HopeNet.py ├── compare_SynergyNet.py ├── compare_WHENetONNX.py ├── compare_dad3dnet.py ├── compare_error_analysis_v2.py ├── compare_img2pose.py ├── convert_coco_style_300wlp_aflw2000.py ├── convert_coco_style_300wlp_biwi.py ├── convert_coco_style_img2pose.py ├── gen_dataset_full_AGORA_CMU.py ├── gen_dataset_single_AGORA.py ├── gen_dataset_single_CMU.py ├── sixdrepnet.zip └── statistic_angles.py ├── materials ├── 000000002685_vis3d_res.jpg ├── 000000018380_vis3d_res.jpg ├── 000000038829_vis3d_res.jpg ├── 000000081988_vis3d_res.jpg ├── 000000161925_vis3d_res.jpg ├── 000000183648_vis3d_res.jpg ├── 000002_mpiinew_test_DirectMHP_vis3d.gif ├── 000003_mpiinew_test_DirectMHP_vis3d.gif ├── datasetexamples.png ├── full_range.png └── illustration.png ├── models ├── common.py ├── experimental.py ├── yolo.py ├── yolov5l6.yaml ├── yolov5m6.yaml └── yolov5s6.yaml ├── pose_references ├── reference_3d_5_points_trans.npy ├── reference_3d_68_points_trans.npy ├── triangles.npy └── vertices_trans.npy ├── requirements.txt ├── test_imgs ├── AGORA │ ├── agora_val_2000400001.jpg │ └── agora_val_2000400205.jpg ├── CMU │ ├── cmu_val_10400060013.jpg │ ├── cmu_val_10602142026.jpg │ └── cmu_val_11500144012.jpg └── COCO │ ├── 000000002685.jpg │ ├── 000000018380.jpg │ ├── 000000038829.jpg │ ├── 000000081988.jpg │ ├── 000000161925.jpg │ └── 000000183648.jpg ├── train.py ├── utils ├── activations.py ├── augmentations.py ├── autoanchor.py ├── callbacks.py ├── datasets.py ├── downloads.py ├── general.py ├── labels.py ├── labels_v2.py ├── loggers │ ├── __init__.py │ └── wandb │ │ ├── README.md │ │ ├── __init__.py │ │ ├── log_dataset.py │ │ ├── sweep.py │ │ ├── sweep.yaml │ │ └── wandb_utils.py ├── loss.py ├── mae.py ├── metrics.py ├── plots.py ├── renderer.py └── torch_utils.py ├── val.py └── weights └── readme.md /Arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/Arial.ttf -------------------------------------------------------------------------------- /Sim3DR/Sim3DR.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from . 
import _init_paths 4 | import numpy as np 5 | import Sim3DR_Cython 6 | 7 | 8 | def get_normal(vertices, triangles): 9 | normal = np.zeros_like(vertices, dtype=np.float32) 10 | Sim3DR_Cython.get_normal(normal, vertices, triangles, vertices.shape[0], triangles.shape[0]) 11 | return normal 12 | 13 | 14 | def rasterize(vertices, triangles, colors, bg=None, 15 | height=None, width=None, channel=None, 16 | reverse=False): 17 | if bg is not None: 18 | height, width, channel = bg.shape 19 | else: 20 | assert height is not None and width is not None and channel is not None 21 | bg = np.zeros((height, width, channel), dtype=np.uint8) 22 | 23 | buffer = np.zeros((height, width), dtype=np.float32) - 1e8 24 | 25 | if colors.dtype != np.float32: 26 | colors = colors.astype(np.float32) 27 | Sim3DR_Cython.rasterize(bg, vertices, triangles, colors, buffer, triangles.shape[0], height, width, channel, 28 | reverse=reverse) 29 | return bg 30 | -------------------------------------------------------------------------------- /Sim3DR/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from .Sim3DR import get_normal, rasterize 4 | from .lighting import RenderPipeline 5 | -------------------------------------------------------------------------------- /Sim3DR/_init_paths.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os.path as osp 4 | import sys 5 | 6 | 7 | def add_path(path): 8 | if path not in sys.path: 9 | sys.path.insert(0, path) 10 | 11 | 12 | this_dir = osp.dirname(__file__) 13 | lib_path = osp.join(this_dir, '.') 14 | add_path(lib_path) 15 | -------------------------------------------------------------------------------- /Sim3DR/build_sim3dr.sh: -------------------------------------------------------------------------------- 1 | python3 setup.py build_ext --inplace -------------------------------------------------------------------------------- /Sim3DR/lib/rasterize.h: -------------------------------------------------------------------------------- 1 | #ifndef MESH_CORE_HPP_ 2 | #define MESH_CORE_HPP_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | class Point3D { 14 | public: 15 | float x; 16 | float y; 17 | float z; 18 | 19 | public: 20 | Point3D() : x(0.f), y(0.f), z(0.f) {} 21 | Point3D(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {} 22 | 23 | void initialize(float x_, float y_, float z_){ 24 | this->x = x_; this->y = y_; this->z = z_; 25 | } 26 | 27 | Point3D cross(Point3D &p){ 28 | Point3D c; 29 | c.x = this->y * p.z - this->z * p.y; 30 | c.y = this->z * p.x - this->x * p.z; 31 | c.z = this->x * p.y - this->y * p.x; 32 | return c; 33 | } 34 | 35 | float dot(Point3D &p) { 36 | return this->x * p.x + this->y * p.y + this->z * p.z; 37 | } 38 | 39 | Point3D operator-(const Point3D &p) { 40 | Point3D np; 41 | np.x = this->x - p.x; 42 | np.y = this->y - p.y; 43 | np.z = this->z - p.z; 44 | return np; 45 | } 46 | 47 | }; 48 | 49 | class Point { 50 | public: 51 | float x; 52 | float y; 53 | 54 | public: 55 | Point() : x(0.f), y(0.f) {} 56 | Point(float x_, float y_) : x(x_), y(y_) {} 57 | float dot(Point p) { 58 | return this->x * p.x + this->y * p.y; 59 | } 60 | 61 | Point operator-(const Point &p) { 62 | Point np; 63 | np.x = this->x - p.x; 64 | np.y = this->y - p.y; 65 | return np; 66 | } 67 | 68 | Point operator+(const Point &p) { 69 | Point np; 70 | np.x = this->x + p.x; 71 | np.y = 
this->y + p.y; 72 | return np; 73 | } 74 | 75 | Point operator*(float s) { 76 | Point np; 77 | np.x = s * this->x; 78 | np.y = s * this->y; 79 | return np; 80 | } 81 | }; 82 | 83 | 84 | bool is_point_in_tri(Point p, Point p0, Point p1, Point p2); 85 | 86 | void get_point_weight(float *weight, Point p, Point p0, Point p1, Point p2); 87 | 88 | void _get_tri_normal(float *tri_normal, float *vertices, int *triangles, int ntri, bool norm_flg); 89 | 90 | void _get_ver_normal(float *ver_normal, float *tri_normal, int *triangles, int nver, int ntri); 91 | 92 | void _get_normal(float *ver_normal, float *vertices, int *triangles, int nver, int ntri); 93 | 94 | void _rasterize_triangles( 95 | float *vertices, int *triangles, float *depth_buffer, int *triangle_buffer, float *barycentric_weight, 96 | int ntri, int h, int w); 97 | 98 | void _rasterize( 99 | unsigned char *image, float *vertices, int *triangles, float *colors, 100 | float *depth_buffer, int ntri, int h, int w, int c, float alpha, bool reverse); 101 | 102 | void _render_texture_core( 103 | float *image, float *vertices, int *triangles, 104 | float *texture, float *tex_coords, int *tex_triangles, 105 | float *depth_buffer, 106 | int nver, int tex_nver, int ntri, 107 | int h, int w, int c, 108 | int tex_h, int tex_w, int tex_c, 109 | int mapping_type); 110 | 111 | void _write_obj_with_colors_texture(string filename, string mtl_name, 112 | float *vertices, int *triangles, float *colors, float *uv_coords, 113 | int nver, int ntri, int ntexver); 114 | 115 | #endif -------------------------------------------------------------------------------- /Sim3DR/lib/rasterize.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | # from libcpp.string cimport string 4 | cimport cython 5 | from libcpp cimport bool 6 | 7 | # from cpython import bool 8 | 9 | # use the Numpy-C-API from Cython 10 | np.import_array() 11 | 12 | # cdefine the signature of our c function 13 | cdef extern from "rasterize.h": 14 | void _rasterize_triangles( 15 | float*vertices, int*triangles, float*depth_buffer, int*triangle_buffer, float*barycentric_weight, 16 | int ntri, int h, int w 17 | ) 18 | 19 | void _rasterize( 20 | unsigned char*image, float*vertices, int*triangles, float*colors, float*depth_buffer, 21 | int ntri, int h, int w, int c, float alpha, bool reverse 22 | ) 23 | 24 | # void _render_texture_core( 25 | # float* image, float* vertices, int* triangles, 26 | # float* texture, float* tex_coords, int* tex_triangles, 27 | # float* depth_buffer, 28 | # int nver, int tex_nver, int ntri, 29 | # int h, int w, int c, 30 | # int tex_h, int tex_w, int tex_c, 31 | # int mapping_type) 32 | 33 | void _get_tri_normal(float *tri_normal, float *vertices, int *triangles, int nver, bool norm_flg) 34 | void _get_ver_normal(float *ver_normal, float*tri_normal, int*triangles, int nver, int ntri) 35 | void _get_normal(float *ver_normal, float *vertices, int *triangles, int nver, int ntri) 36 | 37 | 38 | # void _write_obj_with_colors_texture(string filename, string mtl_name, 39 | # float* vertices, int* triangles, float* colors, float* uv_coords, 40 | # int nver, int ntri, int ntexver) 41 | 42 | @cython.boundscheck(False) 43 | @cython.wraparound(False) 44 | def get_tri_normal(np.ndarray[float, ndim=2, mode="c"] tri_normal not None, 45 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 46 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 47 | int ntri, bool norm_flg = False): 48 | _get_tri_normal( 
np.PyArray_DATA(tri_normal), np.PyArray_DATA(vertices), 49 | np.PyArray_DATA(triangles), ntri, norm_flg) 50 | 51 | @cython.boundscheck(False) # turn off bounds-checking for entire function 52 | @cython.wraparound(False) # turn off negative index wrapping for entire function 53 | def get_ver_normal(np.ndarray[float, ndim=2, mode = "c"] ver_normal not None, 54 | np.ndarray[float, ndim=2, mode = "c"] tri_normal not None, 55 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 56 | int nver, int ntri): 57 | _get_ver_normal( 58 | np.PyArray_DATA(ver_normal), np.PyArray_DATA(tri_normal), np.PyArray_DATA(triangles), 59 | nver, ntri) 60 | 61 | @cython.boundscheck(False) # turn off bounds-checking for entire function 62 | @cython.wraparound(False) # turn off negative index wrapping for entire function 63 | def get_normal(np.ndarray[float, ndim=2, mode = "c"] ver_normal not None, 64 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 65 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 66 | int nver, int ntri): 67 | _get_normal( 68 | np.PyArray_DATA(ver_normal), np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 69 | nver, ntri) 70 | 71 | 72 | @cython.boundscheck(False) # turn off bounds-checking for entire function 73 | @cython.wraparound(False) # turn off negative index wrapping for entire function 74 | def rasterize_triangles( 75 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 76 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 77 | np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 78 | np.ndarray[int, ndim=2, mode = "c"] triangle_buffer not None, 79 | np.ndarray[float, ndim=2, mode = "c"] barycentric_weight not None, 80 | int ntri, int h, int w 81 | ): 82 | _rasterize_triangles( 83 | np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 84 | np.PyArray_DATA(depth_buffer), np.PyArray_DATA(triangle_buffer), 85 | np.PyArray_DATA(barycentric_weight), 86 | ntri, h, w) 87 | 88 | @cython.boundscheck(False) # turn off bounds-checking for entire function 89 | @cython.wraparound(False) # turn off negative index wrapping for entire function 90 | def rasterize(np.ndarray[unsigned char, ndim=3, mode = "c"] image not None, 91 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 92 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 93 | np.ndarray[float, ndim=2, mode = "c"] colors not None, 94 | np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 95 | int ntri, int h, int w, int c, float alpha = 1, bool reverse = False 96 | ): 97 | _rasterize( 98 | np.PyArray_DATA(image), np.PyArray_DATA(vertices), 99 | np.PyArray_DATA(triangles), 100 | np.PyArray_DATA(colors), 101 | np.PyArray_DATA(depth_buffer), 102 | ntri, h, w, c, alpha, reverse) 103 | 104 | # def render_texture_core(np.ndarray[float, ndim=3, mode = "c"] image not None, 105 | # np.ndarray[float, ndim=2, mode = "c"] vertices not None, 106 | # np.ndarray[int, ndim=2, mode="c"] triangles not None, 107 | # np.ndarray[float, ndim=3, mode = "c"] texture not None, 108 | # np.ndarray[float, ndim=2, mode = "c"] tex_coords not None, 109 | # np.ndarray[int, ndim=2, mode="c"] tex_triangles not None, 110 | # np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 111 | # int nver, int tex_nver, int ntri, 112 | # int h, int w, int c, 113 | # int tex_h, int tex_w, int tex_c, 114 | # int mapping_type 115 | # ): 116 | # _render_texture_core( 117 | # np.PyArray_DATA(image), np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 118 | # np.PyArray_DATA(texture), np.PyArray_DATA(tex_coords), 
np.PyArray_DATA(tex_triangles), 119 | # np.PyArray_DATA(depth_buffer), 120 | # nver, tex_nver, ntri, 121 | # h, w, c, 122 | # tex_h, tex_w, tex_c, 123 | # mapping_type) 124 | # 125 | # def write_obj_with_colors_texture_core(string filename, string mtl_name, 126 | # np.ndarray[float, ndim=2, mode = "c"] vertices not None, 127 | # np.ndarray[int, ndim=2, mode="c"] triangles not None, 128 | # np.ndarray[float, ndim=2, mode = "c"] colors not None, 129 | # np.ndarray[float, ndim=2, mode = "c"] uv_coords not None, 130 | # int nver, int ntri, int ntexver 131 | # ): 132 | # _write_obj_with_colors_texture(filename, mtl_name, 133 | # np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), np.PyArray_DATA(colors), np.PyArray_DATA(uv_coords), 134 | # nver, ntri, ntexver) 135 | -------------------------------------------------------------------------------- /Sim3DR/lighting.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | from .Sim3DR import get_normal, rasterize 5 | 6 | _norm = lambda arr: arr / np.sqrt(np.sum(arr ** 2, axis=1))[:, None] 7 | 8 | 9 | def norm_vertices(vertices): 10 | vertices -= vertices.min(0)[None, :] 11 | vertices /= vertices.max() 12 | vertices *= 2 13 | vertices -= vertices.max(0)[None, :] / 2 14 | return vertices 15 | 16 | 17 | def convert_type(obj): 18 | if isinstance(obj, tuple) or isinstance(obj, list): 19 | return np.array(obj, dtype=np.float32)[None, :] 20 | return obj 21 | 22 | 23 | class RenderPipeline(object): 24 | def __init__(self, **kwargs): 25 | self.intensity_ambient = convert_type(kwargs.get('intensity_ambient', 0.3)) 26 | self.intensity_directional = convert_type(kwargs.get('intensity_directional', 0.6)) 27 | self.intensity_specular = convert_type(kwargs.get('intensity_specular', 0.1)) 28 | self.specular_exp = kwargs.get('specular_exp', 5) 29 | self.color_ambient = convert_type(kwargs.get('color_ambient', (1, 1, 1))) 30 | self.color_directional = convert_type(kwargs.get('color_directional', (1, 1, 1))) 31 | self.light_pos = convert_type(kwargs.get('light_pos', (0, 0, 5))) 32 | self.view_pos = convert_type(kwargs.get('view_pos', (0, 0, 5))) 33 | 34 | def update_light_pos(self, light_pos): 35 | self.light_pos = convert_type(light_pos) 36 | 37 | def __call__(self, vertices, triangles, bg, texture=None): 38 | normal = get_normal(vertices, triangles) 39 | 40 | # 2. lighting 41 | light = np.zeros_like(vertices, dtype=np.float32) 42 | # ambient component 43 | if self.intensity_ambient > 0: 44 | light += self.intensity_ambient * self.color_ambient 45 | 46 | vertices_n = norm_vertices(vertices.copy()) 47 | if self.intensity_directional > 0: 48 | # diffuse component 49 | direction = _norm(self.light_pos - vertices_n) 50 | cos = np.sum(normal * direction, axis=1)[:, None] 51 | # cos = np.clip(cos, 0, 1) 52 | # todo: check below 53 | light += self.intensity_directional * (self.color_directional * np.clip(cos, 0, 1)) 54 | 55 | # specular component 56 | if self.intensity_specular > 0: 57 | v2v = _norm(self.view_pos - vertices_n) 58 | reflection = 2 * cos * normal - direction 59 | spe = np.sum((v2v * reflection) ** self.specular_exp, axis=1)[:, None] 60 | spe = np.where(cos != 0, np.clip(spe, 0, 1), np.zeros_like(spe)) 61 | light += self.intensity_specular * self.color_directional * np.clip(spe, 0, 1) 62 | light = np.clip(light, 0, 1) 63 | 64 | # 2. 
rasterization, [0, 1] 65 | if texture is None: 66 | render_img = rasterize(vertices, triangles, light, bg=bg) 67 | return render_img 68 | else: 69 | texture *= light 70 | render_img = rasterize(vertices, triangles, texture, bg=bg) 71 | return render_img 72 | 73 | 74 | def main(): 75 | pass 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /Sim3DR/readme.md: -------------------------------------------------------------------------------- 1 | ## Forked from https://github.com/cleardusk/3DDFA_V2/tree/master/Sim3DR 2 | 3 | ## Sim3DR 4 | This is a simple 3D render, written by c++ and cython. 5 | 6 | ### Build Sim3DR 7 | 8 | ```shell script 9 | python3 setup.py build_ext --inplace 10 | ``` 11 | 12 | -------------------------------------------------------------------------------- /Sim3DR/setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | python setup.py build_ext -i 3 | to compile 4 | ''' 5 | 6 | from distutils.core import setup, Extension 7 | from Cython.Build import cythonize 8 | from Cython.Distutils import build_ext 9 | import numpy 10 | 11 | setup( 12 | name='Sim3DR_Cython', # not the package name 13 | cmdclass={'build_ext': build_ext}, 14 | ext_modules=[Extension("Sim3DR_Cython", 15 | sources=["lib/rasterize.pyx", "lib/rasterize_kernel.cpp"], 16 | language='c++', 17 | include_dirs=[numpy.get_include()], 18 | extra_compile_args=["-std=c++11"])], 19 | ) 20 | -------------------------------------------------------------------------------- /Sim3DR/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | set(TARGET test) 4 | project(${TARGET}) 5 | 6 | #find_package( OpenCV REQUIRED ) 7 | #include_directories( ${OpenCV_INCLUDE_DIRS} ) 8 | 9 | #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -O3") 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -std=c++11") 11 | add_executable(${TARGET} test.cpp rasterize_kernel.cpp io.cpp) 12 | target_include_directories(${TARGET} PRIVATE ${PROJECT_SOURCE_DIR}) 13 | -------------------------------------------------------------------------------- /Sim3DR/tests/io.cpp: -------------------------------------------------------------------------------- 1 | #include "io.h" 2 | 3 | //void load_obj(const string obj_fp, float* vertices, float* colors, float* triangles){ 4 | // string line; 5 | // ifstream in(obj_fp); 6 | // 7 | // if(in.is_open()){ 8 | // while (getline(in, line)){ 9 | // stringstream ss(line); 10 | // 11 | // char t; // type: v, f 12 | // ss >> t; 13 | // if (t == 'v'){ 14 | // 15 | // } 16 | // } 17 | // } 18 | //} 19 | 20 | void load_obj(const char *obj_fp, float *vertices, float *colors, int *triangles, int nver, int ntri) { 21 | FILE *fp; 22 | fp = fopen(obj_fp, "r"); 23 | 24 | char t; // type: v or f 25 | if (fp != nullptr) { 26 | for (int i = 0; i < nver; ++i) { 27 | fscanf(fp, "%c", &t); 28 | for (int j = 0; j < 3; ++j) 29 | fscanf(fp, " %f", &vertices[3 * i + j]); 30 | for (int j = 0; j < 3; ++j) 31 | fscanf(fp, " %f", &colors[3 * i + j]); 32 | fscanf(fp, "\n"); 33 | } 34 | // fscanf(fp, "%c", &t); 35 | for (int i = 0; i < ntri; ++i) { 36 | fscanf(fp, "%c", &t); 37 | for (int j = 0; j < 3; ++j) { 38 | fscanf(fp, " %d", &triangles[3 * i + j]); 39 | triangles[3 * i + j] -= 1; 40 | } 41 | fscanf(fp, "\n"); 42 | } 43 | 44 | fclose(fp); 45 | } 46 | } 47 | 48 | void load_ply(const char *ply_fp, float *vertices, int 
*triangles, int nver, int ntri) { 49 | FILE *fp; 50 | fp = fopen(ply_fp, "r"); 51 | 52 | // char s[256]; 53 | char t; 54 | if (fp != nullptr) { 55 | // for (int i = 0; i < 9; ++i) 56 | // fscanf(fp, "%s", s); 57 | for (int i = 0; i < nver; ++i) 58 | fscanf(fp, "%f %f %f\n", &vertices[3 * i], &vertices[3 * i + 1], &vertices[3 * i + 2]); 59 | 60 | for (int i = 0; i < ntri; ++i) 61 | fscanf(fp, "%c %d %d %d\n", &t, &triangles[3 * i], &triangles[3 * i + 1], &triangles[3 * i + 2]); 62 | 63 | fclose(fp); 64 | } 65 | } 66 | 67 | void write_ppm(const char *filename, unsigned char *img, int h, int w, int c) { 68 | FILE *fp; 69 | //open file for output 70 | fp = fopen(filename, "wb"); 71 | if (!fp) { 72 | fprintf(stderr, "Unable to open file '%s'\n", filename); 73 | exit(1); 74 | } 75 | 76 | //write the header file 77 | //image format 78 | fprintf(fp, "P6\n"); 79 | 80 | //image size 81 | fprintf(fp, "%d %d\n", w, h); 82 | 83 | // rgb component depth 84 | fprintf(fp, "%d\n", MAX_PXL_VALUE); 85 | 86 | // pixel data 87 | fwrite(img, sizeof(unsigned char), size_t(h * w * c), fp); 88 | fclose(fp); 89 | } -------------------------------------------------------------------------------- /Sim3DR/tests/io.h: -------------------------------------------------------------------------------- 1 | #ifndef IO_H_ 2 | #define IO_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | #define MAX_PXL_VALUE 255 13 | 14 | void load_obj(const char* obj_fp, float* vertices, float* colors, int* triangles, int nver, int ntri); 15 | void load_ply(const char* ply_fp, float* vertices, int* triangles, int nver, int ntri); 16 | 17 | 18 | void write_ppm(const char *filename, unsigned char *img, int h, int w, int c); 19 | 20 | #endif -------------------------------------------------------------------------------- /Sim3DR/tests/test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Tesing cases 3 | */ 4 | 5 | #include 6 | #include 7 | #include "rasterize.h" 8 | #include "io.h" 9 | 10 | void test_isPointInTri() { 11 | Point p0(0, 0); 12 | Point p1(1, 0); 13 | Point p2(1, 1); 14 | 15 | Point p(0.2, 0.2); 16 | 17 | if (is_point_in_tri(p, p0, p1, p2)) 18 | std::cout << "In"; 19 | else 20 | std::cout << "Out"; 21 | std::cout << std::endl; 22 | } 23 | 24 | void test_getPointWeight() { 25 | Point p0(0, 0); 26 | Point p1(1, 0); 27 | Point p2(1, 1); 28 | 29 | Point p(0.2, 0.2); 30 | 31 | float weight[3]; 32 | get_point_weight(weight, p, p0, p1, p2); 33 | std::cout << weight[0] << " " << weight[1] << " " << weight[2] << std::endl; 34 | } 35 | 36 | void test_get_tri_normal() { 37 | float tri_normal[3]; 38 | // float vertices[9] = {1, 0, 0, 0, 0, 0, 0, 1, 0}; 39 | float vertices[9] = {1, 1.1, 0, 0, 0, 0, 0, 0.6, 0.7}; 40 | int triangles[3] = {0, 1, 2}; 41 | int ntri = 1; 42 | 43 | _get_tri_normal(tri_normal, vertices, triangles, ntri); 44 | 45 | for (int i = 0; i < 3; ++i) 46 | std::cout << tri_normal[i] << ", "; 47 | std::cout << std::endl; 48 | } 49 | 50 | void test_load_obj() { 51 | const char *fp = "../data/vd005_mesh.obj"; 52 | int nver = 35709; 53 | int ntri = 70789; 54 | 55 | auto *vertices = new float[nver]; 56 | auto *colors = new float[nver]; 57 | auto *triangles = new int[ntri]; 58 | load_obj(fp, vertices, colors, triangles, nver, ntri); 59 | 60 | delete[] vertices; 61 | delete[] colors; 62 | delete[] triangles; 63 | } 64 | 65 | void test_render() { 66 | // 1. 
loading obj 67 | // const char *fp = "/Users/gjz/gjzprojects/Sim3DR/data/vd005_mesh.obj"; 68 | const char *fp = "/Users/gjz/gjzprojects/Sim3DR/data/face1.obj"; 69 | int nver = 35709; //53215; //35709; 70 | int ntri = 70789; //105840;//70789; 71 | 72 | auto *vertices = new float[3 * nver]; 73 | auto *colors = new float[3 * nver]; 74 | auto *triangles = new int[3 * ntri]; 75 | load_obj(fp, vertices, colors, triangles, nver, ntri); 76 | 77 | // 2. rendering 78 | int h = 224, w = 224, c = 3; 79 | 80 | // enlarging 81 | int scale = 4; 82 | h *= scale; 83 | w *= scale; 84 | for (int i = 0; i < nver * 3; ++i) vertices[i] *= scale; 85 | 86 | auto *image = new unsigned char[h * w * c](); 87 | auto *depth_buffer = new float[h * w](); 88 | 89 | for (int i = 0; i < h * w; ++i) depth_buffer[i] = -999999; 90 | 91 | clock_t t; 92 | t = clock(); 93 | 94 | _rasterize(image, vertices, triangles, colors, depth_buffer, ntri, h, w, c, true); 95 | t = clock() - t; 96 | double time_taken = ((double) t) / CLOCKS_PER_SEC; // in seconds 97 | printf("Render took %f seconds to execute \n", time_taken); 98 | 99 | 100 | // auto *image_char = new u_char[h * w * c](); 101 | // for (int i = 0; i < h * w * c; ++i) 102 | // image_char[i] = u_char(255 * image[i]); 103 | write_ppm("res.ppm", image, h, w, c); 104 | 105 | // delete[] image_char; 106 | delete[] vertices; 107 | delete[] colors; 108 | delete[] triangles; 109 | delete[] image; 110 | delete[] depth_buffer; 111 | } 112 | 113 | void test_light() { 114 | // 1. loading obj 115 | const char *fp = "/Users/gjz/gjzprojects/Sim3DR/data/emma_input_0_noheader.ply"; 116 | int nver = 53215; //35709; 117 | int ntri = 105840; //70789; 118 | 119 | auto *vertices = new float[3 * nver]; 120 | auto *colors = new float[3 * nver]; 121 | auto *triangles = new int[3 * ntri]; 122 | load_ply(fp, vertices, triangles, nver, ntri); 123 | 124 | // 2. rendering 125 | // int h = 1901, w = 3913, c = 3; 126 | int h = 2000, w = 4000, c = 3; 127 | 128 | // enlarging 129 | // int scale = 1; 130 | // h *= scale; 131 | // w *= scale; 132 | // for (int i = 0; i < nver * 3; ++i) vertices[i] *= scale; 133 | 134 | auto *image = new unsigned char[h * w * c](); 135 | auto *depth_buffer = new float[h * w](); 136 | 137 | for (int i = 0; i < h * w; ++i) depth_buffer[i] = -999999; 138 | for (int i = 0; i < 3 * nver; ++i) colors[i] = 0.8; 139 | 140 | clock_t t; 141 | t = clock(); 142 | 143 | _rasterize(image, vertices, triangles, colors, depth_buffer, ntri, h, w, c, true); 144 | t = clock() - t; 145 | double time_taken = ((double) t) / CLOCKS_PER_SEC; // in seconds 146 | printf("Render took %f seconds to execute \n", time_taken); 147 | 148 | 149 | // auto *image_char = new u_char[h * w * c](); 150 | // for (int i = 0; i < h * w * c; ++i) 151 | // image_char[i] = u_char(255 * image[i]); 152 | write_ppm("emma.ppm", image, h, w, c); 153 | 154 | // delete[] image_char; 155 | delete[] vertices; 156 | delete[] colors; 157 | delete[] triangles; 158 | delete[] image; 159 | delete[] depth_buffer; 160 | } 161 | 162 | int main(int argc, char *argv[]) { 163 | // std::cout << "Hello CMake!" 
<< std::endl; 164 | 165 | // test_isPointInTri(); 166 | // test_getPointWeight(); 167 | // test_get_tri_normal(); 168 | // test_load_obj(); 169 | // test_render(); 170 | test_light(); 171 | return 0; 172 | } -------------------------------------------------------------------------------- /data/300w_lp_aflw2000.yaml: -------------------------------------------------------------------------------- 1 | 2 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 3 | path: /datasdc/zhouhuayi/dataset/headpose/HeadCube3D/ 4 | labels: yolov5_labels 5 | train: yolov5_labels/img_txt/train.txt 6 | val: yolov5_labels/img_txt/validation.txt 7 | 8 | train_annotations: annotations/train_300W_LP_coco_style.json 9 | val_annotations: annotations/val_AFLW2000_coco_style.json 10 | 11 | nc: 1 # number of classes (only one class: human head) 12 | num_angles: 3 # number of Euler angles is 3 (pitch, yaw, roll) 13 | names: [ 'person' ] # class names. We still use 'person' in json file 14 | 15 | -------------------------------------------------------------------------------- /data/300w_lp_biwi.yaml: -------------------------------------------------------------------------------- 1 | 2 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 3 | path: /datasdc/zhouhuayi/dataset/headpose/HeadCube3D/ 4 | labels: yolov5_labels 5 | #train: yolov5_labels/img_txt/train.txt 6 | val: yolov5_labels/img_txt/validation.txt 7 | 8 | #train_annotations: annotations/train_300W_LP_coco_style.json 9 | val_annotations: annotations/BIWI_test_coco_style.json 10 | 11 | nc: 1 # number of classes (only one class: human head) 12 | num_angles: 3 # number of Euler angles is 3 (pitch, yaw, roll) 13 | names: [ 'person' ] # class names. We still use 'person' in json file 14 | 15 | -------------------------------------------------------------------------------- /data/agora_coco.yaml: -------------------------------------------------------------------------------- 1 | 2 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 3 | path: /datasdc/zhouhuayi/dataset/AGORA/HPE/ 4 | labels: yolov5_labels_coco 5 | train: yolov5_labels_coco/img_txt/train.txt 6 | val: yolov5_labels_coco/img_txt/validation.txt 7 | 8 | train_annotations: annotations/coco_style_train.json 9 | val_annotations: annotations/coco_style_validation.json 10 | 11 | nc: 1 # number of classes (only one class: human head) 12 | num_angles: 3 # number of Euler angles is 3 (pitch, yaw, roll) 13 | names: [ 'person' ] # class names. 
We still use 'person' in json file 14 | 15 | # nc: 18 # number of classes (person class + 17 keypoint classes) 16 | # num_coords: 34 # number of keypoint coordinates (x, y) 17 | 18 | # names: [ 'person', 'nose', # class names 19 | # 'left_eye', 'right_eye', 20 | # 'left_ear', 'right_ear', 21 | # 'left_shoulder', 'right_shoulder', 22 | # 'left_elbow', 'right_elbow', 23 | # 'left_wrist', 'right_wrist', 24 | # 'left_hip', 'right_hip', 25 | # 'left_knee', 'right_knee', 26 | # 'left_ankle', 'right_ankle' ] 27 | 28 | # kp_flip: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] # for left-right keypoint flipping 29 | # kp_left: [1, 3, 5, 7, 9, 11, 13, 15] # left keypoints 30 | # kp_face: [0, 1, 2, 3, 4] 31 | 32 | -------------------------------------------------------------------------------- /data/cmu_panoptic_coco.yaml: -------------------------------------------------------------------------------- 1 | 2 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 3 | path: /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/ 4 | labels: yolov5_labels_coco 5 | train: yolov5_labels_coco/img_txt/train.txt 6 | val: yolov5_labels_coco/img_txt/val.txt 7 | 8 | train_annotations: annotations/coco_style_sampled_train.json 9 | val_annotations: annotations/coco_style_sampled_val.json 10 | 11 | nc: 1 # number of classes (only one class: human head) 12 | num_angles: 3 # number of Euler angles is 3 (pitch, yaw, roll) 13 | names: [ 'person' ] # class names. We still use 'person' in json file 14 | 15 | # nc: 18 # number of classes (person class + 17 keypoint classes) 16 | # num_coords: 34 # number of keypoint coordinates (x, y) 17 | 18 | 19 | # names: [ 'person', 'nose', # class names 20 | # 'left_eye', 'right_eye', 21 | # 'left_ear', 'right_ear', 22 | # 'left_shoulder', 'right_shoulder', 23 | # 'left_elbow', 'right_elbow', 24 | # 'left_wrist', 'right_wrist', 25 | # 'left_hip', 'right_hip', 26 | # 'left_knee', 'right_knee', 27 | # 'left_ankle', 'right_ankle' ] 28 | 29 | # kp_flip: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] # for left-right keypoint flipping 30 | # kp_left: [1, 3, 5, 7, 9, 11, 13, 15] # left keypoints 31 | # kp_face: [0, 1, 2, 3, 4] 32 | -------------------------------------------------------------------------------- /data/hyp-p6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters for COCO training from scratch 3 | # python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300 4 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.3 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 0.7 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | mse: 0.1 # Euler angles mse loss gain 19 | iou_t: 0.20 # IoU training threshold 20 | anchor_t: 4.0 # anchor-multiple threshold 21 | # anchors: 3 # anchors per output layer (0 to ignore) 22 | fl_gamma: 0.0 # focal loss 
gamma (efficientDet default gamma=1.5) 23 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 24 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 25 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 26 | degrees: 0.0 # image rotation (+/- deg) 27 | translate: 0.1 # image translation (+/- fraction) 28 | scale: 0.9 # image scale (+/- gain) 29 | shear: 0.0 # image shear (+/- deg) 30 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 31 | flipud: 0.0 # image flip up-down (probability) 32 | fliplr: 0.0 # image flip left-right (probability) 33 | mosaic: 1.0 # image mosaic (probability) 34 | mixup: 0.0 # image mixup (probability) 35 | copy_paste: 0.0 # segment copy-paste (probability) 36 | -------------------------------------------------------------------------------- /data/widerface_coco.yaml: -------------------------------------------------------------------------------- 1 | 2 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 3 | path: /datasdc/zhouhuayi/dataset/WiderFace/ 4 | labels: yolov5_labels_coco 5 | train: yolov5_labels_coco/img_txt/train.txt 6 | val: yolov5_labels_coco/img_txt/val.txt 7 | 8 | train_annotations: annotations/coco_style_img2pose_train.json 9 | val_annotations: annotations/coco_style_img2pose_val.json 10 | 11 | nc: 1 # number of classes (only one class: human head) 12 | num_angles: 3 # number of Euler angles is 3 (pitch, yaw, roll) 13 | names: [ 'person' ] # class names. We still use 'person' in json file 14 | 15 | 16 | -------------------------------------------------------------------------------- /demos/image.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | FILE = Path(__file__).absolute() 4 | sys.path.append(FILE.parents[1].as_posix()) 5 | 6 | import torch 7 | import argparse 8 | import yaml 9 | import cv2 10 | import math 11 | from math import cos, sin 12 | import os.path as osp 13 | import numpy as np 14 | 15 | from utils.torch_utils import select_device 16 | from utils.general import check_img_size, scale_coords, non_max_suppression 17 | from utils.datasets import LoadImages 18 | from models.experimental import attempt_load 19 | 20 | 21 | def plot_3axis_Zaxis(img, yaw, pitch, roll, tdx=None, tdy=None, size=50., limited=True, thickness=2): 22 | # Input is a cv2 image 23 | # pose_params: (pitch, yaw, roll, tdx, tdy) 24 | # Where (tdx, tdy) is the translation of the face. 
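# The endpoints (x1, y1), (x2, y2), (x3, y3) computed below are the image-plane
# projections of the head's unit X, Y and Z axes after rotation by (pitch, yaw, roll);
# `size` sets the drawn axis length and (tdx, tdy) anchors the axes at the head centre.
# The extended yellow line simply prolongs the Z axis so the facing direction is easier to see.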
25 | # For pose we have [pitch yaw roll tdx tdy tdz scale_factor] 26 | 27 | p = pitch * np.pi / 180 28 | y = -(yaw * np.pi / 180) 29 | r = roll * np.pi / 180 30 | 31 | if tdx != None and tdy != None: 32 | face_x = tdx 33 | face_y = tdy 34 | else: 35 | height, width = img.shape[:2] 36 | face_x = width / 2 37 | face_y = height / 2 38 | 39 | # X-Axis (pointing to right) drawn in red 40 | x1 = size * (cos(y) * cos(r)) + face_x 41 | y1 = size * (cos(p) * sin(r) + cos(r) * sin(p) * sin(y)) + face_y 42 | 43 | # Y-Axis (pointing to down) drawn in green 44 | x2 = size * (-cos(y) * sin(r)) + face_x 45 | y2 = size * (cos(p) * cos(r) - sin(p) * sin(y) * sin(r)) + face_y 46 | 47 | # Z-Axis (out of the screen) drawn in blue 48 | x3 = size * (sin(y)) + face_x 49 | y3 = size * (-cos(y) * sin(p)) + face_y 50 | 51 | # Plot head oritation line in black 52 | # scale_ratio = 5 53 | scale_ratio = 2 54 | base_len = math.sqrt((face_x - x3)**2 + (face_y - y3)**2) 55 | if face_x == x3: 56 | endx = tdx 57 | if face_y < y3: 58 | if limited: 59 | endy = tdy + (y3 - face_y) * scale_ratio 60 | else: 61 | endy = img.shape[0] 62 | else: 63 | if limited: 64 | endy = tdy - (face_y - y3) * scale_ratio 65 | else: 66 | endy = 0 67 | elif face_x > x3: 68 | if limited: 69 | endx = tdx - (face_x - x3) * scale_ratio 70 | endy = tdy - (face_y - y3) * scale_ratio 71 | else: 72 | endx = 0 73 | endy = tdy - (face_y - y3) / (face_x - x3) * tdx 74 | else: 75 | if limited: 76 | endx = tdx + (x3 - face_x) * scale_ratio 77 | endy = tdy + (y3 - face_y) * scale_ratio 78 | else: 79 | endx = img.shape[1] 80 | endy = tdy - (face_y - y3) / (face_x - x3) * (tdx - endx) 81 | # cv2.line(img, (int(tdx), int(tdy)), (int(endx), int(endy)), (0,0,0), 2) 82 | # cv2.line(img, (int(tdx), int(tdy)), (int(endx), int(endy)), (255,255,0), 2) 83 | cv2.line(img, (int(tdx), int(tdy)), (int(endx), int(endy)), (0,255,255), thickness) 84 | 85 | # X-Axis pointing to right. drawn in red 86 | cv2.line(img, (int(face_x), int(face_y)), (int(x1),int(y1)),(0,0,255),thickness) 87 | # Y-Axis pointing to down. drawn in green 88 | cv2.line(img, (int(face_x), int(face_y)), (int(x2),int(y2)),(0,255,0),thickness) 89 | # Z-Axis (out of the screen) drawn in blue 90 | cv2.line(img, (int(face_x), int(face_y)), (int(x3),int(y3)),(255,0,0),thickness) 91 | 92 | return img 93 | 94 | if __name__ == '__main__': 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument('-p', '--img-path', default='test_imgs/100024.jpg', help='path to image or dir') 97 | parser.add_argument('--data', type=str, default='data/agora_coco.yaml') 98 | parser.add_argument('--imgsz', type=int, default=1280) 99 | parser.add_argument('--weights', default='yolov5m6.pt') 100 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or cpu') 101 | parser.add_argument('--conf-thres', type=float, default=0.7, help='confidence threshold') 102 | parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') 103 | parser.add_argument('--scales', type=float, nargs='+', default=[1]) 104 | parser.add_argument('--thickness', type=int, default=2, help='thickness of Euler angle lines') 105 | 106 | args = parser.parse_args() 107 | 108 | with open(args.data) as f: 109 | data = yaml.safe_load(f) # load data dict 110 | 111 | device = select_device(args.device, batch_size=1) 112 | print('Using device: {}'.format(device)) 113 | 114 | model = attempt_load(args.weights, map_location=device) 115 | stride = int(model.stride.max()) # model stride 116 | imgsz = check_img_size(args.imgsz, s=stride) # check image size 117 | dataset = LoadImages(args.img_path, img_size=imgsz, stride=stride, auto=True) 118 | dataset_iter = iter(dataset) 119 | 120 | for index in range(len(dataset)): 121 | 122 | (single_path, img, im0, _) = next(dataset_iter) 123 | 124 | if '_res' in single_path: continue 125 | 126 | print(index, single_path, "\n") 127 | 128 | img = torch.from_numpy(img).to(device) 129 | img = img / 255.0 # 0 - 255 to 0.0 - 1.0 130 | if len(img.shape) == 3: 131 | img = img[None] # expand for batch dim 132 | 133 | out_ori = model(img, augment=True, scales=args.scales)[0] 134 | out = non_max_suppression(out_ori, args.conf_thres, args.iou_thres, num_angles=data['num_angles']) 135 | 136 | # predictions (Array[N, 9]), x1, y1, x2, y2, conf, class, pitch, yaw, roll 137 | bboxes = scale_coords(img.shape[2:], out[0][:, :4], im0.shape[:2]).cpu().numpy() # native-space pred 138 | scores = out[0][:, 4].cpu().numpy() 139 | pitchs_yaws_rolls = out[0][:, 6:].cpu().numpy() # N*3 140 | for i, [x1, y1, x2, y2] in enumerate(bboxes): 141 | im0 = cv2.rectangle(im0, (int(x1), int(y1)), (int(x2), int(y2)), 142 | [255,255,255], thickness=args.thickness) 143 | # im0 = cv2.putText(im0, str(round(scores[i], 3)), (int(x1), int(y1)), 144 | # cv2.FONT_HERSHEY_PLAIN, 0.7, (255,255,255), thickness=2) 145 | pitch = (pitchs_yaws_rolls[i][0] - 0.5) * 180 146 | yaw = (pitchs_yaws_rolls[i][1] - 0.5) * 360 147 | roll = (pitchs_yaws_rolls[i][2] - 0.5) * 180 148 | im0 = plot_3axis_Zaxis(im0, yaw, pitch, roll, tdx=(x1+x2)/2, tdy=(y1+y2)/2, 149 | size=max(y2-y1, x2-x1)*0.8, thickness=args.thickness) 150 | 151 | cv2.imwrite(single_path[:-4]+"_res.jpg", im0) 152 | 153 | -------------------------------------------------------------------------------- /demos/video.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | FILE = Path(__file__).absolute() 4 | sys.path.append(FILE.parents[1].as_posix()) 5 | 6 | import argparse 7 | import torch 8 | import cv2 9 | import yaml 10 | import imageio 11 | from tqdm import tqdm 12 | import os.path as osp 13 | import numpy as np 14 | 15 | from utils.torch_utils import select_device, time_sync 16 | from utils.general import check_img_size, scale_coords, non_max_suppression 17 | from utils.datasets import LoadImages 18 | from utils.plots import plot_3axis_Zaxis 19 | from models.experimental import attempt_load 20 | 21 | 22 | if __name__ == '__main__': 23 | parser = argparse.ArgumentParser() 24 | 25 | # video options 26 | parser.add_argument('-p', '--video-path', default='', help='path to video file') 27 | 28 | parser.add_argument('--data', type=str, default='data/agora_coco.yaml') 29 | parser.add_argument('--imgsz', type=int, default=1280) 30 | 
parser.add_argument('--save-size', type=int, default=1080) 31 | parser.add_argument('--weights', default='yolov5m6.pt') 32 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or cpu') 33 | parser.add_argument('--conf-thres', type=float, default=0.7, help='confidence threshold') 34 | parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') 35 | parser.add_argument('--scales', type=float, nargs='+', default=[1]) 36 | 37 | parser.add_argument('--start', type=int, default=0, help='start time (s)') 38 | parser.add_argument('--end', type=int, default=-1, help='end time (s), -1 for remainder of video') 39 | parser.add_argument('--color', type=int, nargs='+', default=[255, 255, 255], help='head bbox color') 40 | parser.add_argument('--thickness', type=int, default=2, help='thickness of Euler angle lines') 41 | parser.add_argument('--alpha', type=float, default=0.4, help='head bbox and head pose alpha') 42 | 43 | parser.add_argument('--display', action='store_true', help='display inference results') 44 | parser.add_argument('--fps-size', type=int, default=1) 45 | parser.add_argument('--gif', action='store_true', help='create gif') 46 | parser.add_argument('--gif-size', type=int, nargs='+', default=[480, 270]) 47 | 48 | args = parser.parse_args() 49 | 50 | with open(args.data) as f: 51 | data = yaml.safe_load(f) # load data dict 52 | 53 | device = select_device(args.device, batch_size=1) 54 | print('Using device: {}'.format(device)) 55 | 56 | model = attempt_load(args.weights, map_location=device) # load FP32 model 57 | stride = int(model.stride.max()) # model stride 58 | imgsz = check_img_size(args.imgsz, s=stride) # check image size 59 | dataset = LoadImages(args.video_path, img_size=imgsz, stride=stride, auto=True) 60 | 61 | if device.type != 'cpu': 62 | model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once 63 | 64 | 65 | cap = dataset.cap 66 | cap.set(cv2.CAP_PROP_POS_MSEC, args.start * 1000) 67 | fps = cap.get(cv2.CAP_PROP_FPS) 68 | if args.end == -1: 69 | n = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) - fps * args.start) 70 | else: 71 | n = int(fps * (args.end - args.start)) 72 | h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 73 | w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 74 | gif_frames = [] 75 | out_path = '{}_{}'.format(osp.splitext(args.video_path)[0], "DirectMHP") 76 | print("fps:", fps, "\t total frames:", n, "\t out_path:", out_path) 77 | 78 | write_video = not args.display and not args.gif 79 | if write_video: 80 | # writer = cv2.VideoWriter(out_path + '.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) 81 | writer = cv2.VideoWriter(out_path + '.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, 82 | (int(args.save_size*w/h), args.save_size)) 83 | 84 | dataset = tqdm(dataset, desc='Running inference', total=n) 85 | t0 = time_sync() 86 | for i, (path, img, im0, _) in enumerate(dataset): 87 | img = torch.from_numpy(img).to(device) 88 | img = img / 255.0 # 0 - 255 to 0.0 - 1.0 89 | if len(img.shape) == 3: 90 | img = img[None] # expand for batch dim 91 | 92 | out_ori = model(img, augment=True, scales=args.scales)[0] 93 | out = non_max_suppression(out_ori, args.conf_thres, args.iou_thres, num_angles=data['num_angles']) 94 | # predictions (Array[N, 9]), x1, y1, x2, y2, conf, class, pitch, yaw, roll 95 | bboxes = scale_coords(img.shape[2:], out[0][:, :4], im0.shape[:2]).cpu().numpy() # native-space pred 96 | scores = out[0][:, 4].cpu().numpy() 97 | pitchs_yaws_rolls = out[0][:, 6:].cpu().numpy() # N*3 98 | 99 | 
im0_copy = im0.copy() 100 | 101 | # draw head bboxes and pose 102 | for j, [x1, y1, x2, y2] in enumerate(bboxes): 103 | im0_copy = cv2.rectangle(im0_copy, (int(x1), int(y1)), (int(x2), int(y2)), 104 | args.color, thickness=args.thickness) 105 | # im0_copy = cv2.putText(im0_copy, str(round(scores[j], 3)), (int(x1), int(y1)), 106 | # cv2.FONT_HERSHEY_PLAIN, 0.7, (255,255,255), thickness=2) 107 | pitch = (pitchs_yaws_rolls[j][0] - 0.5) * 180 108 | yaw = (pitchs_yaws_rolls[j][1] - 0.5) * 360 109 | roll = (pitchs_yaws_rolls[j][2] - 0.5) * 180 110 | im0_copy = plot_3axis_Zaxis(im0_copy, yaw, pitch, roll, tdx=(x1+x2)/2, tdy=(y1+y2)/2, 111 | size=max(y2-y1, x2-x1)*0.8, thickness=args.thickness) 112 | 113 | im0 = cv2.addWeighted(im0, args.alpha, im0_copy, 1 - args.alpha, gamma=0) 114 | 115 | if i == 0: 116 | t = time_sync() - t0 117 | else: 118 | t = time_sync() - t1 119 | 120 | if not args.gif and args.fps_size: 121 | cv2.putText(im0, '{:.1f} FPS'.format(1 / t), (5 * args.fps_size, 25 * args.fps_size), 122 | cv2.FONT_HERSHEY_SIMPLEX, args.fps_size, (255, 255, 255), thickness=2 * args.fps_size) 123 | 124 | if args.gif: 125 | gif_img = cv2.cvtColor(cv2.resize(im0, dsize=tuple(args.gif_size)), cv2.COLOR_RGB2BGR) 126 | if args.fps_size: 127 | cv2.putText(gif_img, '{:.1f} FPS'.format(1 / t), (5 * args.fps_size, 25 * args.fps_size), 128 | cv2.FONT_HERSHEY_SIMPLEX, args.fps_size, (255, 255, 255), thickness=2 * args.fps_size) 129 | gif_frames.append(gif_img) 130 | elif write_video: 131 | im0 = cv2.resize(im0, dsize=(int(args.save_size*w/h), args.save_size)) 132 | writer.write(im0) 133 | else: 134 | cv2.imshow('', im0) 135 | cv2.waitKey(1) 136 | 137 | t1 = time_sync() 138 | if i == n - 1: 139 | break 140 | 141 | cv2.destroyAllWindows() 142 | cap.release() 143 | if write_video: 144 | writer.release() 145 | 146 | if args.gif: 147 | print('Saving GIF...') 148 | with imageio.get_writer(out_path + '.gif', mode="I", fps=fps) as writer: 149 | for idx, frame in tqdm(enumerate(gif_frames)): 150 | writer.append_data(frame) 151 | 152 | -------------------------------------------------------------------------------- /exps/AGORA/agora_evaluation/projection.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # -*- coding: utf-8 -*- 3 | 4 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 5 | # holder of all proprietary rights on this computer program. 6 | # You can only use this computer program if you have closed 7 | # a license agreement with MPG or you get the right to use the computer 8 | # program from someone who is authorized to grant you that right. 9 | # Any use of the computer program without a valid license is prohibited and 10 | # liable to prosecution. 11 | # 12 | # Copyright©2021 Max-Planck-Gesellschaft zur Förderung 13 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 14 | # for Intelligent Systems. All rights reserved. 
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | #------------------------------------------------------------------------------ 18 | import logging 19 | import math 20 | import os 21 | 22 | import cv2 23 | import numpy as np 24 | import matplotlib.pyplot as plt 25 | 26 | 27 | logging.basicConfig(level=logging.DEBUG) 28 | 29 | 30 | def focalLength_mm2px(focalLength, dslr_sens, focalPoint): 31 | focal_pixel = (focalLength / dslr_sens) * focalPoint * 2 32 | return focal_pixel 33 | 34 | 35 | def toCamCoords(j3d, camPosWorld): 36 | # transform gt to camera coordinate frame 37 | j3d = j3d - camPosWorld 38 | return j3d 39 | 40 | 41 | def unreal2cv2(points): 42 | # x --> y, y --> z, z --> x 43 | points = np.roll(points, 2, 1) 44 | # change direction of y 45 | points = points * np.array([1, -1, 1]) 46 | return points 47 | 48 | 49 | def smpl2opencv(j3d): 50 | # change sign of axis 1 and axis 2 51 | j3d = j3d * np.array([1, -1, -1]) 52 | return j3d 53 | 54 | 55 | def project_point(joint, RT, KKK): 56 | 57 | P = np.dot(KKK, RT) 58 | joints_2d = np.dot(P, joint) 59 | joints_2d = joints_2d[0:2] / joints_2d[2] 60 | 61 | return joints_2d 62 | 63 | 64 | def project_2d( 65 | args, 66 | df, 67 | i, 68 | pNum, 69 | joints3d, 70 | meanPose=False): 71 | 72 | dslr_sens_width = 36 73 | dslr_sens_height = 20.25 74 | imgWidth = args.imgWidth 75 | imgHeight = args.imgHeight 76 | debug_path = args.debug_path 77 | imgBase = args.imgFolder 78 | imgName = df.iloc[i]['imgPath'] 79 | if imgWidth == 1280 and '_1280x720.png' not in imgName: 80 | #If 1280x720 images are used then image name needs to be updated 81 | imgName = imgName.replace('.png','_1280x720.png') 82 | df.iloc[i]['imgPath']=imgName 83 | 84 | imgPath = os.path.join(imgBase, df.iloc[i]['imgPath']) 85 | if 'hdri' in imgPath: 86 | ground_plane = [0, 0, 0] 87 | scene3d = False 88 | focalLength = 50 89 | camPosWorld = [0, 0, 170] 90 | camYaw = 0 91 | camPitch = 0 92 | 93 | elif 'cam00' in imgPath: 94 | ground_plane = [0, 0, 0] 95 | scene3d = True 96 | focalLength = 18 97 | camPosWorld = [400, -275, 265] 98 | camYaw = 135 99 | camPitch = 30 100 | elif 'cam01' in imgPath: 101 | ground_plane = [0, 0, 0] 102 | scene3d = True 103 | focalLength = 18 104 | camPosWorld = [400, 225, 265] 105 | camYaw = -135 106 | camPitch = 30 107 | elif 'cam02' in imgPath: 108 | ground_plane = [0, 0, 0] 109 | scene3d = True 110 | focalLength = 18 111 | camPosWorld = [-490, 170, 265] 112 | camYaw = -45 113 | camPitch = 30 114 | elif 'cam03' in imgPath: 115 | ground_plane = [0, 0, 0] 116 | scene3d = True 117 | focalLength = 18 118 | camPosWorld = [-490, -275, 265] 119 | camYaw = 45 120 | camPitch = 30 121 | elif 'ag2' in imgPath: 122 | ground_plane = [0, 0, 0] 123 | scene3d = False 124 | focalLength = 28 125 | camPosWorld = [0, 0, 170] 126 | camYaw = 0 127 | camPitch = 15 128 | else: 129 | ground_plane = [0, -1.7, 0] 130 | scene3d = True 131 | focalLength = 28 132 | camPosWorld = [ 133 | df.iloc[i]['camX'], 134 | df.iloc[i]['camY'], 135 | df.iloc[i]['camZ']] 136 | camYaw = df.iloc[i]['camYaw'] 137 | camPitch = 0 138 | 139 | if meanPose: 140 | yawSMPL = 0 141 | trans3d = [0, 0, 0] 142 | else: 143 | yawSMPL = df.iloc[i]['Yaw'][pNum] 144 | trans3d = [df.iloc[i]['X'][pNum], 145 | df.iloc[i]['Y'][pNum], 146 | df.iloc[i]['Z'][pNum]] 147 | 148 | # gt2d, gt3d_camCoord = project2d(joints3d, focalLength=focalLength, scene3d=scene3d, 149 | gt2d, gt3d_camCoord, cam_j3d, camR, camT, camK = project2d(joints3d, focalLength=focalLength, scene3d=scene3d, 150 | trans3d=trans3d, 151 | 
dslr_sens_width=dslr_sens_width, 152 | dslr_sens_height=dslr_sens_height, 153 | camPosWorld=camPosWorld, 154 | cy=imgHeight / 2, 155 | cx=imgWidth / 2, 156 | imgPath=imgPath, 157 | yawSMPL=yawSMPL, 158 | ground_plane=ground_plane, 159 | debug_path=debug_path, 160 | debug=args.debug, 161 | ind=i, 162 | pNum=pNum, 163 | meanPose=meanPose, camPitch=camPitch, camYaw=camYaw) 164 | # return gt2d, gt3d_camCoord 165 | return gt2d, gt3d_camCoord, cam_j3d, camR, camT, camK 166 | 167 | 168 | def project2d( 169 | j3d, 170 | focalLength, 171 | scene3d, 172 | trans3d, 173 | dslr_sens_width, 174 | dslr_sens_height, 175 | camPosWorld, 176 | cy, 177 | cx, 178 | imgPath, 179 | yawSMPL, 180 | ground_plane, 181 | debug_path, 182 | debug=False, 183 | ind=-1, 184 | pNum=-1, 185 | meanPose=False, 186 | camPitch=0, 187 | camYaw=0): 188 | 189 | focalLength_x = focalLength_mm2px(focalLength, dslr_sens_width, cx) 190 | focalLength_y = focalLength_mm2px(focalLength, dslr_sens_height, cy) 191 | 192 | camMat = np.array([[focalLength_x, 0, cx], 193 | [0, focalLength_y, cy], 194 | [0, 0, 1]]) 195 | 196 | # camPosWorld and trans3d are in cm. Transform to meter 197 | trans3d = np.array(trans3d) / 100 198 | trans3d = unreal2cv2(np.reshape(trans3d, (1, 3))) 199 | camPosWorld = np.array(camPosWorld) / 100 200 | if scene3d: 201 | camPosWorld = unreal2cv2( 202 | np.reshape( 203 | camPosWorld, (1, 3))) + np.array(ground_plane) 204 | else: 205 | camPosWorld = unreal2cv2(np.reshape(camPosWorld, (1, 3))) 206 | 207 | # get points in camera coordinate system 208 | j3d = smpl2opencv(j3d) 209 | 210 | ''' newly added for Euler angles Calculation ''' 211 | cam_j3d = j3d.copy() 212 | 213 | # scans have a 90deg rotation, but for mean pose from vposer there is no 214 | # such rotation 215 | if meanPose: 216 | rotMat, _ = cv2.Rodrigues( 217 | np.array([[0, (yawSMPL) / 180 * np.pi, 0]], dtype=float)) 218 | else: 219 | rotMat, _ = cv2.Rodrigues( 220 | np.array([[0, ((yawSMPL - 90) / 180) * np.pi, 0]], dtype=float)) 221 | 222 | j3d = np.matmul(rotMat, j3d.T).T 223 | j3d = j3d + trans3d 224 | 225 | camera_rotationMatrix, _ = cv2.Rodrigues( 226 | np.array([0, ((-camYaw) / 180) * np.pi, 0]).reshape(3, 1)) 227 | camera_rotationMatrix2, _ = cv2.Rodrigues( 228 | np.array([camPitch / 180 * np.pi, 0, 0]).reshape(3, 1)) 229 | 230 | j3d_new = np.matmul(camera_rotationMatrix, j3d.T - camPosWorld.T).T 231 | j3d_new = np.matmul(camera_rotationMatrix2, j3d_new.T).T 232 | 233 | RT = np.concatenate((np.diag([1., 1., 1.]), np.zeros((3, 1))), axis=1) 234 | j2d = np.zeros((j3d_new.shape[0], 2)) 235 | for i in range(j3d_new.shape[0]): 236 | j2d[i, :] = project_point(np.concatenate( 237 | [j3d_new[i, :], np.array([1])]), RT, camMat) 238 | 239 | 240 | ''' newly added for Euler angles Calculation ''' 241 | camR = np.matmul(camera_rotationMatrix2, np.matmul(camera_rotationMatrix, rotMat)) 242 | camT = np.dot(np.matmul(camera_rotationMatrix2, camera_rotationMatrix), trans3d.T - camPosWorld.T) 243 | camK = camMat 244 | 245 | 246 | if debug: 247 | import matplotlib.cm as cm 248 | if not os.path.exists(debug_path): 249 | os.makedirs(debug_path) 250 | 251 | if len(j2d) < 200: # No rendering for verts 252 | if not (imgPath is None): 253 | img = cv2.imread(imgPath) 254 | img = img[:, :, ::-1] 255 | colors = cm.tab20c(np.linspace(0, 1, 25)) 256 | fig = plt.figure(dpi=300) 257 | ax = fig.add_subplot(111) 258 | if not (imgPath is None): 259 | ax.imshow(img) 260 | for i in range(22): 261 | ax.scatter(j2d[i, 0], j2d[i, 1], c=colors[i], s=0.1) 262 | #ax.scatter(j2d[i,0], j2d[i,1], 
c=np.array([1,0,0]), s=0.1) 263 | # ax.text(j2d[i,0], j2d[i,1], str(i)) 264 | # plt.show() 265 | 266 | if not (imgPath is None): 267 | savename = imgPath.split('/')[-1] 268 | savename = savename.replace('.pkl', '.jpg') 269 | plt.savefig( 270 | os.path.join( 271 | debug_path, 272 | 'image' + 273 | str(pNum) + 274 | savename)) 275 | plt.close('all') 276 | 277 | # return j2d, j3d_new 278 | 279 | ''' newly added for Euler angles Calculation ''' 280 | return j2d, j3d_new, cam_j3d, camR, camT, camK 281 | 282 | -------------------------------------------------------------------------------- /exps/CMU/data_split_hpe.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import json 4 | import copy 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | import shutil 9 | import matplotlib.pyplot as plt 10 | 11 | ############################################################################################ 12 | 13 | # Face keypoint orders follow Openpose keypoint output 14 | # https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/output.md 15 | # Face outline points (0-16) are unstable 16 | face_edges = np.array([ 17 | # [0,1],[1,2],[2,3],[3,4],[4,5],[5,6],[6,7],[7,8],[8,9],[9,10],[11,12],[12,13],[14,15],[15,16], #outline (ignored) 18 | [17,18],[18,19],[19,20],[20,21], #right eyebrow 19 | [22,23],[23,24],[24,25],[25,26], #left eyebrow 20 | [27,28],[28,29],[29,30], #nose upper part 21 | [31,32],[32,33],[33,34],[34,35], #nose lower part 22 | [36,37],[37,38],[38,39],[39,40],[40,41],[41,36], #right eye 23 | [42,43],[43,44],[44,45],[45,46],[46,47],[47,42], #left eye 24 | [48,49],[49,50],[50,51],[51,52],[52,53],[53,54],[54,55],[55,56],[56,57],[57,58],[58,59],[59,48], #Lip outline 25 | [60,61],[61,62],[62,63],[63,64],[64,65],[65,66],[66,67],[67,60] #Lip inner line 26 | ]) 27 | 28 | coco_dict_template = { 29 | 'info': { 30 | 'description': 'Face landmarks and Euler angles of CMU Panoptic Studio Dataset', 31 | 'url': 'http://domedb.perception.cs.cmu.edu/', 32 | 'version': '1.0', 33 | 'year': 2022, 34 | 'contributor': 'Huayi Zhou', 35 | 'date_created': '2022/02/17', 36 | }, 37 | 'licences': [{ 38 | 'url': 'http://creativecommons.org/licenses/by-nc/2.0', 39 | 'name': 'Attribution-NonCommercial License' 40 | }], 41 | 'images': [], 42 | 'annotations': [], 43 | 'categories': [{ 44 | 'supercategory': 'person', 45 | 'id': 1, 46 | 'name': 'person', 47 | 'face_edges': face_edges.tolist() 48 | }] 49 | } 50 | 51 | ############################################################################################ 52 | 53 | def sort_labels_by_image_id(labels_list): 54 | images_labels_dict = {} 55 | for i, labels_dict in enumerate(labels_list): 56 | image_id = str(labels_dict['image_id']) 57 | if 'head_bbox' in labels_dict: 58 | labels_dict['bbox'] = labels_dict['head_bbox'] # please use the default 'bbox' as key in cocoapi 59 | del labels_dict['head_bbox'] 60 | if 'area' not in labels_dict: # generate standard COCO style json file 61 | labels_dict['segmentation'] = [] # This script is not for segmentation 62 | labels_dict['area'] = round(labels_dict['bbox'][-1] * labels_dict['bbox'][-2], 4) 63 | if image_id in images_labels_dict.keys(): 64 | images_labels_dict[image_id].append(labels_dict) 65 | else: 66 | images_labels_dict[image_id] = [labels_dict] 67 | return images_labels_dict 68 | 69 | 70 | if __name__ == "__main__": 71 | 72 | sampled_anno_path = "./HPE/annotations/coco_style_sample.json" 73 | sampled_train_path = 
"./HPE/annotations/coco_style_sampled_train.json" 74 | sampled_val_path = "./HPE/annotations/coco_style_sampled_val.json" 75 | 76 | image_root_path = "./HPE/images_sampled" 77 | 78 | image_dst_path = "./HPE/images" 79 | if os.path.exists(image_dst_path): 80 | shutil.rmtree(image_dst_path) 81 | os.mkdir(image_dst_path) 82 | os.mkdir(os.path.join(image_dst_path, "train")) 83 | os.mkdir(os.path.join(image_dst_path, "val")) 84 | 85 | 86 | '''[start] do not change''' 87 | seq_names = ["171204_pose3", "171026_pose3", "170221_haggling_b3", "170221_haggling_m3", "170224_haggling_a3", "170228_haggling_b1", "170404_haggling_a1", "170407_haggling_a2", "170407_haggling_b2", "171026_cello3", "161029_piano4", "160422_ultimatum1", "160224_haggling1", "170307_dance5", "160906_ian1", "170915_office1", "160906_pizza1"] # 17 names 88 | 89 | seq_names_train = ["171204_pose3", "161029_piano4", "160422_ultimatum1", "170307_dance5", "160906_pizza1", "170221_haggling_b3", "170224_haggling_a3", "170404_haggling_a1", "170407_haggling_b2"] # 9 names, person: 1+1+7+1+5+3+3+3+3 90 | seq_names_val = ["171026_pose3", "171026_cello3", "160224_haggling1", "160906_ian1", "170915_office1", "170221_haggling_m3", "170228_haggling_b1", "170407_haggling_a2"] # 8 names, person: 1+1+3+2+1+3+3+3 91 | 92 | 93 | train_seq_num_list, val_seq_num_list = [], [] 94 | for seq_num, seq_name in enumerate(seq_names): 95 | if seq_name in seq_names_train: train_seq_num_list.append(seq_num) 96 | if seq_name in seq_names_val: val_seq_num_list.append(seq_num) 97 | 98 | with open(sampled_anno_path, "r") as json_file: 99 | annos_dict = json.load(json_file) 100 | images_list = annos_dict['images'] 101 | labels_list = annos_dict['annotations'] 102 | images_labels_dict = sort_labels_by_image_id(labels_list) 103 | 104 | coco_dict_train = copy.deepcopy(coco_dict_template) 105 | coco_dict_val = copy.deepcopy(coco_dict_template) 106 | 107 | person_instances_stat = {} 108 | euler_angles_stat = [[],[],[]] # pitch, yaw, roll 109 | 110 | for image_dict in tqdm(images_list): 111 | image_id = image_dict['id'] 112 | seq_num = (image_id - 10000000000) // 100000000 - 1 113 | if seq_num in train_seq_num_list: target_type = "train" 114 | if seq_num in val_seq_num_list: target_type = "val" 115 | 116 | labels_list = images_labels_dict[str(image_id)] 117 | anno_nums = len(labels_list) 118 | 119 | image_dict['seq'] = seq_names[seq_num] 120 | 121 | src_image_path = os.path.join(image_root_path, image_dict['file_name']) 122 | dst_image_path = os.path.join(image_dst_path, target_type, image_dict['file_name']) 123 | if os.path.exists(src_image_path): 124 | shutil.move(src_image_path, dst_image_path) 125 | 126 | if target_type == "train": 127 | coco_dict_train['images'].append(image_dict) 128 | coco_dict_train['annotations'] += labels_list 129 | if str(anno_nums) not in person_instances_stat: 130 | person_instances_stat[str(anno_nums)] = [1,0] # [1, 0] for [train, val] 131 | else: 132 | person_instances_stat[str(anno_nums)][0] += 1 133 | if target_type == "val": 134 | coco_dict_val['images'].append(image_dict) 135 | coco_dict_val['annotations'] += labels_list 136 | if str(anno_nums) not in person_instances_stat: 137 | person_instances_stat[str(anno_nums)] = [0,1] # [0, 1] for [train, val] 138 | else: 139 | person_instances_stat[str(anno_nums)][1] += 1 140 | 141 | for labels in labels_list: 142 | [pitch, yaw, roll] = labels['euler_angles'] 143 | euler_angles_stat[0].append(pitch) 144 | euler_angles_stat[1].append(yaw) 145 | euler_angles_stat[2].append(roll) 146 | 147 | 
'''[end] do not change''' 148 | 149 | print("\nperson_instances_stat:", person_instances_stat) 150 | image_cnt, person_cnt = [0,0], [0,0] 151 | for key, value in person_instances_stat.items(): 152 | image_cnt[0], image_cnt[1] = image_cnt[0] + value[0], image_cnt[1] + value[1] 153 | person_cnt[0], person_cnt[1] = person_cnt[0] + int(key)*value[0], person_cnt[1] + int(key)*value[1] 154 | print("Images number containing [%s] persons: %d, \ttrain/val = %d/%d"%(key, sum(value), value[0], value[1])) 155 | print("Perosn instances per image: %.4f, \ttrain/val = %.4f/%.4f"%( 156 | sum(person_cnt)/sum(image_cnt), person_cnt[0]/image_cnt[0], person_cnt[1]/image_cnt[1])) 157 | 158 | print("\ntrain: images --> %d, head instances --> %d"%(len(coco_dict_train['images']), len(coco_dict_train['annotations']))) 159 | with open(sampled_train_path, "w") as json_file: 160 | json.dump(coco_dict_train, json_file) 161 | print("val: images --> %d, head instances --> %d"%(len(coco_dict_val['images']), len(coco_dict_val['annotations']))) 162 | with open(sampled_val_path, "w") as json_file: 163 | json.dump(coco_dict_val, json_file) 164 | 165 | '''CMUPanoptic Euler Angels Stat''' 166 | interval = 10 # 10 or 15 is better 167 | bins = 360 // interval 168 | density = False # True or False, density=False would make counts 169 | colors = ['r', 'g', 'b'] 170 | labels = ["Pitch", "Yaw", "Roll"] 171 | plt.hist(euler_angles_stat, bins=bins, alpha=0.7, density=density, histtype='bar', label=labels, color=colors) 172 | plt.legend(prop ={'size': 10}) 173 | # plt.xlim(-180, 180) 174 | plt.xticks(range(-180,181,interval)) 175 | if density: plt.ylabel('Percentage') 176 | else: plt.ylabel('Counts') 177 | plt.xlabel('Degree') 178 | plt.show() 179 | 180 | 181 | '''final results 182 | 100%|███████████████████████████████████████████████████████████████████████████████████████| 31934/31934 [00:40<00:00, 794.51it/s] 183 | 184 | person_instances_stat: {'1': [7416, 7291], '2': [1313, 1328], '3': [4937, 7597], '4': [479, 0], '5': [567, 0], '7': [85, 0], '6': [921, 0]} 185 | Images number containing [1] persons: 14707, train/val = 7416/7291 186 | Images number containing [2] persons: 2641, train/val = 1313/1328 187 | Images number containing [3] persons: 12534, train/val = 4937/7597 188 | Images number containing [4] persons: 479, train/val = 479/0 189 | Images number containing [5] persons: 567, train/val = 567/0 190 | Images number containing [7] persons: 85, train/val = 85/0 191 | Images number containing [6] persons: 921, train/val = 921/0 192 | Perosn instances per image: 2.1439, train/val = 2.2729/2.0189 193 | 194 | train: images --> 15718, head instances --> 35725 195 | val: images --> 16216, head instances --> 32738 196 | ''' -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getDB_panoptic_ver1_2_hdVideo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This list is named as "Panoptic Studio DB Ver 1.2" 4 | 5 | curPath=$(dirname "$0") 6 | hdVideoNum=31 7 | 8 | #Range of motion sequences 9 | $curPath/getData_hdVideo.sh 171204_pose3 $hdVideoNum 10 | $curPath/getData_hdVideo.sh 171026_pose3 $hdVideoNum 11 | 12 | #Download All Haggling Sequences without downloading videos 13 | $curPath/getData_hdVideo.sh 170221_haggling_m3 $hdVideoNum 14 | $curPath/getData_hdVideo.sh 170404_haggling_a1 $hdVideoNum 15 | $curPath/getData_hdVideo.sh 170407_haggling_b2 $hdVideoNum 16 | 17 | #Musical Instruments 18 | $curPath/getData_hdVideo.sh 
171026_cello3 $hdVideoNum 19 | $curPath/getData_hdVideo.sh 161029_piano4 $hdVideoNum 20 | 21 | #SocialGame sequences 22 | $curPath/getData_hdVideo.sh 160422_ultimatum1 $hdVideoNum 23 | $curPath/getData_hdVideo.sh 160224_haggling1 $hdVideoNum 24 | 25 | #Dance sequences 26 | $curPath/getData_hdVideo.sh 170307_dance5 $hdVideoNum 27 | 28 | #Toddler sequences 29 | $curPath/getData_hdVideo.sh 160906_ian1 $hdVideoNum 30 | 31 | #Others sequences 32 | $curPath/getData_hdVideo.sh 170915_office1 $hdVideoNum 33 | $curPath/getData_hdVideo.sh 160906_pizza1 $hdVideoNum 34 | 35 | 36 | #*** 4 other more names list *** 37 | #Social Games (Haggling) 38 | $curPath/getData_hdVideo.sh 170221_haggling_b3 $hdVideoNum 39 | $curPath/getData_hdVideo.sh 170224_haggling_a3 $hdVideoNum 40 | $curPath/getData_hdVideo.sh 170228_haggling_b1 $hdVideoNum 41 | $curPath/getData_hdVideo.sh 170407_haggling_a2 $hdVideoNum 42 | -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getDB_panoptic_ver1_2_hdVideo_t1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This list is named as "Panoptic Studio DB Ver 1.2" 4 | 5 | curPath=$(dirname "$0") 6 | hdVideoNum=31 7 | 8 | #Range of motion sequences 9 | $curPath/getData_hdVideo.sh 171204_pose3 $hdVideoNum 10 | $curPath/getData_hdVideo.sh 171026_pose3 $hdVideoNum 11 | 12 | #Download All Haggling Sequences without downloading videos 13 | $curPath/getData_hdVideo.sh 170221_haggling_m3 $hdVideoNum 14 | $curPath/getData_hdVideo.sh 170404_haggling_a1 $hdVideoNum 15 | $curPath/getData_hdVideo.sh 170407_haggling_b2 $hdVideoNum 16 | -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getDB_panoptic_ver1_2_hdVideo_t2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This list is named as "Panoptic Studio DB Ver 1.2" 4 | 5 | curPath=$(dirname "$0") 6 | hdVideoNum=31 7 | 8 | #Musical Instruments 9 | $curPath/getData_hdVideo.sh 171026_cello3 $hdVideoNum 10 | $curPath/getData_hdVideo.sh 161029_piano4 $hdVideoNum 11 | 12 | #SocialGame sequences 13 | $curPath/getData_hdVideo.sh 160422_ultimatum1 $hdVideoNum 14 | $curPath/getData_hdVideo.sh 160224_haggling1 $hdVideoNum 15 | -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getDB_panoptic_ver1_2_hdVideo_t3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This list is named as "Panoptic Studio DB Ver 1.2" 4 | 5 | curPath=$(dirname "$0") 6 | hdVideoNum=31 7 | 8 | #Dance sequences 9 | $curPath/getData_hdVideo.sh 170307_dance5 $hdVideoNum 10 | 11 | #Toddler sequences 12 | $curPath/getData_hdVideo.sh 160906_ian1 $hdVideoNum 13 | 14 | #Others sequences 15 | $curPath/getData_hdVideo.sh 170915_office1 $hdVideoNum 16 | $curPath/getData_hdVideo.sh 160906_pizza1 $hdVideoNum 17 | -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getDB_panoptic_ver1_2_hdVideo_t4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This list is named as "Panoptic Studio DB Ver 1.2" 4 | 5 | curPath=$(dirname "$0") 6 | hdVideoNum=31 7 | 8 | #Social Games (Haggling) 9 | $curPath/getData_hdVideo.sh 170221_haggling_b3 $hdVideoNum 10 | $curPath/getData_hdVideo.sh 170224_haggling_a3 $hdVideoNum 11 | $curPath/getData_hdVideo.sh 
170228_haggling_b1 $hdVideoNum 12 | $curPath/getData_hdVideo.sh 170407_haggling_a2 $hdVideoNum 13 | -------------------------------------------------------------------------------- /exps/CMU/panoptic-toolbox/getData_hdVideo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script downloads videos for a specific sequence: 4 | # ./getData.sh [sequenceName] [numHDViews] 5 | # 6 | # e.g., to download 10 VGA camera views for the "sampleData" sequence: 7 | # ./getData.sh sampleData 10 0 8 | # 9 | 10 | datasetName=${1-sampleData} 11 | numHDViews=${2-31} #Specify the number of hd views you want to donwload. Up to 31 12 | 13 | # Select wget or curl, with appropriate options 14 | if command -v wget >/dev/null 2>&1; then 15 | WGET="wget -c" 16 | mO="-O" 17 | elif command -v curl >/dev/null 2>&1; then 18 | WGET="curl -C -" 19 | mO="-o" 20 | else 21 | echo "This script requires wget or curl to download files." 22 | echo "Aborting." 23 | exit 1; 24 | fi 25 | 26 | # Each sequence gets its own subdirectory 27 | mkdir $datasetName 28 | cd $datasetName 29 | 30 | 31 | # Download calibration data 32 | $WGET $mO calibration_${datasetName}.json http://domedb.perception.cs.cmu.edu/webdata/dataset/$datasetName/calibration_${datasetName}.json || rm -v calibration_${datasetName}.json 33 | 34 | 35 | # 3D Face 36 | if [ ! -f hdFace3d.tar ]; then 37 | $WGET $mO hdFace3d.tar http://domedb.perception.cs.cmu.edu/webdata/dataset/$datasetName/hdFace3d.tar || rm -v hdFace3d.tar 38 | fi 39 | 40 | 41 | # Extract 3D Keypoints 42 | if [ -f hdFace3d.tar ]; then 43 | tar -xf hdFace3d.tar 44 | fi 45 | 46 | 47 | 48 | ##################### 49 | # Download hd videos 50 | ##################### 51 | mkdir -p hdVideos 52 | panel=0 53 | nodes=(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) 54 | for (( c=0; c<$numHDViews; c++)) 55 | do 56 | fileName=$(printf "hdVideos/hd_%02d_%02d.mp4" ${panel} ${nodes[c]}) 57 | echo $fileName; 58 | #Download and delete if the file is blank 59 | cmd=$(printf "$WGET $mO hdVideos/hd_%02d_%02d.mp4 http://domedb.perception.cs.cmu.edu/webdata/dataset/$datasetName/videos/hd_shared_crf20/hd_%02d_%02d.mp4 || rm -v $fileName" ${panel} ${nodes[c]} ${panel} ${nodes[c]}) 60 | eval $cmd 61 | done 62 | 63 | -------------------------------------------------------------------------------- /exps/CMU/released_seqs_excel.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/exps/CMU/released_seqs_excel.xlsx -------------------------------------------------------------------------------- /exps/CMU/selected_HPE_list.txt: -------------------------------------------------------------------------------- 1 | 【Totally 562 GB】 2 | 3 | ***13 selected names list*** 4 | 171204_pose3 5 | 171026_pose3 6 | 7 | 170221_haggling_m3 8 | 170404_haggling_a1 9 | 170407_haggling_b2 10 | 11 | 171026_cello3 12 | 161029_piano4 13 | 14 | 160422_ultimatum1 15 | 160224_haggling1 16 | 17 | 170307_dance5 18 | 19 | 160906_ian1 20 | 21 | 170915_office1 22 | 160906_pizza1 23 | 24 | ***4 more other selected names list*** 25 | 170221_haggling_b3 26 | 170224_haggling_a3 27 | 170228_haggling_b1 28 | 170407_haggling_a2 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /exps/compare_3ddfa.py: -------------------------------------------------------------------------------- 1 | 2 
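# NOTE (illustrative sketch, not part of the original script): the compare_*.py
# scripts below all read a prepared --json-file of the same format, a list of
# per-head entries with the matched ground-truth angles attached.  Judging from the
# keys read in the code, one entry looks roughly like this (all values invented):
'''
{
    "image_id": 1000000001,     # image is loaded from <root-imgdir>/<image_id>.jpg
    "bbox": [x0, y0, w, h],     # predicted head box, converted to [x0, y0, x1, y1] before use
    "gt_bbox": [x0, y0, w, h],  # matched ground-truth head box (used by some scripts)
    "gt_pitch": 10.5,
    "gt_yaw": -25.0,
    "gt_roll": 3.2
}
'''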
| __author__ = 'Huayi Zhou' 3 | 4 | ''' 5 | Put this file under the main folder of codes project 3DDFA https://github.com/cleardusk/3DDFA 6 | 7 | usage: 8 | python compare_3ddfa.py --root-imgdir /path/to/root/imgdir \ 9 | --json-file /path/to/prepared/json/file \ 10 | --save-file /path/to/saving/npy/file -m gpu 11 | 12 | e.g.: 13 | python compare_3ddfa.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --save-file ./agora_val_3DDFA.npy -m gpu --debug false 14 | [results] 15 | Saving all results in one file ./agora_val_3DDFA.npy ... 16 | Inference one image taking time: 0.011305771888510957 17 | face number: 3403 / 3403; MAE: 48.5867, [pitch_error, yaw_error, roll_error]: 42.5566, 39.6174, 63.5861 18 | 19 | 20 | python compare_3ddfa.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --save-file ./cmu_val_3DDFA.npy -m gpu --debug false 21 | [results] 22 | Saving all results in one file ./cmu_val_3DDFA.npy ... 23 | Inference one image taking time: 0.017735703712104287 24 | face number: 15871 / 15871; MAE: 27.1172, [pitch_error, yaw_error, roll_error]: 26.3376, 23.3927, 31.6214 25 | 26 | ''' 27 | 28 | import os 29 | import torch 30 | import torchvision.transforms as transforms 31 | import mobilenet_v1 32 | import numpy as np 33 | import cv2 34 | import argparse 35 | import torch.backends.cudnn as cudnn 36 | import time 37 | from tqdm import tqdm 38 | import json 39 | 40 | from utils.ddfa import ToTensorGjz, NormalizeGjz, str2bool 41 | from utils.inference import parse_roi_box_from_landmark, \ 42 | crop_img, predict_68pts, parse_roi_box_from_bbox, predict_dense 43 | from utils.estimate_pose import parse_pose, parse_pose_v2 44 | from utils.cv_plot import plot_pose_box 45 | 46 | STD_SIZE = 120 47 | 48 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 49 | 50 | def main(args): 51 | 52 | # 1. load pre-tained model 53 | checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar' 54 | arch = 'mobilenet_1' 55 | 56 | checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict'] 57 | model = getattr(mobilenet_v1, arch)(num_classes=62) # 62 = 12(pose) + 40(shape) +10(expression) 58 | 59 | model_dict = model.state_dict() 60 | # because the model is trained by multiple gpus, prefix module should be removed 61 | for k in checkpoint.keys(): 62 | model_dict[k.replace('module.', '')] = checkpoint[k] 63 | model.load_state_dict(model_dict) 64 | if args.mode == 'gpu': 65 | cudnn.benchmark = True 66 | model = model.cuda() 67 | model.eval() 68 | 69 | 70 | # 2. 
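# NOTE (hedged sketch, for readability only): as the comment next to the model
# construction above says, the 62 regression targets split into 12 pose + 40 shape
# + 10 expression parameters; parse_pose() later consumes the pose part of them.
# Roughly, the layout is handled inside 3DDFA's own utils like this:
'''
pose_param  = param[:12].reshape(3, 4)  # 3x4 camera/pose matrix
shape_param = param[12:52]              # 40 shape coefficients
exp_param   = param[52:62]              # 10 expression coefficients
'''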
forward 71 | transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)]) 72 | 73 | with open(args.json_file, "r") as json_f: 74 | pd_results_list = json.load(json_f) 75 | 76 | # face_imgs = [] # cropped face images collection 77 | pts_res = [] # 3d facial landmarks collection 78 | camPs = [] # Camera matrix collection 79 | pd_poses = [] # predicted pose collection 80 | gt_poses = [] # ground-truth pose collection 81 | taking_time_list = [] # how many ms per face 82 | valid_face_num = 0 83 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 84 | if args.debug and ind > 50: break # for testing 85 | 86 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 87 | img_ori = cv2.imread(img_path) 88 | 89 | bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 90 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 91 | 92 | gt_pitch = pd_results['gt_pitch'] 93 | gt_yaw = pd_results['gt_yaw'] 94 | gt_roll = pd_results['gt_roll'] 95 | 96 | ''' We do not need this enlarge operation. Or results will be super bad.''' 97 | # roi_box = parse_roi_box_from_bbox(bbox) 98 | roi_box = bbox 99 | img = crop_img(img_ori, roi_box) 100 | 101 | t1 = time.time() 102 | # forward: one step 103 | img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR) 104 | input = transform(img).unsqueeze(0) 105 | with torch.no_grad(): 106 | if args.mode == 'gpu': 107 | input = input.cuda() 108 | param = model(input) 109 | param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 110 | 111 | # 68 pts 112 | pts68 = predict_68pts(param, roi_box) 113 | 114 | ''' two-step for more accurate bbox to crop face ''' 115 | # roi_box = parse_roi_box_from_landmark(pts68) 116 | # img_step2 = crop_img(img_ori, roi_box) 117 | # img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR) 118 | # input = transform(img_step2).unsqueeze(0) 119 | # with torch.no_grad(): 120 | # if args.mode == 'gpu': 121 | # input = input.cuda() 122 | # param = model(input) 123 | # param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 124 | # pts68 = predict_68pts(param, roi_box) 125 | 126 | t2 = time.time() 127 | taking_time_list.append(t2-t1) 128 | 129 | camP, pose = parse_pose(param) 130 | # camP, pose = parse_pose_v2(param, pts68) 131 | if pose is None: 132 | continue 133 | 134 | valid_face_num += 1 135 | pts_res.append(pts68) 136 | camPs.append(camP) 137 | 138 | # the predicted order of 3DDFA is: [yaw, -pitch, -roll], and in range (-np.pi/2, np.pi/2) 139 | pd_poses.append([-pose[1]*180/np.pi, pose[0]*180/np.pi, -pose[2]*180/np.pi]) # for parse_pose() 140 | # pd_poses.append([pose[1], pose[0], pose[2]]) # for parse_pose_v2() 141 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 142 | 143 | 144 | if args.debug: 145 | save_img_path = "./tmp/"+str(ind).zfill(0)+\ 146 | "_p"+str(round(gt_pitch, 2))+"v"+str(round(-pose[1]*180/np.pi, 2))+\ 147 | "_y"+str(round(gt_yaw, 2))+"v"+str(round(pose[0]*180/np.pi, 2))+\ 148 | "_r"+str(round(gt_roll, 2))+"v"+str(round(-pose[2]*180/np.pi, 2))+".jpg" # for parse_pose() 149 | # save_img_path = "./tmp/"+str(ind).zfill(0)+\ 150 | # "_p"+str(round(gt_pitch, 2))+"v"+str(round(pose[1], 2))+\ 151 | # "_y"+str(round(gt_yaw, 2))+"v"+str(round(pose[0], 2))+\ 152 | # "_r"+str(round(gt_roll, 2))+"v"+str(round(pose[2], 2))+".jpg" # for parse_pose_v2() 153 | 154 | 155 | cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), 
(255,255,255), 2) 156 | for i in range(len(pts68[0, :])): 157 | cv2.circle(img_ori, (int(pts68[0, i]), int(pts68[1, i])), 1, (0,255,255), -1) 158 | img_ori = plot_pose_box(img_ori, [camP], [pts68]) 159 | cv2.imwrite(save_img_path, img_ori) 160 | 161 | 162 | 163 | '''print all results''' 164 | print("Saving all results in one file %s ..."%(args.save_file)) 165 | np.savez(args.save_file, camPs=np.array(camPs), 166 | pts_res=np.array(pts_res), 167 | # image=np.array(face_imgs), 168 | pd_pose=np.array(pd_poses), 169 | gt_poses=np.array(gt_poses)) 170 | # db_dict = np.load(args.save_file) 171 | # print(args.save_file, list(db_dict.keys())) 172 | 173 | 174 | print("Inference one image taking time:", sum(taking_time_list)/len(taking_time_list)) 175 | 176 | 177 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 178 | # error_list[:, 1] = np.min((error_list[:, 1], 360 - error_list[:, 1]), axis=0) # yaw range may be [-180,180] 179 | error_list = np.min((error_list, 360 - error_list), axis=0) 180 | pose_matrix = np.mean(error_list, axis=0) 181 | MAE = np.mean(pose_matrix) 182 | print("face number: %d / %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%( 183 | valid_face_num, len(taking_time_list), round(MAE, 4), 184 | round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 185 | 186 | 187 | if __name__ == '__main__': 188 | parser = argparse.ArgumentParser(description='3DDFA inference pipeline') 189 | 190 | parser.add_argument('--root-imgdir', default='', 191 | help='root path to multiple images') 192 | parser.add_argument('--json-file', default='', 193 | help='json file path that contains multiple images and their head bboxes') 194 | parser.add_argument('--save-file', default='', 195 | help='.npy file path to save all results') 196 | parser.add_argument('-m', '--mode', default='cpu', type=str, help='gpu or cpu mode') 197 | parser.add_argument('--debug', default='false', type=str2bool, help='whether set into debug mode') 198 | 199 | args = parser.parse_args() 200 | main(args) -------------------------------------------------------------------------------- /exps/compare_3ddfa_v2.py: -------------------------------------------------------------------------------- 1 | 2 | __author__ = 'Huayi Zhou' 3 | 4 | ''' 5 | 6 | pip install onnxruntime 7 | 8 | Put this file under the main folder of codes project 3DDFA_v2 https://github.com/cleardusk/3DDFA_V2 9 | or 10 | Put this file under the main folder of codes using project 3DDFA_v2 https://github.com/bubingy/HeadPoseEstimate 11 | 12 | usage: 13 | python compare_3ddfa_v2.py --root-imgdir /path/to/root/imgdir \ 14 | --json-file /path/to/prepared/json/file \ 15 | --save-file /path/to/saving/npy/file -m gpu 16 | 17 | e.g.: 18 | python compare_3ddfa_v2.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --save-file ./agora_val_3DDFA_v2.npy -m gpu --debug 19 | [results] 20 | Saving all results in one file ./agora_val_3DDFA_v2.npy ... 
21 | Inference one image taking time: 0.015800806553336474 22 | face number: 3403; MAE: 22.7539, [pitch_error, yaw_error, roll_error]: 20.5154, 28.4544, 19.2918 23 | 24 | 25 | python compare_3ddfa_v2.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --save-file ./cmu_val_3DDFA_v2.npy -m gpu --debug 26 | [results] 27 | Saving all results in one file ./cmu_val_3DDFA_v2.npy ... 28 | Inference one image taking time: 0.016364026179303746 29 | face number: 15871; MAE: 17.3448, [pitch_error, yaw_error, roll_error]: 18.6524, 17.0074, 16.3747 30 | 31 | ''' 32 | 33 | import cv2 34 | import os 35 | import time 36 | import json 37 | import argparse 38 | from tqdm import tqdm 39 | import numpy as np 40 | 41 | from model.pose import estimate_head_pose 42 | from model.plot import draw_pose 43 | 44 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 45 | 46 | def main(args): 47 | 48 | os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' 49 | os.environ['OMP_NUM_THREADS'] = '1' 50 | from model.FaceAlignment3D.TDDFA_ONNX import TDDFA_ONNX 51 | tddfa = TDDFA_ONNX() 52 | 53 | with open(args.json_file, "r") as json_f: 54 | pd_results_list = json.load(json_f) 55 | 56 | 57 | pts_res = [] # 3d facial landmarks collection 58 | pd_poses = [] # predicted pose collection 59 | gt_poses = [] # ground-truth pose collection 60 | taking_time_list = [] # how many ms per face 61 | 62 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 63 | if args.debug and ind > 50: break # for testing 64 | 65 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 66 | img_ori = cv2.imread(img_path) 67 | 68 | bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 69 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 70 | 71 | gt_pitch = pd_results['gt_pitch'] 72 | gt_yaw = pd_results['gt_yaw'] 73 | gt_roll = pd_results['gt_roll'] 74 | 75 | tic = time.time() 76 | 77 | param_lst, roi_box_lst = tddfa(img_ori, [bbox]) 78 | 79 | # calculate Euler angle 80 | ver_lst = tddfa.recon_vers(param_lst, roi_box_lst) 81 | euler_angle_lst, directions_lst, landmarks_lst = estimate_head_pose(ver_lst, True) 82 | 83 | toc = time.time() 84 | taking_time_list.append(toc-tic) 85 | 86 | pts_res.append(landmarks_lst[0]) 87 | pose = euler_angle_lst[0] 88 | 89 | # the predicted order of 3DDFA_v2 is: [-roll, -yaw, -pitch] 90 | pose[:] = -pose[:] 91 | 92 | pd_poses.append([pose[2], pose[1], pose[0]]) 93 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 94 | 95 | 96 | if args.debug: 97 | save_img_path = "./tmp/"+str(ind).zfill(0)+\ 98 | "_p"+str(round(gt_pitch, 2))+"v"+str(round(pose[2], 2))+\ 99 | "_y"+str(round(gt_yaw, 2))+"v"+str(round(pose[1], 2))+\ 100 | "_r"+str(round(gt_roll, 2))+"v"+str(round(pose[0], 2))+".jpg" 101 | 102 | # cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 103 | 104 | show_img = draw_pose(img_ori, directions_lst, np.array([bbox]), landmarks_lst, 105 | show_bbox=True, show_landmarks=True) 106 | cv2.imwrite(save_img_path, show_img) 107 | 108 | '''print all results''' 109 | print("Saving all results in one file %s ..."%(args.save_file)) 110 | np.savez(args.save_file, 111 | pts_res=np.array(pts_res), 112 | pd_pose=np.array(pd_poses), 113 | gt_poses=np.array(gt_poses)) 114 | # db_dict = np.load(args.save_file) 115 | # print(args.save_file, list(db_dict.keys())) 116 
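# NOTE (worked example, not part of the original script): the MAE computed below
# wraps angular differences, so a ground-truth yaw of 170 and a prediction of -175
# count as 15 degrees apart rather than 345:
'''
import numpy as np
err = np.abs(np.array([170.0]) - np.array([-175.0]))  # -> [345.]
err = np.min((err, 360 - err), axis=0)                # -> [15.]
'''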
| 117 | print("Inference one image taking time:", sum(taking_time_list)/len(taking_time_list)) 118 | 119 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 120 | # error_list[:, 1] = np.min((error_list[:, 1], 360 - error_list[:, 1]), axis=0) # yaw range may be [-180,180] 121 | error_list = np.min((error_list, 360 - error_list), axis=0) 122 | pose_matrix = np.mean(error_list, axis=0) 123 | MAE = np.mean(pose_matrix) 124 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%(len(taking_time_list), 125 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 126 | 127 | 128 | if __name__ == '__main__': 129 | parser = argparse.ArgumentParser(description='3DDFA inference pipeline') 130 | 131 | parser.add_argument('--root-imgdir', default='', 132 | help='root path to multiple images') 133 | parser.add_argument('--json-file', default='', 134 | help='json file path that contains multiple images and their head bboxes') 135 | parser.add_argument('--save-file', default='', 136 | help='.npy file path to save all results') 137 | parser.add_argument('-m', '--mode', default='cpu', type=str, help='gpu or cpu mode') 138 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 139 | 140 | args = parser.parse_args() 141 | main(args) -------------------------------------------------------------------------------- /exps/compare_FSANet.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | __author__ = 'Huayi Zhou' 4 | 5 | ''' 6 | 7 | 8 | Put this file under the main folder of codes project FSA-Net_pytorch 9 | git clone https://github.com/omasaht/headpose-fsanet-pytorch FSA-Net_pytorch 10 | 11 | 12 | usage: 13 | python compare_FSANet.py --root-imgdir /path/to/root/imgdir \ 14 | --json-file /path/to/prepared/json/file 15 | 16 | e.g.: 17 | python compare_FSANet.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 18 | [results] 19 | Inference one image taking time: 0.013793717896777136 20 | face number: 3403; MAE: 18.9809, [pitch_error, yaw_error, roll_error]: 18.9733, 21.6935, 16.2759 21 | 22 | 23 | python compare_FSANet.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 24 | [results] 25 | Inference one image taking time: 0.016242261164280028 26 | face number: 15871; MAE: 15.6144, [pitch_error, yaw_error, roll_error]: 16.343, 17.515, 12.9852 27 | 28 | ''' 29 | 30 | import os 31 | import argparse 32 | import time 33 | import json 34 | import cv2 35 | 36 | import onnxruntime 37 | 38 | import numpy as np 39 | from tqdm import tqdm 40 | from pathlib import Path 41 | from math import cos, sin, pi 42 | 43 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 44 | 45 | root_path = str(Path(__file__).absolute().parent.parent) 46 | 47 | 48 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100): 49 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 50 | pitch = pitch * np.pi / 180 51 | yaw = -(yaw * np.pi / 180) 52 | roll = roll * np.pi / 180 53 | 54 | if tdx != None and tdy != None: 55 | tdx = tdx 56 | tdy = tdy 57 | else: 58 | height, width = img.shape[:2] 59 | tdx = width / 2 60 | tdy = height / 
2 61 | 62 | # X-Axis pointing to right. drawn in red 63 | x1 = size * (cos(yaw) * cos(roll)) + tdx 64 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 65 | 66 | # Y-Axis | drawn in green 67 | # v 68 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 69 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 70 | 71 | # Z-Axis (out of the screen) drawn in blue 72 | x3 = size * (sin(yaw)) + tdx 73 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 74 | 75 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 76 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 77 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 78 | return img 79 | 80 | def main(args): 81 | 82 | sess1 = onnxruntime.InferenceSession(f'{root_path}/FSA-Net_pytorch/pretrained/fsanet-1x1-iter-688590.onnx') 83 | sess2 = onnxruntime.InferenceSession(f'{root_path}/FSA-Net_pytorch/pretrained/fsanet-var-iter-688590.onnx') 84 | 85 | # sess1 + sess2 --> 2.37 MB 86 | 87 | 88 | with open(args.json_file, "r") as json_f: 89 | pd_results_list = json.load(json_f) 90 | 91 | pd_poses = [] # predicted pose collection 92 | gt_poses = [] # ground-truth pose collection 93 | taking_time_list = [] # how many ms per face 94 | 95 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 96 | if args.debug and ind > 50: break # for testing 97 | 98 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 99 | img_ori = cv2.imread(img_path) 100 | 101 | bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 102 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 103 | 104 | gt_pitch = pd_results['gt_pitch'] 105 | gt_yaw = pd_results['gt_yaw'] 106 | gt_roll = pd_results['gt_roll'] 107 | 108 | 109 | t1 = time.time() 110 | [x1, y1, x2, y2] = [int(i) for i in bbox] 111 | face_roi = img_ori[y1:y2+1,x1:x2+1] 112 | 113 | # preprocess headpose model input 114 | face_roi = cv2.resize(face_roi,(64,64)) 115 | face_roi = face_roi.transpose((2,0,1)) 116 | face_roi = np.expand_dims(face_roi,axis=0) 117 | face_roi = (face_roi-127.5)/128 118 | face_roi = face_roi.astype(np.float32) 119 | 120 | # get headpose 121 | res1 = sess1.run(["output"], {"input": face_roi})[0] 122 | res2 = sess2.run(["output"], {"input": face_roi})[0] 123 | 124 | yaw, pitch, roll = np.mean(np.vstack((res1,res2)),axis=0) 125 | t2 = time.time() 126 | taking_time_list.append(t2-t1) 127 | 128 | pd_poses.append([pitch, yaw, roll]) 129 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 130 | 131 | if args.debug: 132 | save_img_path = "./tmp/"+str(ind).zfill(0)+\ 133 | "_p"+str(round(gt_pitch, 2))+"v"+str(round(pitch, 2))+\ 134 | "_y"+str(round(gt_yaw, 2))+"v"+str(round(yaw, 2))+\ 135 | "_r"+str(round(gt_roll, 2))+"v"+str(round(roll, 2))+".jpg" 136 | 137 | cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 138 | img_ori = draw_axis(img_ori, yaw, pitch, roll, 139 | tdx=(bbox[0]+bbox[2])/2, tdy=(bbox[1]+bbox[3])/2, size=100) 140 | cv2.imwrite(save_img_path, img_ori) 141 | 142 | '''print all results''' 143 | print("Inference one image taking time:", sum(taking_time_list[1:])/len(taking_time_list[1:])) 144 | 145 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 146 | error_list = np.min((error_list, 360 - error_list), axis=0) 147 | pose_matrix = np.mean(error_list, axis=0) 148 | MAE = np.mean(pose_matrix) 149 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, 
roll_error]: %s, %s, %s"%(len(taking_time_list), 150 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 151 | 152 | 153 | if __name__ == '__main__': 154 | parser = argparse.ArgumentParser(description='FAN inference pipeline') 155 | 156 | parser.add_argument('--root-imgdir', default='', 157 | help='root path to multiple images') 158 | parser.add_argument('--json-file', default='', 159 | help='json file path that contains multiple images and their head bboxes') 160 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 161 | 162 | args = parser.parse_args() 163 | main(args) -------------------------------------------------------------------------------- /exps/compare_HopeNet.py: -------------------------------------------------------------------------------- 1 | 2 | '''Too slow inference speed''' 3 | 4 | __author__ = 'Huayi Zhou' 5 | 6 | ''' 7 | 8 | git clone https://github.com/natanielruiz/deep-head-pose ./HopeNet 9 | 10 | Put this file under the main folder of codes project HopeNet 11 | https://github.com/natanielruiz/deep-head-pose 12 | 13 | usage: 14 | python compare_HopeNet.py --root-imgdir /path/to/root/imgdir \ 15 | --json-file /path/to/prepared/json/file 16 | 17 | 18 | e.g.: 19 | python compare_HopeNet.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 20 | [results] 21 | Inference one image taking time: 0.011602783960009378 22 | face number: 3403; MAE: 19.9984, [pitch_error, yaw_error, roll_error]: 19.1262, 24.0867, 16.7823 23 | 24 | 25 | python compare_HopeNet.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 26 | [results] 27 | Inference one image taking time: 0.011030971019915537 28 | face number: 15871; MAE: 17.0851, [pitch_error, yaw_error, roll_error]: 17.4948, 20.3525, 13.4079 29 | 30 | ''' 31 | 32 | import os 33 | import argparse 34 | import time 35 | import json 36 | import cv2 37 | 38 | import numpy as np 39 | from tqdm import tqdm 40 | from pathlib import Path 41 | from math import cos, sin, pi 42 | 43 | import torch 44 | import torchvision 45 | import torch.backends.cudnn as cudnn 46 | from codes import hopenet, utils 47 | from torchvision import transforms 48 | from PIL import Image 49 | from torch.autograd import Variable 50 | 51 | 52 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 53 | 54 | root_path = str(Path(__file__).absolute().parent.parent) 55 | 56 | 57 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100): 58 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 59 | pitch = pitch * np.pi / 180 60 | yaw = -(yaw * np.pi / 180) 61 | roll = roll * np.pi / 180 62 | 63 | if tdx != None and tdy != None: 64 | tdx = tdx 65 | tdy = tdy 66 | else: 67 | height, width = img.shape[:2] 68 | tdx = width / 2 69 | tdy = height / 2 70 | 71 | # X-Axis pointing to right. 
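# NOTE (hedged usage sketch, not part of the original helper): the three segments
# drawn below are the head-frame X (red), Y (green) and Z (blue) axes under the
# given pose, anchored at (tdx, tdy).  A minimal standalone call, with invented
# angle values, could look like:
'''
canvas = np.zeros((200, 200, 3), dtype=np.uint8)
canvas = draw_axis(canvas, yaw=30, pitch=10, roll=0, tdx=100, tdy=100, size=80)
cv2.imwrite("axis_check.jpg", canvas)
'''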
drawn in red 72 | x1 = size * (cos(yaw) * cos(roll)) + tdx 73 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 74 | 75 | # Y-Axis | drawn in green 76 | # v 77 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 78 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 79 | 80 | # Z-Axis (out of the screen) drawn in blue 81 | x3 = size * (sin(yaw)) + tdx 82 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 83 | 84 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 85 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 86 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 87 | return img 88 | 89 | def main(args): 90 | 91 | cudnn.enabled = True 92 | snapshot_path = "./hopenet_robust_alpha1.pkl" # 91.4 MB 93 | 94 | # ResNet50 structure 95 | model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66) 96 | 97 | # Load snapshot 98 | saved_state_dict = torch.load(snapshot_path) 99 | model.load_state_dict(saved_state_dict) 100 | model.cuda() 101 | model.eval() 102 | 103 | transformations = transforms.Compose([transforms.Scale(224), 104 | transforms.CenterCrop(224), transforms.ToTensor(), 105 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) 106 | 107 | idx_tensor = [idx for idx in range(66)] 108 | idx_tensor = torch.FloatTensor(idx_tensor).cuda() 109 | 110 | 111 | 112 | with open(args.json_file, "r") as json_f: 113 | pd_results_list = json.load(json_f) 114 | 115 | pd_poses = [] # predicted pose collection 116 | gt_poses = [] # ground-truth pose collection 117 | taking_time_list = [] # how many ms per face 118 | 119 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 120 | if args.debug and ind > 50: break # for testing 121 | 122 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 123 | img_ori = cv2.imread(img_path) 124 | 125 | bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 126 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 127 | 128 | gt_pitch = pd_results['gt_pitch'] 129 | gt_yaw = pd_results['gt_yaw'] 130 | gt_roll = pd_results['gt_roll'] 131 | 132 | 133 | t1 = time.time() 134 | [x1, y1, x2, y2] = [int(i) for i in bbox] 135 | face_roi = img_ori[y1:y2+1,x1:x2+1] 136 | 137 | # preprocess headpose model input 138 | face_roi = Image.fromarray(cv2.cvtColor(face_roi, cv2.COLOR_BGR2RGB)) # opencv --> PIL 139 | face_roi = transformations(face_roi) 140 | face_roi = face_roi.unsqueeze(0) 141 | 142 | face_roi = Variable(face_roi).cuda() 143 | 144 | # get headpose 145 | yaw_predicted, pitch_predicted, roll_predicted = model(face_roi) 146 | 147 | # Continuous predictions 148 | yaw_predicted = utils.softmax_temperature(yaw_predicted.data, 1) 149 | pitch_predicted = utils.softmax_temperature(pitch_predicted.data, 1) 150 | roll_predicted = utils.softmax_temperature(roll_predicted.data, 1) 151 | 152 | yaw_predicted = torch.sum(yaw_predicted * idx_tensor, 1).cpu() * 3 - 99 153 | pitch_predicted = torch.sum(pitch_predicted * idx_tensor, 1).cpu() * 3 - 99 154 | roll_predicted = torch.sum(roll_predicted * idx_tensor, 1).cpu() * 3 - 99 155 | 156 | yaw = yaw_predicted[0].cpu().numpy() 157 | pitch = pitch_predicted[0].cpu().numpy() 158 | roll = roll_predicted[0].cpu().numpy() 159 | 160 | t2 = time.time() 161 | taking_time_list.append(t2-t1) 162 | 163 | pd_poses.append([pitch, yaw, roll]) 164 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 165 | 166 | if args.debug: 167 | 
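# NOTE (hedged explanation of the decoding a few lines above, added for clarity):
# HopeNet predicts a distribution over 66 yaw/pitch/roll bins of 3 degrees each, and
# the continuous angle is the expectation over bin indices mapped back to degrees,
# i.e. sum_i(p_i * i) * 3 - 99, covering [-99, 96].  Quick sanity check:
'''
probs = np.zeros(66); probs[33] = 1.0                  # all mass on bin 33
angle = float(np.sum(probs * np.arange(66)) * 3 - 99)  # -> 0.0
'''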
save_img_path = "./tmp/"+str(ind).zfill(0)+\ 168 | "_p"+str(round(gt_pitch, 2))+"v"+str(np.round(pitch, 2))+\ 169 | "_y"+str(round(gt_yaw, 2))+"v"+str(np.round(yaw, 2))+\ 170 | "_r"+str(round(gt_roll, 2))+"v"+str(np.round(roll, 2))+".jpg" 171 | 172 | cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 173 | img_ori = draw_axis(img_ori, yaw, pitch, roll, 174 | tdx=(bbox[0]+bbox[2])/2, tdy=(bbox[1]+bbox[3])/2, size=100) 175 | cv2.imwrite(save_img_path, img_ori) 176 | 177 | '''print all results''' 178 | print("Inference one image taking time:", sum(taking_time_list[1:])/len(taking_time_list[1:])) 179 | 180 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 181 | error_list = np.min((error_list, 360 - error_list), axis=0) 182 | pose_matrix = np.mean(error_list, axis=0) 183 | MAE = np.mean(pose_matrix) 184 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%(len(taking_time_list), 185 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 186 | 187 | 188 | if __name__ == '__main__': 189 | parser = argparse.ArgumentParser(description='FAN inference pipeline') 190 | 191 | parser.add_argument('--root-imgdir', default='', 192 | help='root path to multiple images') 193 | parser.add_argument('--json-file', default='', 194 | help='json file path that contains multiple images and their head bboxes') 195 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 196 | 197 | args = parser.parse_args() 198 | main(args) -------------------------------------------------------------------------------- /exps/compare_SynergyNet.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | __author__ = 'Huayi Zhou' 4 | 5 | ''' 6 | 7 | 8 | Put this file under the main folder of codes project SynergyNet 9 | git clone https://github.com/choyingw/SynergyNet SynergyNet 10 | https://drive.google.com/file/d/1SQsMhvAmpD1O8Hm0yEGom0C0rXtA0qs8/view [3dmm_data] data link 11 | https://drive.google.com/file/d/1BVHbiLTfX6iTeJcNbh-jgHjWDoemfrzG/view [pretrained weight] data link 12 | 13 | usage: 14 | python compare_SynergyNet.py --root-imgdir /path/to/root/imgdir \ 15 | --json-file /path/to/prepared/json/file 16 | 17 | e.g.: 18 | python compare_SynergyNet.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 19 | [results][2023-01-15] 20 | Inference one image taking time: 0.007218158028596455 21 | face number: 3415; MAE: 42.212, [pitch_error, yaw_error, roll_error]: 35.5837, 39.5468, 51.5054 22 | 23 | python compare_SynergyNet.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 24 | [results][2023-01-15] 25 | Inference one image taking time: 0.006917836413339054 26 | face number: 15885; MAE: 24.6768, [pitch_error, yaw_error, roll_error]: 23.518, 27.5607, 22.9518 27 | ''' 28 | 29 | import os 30 | import argparse 31 | import time 32 | import json 33 | import cv2 34 | 35 | import numpy as np 36 | from tqdm import tqdm 37 | from pathlib import Path 38 | from math import cos, sin, pi 39 | 40 | import types 41 | import torch 42 | import torchvision.transforms as transforms 43 | from utils.ddfa 
import ToTensor, Normalize 44 | from utils.inference import predict_pose, predict_sparseVert, predict_denseVert, crop_img 45 | from model_building import SynergyNet 46 | 47 | 48 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 49 | 50 | root_path = str(Path(__file__).absolute().parent.parent) 51 | 52 | 53 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100): 54 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 55 | pitch = pitch * np.pi / 180 56 | yaw = -(yaw * np.pi / 180) 57 | roll = roll * np.pi / 180 58 | 59 | if tdx != None and tdy != None: 60 | tdx = tdx 61 | tdy = tdy 62 | else: 63 | height, width = img.shape[:2] 64 | tdx = width / 2 65 | tdy = height / 2 66 | 67 | # X-Axis pointing to right. drawn in red 68 | x1 = size * (cos(yaw) * cos(roll)) + tdx 69 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 70 | 71 | # Y-Axis | drawn in green 72 | # v 73 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 74 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 75 | 76 | # Z-Axis (out of the screen) drawn in blue 77 | x3 = size * (sin(yaw)) + tdx 78 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 79 | 80 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 81 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 82 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 83 | return img 84 | 85 | def main(args): 86 | 87 | IMG_SIZE = 120 # Following 3DDFA-V2, we also use 120x120 resolution 88 | transform = transforms.Compose([ToTensor(), Normalize(mean=127.5, std=128)]) 89 | 90 | # load pre-tained model 91 | checkpoint_fp = 'pretrained/best.pth.tar' 92 | checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict'] 93 | 94 | args_SynergyNet = types.SimpleNamespace() 95 | args_SynergyNet.arch = 'mobilenet_v2' 96 | args_SynergyNet.img_size = 120 97 | args_SynergyNet.devices_id = [0] 98 | 99 | model = SynergyNet(args_SynergyNet) 100 | model_dict = model.state_dict() 101 | 102 | # because the model is trained by multiple gpus, prefix 'module' should be removed 103 | for k in checkpoint.keys(): 104 | model_dict[k.replace('module.', '')] = checkpoint[k] 105 | 106 | model.load_state_dict(model_dict, strict=False) 107 | model = model.cuda() 108 | model.eval() 109 | 110 | 111 | with open(args.json_file, "r") as json_f: 112 | pd_results_list = json.load(json_f) 113 | 114 | pd_poses = [] # predicted pose collection 115 | gt_poses = [] # ground-truth pose collection 116 | taking_time_list = [] # how many ms per face 117 | 118 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 119 | if args.debug and ind > 50: break # for testing 120 | 121 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 122 | img_ori = cv2.imread(img_path) 123 | 124 | # bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 125 | bbox = pd_results['gt_bbox'] 126 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 127 | 128 | gt_pitch = pd_results['gt_pitch'] 129 | gt_yaw = pd_results['gt_yaw'] 130 | gt_roll = pd_results['gt_roll'] 131 | 132 | 133 | t1 = time.time() 134 | # [x1, y1, x2, y2] = [int(i) for i in bbox] 135 | # face_roi = img_ori[y1:y2+1,x1:x2+1] 136 | 137 | HCenter = (bbox[1] + bbox[3])/2 138 | WCenter = (bbox[0] + bbox[2])/2 139 | side_len = bbox[3]-bbox[1] 140 | margin = side_len * 0.75 // 2 # a larger bbox will result a worse MAE 141 | bbox[0], bbox[1], bbox[2], 
bbox[3] = WCenter-margin, HCenter-margin, WCenter+margin, HCenter+margin 142 | face_roi = crop_img(img_ori, bbox) 143 | 144 | img = cv2.resize(face_roi, dsize=(IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_LINEAR) 145 | input = transform(img).unsqueeze(0) 146 | with torch.no_grad(): 147 | input = input.cuda() 148 | param = model.forward_test(input) 149 | param = param.squeeze().cpu().numpy().flatten().astype(np.float32) 150 | # inferences 151 | # lmks = predict_sparseVert(param, bbox, transform=True) 152 | # vertices = predict_denseVert(param, bbox, transform=True) 153 | angles, translation = predict_pose(param, bbox) 154 | yaw, pitch, roll = angles[0], angles[1], angles[2] 155 | 156 | t2 = time.time() 157 | taking_time_list.append(t2-t1) 158 | 159 | pd_poses.append([pitch, yaw, roll]) 160 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 161 | 162 | if args.debug: 163 | save_img_path = "./tmp/"+str(ind).zfill(0)+\ 164 | "_p"+str(round(gt_pitch, 2))+"v"+str(round(pitch, 2))+\ 165 | "_y"+str(round(gt_yaw, 2))+"v"+str(round(yaw, 2))+\ 166 | "_r"+str(round(gt_roll, 2))+"v"+str(round(roll, 2))+".jpg" 167 | 168 | cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 169 | img_ori = draw_axis(img_ori, yaw, pitch, roll, 170 | tdx=(bbox[0]+bbox[2])/2, tdy=(bbox[1]+bbox[3])/2, size=100) 171 | cv2.imwrite(save_img_path, img_ori) 172 | 173 | '''print all results''' 174 | print("Inference one image taking time:", sum(taking_time_list[1:])/len(taking_time_list[1:])) 175 | 176 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 177 | error_list = np.min((error_list, 360 - error_list), axis=0) 178 | pose_matrix = np.mean(error_list, axis=0) 179 | MAE = np.mean(pose_matrix) 180 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%(len(taking_time_list), 181 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 182 | 183 | 184 | if __name__ == '__main__': 185 | parser = argparse.ArgumentParser(description='FAN inference pipeline') 186 | 187 | parser.add_argument('--root-imgdir', default='', 188 | help='root path to multiple images') 189 | parser.add_argument('--json-file', default='', 190 | help='json file path that contains multiple images and their head bboxes') 191 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 192 | 193 | args = parser.parse_args() 194 | main(args) -------------------------------------------------------------------------------- /exps/compare_dad3dnet.py: -------------------------------------------------------------------------------- 1 | 2 | '''Too slow inference speed''' 3 | 4 | __author__ = 'Huayi Zhou' 5 | 6 | ''' 7 | 8 | usage: 9 | python compare_dad3dnet.py --root-imgdir /path/to/root/imgdir \ 10 | --json-file /path/to/prepared/json/file 11 | 12 | e.g.: 13 | python compare_dad3dnet.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_full.json --debug 14 | [results] 15 | Inference one image taking time: 0.018059632885267938 16 | frontal face number: 3741; MAE_frontal: 35.1075, [pitch_error, yaw_error, roll_error]: 41.2611, 22.4362, 41.6252 17 | face number: 7414; MAE: 80.1786, [pitch_error, yaw_error, roll_error]: 85.2124, 68.098, 87.2253 18 | [results][2023-01-14] 19 | Inference one image taking time: 0.01842204154443272 20 | frontal face number: 3413; MAE_frontal: 
32.6388, [pitch_error, yaw_error, roll_error]: 38.889, 19.987, 39.0404 21 | face number: 6715; MAE: 80.2083, [pitch_error, yaw_error, roll_error]: 86.3455, 65.9651, 88.3143 22 | 23 | python compare_dad3dnet.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_full.json --debug 24 | [results] 25 | Inference one image taking time: 0.02023921753516816 26 | frontal face number: 16396; MAE_frontal: 21.7698, [pitch_error, yaw_error, roll_error]: 26.1876, 11.4288, 27.6928 27 | face number: 32604; MAE: 80.5461, [pitch_error, yaw_error, roll_error]: 88.7443, 58.7891, 94.1048 28 | [results][2023-01-14] 29 | Inference one image taking time: 0.018940799204607884 30 | frontal face number: 15886; MAE_frontal: 18.9887, [pitch_error, yaw_error, roll_error]: 22.4626, 10.58, 23.9235 31 | face number: 31976; MAE: 79.7676, [pitch_error, yaw_error, roll_error]: 87.6178, 58.6636, 93.0214 32 | 33 | ''' 34 | 35 | import os 36 | import argparse 37 | import time 38 | import json 39 | import cv2 40 | 41 | import numpy as np 42 | from tqdm import tqdm 43 | from pathlib import Path 44 | from math import cos, sin, pi 45 | 46 | from model_training.model.flame import calculate_rpy, FlameParams, FLAME_CONSTS 47 | from pytorch_toolbelt.utils import read_rgb_image 48 | from predictor import FaceMeshPredictor 49 | predictor_dad3dnet = FaceMeshPredictor.dad_3dnet() 50 | 51 | 52 | np.set_printoptions(suppress=True) 53 | 54 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 55 | 56 | root_path = str(Path(__file__).absolute().parent.parent) 57 | 58 | 59 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100): 60 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 61 | pitch = pitch * np.pi / 180 62 | yaw = -(yaw * np.pi / 180) 63 | roll = roll * np.pi / 180 64 | 65 | if tdx != None and tdy != None: 66 | tdx = tdx 67 | tdy = tdy 68 | else: 69 | height, width = img.shape[:2] 70 | tdx = width / 2 71 | tdy = height / 2 72 | 73 | # X-Axis pointing to right. 
drawn in red 74 | x1 = size * (cos(yaw) * cos(roll)) + tdx 75 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 76 | 77 | # Y-Axis | drawn in green 78 | # v 79 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 80 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 81 | 82 | # Z-Axis (out of the screen) drawn in blue 83 | x3 = size * (sin(yaw)) + tdx 84 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 85 | 86 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 87 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 88 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 89 | return img 90 | 91 | 92 | def main(args): 93 | 94 | # with open(args.json_file, "r") as json_f: 95 | # gt_results_dict = json.load(json_f) 96 | 97 | 98 | with open(args.json_file, "r") as json_f: 99 | pd_results_list = json.load(json_f) 100 | 101 | pd_poses = [] # predicted pose collection 102 | gt_poses = [] # ground-truth pose collection 103 | pd_poses_frontal = [] # predicted pose collection of frontal face 104 | gt_poses_frontal = [] # ground-truth pose collection of frontal face 105 | taking_time_list = [] # how many ms per face 106 | 107 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 108 | if args.debug and ind > 50: break # for testing 109 | 110 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 111 | img_ori = cv2.imread(img_path) 112 | 113 | # bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 114 | bbox = pd_results['gt_bbox'] 115 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 116 | 117 | gt_pitch = pd_results['gt_pitch'] 118 | gt_yaw = pd_results['gt_yaw'] 119 | gt_roll = pd_results['gt_roll'] 120 | 121 | t1 = time.time() 122 | [x1, y1, x2, y2] = [int(i) for i in bbox] 123 | face_roi = img_ori[y1:y2+1,x1:x2+1] 124 | 125 | cropped_img_path = "./temp_cropped_img.jpg" 126 | cv2.imwrite(cropped_img_path, face_roi) 127 | image = read_rgb_image(cropped_img_path) 128 | predictions = predictor_dad3dnet(image) 129 | params_3dmm = predictions["3dmm_params"].float() 130 | flame_params = FlameParams.from_3dmm(params_3dmm, FLAME_CONSTS) 131 | rpy = calculate_rpy(flame_params) 132 | yaw, pitch, roll = rpy.yaw, rpy.pitch, rpy.roll 133 | 134 | t2 = time.time() 135 | taking_time_list.append(t2-t1) 136 | 137 | pd_poses.append([pitch, yaw, roll]) 138 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 139 | 140 | if abs(gt_yaw) < 90: 141 | pd_poses_frontal.append([pitch, yaw, roll]) 142 | gt_poses_frontal.append([gt_pitch, gt_yaw, gt_roll]) 143 | 144 | if args.debug: 145 | save_img_path = "./tmp/"+str(ind).zfill(2)+"#"+str(id).zfill(2)+\ 146 | "_p"+str(round(gt_pitch, 2))+"#"+str(np.round(pitch, 2))+\ 147 | "_y"+str(round(gt_yaw, 2))+"#"+str(np.round(yaw, 2))+\ 148 | "_r"+str(round(gt_roll, 2))+"#"+str(np.round(roll, 2))+".jpg" 149 | 150 | img_ori_copy = cv2.rectangle(img_ori.copy(), (int(bbox[0]), int(bbox[1])), 151 | (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 152 | img_ori_copy = draw_axis(img_ori_copy, yaw, pitch, roll, 153 | tdx=(bbox[0]+bbox[2])/2, tdy=(bbox[1]+bbox[3])/2, size=100) 154 | cv2.imwrite(save_img_path, img_ori_copy) 155 | 156 | ind += 1 157 | os.remove(cropped_img_path) 158 | 159 | 160 | '''print all results''' 161 | print("Inference one image taking time:", sum(taking_time_list[1:])/len(taking_time_list[1:])) 162 | 163 | error_list_frontal = np.abs(np.array(pd_poses_frontal) - np.array(gt_poses_frontal)) 164 
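# NOTE (hedged aside, not part of the original script): MAE_frontal reported below
# is computed only over heads with |gt_yaw| < 90 (collected separately in the loop
# above), while MAE uses every matched head from the full-range json.  An
# equivalent mask-based selection of the frontal subset would be:
'''
gt_arr, pd_arr = np.array(gt_poses), np.array(pd_poses)
frontal = np.abs(gt_arr[:, 1]) < 90      # column 1 is yaw in [pitch, yaw, roll]
gt_frontal, pd_frontal = gt_arr[frontal], pd_arr[frontal]
'''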
| error_list_frontal = np.min((error_list_frontal, 360 - error_list_frontal), axis=0) 165 | pose_matrix_frontal = np.mean(error_list_frontal, axis=0) 166 | MAE_frontal = np.mean(pose_matrix_frontal) 167 | print("frontal face number: %d; MAE_frontal: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%( 168 | len(error_list_frontal), round(MAE_frontal, 4), round(pose_matrix_frontal[0], 4), 169 | round(pose_matrix_frontal[1], 4), round(pose_matrix_frontal[2], 4))) 170 | 171 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 172 | error_list = np.min((error_list, 360 - error_list), axis=0) 173 | pose_matrix = np.mean(error_list, axis=0) 174 | MAE = np.mean(pose_matrix) 175 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%(len(taking_time_list), 176 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 177 | 178 | 179 | if __name__ == '__main__': 180 | parser = argparse.ArgumentParser(description='FAN inference pipeline') 181 | 182 | parser.add_argument('--root-imgdir', default='', 183 | help='root path to multiple images') 184 | parser.add_argument('--json-file', default='', 185 | help='json file path that contains multiple images and their head bboxes') 186 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 187 | 188 | args = parser.parse_args() 189 | main(args) -------------------------------------------------------------------------------- /exps/compare_img2pose.py: -------------------------------------------------------------------------------- 1 | 2 | '''Too slow inference speed''' 3 | 4 | __author__ = 'Huayi Zhou' 5 | 6 | ''' 7 | 8 | git clone https://github.com/vitoralbiero/img2pose ./img2pose 9 | 10 | Put this file under the main folder of codes project img2pose 11 | 12 | usage: 13 | python compare_img2pose.py --root-imgdir /path/to/root/imgdir \ 14 | --json-file /path/to/prepared/json/file 15 | 16 | 17 | e.g.: 18 | python compare_img2pose.py --root-imgdir /datasdc/zhouhuayi/dataset/AGORA/HPE/images/validation --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/agora_m_1280_e300_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 19 | [results] 20 | Inference one image taking time: 0.019194672508227584 21 | face number: 3138; MAE: 19.9507, [pitch_error, yaw_error, roll_error]: 22.1878, 17.2238, 20.4407 22 | 23 | 24 | python compare_img2pose.py --root-imgdir /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/images/val --json-file /datasdc/zhouhuayi/face_related/DirectMHP/runs/DirectMHP/cmu_m_1280_e200_t40_lw010/weights/val_best_c0.001_i0.65_pd_frontal.json --debug 25 | [results] 26 | Inference one image taking time: 0.019501390085946375 27 | face number: 15724; MAE: 15.0667, [pitch_error, yaw_error, roll_error]: 16.6038, 13.0171, 15.5792 28 | 29 | ''' 30 | 31 | import os 32 | import argparse 33 | import time 34 | import json 35 | import cv2 36 | 37 | import numpy as np 38 | from tqdm import tqdm 39 | from pathlib import Path 40 | from math import cos, sin, pi 41 | 42 | 43 | import torch 44 | from torchvision import transforms 45 | from PIL import Image 46 | from scipy.spatial.transform import Rotation 47 | from img2pose import img2poseModel 48 | from model_loader import load_model 49 | 50 | 51 | np.set_printoptions(suppress=True) 52 | 53 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 54 | 55 | root_path = str(Path(__file__).absolute().parent.parent) 56 | 57 | 58 | def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size = 100): 
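    # Draws the head's local X/Y/Z axes on `img` from Euler angles given in degrees:
    # the X-axis (pointing right) in red, the Y-axis (pointing down) in green, and the
    # Z-axis (out of the screen) in blue. (tdx, tdy) is the drawing origin and defaults
    # to the image center; `size` is the drawn axis length in pixels.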
59 | # Referenced from HopeNet https://github.com/natanielruiz/deep-head-pose 60 | pitch = pitch * np.pi / 180 61 | yaw = -(yaw * np.pi / 180) 62 | roll = roll * np.pi / 180 63 | 64 | if tdx != None and tdy != None: 65 | tdx = tdx 66 | tdy = tdy 67 | else: 68 | height, width = img.shape[:2] 69 | tdx = width / 2 70 | tdy = height / 2 71 | 72 | # X-Axis pointing to right. drawn in red 73 | x1 = size * (cos(yaw) * cos(roll)) + tdx 74 | y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy 75 | 76 | # Y-Axis | drawn in green 77 | # v 78 | x2 = size * (-cos(yaw) * sin(roll)) + tdx 79 | y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy 80 | 81 | # Z-Axis (out of the screen) drawn in blue 82 | x3 = size * (sin(yaw)) + tdx 83 | y3 = size * (-cos(yaw) * sin(pitch)) + tdy 84 | 85 | cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),2) 86 | cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),2) 87 | cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2) 88 | return img 89 | 90 | 91 | def convert_to_aflw(rotvec, is_rotvec=True): 92 | if is_rotvec: 93 | rotvec = Rotation.from_rotvec(rotvec).as_matrix() 94 | rot_mat_2 = np.transpose(rotvec) 95 | angle = Rotation.from_matrix(rot_mat_2).as_euler('xyz', degrees=True) 96 | 97 | return np.array([angle[0], -angle[1], -angle[2]]) # Pitch, Yaw, Roll 98 | 99 | 100 | def main(args): 101 | 102 | transform = transforms.Compose([transforms.ToTensor()]) 103 | 104 | DEPTH = 18 105 | MAX_SIZE = 1400 106 | MIN_SIZE = 400 107 | 108 | POSE_MEAN = "./models/WIDER_train_pose_mean_v1.npy" 109 | POSE_STDDEV = "./models/WIDER_train_pose_stddev_v1.npy" 110 | # MODEL_PATH = "./models/img2pose_v1_ft_300w_lp.pth" 111 | MODEL_PATH = "./models/img2pose_v1.pth" # 161 MB 112 | 113 | threed_points = np.load('./pose_references/reference_3d_68_points_trans.npy') 114 | 115 | pose_mean = np.load(POSE_MEAN) 116 | pose_stddev = np.load(POSE_STDDEV) 117 | 118 | img2pose_model = img2poseModel( 119 | DEPTH, MIN_SIZE, MAX_SIZE, 120 | pose_mean=pose_mean, pose_stddev=pose_stddev, 121 | threed_68_points=threed_points, 122 | rpn_pre_nms_top_n_test=500, 123 | rpn_post_nms_top_n_test=10, 124 | ) 125 | load_model(img2pose_model.fpn_model, MODEL_PATH, cpu_mode=str(img2pose_model.device) == "cpu", model_only=True) 126 | img2pose_model.evaluate() 127 | 128 | 129 | total_failures = 0 130 | 131 | 132 | with open(args.json_file, "r") as json_f: 133 | pd_results_list = json.load(json_f) 134 | 135 | pd_poses = [] # predicted pose collection 136 | gt_poses = [] # ground-truth pose collection 137 | taking_time_list = [] # how many ms per face 138 | 139 | for ind, pd_results in enumerate(tqdm(pd_results_list)): 140 | if args.debug and ind > 50: break # for testing 141 | 142 | img_path = os.path.join(args.root_imgdir, str(pd_results["image_id"])+".jpg") 143 | img_ori = cv2.imread(img_path) 144 | 145 | bbox = pd_results['bbox'] # bbox default format is [x0,y0,w,h], should be converted to [x0,y0,x1,y1] 146 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 147 | 148 | gt_pitch = pd_results['gt_pitch'] 149 | gt_yaw = pd_results['gt_yaw'] 150 | gt_roll = pd_results['gt_roll'] 151 | 152 | 153 | t1 = time.time() 154 | [x1, y1, x2, y2] = [int(i) for i in bbox] 155 | face_roi = img_ori[y1:y2+1,x1:x2+1] 156 | 157 | # preprocess headpose model input 158 | face_roi = Image.fromarray(cv2.cvtColor(face_roi, cv2.COLOR_BGR2RGB)) # opencv --> PIL 159 | 160 | # get headpose 161 | res = 
img2pose_model.predict([transform(face_roi)]) 162 | 163 | res = res[0] 164 | bboxes = res["boxes"].cpu().numpy().astype('float') 165 | 166 | if len(bboxes) == 0: 167 | total_failures += 1 168 | continue 169 | 170 | max_score = 0 171 | best_index = -1 172 | for i in range(len(bboxes)): 173 | score = res["scores"][i] 174 | if score > max_score: 175 | max_score = score 176 | best_index = i 177 | 178 | pose_pred = res["dofs"].cpu().numpy()[best_index].astype('float') 179 | pose_pred = np.asarray(pose_pred.squeeze()) 180 | pose_pred[:3] = convert_to_aflw(pose_pred[:3]) 181 | 182 | [pitch, yaw, roll] = pose_pred[:3] 183 | 184 | t2 = time.time() 185 | taking_time_list.append(t2-t1) 186 | 187 | pd_poses.append([pitch, yaw, roll]) 188 | gt_poses.append([gt_pitch, gt_yaw, gt_roll]) 189 | 190 | if args.debug: 191 | save_img_path = "./tmp/"+str(ind).zfill(0)+\ 192 | "_p"+str(round(gt_pitch, 2))+"v"+str(np.round(pitch, 2))+\ 193 | "_y"+str(round(gt_yaw, 2))+"v"+str(np.round(yaw, 2))+\ 194 | "_r"+str(round(gt_roll, 2))+"v"+str(np.round(roll, 2))+".jpg" 195 | 196 | cv2.rectangle(img_ori, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,255,255), 2) 197 | img_ori = draw_axis(img_ori, yaw, pitch, roll, 198 | tdx=(bbox[0]+bbox[2])/2, tdy=(bbox[1]+bbox[3])/2, size=100) 199 | cv2.imwrite(save_img_path, img_ori) 200 | 201 | '''print all results''' 202 | print("Inference one image taking time:", sum(taking_time_list[1:])/len(taking_time_list[1:])) 203 | 204 | error_list = np.abs(np.array(gt_poses) - np.array(pd_poses)) 205 | error_list = np.min((error_list, 360 - error_list), axis=0) 206 | pose_matrix = np.mean(error_list, axis=0) 207 | MAE = np.mean(pose_matrix) 208 | print("face number: %d; MAE: %s, [pitch_error, yaw_error, roll_error]: %s, %s, %s"%(len(taking_time_list), 209 | round(MAE, 4), round(pose_matrix[0], 4), round(pose_matrix[1], 4), round(pose_matrix[2], 4))) 210 | 211 | 212 | if __name__ == '__main__': 213 | parser = argparse.ArgumentParser(description='FAN inference pipeline') 214 | 215 | parser.add_argument('--root-imgdir', default='', 216 | help='root path to multiple images') 217 | parser.add_argument('--json-file', default='', 218 | help='json file path that contains multiple images and their head bboxes') 219 | parser.add_argument('--debug', action='store_true', help='whether set into debug mode') 220 | 221 | args = parser.parse_args() 222 | main(args) -------------------------------------------------------------------------------- /exps/convert_coco_style_300wlp_aflw2000.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import copy 4 | import shutil 5 | from tqdm import tqdm 6 | 7 | coco_dict_template = { 8 | 'info': { 9 | 'description': 'Face landmarks, Euler angles and 3D Cubes of 300W_LP & AFLW2000 Dataset', 10 | 'url': 'http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm', 11 | 'version': '1.0', 12 | 'year': 2022, 13 | 'contributor': 'Huayi Zhou', 14 | 'date_created': '2022/07/28', 15 | }, 16 | 'licences': [{ 17 | 'url': 'http://creativecommons.org/licenses/by-nc/2.0', 18 | 'name': 'Attribution-NonCommercial License' 19 | }], 20 | 'images': [], 21 | 'annotations': [], 22 | 'categories': [{ 23 | 'supercategory': 'person', 24 | 'id': 1, 25 | 'name': 'person' 26 | }] 27 | } 28 | 29 | def convert_to_coco_style(source_img, target_img, source_json, target_json, coco_dict): 30 | print(source_img, " --> ", target_img) 31 | print(source_json, " --> ", target_json) 32 | 33 | if 
os.path.exists(target_img): 34 | shutil.rmtree(target_img) 35 | os.mkdir(target_img) 36 | 37 | json_img_dict = json.load(open(source_json, "r")) 38 | index_id = 0 39 | for img_name in tqdm(json_img_dict.keys()): 40 | labels = json_img_dict[img_name] 41 | 42 | image_id = 1000000 + index_id # 300W_LP has about 122217 images 43 | temp_image = {'file_name': str(image_id)+".jpg", 44 | 'height': labels['height'], 'width': labels['width'], 'id': image_id} 45 | 46 | source_img_path = os.path.join(source_img, img_name) 47 | target_img_path = os.path.join(target_img, str(image_id)+".jpg") 48 | shutil.copy(source_img_path, target_img_path) 49 | 50 | # bbox: [xmin, ymin, xmax, ymax] --> [xmin, ymin, w, h] 51 | [xmin, ymin, xmax, ymax] = labels["bbox"] 52 | labels["bbox"] = [xmin, ymin, xmax-xmin, ymax-ymin] 53 | 54 | # pose: [yaw, pitch, roll] --> [pitch, yaw, roll] 55 | [yaw, pitch, roll] = labels["pose"] 56 | labels["pose"] = [pitch, yaw, roll] 57 | 58 | labels_new = { 59 | 'face2d_pts': labels["landmarks"], 60 | 'bbox': labels["bbox"], 61 | 'euler_angles': labels["pose"], 62 | 'cube': labels["cube"], 63 | 'image_id': image_id, 64 | 'id': image_id, # only one head in each image 65 | 'category_id': 1, 66 | 'iscrowd': 0, 67 | 'segmentation': [], # This script is not for segmentation 68 | 'area': round(labels["bbox"][-1] * labels["bbox"][-2], 4) 69 | } 70 | coco_dict['images'].append(temp_image) 71 | coco_dict['annotations'].append(labels_new) 72 | 73 | index_id += 1 74 | 75 | with open(target_json, "w") as dst_ann_file: 76 | json.dump(coco_dict, dst_ann_file) 77 | 78 | 79 | if __name__ == '__main__': 80 | 81 | train_image_file = "./HeadCube3D/images/300W_LP/" 82 | train_image_file_coco = "./HeadCube3D/images/train/" 83 | train_json_file = "./HeadCube3D/annotations/train_300W_LP.json" 84 | train_json_file_coco = "./HeadCube3D/annotations/train_300W_LP_coco_style.json" 85 | coco_dict_train = copy.deepcopy(coco_dict_template) 86 | convert_to_coco_style(train_image_file, train_image_file_coco, 87 | train_json_file, train_json_file_coco, coco_dict_train) 88 | 89 | 90 | val_image_file = "./HeadCube3D/images/AFLW2000/" 91 | val_image_file_coco = "./HeadCube3D/images/validation/" 92 | val_json_file = "./HeadCube3D/annotations/val_AFLW2000.json" 93 | val_json_file_coco = "./HeadCube3D/annotations/val_AFLW2000_coco_style.json" 94 | coco_dict_val = copy.deepcopy(coco_dict_template) 95 | convert_to_coco_style(val_image_file, val_image_file_coco, 96 | val_json_file, val_json_file_coco, coco_dict_val) 97 | -------------------------------------------------------------------------------- /exps/convert_coco_style_300wlp_biwi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import copy 4 | import shutil 5 | from tqdm import tqdm 6 | 7 | coco_dict_template = { 8 | 'info': { 9 | 'description': 'Face landmarks, Euler angles and 3D Cubes of 300W_LP & AFLW2000 Dataset', 10 | 'url': 'http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm', 11 | 'version': '1.0', 12 | 'year': 2022, 13 | 'contributor': 'Huayi Zhou', 14 | 'date_created': '2022/07/28', 15 | }, 16 | 'licences': [{ 17 | 'url': 'http://creativecommons.org/licenses/by-nc/2.0', 18 | 'name': 'Attribution-NonCommercial License' 19 | }], 20 | 'images': [], 21 | 'annotations': [], 22 | 'categories': [{ 23 | 'supercategory': 'person', 24 | 'id': 1, 25 | 'name': 'person' 26 | }] 27 | } 28 | 29 | def convert_to_coco_style(source_img, target_img, source_json, target_json, coco_dict): 30 | 
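    # Builds a COCO-style detection json from a {image_name: labels} dict: each image is
    # assigned a synthetic integer id and written/linked as "<id>.jpg", the bbox is rewritten
    # from [xmin, ymin, xmax, ymax] to [x, y, w, h], and the pose is reordered from
    # [yaw, pitch, roll] to [pitch, yaw, roll] before being stored under 'euler_angles'.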
print(source_img, " --> ", target_img) 31 | print(source_json, " --> ", target_json) 32 | 33 | if os.path.exists(target_img): 34 | shutil.rmtree(target_img) 35 | os.mkdir(target_img) 36 | 37 | json_img_dict = json.load(open(source_json, "r")) 38 | index_id = 0 39 | for img_name in tqdm(json_img_dict.keys()): 40 | labels = json_img_dict[img_name] 41 | 42 | image_id = 1000000 + index_id # 300W_LP has about 122217 images 43 | temp_image = {'file_name': str(image_id)+".jpg", 44 | 'height': labels['height'], 'width': labels['width'], 'id': image_id} 45 | 46 | source_img_path = os.path.join(source_img, img_name) 47 | target_img_path = os.path.join(target_img, str(image_id)+".jpg") 48 | # shutil.copy(source_img_path, target_img_path) 49 | os.system('ln -s %s %s'%(source_img_path, target_img_path)) 50 | 51 | # bbox: [xmin, ymin, xmax, ymax] --> [xmin, ymin, w, h] 52 | [xmin, ymin, xmax, ymax] = labels["bbox"] 53 | labels["bbox"] = [xmin, ymin, xmax-xmin, ymax-ymin] 54 | 55 | # pose: [yaw, pitch, roll] --> [pitch, yaw, roll] 56 | [yaw, pitch, roll] = labels["pose"] 57 | labels["pose"] = [pitch, yaw, roll] 58 | 59 | labels_new = { 60 | 'face2d_pts': labels["landmarks"], 61 | 'bbox': labels["bbox"], 62 | 'euler_angles': labels["pose"], 63 | 'cube': labels["cube"], 64 | 'image_id': image_id, 65 | 'id': image_id, # only one head in each image 66 | 'category_id': 1, 67 | 'iscrowd': 0, 68 | 'segmentation': [], # This script is not for segmentation 69 | 'area': round(labels["bbox"][-1] * labels["bbox"][-2], 4) 70 | } 71 | coco_dict['images'].append(temp_image) 72 | coco_dict['annotations'].append(labels_new) 73 | 74 | index_id += 1 75 | 76 | with open(target_json, "w") as dst_ann_file: 77 | json.dump(coco_dict, dst_ann_file) 78 | 79 | 80 | if __name__ == '__main__': 81 | 82 | # train_image_file = "/datasdc/zhouhuayi/dataset/headpose/HeadCube3D/images/300W_LP/" 83 | # train_image_file_coco = "./HeadCube3D/images/train/" 84 | # if os.path.exists(train_image_file_coco): 85 | # shutil.rmtree(train_image_file_coco) 86 | # os.mkdir(train_image_file_coco) 87 | # train_json_file = "./HeadCube3D/annotations/train_300W_LP.json" 88 | # train_json_file_coco = "./HeadCube3D/annotations/train_300W_LP_coco_style.json" 89 | # coco_dict_train = copy.deepcopy(coco_dict_template) 90 | # convert_to_coco_style(train_image_file, train_image_file_coco, 91 | # train_json_file, train_json_file_coco, coco_dict_train) 92 | 93 | 94 | val_image_file = "/datasdc/zhouhuayi/dataset/headpose/HeadCube3D/images/BIWI_test/" 95 | val_image_file_coco = "./HeadCube3D/images/validation/" 96 | if os.path.exists(val_image_file_coco): 97 | shutil.rmtree(val_image_file_coco) 98 | os.mkdir(val_image_file_coco) 99 | val_json_file = "./HeadCube3D/annotations/BIWI_test.json" 100 | val_json_file_coco = "./HeadCube3D/annotations/BIWI_test_coco_style.json" 101 | coco_dict_val = copy.deepcopy(coco_dict_template) 102 | convert_to_coco_style(val_image_file, val_image_file_coco, 103 | val_json_file, val_json_file_coco, coco_dict_val) 104 | -------------------------------------------------------------------------------- /exps/gen_dataset_full_AGORA_CMU.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import cv2 4 | import json 5 | import argparse 6 | import numpy as np 7 | 8 | from tqdm import tqdm 9 | 10 | def get_args(): 11 | parser = argparse.ArgumentParser(description="This script cleans-up noisy labels " 12 | "and creates database for training.", 13 | 
formatter_class=argparse.ArgumentDefaultsHelpFormatter) 14 | parser.add_argument("--db", type=str, default='./AGORA', 15 | help="path to database") 16 | parser.add_argument("--data_type", type=str, default='train', 17 | help="data type, train or val") 18 | parser.add_argument("--img_size", type=int, default=256, 19 | help="output image size") 20 | parser.add_argument("--plot", type=bool, default=False, 21 | help="plot image flag") 22 | 23 | parser.add_argument('--root_dir = ', 24 | dest='root_dir', 25 | help='root directory of the datasets files', 26 | default='./datasets/AGORA/', 27 | type=str) 28 | parser.add_argument('--filename', 29 | dest='filename', 30 | help='Output filename.', 31 | default='files_train.txt', 32 | type=str) 33 | 34 | args = parser.parse_args() 35 | return args 36 | 37 | def sort_labels_by_image_id(labels_list): 38 | images_labels_dict = {} 39 | for i, labels_dict in enumerate(labels_list): 40 | image_id = str(labels_dict['image_id']) 41 | if image_id in images_labels_dict.keys(): 42 | images_labels_dict[image_id].append(labels_dict) 43 | else: 44 | images_labels_dict[image_id] = [labels_dict] 45 | return images_labels_dict 46 | 47 | def main(): 48 | 49 | args = get_args() 50 | mypath = args.db 51 | data_type = args.data_type 52 | img_size = args.img_size 53 | isPlot = args.plot 54 | 55 | output_path = args.root_dir 56 | filename = args.filename 57 | 58 | if not os.path.exists(output_path): 59 | os.mkdir(output_path) 60 | 61 | if "train" == data_type: 62 | if "AGORA" in mypath: 63 | img_path = os.path.join(mypath, "images", "train") 64 | json_path = os.path.join(mypath, "annotations", "coco_style_train_v2.json") 65 | if "CMU" in mypath: 66 | img_path = os.path.join(mypath, "images", "train") 67 | json_path = os.path.join(mypath, "annotations", "coco_style_sampled_train_v2.json") 68 | if "val" == data_type: 69 | if "AGORA" in mypath: 70 | img_path = os.path.join(mypath, "images", "validation") 71 | json_path = os.path.join(mypath, "annotations", "coco_style_validation_v2.json") 72 | if "CMU" in mypath: 73 | img_path = os.path.join(mypath, "images", "val") 74 | json_path = os.path.join(mypath, "annotations", "coco_style_sampled_val_v2.json") 75 | 76 | save_img_path = os.path.join(output_path, data_type) 77 | save_filename = os.path.join(output_path, filename) 78 | 79 | if os.path.exists(save_img_path): 80 | shutil.rmtree(save_img_path) 81 | os.mkdir(save_img_path) 82 | 83 | anno_json_dict = json.load(open(json_path, "r")) 84 | imgs_dict_list = anno_json_dict["images"] 85 | imgs_labels_dict = sort_labels_by_image_id(anno_json_dict["annotations"]) 86 | 87 | print("Json file: %s\n[images number]: %d\n[head instances number]: %d"%( 88 | json_path, len(imgs_dict_list), len(anno_json_dict["annotations"]) )) 89 | 90 | out_imgs = [] 91 | out_poses = [] 92 | 93 | outfile = open(save_filename, 'w') 94 | for i, imgs_dict in enumerate(tqdm(imgs_dict_list)): 95 | img_name = imgs_dict["file_name"] 96 | img_id = str(imgs_dict["id"]) 97 | 98 | img_ori = cv2.imread(os.path.join(img_path, img_name)) 99 | 100 | img_anno_list = imgs_labels_dict[img_id] 101 | for img_anno in img_anno_list: 102 | [x, y, w, h] = img_anno["bbox"] 103 | [pitch, yaw, roll] = img_anno["euler_angles"] 104 | instance_id = img_anno["id"] 105 | 106 | # if abs(yaw) < 90: # for FSA-Net, we only focus on the head with frontal face 107 | # img_crop = img_ori[int(y):int(y+h), int(x):int(x+w)] 108 | # img_crop = cv2.resize(img_crop, (img_size, img_size)) 109 | 110 | # out_imgs.append(img_crop) 111 | # 
out_poses.append(np.array([yaw, pitch, roll])) 112 | # else: 113 | # continue 114 | 115 | 116 | # for 6DRepNet with full-range design, we focus on all the labeled heads 117 | img_crop = img_ori[int(y):int(y+h), int(x):int(x+w)] 118 | img_crop = cv2.resize(img_crop, (img_size, img_size)) 119 | 120 | save_img_path_abs = os.path.join(save_img_path, str(instance_id)+".jpg") 121 | cv2.imwrite(save_img_path_abs, img_crop) 122 | 123 | outfile.write(str(instance_id)+".jpg" + " %.4f %.4f %.4f\n"%(pitch, yaw, roll)) 124 | 125 | 126 | if i < 2: 127 | if "AGORA" in mypath: 128 | cv2.imwrite("./tmp/"+str(instance_id)+"_agora.jpg", img_crop) 129 | if "CMU" in mypath: 130 | cv2.imwrite("./tmp/"+str(instance_id)+"_cmu.jpg", img_crop) 131 | 132 | # Checking the cropped image 133 | if isPlot: 134 | cv2.imshow('check', img_crop) 135 | k=cv2.waitKey(300) 136 | 137 | outfile.close() 138 | 139 | if __name__ == "__main__": 140 | main() 141 | 142 | ''' 143 | AGORA 144 | Json file: /datasdc/zhouhuayi/dataset/AGORA/HPE/annotations/coco_style_train_v2.json 145 | [images number]: 14408 146 | [head instances number]: 105046 147 | 148 | Json file: /datasdc/zhouhuayi/dataset/AGORA/HPE/annotations/coco_style_validation_v2.json 149 | [images number]: 1070 150 | [head instances number]: 7505 151 | 152 | 153 | CMU 154 | Json file: /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/annotations/coco_style_sampled_train_v2.json 155 | [images number]: 15718 156 | [head instances number]: 35725 157 | 158 | Json file: /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/annotations/coco_style_sampled_val_v2.json 159 | [images number]: 16216 160 | [head instances number]: 32738 161 | ''' -------------------------------------------------------------------------------- /exps/gen_dataset_single_AGORA.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import cv2 4 | import json 5 | import argparse 6 | import numpy as np 7 | 8 | from tqdm import tqdm 9 | 10 | def get_args(): 11 | parser = argparse.ArgumentParser(description="This script cleans-up noisy labels " 12 | "and creates database for training.", 13 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 14 | parser.add_argument("--db", type=str, default='./AGORA', 15 | help="path to database") 16 | parser.add_argument("--output", type=str, default='./AGORA.npz', 17 | help="path to output database mat file") 18 | parser.add_argument("--img_size", type=int, default=64, 19 | help="output image size") 20 | parser.add_argument("--plot", type=bool, default=False, 21 | help="plot image flag") 22 | 23 | 24 | args = parser.parse_args() 25 | return args 26 | 27 | def sort_labels_by_image_id(labels_list): 28 | images_labels_dict = {} 29 | for i, labels_dict in enumerate(labels_list): 30 | image_id = str(labels_dict['image_id']) 31 | if image_id in images_labels_dict.keys(): 32 | images_labels_dict[image_id].append(labels_dict) 33 | else: 34 | images_labels_dict[image_id] = [labels_dict] 35 | return images_labels_dict 36 | 37 | def main(): 38 | 39 | args = get_args() 40 | mypath = args.db 41 | output_path = args.output 42 | img_size = args.img_size 43 | isPlot = args.plot 44 | 45 | if "train" in output_path: 46 | img_path = os.path.join(mypath, "images", "train") 47 | json_path = os.path.join(mypath, "annotations", "coco_style_train_v2.json") 48 | if "val" in output_path: 49 | img_path = os.path.join(mypath, "images", "validation") 50 | json_path = os.path.join(mypath, "annotations", "coco_style_validation_v2.json") 51 | 52 | 53 | 
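    # Example invocation (the dataset root below is a placeholder path):
    #   python gen_dataset_single_AGORA.py --db /path/to/AGORA/HPE --output ./AGORA_train.npz --img_size 64
    # Whether "train" or "val" appears in --output decides which coco-style json was selected above.
    # The loop below keeps only heads with |yaw| < 90 deg, crops and resizes each one to
    # img_size x img_size, and saves the stacked crops and [yaw, pitch, roll] labels into the .npz file.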
anno_json_dict = json.load(open(json_path, "r")) 54 | imgs_dict_list = anno_json_dict["images"] 55 | imgs_labels_dict = sort_labels_by_image_id(anno_json_dict["annotations"]) 56 | 57 | print("Json file: %s\n[images number]: %d\n[head instances number]: %d"%( 58 | json_path, len(imgs_dict_list), len(anno_json_dict["annotations"]) )) 59 | 60 | out_imgs = [] 61 | out_poses = [] 62 | for i, imgs_dict in enumerate(tqdm(imgs_dict_list)): 63 | img_name = imgs_dict["file_name"] 64 | img_id = str(imgs_dict["id"]) 65 | 66 | img_ori = cv2.imread(os.path.join(img_path, img_name)) 67 | 68 | img_anno_list = imgs_labels_dict[img_id] 69 | for img_anno in img_anno_list: 70 | [x, y, w, h] = img_anno["bbox"] 71 | [pitch, yaw, roll] = img_anno["euler_angles"] 72 | instance_id = img_anno["id"] 73 | 74 | if abs(yaw) < 90: # for FSA-Net, we only focus on the head with frontal face 75 | img_crop = img_ori[int(y):int(y+h), int(x):int(x+w)] 76 | img_crop = cv2.resize(img_crop, (img_size, img_size)) 77 | 78 | out_imgs.append(img_crop) 79 | out_poses.append(np.array([yaw, pitch, roll])) 80 | else: 81 | continue 82 | 83 | if i < 2: 84 | cv2.imwrite("./tmp/"+str(instance_id)+"_agora.jpg", img_crop) 85 | 86 | # Checking the cropped image 87 | if isPlot: 88 | cv2.imshow('check', img_crop) 89 | k=cv2.waitKey(300) 90 | 91 | print("[left head instances]: %d"%(len(out_imgs) )) 92 | 93 | np.savez(output_path, image=np.array(out_imgs), pose=np.array(out_poses), img_size=img_size) 94 | 95 | if __name__ == "__main__": 96 | main() 97 | 98 | ''' 99 | Json file: /datasdc/zhouhuayi/dataset/AGORA/HPE/annotations/coco_style_train_v2.json 100 | [images number]: 14408 101 | [head instances number]: 105046 102 | [left head instances]: 52639 103 | 104 | Json file: /datasdc/zhouhuayi/dataset/AGORA/HPE/annotations/coco_style_validation_v2.json 105 | [images number]: 1070 106 | [head instances number]: 7505 107 | [left head instances]: 3781 108 | ''' -------------------------------------------------------------------------------- /exps/gen_dataset_single_CMU.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import cv2 4 | import json 5 | import argparse 6 | import numpy as np 7 | 8 | from tqdm import tqdm 9 | 10 | def get_args(): 11 | parser = argparse.ArgumentParser(description="This script cleans-up noisy labels " 12 | "and creates database for training.", 13 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 14 | parser.add_argument("--db", type=str, default='./CMU', 15 | help="path to database") 16 | parser.add_argument("--output", type=str, default='./CMU.npz', 17 | help="path to output database mat file") 18 | parser.add_argument("--img_size", type=int, default=64, 19 | help="output image size") 20 | parser.add_argument("--plot", type=bool, default=False, 21 | help="plot image flag") 22 | 23 | 24 | args = parser.parse_args() 25 | return args 26 | 27 | def sort_labels_by_image_id(labels_list): 28 | images_labels_dict = {} 29 | for i, labels_dict in enumerate(labels_list): 30 | image_id = str(labels_dict['image_id']) 31 | if image_id in images_labels_dict.keys(): 32 | images_labels_dict[image_id].append(labels_dict) 33 | else: 34 | images_labels_dict[image_id] = [labels_dict] 35 | return images_labels_dict 36 | 37 | def main(): 38 | 39 | args = get_args() 40 | mypath = args.db 41 | output_path = args.output 42 | img_size = args.img_size 43 | isPlot = args.plot 44 | 45 | if "train" in output_path: 46 | img_path = os.path.join(mypath, "images", "train") 47 | json_path = 
os.path.join(mypath, "annotations", "coco_style_sampled_train_v2.json") 48 | if "val" in output_path: 49 | img_path = os.path.join(mypath, "images", "val") 50 | json_path = os.path.join(mypath, "annotations", "coco_style_sampled_val_v2.json") 51 | 52 | 53 | anno_json_dict = json.load(open(json_path, "r")) 54 | imgs_dict_list = anno_json_dict["images"] 55 | imgs_labels_dict = sort_labels_by_image_id(anno_json_dict["annotations"]) 56 | 57 | print("Json file: %s\n[images number]: %d\n[head instances number]: %d"%( 58 | json_path, len(imgs_dict_list), len(anno_json_dict["annotations"]) )) 59 | 60 | out_imgs = [] 61 | out_poses = [] 62 | for i, imgs_dict in enumerate(tqdm(imgs_dict_list)): 63 | img_name = imgs_dict["file_name"] 64 | img_id = str(imgs_dict["id"]) 65 | 66 | img_ori = cv2.imread(os.path.join(img_path, img_name)) 67 | 68 | img_anno_list = imgs_labels_dict[img_id] 69 | for img_anno in img_anno_list: 70 | [x, y, w, h] = img_anno["bbox"] 71 | [pitch, yaw, roll] = img_anno["euler_angles"] 72 | instance_id = img_anno["id"] 73 | 74 | if abs(yaw) < 90: # for FSA-Net, we only focus on the head with frontal face 75 | img_crop = img_ori[int(y):int(y+h), int(x):int(x+w)] 76 | img_crop = cv2.resize(img_crop, (img_size, img_size)) 77 | 78 | out_imgs.append(img_crop) 79 | out_poses.append(np.array([yaw, pitch, roll])) 80 | else: 81 | continue 82 | 83 | if i < 2: 84 | cv2.imwrite("./tmp/"+str(instance_id)+"_cmu.jpg", img_crop) 85 | 86 | # Checking the cropped image 87 | if isPlot: 88 | cv2.imshow('check', img_crop) 89 | k=cv2.waitKey(300) 90 | 91 | print("[left head instances]: %d"%(len(out_imgs) )) 92 | 93 | np.savez(output_path, image=np.array(out_imgs), pose=np.array(out_poses), img_size=img_size) 94 | 95 | if __name__ == "__main__": 96 | main() 97 | 98 | ''' 99 | Json file: /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/annotations/coco_style_sampled_train_v2.json 100 | [images number]: 15718 101 | [head instances number]: 35725 102 | [left head instances]: 18447 103 | 104 | Json file: /datasdc/zhouhuayi/dataset/CMUPanopticDataset/HPE/annotations/coco_style_sampled_val_v2.json 105 | [images number]: 16216 106 | [head instances number]: 32738 107 | [left head instances]: 16497 108 | ''' -------------------------------------------------------------------------------- /exps/sixdrepnet.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/exps/sixdrepnet.zip -------------------------------------------------------------------------------- /exps/statistic_angles.py: -------------------------------------------------------------------------------- 1 | import scipy.io as sio 2 | from os import listdir 3 | from os.path import isfile, join 4 | from tqdm import tqdm 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | def main_300W_LP(): 9 | db_name = "300W_LP" 10 | db_paths = [ 11 | "../300W_LP/AFW", "../300W_LP/AFW_Flip", 12 | "../300W_LP/HELEN", "../300W_LP/HELEN_Flip", 13 | "../300W_LP/IBUG", "../300W_LP/IBUG_Flip", 14 | "../300W_LP/LFPW", "../300W_LP/LFPW_Flip", 15 | "../300W_LP/AFW", "../300W_LP/AFW_Flip", 16 | "../AFLW2000"] # 300W_LP & AFLW2000 17 | 18 | total_num = 0 19 | euler_angles_stat = [[],[],[]] # pitch, yaw, roll 20 | 21 | for db_path in db_paths: 22 | onlyfiles_mat = [] 23 | for f in listdir(db_path): 24 | if isfile(join(db_path, f)) and join(db_path, f).endswith('.mat'): 25 | onlyfiles_mat.append(f) 26 | onlyfiles_mat.sort() 27 | print(db_path, 
"\t", len(onlyfiles_mat)) 28 | 29 | for i in tqdm(range(len(onlyfiles_mat))): 30 | mat_name = onlyfiles_mat[i] 31 | mat_contents = sio.loadmat(db_path + '/' + mat_name) 32 | pose_para = mat_contents['Pose_Para'][0] 33 | pt2d = mat_contents['pt2d'] 34 | 35 | pitch = pose_para[0] * 180 / np.pi 36 | yaw = pose_para[1] * 180 / np.pi 37 | roll = pose_para[2] * 180 / np.pi 38 | 39 | if abs(pitch)>99 or abs(yaw)>99 or abs(roll)>99: 40 | continue 41 | 42 | euler_angles_stat[0].append(pitch) 43 | euler_angles_stat[1].append(yaw) 44 | euler_angles_stat[2].append(roll) 45 | total_num += 1 46 | 47 | print("total_num:\t", total_num) 48 | 49 | '''Euler Angels Stat''' 50 | plt.figure(figsize=(10, 5), dpi=100) 51 | plt.title("300W_LP and AFLW2000") 52 | interval = 10 # 10 or 15 is better 53 | bins = 200 // interval 54 | density = True # True or False, density=False would make counts 55 | colors = ['r', 'g', 'b'] 56 | labels = ["Pitch", "Yaw", "Roll"] 57 | plt.hist(euler_angles_stat, bins=bins, alpha=0.7, density=density, histtype='bar', label=labels, color=colors) 58 | plt.legend(prop ={'size': 10}) 59 | # plt.xlim(-90, 91) 60 | plt.xticks(range(-100,101,interval)) 61 | if density: plt.ylabel('Percentage') 62 | else: plt.ylabel('Counts') 63 | plt.xlabel('Degree') 64 | plt.show() 65 | 66 | 67 | def main_BIWI(): 68 | db_path_train = "./BIWI_train.npz" 69 | db_path_test = "./BIWI_test.npz" 70 | 71 | total_num = 0 72 | euler_angles_stat = [[],[],[]] # pitch, yaw, roll 73 | 74 | for db_path in [db_path_train, db_path_test]: 75 | db_dict = np.load(db_path) 76 | print(db_path, list(db_dict.keys())) 77 | 78 | for cont_labels in tqdm(db_dict['pose']): 79 | [yaw, pitch, roll] = cont_labels 80 | 81 | if abs(pitch)>90 or abs(yaw)>90 or abs(roll)>90: 82 | continue 83 | 84 | euler_angles_stat[0].append(pitch) 85 | euler_angles_stat[1].append(yaw) 86 | euler_angles_stat[2].append(roll) 87 | total_num += 1 88 | 89 | print("total_num:\t", total_num) 90 | 91 | '''Euler Angels Stat''' 92 | plt.figure(figsize=(10, 5), dpi=100) 93 | plt.title("BIWI") 94 | interval = 10 # 10 or 15 is better 95 | bins = 180 // interval 96 | density = True # True or False, density=False would make counts 97 | colors = ['r', 'g', 'b'] 98 | labels = ["Pitch", "Yaw", "Roll"] 99 | plt.hist(euler_angles_stat, bins=bins, alpha=0.7, density=density, histtype='bar', label=labels, color=colors) 100 | plt.legend(prop ={'size': 10}) 101 | # plt.xlim(-90, 91) 102 | plt.xticks(range(-90,91,interval)) 103 | if density: plt.ylabel('Percentage') 104 | else: plt.ylabel('Counts') 105 | plt.xlabel('Degree') 106 | plt.show() 107 | 108 | 109 | if __name__ == '__main__': 110 | '''https://github.com/shamangary/FSA-Net''' 111 | # main_300W_LP() # total_num 134793 112 | main_BIWI() # total_num 15678 -------------------------------------------------------------------------------- /materials/000000002685_vis3d_res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000002685_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000000018380_vis3d_res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000018380_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000000038829_vis3d_res.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000038829_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000000081988_vis3d_res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000081988_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000000161925_vis3d_res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000161925_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000000183648_vis3d_res.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000000183648_vis3d_res.jpg -------------------------------------------------------------------------------- /materials/000002_mpiinew_test_DirectMHP_vis3d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000002_mpiinew_test_DirectMHP_vis3d.gif -------------------------------------------------------------------------------- /materials/000003_mpiinew_test_DirectMHP_vis3d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/000003_mpiinew_test_DirectMHP_vis3d.gif -------------------------------------------------------------------------------- /materials/datasetexamples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/datasetexamples.png -------------------------------------------------------------------------------- /materials/full_range.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/full_range.png -------------------------------------------------------------------------------- /materials/illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/materials/illustration.png -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Experimental modules 4 | """ 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | 10 | from models.common import Conv 11 | from utils.downloads import attempt_download 12 | 13 | 14 | class CrossConv(nn.Module): 15 | # Cross Convolution Downsample 16 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 17 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 18 | super().__init__() 19 | c_ = int(c2 * e) # hidden 
channels 20 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 21 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 22 | self.add = shortcut and c1 == c2 23 | 24 | def forward(self, x): 25 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 26 | 27 | 28 | class Sum(nn.Module): 29 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 30 | def __init__(self, n, weight=False): # n: number of inputs 31 | super().__init__() 32 | self.weight = weight # apply weights boolean 33 | self.iter = range(n - 1) # iter object 34 | if weight: 35 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 36 | 37 | def forward(self, x): 38 | y = x[0] # no weight 39 | if self.weight: 40 | w = torch.sigmoid(self.w) * 2 41 | for i in self.iter: 42 | y = y + x[i + 1] * w[i] 43 | else: 44 | for i in self.iter: 45 | y = y + x[i + 1] 46 | return y 47 | 48 | 49 | class MixConv2d(nn.Module): 50 | # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 51 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 52 | super().__init__() 53 | groups = len(k) 54 | if equal_ch: # equal c_ per group 55 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 56 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 57 | else: # equal weight.numel() per group 58 | b = [c2] + [0] * groups 59 | a = np.eye(groups + 1, groups, k=-1) 60 | a -= np.roll(a, 1, axis=1) 61 | a *= np.array(k) ** 2 62 | a[0] = 1 63 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 64 | 65 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 66 | self.bn = nn.BatchNorm2d(c2) 67 | self.act = nn.LeakyReLU(0.1, inplace=True) 68 | 69 | def forward(self, x): 70 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 71 | 72 | 73 | class Ensemble(nn.ModuleList): 74 | # Ensemble of models 75 | def __init__(self): 76 | super().__init__() 77 | 78 | def forward(self, x, augment=False, profile=False, visualize=False): 79 | y = [] 80 | for module in self: 81 | y.append(module(x, augment, profile, visualize)[0]) 82 | # y = torch.stack(y).max(0)[0] # max ensemble 83 | # y = torch.stack(y).mean(0) # mean ensemble 84 | y = torch.cat(y, 1) # nms ensemble 85 | return y, None # inference, train output 86 | 87 | 88 | def attempt_load(weights, map_location=None, inplace=True, fuse=True): 89 | from models.yolo import Detect, Model 90 | 91 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 92 | model = Ensemble() 93 | for w in weights if isinstance(weights, list) else [weights]: 94 | ckpt = torch.load(attempt_download(w), map_location=map_location) # load 95 | if fuse: 96 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model 97 | else: 98 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval()) # without layer fuse 99 | 100 | 101 | # Compatibility updates 102 | for m in model.modules(): 103 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]: 104 | m.inplace = inplace # pytorch 1.7.0 compatibility 105 | elif type(m) is Conv: 106 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 107 | 108 | if len(model) == 1: 109 | return model[-1] # return model 110 | else: 111 | print(f'Ensemble created with {weights}\n') 112 | for k in ['names']: 113 | setattr(model, k, getattr(model[-1], k)) 114 | model.stride = 
model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride 115 | return model # return ensemble 116 | -------------------------------------------------------------------------------- /models/yolov5l6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 9, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 1, SPP, [1024, [3, 5, 7]]], 27 | [-1, 3, C3, [1024, False]], # 11 28 | ] 29 | 30 | # YOLOv5 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] -------------------------------------------------------------------------------- /models/yolov5m6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 9, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 1, SPP, [1024, [3, 5, 7]]], 27 | [-1, 3, C3, [1024, False]], # 11 28 | ] 29 | 30 | # YOLOv5 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 
| [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] -------------------------------------------------------------------------------- /models/yolov5s6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 9, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 1, SPP, [1024, [3, 5, 7]]], 27 | [-1, 3, C3, [1024, False]], # 11 28 | ] 29 | 30 | # YOLOv5 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] -------------------------------------------------------------------------------- /pose_references/reference_3d_5_points_trans.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/pose_references/reference_3d_5_points_trans.npy -------------------------------------------------------------------------------- /pose_references/reference_3d_68_points_trans.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/pose_references/reference_3d_68_points_trans.npy -------------------------------------------------------------------------------- /pose_references/triangles.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/pose_references/triangles.npy -------------------------------------------------------------------------------- /pose_references/vertices_trans.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/pose_references/vertices_trans.npy -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple 2 | 3 | # pip3 install torch==1.10.0+cu111 torchvision==0.11.1+cu111 torchaudio==0.10.0+cu111 -f https://download.pytorch.org/whl/cu111/torch_stable.html 4 | # wget https://download.pytorch.org/whl/cu111/torch-1.10.0%2Bcu111-cp38-cp38-linux_x86_64.whl 5 | # wget https://download.pytorch.org/whl/cu111/torchvision-0.11.1%2Bcu111-cp38-cp38-linux_x86_64.whl 6 | # wget https://download.pytorch.org/whl/cu111/torchaudio-0.10.0%2Bcu111-cp38-cp38-linux_x86_64.whl 7 | 8 | # base ---------------------------------------- 9 | matplotlib>=3.2.2 10 | numpy>=1.18.5 11 | opencv-python>=4.1.2 12 | Pillow 13 | PyYAML>=5.3.1 14 | scipy>=1.4.1 15 | torch>=1.7.0 16 | torchvision>=0.8.1 17 | tqdm>=4.41.0 18 | 19 | # logging ------------------------------------- 20 | tensorboard>=2.4.1 21 | wandb 22 | 23 | # plotting ------------------------------------ 24 | seaborn>=0.11.0 25 | pandas 26 | 27 | # export -------------------------------------- 28 | # coremltools>=4.1 29 | # onnx>=1.9.0 30 | # scikit-learn==0.19.2 # for coreml quantization 31 | 32 | # extras -------------------------------------- 33 | # Cython # for pycocotools https://github.com/cocodataset/cocoapi/issues/172 34 | # pycocotools>=2.0 # COCO mAP 35 | thop # FLOPs computation -------------------------------------------------------------------------------- /test_imgs/AGORA/agora_val_2000400001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/AGORA/agora_val_2000400001.jpg -------------------------------------------------------------------------------- /test_imgs/AGORA/agora_val_2000400205.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/AGORA/agora_val_2000400205.jpg -------------------------------------------------------------------------------- /test_imgs/CMU/cmu_val_10400060013.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/CMU/cmu_val_10400060013.jpg -------------------------------------------------------------------------------- /test_imgs/CMU/cmu_val_10602142026.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/CMU/cmu_val_10602142026.jpg -------------------------------------------------------------------------------- /test_imgs/CMU/cmu_val_11500144012.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/CMU/cmu_val_11500144012.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000002685.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000002685.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000018380.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000018380.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000038829.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000038829.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000081988.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000081988.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000161925.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000161925.jpg -------------------------------------------------------------------------------- /test_imgs/COCO/000000183648.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/test_imgs/COCO/000000183648.jpg -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Activation functions 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ---------------------------------------------------------------------------- 12 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 13 | @staticmethod 14 | def forward(x): 15 | return x * torch.sigmoid(x) 16 | 17 | 18 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 19 | @staticmethod 20 | def forward(x): 21 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 22 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 23 | 24 | 25 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 26 | class Mish(nn.Module): 27 | @staticmethod 28 | def forward(x): 29 | return x * F.softplus(x).tanh() 30 | 31 | 32 | class MemoryEfficientMish(nn.Module): 33 | class F(torch.autograd.Function): 34 | @staticmethod 35 | def forward(ctx, x): 36 | ctx.save_for_backward(x) 37 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 38 | 39 | @staticmethod 40 | def backward(ctx, grad_output): 41 | x = ctx.saved_tensors[0] 42 | sx = torch.sigmoid(x) 43 | fx = F.softplus(x).tanh() 44 | return grad_output * (fx + x * sx * (1 - fx * fx)) 45 | 46 | def forward(self, x): 47 | return self.F.apply(x) 48 | 49 | 50 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 51 | class FReLU(nn.Module): 52 | def __init__(self, c1, k=3): # ch_in, kernel 53 | super().__init__() 54 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 55 | self.bn = nn.BatchNorm2d(c1) 56 | 57 | def forward(self, x): 58 | return torch.max(x, self.bn(self.conv(x))) 59 | 60 | 61 | # ACON https://arxiv.org/pdf/2009.04759.pdf ---------------------------------------------------------------------------- 62 | class AconC(nn.Module): 63 | r""" ACON activation (activate or not). 64 | AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter 65 | according to "Activate or Not: Learning Customized Activation" . 66 | """ 67 | 68 | def __init__(self, c1): 69 | super().__init__() 70 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 71 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 72 | self.beta = nn.Parameter(torch.ones(1, c1, 1, 1)) 73 | 74 | def forward(self, x): 75 | dpx = (self.p1 - self.p2) * x 76 | return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x 77 | 78 | 79 | class MetaAconC(nn.Module): 80 | r""" ACON activation (activate or not). 81 | MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network 82 | according to "Activate or Not: Learning Customized Activation" . 
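    In this implementation, beta is computed per channel from a global average pool of x
    passed through the two 1x1 convolutions fc1 and fc2, followed by a sigmoid; the original
    BatchNorm layers are commented out below because of batch-size-1 instability.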
83 | """ 84 | 85 | def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r 86 | super().__init__() 87 | c2 = max(r, c1 // r) 88 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 89 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 90 | self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True) 91 | self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True) 92 | # self.bn1 = nn.BatchNorm2d(c2) 93 | # self.bn2 = nn.BatchNorm2d(c1) 94 | 95 | def forward(self, x): 96 | y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True) 97 | # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891 98 | # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable 99 | beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed 100 | dpx = (self.p1 - self.p2) * x 101 | return dpx * torch.sigmoid(beta * dpx) + self.p2 * x 102 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Auto-anchor utils 4 | """ 5 | 6 | import random 7 | 8 | import numpy as np 9 | import torch 10 | import yaml 11 | from tqdm import tqdm 12 | 13 | from utils.general import colorstr 14 | 15 | 16 | def check_anchor_order(m): 17 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 18 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 19 | da = a[-1] - a[0] # delta a 20 | ds = m.stride[-1] - m.stride[0] # delta s 21 | if da.sign() != ds.sign(): # same order 22 | print('Reversing anchor order') 23 | m.anchors[:] = m.anchors.flip(0) 24 | m.anchor_grid[:] = m.anchor_grid.flip(0) 25 | 26 | 27 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 28 | # Check anchor fit to data, recompute if necessary 29 | prefix = colorstr('autoanchor: ') 30 | print(f'\n{prefix}Analyzing anchors... ', end='') 31 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 32 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 33 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 34 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 35 | 36 | def metric(k): # compute metric 37 | r = wh[:, None] / k[None] 38 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 39 | best = x.max(1)[0] # best_x 40 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 41 | bpr = (best > 1. / thr).float().mean() # best possible recall 42 | return bpr, aat 43 | 44 | anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors 45 | bpr, aat = metric(anchors) 46 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 47 | if bpr < 0.98: # threshold to recompute 48 | print('. 
Attempting to improve anchors, please wait...') 49 | na = m.anchor_grid.numel() // 2 # number of anchors 50 | try: 51 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 52 | except Exception as e: 53 | print(f'{prefix}ERROR: {e}') 54 | new_bpr = metric(anchors)[0] 55 | if new_bpr > bpr: # replace anchors 56 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 57 | m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference 58 | m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 59 | check_anchor_order(m) 60 | print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') 61 | else: 62 | print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.') 63 | print('') # newline 64 | 65 | 66 | def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 67 | """ Creates kmeans-evolved anchors from training dataset 68 | 69 | Arguments: 70 | dataset: path to data.yaml, or a loaded dataset 71 | n: number of anchors 72 | img_size: image size used for training 73 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 74 | gen: generations to evolve anchors using genetic algorithm 75 | verbose: print all results 76 | 77 | Return: 78 | k: kmeans evolved anchors 79 | 80 | Usage: 81 | from utils.autoanchor import *; _ = kmean_anchors() 82 | """ 83 | from scipy.cluster.vq import kmeans 84 | 85 | thr = 1. / thr 86 | prefix = colorstr('autoanchor: ') 87 | 88 | def metric(k, wh): # compute metrics 89 | r = wh[:, None] / k[None] 90 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 91 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 92 | return x, x.max(1)[0] # x, best_x 93 | 94 | def anchor_fitness(k): # mutation fitness 95 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 96 | return (best * (best > thr).float()).mean() # fitness 97 | 98 | def print_results(k): 99 | k = k[np.argsort(k.prod(1))] # sort small to large 100 | x, best = metric(k, wh0) 101 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 102 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 103 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 104 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 105 | for i, x in enumerate(k): 106 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 107 | return k 108 | 109 | if isinstance(dataset, str): # *.yaml file 110 | with open(dataset, errors='ignore') as f: 111 | data_dict = yaml.safe_load(f) # model dict 112 | from utils.datasets import LoadImagesAndLabels 113 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 114 | 115 | # Get label wh 116 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 117 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 118 | 119 | # Filter 120 | i = (wh0 < 3.0).any(1).sum() 121 | if i: 122 | print(f'{prefix}WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 123 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 124 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 125 | 126 | # Kmeans calculation 127 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 128 | s = wh.std(0) # sigmas for whitening 129 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 130 | assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}') 131 | k *= s 132 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 133 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 134 | k = print_results(k) 135 | 136 | # Plot 137 | # k, d = [None] * 20, [None] * 20 138 | # for i in tqdm(range(1, 21)): 139 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 140 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 141 | # ax = ax.ravel() 142 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 143 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 144 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 145 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 146 | # fig.savefig('wh.png', dpi=200) 147 | 148 | # Evolve 149 | npr = np.random 150 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 151 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar 152 | for _ in pbar: 153 | v = np.ones(sh) 154 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 155 | v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 156 | kg = (k.copy() * v).clip(min=2.0) 157 | fg = anchor_fitness(kg) 158 | if fg > f: 159 | f, k = fg, kg.copy() 160 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 161 | if verbose: 162 | print_results(k) 163 | 164 | return print_results(k) 165 | -------------------------------------------------------------------------------- /utils/callbacks.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Callback utils 4 | """ 5 | 6 | 7 | class Callbacks: 8 | """" 9 | Handles all registered callbacks for YOLOv5 Hooks 10 | """ 11 | 12 | _callbacks = { 13 | 'on_pretrain_routine_start': [], 14 | 'on_pretrain_routine_end': [], 15 | 16 | 'on_train_start': [], 17 | 'on_train_epoch_start': [], 18 | 'on_train_batch_start': [], 19 | 'optimizer_step': [], 20 | 'on_before_zero_grad': [], 21 | 'on_train_batch_end': [], 22 | 'on_train_epoch_end': [], 23 | 24 | 'on_val_start': [], 25 | 'on_val_batch_start': [], 26 | 'on_val_image_end': [], 27 | 'on_val_batch_end': [], 28 | 'on_val_end': [], 29 | 30 | 'on_fit_epoch_end': [], # fit = train + val 31 | 'on_model_save': [], 32 | 'on_train_end': [], 33 | 34 | 'teardown': [], 35 | } 36 | 37 | def __init__(self): 38 | return 39 | 40 | def register_action(self, hook, name='', callback=None): 41 | """ 42 | Register a new action to a callback hook 43 | 44 | Args: 45 | hook The callback hook name to register the action to 46 | name The name of the action 47 | callback The callback to fire 48 | """ 49 | assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" 50 | assert callable(callback), f"callback '{callback}' is not callable" 51 | self._callbacks[hook].append({'name': name, 'callback': callback}) 52 | 53 | def get_registered_actions(self, hook=None): 54 
| """" 55 | Returns all the registered actions by callback hook 56 | 57 | Args: 58 | hook The name of the hook to check, defaults to all 59 | """ 60 | if hook: 61 | return self._callbacks[hook] 62 | else: 63 | return self._callbacks 64 | 65 | def run_callbacks(self, hook, *args, **kwargs): 66 | """ 67 | Loop through the registered actions and fire all callbacks 68 | """ 69 | for logger in self._callbacks[hook]: 70 | # print(f"Running callbacks.{logger['callback'].__name__}()") 71 | logger['callback'](*args, **kwargs) 72 | 73 | def on_pretrain_routine_start(self, *args, **kwargs): 74 | """ 75 | Fires all registered callbacks at the start of each pretraining routine 76 | """ 77 | self.run_callbacks('on_pretrain_routine_start', *args, **kwargs) 78 | 79 | def on_pretrain_routine_end(self, *args, **kwargs): 80 | """ 81 | Fires all registered callbacks at the end of each pretraining routine 82 | """ 83 | self.run_callbacks('on_pretrain_routine_end', *args, **kwargs) 84 | 85 | def on_train_start(self, *args, **kwargs): 86 | """ 87 | Fires all registered callbacks at the start of each training 88 | """ 89 | self.run_callbacks('on_train_start', *args, **kwargs) 90 | 91 | def on_train_epoch_start(self, *args, **kwargs): 92 | """ 93 | Fires all registered callbacks at the start of each training epoch 94 | """ 95 | self.run_callbacks('on_train_epoch_start', *args, **kwargs) 96 | 97 | def on_train_batch_start(self, *args, **kwargs): 98 | """ 99 | Fires all registered callbacks at the start of each training batch 100 | """ 101 | self.run_callbacks('on_train_batch_start', *args, **kwargs) 102 | 103 | def optimizer_step(self, *args, **kwargs): 104 | """ 105 | Fires all registered callbacks on each optimizer step 106 | """ 107 | self.run_callbacks('optimizer_step', *args, **kwargs) 108 | 109 | def on_before_zero_grad(self, *args, **kwargs): 110 | """ 111 | Fires all registered callbacks before zero grad 112 | """ 113 | self.run_callbacks('on_before_zero_grad', *args, **kwargs) 114 | 115 | def on_train_batch_end(self, *args, **kwargs): 116 | """ 117 | Fires all registered callbacks at the end of each training batch 118 | """ 119 | self.run_callbacks('on_train_batch_end', *args, **kwargs) 120 | 121 | def on_train_epoch_end(self, *args, **kwargs): 122 | """ 123 | Fires all registered callbacks at the end of each training epoch 124 | """ 125 | self.run_callbacks('on_train_epoch_end', *args, **kwargs) 126 | 127 | def on_val_start(self, *args, **kwargs): 128 | """ 129 | Fires all registered callbacks at the start of the validation 130 | """ 131 | self.run_callbacks('on_val_start', *args, **kwargs) 132 | 133 | def on_val_batch_start(self, *args, **kwargs): 134 | """ 135 | Fires all registered callbacks at the start of each validation batch 136 | """ 137 | self.run_callbacks('on_val_batch_start', *args, **kwargs) 138 | 139 | def on_val_image_end(self, *args, **kwargs): 140 | """ 141 | Fires all registered callbacks at the end of each val image 142 | """ 143 | self.run_callbacks('on_val_image_end', *args, **kwargs) 144 | 145 | def on_val_batch_end(self, *args, **kwargs): 146 | """ 147 | Fires all registered callbacks at the end of each validation batch 148 | """ 149 | self.run_callbacks('on_val_batch_end', *args, **kwargs) 150 | 151 | def on_val_end(self, *args, **kwargs): 152 | """ 153 | Fires all registered callbacks at the end of the validation 154 | """ 155 | self.run_callbacks('on_val_end', *args, **kwargs) 156 | 157 | def on_fit_epoch_end(self, *args, **kwargs): 158 | """ 159 | Fires all registered callbacks 
at the end of each fit (train+val) epoch 160 | """ 161 | self.run_callbacks('on_fit_epoch_end', *args, **kwargs) 162 | 163 | def on_model_save(self, *args, **kwargs): 164 | """ 165 | Fires all registered callbacks after each model save 166 | """ 167 | self.run_callbacks('on_model_save', *args, **kwargs) 168 | 169 | def on_train_end(self, *args, **kwargs): 170 | """ 171 | Fires all registered callbacks at the end of training 172 | """ 173 | self.run_callbacks('on_train_end', *args, **kwargs) 174 | 175 | def teardown(self, *args, **kwargs): 176 | """ 177 | Fires all registered callbacks before teardown 178 | """ 179 | self.run_callbacks('teardown', *args, **kwargs) 180 | -------------------------------------------------------------------------------- /utils/downloads.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Download utils 4 | """ 5 | 6 | import os 7 | import platform 8 | import subprocess 9 | import time 10 | import urllib 11 | from pathlib import Path 12 | 13 | import requests 14 | import torch 15 | 16 | 17 | def gsutil_getsize(url=''): 18 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 19 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 20 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 21 | 22 | 23 | def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): 24 | # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes 25 | file = Path(file) 26 | assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}" 27 | try: # url1 28 | print(f'Downloading {url} to {file}...') 29 | torch.hub.download_url_to_file(url, str(file)) 30 | assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check 31 | except Exception as e: # url2 32 | file.unlink(missing_ok=True) # remove partial downloads 33 | print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...') 34 | os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail 35 | finally: 36 | if not file.exists() or file.stat().st_size < min_bytes: # check 37 | file.unlink(missing_ok=True) # remove partial downloads 38 | print(f"ERROR: {assert_msg}\n{error_msg}") 39 | print('') 40 | 41 | 42 | def attempt_download(file, repo='ultralytics/yolov5'): # from utils.downloads import *; attempt_download() 43 | # Attempt file download if does not exist 44 | file = Path(str(file).strip().replace("'", '')) 45 | 46 | if not file.exists(): 47 | # URL specified 48 | name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc. 49 | if str(file).startswith(('http:/', 'https:/')): # download 50 | url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ 51 | name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth... 52 | safe_download(file=name, url=url, min_bytes=1E5) 53 | return name 54 | 55 | # GitHub assets 56 | file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required) 57 | try: 58 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api 59 | assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...] 60 | tag = response['tag_name'] # i.e. 
'v1.0' 61 | except: # fallback plan 62 | assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 63 | 'yolov5s6.pt', 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt'] 64 | try: 65 | tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1] 66 | except: 67 | tag = 'v5.0' # current release 68 | tag = 'v5.0' # download v5.0 models 69 | if name in assets: 70 | safe_download(file, 71 | url=f'https://github.com/{repo}/releases/download/{tag}/{name}', 72 | # url2=f'https://storage.googleapis.com/{repo}/ckpt/{name}', # backup url (optional) 73 | min_bytes=1E5, 74 | error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/') 75 | 76 | return str(file) 77 | 78 | 79 | def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): 80 | # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download() 81 | t = time.time() 82 | file = Path(file) 83 | cookie = Path('cookie') # gdrive cookie 84 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='') 85 | file.unlink(missing_ok=True) # remove existing file 86 | cookie.unlink(missing_ok=True) # remove existing cookie 87 | 88 | # Attempt file download 89 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 90 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 91 | if os.path.exists('cookie'): # large file 92 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 93 | else: # small file 94 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 95 | r = os.system(s) # execute, capture return 96 | cookie.unlink(missing_ok=True) # remove existing cookie 97 | 98 | # Error check 99 | if r != 0: 100 | file.unlink(missing_ok=True) # remove partial 101 | print('Download error ') # raise Exception('Download error') 102 | return r 103 | 104 | # Unzip if archive 105 | if file.suffix == '.zip': 106 | print('unzipping... 
', end='') 107 | os.system(f'unzip -q {file}') # unzip 108 | file.unlink() # remove zip to free space 109 | 110 | print(f'Done ({time.time() - t:.1f}s)') 111 | return r 112 | 113 | 114 | def get_token(cookie="./cookie"): 115 | with open(cookie) as f: 116 | for line in f: 117 | if "download" in line: 118 | return line.split()[-1] 119 | return "" 120 | 121 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries ---------------------------------------------- 122 | # 123 | # 124 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 125 | # # Uploads a file to a bucket 126 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 127 | # 128 | # storage_client = storage.Client() 129 | # bucket = storage_client.get_bucket(bucket_name) 130 | # blob = bucket.blob(destination_blob_name) 131 | # 132 | # blob.upload_from_filename(source_file_name) 133 | # 134 | # print('File {} uploaded to {}.'.format( 135 | # source_file_name, 136 | # destination_blob_name)) 137 | # 138 | # 139 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 140 | # # Uploads a blob from a bucket 141 | # storage_client = storage.Client() 142 | # bucket = storage_client.get_bucket(bucket_name) 143 | # blob = bucket.blob(source_blob_name) 144 | # 145 | # blob.download_to_filename(destination_file_name) 146 | # 147 | # print('Blob {} downloaded to {}.'.format( 148 | # source_blob_name, 149 | # destination_file_name)) 150 | -------------------------------------------------------------------------------- /utils/labels.py: -------------------------------------------------------------------------------- 1 | import os, os.path as osp 2 | import argparse 3 | import numpy as np 4 | import yaml 5 | from tqdm import tqdm 6 | 7 | from pycocotools.coco import COCO 8 | 9 | def write_yolov5_labels(data): 10 | assert not osp.isdir(osp.join(data['path'], data['labels'])), \ 11 | 'Labels already generated. Remove or choose new name for labels.' 
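    # ------------------------------------------------------------------
    # Note (added for clarity; not part of the original labels.py): for each
    # annotation this function appends one line per head to
    # <labels>/<split>/<image>.txt in the form
    #     cls xc yc w h pitch yaw roll
    # (crowd boxes are skipped for the train split). The box centre and size
    # are normalized by image width/height, and the Euler angles are mapped
    # into (0, 1): pitch and roll via angle/180 + 0.5 (assumed range -90..90
    # deg), yaw via angle/360 + 0.5 (range -180..180 deg). A minimal sketch of
    # that mapping and its inverse (hypothetical helpers, not used by this
    # script):
    #
    #     def encode_angles(pitch, yaw, roll):
    #         return pitch / 180 + 0.5, yaw / 360 + 0.5, roll / 180 + 0.5
    #
    #     def decode_angles(p, y, r):
    #         return (p - 0.5) * 180, (y - 0.5) * 360, (r - 0.5) * 180
    #
    #     # e.g. encode_angles(-45.0, 120.0, 10.0) -> (0.25, 0.833..., 0.555...)
    # ------------------------------------------------------------------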
12 | 13 | splits = [osp.splitext(osp.split(data[s])[-1])[0] for s in ['train', 'val', 'test'] if s in data] 14 | annotations = [osp.join(data['path'], data['{}_annotations'.format(s)]) for s in ['train', 'val', 'test'] if s in data] 15 | test_split = [0 if s in ['train', 'val'] else 1 for s in ['train', 'val', 'test'] if s in data] 16 | img_txt_dir = osp.join(data['path'], data['labels'], 'img_txt') 17 | os.makedirs(img_txt_dir, exist_ok=True) 18 | 19 | for split, annot, is_test in zip(splits, annotations, test_split): 20 | img_txt_path = osp.join(img_txt_dir, '{}.txt'.format(split)) 21 | labels_path = osp.join(data['path'], '{}/{}'.format(data['labels'], split)) 22 | if not is_test: 23 | os.makedirs(labels_path, exist_ok=True) 24 | coco = COCO(annot) 25 | if not is_test: 26 | pbar = tqdm(coco.anns.keys(), total=len(coco.anns.keys())) 27 | pbar.desc = 'Writing {} labels to {}'.format(split, labels_path) 28 | for id in pbar: 29 | a = coco.anns[id] 30 | 31 | if a['image_id'] not in coco.imgs: 32 | continue 33 | 34 | if 'train' in split and a['iscrowd']: 35 | continue 36 | 37 | img_info = coco.imgs[a['image_id']] 38 | img_h, img_w = img_info['height'], img_info['width'] 39 | # x, y, w, h = a['head_bbox'] 40 | x, y, w, h = a['bbox'] 41 | xc, yc = x + w / 2, y + h / 2 42 | xc /= img_w 43 | yc /= img_h 44 | w /= img_w 45 | h /= img_h 46 | 47 | [pitch, yaw, roll] = a['euler_angles'] 48 | pitch = (pitch / 180 + 0.5) # (-90,90)/180 + 0.5 --> (0,1) 49 | yaw = (yaw / 360 + 0.5) # (-180, 180)/360 + 0.5 --> (0,1) 50 | roll = (roll / 180 + 0.5) # (-90,90)/180 + 0.5 --> (0,1) 51 | 52 | yolov5_label_txt = '{}.txt'.format(osp.splitext(img_info['file_name'])[0]) 53 | with open(osp.join(labels_path, yolov5_label_txt), 'a') as f: 54 | f.write('{} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f}\n'.format( 55 | 0, xc, yc, w, h, pitch, yaw, roll)) 56 | pbar.close() 57 | 58 | with open(img_txt_path, 'w') as f: 59 | for img_info in coco.imgs.values(): 60 | f.write(osp.join(data['path'], 'images', '{}'.format(split), img_info['file_name']) + '\n') 61 | 62 | 63 | if __name__ == '__main__': 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument('--data', default='data/coco-kp.yaml') 66 | args = parser.parse_args() 67 | 68 | assert osp.isfile(args.data), 'Data config file not found at {}'.format(args.data) 69 | 70 | with open(args.data, 'rb') as f: 71 | data = yaml.safe_load(f) 72 | write_yolov5_labels(data) -------------------------------------------------------------------------------- /utils/labels_v2.py: -------------------------------------------------------------------------------- 1 | import os, os.path as osp 2 | import argparse 3 | import numpy as np 4 | import yaml 5 | import json 6 | from tqdm import tqdm 7 | 8 | def write_yolov5_labels(data): 9 | assert not osp.isdir(osp.join(data['path'], data['labels'])), \ 10 | 'Labels already generated. Remove or choose new name for labels.' 
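    # ------------------------------------------------------------------
    # Note (added for clarity; not part of the original labels_v2.py): unlike
    # utils/labels.py above, which parses a COCO-style annotation file through
    # pycocotools, this variant reads a plain JSON dict keyed by image name.
    # Each entry carries 'height'/'width', a 'bbox' given as
    # [xmin, ymin, xmax, ymax] (converted to x, y, w, h below), and a 'pose'
    # ordered [yaw, pitch, roll] rather than [pitch, yaw, roll]. The label
    # lines written out use the same "cls xc yc w h pitch yaw roll" format as
    # labels.py. A purely hypothetical entry, for illustration only:
    #
    #     {"agora_val_2000400001.jpg": {"height": 720, "width": 1280,
    #                                   "bbox": [100, 80, 180, 190],
    #                                   "pose": [30.0, -10.0, 5.0]}}
    # ------------------------------------------------------------------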
11 | 12 | splits = [osp.splitext(osp.split(data[s])[-1])[0] for s in ['train', 'val', 'test'] if s in data] 13 | annotations = [osp.join(data['path'], data['{}_annotations'.format(s)]) for s in ['train', 'val', 'test'] if s in data] 14 | test_split = [0 if s in ['train', 'val'] else 1 for s in ['train', 'val', 'test'] if s in data] 15 | img_txt_dir = osp.join(data['path'], data['labels'], 'img_txt') 16 | os.makedirs(img_txt_dir, exist_ok=True) 17 | 18 | for split, annot, is_test in zip(splits, annotations, test_split): 19 | img_txt_path = osp.join(img_txt_dir, '{}.txt'.format(split)) 20 | labels_path = osp.join(data['path'], '{}/{}'.format(data['labels'], split)) 21 | if not is_test: 22 | os.makedirs(labels_path, exist_ok=True) 23 | 24 | json_img_dict = json.load(open(annot, "r")) 25 | if not is_test: 26 | pbar = tqdm(json_img_dict.keys(), total=len(json_img_dict.keys())) 27 | pbar.desc = 'Writing {} labels to {}'.format(split, labels_path) 28 | for img_name in pbar: # the id is img_name 29 | a = json_img_dict[img_name] 30 | img_h, img_w = a['height'], a['width'] 31 | xmin, ymin, xmax, ymax = a['bbox'] 32 | x, y, w, h = xmin, ymin, xmax-xmin, ymax-ymin 33 | xc, yc = x + w / 2, y + h / 2 34 | xc /= img_w 35 | yc /= img_h 36 | w /= img_w 37 | h /= img_h 38 | 39 | [yaw, pitch, roll] = a['pose'] 40 | pitch = (pitch / 180 + 0.5) # (-90,90)/180 + 0.5 --> (0,1) 41 | yaw = (yaw / 360 + 0.5) # (-180, 180)/360 + 0.5 --> (0,1) 42 | roll = (roll / 180 + 0.5) # (-90,90)/180 + 0.5 --> (0,1) 43 | 44 | yolov5_label_txt = '{}.txt'.format(osp.splitext(img_name)[0]) 45 | with open(osp.join(labels_path, yolov5_label_txt), 'a') as f: 46 | f.write('{} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f}\n'.format( 47 | 0, xc, yc, w, h, pitch, yaw, roll)) 48 | pbar.close() 49 | 50 | with open(img_txt_path, 'w') as f: 51 | for img_name in json_img_dict.keys(): 52 | f.write(osp.join(data['path'], 'images', '{}'.format(split), img_name) + '\n') 53 | 54 | 55 | if __name__ == '__main__': 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--data', default='data/coco-kp.yaml') 58 | args = parser.parse_args() 59 | 60 | assert osp.isfile(args.data), 'Data config file not found at {}'.format(args.data) 61 | 62 | with open(args.data, 'rb') as f: 63 | data = yaml.safe_load(f) 64 | write_yolov5_labels(data) -------------------------------------------------------------------------------- /utils/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Logging utils 4 | """ 5 | 6 | import warnings 7 | from threading import Thread 8 | 9 | import torch 10 | from torch.utils.tensorboard import SummaryWriter 11 | 12 | from utils.general import colorstr, emojis 13 | from utils.loggers.wandb.wandb_utils import WandbLogger 14 | from utils.plots import plot_images, plot_results 15 | from utils.torch_utils import de_parallel 16 | 17 | LOGGERS = ('csv', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases 18 | 19 | try: 20 | import wandb 21 | 22 | assert hasattr(wandb, '__version__') # verify package import not local dir 23 | except (ImportError, AssertionError): 24 | wandb = None 25 | 26 | 27 | class Loggers(): 28 | # YOLOv5 Loggers class 29 | def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS): 30 | self.save_dir = save_dir 31 | self.weights = weights 32 | self.opt = opt 33 | self.hyp = hyp 34 | self.logger = logger # for printing results to console 35 | self.include = 
include 36 | self.keys = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', 'train/mse_loss', # train loss 37 | 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', # metrics 38 | 'val/box_loss', 'val/obj_loss', 'val/cls_loss', 'val/mse_loss', # val loss 39 | 'metrics/error_MAE', 'metrics/error_pitch', 'metrics/error_yaw', 'metrics/error_roll', # metrics error 40 | 'x/lr0', 'x/lr1', 'x/lr2'] # params 41 | for k in LOGGERS: 42 | setattr(self, k, None) # init empty logger dictionary 43 | self.csv = True # always log to csv 44 | 45 | # Message 46 | if not wandb: 47 | prefix = colorstr('Weights & Biases: ') 48 | s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)" 49 | print(emojis(s)) 50 | 51 | # TensorBoard 52 | s = self.save_dir 53 | if 'tb' in self.include and not self.opt.evolve: 54 | prefix = colorstr('TensorBoard: ') 55 | self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/") 56 | self.tb = SummaryWriter(str(s)) 57 | 58 | # W&B 59 | if wandb and 'wandb' in self.include: 60 | wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://') 61 | run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and not wandb_artifact_resume else None 62 | self.opt.hyp = self.hyp # add hyperparameters 63 | self.wandb = WandbLogger(self.opt, run_id) 64 | else: 65 | self.wandb = None 66 | 67 | def on_pretrain_routine_end(self): 68 | # Callback runs on pre-train routine end 69 | paths = self.save_dir.glob('*labels*.jpg') # training labels 70 | if self.wandb: 71 | self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]}) 72 | 73 | def on_train_batch_end(self, ni, model, imgs, targets, paths, plots, sync_bn): 74 | # Callback runs on train batch end 75 | if plots: 76 | if ni == 0: 77 | if not sync_bn: # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754 78 | with warnings.catch_warnings(): 79 | warnings.simplefilter('ignore') # suppress jit trace warning 80 | self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), []) 81 | if ni < 3: 82 | f = self.save_dir / f'train_batch{ni}.jpg' # filename 83 | Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() 84 | if self.wandb and ni == 10: 85 | files = sorted(self.save_dir.glob('train*.jpg')) 86 | self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]}) 87 | 88 | def on_train_epoch_end(self, epoch): 89 | # Callback runs on train epoch end 90 | if self.wandb: 91 | self.wandb.current_epoch = epoch + 1 92 | 93 | def on_val_image_end(self, pred, predn, path, names, im): 94 | # Callback runs on val image end 95 | if self.wandb: 96 | self.wandb.val_one_image(pred, predn, path, names, im) 97 | 98 | def on_val_end(self): 99 | # Callback runs on val end 100 | if self.wandb: 101 | files = sorted(self.save_dir.glob('val*.jpg')) 102 | self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]}) 103 | 104 | def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): 105 | # Callback runs at the end of each fit (train+val) epoch 106 | x = {k: v for k, v in zip(self.keys, vals)} # dict 107 | if self.csv: 108 | file = self.save_dir / 'results.csv' 109 | n = len(x) + 1 # number of cols 110 | s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header 111 | with open(file, 'a') as 
f: 112 | f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') 113 | 114 | if self.tb: 115 | for k, v in x.items(): 116 | self.tb.add_scalar(k, v, epoch) 117 | 118 | if self.wandb: 119 | self.wandb.log(x) 120 | self.wandb.end_epoch(best_result=best_fitness == fi) 121 | 122 | def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): 123 | # Callback runs on model save event 124 | if self.wandb: 125 | if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1: 126 | self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) 127 | 128 | def on_train_end(self, last, best, plots, epoch): 129 | # Callback runs on training end 130 | if plots: 131 | plot_results(file=self.save_dir / 'results.csv') # save results.png 132 | files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]] 133 | files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter 134 | 135 | if self.tb: 136 | import cv2 137 | for f in files: 138 | self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') 139 | 140 | if self.wandb: 141 | self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) 142 | # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model 143 | if not self.opt.evolve: 144 | wandb.log_artifact(str(best if best.exists() else last), type='model', 145 | name='run_' + self.wandb.wandb_run.id + '_model', 146 | aliases=['latest', 'best', 'stripped']) 147 | self.wandb.finish_run() 148 | else: 149 | self.wandb.finish_run() 150 | self.wandb = WandbLogger(self.opt) 151 | -------------------------------------------------------------------------------- /utils/loggers/wandb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnuzhy/DirectMHP/a51b06bc34ed4202a9e9d1aad1c1cac9b6de05d0/utils/loggers/wandb/__init__.py -------------------------------------------------------------------------------- /utils/loggers/wandb/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from wandb_utils import WandbLogger 4 | 5 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 6 | 7 | 8 | def create_dataset_artifact(opt): 9 | logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused 10 | 11 | 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') 15 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 16 | parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') 17 | parser.add_argument('--entity', default=None, help='W&B entity') 18 | parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run') 19 | 20 | opt = parser.parse_args() 21 | opt.resume = False # Explicitly disallow resume check for dataset upload job 22 | 23 | create_dataset_artifact(opt) 24 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import wandb 5 | 6 | FILE = Path(__file__).absolute() 7 | sys.path.append(FILE.parents[3].as_posix()) # add utils/ to path 8 | 9 | from train import train, 
parse_opt 10 | from utils.general import increment_path 11 | from utils.torch_utils import select_device 12 | 13 | 14 | def sweep(): 15 | wandb.init() 16 | # Get hyp dict from sweep agent 17 | hyp_dict = vars(wandb.config).get("_items") 18 | 19 | # Workaround: get necessary opt args 20 | opt = parse_opt(known=True) 21 | opt.batch_size = hyp_dict.get("batch_size") 22 | opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) 23 | opt.epochs = hyp_dict.get("epochs") 24 | opt.nosave = True 25 | opt.data = hyp_dict.get("data") 26 | device = select_device(opt.device, batch_size=opt.batch_size) 27 | 28 | # train 29 | train(hyp_dict, opt, device) 30 | 31 | 32 | if __name__ == "__main__": 33 | sweep() 34 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for training 2 | # To set range- 3 | # Provide min and max values as: 4 | # parameter: 5 | # 6 | # min: scalar 7 | # max: scalar 8 | # OR 9 | # 10 | # Set a specific list of search space- 11 | # parameter: 12 | # values: [scalar1, scalar2, scalar3...] 13 | # 14 | # You can use grid, bayesian and hyperopt search strategy 15 | # For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration 16 | 17 | program: utils/loggers/wandb/sweep.py 18 | method: random 19 | metric: 20 | name: metrics/mAP_0.5 21 | goal: maximize 22 | 23 | parameters: 24 | # hyperparameters: set either min, max range or values list 25 | data: 26 | value: "data/coco128.yaml" 27 | batch_size: 28 | values: [64] 29 | epochs: 30 | values: [10] 31 | 32 | lr0: 33 | distribution: uniform 34 | min: 1e-5 35 | max: 1e-1 36 | lrf: 37 | distribution: uniform 38 | min: 0.01 39 | max: 1.0 40 | momentum: 41 | distribution: uniform 42 | min: 0.6 43 | max: 0.98 44 | weight_decay: 45 | distribution: uniform 46 | min: 0.0 47 | max: 0.001 48 | warmup_epochs: 49 | distribution: uniform 50 | min: 0.0 51 | max: 5.0 52 | warmup_momentum: 53 | distribution: uniform 54 | min: 0.0 55 | max: 0.95 56 | warmup_bias_lr: 57 | distribution: uniform 58 | min: 0.0 59 | max: 0.2 60 | box: 61 | distribution: uniform 62 | min: 0.02 63 | max: 0.2 64 | cls: 65 | distribution: uniform 66 | min: 0.2 67 | max: 4.0 68 | cls_pw: 69 | distribution: uniform 70 | min: 0.5 71 | max: 2.0 72 | obj: 73 | distribution: uniform 74 | min: 0.2 75 | max: 4.0 76 | obj_pw: 77 | distribution: uniform 78 | min: 0.5 79 | max: 2.0 80 | iou_t: 81 | distribution: uniform 82 | min: 0.1 83 | max: 0.7 84 | anchor_t: 85 | distribution: uniform 86 | min: 2.0 87 | max: 8.0 88 | fl_gamma: 89 | distribution: uniform 90 | min: 0.0 91 | max: 0.1 92 | hsv_h: 93 | distribution: uniform 94 | min: 0.0 95 | max: 0.1 96 | hsv_s: 97 | distribution: uniform 98 | min: 0.0 99 | max: 0.9 100 | hsv_v: 101 | distribution: uniform 102 | min: 0.0 103 | max: 0.9 104 | degrees: 105 | distribution: uniform 106 | min: 0.0 107 | max: 45.0 108 | translate: 109 | distribution: uniform 110 | min: 0.0 111 | max: 0.9 112 | scale: 113 | distribution: uniform 114 | min: 0.0 115 | max: 0.9 116 | shear: 117 | distribution: uniform 118 | min: 0.0 119 | max: 10.0 120 | perspective: 121 | distribution: uniform 122 | min: 0.0 123 | max: 0.001 124 | flipud: 125 | distribution: uniform 126 | min: 0.0 127 | max: 1.0 128 | fliplr: 129 | distribution: uniform 130 | min: 0.0 131 | max: 1.0 132 | mosaic: 133 | distribution: uniform 134 | min: 0.0 135 | 
max: 1.0 136 | mixup: 137 | distribution: uniform 138 | min: 0.0 139 | max: 1.0 140 | copy_paste: 141 | distribution: uniform 142 | min: 0.0 143 | max: 1.0 144 | -------------------------------------------------------------------------------- /utils/renderer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | The code is from https://github.com/vitoralbiero/img2pose/blob/main/utils/renderer.py 3 | ''' 4 | 5 | import cv2 6 | import numpy as np 7 | from Sim3DR import RenderPipeline 8 | from scipy.spatial.transform import Rotation 9 | 10 | def transform_points(points, pose): 11 | return points.dot(Rotation.from_rotvec(pose[:3]).as_matrix().T) + pose[3:] 12 | 13 | def plot_3d_landmark(verts, campose, intrinsics): 14 | lm_3d_trans = transform_points(verts, campose) 15 | 16 | # project to image plane 17 | lms_3d_trans_proj = intrinsics.dot(lm_3d_trans.T).T 18 | lms_projected = ( 19 | lms_3d_trans_proj[:, :2] / np.tile(lms_3d_trans_proj[:, 2], (2, 1)).T 20 | ) 21 | 22 | return lms_projected, lms_3d_trans_proj 23 | 24 | 25 | def _to_ctype(arr): 26 | if not arr.flags.c_contiguous: 27 | return arr.copy(order="C") 28 | return arr 29 | 30 | 31 | def get_colors(img, ver): 32 | h, w, _ = img.shape 33 | ver[0, :] = np.minimum(np.maximum(ver[0, :], 0), w - 1) # x 34 | ver[1, :] = np.minimum(np.maximum(ver[1, :], 0), h - 1) # y 35 | ind = np.round(ver).astype(np.int32) 36 | colors = img[ind[1, :], ind[0, :], :] / 255.0 # n x 3 37 | 38 | return colors.copy() 39 | 40 | 41 | class Renderer: 42 | def __init__( 43 | self, 44 | vertices_path="../pose_references/vertices_trans.npy", 45 | triangles_path="../pose_references/triangles.npy", 46 | ): 47 | self.vertices = np.load(vertices_path) 48 | self.triangles = _to_ctype(np.load(triangles_path).T) 49 | self.vertices[:, 0] *= -1 50 | 51 | self.cfg = { 52 | "intensity_ambient": 0.3, 53 | "color_ambient": (1, 1, 1), 54 | "intensity_directional": 0.6, 55 | "color_directional": (1, 1, 1), 56 | "intensity_specular": 0.1, 57 | "specular_exp": 5, 58 | "light_pos": (0, 0, 5), 59 | "view_pos": (0, 0, 5), 60 | } 61 | 62 | self.render_app = RenderPipeline(**self.cfg) 63 | 64 | def transform_vertices(self, img, poses, global_intrinsics=None): 65 | # (w, h) = img.size # PIL 66 | (h, w, c) = img.shape # cv2 67 | if global_intrinsics is None: 68 | global_intrinsics = np.array( 69 | [[w + h, 0, w // 2], [0, w + h, h // 2], [0, 0, 1]] 70 | ) 71 | 72 | transformed_vertices = [] 73 | for pose in poses: 74 | projected_lms = np.zeros_like(self.vertices) 75 | projected_lms[:, :2], lms_3d_trans_proj = plot_3d_landmark( 76 | self.vertices, pose, global_intrinsics 77 | ) 78 | projected_lms[:, 2] = lms_3d_trans_proj[:, 2] * -1 79 | 80 | range_x = np.max(projected_lms[:, 0]) - np.min(projected_lms[:, 0]) 81 | range_y = np.max(projected_lms[:, 1]) - np.min(projected_lms[:, 1]) 82 | 83 | s = (h + w) / pose[5] 84 | projected_lms[:, 2] *= s 85 | projected_lms[:, 2] += (range_x + range_y) * 3 86 | 87 | transformed_vertices.append(projected_lms) 88 | 89 | return transformed_vertices 90 | 91 | def render(self, img, transformed_vertices, alpha=0.9, save_path=None): 92 | img = np.asarray(img) 93 | overlap = img.copy() 94 | 95 | for vertices in transformed_vertices: 96 | vertices = _to_ctype(vertices) # transpose 97 | overlap = self.render_app(vertices, self.triangles, overlap) 98 | 99 | res = cv2.addWeighted(img, 1 - alpha, overlap, alpha, 0) 100 | 101 | if save_path is not None: 102 | cv2.imwrite(save_path, res) 103 | print(f"Save visualization result to 
{save_path}") 104 | 105 | return res 106 | 107 | def save_to_obj(self, img, ver_lst, height, save_path): 108 | n_obj = len(ver_lst) # count obj 109 | 110 | if n_obj <= 0: 111 | return 112 | 113 | n_vertex = ver_lst[0].T.shape[1] 114 | n_face = self.triangles.shape[0] 115 | 116 | with open(save_path, "w") as f: 117 | for i in range(n_obj): 118 | ver = ver_lst[i].T 119 | colors = get_colors(img, ver) 120 | 121 | for j in range(n_vertex): 122 | x, y, z = ver[:, j] 123 | f.write( 124 | f"v {x:.2f} {height - y:.2f} {z:.2f} {colors[j, 2]:.2f} " 125 | f"{colors[j, 1]:.2f} {colors[j, 0]:.2f}\n" 126 | ) 127 | 128 | for i in range(n_obj): 129 | offset = i * n_vertex 130 | for j in range(n_face): 131 | idx1, idx2, idx3 = self.triangles[j] # m x 3 132 | f.write( 133 | f"f {idx3 + 1 + offset} {idx2 + 1 + offset} " 134 | f"{idx1 + 1 + offset}\n" 135 | ) 136 | 137 | print(f"Dump tp {save_path}") 138 | -------------------------------------------------------------------------------- /weights/readme.md: -------------------------------------------------------------------------------- 1 | Put the pretrained YOLOv5 model weights here. --------------------------------------------------------------------------------