├── LICENSE.md ├── README.md ├── read.py ├── render ├── objcoord.compositor ├── objcoord.material ├── objcoord.vert └── vertcolor.frag └── tobrachmann.py /LICENSE.md: -------------------------------------------------------------------------------- 1 | This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Converter Utilities for the ACCV LINEMOD Dataset 2 | 3 | The LINEMOD dataset is widely used for various 6D pose estimation and camera localization algorithms. 4 | 5 | Unfortunately each author chose to convert the original data into an own file format and only support loading from that data. 6 | 7 | This repository therefore provides a set of python functions to read and convert the original LINEMOD data to the respective formats. 8 | 9 | This way one can compare the algorithms using the exact same input data. 10 | 11 | ## ACCV LINEMOD Dataset 12 | 13 | The extensive ACCV database is public (over 18000 real images with 15 different objects and ground truth pose)! 14 | 15 | Each dataset contains the 3D model saved as a point cloud object.xyz 16 | format: 17 | ``` 18 | #_of_voxels size_of_voxel_in_cm 19 | x1_in_cm y1_in_cm z1_in_cm normal_x1 normal_y1 normal_z1 color_x1_normalized_to_1 color_y1_normalized_to_1 color_z1_normalized_to_1 20 | ... 21 | ``` 22 | and a file called distance.txt with the maximum diameter of the object (in cm). 23 | 24 | For some datasets we also provide a nice mesh model in the ply format (in mm - with better normals). The original mesh is contained in OLDmesh.ply. For most datasets we registered this OLDmesh.ply to the point cloud with the transformation stored in transform.dat (first number is not important, then each first number of a line is obsolete - for the rest: the transformation matrix [R|T] is stored rowwise (in m)). 25 | 26 | The registered mesh is stored in mesh.ply. In the folder data you can find the color images, the aligned depth images and the ground truth rotation and translation (in cm). 27 | 28 | In order to read the depth images you can use this function. 29 | 30 | The internal camera matrix parameters for the kinect are: 31 | ``` 32 | fx=572.41140, px=325.26110, fy=573.57043, py=242.04899 33 | ``` 34 | 35 | Color image and depth image are already aligned by the internal alignement procedure of Kinect. 36 | -------------------------------------------------------------------------------- /read.py: -------------------------------------------------------------------------------- 1 | """ 2 | utilites to read the linemod dataset to numpy types 3 | 4 | @author Pavel Rojtberg 5 | """ 6 | 7 | import numpy as np 8 | 9 | def _parse_ply_header(f): 10 | assert f.readline().strip() == "ply" 11 | vtx_count = 0 12 | idx_count = 0 13 | 14 | for l in f: 15 | if l.strip() == "end_header": 16 | break 17 | elif l.startswith("element vertex"): 18 | vtx_count = int(l.split()[-1]) 19 | elif l.startswith("element face"): 20 | idx_count = int(l.split()[-1]) 21 | 22 | return vtx_count, idx_count 23 | 24 | def ply_vtx(path): 25 | """ 26 | read all vertices from a ply file 27 | """ 28 | f = open(path) 29 | vtx_count = _parse_ply_header(f)[0] 30 | 31 | pts = [] 32 | 33 | for _ in range(vtx_count): 34 | pts.append(f.readline().split()[:3]) 35 | 36 | return np.array(pts, dtype=np.float32) 37 | 38 | def ply_idx(path): 39 | """ 40 | read all indices from a ply file 41 | """ 42 | f = open(path) 43 | vtx_count, idx_count = _parse_ply_header(f) 44 | 45 | for _ in range(vtx_count): 46 | f.readline() 47 | 48 | idx = [] 49 | for _ in range(idx_count): 50 | idx.append(f.readline().split()[1:4]) 51 | 52 | return np.array(idx, dtype=np.int32) 53 | 54 | def transform(path): 55 | """ 56 | read the to-origin transform.dat 57 | 58 | @return R, t in [cm] 59 | """ 60 | f = open(path) 61 | f.readline() # 12 62 | 63 | T = [] 64 | for l in f: 65 | T.append(l.split()[1]) 66 | 67 | T = np.float32(T).reshape((3, 4)) 68 | return T[:3, :3], T[:, 3] * 100 # [m] > [cm] 69 | 70 | def linemod_pose(path, i): 71 | """ 72 | read a 3x3 rotation and 3x1 translation. 73 | 74 | can be done with np.loadtxt, but this is way faster 75 | @return R, t in [cm] 76 | """ 77 | R = open("{}/data/rot{}.rot".format(path, i)) 78 | R.readline() 79 | R = np.float32(R.read().split()).reshape((3, 3)) 80 | 81 | t = open("{}/data/tra{}.tra".format(path, i)) 82 | t.readline() 83 | t = np.float32(t.read().split()) 84 | 85 | return R, t 86 | 87 | def linemod_dpt(path): 88 | """ 89 | read a depth image 90 | 91 | @return uint16 image of distance in [mm]""" 92 | dpt = open(path, "rb") 93 | rows = np.frombuffer(dpt.read(4), dtype=np.int32)[0] 94 | cols = np.frombuffer(dpt.read(4), dtype=np.int32)[0] 95 | 96 | return np.fromfile(dpt, dtype=np.uint16).reshape((rows, cols)) -------------------------------------------------------------------------------- /render/objcoord.compositor: -------------------------------------------------------------------------------- 1 | compositor Objcoord 2 | { 3 | technique 4 | { 5 | texture img_float target_width target_height PF_FLOAT32_RGBA 6 | 7 | target img_float { 8 | input previous 9 | } 10 | 11 | // copy something to output 12 | target_output 13 | { 14 | pass render_quad 15 | { 16 | material copy 17 | input 0 img_float 18 | } 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /render/objcoord.material: -------------------------------------------------------------------------------- 1 | vertex_program vert glsl 2 | { 3 | source objcoord.vert 4 | default_params 5 | { 6 | param_named_auto MVP worldviewproj_matrix 7 | } 8 | } 9 | fragment_program frag glsl 10 | { 11 | source vertcolor.frag 12 | } 13 | 14 | material objcoord { 15 | technique { 16 | pass { 17 | vertex_program_ref vert {} 18 | fragment_program_ref frag {} 19 | } 20 | } 21 | } 22 | 23 | material copy { 24 | technique { 25 | pass { 26 | lighting off 27 | texture_unit { 28 | filtering none 29 | } 30 | } 31 | } 32 | } -------------------------------------------------------------------------------- /render/objcoord.vert: -------------------------------------------------------------------------------- 1 | #version 120 2 | 3 | attribute vec4 position; 4 | uniform mat4 MVP; 5 | 6 | uniform vec3 min; 7 | uniform vec3 extent; 8 | 9 | varying vec3 color; 10 | 11 | void main() 12 | { 13 | color = (position.xyz - min)/extent; 14 | gl_Position = MVP * position; // needs w for proper perspective correction 15 | } 16 | -------------------------------------------------------------------------------- /render/vertcolor.frag: -------------------------------------------------------------------------------- 1 | #version 120 2 | 3 | varying vec3 color; 4 | 5 | void main() 6 | { 7 | gl_FragColor = vec4(color, 1.0); 8 | } 9 | -------------------------------------------------------------------------------- /tobrachmann.py: -------------------------------------------------------------------------------- 1 | """ 2 | convert the data for 3 | "Uncertainty-Driven 6D Pose Estimation of Objects and Scenes from a Single RGB Image" 4 | 5 | @author: Pavel Rojtberg 6 | """ 7 | 8 | import os.path 9 | import glob 10 | import numpy as np 11 | import cv2 12 | 13 | import read 14 | 15 | RENDER_OBJCOORDS = True 16 | 17 | # convert from the lineMOD coordinate system to the OpenCV CS 18 | t_LM2cv = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]]) 19 | 20 | # convert from OpenCV coordinate sytem to the CS used by brachmann 21 | t_cv2brach = np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]]) 22 | 23 | # info.txt template: all units in [m] 24 | info = """image size 25 | 640 480 26 | {} 27 | rotation: 28 | {} 29 | center: 30 | {} 31 | extent: 32 | {} 33 | """ 34 | 35 | def write_info(path, obj_name, R, t, extent, ctr): 36 | """ 37 | write brachmann info file, given the linemod data 38 | @param t: distance to center in [m] 39 | @param extent: extent in [mm] 40 | @param ctr: center of bounding box in [mm] 41 | """ 42 | # bbox center from view 43 | ctr = R.dot(ctr) / 10 # [mm] > [cm] 44 | R, t = lm2brach(R, t + ctr) # brachmann stores distance to center 45 | 46 | # write info file 47 | t /= 100 # [cm] > [m] 48 | 49 | extent = t_cv2brach.dot(extent)/1000 # [mm] > [m] 50 | 51 | f = open(path, "w") 52 | 53 | R_str = "\n".join([" ".join(r) for r in R.astype(str)]) 54 | t_str = " ".join(t.astype(str)) 55 | ext_str = " ".join(extent.astype(str)) 56 | 57 | txt = info.format(obj_name, R_str, t_str, ext_str) 58 | 59 | f.write(txt) 60 | 61 | def lm2brach(R, t): 62 | """ 63 | convert linemod to brachmann pose format 64 | """ 65 | R = t_LM2cv.dot(R) 66 | t = t_LM2cv.dot(t) 67 | 68 | # result may be reconstructed behind the camera 69 | if cv2.determinant(R) < 0: 70 | R *= -1 71 | t *= -1 72 | 73 | return t_cv2brach.dot(R.T).T, t # TODO why dont we transfrom t as well? 74 | 75 | def main(): 76 | if RENDER_OBJCOORDS: 77 | camera_matrix = np.array([[572.41140, 0, 325.26110], [0, 573.57043, 242.04899], [0, 0, 1]]) 78 | 79 | cv2.ovis.addResourceLocation("render") 80 | win = cv2.ovis.createWindow("objcoord", (640, 480), 0) 81 | win.setCompositors(["Objcoord"]) 82 | win.setCameraPose((0, 0, 0)) 83 | win.setCameraIntrinsics(camera_matrix, (640, 480)) 84 | win.setBackgroundColor((0, 0, 0, 0)) 85 | 86 | # all non hidden directories 87 | objs = [d for d in sorted(os.listdir(".")) if os.path.isdir(d) and d[0] not in (".", "_")] 88 | 89 | for o in objs: 90 | pts = read.ply_vtx("{}/mesh.ply".format(o)) # [mm] 91 | omin = np.min(pts, axis=0) 92 | omax = np.max(pts, axis=0) 93 | 94 | octr = (omin + omax) * 0.5 95 | extent = omax - omin 96 | 97 | N = len(glob.glob("{}/data/color*.jpg".format(o))) 98 | 99 | infop = "{}/info".format(o) 100 | depthp = "{}/depth_noseg".format(o) 101 | rgbp = "{}/rgb_noseg".format(o) 102 | segp = "{}/seg".format(o) 103 | objp = "{}/obj".format(o) 104 | 105 | for p in [infop, depthp, rgbp, segp, objp]: 106 | if not os.path.exists(p): 107 | os.mkdir(p) 108 | 109 | if RENDER_OBJCOORDS: 110 | idx = read.ply_idx("{}/mesh.ply".format(o)) 111 | cv2.ovis.createTriangleMesh(o, pts.reshape(-1, 1, 3), None, idx) 112 | win.createEntity(o, o) 113 | win.setEntityProperty(o, cv2.ovis.ENTITY_MATERIAL, "objcoord") 114 | cv2.ovis.setMaterialProperty("objcoord", "min", omin.tolist()) 115 | cv2.ovis.setMaterialProperty("objcoord", "extent", extent.tolist()) 116 | 117 | for i in range(N): 118 | dpt = read.linemod_dpt("{}/data/depth{}.dpt".format(o, i)) 119 | cv2.imwrite("{}/depth_{:05d}.png".format(depthp, i), dpt) 120 | 121 | # just jpg > png, doh! 122 | color = cv2.imread("{}/data/color{}.jpg".format(o, i)) 123 | cv2.imwrite("{}/color_{:05d}.png".format(rgbp, i), color) 124 | 125 | R, t = read.linemod_pose(o, i) 126 | write_info("{}/info_{:05d}.txt".format(infop, i), o, R, t, extent, octr) 127 | 128 | if not RENDER_OBJCOORDS: 129 | continue 130 | 131 | win.setEntityPose(o, t * 10, R) # [cm] > [mm] for rendering 132 | cv2.ovis.waitKey() 133 | img_float = win.getCompositorTexture("Objcoord", "img_float") 134 | assert img_float.min() >= 0 and img_float.max() <= 1, "wrong object extents" 135 | 136 | # write mask 137 | seg = img_float[..., 3] > 0 138 | cv2.imwrite("{}/seg_{:05d}.png".format(segp, i), seg.astype(np.uint8) * 255) 139 | 140 | # write objcoords 141 | obj = img_float[..., :3] 142 | # objcoords centered around bbox center 143 | obj[seg] *= extent 144 | obj[seg] -= extent/2 145 | 146 | # imwrite expects bgr 147 | obj = cv2.cvtColor(obj, cv2.COLOR_RGB2BGR).astype(np.uint16) 148 | cv2.imwrite("{}/obj_{:05d}.png".format(objp, i), obj) 149 | 150 | if __name__ == "__main__": 151 | main() 152 | --------------------------------------------------------------------------------