├── LICENSE.md
├── README.md
├── read.py
├── render
    ├── objcoord.compositor
    ├── objcoord.material
    ├── objcoord.vert
    └── vertcolor.frag
└── tobrachmann.py


/LICENSE.md:
--------------------------------------------------------------------------------
1 | This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Converter Utilities for the ACCV LINEMOD Dataset
 2 | 
 3 | The LINEMOD dataset is widely used for various 6D pose estimation and camera localization algorithms.
 4 | 
 5 | Unfortunately each author chose to convert the original data into an own file format and only support loading from that data.
 6 | 
 7 | This repository therefore provides a set of python functions to read and convert the original LINEMOD data to the respective formats.
 8 | 
 9 | This way one can compare the algorithms using the exact same input data.
10 | 
11 | ## ACCV LINEMOD Dataset
12 | 
13 | The extensive ACCV database is public (over 18000 real images with 15 different objects and ground truth pose)! 
14 | 
15 | Each dataset contains the 3D model saved as a point cloud object.xyz
16 | format: 
17 | ```
18 | #_of_voxels size_of_voxel_in_cm 
19 | x1_in_cm y1_in_cm z1_in_cm normal_x1 normal_y1 normal_z1 color_x1_normalized_to_1 color_y1_normalized_to_1 color_z1_normalized_to_1
20 | ...
21 | ```
22 | and a file called distance.txt with the maximum diameter of the object (in cm). 
23 | 
24 | For some datasets we also provide a nice mesh model in the ply format (in mm - with better normals). The original mesh is contained in OLDmesh.ply. For most datasets we registered this OLDmesh.ply to the point cloud with the transformation stored in transform.dat (first number is not important, then each first number of a line is obsolete - for the rest: the transformation matrix [R|T] is stored rowwise (in m)). 
25 | 
26 | The registered mesh is stored in mesh.ply. In the folder data you can find the color images, the aligned depth images and the ground truth rotation and translation (in cm).
27 | 
28 | In order to read the depth images you can use this function. 
29 | 
30 | The internal camera matrix parameters for the kinect are:
31 | ```
32 | fx=572.41140, px=325.26110, fy=573.57043, py=242.04899
33 | ```
34 | 
35 | Color image and depth image are already aligned by the internal alignement procedure of Kinect.
36 | 


--------------------------------------------------------------------------------
/read.py:
--------------------------------------------------------------------------------
 1 | """
 2 | utilites to read the linemod dataset to numpy types
 3 | 
 4 | @author Pavel Rojtberg <https://github.com/paroj>
 5 | """
 6 | 
 7 | import numpy as np
 8 | 
 9 | def _parse_ply_header(f):
10 |     assert f.readline().strip() == "ply"
11 |     vtx_count = 0
12 |     idx_count = 0
13 | 
14 |     for l in f:
15 |         if l.strip() == "end_header":
16 |             break
17 |         elif l.startswith("element vertex"):
18 |             vtx_count = int(l.split()[-1])
19 |         elif l.startswith("element face"):
20 |             idx_count = int(l.split()[-1])
21 |     
22 |     return vtx_count, idx_count
23 | 
24 | def ply_vtx(path):
25 |     """
26 |     read all vertices from a ply file
27 |     """
28 |     f = open(path)
29 |     vtx_count = _parse_ply_header(f)[0]
30 | 
31 |     pts = []
32 | 
33 |     for _ in range(vtx_count):
34 |         pts.append(f.readline().split()[:3])
35 | 
36 |     return np.array(pts, dtype=np.float32)
37 | 
38 | def ply_idx(path):
39 |     """
40 |     read all indices from a ply file
41 |     """
42 |     f = open(path)
43 |     vtx_count, idx_count = _parse_ply_header(f)
44 |     
45 |     for _ in range(vtx_count):
46 |         f.readline()
47 |     
48 |     idx = []
49 |     for _ in range(idx_count):
50 |         idx.append(f.readline().split()[1:4])
51 | 
52 |     return np.array(idx, dtype=np.int32)
53 | 
54 | def transform(path):
55 |     """
56 |     read the to-origin transform.dat
57 |     
58 |     @return R, t in [cm]
59 |     """
60 |     f = open(path)
61 |     f.readline() # 12
62 |     
63 |     T = []
64 |     for l in f:
65 |         T.append(l.split()[1])
66 |     
67 |     T = np.float32(T).reshape((3, 4))
68 |     return T[:3, :3], T[:, 3] * 100 # [m] > [cm]
69 | 
70 | def linemod_pose(path, i):
71 |     """
72 |     read a 3x3 rotation and 3x1 translation.
73 |     
74 |     can be done with np.loadtxt, but this is way faster
75 |     @return R, t in [cm]
76 |     """
77 |     R = open("{}/data/rot{}.rot".format(path, i))
78 |     R.readline()
79 |     R = np.float32(R.read().split()).reshape((3, 3))
80 | 
81 |     t = open("{}/data/tra{}.tra".format(path, i))
82 |     t.readline()
83 |     t = np.float32(t.read().split())
84 |     
85 |     return R, t
86 | 
87 | def linemod_dpt(path):
88 |     """
89 |     read a depth image
90 |     
91 |     @return uint16 image of distance in [mm]"""
92 |     dpt = open(path, "rb")
93 |     rows = np.frombuffer(dpt.read(4), dtype=np.int32)[0]
94 |     cols = np.frombuffer(dpt.read(4), dtype=np.int32)[0]
95 |     
96 |     return np.fromfile(dpt, dtype=np.uint16).reshape((rows, cols))


--------------------------------------------------------------------------------
/render/objcoord.compositor:
--------------------------------------------------------------------------------
 1 | compositor Objcoord
 2 | {
 3 |     technique
 4 |     {
 5 |         texture img_float target_width target_height PF_FLOAT32_RGBA
 6 | 
 7 |         target img_float {
 8 |             input previous
 9 |         }
10 | 
11 |         // copy something to output
12 |         target_output
13 |         {
14 |             pass render_quad
15 |             {
16 |                 material copy
17 |                 input 0 img_float
18 |             }
19 |         }
20 |     }
21 | }


--------------------------------------------------------------------------------
/render/objcoord.material:
--------------------------------------------------------------------------------
 1 | vertex_program vert glsl
 2 | {
 3 |     source objcoord.vert
 4 |     default_params
 5 |     {
 6 |         param_named_auto MVP worldviewproj_matrix	
 7 |     }
 8 | }
 9 | fragment_program frag glsl
10 | {
11 |     source vertcolor.frag
12 | }
13 | 
14 | material objcoord {
15 |     technique {
16 |         pass {
17 |             vertex_program_ref vert {}
18 |             fragment_program_ref frag {}
19 |         }
20 |     }
21 | }
22 | 
23 | material copy {
24 |     technique {
25 |         pass {
26 |             lighting off
27 |             texture_unit {
28 |                 filtering none
29 |             }
30 |         }
31 |     }
32 | }


--------------------------------------------------------------------------------
/render/objcoord.vert:
--------------------------------------------------------------------------------
 1 | #version 120
 2 | 
 3 | attribute vec4 position;
 4 | uniform mat4 MVP;
 5 | 
 6 | uniform vec3 min;
 7 | uniform vec3 extent;
 8 | 
 9 | varying vec3 color;
10 | 
11 | void main()
12 | {
13 |     color = (position.xyz - min)/extent;
14 |     gl_Position = MVP * position; // needs w for proper perspective correction
15 | }
16 | 


--------------------------------------------------------------------------------
/render/vertcolor.frag:
--------------------------------------------------------------------------------
1 | #version 120
2 | 
3 | varying vec3 color;
4 | 
5 | void main()
6 | {
7 |     gl_FragColor = vec4(color, 1.0);
8 | }
9 | 


--------------------------------------------------------------------------------
/tobrachmann.py:
--------------------------------------------------------------------------------
  1 | """
  2 | convert the data for
  3 | "Uncertainty-Driven 6D Pose Estimation of Objects and Scenes from a Single RGB Image"
  4 | 
  5 | @author: Pavel Rojtberg <https://github.com/paroj>
  6 | """
  7 | 
  8 | import os.path
  9 | import glob
 10 | import numpy as np
 11 | import cv2
 12 | 
 13 | import read
 14 | 
 15 | RENDER_OBJCOORDS = True
 16 | 
 17 | # convert from the lineMOD coordinate system to the OpenCV CS
 18 | t_LM2cv    = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
 19 | 
 20 | # convert from OpenCV coordinate sytem to the CS used by brachmann
 21 | t_cv2brach = np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
 22 | 
 23 | # info.txt template: all units in [m]
 24 | info = """image size
 25 | 640 480
 26 | {}
 27 | rotation:
 28 | {}
 29 | center:
 30 | {}
 31 | extent:
 32 | {}
 33 | """
 34 | 
 35 | def write_info(path, obj_name, R, t, extent, ctr):
 36 |     """
 37 |     write brachmann info file, given the linemod data
 38 |     @param t: distance to center in [m]
 39 |     @param extent: extent in [mm]
 40 |     @param ctr: center of bounding box in [mm] 
 41 |     """
 42 |     # bbox center from view
 43 |     ctr = R.dot(ctr) / 10  # [mm] > [cm]
 44 |     R, t = lm2brach(R, t + ctr) # brachmann stores distance to center
 45 |     
 46 |     # write info file
 47 |     t /= 100 # [cm] > [m]
 48 |     
 49 |     extent = t_cv2brach.dot(extent)/1000 # [mm] > [m]
 50 |     
 51 |     f = open(path, "w")
 52 |     
 53 |     R_str = "\n".join([" ".join(r) for r in R.astype(str)])
 54 |     t_str = " ".join(t.astype(str))
 55 |     ext_str = " ".join(extent.astype(str))
 56 |     
 57 |     txt = info.format(obj_name, R_str, t_str, ext_str)
 58 | 
 59 |     f.write(txt)
 60 | 
 61 | def lm2brach(R, t):
 62 |     """
 63 |     convert linemod to brachmann pose format
 64 |     """
 65 |     R = t_LM2cv.dot(R)
 66 |     t = t_LM2cv.dot(t)
 67 |     
 68 |     # result may be reconstructed behind the camera
 69 |     if cv2.determinant(R) < 0:
 70 |         R *= -1
 71 |         t *= -1
 72 |     
 73 |     return t_cv2brach.dot(R.T).T, t # TODO why dont we transfrom t as well?
 74 | 
 75 | def main():
 76 |     if RENDER_OBJCOORDS:
 77 |         camera_matrix = np.array([[572.41140, 0, 325.26110], [0, 573.57043, 242.04899], [0, 0, 1]])
 78 |         
 79 |         cv2.ovis.addResourceLocation("render")
 80 |         win = cv2.ovis.createWindow("objcoord", (640, 480), 0)
 81 |         win.setCompositors(["Objcoord"])
 82 |         win.setCameraPose((0, 0, 0))
 83 |         win.setCameraIntrinsics(camera_matrix, (640, 480))
 84 |         win.setBackgroundColor((0, 0, 0, 0))
 85 |     
 86 |     # all non hidden directories
 87 |     objs = [d for d in sorted(os.listdir(".")) if os.path.isdir(d) and d[0] not in (".", "_")]
 88 |         
 89 |     for o in objs:
 90 |         pts = read.ply_vtx("{}/mesh.ply".format(o)) # [mm]
 91 |         omin = np.min(pts, axis=0)
 92 |         omax = np.max(pts, axis=0)
 93 |         
 94 |         octr = (omin + omax) * 0.5
 95 |         extent = omax - omin
 96 |     
 97 |         N = len(glob.glob("{}/data/color*.jpg".format(o)))
 98 |     
 99 |         infop = "{}/info".format(o)
100 |         depthp = "{}/depth_noseg".format(o)
101 |         rgbp = "{}/rgb_noseg".format(o)
102 |         segp = "{}/seg".format(o)
103 |         objp = "{}/obj".format(o)
104 |         
105 |         for p in [infop, depthp, rgbp, segp, objp]:
106 |             if not os.path.exists(p):
107 |                 os.mkdir(p)
108 |     
109 |         if RENDER_OBJCOORDS:
110 |             idx = read.ply_idx("{}/mesh.ply".format(o))
111 |             cv2.ovis.createTriangleMesh(o, pts.reshape(-1, 1, 3), None, idx)
112 |             win.createEntity(o, o)
113 |             win.setEntityProperty(o, cv2.ovis.ENTITY_MATERIAL, "objcoord")
114 |             cv2.ovis.setMaterialProperty("objcoord", "min", omin.tolist())
115 |             cv2.ovis.setMaterialProperty("objcoord", "extent", extent.tolist())
116 |     
117 |         for i in range(N):
118 |             dpt = read.linemod_dpt("{}/data/depth{}.dpt".format(o, i))
119 |             cv2.imwrite("{}/depth_{:05d}.png".format(depthp, i), dpt)
120 |             
121 |             # just jpg > png, doh!
122 |             color = cv2.imread("{}/data/color{}.jpg".format(o, i))        
123 |             cv2.imwrite("{}/color_{:05d}.png".format(rgbp, i), color)
124 |             
125 |             R, t = read.linemod_pose(o, i)
126 |             write_info("{}/info_{:05d}.txt".format(infop, i), o, R, t, extent, octr)
127 |             
128 |             if not RENDER_OBJCOORDS:
129 |                 continue
130 |             
131 |             win.setEntityPose(o, t * 10, R)  # [cm] > [mm] for rendering
132 |             cv2.ovis.waitKey()
133 |             img_float = win.getCompositorTexture("Objcoord", "img_float")
134 |             assert img_float.min() >= 0 and img_float.max() <= 1, "wrong object extents"
135 |             
136 |             # write mask
137 |             seg = img_float[..., 3] > 0
138 |             cv2.imwrite("{}/seg_{:05d}.png".format(segp, i), seg.astype(np.uint8) * 255)
139 | 
140 |             # write objcoords
141 |             obj = img_float[..., :3]
142 |             # objcoords centered around bbox center
143 |             obj[seg] *= extent
144 |             obj[seg] -= extent/2
145 |             
146 |             # imwrite expects bgr
147 |             obj = cv2.cvtColor(obj, cv2.COLOR_RGB2BGR).astype(np.uint16)
148 |             cv2.imwrite("{}/obj_{:05d}.png".format(objp, i), obj)                
149 | 
150 | if __name__ == "__main__":
151 |     main()
152 | 


--------------------------------------------------------------------------------