├── README.md
├── process_BundleTrack_data.py
└── process_data.py

/README.md:
--------------------------------------------------------------------------------
# ObjTracking
Please check out the HOI4D Challenge on the project website www.hoi4d.top!
## Overview
This code base provides the benchmark for the HOI4D challenge object tracking task, along with scripts to preprocess the dataset for BundleTrack, a state-of-the-art category-level object pose tracking method.
## Challenge
For this challenge, you need to submit a pred.npy file (your predicted results) to the leaderboard. The file pred.npy is an ndarray containing your predictions for test_wolabel.h5. You can download an example here: [Link](https://1drv.ms/u/s!ApQF_e_bw-USgjQCKg9hGJIijeqs?e=eGfohd). A hedged sketch of loading test_wolabel.h5 and writing pred.npy appears at the end of this document.

## Usage
After you process the data using our scripts, you can easily run [BundleTrack](https://github.com/wenbowen123/BundleTrack).
## Citation
```
@InProceedings{Liu_2022_CVPR,
    author    = {Liu, Yunze and Liu, Yun and Jiang, Che and Lyu, Kangbo and Wan, Weikang and Shen, Hao and Liang, Boqiang and Fu, Zhoujie and Wang, He and Yi, Li},
    title     = {HOI4D: A 4D Egocentric Dataset for Category-Level Human-Object Interaction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2022},
    pages     = {21013-21022}
}
```
```
@inproceedings{wen2021bundletrack,
  title={BundleTrack: 6D Pose Tracking for Novel Objects without Instance or Category-Level 3D Models},
  author={Wen, B and Bekris, Kostas E},
  booktitle={IEEE/RSJ International Conference on Intelligent Robots and Systems},
  year={2021}
}
```

--------------------------------------------------------------------------------
/process_BundleTrack_data.py:
--------------------------------------------------------------------------------
import os
import numpy as np
import cv2
from process_data import shift_mask, get_specific_semantic_mask, read_pose_from_json


def get_video_names(dataset_dir, category):
    # Walk the HOI4D directory hierarchy ZY*/H*/<category>/N*/S*/s*/T*, keeping
    # videos that have frame 0 and a full set of 300 per-frame pose annotations.
    video_names = []
    for ZY in os.listdir(dataset_dir):
        p1 = os.path.join(dataset_dir, ZY)
        if "ZY" not in ZY:
            continue
        for H in os.listdir(p1):
            p2 = os.path.join(p1, H)
            if "H" not in H:
                continue
            p3 = os.path.join(p2, category)
            if not os.path.isdir(p3):
                continue
            for N in os.listdir(p3):
                p4 = os.path.join(p3, N)
                if "N" not in N:
                    continue
                for S in os.listdir(p4):
                    p5 = os.path.join(p4, S)
                    if "S" not in S:
                        continue
                    for s in os.listdir(p5):
                        p6 = os.path.join(p5, s)
                        if "s" not in s:
                            continue
                        for T in os.listdir(p6):
                            p7 = os.path.join(p6, T)
                            if "T" not in T:
                                continue
                            if ZY == "ZY20210800001":
                                if not os.path.isfile(os.path.join(p7, "align_image", "0.jpg")):
                                    continue
                            else:
                                if not os.path.isfile(os.path.join(p7, "shift_rgb", "0.jpg")):
                                    continue
                            flag = True
                            for i in range(300):
                                pose_path = os.path.join(p7, "objpose", str(i) + ".json")
                                if not os.path.isfile(pose_path):
                                    pose_path = os.path.join(p7, "objpose", str(i).zfill(5) + ".json")
                                    if not os.path.isfile(pose_path):
                                        flag = False
                                        break
                            if not flag:
                                continue
                            video_names.append(os.path.join(ZY, H, category, N, S, s, T))
    return video_names


def prepare_BundleTrack_data(dataset_dir, HOI4D_Sim_dir, intrinsics_dir, video_name, save_dir):
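    """Convert one HOI4D video into the per-video layout used for BundleTrack.

    Under save_dir/<video_name with "/" replaced by "_">, this writes:
        rgb/%04d.png             - color frames
        depth/%04d.png           - 16-bit depth frames
        masks/%04d.png           - binary object masks (semantic label 1)
        annotated_poses/%04d.txt - 4x4 annotated object pose per frame
        cam_K.txt                - 3x3 camera intrinsics
    Each video is assumed to have exactly 300 annotated frames.
    """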
    save_dir = os.path.join(save_dir, video_name.replace("/", "_"))
    rgb_dir = os.path.join(save_dir, "rgb")
    depth_dir = os.path.join(save_dir, "depth")
    masks_dir = os.path.join(save_dir, "masks")
    poses_dir = os.path.join(save_dir, "annotated_poses")
    os.makedirs(rgb_dir, exist_ok=True)
    os.makedirs(depth_dir, exist_ok=True)
    os.makedirs(masks_dir, exist_ok=True)
    os.makedirs(poses_dir, exist_ok=True)

    # extract the capture-device id (e.g. "ZY20210800001") from the video name
    p1 = video_name.find("ZY")
    p2 = video_name[p1:].find("/")
    ZY = video_name[p1:p1+p2]
    intrinsic = np.load(os.path.join(intrinsics_dir, ZY, "intrin.npy"))
    np.savetxt(os.path.join(save_dir, "cam_K.txt"), intrinsic)

    for i in range(300):
        '''
        # HOI4D_Sim -> HOI4D_BundleTrack
        rgb_path = os.path.join(HOI4D_Sim_dir, video_name, "rgb", str(i).zfill(5) + ".png")
        depth_path = os.path.join(HOI4D_Sim_dir, video_name, "depth", str(i).zfill(5) + ".png")
        mask_path = os.path.join(HOI4D_Sim_dir, video_name, "2Dseg", str(i).zfill(5) + ".png")
        rgb = cv2.imread(rgb_path, cv2.IMREAD_UNCHANGED)
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
        '''
        # HOI4D_overall -> HOI4D_BundleTrack
        if ZY == "ZY20210800001":
            rgb_path = os.path.join(dataset_dir, video_name, "align_image", str(i) + ".jpg")
        else:
            rgb_path = os.path.join(dataset_dir, video_name, "shift_rgb", str(i) + ".jpg")
        depth_path = os.path.join(dataset_dir, video_name, "align_depth", str(i) + ".png")
        mask_path = os.path.join(dataset_dir, video_name, "refine_2Dseg", "mask", str(i).zfill(5) + ".png")  # unshifted
        rgb = cv2.imread(rgb_path, cv2.IMREAD_UNCHANGED)
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)  # keep 16-bit depth values
        mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)[:, :, ::-1]  # BGR -> RGB, unshifted
        mask = shift_mask(mask, ZY)  # shifted to align with the depth frames

        object_mask = get_specific_semantic_mask(mask, label=1).astype(np.uint8)

        cv2.imwrite(os.path.join(rgb_dir, str(i).zfill(4) + ".png"), rgb)
        cv2.imwrite(os.path.join(depth_dir, str(i).zfill(4) + ".png"), depth)
        cv2.imwrite(os.path.join(masks_dir, str(i).zfill(4) + ".png"), object_mask)

        pose_path = os.path.join(dataset_dir, video_name, "objpose", str(i) + ".json")
        if not os.path.isfile(pose_path):
            pose_path = os.path.join(dataset_dir, video_name, "objpose", str(i).zfill(5) + ".json")
            if not os.path.isfile(pose_path):
                raise FileNotFoundError(pose_path)
        gt_pose = read_pose_from_json(pose_path)
        T = np.eye(4)
        T[:3, :3] = gt_pose["rotation"]
        T[:3, 3:] = gt_pose["translation"].reshape(3, 1)
        np.savetxt(os.path.join(poses_dir, str(i).zfill(4) + ".txt"), T)


if __name__ == "__main__":
    # HOI4D_Sim_dir = "/localdata_hdd1/HOI4D_Sim"
    HOI4D_Sim_dir = None

    ############ CHANGE THIS ############
    category = "C13"
    dataset_dir = "/share/datasets/HOI4D_overall"
    save_dir = os.path.join("/localdata_hdd1/HOI4D_BundleTrack", category)
    intrinsics_dir = "/share/datasets/HOI4D_intrinsics"
    #####################################

    video_names = get_video_names(dataset_dir, category)

    with open(category + ".txt", "w") as wr:
        for vn in video_names:
            wr.write(vn + "\n")

    print("len(video_names) = ", len(video_names))
    for video_name in video_names:
        prepare_BundleTrack_data(dataset_dir, HOI4D_Sim_dir, intrinsics_dir, video_name, save_dir)
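
    # Optional sanity check: the first processed video should now contain the
    # 3x3 intrinsics and a 4x4 pose for frame 0, exactly as written above.
    if video_names:
        first = os.path.join(save_dir, video_names[0].replace("/", "_"))
        assert np.loadtxt(os.path.join(first, "cam_K.txt")).shape == (3, 3)
        assert np.loadtxt(os.path.join(first, "annotated_poses", "0000.txt")).shape == (4, 4)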

--------------------------------------------------------------------------------
/process_data.py:
--------------------------------------------------------------------------------
import os
import pickle
import json
import cv2
import numpy as np
from scipy.spatial.transform import Rotation as Rt
import open3d as o3d


def get_color_map(N=256):
    # standard PASCAL VOC color map: semantic label index -> RGB color
    def bitget(byteval, idx):
        return ((byteval & (1 << idx)) != 0)

    cmap = np.zeros((N, 3), dtype=np.uint8)
    for i in range(N):
        r = g = b = 0
        c = i
        for j in range(8):
            r = r | (bitget(c, 0) << 7-j)
            g = g | (bitget(c, 1) << 7-j)
            b = b | (bitget(c, 2) << 7-j)
            c = c >> 3

        cmap[i] = np.array([r, g, b])

    return cmap


def shift_mask(mask, ZY):
    # per-camera pixel offsets that align the 2D masks with the depth frames
    if ZY == "ZY20210800001":
        dx, dy = 0, 0
    elif ZY in ("ZY20210800002", "ZY20210800004"):
        dx, dy = 15, -3
    elif ZY == "ZY20210800003":
        dx, dy = 15, -30
    else:
        raise NotImplementedError

    rows, cols, _ = mask.shape
    MAT = np.float32([[1, 0, dx], [0, 1, dy]])
    dst = cv2.warpAffine(mask, MAT, (cols, rows))
    return dst


def read_pose_from_json(pose_path, num=0):
    raw_data = json.load(open(pose_path, "r"))
    if "dataList" in raw_data:
        raw_pose = raw_data["dataList"][num]
    else:
        raw_pose = raw_data["objects"][num]

    translation, rotation, scale = raw_pose["center"], raw_pose["rotation"], raw_pose["dimensions"]
    translation = np.float32([translation["x"], translation["y"], translation["z"]])
    rotation = np.float32([rotation["x"], rotation["y"], rotation["z"]])
    rotation = Rt.from_euler('XYZ', rotation).as_matrix()  # intrinsic XYZ Euler angles
    scale = np.float32([scale["length"], scale["width"], scale["height"]])

    pose = {
        "translation": translation.reshape(3, 1),
        "rotation": rotation,
        "scale": scale.reshape(3, 1),
    }
    return pose


def read_poses_from_pkl(poses_path):
    raw_poses = pickle.load(open(poses_path, "rb"))
    return raw_poses


def get_specific_semantic_mask(mask, label=0):
    # boolean mask of pixels whose RGB color matches the given semantic label
    color_map = get_color_map(N=10)
    c = color_map[label]
    specific_mask = (mask[..., 0] == c[0]) & (mask[..., 1] == c[1]) & (mask[..., 2] == c[2])
    return specific_mask


def read_object_pcd(video_dir, idx, sampling_ratio=0.1):
    # read data
    p1 = video_dir.find("ZY")
    p2 = video_dir[p1:].find("/")
    ZY = video_dir[p1:p1+p2]
    if ZY == "ZY20210800001":
        rgb_path = os.path.join(video_dir, "align_image", str(idx) + ".jpg")
    else:
        rgb_path = os.path.join(video_dir, "shift_rgb", str(idx) + ".jpg")
    depth_path = os.path.join(video_dir, "align_depth", str(idx) + ".png")
    mask_path = os.path.join(video_dir, "refine_2Dseg", "mask", str(idx).zfill(5) + ".png")  # unshifted
    rgb = o3d.io.read_image(rgb_path)
    depth = o3d.io.read_image(depth_path)
    mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)[:, :, ::-1]  # BGR -> RGB, unshifted
    mask = shift_mask(mask, ZY)  # shifted

    # read intrinsic
    intrinsic_dir = "/share/datasets/HOI4D_intrinsics"
    intrinsic = np.load(os.path.join(intrinsic_dir, ZY, "intrin.npy"))  # shape = (3, 3)
    intrinsic = o3d.camera.PinholeCameraIntrinsic(1920, 1080, intrinsic[0, 0], intrinsic[1, 1], intrinsic[0, 2], intrinsic[1, 2])

    # process data
    object_mask = get_specific_semantic_mask(mask, label=1)
    object_depth = np.float32(depth)
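    # zero out depth everywhere outside the object mask so the RGBD
    # back-projection below yields points on the object only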
    object_depth[~object_mask] = 0
    object_depth = o3d.geometry.Image(object_depth)
    object_rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(rgb, object_depth, convert_rgb_to_intensity=False)
    object_pcd = o3d.geometry.PointCloud.create_from_rgbd_image(object_rgbd, intrinsic, extrinsic=np.eye(4))
    object_pcd = object_pcd.voxel_down_sample(voxel_size=0.001)
    object_pcd, _ = object_pcd.remove_radius_outlier(nb_points=500, radius=0.03)
    object_pcd = object_pcd.random_down_sample(sampling_ratio=sampling_ratio)
    return object_pcd


def read_model_pcd(model_path, N_points=10000):
    model_mesh = o3d.io.read_triangle_mesh(model_path)
    model_pcd = model_mesh.sample_points_uniformly(number_of_points=N_points, seed=0)
    model_points = np.float32(model_pcd.points)
    # center the model at the midpoint of its axis-aligned bounding box
    center = (np.min(model_points, axis=0) + np.max(model_points, axis=0)) / 2
    model_points -= center
    return model_points


if __name__ == "__main__":
    video_dir = "/share/datasets/HOI4D_overall/ZY20210800001/H1/C12/N33/S165/s02/T1"
    idx = 0

    pose_path = os.path.join(video_dir, "objpose", str(idx) + ".json")
    if not os.path.isfile(pose_path):
        pose_path = os.path.join(video_dir, "objpose", str(idx).zfill(5) + ".json")
        if not os.path.isfile(pose_path):
            raise FileNotFoundError(pose_path)
    gt_pose = read_pose_from_json(pose_path)
    object_pcd = read_object_pcd(video_dir, idx)

    object_points = np.float32(object_pcd.points)
    object_colors = np.float32(object_pcd.colors)
    # transform camera-frame points into the annotated object frame:
    # p_obj = R^T (p_cam - t), written row-wise as (p - t) @ R
    object_points = (object_points - gt_pose["translation"].reshape(3)) @ gt_pose["rotation"]
    object_pcd = o3d.geometry.PointCloud()
    object_pcd.points = o3d.utility.Vector3dVector(object_points)
    object_pcd.colors = o3d.utility.Vector3dVector(object_colors)
    o3d.io.write_point_cloud("gt.ply", object_pcd)
--------------------------------------------------------------------------------
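
A minimal sketch, referenced from the README's Challenge section, of inspecting test_wolabel.h5 and writing a pred.npy file. The HDF5 key ("data") and the 4x4-pose-per-frame layout are assumptions rather than anything this repository specifies; match the actual structure of test_wolabel.h5 and the downloadable example before submitting.

```python
import h5py
import numpy as np

# The dataset key "data" is hypothetical -- list the keys first and adapt.
with h5py.File("test_wolabel.h5", "r") as f:
    print(list(f.keys()))
    num_frames = len(f["data"])  # assumed: one entry per test frame

# Placeholder predictions: one 4x4 identity pose per frame (assumed layout);
# replace these with the poses produced by your tracker.
pred = np.stack([np.eye(4, dtype=np.float32)] * num_frames)
np.save("pred.npy", pred)  # upload pred.npy to the leaderboard
```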