├── .gitignore ├── LICENSE ├── README.md ├── assets ├── blender.png └── fuse.png ├── base_utils.py ├── blender ├── blender_utils.py ├── render_backend.py └── render_utils.py ├── config.py ├── data └── blank.blend ├── download_linemod_orig.sh ├── fuse └── fuse.py └── run.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | /data 3 | 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Generating synthetic data for PVNet 2 | 3 | ### Prepare the dataset 4 | 5 | Download the LINEMOD, which can be found at [here](https://zjueducn-my.sharepoint.com/:f:/g/personal/pengsida_zju_edu_cn/Eh27tt7m6fJNgcCKMp9BzjABRzJTju6bT2GIZzcIVGu9WA?e=vURdqJ). 
6 | 7 | Download the LINEMOD_ORIG, which can be found [here](https://zjueducn-my.sharepoint.com/:f:/g/personal/pengsida_zju_edu_cn/Eh27tt7m6fJNgcCKMp9BzjABRzJTju6bT2GIZzcIVGu9WA?e=vURdqJ). 8 | 9 | Download the SUN397 dataset, which provides the background images: 10 | 11 | ``` 12 | wget http://groups.csail.mit.edu/vision/SUN/releases/SUN2012pascalformat.tar.gz 13 | ``` 14 | 15 | ### Create the soft links 16 | 17 | Commands: 18 | 19 | ``` 20 | ln -s path/to/LINEMOD ./data/LINEMOD 21 | ln -s path/to/LINEMOD_ORIG ./data/LINEMOD_ORIG 22 | ln -s path/to/SUN2012 ./data/SUN 23 | ``` 24 | 25 | ### Configure Blender 26 | 27 | Download Blender 2.79a and set `cfg.BLENDER_PATH` in `config.py` to the path of the Blender executable. 28 | 29 | ### Run 30 | 31 | **All commands must be run from the ROOT directory.** 32 | 33 | Render 10000 images with Blender: 34 | 35 | ``` 36 | python run.py --type rendering 37 | ``` 38 | 39 | Generate 10000 images by fusion: 40 | 41 | ``` 42 | python run.py --type fuse 43 | ``` 44 | 45 | ### Examples 46 | 47 | Blender rendering 48 | 49 | ![blender](./assets/blender.png) 50 | 51 | Fusion 52 | 53 | ![fusion](./assets/fuse.png) 54 | 55 | -------------------------------------------------------------------------------- /assets/blender.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zju3dv/pvnet-rendering/2922b58c1c749242bb9a07e7ce6067d582b353a2/assets/blender.png -------------------------------------------------------------------------------- /assets/fuse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zju3dv/pvnet-rendering/2922b58c1c749242bb9a07e7ce6067d582b353a2/assets/fuse.png -------------------------------------------------------------------------------- /base_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | import lmdb 4 | import numpy as np 5 | import os 6 | 7 | from PIL import Image 8 | from plyfile import PlyData 9 | 10 | from config import cfg 11 | from transforms3d.euler import mat2euler 12 | 13 | import pickle 14 | 15 | 16 | def read_pickle(pkl_path): 17 | with open(pkl_path, 'rb') as f: 18 | return pickle.load(f) 19 | 20 | 21 | def save_pickle(data, pkl_path): 22 | os.system('mkdir -p {}'.format(os.path.dirname(pkl_path))) 23 | with open(pkl_path, 'wb') as f: 24 | pickle.dump(data, f) 25 | 26 | 27 | def read_pose(rot_path, tra_path): 28 | rot = np.loadtxt(rot_path, skiprows=1) 29 | tra = np.loadtxt(tra_path, skiprows=1) / 100.
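    # the translation in the .tra file is given in centimeters, so divide by 100 to work in meters;
    # the pose is returned below as a single 3x4 [R|t] matrix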
30 | return np.concatenate([rot, np.reshape(tra, newshape=[3, 1])], axis=-1) 31 | 32 | 33 | class ModelAligner(object): 34 | rotation_transform = np.array([[1., 0., 0.], 35 | [0., -1., 0.], 36 | [0., 0., -1.]]) 37 | translation_transforms = { 38 | # 'cat': np.array([-0.00577495, -0.01259045, -0.04062323]) 39 | } 40 | intrinsic_matrix = { 41 | 'linemod': np.array([[572.4114, 0., 325.2611], 42 | [0., 573.57043, 242.04899], 43 | [0., 0., 1.]]), 44 | # 'blender': np.array([[280.0, 0.0, 128.0], 45 | # [0.0, 280.0, 128.0], 46 | # [0.0, 0.0, 1.0]]), 47 | 'blender': np.array([[700., 0., 320.], 48 | [0., 700., 240.], 49 | [0., 0., 1.]]) 50 | } 51 | 52 | def __init__(self, class_type='cat'): 53 | self.class_type = class_type 54 | self.blender_model_path = os.path.join(cfg.LINEMOD,'{}/{}.ply'.format(class_type, class_type)) 55 | self.orig_model_path = os.path.join(cfg.LINEMOD_ORIG,'{}/mesh.ply'.format(class_type)) 56 | self.orig_old_model_path = os.path.join(cfg.LINEMOD_ORIG,'{}/OLDmesh.ply'.format(class_type)) 57 | self.transform_dat_path = os.path.join(cfg.LINEMOD_ORIG,'{}/transform.dat'.format(class_type)) 58 | 59 | self.R_p2w,self.t_p2w,self.s_p2w=self.setup_p2w_transform() 60 | 61 | @staticmethod 62 | def setup_p2w_transform(): 63 | transform1 = np.array([[0.161513626575, -0.827108919621, 0.538334608078, -0.245206743479], 64 | [-0.986692547798, -0.124983474612, 0.104004733264, -0.050683632493], 65 | [-0.018740313128, -0.547968924046, -0.836288750172, 0.387638419867]]) 66 | transform2 = np.array([[0.976471602917, 0.201606079936, -0.076541729271, -0.000718327821], 67 | [-0.196746662259, 0.978194475174, 0.066531419754, 0.000077120210], 68 | [0.088285841048, -0.049906700850, 0.994844079018, -0.001409600372]]) 69 | 70 | R1 = transform1[:, :3] 71 | t1 = transform1[:, 3] 72 | R2 = transform2[:, :3] 73 | t2 = transform2[:, 3] 74 | 75 | # printer system to world system 76 | t_p2w = np.dot(R2, t1) + t2 77 | R_p2w = np.dot(R2, R1) 78 | s_p2w = 0.85 79 | return R_p2w,t_p2w,s_p2w 80 | 81 | def pose_p2w(self,RT): 82 | t,R=RT[:,3],RT[:,:3] 83 | R_w2c=np.dot(R, self.R_p2w.T) 84 | t_w2c=-np.dot(R_w2c,self.t_p2w)+self.s_p2w*t 85 | return np.concatenate([R_w2c,t_w2c[:,None]],1) 86 | 87 | @staticmethod 88 | def load_ply_model(model_path): 89 | ply = PlyData.read(model_path) 90 | data = ply.elements[0].data 91 | x = data['x'] 92 | y = data['y'] 93 | z = data['z'] 94 | return np.stack([x, y, z], axis=-1) 95 | 96 | def read_transform_dat(self): 97 | transform_dat = np.loadtxt(self.transform_dat_path, skiprows=1)[:, 1] 98 | transform_dat = np.reshape(transform_dat, newshape=[3, 4]) 99 | return transform_dat 100 | 101 | def load_orig_model(self): 102 | if os.path.exists(self.orig_model_path): 103 | return self.load_ply_model(self.orig_model_path) / 1000. 104 | else: 105 | transform = self.read_transform_dat() 106 | old_model = self.load_ply_model(self.orig_old_model_path) / 1000. 
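            # mesh.ply is missing for this object, so load OLDmesh.ply (also in millimeters) and
            # apply the 3x4 rigid transform read from transform.dat
            # (rotation in the first three columns, translation in the last)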
107 | old_model = np.dot(old_model, transform[:, :3].T) + transform[:, 3] 108 | return old_model 109 | 110 | def get_translation_transform(self): 111 | if self.class_type in self.translation_transforms: 112 | return self.translation_transforms[self.class_type] 113 | 114 | blender_model = self.load_ply_model(self.blender_model_path) 115 | orig_model = self.load_orig_model() 116 | blender_model = np.dot(blender_model, self.rotation_transform.T) 117 | translation_transform = np.mean(orig_model, axis=0) - np.mean(blender_model, axis=0) 118 | self.translation_transforms[self.class_type] = translation_transform 119 | 120 | return translation_transform 121 | 122 | def align_model(self): 123 | blender_model = self.load_ply_model(self.blender_model_path) 124 | orig_model = self.load_orig_model() 125 | blender_model = np.dot(blender_model, self.rotation_transform.T) 126 | blender_model += (np.mean(orig_model, axis=0) - np.mean(blender_model, axis=0)) 127 | np.savetxt(os.path.join(cfg.DATA_DIR, 'blender_model.txt'), blender_model) 128 | np.savetxt(os.path.join(cfg.DATA_DIR, 'orig_model.txt'), orig_model) 129 | 130 | def project_model(self, model, pose, camera_type): 131 | camera_points_3d = np.dot(model, pose[:, :3].T) + pose[:, 3] 132 | camera_points_3d = np.dot(camera_points_3d, self.intrinsic_matrix[camera_type].T) 133 | return camera_points_3d[:, :2] / camera_points_3d[:, 2:] 134 | 135 | def validate(self, idx): 136 | model = self.load_ply_model(self.blender_model_path) 137 | pose = read_pickle('/home/pengsida/Datasets/LINEMOD/renders/{}/{}_RT.pkl'.format(self.class_type, idx))['RT'] 138 | model_2d = self.project_model(model, pose, 'blender') 139 | img = np.array(Image.open('/home/pengsida/Datasets/LINEMOD/renders/{}/{}.jpg'.format(self.class_type, idx))) 140 | 141 | import matplotlib.pyplot as plt 142 | plt.imshow(img) 143 | plt.plot(model_2d[:, 0], model_2d[:, 1], 'r.') 144 | plt.show() 145 | 146 | 147 | class PoseTransformer(object): 148 | rotation_transform = np.array([[1., 0., 0.], 149 | [0., -1., 0.], 150 | [0., 0., -1.]]) 151 | translation_transforms = {} 152 | class_type_to_number = { 153 | 'ape': '001', 154 | 'can': '004', 155 | 'cat': '005', 156 | 'driller': '006', 157 | 'duck': '007', 158 | 'eggbox': '008', 159 | 'glue': '009', 160 | 'holepuncher': '010' 161 | } 162 | blender_models={} 163 | 164 | def __init__(self, class_type): 165 | self.class_type = class_type 166 | self.blender_model_path = os.path.join(cfg.LINEMOD,'{}/{}.ply'.format(class_type, class_type)) 167 | self.orig_model_path = os.path.join(cfg.LINEMOD_ORIG,'{}/mesh.ply'.format(class_type)) 168 | self.xyz_pattern = os.path.join(cfg.OCCLUSION_LINEMOD,'models/{}/{}.xyz') 169 | self.model_aligner = ModelAligner(class_type) 170 | 171 | def orig_pose_to_blender_pose(self, pose): 172 | rot, tra = pose[:, :3], pose[:, 3] 173 | tra = tra + np.dot(rot, self.model_aligner.get_translation_transform()) 174 | rot = np.dot(rot, self.rotation_transform) 175 | return np.concatenate([rot, np.reshape(tra, newshape=[3, 1])], axis=-1) 176 | 177 | @staticmethod 178 | def blender_pose_to_blender_euler(pose): 179 | euler = [r / np.pi * 180 for r in mat2euler(pose, axes='szxz')] 180 | euler[0] = -(euler[0] + 90) % 360 181 | euler[1] = euler[1] - 90 182 | return np.array(euler) 183 | 184 | def orig_pose_to_blender_euler(self, pose): 185 | blender_pose = self.orig_pose_to_blender_pose(pose) 186 | return self.blender_pose_to_blender_euler(blender_pose) 187 | 188 | @staticmethod 189 | def load_ply_model(model_path): 190 | ply = 
PlyData.read(model_path) 191 | data = ply.elements[0].data 192 | x = data['x'] 193 | y = data['y'] 194 | z = data['z'] 195 | return np.stack([x, y, z], axis=-1) 196 | 197 | def get_blender_model(self): 198 | if self.class_type in self.blender_models: 199 | return self.blender_models[self.class_type] 200 | 201 | blender_model = self.load_ply_model(self.blender_model_path.format(self.class_type, self.class_type)) 202 | self.blender_models[self.class_type] = blender_model 203 | 204 | return blender_model 205 | 206 | def get_translation_transform(self): 207 | if self.class_type in self.translation_transforms: 208 | return self.translation_transforms[self.class_type] 209 | 210 | model = self.get_blender_model() 211 | xyz = np.loadtxt(self.xyz_pattern.format( 212 | self.class_type.title(), self.class_type_to_number[self.class_type])) 213 | rotation = np.array([[0., 0., 1.], 214 | [1., 0., 0.], 215 | [0., 1., 0.]]) 216 | xyz = np.dot(xyz, rotation.T) 217 | translation_transform = np.mean(xyz, axis=0) - np.mean(model, axis=0) 218 | self.translation_transforms[self.class_type] = translation_transform 219 | 220 | return translation_transform 221 | 222 | def occlusion_pose_to_blender_pose(self, pose): 223 | rot, tra = pose[:, :3], pose[:, 3] 224 | rotation = np.array([[0., 1., 0.], 225 | [0., 0., 1.], 226 | [1., 0., 0.]]) 227 | rot = np.dot(rot, rotation) 228 | 229 | tra[1:] *= -1 230 | translation_transform = np.dot(rot, self.get_translation_transform()) 231 | rot[1:] *= -1 232 | translation_transform[1:] *= -1 233 | tra += translation_transform 234 | pose = np.concatenate([rot, np.reshape(tra, newshape=[3, 1])], axis=-1) 235 | 236 | return pose 237 | 238 | 239 | class Projector(object): 240 | intrinsic_matrix = { 241 | 'linemod': np.array([[572.4114, 0., 325.2611], 242 | [0., 573.57043, 242.04899], 243 | [0., 0., 1.]]), 244 | 'blender': np.array([[700., 0., 320.], 245 | [0., 700., 240.], 246 | [0., 0., 1.]]), 247 | 'pascal': np.asarray([[-3000.0, 0.0, 0.0], 248 | [0.0, 3000.0, 0.0], 249 | [0.0, 0.0, 1.0]]) 250 | } 251 | 252 | def project(self,pts_3d,RT,K_type): 253 | pts_2d=np.matmul(pts_3d,RT[:,:3].T)+RT[:,3:].T 254 | pts_2d=np.matmul(pts_2d,self.intrinsic_matrix[K_type].T) 255 | pts_2d=pts_2d[:,:2]/pts_2d[:,2:] 256 | return pts_2d 257 | 258 | def project_h(self,pts_3dh,RT,K_type): 259 | ''' 260 | 261 | :param pts_3dh: [n,4] 262 | :param RT: [3,4] 263 | :param K_type: 264 | :return: [n,3] 265 | ''' 266 | K=self.intrinsic_matrix[K_type] 267 | return np.matmul(np.matmul(pts_3dh,RT.transpose()),K.transpose()) 268 | 269 | def project_pascal(self,pts_3d,RT,principle): 270 | ''' 271 | 272 | :param pts_3d: [n,3] 273 | :param principle: [2,2] 274 | :return: 275 | ''' 276 | K=self.intrinsic_matrix['pascal'].copy() 277 | K[:2,2]=principle 278 | cam_3d=np.matmul(pts_3d,RT[:,:3].T)+RT[:,3:].T 279 | cam_3d[np.abs(cam_3d[:,2])<1e-5,2]=1e-5 # revise depth 280 | pts_2d=np.matmul(cam_3d,K.T) 281 | pts_2d=pts_2d[:,:2]/pts_2d[:,2:] 282 | return pts_2d, cam_3d 283 | 284 | def project_pascal_h(self, pts_3dh,RT,principle): 285 | K=self.intrinsic_matrix['pascal'].copy() 286 | K[:2,2]=principle 287 | return np.matmul(np.matmul(pts_3dh,RT.transpose()),K.transpose()) 288 | 289 | @staticmethod 290 | def project_K(pts_3d,RT,K): 291 | pts_2d=np.matmul(pts_3d,RT[:,:3].T)+RT[:,3:].T 292 | pts_2d=np.matmul(pts_2d,K.T) 293 | pts_2d=pts_2d[:,:2]/pts_2d[:,2:] 294 | return pts_2d 295 | 296 | 297 | def randomly_read_background(): 298 | background_lmdb = lmdb.open(os.path.join(cfg.YCB, 'background'), max_readers=1, 299 | 
readonly=True, lock=False, readahead=False, meminit=False) 300 | with background_lmdb.begin() as txn: 301 | background_len = txn.stat()['entries'] 302 | background_idx=np.random.randint(0,background_len) 303 | with background_lmdb.begin(write=False) as txn: 304 | background=cv2.imdecode(np.fromstring( 305 | txn.get('{:08d}'.format(background_idx).encode('ascii')),np.uint8),cv2.IMREAD_COLOR) 306 | 307 | return background 308 | 309 | def vertex_layer_reshape(vertex): 310 | b,vn,h,w=vertex.shape 311 | vertex=vertex.permute(0,2,3,1) 312 | vn//=2 313 | vertex=vertex.view(b,h,w,vn,2) 314 | return vertex 315 | 316 | def mask_depth_to_point_cloud(mask,depth,K): 317 | ys, xs=np.nonzero(mask) 318 | dpts=depth[ys,xs] 319 | xs,ys=np.asarray(xs,np.float32),np.asarray(ys,np.float32) 320 | xys=np.concatenate([xs[:,None],ys[:,None]],1) 321 | xys*=dpts[:,None] 322 | xyds=np.concatenate([xys,dpts[:,None]],1) 323 | pts=np.matmul(xyds,np.linalg.inv(K).transpose()) 324 | return pts 325 | 326 | def mask_depth_to_pts(mask,depth,K,output_2d=False): 327 | hs,ws=np.nonzero(mask) 328 | pts_2d=np.asarray([ws,hs],np.float32).transpose() 329 | depth=depth[hs,ws] 330 | pts=np.asarray([ws,hs,depth],np.float32).transpose() 331 | pts[:,:2]*=pts[:,2:] 332 | if output_2d: 333 | return np.dot(pts,np.linalg.inv(K).transpose()), pts_2d 334 | else: 335 | return np.dot(pts,np.linalg.inv(K).transpose()) 336 | -------------------------------------------------------------------------------- /blender/blender_utils.py: -------------------------------------------------------------------------------- 1 | import bpy 2 | from mathutils import Matrix 3 | import numpy as np 4 | 5 | 6 | # we could also define the camera matrix 7 | # https://blender.stackexchange.com/questions/38009/3x4-camera-matrix-from-blender-camera 8 | def get_calibration_matrix_K_from_blender(camera): 9 | f_in_mm = camera.lens 10 | scene = bpy.context.scene 11 | resolution_x_in_px = scene.render.resolution_x 12 | resolution_y_in_px = scene.render.resolution_y 13 | scale = scene.render.resolution_percentage / 100 14 | sensor_width_in_mm = camera.sensor_width 15 | sensor_height_in_mm = camera.sensor_height 16 | pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y 17 | 18 | if camera.sensor_fit == 'VERTICAL': 19 | # the sensor height is fixed (sensor fit is horizontal), 20 | # the sensor width is effectively changed with the pixel aspect ratio 21 | s_u = resolution_x_in_px * scale / sensor_width_in_mm / pixel_aspect_ratio 22 | s_v = resolution_y_in_px * scale / sensor_height_in_mm 23 | else: # 'HORIZONTAL' and 'AUTO' 24 | # the sensor width is fixed (sensor fit is horizontal), 25 | # the sensor height is effectively changed with the pixel aspect ratio 26 | s_u = resolution_x_in_px * scale / sensor_width_in_mm 27 | s_v = resolution_y_in_px * scale * pixel_aspect_ratio / sensor_height_in_mm 28 | 29 | # Parameters of intrinsic calibration matrix K 30 | alpha_u = f_in_mm * s_u 31 | alpha_v = f_in_mm * s_u 32 | u_0 = resolution_x_in_px * scale / 2 33 | v_0 = resolution_y_in_px * scale / 2 34 | skew = 0 # only use rectangular pixels 35 | 36 | K = Matrix(((alpha_u, skew, u_0), 37 | (0, alpha_v, v_0), 38 | (0, 0, 1))) 39 | 40 | return K 41 | 42 | 43 | # Returns camera rotation and translation matrices from Blender. 44 | # 45 | # There are 3 coordinate systems involved: 46 | # 1. The World coordinates: "world" 47 | # - right-handed 48 | # 2. 
The Blender camera coordinates: "bcam" 49 | # - x is horizontal 50 | # - y is up 51 | # - right-handed: negative z look-at direction 52 | # 3. The desired computer vision camera coordinates: "cv" 53 | # - x is horizontal 54 | # - y is down (to align to the actual pixel coordinates 55 | # used in digital images) 56 | # - right-handed: positive z look-at direction 57 | def get_3x4_RT_matrix_from_blender(camera): 58 | # bcam stands for blender camera 59 | R_bcam2cv = Matrix( 60 | ((1, 0, 0), 61 | (0, -1, 0), 62 | (0, 0, -1))) 63 | 64 | # Use matrix_world instead to account for all constraints 65 | location, rotation = camera.matrix_world.decompose()[0:2] 66 | R_world2bcam = rotation.to_matrix().transposed() 67 | 68 | # Convert camera location to translation vector used in coordinate changes 69 | # Use location from matrix_world to account for constraints: 70 | T_world2bcam = -1 * R_world2bcam * location 71 | 72 | # Build the coordinate transform matrix from world to computer vision camera 73 | R_world2cv = R_bcam2cv * R_world2bcam 74 | T_world2cv = R_bcam2cv * T_world2bcam 75 | 76 | # put into 3x4 matrix 77 | RT = Matrix((R_world2cv[0][:] + (T_world2cv[0],), 78 | R_world2cv[1][:] + (T_world2cv[1],), 79 | R_world2cv[2][:] + (T_world2cv[2],))) 80 | return RT 81 | 82 | 83 | def get_3x4_P_matrix_from_blender(camera): 84 | K = get_calibration_matrix_K_from_blender(camera.data) 85 | RT = get_3x4_RT_matrix_from_blender(camera) 86 | return K*RT 87 | 88 | 89 | def get_K_P_from_blender(camera): 90 | K = get_calibration_matrix_K_from_blender(camera.data) 91 | RT = get_3x4_RT_matrix_from_blender(camera) 92 | return {"K": np.asarray(K, dtype=np.float32), "RT": np.asarray(RT, dtype=np.float32)} 93 | -------------------------------------------------------------------------------- /blender/render_backend.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import math 4 | import bpy 5 | import numpy as np 6 | import sys 7 | from transforms3d.euler import euler2mat 8 | import itertools 9 | import glob 10 | 11 | UTILS_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | LIB_DIR = os.path.dirname(UTILS_DIR) 13 | ROOT_DIR = os.path.dirname(LIB_DIR) 14 | 15 | sys.path.append(UTILS_DIR) 16 | sys.path.append(LIB_DIR) 17 | sys.path.append(ROOT_DIR) 18 | 19 | from config import cfg 20 | from blender.blender_utils import get_K_P_from_blender, get_3x4_P_matrix_from_blender 21 | import pickle 22 | import time 23 | 24 | 25 | def parse_argument(): 26 | parser = argparse.ArgumentParser(description='Renders given obj file by rotation a camera around it.') 27 | parser.add_argument('--input', type=str, default='./doc/car/models/model_normalized.obj', 28 | help='The cad model to be rendered') 29 | parser.add_argument('--output_dir', type=str, default='/tmp', 30 | help='The directory of the output 2d image.') 31 | parser.add_argument('--bg_imgs', type=str, default='/tmp', 32 | help='Names of background images stored in a .npy file.') 33 | parser.add_argument('--poses_path', type=str, default='/tmp', 34 | help='6d poses(azimuth, euler, theta, x, y, z) stored in a .npy file.') 35 | parser.add_argument('--use_cycles', type=str, default='False', 36 | help='Decide whether to use cycles render or not.') 37 | parser.add_argument('--azi', type=float, default=0.0, 38 | help='Azimuth of camera.') 39 | parser.add_argument('--ele', type=float, default=0.0, 40 | help='Elevation of camera.') 41 | parser.add_argument('--theta', type=float, default=0.0, 42 | help='In-plane 
rotation angle of camera.') 43 | parser.add_argument('--height', type=float, default=0.0, 44 | help='Location z of plane.') 45 | 46 | argv = sys.argv[sys.argv.index("--") + 1:] 47 | args = parser.parse_args(argv) 48 | 49 | return args 50 | 51 | 52 | def place_camera(cam, dis, azi, ele): 53 | azi = azi + 90 54 | azi = math.radians(azi) 55 | ele = math.radians(ele) 56 | cam.location = (dis * math.cos(ele) * math.cos(azi), dis * math.cos(ele) * math.sin(azi), dis * math.sin(ele)) 57 | 58 | 59 | def obj_location(dist, azi, ele): 60 | ele = math.radians(ele) 61 | azi = math.radians(azi) 62 | x = dist * math.cos(azi) * math.cos(ele) 63 | y = dist * math.sin(azi) * math.cos(ele) 64 | z = dist * math.sin(ele) 65 | return x, y, z 66 | 67 | 68 | def setup_light(scene): 69 | bpy.ops.object.select_by_type(type='LAMP') 70 | bpy.ops.object.delete(use_global=False) 71 | 72 | for i in range(2): 73 | azi = np.random.uniform(0, 360) 74 | ele = np.random.uniform(0, 40) 75 | dist = np.random.uniform(1, 2) 76 | x, y, z = obj_location(dist, azi, ele) 77 | lamp_name = 'Lamp{}'.format(i) 78 | lamp_data = bpy.data.lamps.new(name=lamp_name, type='POINT') 79 | lamp_data.energy = np.random.uniform(0.5, 2) 80 | lamp = bpy.data.objects.new(name=lamp_name, object_data=lamp_data) 81 | lamp.location = (x, y, z) 82 | scene.objects.link(lamp) 83 | 84 | 85 | def setup(): 86 | bpy.ops.object.select_all(action='TOGGLE') 87 | camera = bpy.data.objects['Camera'] 88 | bpy.data.cameras['Camera'].clip_end = 10000 89 | 90 | # configure rendered image's parameters 91 | bpy.context.scene.render.resolution_x = cfg.WIDTH 92 | bpy.context.scene.render.resolution_y = cfg.HEIGHT 93 | bpy.context.scene.render.resolution_percentage = 100 94 | bpy.context.scene.render.alpha_mode = 'TRANSPARENT' 95 | bpy.context.scene.render.image_settings.color_mode = 'RGBA' 96 | 97 | # modify the camera intrinsic matrix 98 | # bpy.data.cameras['Camera'].sensor_width = 39.132693723430386 99 | # bpy.context.scene.render.pixel_aspect_y = 1.6272340492401836 100 | 101 | cam_constraint = camera.constraints.new(type='TRACK_TO') 102 | cam_constraint.track_axis = 'TRACK_NEGATIVE_Z' 103 | cam_constraint.up_axis = 'UP_Y' 104 | b_empty = parent_obj_to_camera(camera) 105 | # cam_constraint.target = b_empty 106 | 107 | # composite node 108 | bpy.context.scene.use_nodes = True 109 | tree = bpy.context.scene.node_tree 110 | links = tree.links 111 | for n in tree.nodes: 112 | tree.nodes.remove(n) 113 | rl = tree.nodes.new(type="CompositorNodeRLayers") 114 | depth_file_output = tree.nodes.new(type="CompositorNodeOutputFile") 115 | depth_file_output.base_path = '' 116 | depth_file_output.format.file_format = 'OPEN_EXR' 117 | depth_file_output.format.color_depth = '32' 118 | 119 | map_node = tree.nodes.new(type="CompositorNodeMapRange") 120 | map_node.inputs[1].default_value = cfg.MIN_DEPTH 121 | map_node.inputs[2].default_value = cfg.MAX_DEPTH 122 | map_node.inputs[3].default_value = 0 123 | map_node.inputs[4].default_value = 1 124 | links.new(rl.outputs['Depth'], map_node.inputs[0]) 125 | links.new(map_node.outputs[0], depth_file_output.inputs[0]) 126 | 127 | return camera, depth_file_output 128 | 129 | 130 | def parent_obj_to_camera(b_camera): 131 | origin = (0, 0, 0) 132 | b_empty = bpy.data.objects.new("Empty", None) 133 | b_empty.location = origin 134 | b_camera.parent = b_empty # setup parenting 135 | 136 | scn = bpy.context.scene 137 | scn.objects.link(b_empty) 138 | scn.objects.active = b_empty 139 | return b_empty 140 | 141 | 142 | def quaternionFromYawPitchRoll(yaw, 
pitch, roll): 143 | c1 = math.cos(yaw / 2.0) 144 | c2 = math.cos(pitch / 2.0) 145 | c3 = math.cos(roll / 2.0) 146 | s1 = math.sin(yaw / 2.0) 147 | s2 = math.sin(pitch / 2.0) 148 | s3 = math.sin(roll / 2.0) 149 | q1 = c1 * c2 * c3 + s1 * s2 * s3 150 | q2 = c1 * c2 * s3 - s1 * s2 * c3 151 | q3 = c1 * s2 * c3 + s1 * c2 * s3 152 | q4 = s1 * c2 * c3 - c1 * s2 * s3 153 | return q1, q2, q3, q4 154 | 155 | 156 | def camPosToQuaternion(cx, cy, cz): 157 | q1a = 0 158 | q1b = 0 159 | q1c = math.sqrt(2) / 2 160 | q1d = math.sqrt(2) / 2 161 | camDist = math.sqrt(cx * cx + cy * cy + cz * cz) 162 | cx = cx / camDist 163 | cy = cy / camDist 164 | cz = cz / camDist 165 | t = math.sqrt(cx * cx + cy * cy) 166 | tx = cx / t 167 | ty = cy / t 168 | yaw = math.acos(ty) 169 | if tx > 0: 170 | yaw = 2 * math.pi - yaw 171 | pitch = 0 172 | tmp = min(max(tx * cx + ty * cy, -1), 1) 173 | # roll = math.acos(tx * cx + ty * cy) 174 | roll = math.acos(tmp) 175 | if cz < 0: 176 | roll = -roll 177 | print("%f %f %f" % (yaw, pitch, roll)) 178 | q2a, q2b, q2c, q2d = quaternionFromYawPitchRoll(yaw, pitch, roll) 179 | q1 = q1a * q2a - q1b * q2b - q1c * q2c - q1d * q2d 180 | q2 = q1b * q2a + q1a * q2b + q1d * q2c - q1c * q2d 181 | q3 = q1c * q2a - q1d * q2b + q1a * q2c + q1b * q2d 182 | q4 = q1d * q2a + q1c * q2b - q1b * q2c + q1a * q2d 183 | return q1, q2, q3, q4 184 | 185 | 186 | def camRotQuaternion(cx, cy, cz, theta): 187 | theta = theta / 180.0 * math.pi 188 | camDist = math.sqrt(cx * cx + cy * cy + cz * cz) 189 | cx = -cx / camDist 190 | cy = -cy / camDist 191 | cz = -cz / camDist 192 | q1 = math.cos(theta * 0.5) 193 | q2 = -cx * math.sin(theta * 0.5) 194 | q3 = -cy * math.sin(theta * 0.5) 195 | q4 = -cz * math.sin(theta * 0.5) 196 | return q1, q2, q3, q4 197 | 198 | 199 | def quaternionProduct(qx, qy): 200 | a = qx[0] 201 | b = qx[1] 202 | c = qx[2] 203 | d = qx[3] 204 | e = qy[0] 205 | f = qy[1] 206 | g = qy[2] 207 | h = qy[3] 208 | q1 = a * e - b * f - c * g - d * h 209 | q2 = a * f + b * e + c * h - d * g 210 | q3 = a * g - b * h + c * e + d * f 211 | q4 = a * h + b * g - c * f + d * e 212 | return q1, q2, q3, q4 213 | 214 | 215 | def obj_centened_camera_pos(dist, azimuth_deg, elevation_deg): 216 | phi = float(elevation_deg) / 180 * math.pi 217 | theta = float(azimuth_deg) / 180 * math.pi 218 | x = (dist * math.cos(theta) * math.cos(phi)) 219 | y = (dist * math.sin(theta) * math.cos(phi)) 220 | z = (dist * math.sin(phi)) 221 | return x, y, z 222 | 223 | 224 | def render(camera, outfile, pose): 225 | bpy.context.scene.render.filepath = outfile 226 | depth_file_output.file_slots[0].path = bpy.context.scene.render.filepath + '_depth.png' 227 | 228 | azimuth, elevation, theta = pose[:3] 229 | cx, cy, cz = obj_centened_camera_pos(cfg.cam_dist, azimuth, elevation) 230 | q1 = camPosToQuaternion(cx, cy, cz) 231 | q2 = camRotQuaternion(cx, cy, cz, theta) 232 | q = quaternionProduct(q2, q1) 233 | camera.location[0] = cx # + np.random.uniform(-cfg.pose_noise,g_camPos_noise) 234 | camera.location[1] = cy # + np.random.uniform(-g_camPos_noise,g_camPos_noise) 235 | camera.location[2] = cz # + np.random.uniform(-g_camPos_noise,g_camPos_noise) 236 | camera.rotation_mode = 'QUATERNION' 237 | 238 | camera.rotation_quaternion[0] = q[0] 239 | camera.rotation_quaternion[1] = q[1] 240 | camera.rotation_quaternion[2] = q[2] 241 | camera.rotation_quaternion[3] = q[3] 242 | # camera.location = [0, 1, 0] 243 | # camera.rotation_euler = [np.pi / 2, 0, np.pi] 244 | 245 | setup_light(bpy.context.scene) 246 | rotation_matrix = 
get_K_P_from_blender(camera)['RT'][:, :3] 247 | camera.location = -np.dot(rotation_matrix.T, pose[3:]) 248 | bpy.ops.render.render(write_still=True) 249 | 250 | 251 | def add_shader_on_world(): 252 | bpy.data.worlds['World'].use_nodes = True 253 | env_node = bpy.data.worlds['World'].node_tree.nodes.new(type='ShaderNodeTexEnvironment') 254 | back_node = bpy.data.worlds['World'].node_tree.nodes['Background'] 255 | bpy.data.worlds['World'].node_tree.links.new(env_node.outputs['Color'], back_node.inputs['Color']) 256 | 257 | 258 | def add_shader_on_ply_object(obj): 259 | bpy.ops.material.new() 260 | material = list(bpy.data.materials)[0] 261 | 262 | material.use_nodes = True 263 | material.node_tree.links.clear() 264 | 265 | mat_out = material.node_tree.nodes['Material Output'] 266 | diffuse_node = material.node_tree.nodes['Diffuse BSDF'] 267 | gloss_node = material.node_tree.nodes.new(type='ShaderNodeBsdfGlossy') 268 | attr_node = material.node_tree.nodes.new(type='ShaderNodeAttribute') 269 | 270 | material.node_tree.nodes.remove(diffuse_node) 271 | attr_node.attribute_name = 'Col' 272 | material.node_tree.links.new(attr_node.outputs['Color'], gloss_node.inputs['Color']) 273 | material.node_tree.links.new(gloss_node.outputs['BSDF'], mat_out.inputs['Surface']) 274 | 275 | obj.data.materials.append(material) 276 | 277 | return material 278 | 279 | 280 | def add_shader_on_obj_object(obj): 281 | bpy.ops.material.new() 282 | material = list(bpy.data.materials)[0] 283 | 284 | material.use_nodes = True 285 | material.node_tree.links.clear() 286 | 287 | mat_out = material.node_tree.nodes['Material Output'] 288 | diffuse_node = material.node_tree.nodes['Diffuse BSDF'] 289 | image_node = material.node_tree.nodes.new(type='ShaderNodeTexImage') 290 | 291 | material.node_tree.links.new(diffuse_node.outputs['BSDF'], mat_out.inputs['Surface']) 292 | material.node_tree.links.new(image_node.outputs['Color'], diffuse_node.inputs['Color']) 293 | img_path = '/home/pengsida/Datasets/YCB/models/002_master_chef_can/texture_map.png' 294 | img_name = os.path.basename(img_path) 295 | bpy.data.images.load(img_path) 296 | image_node.image = bpy.data.images[img_name] 297 | 298 | obj.data.materials.clear() 299 | obj.data.materials.append(material) 300 | 301 | return material 302 | 303 | 304 | def add_shader_on_plane(plane): 305 | bpy.ops.material.new() 306 | material = list(bpy.data.materials)[1] 307 | 308 | material.use_nodes = True 309 | material.node_tree.links.clear() 310 | 311 | mat_out = material.node_tree.nodes['Material Output'] 312 | diffuse_node = material.node_tree.nodes['Diffuse BSDF'] 313 | image_node = material.node_tree.nodes.new(type='ShaderNodeTexImage') 314 | 315 | material.node_tree.links.new(image_node.outputs['Color'], diffuse_node.inputs['Color']) 316 | material.node_tree.links.new(diffuse_node.outputs['BSDF'], mat_out.inputs['Surface']) 317 | 318 | img_path = '/home/pengsida/Pictures/board.png' 319 | img_name = os.path.basename(img_path) 320 | bpy.data.images.load(img_path) 321 | image_node.image = bpy.data.images[img_name] 322 | bpy.ops.object.mode_set(mode='EDIT') 323 | bpy.ops.uv.unwrap() 324 | bpy.ops.object.mode_set(mode='OBJECT') 325 | 326 | plane.data.materials.append(material) 327 | 328 | 329 | def set_material_node_parameters(material): 330 | nodes = material.node_tree.nodes 331 | if os.path.basename(args.input).endswith('.ply'): 332 | nodes['Glossy BSDF'].inputs['Roughness'].default_value = np.random.uniform(0.8, 1) 333 | else: 334 | nodes['Diffuse BSDF'].inputs['Roughness'].default_value 
= np.random.uniform(0, 1) 335 | 336 | 337 | def batch_render_with_linemod(args, camera): 338 | os.system('mkdir -p {}'.format(args.output_dir)) 339 | bpy.ops.import_mesh.ply(filepath=args.input) 340 | object = bpy.data.objects[os.path.basename(args.input).replace('.ply', '')] 341 | bpy.context.scene.render.image_settings.file_format = 'JPEG' 342 | 343 | # set up the cycles render configuration 344 | bpy.context.scene.render.engine = 'CYCLES' 345 | bpy.context.scene.cycles.sample_clamp_indirect = 1.0 346 | bpy.context.scene.cycles.blur_glossy = 3.0 347 | bpy.context.scene.cycles.samples = 100 348 | 349 | bpy.context.user_preferences.addons['cycles'].preferences.compute_device_type = "CUDA" 350 | bpy.context.scene.cycles.device = 'GPU' 351 | 352 | for mesh in bpy.data.meshes: 353 | mesh.use_auto_smooth = True 354 | 355 | add_shader_on_world() 356 | 357 | material = add_shader_on_ply_object(object) 358 | # add a plane under the object 359 | # bpy.ops.mesh.primitive_plane_add() 360 | # plane = bpy.data.objects['Plane'] 361 | # plane.location = [0, 0, args.height] 362 | # plane.scale = [0.28, 0.28, 0.28] 363 | # add_shader_on_plane(plane) 364 | 365 | bg_imgs = np.load(args.bg_imgs).astype(np.str) 366 | bg_imgs = np.random.choice(bg_imgs, size=cfg.NUM_SYN) 367 | poses = np.load(args.poses_path) 368 | begin_num_imgs = len(glob.glob(os.path.join(args.output_dir, '*.jpg'))) 369 | for i in range(begin_num_imgs, cfg.NUM_SYN): 370 | # overlay an background image and place the object 371 | img_name = os.path.basename(bg_imgs[i]) 372 | bpy.data.images.load(bg_imgs[i]) 373 | bpy.data.worlds['World'].node_tree.nodes['Environment Texture'].image = bpy.data.images[img_name] 374 | pose = poses[i] 375 | # x, y = np.random.uniform(-0.15, 0.15, size=2) 376 | x, y = 0, 0 377 | object.location = [x, y, 0] 378 | set_material_node_parameters(material) 379 | render(camera, '{}/{}'.format(args.output_dir, i), pose) 380 | object_to_world_pose = np.array([[1, 0, 0, x], 381 | [0, 1, 0, y], 382 | [0, 0, 1, 0]]) 383 | object_to_world_pose = np.append(object_to_world_pose, [[0, 0, 0, 1]], axis=0) 384 | KRT = get_K_P_from_blender(camera) 385 | world_to_camera_pose = np.append(KRT['RT'], [[0, 0, 0, 1]], axis=0) 386 | world_to_camera_pose = np.dot(world_to_camera_pose, object_to_world_pose)[:3] 387 | with open('{}/{}_RT.pkl'.format(args.output_dir, i), 'wb') as f: 388 | pickle.dump({'RT': world_to_camera_pose, 'K': KRT['K']}, f) 389 | bpy.data.images.remove(bpy.data.images[img_name]) 390 | 391 | 392 | def batch_render_ycb(args, camera): 393 | os.system('mkdir -p {}'.format(args.output_dir)) 394 | bpy.ops.import_scene.obj(filepath=args.input) 395 | object = list(bpy.data.objects)[-1] 396 | bpy.context.scene.render.image_settings.file_format = 'JPEG' 397 | 398 | # set up the cycles render configuration 399 | bpy.context.scene.render.engine = 'CYCLES' 400 | bpy.context.scene.cycles.sample_clamp_indirect = 1.0 401 | bpy.context.scene.cycles.blur_glossy = 3.0 402 | bpy.context.scene.cycles.samples = 100 403 | 404 | bpy.context.user_preferences.addons['cycles'].preferences.compute_device_type = "CUDA" 405 | bpy.context.scene.cycles.device = 'GPU' 406 | 407 | for mesh in bpy.data.meshes: 408 | mesh.use_auto_smooth = True 409 | 410 | add_shader_on_world() 411 | 412 | material = add_shader_on_obj_object(object) 413 | # add a plane under the object 414 | # bpy.ops.mesh.primitive_plane_add() 415 | # plane = bpy.data.objects['Plane'] 416 | # plane.location = [0, 0, args.height] 417 | # plane.scale = [0.28, 0.28, 0.28] 418 | # 
add_shader_on_plane(plane) 419 | 420 | bg_imgs = np.load(args.bg_imgs).astype(np.str) 421 | bg_imgs = np.random.choice(bg_imgs, size=cfg.NUM_SYN) 422 | poses = np.load(args.poses_path) 423 | begin_num_imgs = len(glob.glob(os.path.join(args.output_dir, '*.jpg'))) 424 | for i in range(begin_num_imgs, cfg.NUM_SYN): 425 | # overlay an background image and place the object 426 | img_name = os.path.basename(bg_imgs[i]) 427 | bpy.data.images.load(bg_imgs[i]) 428 | bpy.data.worlds['World'].node_tree.nodes['Environment Texture'].image = bpy.data.images[img_name] 429 | pose = poses[i] 430 | # x, y = np.random.uniform(-0.15, 0.15, size=2) 431 | x, y = 0, 0 432 | 433 | azi, ele, theta = (0, 0, 0) 434 | object.rotation_euler = (azi, ele, theta) 435 | azi, ele, theta = object.rotation_euler 436 | object.location = [x, y, 0] 437 | set_material_node_parameters(material) 438 | render(camera, '{}/{}'.format(args.output_dir, i), pose) 439 | 440 | rotation = euler2mat(azi, ele, theta) 441 | object_to_world_pose = np.concatenate([rotation, [[x], [y], [0]]], axis=-1) 442 | object_to_world_pose = np.append(object_to_world_pose, [[0, 0, 0, 1]], axis=0) 443 | KRT = get_K_P_from_blender(camera) 444 | world_to_camera_pose = np.append(KRT['RT'], [[0, 0, 0, 1]], axis=0) 445 | world_to_camera_pose = np.dot(world_to_camera_pose, object_to_world_pose)[:3] 446 | with open('{}/{}_RT.pkl'.format(args.output_dir, i), 'wb') as f: 447 | pickle.dump({'RT': world_to_camera_pose, 'K': KRT['K']}, f) 448 | bpy.data.images.remove(bpy.data.images[img_name]) 449 | 450 | 451 | if __name__ == '__main__': 452 | begin = time.time() 453 | args = parse_argument() 454 | camera, depth_file_output = setup() 455 | if os.path.basename(args.input).endswith('.ply'): 456 | batch_render_with_linemod(args, camera) 457 | else: 458 | batch_render_ycb(args, camera) 459 | print('cost {} s'.format(time.time() - begin)) 460 | 461 | -------------------------------------------------------------------------------- /blender/render_utils.py: -------------------------------------------------------------------------------- 1 | from config import cfg 2 | from base_utils import PoseTransformer, read_pose, read_pickle, save_pickle 3 | import os 4 | import numpy as np 5 | from transforms3d.quaternions import mat2quat 6 | import glob 7 | from PIL import Image 8 | from scipy import stats 9 | import OpenEXR 10 | import Imath 11 | from multiprocessing.dummy import Pool 12 | import struct 13 | import scipy.io as sio 14 | 15 | 16 | class DataStatistics(object): 17 | # world_to_camera_pose = np.array([[-1.19209304e-07, 1.00000000e+00, -2.98023188e-08, 1.19209304e-07], 18 | # [-8.94069672e-08, 2.22044605e-16, -1.00000000e+00, 8.94069672e-08], 19 | # [-1.00000000e+00, -8.94069672e-08, 1.19209304e-07, 1.00000000e+00]]) 20 | world_to_camera_pose = np.array([[-1.00000024e+00, -8.74227979e-08, -5.02429621e-15, 8.74227979e-08], 21 | [5.02429621e-15, 1.34358856e-07, -1.00000012e+00, -1.34358856e-07], 22 | [8.74227979e-08, -1.00000012e+00, 1.34358856e-07, 1.00000012e+00]]) 23 | 24 | def __init__(self, class_type): 25 | self.class_type = class_type 26 | self.mask_path = os.path.join(cfg.LINEMOD,'{}/mask/*.png'.format(class_type)) 27 | self.dir_path = os.path.join(cfg.LINEMOD_ORIG,'{}/data'.format(class_type)) 28 | 29 | dataset_pose_dir_path = os.path.join(cfg.DATA_DIR, 'dataset_poses') 30 | os.system('mkdir -p {}'.format(dataset_pose_dir_path)) 31 | self.dataset_poses_path = os.path.join(dataset_pose_dir_path, '{}_poses.npy'.format(class_type)) 32 | blender_pose_dir_path = 
os.path.join(cfg.DATA_DIR, 'blender_poses') 33 | os.system('mkdir -p {}'.format(blender_pose_dir_path)) 34 | self.blender_poses_path = os.path.join(blender_pose_dir_path, '{}_poses.npy'.format(class_type)) 35 | os.system('mkdir -p {}'.format(blender_pose_dir_path)) 36 | 37 | self.pose_transformer = PoseTransformer(class_type) 38 | 39 | def get_proper_crop_size(self): 40 | mask_paths = glob.glob(self.mask_path) 41 | widths = [] 42 | heights = [] 43 | 44 | for mask_path in mask_paths: 45 | mask = Image.open(mask_path).convert('1') 46 | mask = np.array(mask).astype(np.int32) 47 | row_col = np.argwhere(mask == 1) 48 | min_row, max_row = np.min(row_col[:, 0]), np.max(row_col[:, 0]) 49 | min_col, max_col = np.min(row_col[:, 1]), np.max(row_col[:, 1]) 50 | width = max_col - min_col 51 | height = max_row - min_row 52 | widths.append(width) 53 | heights.append(height) 54 | 55 | widths = np.array(widths) 56 | heights = np.array(heights) 57 | print('min width: {}, max width: {}'.format(np.min(widths), np.max(widths))) 58 | print('min height: {}, max height: {}'.format(np.min(heights), np.max(heights))) 59 | 60 | def get_quat_translation(self, object_to_camera_pose): 61 | object_to_camera_pose = np.append(object_to_camera_pose, [[0, 0, 0, 1]], axis=0) 62 | world_to_camera_pose = np.append(self.world_to_camera_pose, [[0, 0, 0, 1]], axis=0) 63 | object_to_world_pose = np.dot(np.linalg.inv(world_to_camera_pose), object_to_camera_pose) 64 | quat = mat2quat(object_to_world_pose[:3, :3]) 65 | translation = object_to_world_pose[:3, 3] 66 | return quat, translation 67 | 68 | def get_dataset_poses(self): 69 | if os.path.exists(self.dataset_poses_path): 70 | poses = np.load(self.dataset_poses_path) 71 | return poses[:, :3], poses[:, 3:] 72 | 73 | eulers = [] 74 | translations = [] 75 | train_set = np.loadtxt(os.path.join(cfg.LINEMOD, '{}/training_range.txt'.format(self.class_type)),np.int32) 76 | for idx in train_set: 77 | rot_path = os.path.join(self.dir_path, 'rot{}.rot'.format(idx)) 78 | tra_path = os.path.join(self.dir_path, 'tra{}.tra'.format(idx)) 79 | pose = read_pose(rot_path, tra_path) 80 | euler = self.pose_transformer.orig_pose_to_blender_euler(pose) 81 | eulers.append(euler) 82 | translations.append(pose[:, 3]) 83 | 84 | eulers = np.array(eulers) 85 | translations = np.array(translations) 86 | np.save(self.dataset_poses_path, np.concatenate([eulers, translations], axis=-1)) 87 | 88 | return eulers, translations 89 | 90 | def sample_sphere(self, num_samples): 91 | """ sample angles from the sphere 92 | reference: https://zhuanlan.zhihu.com/p/25988652?group_id=828963677192491008 93 | """ 94 | flat_objects = ['037_scissors', '051_large_clamp', '052_extra_large_clamp'] 95 | if self.class_type in flat_objects: 96 | begin_elevation = 30 97 | else: 98 | begin_elevation = 0 99 | ratio = (begin_elevation + 90) / 180 100 | num_points = int(num_samples // (1 - ratio)) 101 | phi = (np.sqrt(5) - 1.0) / 2. 102 | azimuths = [] 103 | elevations = [] 104 | for n in range(num_points - num_samples, num_points): 105 | z = 2. * n / num_points - 1. 
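            # Fibonacci (golden-angle) sampling: z is spaced uniformly over the sphere's height and
            # the azimuth advances by a golden-ratio fraction of a turn, giving near-uniform coverage;
            # starting n at (num_points - num_samples) drops the lowest elevations (see begin_elevation)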
106 | azimuths.append(np.rad2deg(2 * np.pi * n * phi % (2 * np.pi))) 107 | elevations.append(np.rad2deg(np.arcsin(z))) 108 | return np.array(azimuths), np.array(elevations) 109 | 110 | def sample_poses(self): 111 | eulers, translations = self.get_dataset_poses() 112 | num_samples = cfg.NUM_SYN 113 | azimuths, elevations = self.sample_sphere(num_samples) 114 | euler_sampler = stats.gaussian_kde(eulers.T) 115 | eulers = euler_sampler.resample(num_samples).T 116 | eulers[:, 0] = azimuths 117 | eulers[:, 1] = elevations 118 | translation_sampler = stats.gaussian_kde(translations.T) 119 | translations = translation_sampler.resample(num_samples).T 120 | np.save(self.blender_poses_path, np.concatenate([eulers, translations], axis=-1)) 121 | 122 | 123 | class YCBDataStatistics(DataStatistics): 124 | def __init__(self, class_type): 125 | super(YCBDataStatistics, self).__init__(class_type) 126 | self.dir_path = os.path.join(cfg.LINEMOD_ORIG, '{}/data'.format(class_type)) 127 | self.class_types = np.loadtxt(os.path.join(cfg.YCB, 'image_sets/classes.txt'), dtype=np.str) 128 | self.class_types = np.insert(self.class_types, 0, 'background') 129 | self.train_set = np.loadtxt(os.path.join(cfg.YCB, 'image_sets/train.txt'), dtype=np.str) 130 | self.meta_pattern = os.path.join(cfg.YCB, 'data/{}-meta.mat') 131 | self.dataset_poses_pattern = os.path.join(cfg.DATA_DIR, 'dataset_poses/{}_poses.npy') 132 | 133 | def get_dataset_poses(self): 134 | if os.path.exists(self.dataset_poses_path): 135 | poses = np.load(self.dataset_poses_pattern.format(self.class_type)) 136 | return poses[:, :3], poses[:, 3:] 137 | 138 | dataset_poses = {} 139 | for i in self.train_set: 140 | meta_path = self.meta_pattern.format(i) 141 | meta = sio.loadmat(meta_path) 142 | classes = meta['cls_indexes'].ravel() 143 | poses = meta['poses'] 144 | for idx, cls_idx in enumerate(classes): 145 | cls_poses = dataset_poses.setdefault(self.class_types[cls_idx], [[], []]) 146 | pose = poses[..., idx] 147 | euler = self.pose_transformer.blender_pose_to_blender_euler(pose) 148 | cls_poses[0].append(euler) 149 | cls_poses[1].append(pose[:, 3]) 150 | 151 | for class_type, cls_poses in dataset_poses.items(): 152 | np.save(self.dataset_poses_pattern.format(class_type), np.concatenate(cls_poses, axis=-1)) 153 | 154 | cls_poses = dataset_poses[self.class_type] 155 | eulers = np.array(cls_poses[0]) 156 | translations = np.array(cls_poses[1]) 157 | 158 | return eulers, translations 159 | 160 | 161 | class Renderer(object): 162 | intrinsic_matrix = { 163 | 'linemod': np.array([[572.4114, 0., 325.2611], 164 | [0., 573.57043, 242.04899], 165 | [0., 0., 1.]]), 166 | # 'blender': np.array([[280.0, 0.0, 128.0], 167 | # [0.0, 280.0, 128.0], 168 | # [0.0, 0.0, 1.0]]), 169 | 'blender': np.array([[700., 0., 320.], 170 | [0., 700., 240.], 171 | [0., 0., 1.]]) 172 | } 173 | 174 | def __init__(self, class_type): 175 | self.class_type = class_type 176 | self.bg_imgs_path = os.path.join(cfg.DATA_DIR, 'bg_imgs.npy') 177 | self.poses_path = os.path.join(cfg.DATA_DIR, 'blender_poses', '{}_poses.npy').format(class_type) 178 | self.output_dir_path = os.path.join(cfg.LINEMOD,'renders/{}').format(class_type) 179 | self.blender_path = cfg.BLENDER_PATH 180 | self.blank_blend = os.path.join(cfg.DATA_DIR, 'blank.blend') 181 | self.py_path = os.path.join(cfg.BLENDER_DIR, 'render_backend.py') 182 | self.obj_path = os.path.join(cfg.LINEMOD,'{}/{}.ply').format(class_type, class_type) 183 | self.plane_height_path = os.path.join(cfg.DATA_DIR, 'plane_height.pkl') 184 | 185 | def 
get_bg_imgs(self): 186 | if os.path.exists(self.bg_imgs_path): 187 | return 188 | 189 | img_paths = glob.glob(os.path.join(cfg.SUN, 'JPEGImages/*')) 190 | bg_imgs = [] 191 | 192 | for img_path in img_paths: 193 | img = Image.open(img_path) 194 | row, col = img.size 195 | if row > 500 and col > 500: 196 | bg_imgs.append(img_path) 197 | 198 | np.save(self.bg_imgs_path, bg_imgs) 199 | 200 | def project_model(self, model_3d, pose, camera_type): 201 | camera_model_2d = np.dot(model_3d, pose[:, :3].T) + pose[:, 3] 202 | camera_model_2d = np.dot(camera_model_2d, self.intrinsic_matrix[camera_type].T) 203 | return camera_model_2d[:, :2] / camera_model_2d[:, 2:] 204 | 205 | @staticmethod 206 | def exr_to_png(exr_path): 207 | depth_path = exr_path.replace('.png0001.exr', '.png') 208 | exr_image = OpenEXR.InputFile(exr_path) 209 | dw = exr_image.header()['dataWindow'] 210 | (width, height) = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1) 211 | 212 | def read_exr(s, width, height): 213 | mat = np.fromstring(s, dtype=np.float32) 214 | mat = mat.reshape(height, width) 215 | return mat 216 | 217 | dmap, _, _ = [read_exr(s, width, height) for s in exr_image.channels('BGR', Imath.PixelType(Imath.PixelType.FLOAT))] 218 | dmap = Image.fromarray((dmap != 1).astype(np.int32)) 219 | dmap.save(depth_path) 220 | exr_image.close() 221 | os.system('rm {}'.format(exr_path)) 222 | 223 | def sample_poses(self): 224 | statistician = DataStatistics(self.class_type) 225 | statistician.sample_poses() 226 | 227 | def get_plane_height(self): 228 | if os.path.exists(self.plane_height_path): 229 | plane_height = read_pickle(self.plane_height_path) 230 | else: 231 | plane_height = {} 232 | 233 | if self.class_type in plane_height: 234 | return plane_height[self.class_type] 235 | else: 236 | pose_transformer = PoseTransformer(self.class_type) 237 | model = pose_transformer.get_blender_model() 238 | height = np.min(model[:, -1]) 239 | plane_height[self.class_type] = height 240 | save_pickle(plane_height, self.plane_height_path) 241 | return height 242 | 243 | def run(self): 244 | """ Render images 245 | 1. prepare background images 246 | 2. sample poses from the pose distribution of training data 247 | 3. call the blender to render images 248 | """ 249 | self.get_bg_imgs() 250 | self.sample_poses() 251 | 252 | if not os.path.exists(self.output_dir_path): 253 | os.makedirs(self.output_dir_path) 254 | 255 | os.system('{} {} --background --python {} -- --input {} --output_dir {} --bg_imgs {} --poses_path {}'. 
256 | format(self.blender_path, self.blank_blend, self.py_path, self.obj_path, 257 | self.output_dir_path, self.bg_imgs_path, self.poses_path)) 258 | depth_paths = glob.glob(os.path.join(self.output_dir_path, '*.exr')) 259 | for depth_path in depth_paths: 260 | self.exr_to_png(depth_path) 261 | 262 | @staticmethod 263 | def multi_thread_render(): 264 | # objects = ['ape', 'benchvise', 'bowl', 'can', 'cat', 'cup', 'driller', 'duck', 265 | # 'glue', 'holepuncher', 'iron', 'lamp', 'phone', 'cam', 'eggbox'] 266 | objects = ['lamp', 'phone'] 267 | 268 | def render(class_type): 269 | renderer = Renderer(class_type) 270 | renderer.run() 271 | 272 | with Pool(processes=2) as pool: 273 | pool.map(render, objects) 274 | 275 | 276 | class YCBRenderer(Renderer): 277 | def __init__(self, class_type): 278 | super(YCBRenderer, self).__init__(class_type) 279 | self.output_dir_path = os.path.join(cfg.YCB, 'renders/{}').format(class_type) 280 | self.blank_blend = os.path.join(cfg.DATA_DIR, 'blank.blend') 281 | self.obj_path = os.path.join(cfg.YCB, 'models', class_type, 'textured.obj') 282 | self.class_types = np.loadtxt(os.path.join(cfg.YCB, 'image_sets/classes.txt'), dtype=np.str) 283 | self.class_types = np.insert(self.class_types, 0, 'background') 284 | 285 | def sample_poses(self): 286 | statistician = YCBDataStatistics(self.class_type) 287 | statistician.sample_poses() 288 | 289 | @staticmethod 290 | def multi_thread_render(): 291 | objects = ['003_cracker_box', '004_sugar_box', '005_tomato_soup_can', '006_mustard_bottle'] 292 | 293 | def render(class_type): 294 | renderer = YCBRenderer(class_type) 295 | renderer.run() 296 | 297 | with Pool(processes=2) as pool: 298 | pool.map(render, objects) 299 | 300 | 301 | class MultiRenderer(Renderer): 302 | class_types = ['ape', 'benchvise', 'can', 'cat', 'driller', 'duck', 'glue', 303 | 'holepuncher', 'iron', 'lamp', 'phone', 'cam', 'eggbox'] 304 | 305 | def __init__(self): 306 | super(MultiRenderer, self).__init__('') 307 | self.poses_path = os.path.join(cfg.DATA_DIR, '{}_poses.npy') 308 | self.output_dir_path = '/home/pengsida/Datasets/LINEMOD/renders/all_objects' 309 | 310 | def sample_poses(self): 311 | for class_type in self.class_types: 312 | statistician = DataStatistics(class_type) 313 | statistician.sample_poses() 314 | 315 | def run(self): 316 | """ Render images 317 | 1. prepare background images 318 | 2. sample poses from the pose distribution of training data 319 | 3. call the blender to render images 320 | """ 321 | self.get_bg_imgs() 322 | self.sample_poses() 323 | 324 | os.system('{} {} --background --python {} -- --input {} --output_dir {} --use_cycles True --bg_imgs {} --poses_path {}'. 
325 | format(self.blender_path, self.blank_blend, self.py_path, self.obj_path, self.output_dir_path, self.bg_imgs_path, self.poses_path)) 326 | depth_paths = glob.glob(os.path.join(self.output_dir_path, '*.exr')) 327 | for depth_path in depth_paths: 328 | self.exr_to_png(depth_path) 329 | 330 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict 2 | import os 3 | import sys 4 | import numpy as np 5 | 6 | cfg = EasyDict() 7 | 8 | """ 9 | Path settings 10 | """ 11 | cfg.ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | cfg.DATA_DIR = os.path.join(cfg.ROOT_DIR, 'data') 13 | cfg.MODEL_DIR = os.path.join(cfg.DATA_DIR, 'model') 14 | cfg.REC_DIR = os.path.join(cfg.DATA_DIR, 'record') 15 | cfg.FIGURE_DIR = os.path.join(cfg.ROOT_DIR, 'figure') 16 | cfg.BLENDER_DIR = os.path.join(cfg.ROOT_DIR, "blender") 17 | 18 | 19 | def add_path(): 20 | for key, value in cfg.items(): 21 | if 'DIR' in key: 22 | sys.path.insert(0, value) 23 | 24 | 25 | add_path() 26 | sys.path.extend([".", ".."]) 27 | 28 | 29 | """ 30 | Data settings 31 | """ 32 | cfg.LINEMOD = os.path.join(cfg.DATA_DIR, 'LINEMOD') 33 | cfg.LINEMOD_ORIG = os.path.join(cfg.DATA_DIR, 'LINEMOD_ORIG') 34 | cfg.OCCLUSION_LINEMOD = os.path.join(cfg.DATA_DIR, 'OCCLUSION_LINEMOD') 35 | cfg.YCB = os.path.join(cfg.DATA_DIR, 'YCB') 36 | cfg.SUN = os.path.join(cfg.DATA_DIR, "SUN") 37 | 38 | """ 39 | Rendering setting 40 | """ 41 | cfg.BLENDER_PATH = '/home/pengsida/Software/blender-2.79a-linux-glibc219-x86_64/blender' 42 | cfg.NUM_SYN = 10 43 | cfg.WIDTH = 640 44 | cfg.HEIGHT = 480 45 | cfg.low_azi = 0 46 | cfg.high_azi = 360 47 | cfg.low_ele = -15 48 | cfg.high_ele = 40 49 | cfg.low_theta = 10 50 | cfg.high_theta = 40 51 | cfg.cam_dist = 0.5 52 | cfg.MIN_DEPTH = 0 53 | cfg.MAX_DEPTH = 2 54 | 55 | cfg.render_K=np.array([[700., 0., 320.], 56 | [0., 700., 240.], 57 | [0., 0., 1.]],np.float32) 58 | 59 | cfg.linemod_K=np.array([[572.41140,0. ,325.26110], 60 | [0. ,573.57043,242.04899], 61 | [0. ,0. ,1. 
]],np.float32) 62 | 63 | cfg.linemod_cls_names=['ape','cam','cat','duck','glue','iron','phone', 64 | 'benchvise','can','driller','eggbox','holepuncher','lamp'] 65 | cfg.occ_linemod_cls_names=['ape','can','cat','driller','duck','eggbox','glue','holepuncher'] 66 | cfg.linemod_plane=['can'] 67 | 68 | cfg.symmetry_linemod_cls_names=['glue','eggbox'] 69 | 70 | 71 | ''' 72 | pascal 3d + 73 | ''' 74 | cfg.PASCAL = os.path.join(cfg.DATA_DIR, 'PASCAL3D') 75 | cfg.pascal_cls_names=['aeroplane','bicycle','boat','bottle','bus','car', 76 | 'chair','diningtable','motorbike','sofa','train','tvmonitor'] 77 | cfg.pascal_size=128 78 | 79 | 80 | ''' 81 | YCB 82 | ''' 83 | cfg.ycb_sym_cls=[21,20,19,16,13] # foam_brick extra_large_clamp large_clamp wood_block bowl 84 | cfg.ycb_class_num=21 85 | -------------------------------------------------------------------------------- /data/blank.blend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zju3dv/pvnet-rendering/2922b58c1c749242bb9a07e7ce6067d582b353a2/data/blank.blend -------------------------------------------------------------------------------- /download_linemod_orig.sh: -------------------------------------------------------------------------------- 1 | cd ~ 2 | mkdir LINEMOD_ORIG 3 | cd LINEMOD_ORIG 4 | 5 | # http://campar.in.tum.de/Main/StefanHinterstoisser 6 | 7 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/ape.zip' 8 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/benchviseblue.zip' 9 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/bowl.zip' 10 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/can.zip' 11 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/cat.zip' 12 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/cup.zip' 13 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/driller.zip' 14 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/duck.zip' 15 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/glue.zip' 16 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/holepuncher.zip' 17 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/iron.zip' 18 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/lamp.zip' 19 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/phone.zip' 20 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/cam.zip' 21 | wget -c 'http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/eggbox.zip' 22 | 23 | for file in `ls`; do unzip $file; done; 24 | -------------------------------------------------------------------------------- /fuse/fuse.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import numpy as np 4 | import os 5 | import time 6 | import cv2 7 | 8 | from glob import glob 9 | from PIL import ImageFile, Image 10 | from plyfile import PlyData 11 | from skimage.io import imread, imsave 12 | from concurrent.futures import ProcessPoolExecutor 13 | 14 | from config import cfg 15 | 16 | 17 | class ModelAligner(object): 18 | rotation_transform = np.array([[1., 0., 0.], 19 | [0., -1., 0.], 20 | [0., 0., -1.]]) 21 | translation_transforms = { 22 | # 
'cat': np.array([-0.00577495, -0.01259045, -0.04062323]) 23 | } 24 | intrinsic_matrix = { 25 | 'linemod': np.array([[572.4114, 0., 325.2611], 26 | [0., 573.57043, 242.04899], 27 | [0., 0., 1.]]), 28 | # 'blender': np.array([[280.0, 0.0, 128.0], 29 | # [0.0, 280.0, 128.0], 30 | # [0.0, 0.0, 1.0]]), 31 | 'blender': np.array([[700., 0., 320.], 32 | [0., 700., 240.], 33 | [0., 0., 1.]]) 34 | } 35 | 36 | def __init__(self, class_type,linemod_dir,linemod_orig_dir): 37 | self.class_type = class_type 38 | self.blender_model_path = os.path.join(linemod_dir,'{}/{}.ply'.format(class_type, class_type)) 39 | self.orig_model_path = os.path.join(linemod_orig_dir,'{}/mesh.ply'.format(class_type)) 40 | self.orig_old_model_path = os.path.join(linemod_orig_dir,'{}/OLDmesh.ply'.format(class_type)) 41 | self.transform_dat_path = os.path.join(linemod_orig_dir,'{}/transform.dat'.format(class_type)) 42 | 43 | self.R_p2w,self.t_p2w,self.s_p2w=self.setup_p2w_transform() 44 | 45 | @staticmethod 46 | def setup_p2w_transform(): 47 | transform1 = np.array([[0.161513626575, -0.827108919621, 0.538334608078, -0.245206743479], 48 | [-0.986692547798, -0.124983474612, 0.104004733264, -0.050683632493], 49 | [-0.018740313128, -0.547968924046, -0.836288750172, 0.387638419867]]) 50 | transform2 = np.array([[0.976471602917, 0.201606079936, -0.076541729271, -0.000718327821], 51 | [-0.196746662259, 0.978194475174, 0.066531419754, 0.000077120210], 52 | [0.088285841048, -0.049906700850, 0.994844079018, -0.001409600372]]) 53 | 54 | R1 = transform1[:, :3] 55 | t1 = transform1[:, 3] 56 | R2 = transform2[:, :3] 57 | t2 = transform2[:, 3] 58 | 59 | # printer system to world system 60 | t_p2w = np.dot(R2, t1) + t2 61 | R_p2w = np.dot(R2, R1) 62 | s_p2w = 0.85 63 | return R_p2w,t_p2w,s_p2w 64 | 65 | def pose_p2w(self,RT): 66 | t,R=RT[:,3],RT[:,:3] 67 | R_w2c=np.dot(R, self.R_p2w.T) 68 | t_w2c=-np.dot(R_w2c,self.t_p2w)+self.s_p2w*t 69 | return np.concatenate([R_w2c,t_w2c[:,None]],1) 70 | 71 | @staticmethod 72 | def load_ply_model(model_path): 73 | ply = PlyData.read(model_path) 74 | data = ply.elements[0].data 75 | x = data['x'] 76 | y = data['y'] 77 | z = data['z'] 78 | return np.stack([x, y, z], axis=-1) 79 | 80 | def read_transform_dat(self): 81 | transform_dat = np.loadtxt(self.transform_dat_path, skiprows=1)[:, 1] 82 | transform_dat = np.reshape(transform_dat, newshape=[3, 4]) 83 | return transform_dat 84 | 85 | def load_orig_model(self): 86 | if os.path.exists(self.orig_model_path): 87 | return self.load_ply_model(self.orig_model_path) / 1000. 88 | else: 89 | transform = self.read_transform_dat() 90 | old_model = self.load_ply_model(self.orig_old_model_path) / 1000. 
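# NOTE: OLDmesh.ply is stored in a different coordinate frame from mesh.ply; the 3x4 matrix
# read from transform.dat is applied below (rotate, then translate) so that this fallback model
# ends up in the same frame as mesh.ply before it is used for alignment.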
91 | old_model = np.dot(old_model, transform[:, :3].T) + transform[:, 3] 92 | return old_model 93 | 94 | def get_translation_transform(self): 95 | if self.class_type in self.translation_transforms: 96 | return self.translation_transforms[self.class_type] 97 | 98 | blender_model = self.load_ply_model(self.blender_model_path) 99 | orig_model = self.load_orig_model() 100 | blender_model = np.dot(blender_model, self.rotation_transform.T) 101 | translation_transform = np.mean(orig_model, axis=0) - np.mean(blender_model, axis=0) 102 | self.translation_transforms[self.class_type] = translation_transform 103 | 104 | return translation_transform 105 | 106 | class PoseTransformer(object): 107 | rotation_transform = np.array([[1., 0., 0.], 108 | [0., -1., 0.], 109 | [0., 0., -1.]]) 110 | translation_transforms = {} 111 | class_type_to_number = { 112 | 'ape': '001', 113 | 'can': '004', 114 | 'cat': '005', 115 | 'driller': '006', 116 | 'duck': '007', 117 | 'eggbox': '008', 118 | 'glue': '009', 119 | 'holepuncher': '010' 120 | } 121 | blender_models={} 122 | 123 | def __init__(self, class_type,linemod_dir,linemod_orig_dir): 124 | self.class_type = class_type 125 | self.blender_model_path = os.path.join(linemod_dir,'{}/{}.ply'.format(class_type, class_type)) 126 | self.orig_model_path = os.path.join(linemod_orig_dir,'{}/mesh.ply'.format(class_type)) 127 | self.model_aligner = ModelAligner(class_type,linemod_dir,linemod_orig_dir) 128 | 129 | def orig_pose_to_blender_pose(self, pose): 130 | rot, tra = pose[:, :3], pose[:, 3] 131 | tra = tra + np.dot(rot, self.model_aligner.get_translation_transform()) 132 | rot = np.dot(rot, self.rotation_transform) 133 | return np.concatenate([rot, np.reshape(tra, newshape=[3, 1])], axis=-1) 134 | 135 | def read_pickle(pkl_path): 136 | with open(pkl_path, 'rb') as f: 137 | return pickle.load(f) 138 | 139 | def save_pickle(data, pkl_path): 140 | with open(pkl_path, 'wb') as f: 141 | pickle.dump(data, f) 142 | 143 | def read_rgb_np(rgb_path): 144 | ImageFile.LOAD_TRUNCATED_IMAGES = True 145 | img = Image.open(rgb_path).convert('RGB') 146 | img = np.array(img,np.uint8) 147 | return img 148 | 149 | def read_mask_np(mask_path): 150 | mask = Image.open(mask_path) 151 | mask_seg = np.array(mask).astype(np.int32) 152 | return mask_seg 153 | 154 | def read_pose(rot_path, tra_path): 155 | rot = np.loadtxt(rot_path, skiprows=1) 156 | tra = np.loadtxt(tra_path, skiprows=1) / 100. 
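# rot{k}.rot / tra{k}.tra start with a one-line header (hence skiprows=1); the translation is
# stored in centimeters, so it is divided by 100 to get meters before assembling the 3x4 [R|t].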
157 | return np.concatenate([rot, np.reshape(tra, newshape=[3, 1])], axis=-1) 158 | 159 | def collect_train_val_test_info(linemod_dir,cls_name): 160 | with open(os.path.join(linemod_dir,cls_name,'test.txt'),'r') as f: 161 | test_fns=[line.strip().split('/')[-1] for line in f.readlines()] 162 | 163 | with open(os.path.join(linemod_dir,cls_name,'train.txt'),'r') as f: 164 | train_fns=[line.strip().split('/')[-1] for line in f.readlines()] 165 | 166 | return test_fns, train_fns 167 | 168 | def collect_linemod_set_info(linemod_dir,linemod_cls_name,linemod_orig_dir,cache_dir='./'): 169 | database=[] 170 | if os.path.exists(os.path.join(cache_dir,'{}_info.pkl').format(linemod_cls_name)): 171 | return read_pickle(os.path.join(cache_dir,'{}_info.pkl').format(linemod_cls_name)) 172 | 173 | _,train_fns=collect_train_val_test_info(linemod_dir,linemod_cls_name) 174 | print('begin generate database {}'.format(linemod_cls_name)) 175 | rgb_dir=os.path.join(linemod_dir,linemod_cls_name,'JPEGImages') 176 | msk_dir=os.path.join(linemod_dir,linemod_cls_name,'mask') 177 | rt_dir = os.path.join(linemod_orig_dir, linemod_cls_name, 'data') 178 | img_num=len(os.listdir(rgb_dir)) 179 | for k in range(img_num): 180 | data={} 181 | data['rgb_pth']=os.path.join(rgb_dir, '{:06}.jpg'.format(k)) 182 | data['dpt_pth']=os.path.join(msk_dir, '{:04}.png'.format(k)) 183 | if data['rgb_pth'].split('/')[-1] not in train_fns: continue 184 | 185 | pose=read_pose(os.path.join(rt_dir, 'rot{}.rot'.format(k)), 186 | os.path.join(rt_dir, 'tra{}.tra'.format(k))) 187 | pose_transformer = PoseTransformer(linemod_cls_name, linemod_dir, linemod_orig_dir) 188 | data['RT'] = pose_transformer.orig_pose_to_blender_pose(pose).astype(np.float32) 189 | database.append(data) 190 | 191 | print('success generate database {} len {}'.format(linemod_cls_name,len(database))) 192 | save_pickle(database,os.path.join(cache_dir,'{}_info.pkl').format(linemod_cls_name)) 193 | return database 194 | 195 | def randomly_read_background(background_dir,cache_dir): 196 | if os.path.exists(os.path.join(cache_dir,'background_info.pkl')): 197 | fns=read_pickle(os.path.join(cache_dir,'background_info.pkl')) 198 | else: 199 | fns=glob(os.path.join(background_dir,'*.jpg'))+glob(os.path.join(background_dir,'*.png')) 200 | save_pickle(fns,os.path.join(cache_dir,'background_info.pkl')) 201 | return imread(fns[np.random.randint(0,len(fns))]) 202 | 203 | def prepare_dataset_parallel(output_dir, linemod_dir, linemod_orig_dir, fuse_num, background_dir, cache_dir, worker_num=8): 204 | exector=ProcessPoolExecutor(max_workers=worker_num) 205 | futures=[] 206 | 207 | for cls_name in linemod_cls_names: 208 | collect_linemod_set_info(linemod_dir,cls_name,linemod_orig_dir,cache_dir) 209 | randomly_read_background(background_dir,cache_dir) 210 | 211 | for idx in np.arange(fuse_num): 212 | seed=np.random.randint(5000) 213 | futures.append(exector.submit( 214 | prepare_dataset_single,output_dir,idx, linemod_cls_names, linemod_dir, linemod_orig_dir, background_dir,cache_dir, seed)) 215 | 216 | for f in futures: 217 | f.result() 218 | 219 | 220 | def prepare_dataset_single(output_dir,idx,linemod_cls_names,linemod_dir,linemod_orig_dir,background_dir,cache_dir,seed): 221 | time_begin=time.time() 222 | np.random.seed(seed) 223 | rgbs,masks,begins,poses=[],[],[],[] 224 | image_dbs={} 225 | for cls_id,cls_name in enumerate(linemod_cls_names): 226 | image_dbs[cls_id]=collect_linemod_set_info(linemod_dir,cls_name,linemod_orig_dir,cache_dir) 227 | 228 | for cls_id,cls_name in 
enumerate(linemod_cls_names): 229 | rgb, mask, begin, pose=randomly_sample_foreground(image_dbs[cls_id], linemod_dir) 230 | mask*=cls_id+1 231 | rgbs.append(rgb) 232 | masks.append(mask) 233 | begins.append(begin) 234 | poses.append(pose) 235 | 236 | background=randomly_read_background(background_dir,cache_dir) 237 | 238 | fuse_img, fuse_mask, fuse_begins= fuse_regions(rgbs, masks, begins, background, 480, 640) 239 | 240 | save_fuse_data(output_dir, idx, fuse_img, fuse_mask, fuse_begins, poses) 241 | print('{} cost {} s'.format(idx,time.time()-time_begin)) 242 | 243 | def fuse_regions(rgbs,masks,begins,background,th,tw): 244 | fuse_order=np.arange(len(rgbs)) 245 | np.random.shuffle(fuse_order) 246 | fuse_img=background 247 | fuse_img=cv2.resize(fuse_img,(tw,th),interpolation=cv2.INTER_LINEAR) 248 | fuse_mask=np.zeros([fuse_img.shape[0],fuse_img.shape[1]],np.int32) 249 | for idx in fuse_order: 250 | rh,rw=masks[idx].shape 251 | bh=np.random.randint(0,fuse_img.shape[0]-rh) 252 | bw=np.random.randint(0,fuse_img.shape[1]-rw) 253 | 254 | silhouette=masks[idx]>0 255 | out_silhouette=np.logical_not(silhouette) 256 | fuse_mask[bh:bh+rh,bw:bw+rw]*=out_silhouette.astype(fuse_mask.dtype) 257 | fuse_mask[bh:bh+rh,bw:bw+rw]+=masks[idx] 258 | 259 | fuse_img[bh:bh+rh,bw:bw+rw]*=out_silhouette.astype(fuse_img.dtype)[:,:,None] 260 | fuse_img[bh:bh+rh,bw:bw+rw]+=rgbs[idx] 261 | 262 | begins[idx][0]=-begins[idx][0]+bh 263 | begins[idx][1]=-begins[idx][1]+bw 264 | 265 | return fuse_img,fuse_mask,begins 266 | 267 | def randomly_sample_foreground(image_db,linemod_dir): 268 | idx=np.random.randint(0,len(image_db)) 269 | rgb_pth=os.path.join(linemod_dir,image_db[idx]['rgb_pth']) 270 | dpt_pth=os.path.join(linemod_dir,image_db[idx]['dpt_pth']) 271 | rgb = read_rgb_np(rgb_pth) 272 | mask = read_mask_np(dpt_pth) 273 | mask=np.sum(mask,2)>0 274 | mask=np.asarray(mask,np.int32) 275 | 276 | hs,ws=np.nonzero(mask) 277 | hmin,hmax=np.min(hs),np.max(hs) 278 | wmin,wmax=np.min(ws),np.max(ws) 279 | 280 | mask=mask[hmin:hmax,wmin:wmax] 281 | rgb=rgb[hmin:hmax,wmin:wmax] 282 | 283 | rgb*=mask.astype(np.uint8)[:,:,None] 284 | begin=[hmin,wmin] 285 | pose=image_db[idx]['RT'] 286 | 287 | return rgb, mask, begin, pose 288 | 289 | def save_fuse_data(output_dir, idx, fuse_img, fuse_mask, fuse_begins, fuse_poses): 290 | os.makedirs(output_dir, exist_ok=True) 291 | imsave(os.path.join(output_dir,'{}_rgb.jpg'.format(idx)),fuse_img) 292 | fuse_mask=fuse_mask.astype(np.uint8) 293 | imsave(os.path.join(output_dir,'{}_mask.png'.format(idx)),fuse_mask) 294 | save_pickle([np.asarray(fuse_begins,np.int32), np.asarray(fuse_poses,np.float32)], 295 | os.path.join(output_dir,'{}_info.pkl'.format(idx))) 296 | 297 | def randomly_sample_foreground_ycb(image_db, ycb_dir, ycb_cls_idx): 298 | idx=np.random.randint(0,len(image_db.train_real_set)) 299 | rgb_pth=os.path.join(ycb_dir, image_db.train_real_set[idx]['rgb_pth']) 300 | msk_pth=os.path.join(ycb_dir, image_db.train_real_set[idx]['msk_pth']) 301 | 302 | rgb = read_rgb_np(rgb_pth) 303 | mask = read_mask_np(msk_pth) 304 | mask = mask == ycb_cls_idx 305 | if len(mask.shape)>2: mask=np.sum(mask,2)>0 306 | mask=np.asarray(mask,np.int32) 307 | 308 | hs,ws=np.nonzero(mask) 309 | if len(hs)==0: 310 | print('zero size') 311 | raise RuntimeError 312 | hmin,hmax=np.min(hs),np.max(hs) 313 | wmin,wmax=np.min(ws),np.max(ws) 314 | 315 | mask=mask[hmin:hmax,wmin:wmax] 316 | rgb=rgb[hmin:hmax,wmin:wmax] 317 | 318 | rgb*=mask.astype(np.uint8)[:,:,None] 319 | begin=[hmin,wmin] 320 | 
pose=image_db.train_real_set[idx]['pose'] 321 | K=image_db.train_real_set[idx]['K'] 322 | 323 | return rgb, mask, begin, pose, K 324 | 325 | linemod_cls_names=['ape','cam','cat','duck','glue','iron','phone', 'benchvise','can','driller','eggbox','holepuncher','lamp'] 326 | 327 | 328 | def run(): 329 | output_dir='./data/LINEMOD/fuse/' 330 | linemod_dir=cfg.LINEMOD 331 | linemod_orig_dir=cfg.LINEMOD_ORIG 332 | background_dir=os.path.join(cfg.SUN, "JPEGImages") 333 | cache_dir='./' 334 | fuse_num=10000 335 | worker_num=2 336 | prepare_dataset_parallel(output_dir, linemod_dir, linemod_orig_dir, fuse_num, background_dir, cache_dir, worker_num) 337 | 338 | 339 | if __name__=="__main__": 340 | output_dir='tmp/' 341 | linemod_dir='/home/liuyuan/data/LINEMOD' 342 | linemod_orig_dir='/home/liuyuan/data/LINEMOD_ORIG' 343 | background_dir='/home/liuyuan/data/SUN2012pascalformat/JPEGImages' 344 | cache_dir='./' 345 | fuse_num=10000 346 | worker_num=2 347 | prepare_dataset_parallel(output_dir, linemod_dir, linemod_orig_dir, fuse_num, background_dir, cache_dir, worker_num) 348 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | from config import cfg 2 | import torch 3 | import argparse 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('--type', action='store', dest='type', type=str) 7 | args = parser.parse_args() 8 | 9 | 10 | def run_rendering(): 11 | from blender.render_utils import Renderer, YCBRenderer 12 | # YCBRenderer.multi_thread_render() 13 | # renderer = YCBRenderer('037_scissors') 14 | renderer=Renderer('cat') 15 | renderer.run() 16 | 17 | 18 | def run_fuse(): 19 | from fuse.fuse import run 20 | run() 21 | 22 | 23 | if __name__ == '__main__': 24 | globals()['run_' + args.type]() 25 | --------------------------------------------------------------------------------
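Usage sketch (not part of the repository sources): run.py maps --type X to the function run_X above, so with the paths in config.py pointing at valid LINEMOD, LINEMOD_ORIG and SUN data (and cfg.BLENDER_PATH set for rendering), the two pipelines can be started with

    python run.py --type rendering
    python run.py --type fuse

The fused samples written by save_fuse_data in fuse/fuse.py can be read back with a minimal helper along the following lines. The file names and pickle layout follow the code above; the helper name load_fuse_sample and the sample index are illustrative only.

    import os
    import pickle
    from skimage.io import imread

    def load_fuse_sample(output_dir, idx):
        rgb = imread(os.path.join(output_dir, '{}_rgb.jpg'.format(idx)))
        # per-pixel class ids: 0 is background, i + 1 corresponds to linemod_cls_names[i]
        mask = imread(os.path.join(output_dir, '{}_mask.png'.format(idx)))
        with open(os.path.join(output_dir, '{}_info.pkl'.format(idx)), 'rb') as f:
            # begins: int32 per-object offsets, poses: float32 3x4 object poses
            begins, poses = pickle.load(f)
        return rgb, mask, begins, poses

    rgb, mask, begins, poses = load_fuse_sample('./data/LINEMOD/fuse/', 0)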