├── .gitignore ├── LICENSE ├── README.md └── src ├── dataset_generation ├── CMakeLists.txt ├── __init__.py ├── package.xml └── src │ ├── 2_data_split.py │ ├── __init__.py │ ├── assets │ └── shadowhand_asset │ │ ├── __init__.py │ │ ├── grasp_pose.yaml │ │ ├── shadowhand_dict_jonit_name.yaml │ │ └── urdf │ │ ├── bio_tac │ │ ├── biotac │ │ │ └── shell_core │ │ │ │ ├── biotac.urdf │ │ │ │ └── biotac_template.urdf │ │ └── meshes │ │ │ ├── biotac │ │ │ ├── bc │ │ │ │ ├── top_side_core_bc.obj │ │ │ │ ├── top_side_core_bc_center.obj │ │ │ │ ├── top_side_core_bc_large.obj │ │ │ │ └── trans.mlp │ │ │ ├── int_ext_skin │ │ │ │ ├── int_ext_skin_combined_ftw.obj │ │ │ │ ├── int_ext_skin_combined_ftw.tet │ │ │ │ ├── int_ext_skin_combined_ftw.vtk │ │ │ │ ├── int_ext_skin_combined_ftw_1000.obj │ │ │ │ ├── int_ext_skin_combined_ftw_1000.tet │ │ │ │ ├── int_ext_skin_combined_ftw_1200.obj │ │ │ │ ├── int_ext_skin_combined_ftw_1200.tet │ │ │ │ ├── int_ext_skin_combined_ftw_2000.obj │ │ │ │ ├── int_ext_skin_combined_ftw_2000.tet │ │ │ │ ├── int_ext_skin_combined_ftw_500.obj │ │ │ │ ├── int_ext_skin_combined_ftw_500.tet │ │ │ │ ├── int_ext_skin_combined_ftw_800.obj │ │ │ │ └── int_ext_skin_combined_ftw_800.tet │ │ │ └── merged_tip.obj │ │ │ └── indenters │ │ │ ├── cube_14mm.obj │ │ │ ├── cylinder_long_7mm.obj │ │ │ ├── cylinder_short_3-5mm.obj │ │ │ ├── cylinder_short_7mm.obj │ │ │ ├── ring_7mm.obj │ │ │ ├── sphere_14mm.obj │ │ │ ├── sphere_3-5mm.obj │ │ │ └── sphere_7mm.obj │ │ ├── lf_fftip_softcontact.urdf │ │ ├── lf_hand.urdf │ │ ├── lf_hand_biotac.urdf │ │ ├── lf_hand_biotac_nogeom.urdf │ │ ├── lf_hand_biotac_notip.urdf │ │ ├── lf_hand_biotac_simpyfied.urdf │ │ ├── lf_hand_test.urdf │ │ ├── lf_thtip_softcontact.urdf │ │ ├── material.mtl │ │ ├── shadowhand │ │ ├── components │ │ │ ├── cable_connector │ │ │ │ ├── cable_connector_base_E3M5.dae │ │ │ │ └── cable_connector_palm_E3M5.dae │ │ │ ├── f_distal │ │ │ │ ├── bt_2p │ │ │ │ │ ├── f_distal_bt_2p.dae │ │ │ │ │ ├── f_distal_bt_2p_00.obj │ │ │ │ │ ├── f_distal_bt_2p_01.obj │ │ │ │ │ ├── f_distal_bt_2p_02.obj │ │ │ │ │ ├── f_distal_bt_2p_adapter.dae │ │ │ │ │ └── process.mlp │ │ │ │ ├── bt_sp │ │ │ │ │ └── f_distal_bt_sp.dae │ │ │ │ └── pst │ │ │ │ │ └── f_distal_pst.dae │ │ │ ├── f_knuckle │ │ │ │ ├── f_knuckle_C6M2.dae │ │ │ │ ├── f_knuckle_E2M3.dae │ │ │ │ ├── f_knuckle_E3M5.dae │ │ │ │ ├── f_knuckle_E4.dae │ │ │ │ └── f_knuckle_G1M5.dae │ │ │ ├── f_middle │ │ │ │ ├── f_middle_C6M2.dae │ │ │ │ ├── f_middle_E2M3.dae │ │ │ │ ├── f_middle_E3M5.dae │ │ │ │ ├── f_middle_E4.dae │ │ │ │ └── f_middle_G1M5.dae │ │ │ ├── f_proximal │ │ │ │ ├── f_proximal_C6M2.dae │ │ │ │ ├── f_proximal_E2M3.dae │ │ │ │ ├── f_proximal_E3M5.dae │ │ │ │ ├── f_proximal_E4.dae │ │ │ │ └── f_proximal_G1M5.dae │ │ │ ├── forearm │ │ │ │ ├── forearm_C6M2.dae │ │ │ │ ├── forearm_E2M3.dae │ │ │ │ ├── forearm_E3M5.dae │ │ │ │ ├── forearm_E4.dae │ │ │ │ ├── forearm_G1M5.dae │ │ │ │ ├── forearm_collision_C6M2.dae │ │ │ │ ├── forearm_collision_E2M3.dae │ │ │ │ ├── forearm_collision_E3M5.dae │ │ │ │ └── forearm_collision_E4.dae │ │ │ ├── lf_metacarpal │ │ │ │ ├── lf_metacarpal_C6M2.dae │ │ │ │ ├── lf_metacarpal_E2M3.dae │ │ │ │ ├── lf_metacarpal_E3M5.dae │ │ │ │ └── lf_metacarpal_E4.dae │ │ │ ├── mounting_plate │ │ │ │ ├── mounting_plate_E3M5.dae │ │ │ │ ├── mounting_plate_E4.dae │ │ │ │ └── mounting_plate_G1M5.dae │ │ │ ├── palm │ │ │ │ ├── palm_C6M2.dae │ │ │ │ ├── palm_E2M3.dae │ │ │ │ ├── palm_E3M5.dae │ │ │ │ ├── palm_E4.dae │ │ │ │ └── palm_G1M5.dae │ │ │ ├── th_distal │ │ │ │ ├── bt_2p │ │ │ │ 
│ ├── th_distal_bt_2p.dae │ │ │ │ │ └── th_distal_bt_2p_adapter.dae │ │ │ │ ├── bt_sp │ │ │ │ │ └── th_distal_bt_sp.dae │ │ │ │ └── pst │ │ │ │ │ └── th_distal_pst.dae │ │ │ ├── th_middle │ │ │ │ ├── th_middle_C6M2.dae │ │ │ │ ├── th_middle_E2M3.dae │ │ │ │ ├── th_middle_E3M5.dae │ │ │ │ ├── th_middle_E4.dae │ │ │ │ └── th_middle_G1M5.dae │ │ │ ├── th_proximal │ │ │ │ ├── th_proximal_C6M2.dae │ │ │ │ ├── th_proximal_E2M3.dae │ │ │ │ ├── th_proximal_E3M5.dae │ │ │ │ ├── th_proximal_E4.dae │ │ │ │ └── th_proximal_G1M5.dae │ │ │ └── wrist │ │ │ │ ├── wrist_C6M2.dae │ │ │ │ ├── wrist_E2M3.dae │ │ │ │ ├── wrist_E3M5.dae │ │ │ │ └── wrist_E4.dae │ │ ├── shadowhand_motor_left.urdf │ │ └── shadowhand_motor_plus_left.urdf │ │ ├── square_table.urdf │ │ ├── temp.obj │ │ ├── temp_vhacd.obj │ │ ├── test │ │ ├── lf_fftip_softcontact.urdf │ │ └── lf_thtip_softcontact.urdf │ │ ├── ur10.urdf │ │ ├── ur10e │ │ ├── meshes │ │ │ ├── collision │ │ │ │ ├── base.obj │ │ │ │ ├── base.stl │ │ │ │ ├── forearm.obj │ │ │ │ ├── forearm.stl │ │ │ │ ├── shoulder.obj │ │ │ │ ├── shoulder.stl │ │ │ │ ├── upperarm.obj │ │ │ │ ├── upperarm.stl │ │ │ │ ├── wrist1.obj │ │ │ │ ├── wrist1.stl │ │ │ │ ├── wrist2.obj │ │ │ │ ├── wrist2.stl │ │ │ │ ├── wrist3.obj │ │ │ │ └── wrist3.stl │ │ │ └── visual │ │ │ │ ├── base.dae │ │ │ │ ├── forearm.dae │ │ │ │ ├── shoulder.dae │ │ │ │ ├── upperarm.dae │ │ │ │ ├── wrist1.dae │ │ │ │ ├── wrist2.dae │ │ │ │ └── wrist3.dae │ │ └── robots │ │ │ └── ur10_lf_hand.urdf │ │ ├── ur10e_lf_hand.urdf │ │ ├── ur10e_lf_hand_biotac.urdf │ │ ├── ur10e_test.urdf │ │ ├── urdf_conf │ │ └── ur10_lf_hand.urdf │ │ └── vhacd_log.txt │ ├── cfg │ └── config.yaml │ ├── data_inspection.py │ ├── keypoint_visualization.py │ ├── pcd_render_with_hand.py │ ├── render_call.py │ ├── run_collection.sh │ ├── urdf_generation.py │ ├── utils │ ├── __init__.py │ ├── file_utils.py │ ├── pcd_utils.py │ ├── project_utils.py │ └── transform_utils.py │ ├── visualization.py │ ├── visualization_camera_frame.py │ ├── visualization_pcd.py │ └── visualization_world_frame.py ├── pose_annotation ├── .gitmodules ├── CMakeLists.txt ├── README.md ├── launch │ └── annotation.launch ├── package.xml └── scripts │ ├── basic_controls.py │ ├── ee_anno.py │ ├── model_reorient.py │ ├── model_rescale_relocate.py │ ├── model_resize.py │ └── pub_tf.py └── toolee ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── configs ├── config.py └── mrcnn_config.py ├── datasets ├── data_extract_pvnet.py ├── data_split_ee.py ├── dataset_toolee.py ├── extract_ee_pcd.py └── extract_ee_pose_prior.py ├── launch ├── affordance_segmentation.launch ├── inference_service.launch └── pose_estimation.launch ├── mrcnn ├── __init__.py ├── dataset.py ├── hook.py ├── main.py └── runner.py ├── networks ├── gf_algorithms │ ├── losses.py │ ├── samplers.py │ ├── score_utils.py │ ├── scorenet.py │ └── sde.py ├── posenet.py ├── posenet_agent.py └── pts_encoder │ ├── pointnet2.py │ ├── pointnet2_utils │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── pointnet2 │ │ ├── pointnet2_modules.py │ │ ├── pointnet2_utils.py │ │ ├── pytorch_utils.py │ │ ├── setup.py │ │ └── src │ │ │ ├── ball_query.cpp │ │ │ ├── ball_query_gpu.cu │ │ │ ├── ball_query_gpu.h │ │ │ ├── cuda_utils.h │ │ │ ├── group_points.cpp │ │ │ ├── group_points_gpu.cu │ │ │ ├── group_points_gpu.h │ │ │ ├── interpolate.cpp │ │ │ ├── interpolate_gpu.cu │ │ │ ├── interpolate_gpu.h │ │ │ ├── pointnet2_api.cpp │ │ │ ├── sampling.cpp │ │ │ ├── sampling_gpu.cu │ │ │ └── sampling_gpu.h │ └── tools │ │ ├── _init_path.py │ │ ├── data │ │ 
└── KITTI │ │ │ └── ImageSets │ │ │ ├── test.txt │ │ │ ├── train.txt │ │ │ ├── trainval.txt │ │ │ └── val.txt │ │ ├── dataset.py │ │ ├── kitti_utils.py │ │ ├── pointnet2_msg.py │ │ └── train_and_eval.py │ └── pointnets.py ├── package.xml ├── requirements.txt ├── runners ├── evaluation_single.py ├── evaluation_single_ee_pose.py ├── evaluation_single_obj_pose.py └── trainer.py ├── scripts ├── eval_single.sh ├── inference_node.py ├── pose_pred_agent.py ├── pose_pred_client.py ├── pose_pred_service.py ├── result_visualization.py ├── seg_pred_client.py ├── seg_pred_service.py ├── tensorboard.sh ├── test_affordance_seg.sh ├── train_score_ee_pose.sh ├── train_score_ee_pose_symtr.sh └── train_score_obj_pose.sh ├── srv ├── PosePred.srv └── SegPred.srv └── utils ├── archive └── data_augmentation.py ├── data_tools.py ├── datasets_utils.py ├── file_utils.py ├── genpose_utils.py ├── metrics.py ├── misc.py ├── operations_3d.py ├── pc2_utils.py ├── so3_visualize.py ├── tracking_utils.py ├── transform_utils.py ├── transforms.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Yunlong Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ToolEENet 2 | 3 | This code is written based on https://github.com/Jiyao06/GenPose. 4 | 5 | 6 | The clean code, checkpoints and dataset will be released soon. 
7 | -------------------------------------------------------------------------------- /src/dataset_generation/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0.2) 2 | project(dataset_generation) 3 | 4 | find_package(catkin REQUIRED COMPONENTS 5 | cv_bridge 6 | message_generation 7 | rospy 8 | sensor_msgs 9 | std_msgs 10 | ) 11 | 12 | generate_messages( 13 | DEPENDENCIES 14 | std_msgs 15 | ) 16 | 17 | catkin_package( 18 | CATKIN_DEPENDS 19 | message_runtime 20 | sensor_msgs 21 | std_msgs 22 | ) 23 | include_directories( 24 | ${catkin_INCLUDE_DIRS} 25 | ) 26 | 27 | catkin_install_python( 28 | PROGRAMS 29 | DESTINATION 30 | ${CATKIN_PACKAGE_BIN_DESTINATION} 31 | ) -------------------------------------------------------------------------------- /src/dataset_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/__init__.py -------------------------------------------------------------------------------- /src/dataset_generation/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | dataset_generation 4 | 0.0.0 5 | dataset_generation 6 | 7 | 1wang 8 | 9 | MIT 10 | 11 | catkin 12 | 13 | std_msgs 14 | sensor_msgs 15 | rospy 16 | 17 | message_generation 18 | 19 | message_runtime 20 | 21 | -------------------------------------------------------------------------------- /src/dataset_generation/src/2_data_split.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | if __name__ == '__main__': 5 | random.seed(0) 6 | train_ratio = 0.8 7 | data_path = '/dataSSD/1wang/dataspace/DatasetToolEE' 8 | exclude_objs = ['hammer_10', 'hammer_11'] # some problems with those 3D models 9 | cats = [f for f in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, f))] 10 | all_examples = [] 11 | for cat in cats: 12 | objs = [f for f in os.listdir(os.path.join(data_path, cat)) if os.path.isdir(os.path.join(data_path, cat, f))] 13 | for obj in objs: 14 | if obj in exclude_objs: 15 | continue 16 | meta_file_names = [f for f in os.listdir(os.path.join(data_path, cat, obj)) if 'meta' in f] 17 | all_examples += [os.path.join(cat, obj, f) for f in meta_file_names] 18 | 19 | all_examples.sort() 20 | with open(os.path.join(data_path, 'all_examples.txt'), 'w') as f: 21 | for example_id in all_examples: 22 | f.write(example_id + '\n') 23 | 24 | random.shuffle(all_examples) 25 | train_num = int(len(all_examples) * train_ratio) 26 | train_examples = all_examples[:train_num] 27 | val_examples = all_examples[train_num:] 28 | 29 | with open(os.path.join(data_path, 'train_examples.txt'), 'w') as f: 30 | for example_id in train_examples: 31 | f.write(example_id + '\n') 32 | 33 | with open(os.path.join(data_path, 'val_examples.txt'), 'w') as f: 34 | for example_id in val_examples: 35 | f.write(example_id + '\n') 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /src/dataset_generation/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/src/__init__.py -------------------------------------------------------------------------------- 
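Note: 2_data_split.py above writes all_examples.txt, train_examples.txt and val_examples.txt into the dataset root. Below is a minimal sketch of how a downstream loader might consume those split files; the read_split helper, the example data_path, and the meta-file naming are illustrative assumptions, not code from this repository.

import os

def read_split(data_path, split='train'):
    # Assumed helper: read the split file written by 2_data_split.py and return
    # the relative example ids (category/object/meta-file), one per line.
    split_file = os.path.join(data_path, f'{split}_examples.txt')
    with open(split_file, 'r') as f:
        return [line.strip() for line in f if line.strip()]

if __name__ == '__main__':
    data_path = '/dataSSD/1wang/dataspace/DatasetToolEE'  # same root used by 2_data_split.py
    train_examples = read_split(data_path, 'train')
    val_examples = read_split(data_path, 'val')
    print(f'{len(train_examples)} train / {len(val_examples)} val examples')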
/src/dataset_generation/src/assets/shadowhand_asset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/src/assets/shadowhand_asset/__init__.py -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/grasp_pose.yaml: -------------------------------------------------------------------------------- 1 | lh_FFJ1: 0.8337000012397766 2 | lh_FFJ2: 0.9947999715805054 3 | lh_FFJ3: 1.124400019645691 4 | lh_FFJ4: -0.007199999876320362 5 | lh_LFJ1: 0.551800012588501 6 | lh_LFJ2: 1.2768000364303589 7 | lh_LFJ3: 1.030400037765503 8 | lh_LFJ4: 0.12890000641345978 9 | lh_LFJ5: 0.0 10 | lh_MFJ1: 0.551800012588501 11 | lh_MFJ2: 1.139799952507019 12 | lh_MFJ3: 1.0585999488830566 13 | lh_MFJ4: 0.0 14 | lh_RFJ1: 0.9143000245094299 15 | lh_RFJ2: 1.1237000226974487 16 | lh_RFJ3: 0.9363999962806702 17 | lh_RFJ4: 0.0 18 | lh_THJ1: 0.8331000208854675 19 | lh_THJ2: 0.03579999879002571 20 | lh_THJ3: 0.053700000047683716 21 | lh_THJ4: 1.2216999530792236 22 | lh_THJ5: 0.375900000333786 23 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/shadowhand_dict_jonit_name.yaml: -------------------------------------------------------------------------------- 1 | lh_FFJ1: 3 2 | lh_FFJ2: 2 3 | lh_FFJ3: 1 4 | lh_FFJ4: 0 5 | lh_LFJ1: 8 6 | lh_LFJ2: 7 7 | lh_LFJ3: 6 8 | lh_LFJ4: 5 9 | lh_LFJ5: 4 10 | lh_MFJ1: 12 11 | lh_MFJ2: 11 12 | lh_MFJ3: 10 13 | lh_MFJ4: 9 14 | lh_RFJ1: 16 15 | lh_RFJ2: 15 16 | lh_RFJ3: 14 17 | lh_RFJ4: 13 18 | lh_THJ1: 21 19 | lh_THJ2: 20 20 | lh_THJ3: 19 21 | lh_THJ4: 18 22 | lh_THJ5: 17 23 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/bio_tac/biotac/shell_core/biotac.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/bio_tac/biotac/shell_core/biotac_template.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/bio_tac/meshes/biotac/bc/trans.mlp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 1 0 0 0 7 | 0 1 0 0 8 | 0 0 1 0 9 | 0 0 0 1 10 | 11 | 100001000000000000000000000001010100000010100000000100111011110000001001 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/lf_fftip_softcontact.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 
| 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/lf_thtip_softcontact.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/material.mtl: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | 3 | newmtl material_0 4 | Ka 0.20000000 0.20000000 0.20000000 5 | Kd 1.00000000 1.00000000 1.00000000 6 | Ks 1.00000000 1.00000000 1.00000000 7 | Ns 0.00000000 8 | map_Kd material_0.png -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/shadowhand/components/f_distal/bt_2p/process.mlp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 1 0 0 0 7 | 0 1 0 0 8 | 0 0 1 -1.0842 9 | 0 0 0 1 10 | 11 | 100001000000000000000100000001011000001010100000000100111010000111001001 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/square_table.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/test/lf_fftip_softcontact.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 59 | 72 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/test/lf_thtip_softcontact.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 59 | 72 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/base.stl 
-------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/forearm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/forearm.stl -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/shoulder.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/shoulder.stl -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/upperarm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/upperarm.stl -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/wrist1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/wrist1.stl -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/wrist2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/wrist2.stl -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/wrist3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/src/assets/shadowhand_asset/urdf/ur10e/meshes/collision/wrist3.stl -------------------------------------------------------------------------------- /src/dataset_generation/src/assets/shadowhand_asset/urdf/vhacd_log.txt: -------------------------------------------------------------------------------- 1 | V-HACD V2.2 2 | Syntax: testVHACD [options] --input infile.obj --output outfile.obj --log logfile.txt 3 | 4 | Options: 5 | --input Wavefront .obj input file name 6 | --output VRML 2.0 output file name 7 | --log Log file name 8 | --resolution Maximum number of voxels generated during the voxelization stage (default=100,000, range=10,000-16,000,000) 9 | --depth Maximum number of clipping stages. 
During each split stage, parts with a concavity higher than the user defined threshold are clipped according the "best" clipping plane (default=20, range=1-32) 10 | --concavity Maximum allowed concavity (default=0.0025, range=0.0-1.0) 11 | --planeDownsampling Controls the granularity of the search for the "best" clipping plane (default=4, range=1-16) 12 | --convexhullDownsampling Controls the precision of the convex-hull generation process during the clipping plane selection stage (default=4, range=1-16) 13 | --alpha Controls the bias toward clipping along symmetry planes (default=0.05, range=0.0-1.0) 14 | --beta Controls the bias toward clipping along revolution axes (default=0.05, range=0.0-1.0) 15 | --gamma Controls the maximum allowed concavity during the merge stage (default=0.00125, range=0.0-1.0) 16 | --delta Controls the bias toward maximaxing local concavity (default=0.05, range=0.0-1.0) 17 | --pca Enable/disable normalizing the mesh before applying the convex decomposition (default=0, range={0,1}) 18 | --mode 0: voxel-based approximate convex decomposition, 1: tetrahedron-based approximate convex decomposition (default=0, range={0,1}) 19 | --maxNumVerticesPerCH Controls the maximum number of triangles per convex-hull (default=64, range=4-1024) 20 | --minVolumePerCH Controls the adaptive sampling of the generated convex-hulls (default=0.0001, range=0.0-0.01) 21 | --convexhullApproximation Enable/disable approximation when computing convex-hulls (default=1, range={0,1}) 22 | --oclAcceleration Enable/disable OpenCL acceleration (default=0, range={0,1}) 23 | --oclPlatformID OpenCL platform id (default=0, range=0-# OCL platforms) 24 | --oclDeviceID OpenCL device id (default=0, range=0-# OCL devices) 25 | --help Print usage 26 | 27 | Examples: 28 | testVHACD.exe --input bunny.obj --output bunny_acd.obj --log log.txt 29 | 30 | + OpenCL (OFF) 31 | + Parameters 32 | input temp.obj 33 | resolution 500000 34 | max. depth 20 35 | max. concavity 0.001 36 | plane down-sampling 4 37 | convex-hull down-sampling 4 38 | alpha 0.04 39 | beta 0.05 40 | gamma 0.0005 41 | pca 0 42 | mode 0 43 | max. vertices per convex-hull 128 44 | min. 
volume to add vertices to convex-hulls 0.0001 45 | convex-hull approximation 1 46 | OpenCL acceleration 1 47 | OpenCL platform ID 0 48 | OpenCL device ID 0 49 | output temp_vhacd.obj 50 | log vhacd_log.txt 51 | + Load mesh 52 | + Voxelization 53 | dim = 64 -> 153766 voxels 54 | dim = 94 -> 484072 voxels 55 | dim = 95 -> 499551 voxels 56 | time 0.0511865s 57 | + Compute primitive set 58 | # primitives 499551 59 | # inside surface 454989 60 | # on surface 44562 61 | time 0.00225029s 62 | + Approximate Convex Decomposition 63 | Subdivision level 1 64 | -> Part[0] C = 0.0510511, E = 0.0854966, VS = 44562, VI = 454989 65 | + Generate 1 convex-hulls 66 | time 0.173228s 67 | + Merge Convex Hulls 68 | time 5.941e-06s 69 | + Simplify 1 convex-hulls 70 | Simplify CH[00000] 2099 V, 4194 T 71 | time 0.0798867s 72 | + Generate output: 1 convex-hulls 73 | CH[00000] 128 V, 252 T 74 | -------------------------------------------------------------------------------- /src/dataset_generation/src/cfg/config.yaml: -------------------------------------------------------------------------------- 1 | sim: 2 | headless: True 3 | visualize: True 4 | obj_asset_root: "/homeL/1wang/workspace/toolee_ws/src/dataset_generation/src/assets/Dataset3DModel" 5 | num_threads: 8 6 | z_top: True 7 | 8 | env: 9 | num_envs: 100 10 | env_per_row: 10 11 | env_spacing: 1 12 | save_per_frame: 5 13 | collision_filter: 1 14 | object_seg_id: 2 15 | hand_seg_id: 1 16 | 17 | camera: 18 | width: 1920 19 | height: 1080 20 | init_location: [0.3, 0.3, 1.5] 21 | lookat: [0,0,1] 22 | randomize: 23 | is_randomize: True 24 | offset_limit_x: [0.3, 0.6] 25 | offset_limit_y: [0.3, 0.6] 26 | limit_z: [0.9, 1.5] 27 | 28 | shadowhand: 29 | asset_root: "/homeL/1wang/workspace/toolee_ws/src/dataset_generation/src/assets/shadowhand_asset" 30 | joint_name_map: "shadowhand_dict_jonit_name.yaml" 31 | hand_asset: "urdf/lf_hand_biotac_notip.urdf" 32 | grasp_pose: "grasp_pose.yaml" 33 | 34 | ee_name_map: 35 | hammer_grip: ['head1', 'grip'] 36 | screwdriver: ['head1'] 37 | wrench: ['head1', 'head2'] 38 | 39 | affordance_seg_id_map: 40 | empty: 0 41 | hand: 1 42 | object: 2 43 | hammer_grip: 44 | head1: 3 45 | grip: 4 46 | screwdriver: 47 | head1: 5 48 | wrench: 49 | head1: 6 50 | head2: 7 51 | 52 | dataset: 53 | is_save: True 54 | total_num_per_obj: 1000 55 | save_path: "/dataSSD/1wang/dataspace/DatasetToolEE" 56 | 57 | pose_init: 58 | hand: 59 | r: [-90, 90, 0] # deg 60 | t: [0.035 , 0.075, 0.95] # translation 61 | object: 62 | r: [0.0, 0.0, 0.0] 63 | t: [0.0, 0.0, 1] 64 | 65 | object_randomization: 66 | is_randomized: True 67 | # since the 3D model is diagonal normalized to 1, therefore rescale it to normal size 68 | standard_scale: 69 | hammer_grip: 300 # denote the standard size is roughly 300 mm 70 | screwdriver: 200 71 | wrench: 200 72 | random_scale_limit: 0.2 # 0.2 73 | randomize_color: True 74 | random_tz_limit: 0.05 # 0.05 75 | offset_pos_limit: 0.2 76 | offset_angle_limit: 45 77 | 78 | visualization: 79 | is_visualize: False 80 | frame_id: 'map' 81 | sec: 2 82 | -------------------------------------------------------------------------------- /src/dataset_generation/src/keypoint_visualization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import struct 3 | import sys 4 | 5 | import numpy as np 6 | import open3d as o3d 7 | import rospy 8 | import tf 9 | import yaml 10 | from sensor_msgs import point_cloud2 11 | from sensor_msgs.msg import PointCloud2, PointField 12 | from sensor_msgs.msg import Image 
13 | from cv_bridge import CvBridge 14 | from std_msgs.msg import Header 15 | from utils.transform_utils import TfUtils 16 | from utils.project_utils import project_depth_to_pointscloud, project_xyz_to_pixel_uv 17 | BIT_MOVE_16 = 2 ** 16 18 | BIT_MOVE_8 = 2 ** 8 19 | 20 | 21 | class KeyPointPublish(): 22 | def __init__(self): 23 | rospy.init_node('image_publisher', anonymous=True) 24 | self.pub = rospy.Publisher("key_points", Image, queue_size=2, latch=True) 25 | self.bridge = CvBridge() 26 | 27 | def pub(self, image, sec=0.5): 28 | try: 29 | hz = 10 30 | rate = rospy.Rate(hz) 31 | for _ in range(int(sec * hz)): 32 | if not rospy.is_shutdown(): 33 | self.pub.publish(self.bridge.cv2_to_imgmsg(image, "bgr8")) 34 | rate.sleep() 35 | except rospy.ROSInterruptException: 36 | print("program interrupted before completion", file=sys.stderr) 37 | pass 38 | 39 | 40 | def show_keypoint_in_cam_picture(): 41 | config_yaml = "/homeL/1wang/workspace/toolee_ws/src/dataset_generation/src/cfg/config.yaml" 42 | with open(config_yaml, 'r') as f: 43 | cfg = yaml.load(f.read(), Loader=yaml.FullLoader) 44 | name_map = cfg['ee_name_map'] 45 | 46 | kp_pub = KeyPointPublish() 47 | cat = "hammer_grip" 48 | obj = "hammer_01" 49 | folder_path = f"/homeL/1wang/workspace/DatasetToolEE_100/{cat}/{obj}" 50 | pcd_files = [f for f in os.listdir(folder_path) if f.endswith(".pcd")] 51 | pcd_files.sort() 52 | ee_poses = {} 53 | for idx, pcd_file in enumerate(pcd_files): 54 | 55 | # load pcd of the object 56 | pcd = o3d.io.read_point_cloud(os.path.join(folder_path, pcd_file)) 57 | camera_view_matrix = np.loadtxt(os.path.join(folder_path, f"view_matrix_{cat}_{obj}_{idx:04d}.txt")) 58 | projection_matrix = np.loadtxt(os.path.join(folder_path, f"projection_matrix_{cat}_{obj}_{idx:04d}.txt")) 59 | for pose_name in name_map[cat]: 60 | file_name = f"ee_pose_{cat}_{obj}_{pose_name}_{idx:04d}.txt" 61 | pose_file = os.path.join(folder_path, file_name) 62 | if os.path.exists(pose_file): 63 | pose = np.loadtxt(pose_file, delimiter=',') 64 | # pose = ee_pose_to_cam_view(cam_view_matrx=camera_view_matrix, ee_pose=pose) 65 | ee_poses[pose_name] = pose 66 | obj_pose_file_name = os.path.join(folder_path, f"obj_pose_{cat}_{obj}_{idx:04d}.txt") 67 | obj_pose = np.loadtxt(obj_pose_file_name) 68 | 69 | points = np.asarray(pcd.points) 70 | # points = points_to_cam_view(cam_view_matrx=camera_view_matrix, points=points) 71 | 72 | if __name__ == '__main__': 73 | show_keypoint_in_cam_picture() 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /src/dataset_generation/src/render_call.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urdf_generation import URDFLoader 3 | import subprocess 4 | import time 5 | 6 | script_name = '/homeL/1wang/workspace/toolee_ws/src/dataset_generation/src/pcd_render.py' 7 | 8 | 9 | load = URDFLoader() 10 | category_list = load.get_category_list() 11 | for category in category_list: 12 | obj_list = load.get_obj_list(category) 13 | for obj in obj_list: 14 | command = [ 15 | 'python', script_name, 16 | '--headless', 'True', 17 | '--visualize', 'False', 18 | '--save', 'True', 19 | '--max_render_per_obj', '100', 20 | '--num_per_ins', '2', 21 | '--env_num', '10', 22 | '--total_num_per_obj', '1000', 23 | '--obj_name', obj, 24 | '--cat_name', category, 25 | ] 26 | # Run the command in a blocking way 27 | subprocess.run(command, check=True) 28 | time.sleep(1) 29 | 30 | 31 | 32 | 
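Note: the visualization scripts above rely on utils/project_utils.py (listed further below), where project_xyz_to_pixel_uv is left as an unimplemented stub. The sketch below is only a hypothetical illustration of such a world-to-pixel projection: it assumes the row-vector matrix convention implied by the inverse-view usage in project_depth_to_pointscloud, and it adds img_width/img_height arguments that the stub's signature does not have. It is not the repository's implementation.

import numpy as np

def project_xyz_to_pixel_uv_sketch(camera_proj_matrix, camera_view_matrix, points, img_width, img_height):
    # Hypothetical sketch: world-frame points (N, 3) -> pixel coordinates (N, 2).
    points_h = np.hstack([points, np.ones((points.shape[0], 1))])  # homogeneous coords
    points_cam = points_h @ camera_view_matrix    # world -> camera frame (assumed row-vector convention)
    clip = points_cam @ camera_proj_matrix        # camera -> clip space
    ndc = clip[:, :3] / clip[:, 3:4]              # perspective divide
    u = (ndc[:, 0] * 0.5 + 0.5) * img_width       # NDC x -> pixel u
    v = (1.0 - (ndc[:, 1] * 0.5 + 0.5)) * img_height  # NDC y -> pixel v (image y points down)
    return np.stack([u, v], axis=1)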
-------------------------------------------------------------------------------- /src/dataset_generation/src/run_collection.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES="1" 3 | # Define the number of times to run the Python code 4 | num_runs=50 # Change this to the desired number of runs 5 | 6 | # Loop to run the Python code multiple times 7 | for ((i=1; i<=$num_runs; i++)); do 8 | echo "Running Python code - Iteration $i" 9 | python /homeL/1wang/workspace/toolee_ws/src/dataset_generation/src/pcd_render_with_hand.py # Replace with the actual Python script name 10 | echo "--------------------------" 11 | done 12 | -------------------------------------------------------------------------------- /src/dataset_generation/src/urdf_generation.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urdf_parser_py.urdf import URDF 3 | 4 | import numpy as np 5 | class URDFLoader: 6 | def __init__(self,asset_root_path="/dataSSD/yunlong/dataspace/Dataset3DModel_v2.0"): 7 | self.asset_root_path = asset_root_path 8 | 9 | def create_urdf(self, obj_name, rgba=None, category="Hammer_Grip"): 10 | ''' 11 | :param obj_name: name of the object 12 | :param rgba: color of the object, None denotes random colors 13 | :param category: category of the object, default is "Hammer_Grip" 14 | :return: urdf path 15 | ''' 16 | 17 | # TODO: the load of stl will rewrite the file, which is weird. Need to fix this. 18 | object_mesh_path = os.path.join(self.asset_root_path, category, f"{obj_name}.stl") 19 | target_urdf_path = os.path.join(self.asset_root_path, category, f"{obj_name}.urdf") 20 | template_urdf = URDF.from_xml_file(os.path.join(self.asset_root_path, "object_template.urdf")) 21 | 22 | template_urdf.links[0].visuals[0].geometry.filename = object_mesh_path 23 | template_urdf.links[0].collisions[0].geometry.filename = object_mesh_path 24 | if rgba is not None: 25 | template_urdf.links[0].visuals[0].material.color.rgba = rgba 26 | else: 27 | template_urdf.links[0].visuals[0].material.color.rgba = np.random.rand(3).tolist() + [1] 28 | 29 | with open(target_urdf_path, 'w') as f: 30 | f.write(template_urdf.to_xml_string()) 31 | 32 | # urdf_template.links[0].visuals[0].geometry.mesh.filename = object_mesh_path 33 | # urdf_template.links[0].collisions[0].geometry.mesh.filename = object_mesh_path 34 | # if color is not None: 35 | # urdf_template.links[0].visuals[0].material.color = color 36 | # else: 37 | # urdf_template.links[0].visuals[0].material.color = np.random.rand(3).tolist() + [1] 38 | # urdf_template.save(target_urdf_path) 39 | 40 | return self.asset_root_path, os.path.join(category, f"{obj_name}.urdf") 41 | 42 | def get_asset_root_path(self): 43 | return self.asset_root_path 44 | 45 | def set_asset_root_path(self, path): 46 | self.asset_root_path = path 47 | 48 | def get_urdf_path_from_asset_root(self, cat_name, obj_name): 49 | return os.path.join(cat_name, obj_name, f"{obj_name}.urdf") 50 | 51 | def get_obj_path(self,cat_name, obj_name): 52 | return os.path.join(self.asset_root_path, cat_name, obj_name, f"{obj_name}.obj") 53 | 54 | def get_category_list(self): 55 | return [name for name in os.listdir(self.asset_root_path) if os.path.isdir(os.path.join(self.asset_root_path, name))] 56 | 57 | def get_obj_list(self, cat_name): 58 | name_list = [name.split('.')[0] for name in os.listdir(os.path.join(self.asset_root_path, cat_name)) if '.stl' in name] 59 | name_list.sort() 60 | 
return name_list 61 | 62 | # TODO, ah-hoc render new urdf with different color and return path 63 | if __name__ == '__main__': 64 | urdf_loader = URDFLoader() 65 | category_list = urdf_loader.get_category_list() 66 | for category in category_list: 67 | print(f"creating new urdfs for {category}") 68 | obj_names = urdf_loader.get_obj_list(category) 69 | for obj_name in obj_names: 70 | print(f"creating new urdf for {obj_name}") 71 | urdf_path = urdf_loader.create_urdf(obj_name=obj_name, category=category, rgba=None) -------------------------------------------------------------------------------- /src/dataset_generation/src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/dataset_generation/src/utils/__init__.py -------------------------------------------------------------------------------- /src/dataset_generation/src/utils/pcd_utils.py: -------------------------------------------------------------------------------- 1 | # refer: https://github.com/felixchenfy/open3d_ros_pointcloud_conversion/blob/master/lib_cloud_conversion_between_Open3D_and_ROS.py 2 | import numpy as np 3 | import open3d as o3d 4 | import rospy 5 | from sensor_msgs import point_cloud2 6 | from std_msgs.msg import Header 7 | 8 | BIT_MOVE_16 = 2 ** 16 9 | BIT_MOVE_8 = 2 ** 8 10 | 11 | fields_xyzrgb = [ 12 | point_cloud2.PointField('x', 0, point_cloud2.PointField.FLOAT32, 1), 13 | point_cloud2.PointField('y', 4, point_cloud2.PointField.FLOAT32, 1), 14 | point_cloud2.PointField('z', 8, point_cloud2.PointField.FLOAT32, 1), 15 | point_cloud2.PointField('rgb', 12, point_cloud2.PointField.UINT32, 1), 16 | ] 17 | 18 | 19 | def o3d_wrap(points, colors): 20 | colors = colors 21 | o3d_pcd = o3d.geometry.PointCloud() 22 | o3d_pcd.points = o3d.utility.Vector3dVector(points) # float 23 | o3d_pcd.colors = o3d.utility.Vector3dVector(colors) # 0-1, uint8 24 | return o3d_pcd 25 | 26 | 27 | def o3d_to_pcd2(o3d_pcd, frame_id): 28 | points = np.asarray(o3d_pcd.points) 29 | header = Header() 30 | header.stamp = rospy.Time.now() 31 | header.frame_id = frame_id 32 | colors = np.floor(np.asarray(o3d_pcd.colors) * 255) 33 | colors = colors[:, 0] * BIT_MOVE_16 + colors[:, 1] * BIT_MOVE_8 + colors[:, 2] 34 | cloud_data = np.c_[points, colors] 35 | point_cloud2.create_cloud(header, fields_xyzrgb, cloud_data) 36 | -------------------------------------------------------------------------------- /src/dataset_generation/src/utils/project_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | 5 | def project_depth_to_pointscloud(depth_buffer, rgb_buffer, seg_buffer, seg_id, camera_view_matrix, camera_proj_matrix, img_width, img_height): 6 | vinv = np.linalg.inv(camera_view_matrix) 7 | fu = 2 / camera_proj_matrix[0, 0] 8 | fv = 2 / camera_proj_matrix [1, 1] 9 | centerU = img_width / 2 10 | centerV = img_height / 2 11 | 12 | u = range(0, rgb_buffer.shape[1]) 13 | v = range(0, rgb_buffer.shape[0]) 14 | 15 | u, v = np.meshgrid(u, v) 16 | u = u.astype(float) 17 | v = v.astype(float) 18 | 19 | Z = depth_buffer 20 | X = -(u - centerU) / img_width * Z * fu 21 | Y = (v - centerV) / img_height * Z * fv 22 | 23 | Z = Z.flatten() 24 | depth_valid = Z > -10001 25 | seg_valid = seg_buffer.flatten() ==seg_id 26 | valid = np.logical_and(depth_valid, seg_valid) 27 | X = X.flatten() 28 | Y = Y.flatten() 29 | 30 | position = np.vstack((X, Y, Z, 
np.ones(len(X))))[:, valid].T 31 | colors = rgb_buffer.reshape((-1 ,3))[valid] 32 | position = position * vinv 33 | 34 | points = position[:, 0:3] 35 | return points, colors 36 | 37 | def project_xyz_to_pixel_uv(camera_proj_matrix, camera_view_matrix, points): 38 | pass -------------------------------------------------------------------------------- /src/dataset_generation/src/utils/transform_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tf 3 | from isaacgym import gymapi 4 | from tf.transformations import compose_matrix, decompose_matrix 5 | 6 | 7 | class GymUtil: 8 | @staticmethod 9 | def quaternion_to_xyz(quaternion): 10 | if type(quaternion) == np.ndarray: 11 | quaternion = GymUtil.arrary_to_Quat(quaternion) 12 | z, y, x = quaternion.to_euler_zyx() 13 | angle = np.array([x,y,z]) 14 | return angle 15 | 16 | @staticmethod 17 | def xyz_to_quaternion(x, y, z): 18 | quat = gymapi.Quat.from_euler_zyx(z, y, x) 19 | quat = GymUtil.Quat_to_arrary(quat) 20 | return quat 21 | 22 | @staticmethod 23 | def degree_to_radian(x, y, z): 24 | return x / 180 * np.pi, y / 180 * np.pi, z / 180 * np.pi 25 | @staticmethod 26 | def Quat_to_arrary(Quat): 27 | quat = np.zeros(4) 28 | quat[0] = Quat.x 29 | quat[1] = Quat.y 30 | quat[2] = Quat.z 31 | quat[3] = Quat.w 32 | return quat 33 | @staticmethod 34 | def Vec3_to_arrary(Vec3): 35 | p = np.zeros(3) 36 | p[0] = Vec3.x 37 | p[1] = Vec3.y 38 | p[2] = Vec3.z 39 | return p 40 | 41 | @staticmethod 42 | def array_to_Vec3(arr): 43 | Vec3 = gymapi.Vec3(3) 44 | Vec3.x = arr[0] 45 | Vec3.y = arr[1] 46 | Vec3.z = arr[2] 47 | return Vec3 48 | @staticmethod 49 | def arrary_to_Quat(arr): 50 | Quat = gymapi.Quat() 51 | Quat.x = arr[0] 52 | Quat.y = arr[1] 53 | Quat.z = arr[2] 54 | Quat.w = arr[3] 55 | return Quat 56 | @staticmethod 57 | def transform_to_pose(T:gymapi.Transform): 58 | p = GymUtil.Vec3_to_arrary(T.p) 59 | quat = GymUtil.Quat_to_arrary(T.r) 60 | return p, quat 61 | 62 | @staticmethod 63 | def pose_to_transform(p, quat): 64 | if type(p) is np.ndarray: 65 | p = GymUtil.array_to_Vec3(p) 66 | if type(quat) is np.ndarray: 67 | quat= GymUtil.arrary_to_Quat(quat) 68 | T = gymapi.Transform() 69 | T.p = p 70 | T.r = quat 71 | return T 72 | 73 | @staticmethod 74 | def tf_M_to_gym_Tranform(M): 75 | translate, quat = TfUtils.tf_M_to_pose(M) 76 | return GymUtil.pose_to_transform( 77 | p=translate, 78 | quat=quat 79 | ) 80 | 81 | class TfUtils: 82 | @staticmethod 83 | def gym_Transform_to_tf_M(T): 84 | p, quat = GymUtil.transform_to_pose(T) 85 | M = TfUtils.pose_to_tf_M(translate=p, quat=quat) 86 | return M 87 | 88 | @staticmethod 89 | def random_tf_M(): 90 | trans_offset_limit = 0.2 91 | trans_offset = np.random.uniform(low=-1, high=1, size=(3,)) * trans_offset_limit 92 | angle_offset = np.random.uniform(low=-1, high=1, size=(3,)) * np.pi 93 | rnd_M = TfUtils.pose_to_tf_M( 94 | translate=trans_offset, 95 | angles=angle_offset, 96 | ) 97 | return rnd_M 98 | 99 | 100 | 101 | @staticmethod 102 | def compose_tf_M(trans, angles=None, quat=None,scale=np.array([1,1,1])): 103 | # M = compose_matrix(scale, shear, angles, trans, persp) 104 | # sequence of each transform 105 | # angles: xyz 106 | if angles is None: 107 | angles = TfUtils.quaternion_to_anglexyz(quat) 108 | M = compose_matrix( 109 | scale=np.asarray(scale), 110 | shear=None, 111 | angles=np.asarray(angles), 112 | translate=np.asarray(trans), 113 | perspective=None 114 | ) 115 | return M 116 | 117 | @staticmethod 118 | def 
pose_to_tf_M(translate, angles=None,quat=None): 119 | # angles here is radius 120 | assert angles is not None or quat is not None, 'either angle or quat must be provide' 121 | if angles is None: 122 | angles = TfUtils.quaternion_to_anglexyz(quat) 123 | M = compose_matrix( 124 | scale=None, 125 | shear=None, 126 | angles=np.asarray(angles), 127 | translate=np.asarray(translate), 128 | perspective=None 129 | ) 130 | return M 131 | 132 | @staticmethod 133 | def tf_M_to_pose(M): 134 | scale, shear, angles, translate, perspective = decompose_matrix(M) 135 | quat = TfUtils.anglexyz_to_quaternion(angles) 136 | return translate, quat 137 | 138 | @staticmethod 139 | def apply_tf_M_to_point(M, point): 140 | return np.dot(M,np.append(point,1))[:-1] 141 | 142 | @staticmethod 143 | def anglexyz_to_quaternion(angles): 144 | return tf.transformations.quaternion_from_euler(angles[0], angles[1], angles[2],axes='sxyz') 145 | 146 | @staticmethod 147 | def quaternion_to_anglexyz(quaternion): 148 | return tf.transformations.euler_from_quaternion(quaternion,axes='sxyz') 149 | 150 | @staticmethod 151 | def decompose_tf_M(M): 152 | scale, shear, angles, trans, persp = decompose_matrix(M) 153 | quat = TfUtils.anglexyz_to_quaternion(angles) 154 | return np.asarray(trans), np.asarray(quat) 155 | 156 | @staticmethod 157 | def concat_tf_M(matrices): 158 | M = np.identity(4) 159 | for i in matrices: 160 | M = np.dot(M, i) 161 | return M 162 | 163 | @staticmethod 164 | def anglexyz_to_tf_M(anglexyz): 165 | return tf.transformations.euler_matrix(anglexyz[0], anglexyz[1], anglexyz[2], axes="sxyz") 166 | 167 | @staticmethod 168 | def tf_M_to_anglexyz(tf_M): 169 | return tf.transformations.euler_from_matrix(tf_M, axes="sxyz") 170 | 171 | 172 | 173 | if __name__ == '__main__': 174 | t = [1, 2, 3] 175 | quat = TfUtils.anglexyz_to_quaternion([0, 0, 0]) 176 | M = TfUtils.pose_to_tf_M( 177 | translate=t, 178 | quat=quat 179 | ) 180 | T = GymUtil.tf_M_to_gym_Tranform(M) 181 | 182 | trans2, quat2 = GymUtil.transform_to_pose(T) 183 | angles = GymUtil.quaternion_to_xyz(quat2) 184 | print(trans2, angles) -------------------------------------------------------------------------------- /src/pose_annotation/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "apriltag_ros"] 2 | path = apriltag_ros 3 | url = https://github.com/AprilRobotics/apriltag_ros.git 4 | [submodule "apriltag"] 5 | path = apriltag 6 | url = https://github.com/AprilRobotics/apriltag.git 7 | [submodule "iai_kinect2"] 8 | path = iai_kinect2 9 | url = https://github.com/TAMS-Group/iai_kinect2.git 10 | [submodule "apriltag-imgs"] 11 | path = apriltag-imgs 12 | url = https://github.com/yl-wang996/apriltag-imgs.git 13 | -------------------------------------------------------------------------------- /src/pose_annotation/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0.2) 2 | project(pose_annotation) 3 | 4 | find_package(catkin REQUIRED COMPONENTS 5 | std_msgs 6 | message_generation 7 | interactive_markers 8 | rospy 9 | visualization_msgs 10 | tf 11 | ) 12 | 13 | generate_messages( 14 | DEPENDENCIES 15 | std_msgs 16 | ) 17 | ################################### 18 | ## catkin specific configuration ## 19 | ################################### 20 | ## The catkin_package macro generates cmake config files for your package 21 | ## Declare things to be passed to dependent projects 22 | ## LIBRARIES: libraries you create in this project 
that dependent projects also need 23 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need 24 | ## DEPENDS: system dependencies of this project that dependent projects also need 25 | catkin_package( 26 | CATKIN_DEPENDS interactive_markers rospy visualization_msgs tf 27 | ) 28 | 29 | ########### 30 | ## Build ## 31 | ########### 32 | 33 | include_directories(include 34 | ${catkin_INCLUDE_DIRS} 35 | ) 36 | 37 | ############# 38 | ## Install ## 39 | ############# 40 | 41 | catkin_install_python(PROGRAMS 42 | scripts/basic_controls.py 43 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 44 | ) -------------------------------------------------------------------------------- /src/pose_annotation/README.md: -------------------------------------------------------------------------------- 1 | # pose_annotation -------------------------------------------------------------------------------- /src/pose_annotation/launch/annotation.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /src/pose_annotation/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | pose_annotation 4 | 0.0.1 5 | The interactive_marker_tutorials package 6 | 7 | todo 8 | 9 | BSD 10 | http://ros.org/wiki/interactive_marker_tutorials 11 | 12 | todo 13 | 14 | catkin 15 | 16 | sensor_msgs 17 | 18 | rospy 19 | interactive_markers 20 | visualization_msgs 21 | tf 22 | 23 | rospy 24 | interactive_markers 25 | visualization_msgs 26 | tf 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/pose_annotation/scripts/model_reorient.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | import os 4 | 5 | 6 | def main(mesh_content_path=None): 7 | data_root = "/homeL/1wang/workspace/anno_ee_ws/src/pose_annotation/meshes/" 8 | input_dataset_name = "Dataset3DModel_v3.0_norm_location_scale" 9 | output_dataset_name = "Dataset3DModel_v3.0" 10 | if mesh_content_path is not None: 11 | mesh_path = os.path.join(data_root, input_dataset_name, mesh_content_path) 12 | mesh = o3d.io.read_triangle_mesh(mesh_path) 13 | norm_pose_path = mesh_path.replace(".stl", "_norm_pose.txt") 14 | T = np.loadtxt(norm_pose_path, delimiter=",") 15 | mesh.transform(T) 16 | output_stl_path = mesh_path.replace(input_dataset_name, output_dataset_name) 17 | output_obj_path = output_stl_path.replace('.stl', '.obj') 18 | o3d.io.write_triangle_mesh( 19 | output_obj_path, 20 | mesh, 21 | write_vertex_colors=False, 22 | write_triangle_uvs=False 23 | ) 24 | 25 | mesh.compute_vertex_normals() 26 | o3d.io.write_triangle_mesh( 27 | output_stl_path, 28 | mesh, 29 | write_vertex_colors=False, 30 | write_triangle_uvs=False 31 | ) 32 | return 33 | 34 | cats = os.listdir(os.path.join(data_root, input_dataset_name)) 35 | cat_list = [f for f in cats if os.path.isdir(os.path.join(data_root, input_dataset_name, f))] 36 | for cat in cat_list: 37 | cat_path = os.path.join(data_root, input_dataset_name, cat) 38 | stl_list = [f for f in os.listdir(cat_path) if f.endswith(".stl")] 39 | stl_list.sort() 40 | for stl in stl_list: 41 | stl_path = os.path.join(cat_path, stl) 42 | target_folder = os.path.join(data_root, output_dataset_name, cat) 43 | if not os.path.exists(target_folder): 44 | os.makedirs(target_folder) 45 | 46 | print(f"Processing {stl_path}" + "-"*20) 47 | 48 | mesh 
= o3d.io.read_triangle_mesh(stl_path) 49 | 50 | # # for reorient the object 51 | # normalize the orientation of the object by hand annotation 52 | # hammer_01_norm_pose.txt 53 | norm_pose_path = os.path.join(data_root, input_dataset_name, cat, stl.split("/")[-1].split(".")[0] + "_norm_pose.txt") 54 | T = np.loadtxt(norm_pose_path, delimiter=",") 55 | mesh.transform(T) 56 | 57 | o3d.io.write_triangle_mesh( 58 | os.path.join(target_folder, stl.replace(".stl", ".obj")), 59 | mesh, 60 | write_vertex_colors=False, 61 | write_triangle_uvs=False 62 | ) 63 | 64 | mesh.compute_vertex_normals() 65 | o3d.io.write_triangle_mesh( 66 | os.path.join(target_folder, stl), 67 | mesh, 68 | write_vertex_colors=False, 69 | write_triangle_uvs=False 70 | ) 71 | def inspection(model_path): 72 | mesh = o3d.io.read_triangle_mesh(model_path) 73 | vertices = np.asarray(mesh.vertices) 74 | print(f"max: {np.max(vertices, axis=0)}") 75 | print(f"min: {np.min(vertices, axis=0)}") 76 | print(f"center: {mesh.get_center()}") 77 | 78 | if __name__ == '__main__': 79 | cat = "wrench" 80 | obj = "wrench_17" 81 | 82 | main(mesh_content_path=f"{cat}/{obj}.stl") 83 | # inspection("/homeL/1wang/workspace/anno_ee_ws/src/pose_annotation/meshes/Dataset3DModel_v3.0/hammer_grip/hammer_01.stl") -------------------------------------------------------------------------------- /src/pose_annotation/scripts/model_rescale_relocate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | import os 4 | 5 | def main(): 6 | data_root = "/homeL/1wang/workspace/anno_ee_ws/src/pose_annotation/meshes/" 7 | input_dataset_name = "Dataset3DModel_v3.0_unnormalized" 8 | output_dataset_name = "Dataset3DModel_v3.0_norm_location_scale" 9 | cats = os.listdir(os.path.join(data_root, input_dataset_name)) 10 | cat_list = [f for f in cats if os.path.isdir(os.path.join(data_root, input_dataset_name, f))] 11 | for cat in cat_list: 12 | cat_path = os.path.join(data_root, input_dataset_name, cat) 13 | stl_list = [f for f in os.listdir(cat_path) if f.endswith(".stl")] 14 | stl_list.sort() 15 | for stl in stl_list: 16 | stl_path = os.path.join(cat_path, stl) 17 | target_folder = os.path.join(data_root, output_dataset_name, cat) 18 | if not os.path.exists(target_folder): 19 | os.makedirs(target_folder) 20 | 21 | print(f"Processing {stl_path}" + "-"*20) 22 | 23 | mesh = o3d.io.read_triangle_mesh(stl_path) 24 | 25 | # # for reorient the object 26 | # normalize the orientation of the object by hand annotation 27 | # pose_path = os.path.join(path, file.split(".")[0] + "_head_pose.txt") 28 | # T = np.loadtxt(pose_path, delimiter=",") 29 | # mesh.transform(T) 30 | 31 | # scale the object to the unit size of the diagonal of the bounding box 32 | vertices = np.asarray(mesh.vertices) 33 | print(f"max: {np.max(vertices, axis=0)}") 34 | x_range = np.max(vertices[:, 0]) - np.min(vertices[:, 0]) 35 | y_range = np.max(vertices[:, 1]) - np.min(vertices[:, 1]) 36 | z_range = np.max(vertices[:, 2]) - np.min(vertices[:, 2]) 37 | scale = np.sqrt(x_range ** 2 + y_range ** 2 + z_range ** 2) 38 | print(f"scale_before: {scale}") 39 | mesh.scale(1 / scale, center=mesh.get_center()) 40 | # # move the object to the zero mean center 41 | vertices = np.asarray(mesh.vertices) 42 | x_range = np.max(vertices[:, 0]) - np.min(vertices[:, 0]) 43 | y_range = np.max(vertices[:, 1]) - np.min(vertices[:, 1]) 44 | z_range = np.max(vertices[:, 2]) - np.min(vertices[:, 2]) 45 | print(f"scale_after: {np.sqrt(x_range ** 2 + y_range ** 
2 + z_range ** 2)}") 46 | 47 | cx = (np.max(vertices[:, 0]) + np.min(vertices[:, 0])) / 2 48 | cy = (np.max(vertices[:, 1]) + np.min(vertices[:, 1])) / 2 49 | cz = (np.max(vertices[:, 2]) + np.min(vertices[:, 2])) / 2 50 | center = np.array([cx, cy, cz]) 51 | print(f"center_before: {center}") 52 | mesh.translate(-center, relative=True) 53 | print(f"center_after: {mesh.get_center()}") # [0, 0, 0] 54 | 55 | o3d.io.write_triangle_mesh( 56 | os.path.join(target_folder, stl.replace(".stl", ".obj")), 57 | mesh, 58 | write_vertex_colors=False, 59 | write_triangle_uvs=False 60 | ) 61 | 62 | mesh.compute_vertex_normals() 63 | o3d.io.write_triangle_mesh( 64 | os.path.join(target_folder, stl), 65 | mesh, 66 | write_vertex_colors=False, 67 | write_triangle_uvs=False 68 | ) 69 | def inspection(model_path): 70 | mesh = o3d.io.read_triangle_mesh(model_path) 71 | vertices = np.asarray(mesh.vertices) 72 | print(f"max: {np.max(vertices, axis=0)}") 73 | print(f"min: {np.min(vertices, axis=0)}") 74 | print(f"center: {mesh.get_center()}") 75 | 76 | if __name__ == '__main__': 77 | main() 78 | # inspection("/homeL/1wang/workspace/anno_ee_ws/src/pose_annotation/meshes/Dataset3DModel_v3.0/hammer_grip/hammer_01.stl") -------------------------------------------------------------------------------- /src/pose_annotation/scripts/model_resize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | import os 4 | 5 | obj_id = "hammer_10" 6 | file_path = "/homeL/1wang/workspace/anno_ee_ws/src/pose_annotation/meshes/Dataset3DModel_v2.0/Hammer_Grip/" 7 | 8 | mesh = o3d.io.read_triangle_mesh(os.path.join(file_path, f"{obj_id}.obj")) 9 | print(mesh.get_center()) 10 | mesh.scale(1, center=mesh.get_center()) 11 | print(np.min(mesh.vertices, axis=0)) 12 | print(np.max(mesh.vertices, axis=0)) 13 | mesh.compute_vertex_normals() 14 | mesh.compute_triangle_normals() 15 | 16 | # o3d.visualization.draw_geometries([mesh]) 17 | 18 | o3d.io.write_triangle_mesh(os.path.join(file_path, f"{obj_id}.stl"), mesh,write_vertex_colors=False,write_ascii=False,write_vertex_normals=True,write_triangle_uvs=False) 19 | 20 | -------------------------------------------------------------------------------- /src/pose_annotation/scripts/pub_tf.py: -------------------------------------------------------------------------------- 1 | import tf 2 | import rospy 3 | import numpy as np 4 | 5 | def load_pose(): 6 | id = "05" 7 | pth = f'/homeL/1wang/workspace/toolee_ws/src/pose_annotation/meshes/Dataset3DModel_v2.0/Hammer_Grip/hammer_{id}_head_pose.txt' 8 | pose = np.loadtxt(pth, delimiter=',') 9 | return pose 10 | def publish_tf(): 11 | pose = load_pose() 12 | trans, quat = tf.transformations.translation_from_matrix(pose), tf.transformations.quaternion_from_matrix(pose) 13 | while not rospy.is_shutdown(): 14 | br = tf.TransformBroadcaster() 15 | br.sendTransform(trans, 16 | quat, 17 | rospy.Time.now(), 18 | "camera_link", 19 | "base_link") 20 | rospy.sleep(0.1) 21 | 22 | if __name__ == '__main__': 23 | rospy.init_node('tf_broadcaster') 24 | publish_tf() -------------------------------------------------------------------------------- /src/toolee/.gitignore: -------------------------------------------------------------------------------- 1 | # created by virtualenv automatically 2 | playground/dataset_generation/assets/Dataset3DModel/* 3 | ./bin/* 4 | ./include/* 5 | ./lib/* 6 | -------------------------------------------------------------------------------- /src/toolee/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0.2) 2 | project(tool_ee) 3 | 4 | find_package(catkin REQUIRED COMPONENTS 5 | std_msgs 6 | message_generation 7 | rospy 8 | visualization_msgs 9 | tf 10 | geometry_msgs 11 | sensor_msgs 12 | ) 13 | 14 | #add_message_files(FILES 15 | # ) 16 | 17 | ## Generate services in the 'srv' folder 18 | add_service_files(FILES 19 | PosePred.srv 20 | SegPred.srv 21 | ) 22 | 23 | generate_messages(DEPENDENCIES 24 | std_msgs 25 | geometry_msgs 26 | sensor_msgs 27 | ) 28 | 29 | ################################### 30 | ## catkin specific configuration ## 31 | ################################### 32 | ## The catkin_package macro generates cmake config files for your package 33 | ## Declare things to be passed to dependent projects 34 | ## LIBRARIES: libraries you create in this project that dependent projects also need 35 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need 36 | ## DEPENDS: system dependencies of this project that dependent projects also need 37 | catkin_package(CATKIN_DEPENDS 38 | rospy 39 | visualization_msgs 40 | tf 41 | geometry_msgs 42 | sensor_msgs 43 | ) 44 | 45 | ########### 46 | ## Build ## 47 | ########### 48 | 49 | include_directories(include 50 | ${catkin_INCLUDE_DIRS} 51 | ) 52 | 53 | ############# 54 | ## Install ## 55 | ############# 56 | catkin_install_python(PROGRAMS 57 | scripts/pose_pred_service.py 58 | scripts/seg_pred_service.py 59 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 60 | ) -------------------------------------------------------------------------------- /src/toolee/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jiyao Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/toolee/configs/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | # the mapping between the affordance name and the affordance id in the affordance segmentation image 4 | affordance_seg_id_map={ 5 | "empty": 0, 6 | "hand": 1, 7 | "object": 2, 8 | "hammer_grip_head1": 3, 9 | "hammer_grip_grip": 4, 10 | "screwdriver_head1": 5, 11 | "wrench_head1": 6, 12 | "wrench_head2": 7, 13 | } 14 | def get_affordance_id_from_name(cat, ee_name): 15 | name = f"{cat}_{ee_name}" 16 | return affordance_seg_id_map[name] 17 | def get_affordance_name_from_id(id): 18 | for k,v in affordance_seg_id_map.items(): 19 | if v==id: 20 | return k 21 | return None 22 | def get_all_ee_seg_names(): 23 | return list(affordance_seg_id_map.keys())[3:] 24 | def get_all_ee_seg_ids(): 25 | return list(affordance_seg_id_map.values())[3:] 26 | def get_affordance_seg_id_map(): 27 | return affordance_seg_id_map 28 | 29 | def get_config(show=True): 30 | parser = argparse.ArgumentParser() 31 | 32 | """ dataset """ 33 | # parser.add_argument('--synset_names', nargs='+', default=['bottle', 'bowl', 'camera', 'can', 'laptop', 'mug']) 34 | parser.add_argument('--data_path', default='/dataSSD/yunlong/dataspace/DatasetToolEE', type=str) 35 | parser.add_argument('--asset_path', default='/dataSSD/yunlong/dataspace/Dataset3DModel', type=str) 36 | parser.add_argument('--o2c_pose', default=True, action='store_true') 37 | parser.add_argument('--batch_size', type=int, default=600) 38 | parser.add_argument('--eval_batch_size', type=int, default=200) 39 | parser.add_argument('--pose_mode', type=str, default='rot_matrix') # rot_matrix_symtr, rot_matrix 40 | parser.add_argument('--seed', type=int, default=0) 41 | parser.add_argument('--percentage_data_for_train', type=float, default=1.0) 42 | parser.add_argument('--percentage_data_for_val', type=float, default=0.1) # 0.1 for accelerating the testing 43 | parser.add_argument('--percentage_data_for_test', type=float, default=1.0) 44 | parser.add_argument('--device', type=str, default='cuda') 45 | parser.add_argument('--num_points', type=int, default=1024) 46 | parser.add_argument('--per_obj', type=str, default='') 47 | parser.add_argument('--num_workers', type=int, default=32) 48 | parser.add_argument('--task_type', type=str, default='ee_pose') # ee_pose, obj_pose, 49 | 50 | 51 | """ model """ 52 | parser.add_argument('--posenet_mode', type=str, default='score') 53 | parser.add_argument('--hidden_dim', type=int, default=128) 54 | parser.add_argument('--sampler_mode', nargs='+') 55 | parser.add_argument('--sampling_steps', type=int) 56 | parser.add_argument('--sde_mode', type=str, default='ve') 57 | parser.add_argument('--sigma', type=float, default=25) # base-sigma for SDE 58 | parser.add_argument('--likelihood_weighting', default=False, action='store_true') 59 | parser.add_argument('--regression_head', type=str, default='Rx_Ry_and_T') # Rx_Ry_and_T, Rx_Ry_and_T_and_Symtr 60 | parser.add_argument('--pointnet2_params', type=str, default='light') 61 | parser.add_argument('--pts_encoder', type=str, default='pointnet2') 62 | 63 | 64 | """ training """ 65 | parser.add_argument('--agent_type', type=str, default='score', help='only score') 66 | parser.add_argument('--pretrained_score_model_path', type=str) 67 | 68 | parser.add_argument('--distillation', default=False, action='store_true') 69 | parser.add_argument('--n_epochs', type=int, default=1000) 70 | 
parser.add_argument('--log_dir', type=str, default='ScoreNet') 71 | parser.add_argument('--log_folder', type=str, default='/dataSSD/yunlong/dataspace/train_logs') 72 | parser.add_argument('--optimizer', type=str, default='Adam') 73 | parser.add_argument('--eval_freq', type=int, default=100) 74 | parser.add_argument('--repeat_num', type=int, default=20) 75 | parser.add_argument('--grad_clip', type=float, default=1.) 76 | parser.add_argument('--ema_rate', type=float, default=0.999) # ema force the smooth training, prevent the shaking of the steps 77 | # ema: mean a weighted average of the current model parameters and the previous parameters, 0.99 is the weight of the previous model parameters 78 | parser.add_argument('--lr', type=float, default=1e-3) 79 | parser.add_argument('--warmup', type=int, default=100) 80 | parser.add_argument('--lr_decay', type=float, default=0.98) 81 | parser.add_argument('--use_pretrain', default=False, action='store_true') 82 | parser.add_argument('--parallel', default=False, action='store_true') 83 | parser.add_argument('--num_gpu', type=int, default=2) 84 | parser.add_argument('--is_train', default=False, action='store_true') 85 | 86 | """ testing """ 87 | parser.add_argument('--eval_set', default='test', type=str) # test, novel 88 | parser.add_argument('--eval', default=False, action='store_true') 89 | parser.add_argument('--pred', default=False, action='store_true') 90 | parser.add_argument('--model_name', type=str) 91 | parser.add_argument('--eval_repeat_num', type=int, default=20) 92 | parser.add_argument('--save_video', default=False, action='store_true') 93 | parser.add_argument('--max_eval_num', type=int, default=10000000) 94 | parser.add_argument('--img_size', type=int, default=256, help='cropped image size') 95 | parser.add_argument('--result_dir', type=str, default='', help='result directory') 96 | parser.add_argument('--T0', type=float, default=1.0) 97 | 98 | # cfg = parser.parse_args() 99 | cfg, _ = parser.parse_known_args() 100 | 101 | cfg.cat_name = ['hammer_grip', 'screwdriver', 'wrench'] 102 | cfg.ee_names = ['hammer_grip_head1', 'hammer_grip_grip', 'screwdriver_head1', 'wrench_head1', 'wrench_head2'] 103 | 104 | if show: 105 | for k, v in cfg.__dict__.items(): 106 | print(f'{k}: {v}') 107 | 108 | return cfg 109 | 110 | -------------------------------------------------------------------------------- /src/toolee/configs/mrcnn_config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | mrcnn_seg_id_map = { 4 | "hammer_grip_head1": 0, 5 | "hammer_grip_grip": 1, 6 | "screwdriver_head1": 2, 7 | "wrench_head1": 3, 8 | "wrench_head2": 4, 9 | } 10 | 11 | def get_seg_id(name): 12 | return mrcnn_seg_id_map[name] 13 | 14 | def get_all_ee_seg_names(): 15 | return list(mrcnn_seg_id_map.keys()) 16 | 17 | 18 | 19 | def get_seg_name(id): 20 | for k, v in mrcnn_seg_id_map.items(): 21 | if v == id: 22 | return k 23 | return None 24 | 25 | 26 | def get_config(): 27 | parser = argparse.ArgumentParser() 28 | 29 | """ dataset """ 30 | parser.add_argument('--data_root', default='/dataSSD/yunlong/dataspace/DatasetToolEE', type=str) 31 | parser.add_argument('--data_dir', default='/dataSSD/yunlong/dataspace/DatasetToolEE_mrcnn', type=str) 32 | parser.add_argument('--batch_size', type=int, default=32) 33 | parser.add_argument('--eval_batch_size', type=int, default=32) 34 | 35 | parser.add_argument('--seed', type=int, default=0) 36 | parser.add_argument('--device', type=str, default='cuda') 37 | 
parser.add_argument('--num_workers', type=int, default=16) 38 | 39 | """ training """ 40 | parser.add_argument('--lr', type=float, default=0.00025) 41 | parser.add_argument('--max_iter', type=int, default=40000) 42 | parser.add_argument('--num_classes', type=int, default=5) 43 | 44 | 45 | """ evaluation """ 46 | parser.add_argument('--model_path', type=str, default='/dataSSD/yunlong/dataspace/mrcnn_result/output/model_final.pth') 47 | parser.add_argument('--eval_freq', type=int, default=0) 48 | 49 | """ testing """ 50 | parser.add_argument('--roi_threshold', type=float, default=0.7) 51 | 52 | cfg = parser.parse_args() 53 | 54 | for k, v in cfg.__dict__.items(): 55 | print(f'{k}: {v}') 56 | 57 | return cfg 58 | 59 | -------------------------------------------------------------------------------- /src/toolee/datasets/data_extract_pvnet.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import open3d as o3d 3 | from utils.file_utils import MetaUtils 4 | import numpy as np 5 | from tqdm import tqdm 6 | from PIL import Image 7 | from multiprocessing import Pool 8 | import open3d as o3d 9 | 10 | def load_mesh(cat_name, obj_name, scale): 11 | 12 | asset_root = '/dataSSD/yunlong/dataspace/Dataset3DModel/' 13 | mesh_path = os.path.join(asset_root, cat_name, f'{obj_name}.stl') 14 | if not os.path.exists(mesh_path): 15 | raise ValueError(f"the pcd file {mesh_path} does not exist!") 16 | # load mesh file 17 | mesh = o3d.io.read_triangle_mesh(mesh_path) 18 | mesh.scale(scale, center=np.array([0,0,0])) 19 | return mesh 20 | 21 | def convert_dataset(example): 22 | meta_name, ee_name = example 23 | data_root_target = '/dataSSD/yunlong/dataspace/DatasetToolEE_pvnet' 24 | data_root = '/dataSSD/yunlong/dataspace/DatasetToolEE' 25 | meta_util = MetaUtils(data_root, meta_name) 26 | cat_name, obj_name, idx = meta_util.get_cat_obj_id() 27 | obj_dir = os.path.join(data_root_target, cat_name, obj_name) 28 | os.makedirs(obj_dir, exist_ok=True) 29 | 30 | # save scale 31 | scale = meta_util.get_obj_scale() 32 | scale_folder = os.path.join(obj_dir, f'{scale:.4f}') 33 | os.makedirs(scale_folder, exist_ok=True) 34 | 35 | # save camera view matrix 36 | camera_view = meta_util.get_cam_view_matrix() 37 | camera_view_folder = os.path.join(scale_folder, 'camera_view_matrix') 38 | os.makedirs(camera_view_folder, exist_ok=True) 39 | np.save(os.path.join(camera_view_folder, f'view{idx}.npy'), camera_view) 40 | 41 | # save rgb image 42 | rgb_image = meta_util.get_image() 43 | rgb_image_folder = os.path.join(scale_folder, 'rgb') 44 | os.makedirs(rgb_image_folder, exist_ok=True) 45 | rgb_image_path = os.path.join(rgb_image_folder, f'{idx}.jpg') 46 | if os.path.exists(rgb_image_path): 47 | return 48 | rgb_image = Image.fromarray(rgb_image) 49 | rgb_image.save(rgb_image_path) 50 | 51 | 52 | 53 | # save obj pose 54 | obj_pose = meta_util.get_obj_pose() 55 | obj_pose_folder = os.path.join(scale_folder, 'pose') 56 | os.makedirs(obj_pose_folder, exist_ok=True) 57 | np.save(os.path.join(obj_pose_folder, f'pose{idx}.npy'), obj_pose) 58 | ee_pose_dict = meta_util.get_ee_poses() 59 | 60 | # save ee pose 61 | for k, v in ee_pose_dict.items(): 62 | ee_pose_folder = os.path.join(scale_folder, f"ee_pose_{k}") 63 | os.makedirs(ee_pose_folder, exist_ok=True) 64 | os.makedirs(ee_pose_folder, exist_ok=True) 65 | np.save(os.path.join(ee_pose_folder, f'pose{idx}.npy'), v) 66 | 67 | # save mask image 68 | mask_image = meta_util.get_seg() 69 | ids = np.unique(mask_image) 70 | obj_id = 
meta_util.get_obj_seg_id() 71 | mask_image = mask_image == obj_id 72 | im = Image.fromarray(np.uint8((mask_image) * 255)) 73 | mask_image_folder = os.path.join(scale_folder, 'mask') 74 | os.makedirs(mask_image_folder, exist_ok=True) 75 | mask_image_path = os.path.join(mask_image_folder, f'{idx}.png') 76 | im.save(mask_image_path) 77 | 78 | # convert mesh 79 | mesh = load_mesh(cat_name, obj_name, scale=scale) 80 | mesh.compute_vertex_normals() 81 | mesh.compute_triangle_normals() 82 | mesh_path = os.path.join(scale_folder, 'model.stl') 83 | o3d.io.write_triangle_mesh(mesh_path, mesh) 84 | 85 | # save projection matrix 86 | proj_matirx = meta_util.get_cam_proj_matrix() 87 | proj_matirx = proj_matirx[:3, :3] 88 | u0 = int(1080/2) 89 | v0 = int(1920/2) 90 | proj_matirx[0, 2] = u0 91 | proj_matirx[1, 2] = v0 92 | proj_matrix_path = os.path.join(scale_folder, 'camera.txt') 93 | np.savetxt(proj_matrix_path, proj_matirx) 94 | 95 | 96 | 97 | if __name__ == '__main__': 98 | 99 | data_root = '/dataSSD/yunlong/dataspace/DatasetToolEE' 100 | 101 | all_train_examples = np.loadtxt(os.path.join(data_root, 'all_training_examples_ee_visible.txt'), dtype=str, 102 | delimiter=',').tolist() 103 | novel_examples = np.loadtxt(os.path.join(data_root, 'novel_examples_ee_visible.txt'), dtype=str, delimiter=',').tolist() 104 | 105 | with Pool(processes=50) as pool: 106 | for _ in tqdm(pool.imap_unordered(convert_dataset, all_train_examples), total=len(all_train_examples)): 107 | pass 108 | 109 | with Pool(processes=50) as pool: 110 | for _ in tqdm(pool.imap_unordered(convert_dataset, novel_examples), total=len(novel_examples)): 111 | pass 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /src/toolee/datasets/data_split_ee.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | from tqdm import tqdm 5 | import numpy as np 6 | 7 | from utils.file_utils import MetaUtils 8 | 9 | if __name__ == '__main__': 10 | random.seed(0) 11 | train_ratio = 0.8 12 | data_path = '/dataSSD/yunlong/dataspace/DatasetToolEE' 13 | exclude_objs = ['hammer_10', 'hammer_11'] # some problems with those 3D models 14 | cats = [f for f in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, f))] 15 | all_training_examples = [] 16 | novel_examples = [] 17 | cats.sort() 18 | for cat in cats: 19 | objs = [f for f in os.listdir(os.path.join(data_path, cat)) if os.path.isdir(os.path.join(data_path, cat, f))] 20 | objs.sort() 21 | for obj in objs[:-1]: 22 | if obj in exclude_objs: 23 | continue 24 | meta_file_names = [f for f in os.listdir(os.path.join(data_path, cat, obj)) if 'meta' in f] 25 | for meta_file_name in tqdm(meta_file_names): 26 | meta_file_path = os.path.join(cat, obj, meta_file_name) 27 | meta_util = MetaUtils(data_path, meta_file_path) 28 | ee_names = meta_util.get_ee_names() 29 | for ee_name in ee_names: 30 | all_training_examples.append([meta_file_path, ee_name]) 31 | 32 | novel_obj = objs[-1] 33 | meta_file_names = [f for f in os.listdir(os.path.join(data_path, cat, novel_obj)) if 'meta' in f] 34 | for meta_file_name in tqdm(meta_file_names): 35 | meta_file_path = os.path.join(cat, novel_obj, meta_file_name) 36 | meta_util = MetaUtils(data_path, meta_file_path) 37 | for ee_name in meta_util.get_ee_names(): 38 | novel_examples.append([meta_file_path, ee_name]) 39 | 40 | novel_examples = np.asarray(novel_examples) 41 | all_training_examples = 
np.asarray(all_training_examples) 42 | print('save to file') 43 | np.savetxt(os.path.join(data_path, 'novel_examples_ee.txt'), novel_examples, fmt='%s', delimiter=',') 44 | np.savetxt(os.path.join(data_path, 'all_training_examples_ee.txt'), all_training_examples, fmt='%s', delimiter=',') 45 | 46 | np.random.shuffle(all_training_examples) 47 | train_num = int(len(all_training_examples) * train_ratio) 48 | train_examples = all_training_examples[:train_num,:] 49 | val_examples = all_training_examples[train_num:,:] 50 | print('save to file') 51 | np.savetxt(os.path.join(data_path, 'train_examples_ee.txt'), train_examples, fmt='%s', delimiter=',') 52 | np.savetxt(os.path.join(data_path, 'val_examples_ee.txt'), val_examples, fmt='%s', delimiter=',') 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /src/toolee/datasets/extract_ee_pcd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | from multiprocessing import Pool 5 | import numpy as np 6 | from utils.file_utils import MetaUtils, get_examples 7 | from configs.config import get_config, get_affordance_id_from_name 8 | from tqdm import tqdm 9 | import open3d as o3d 10 | from utils.data_tools import sample_data 11 | import random 12 | 13 | 14 | 15 | 16 | def depth_to_pointcloud(depth_buffer, rgb_buffer, seg_buffer, seg_id, camera_proj_matrix, width, height): 17 | fu = 2 / camera_proj_matrix[0, 0] 18 | fv = 2 / camera_proj_matrix[1, 1] 19 | centerU = width / 2 20 | centerV = height / 2 21 | 22 | u = range(0, rgb_buffer.shape[1]) 23 | v = range(0, rgb_buffer.shape[0]) 24 | 25 | u, v = np.meshgrid(u, v) 26 | u = u.astype(float) 27 | v = v.astype(float) 28 | 29 | Z = depth_buffer 30 | X = -(u - centerU) / width * Z * fu 31 | Y = (v - centerV) / height * Z * fv 32 | 33 | Z = Z.flatten() 34 | depth_valid = Z > -10001 35 | seg_valid = seg_buffer.flatten() == seg_id 36 | valid = np.logical_and(depth_valid, seg_valid) 37 | X = X.flatten() 38 | Y = Y.flatten() 39 | 40 | position = np.vstack((X, Y, Z, np.ones(len(X))))[:, valid].T 41 | colors = rgb_buffer.reshape((-1, 3))[valid] 42 | 43 | points = position[:, 0:3] 44 | # if points.shape[0] < cfg.num_points: 45 | # print(f"Warning: {points.shape[0]} points < 1024 in the point cloud, may occlusion or other problems") 46 | return points, colors 47 | 48 | def extract_ee_pcd(example): 49 | meta_name, ee_name = example 50 | data_root = '/dataSSD/yunlong/dataspace/DatasetToolEE' 51 | meta_util = MetaUtils(data_root, meta_name) 52 | cat, obj, idx = meta_util.get_cat_obj_id() 53 | pcd_file_name = f"ee_{cat}_{obj}_{ee_name}_{idx:04d}.pcd" 54 | if not os.path.exists(os.path.join(data_root, cat, obj, pcd_file_name)) or meta_util.get(f'ee_pcd_path_{ee_name}') is None: 55 | if not os.path.exists(os.path.join(data_root, cat, obj, pcd_file_name)): 56 | depth = meta_util.get_depth_array() 57 | seg = meta_util.get_affordance_seg() 58 | rgb = meta_util.get_image() 59 | height, width = meta_util.get_cam_hw() 60 | seg_id = get_affordance_id_from_name(cat, ee_name) 61 | proj_matrix = meta_util.get_cam_proj_matrix() 62 | points, colors = depth_to_pointcloud( 63 | depth_buffer=depth, 64 | rgb_buffer=rgb, 65 | seg_buffer=seg, 66 | seg_id=seg_id, 67 | camera_proj_matrix=proj_matrix, 68 | height=height, 69 | width=width 70 | ) 71 | if points.shape[0] < 50: 72 | print(f"points shape:{points.shape}, {meta_name} {ee_name} is not visuable, ignore it.") 73 | return 74 | try: 75 | _, sample_idx = 
sample_data(points, cfg.num_points) 76 | except Exception as e: 77 | print(f"Error in {meta_name} {ee_name}") 78 | print(e) 79 | return 80 | points = points[sample_idx] 81 | colors = colors[sample_idx] 82 | pcd = o3d.geometry.PointCloud() 83 | pcd.points = o3d.utility.Vector3dVector(np.asarray(points)) 84 | pcd.colors = o3d.utility.Vector3dVector(np.asarray(colors)) 85 | o3d.io.write_point_cloud(filename=os.path.join(data_root, cat, obj, pcd_file_name), pointcloud=pcd) 86 | 87 | if __name__ == '__main__': 88 | cfg = get_config() 89 | data_root = cfg.data_path 90 | examples_train = get_examples(data_root, "all_training_examples_ee.txt").tolist() 91 | examples_novel = get_examples(data_root, "novel_examples_ee.txt").tolist() 92 | 93 | # with Pool(processes=50) as pool: 94 | # for _ in tqdm(pool.imap_unordered(extract_ee_pcd, examples_train), total=len(examples_train)): 95 | # pass 96 | # 97 | # with Pool(processes=60) as pool: 98 | # for _ in tqdm(pool.imap_unordered(extract_ee_pcd, examples_novel), total=len(examples_novel)): 99 | # pass 100 | # 101 | # 102 | # not_vis_objs = [ 103 | # 'wrench_15', 104 | # 'hammer_04', 105 | # 'wrench_18', 106 | # ] 107 | # not_visible_list = [] 108 | # for example in tqdm(examples_train): 109 | # meta_name, ee_name = example 110 | # for obj in not_vis_objs: 111 | # if obj in meta_name: 112 | # meta_util = MetaUtils(data_root, meta_name) 113 | # if not meta_util.get_ee_pcd_path(ee_name): 114 | # not_visible_list.append(example) 115 | # 116 | # for example in tqdm(examples_novel): 117 | # meta_name, ee_name = example 118 | # for obj in not_vis_objs: 119 | # if obj in meta_name: 120 | # meta_util = MetaUtils(data_root, meta_name) 121 | # if not meta_util.get_ee_pcd_path(ee_name): 122 | # not_visible_list.append(example) 123 | # 124 | # np.savetxt(os.path.join(data_root, "not_visible_ee.txt"), np.asarray(not_visible_list), fmt='%s') 125 | 126 | examples_not_visuable = np.loadtxt(os.path.join(data_root, "not_visible_ee.txt"), dtype=str,delimiter=',').tolist() 127 | for example in tqdm(examples_not_visuable): 128 | if example in examples_train: 129 | examples_train.remove(example) 130 | continue 131 | if example in examples_novel: 132 | examples_novel.remove(example) 133 | 134 | np.savetxt(os.path.join(data_root, "all_training_examples_ee_visible.txt"), np.asarray(examples_train), fmt='%s',delimiter=',') 135 | np.savetxt(os.path.join(data_root, "novel_examples_ee_visible.txt"), np.asarray(examples_novel), fmt='%s',delimiter=',') 136 | 137 | train_ratio = 0.8 138 | random.seed(0) 139 | random.shuffle(examples_train) 140 | train_num = int(len(examples_train) * train_ratio) 141 | train_examples = examples_train[:train_num] 142 | val_examples = examples_train[train_num:] 143 | 144 | np.savetxt(os.path.join(data_root, "train_examples_ee_visible.txt"), np.asarray(train_examples), fmt='%s',delimiter=',') 145 | np.savetxt(os.path.join(data_root, "val_examples_ee_visible.txt"), np.asarray(val_examples), fmt='%s',delimiter=',') 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /src/toolee/datasets/extract_ee_pose_prior.py: -------------------------------------------------------------------------------- 1 | # import os.path 2 | # 3 | # import numpy as np 4 | # from tqdm import tqdm 5 | # 6 | # from utils.file_utils import MetaUtils 7 | # from utils.transform_utils import TfUtils 8 | # 9 | # def extract_ee_pose_prior(meta_file="all_training_examples_ee_visible.txt"): 10 | # data_root = 
'/dataSSD/yunlong/dataspace/DatasetToolEE' 11 | # save_path = os.path.join(data_root, 'ee_pose_prior.npy') 12 | # meta_file_list = np.loadtxt(f'{data_root}/{meta_file}', dtype=str, delimiter=',') 13 | # _ee_prior = {} 14 | # ee_prior = {} 15 | # if os.path.exists(save_path): 16 | # print("Loading existing ee pose prior from file") 17 | # ee_prior = np.load(save_path, allow_pickle=True).item() 18 | # for meta_file, ee_name in tqdm(meta_file_list, 'Extracting ee pose prior'): 19 | # # for meta_file, ee_name in meta_file_list[:100]: 20 | # meta_util = MetaUtils(data_root, meta_file) 21 | # cat, obj, _ = meta_util.get_cat_obj_id() 22 | # 23 | # if cat not in _ee_prior: 24 | # _ee_prior[cat] = {} 25 | # if cat not in ee_prior: 26 | # ee_prior[cat] = {} 27 | # 28 | # if obj not in _ee_prior[cat]: 29 | # _ee_prior[cat][obj] = {} 30 | # if obj not in ee_prior[cat]: 31 | # ee_prior[cat][obj] = {} 32 | # 33 | # ee_pose_RT = meta_util.get_ee_poses(ee_name=ee_name) 34 | # if ee_name not in _ee_prior[cat][obj]: 35 | # _ee_prior[cat][obj][ee_name] = [] 36 | # _ee_prior[cat][obj][ee_name].append(ee_pose_RT) 37 | # 38 | # for cat in _ee_prior: 39 | # for obj in _ee_prior[cat]: 40 | # for ee_name in _ee_prior[cat][obj]: 41 | # if ee_name in ee_prior[cat][obj]: 42 | # continue 43 | # ee_poses = np.array(_ee_prior[cat][obj][ee_name]) 44 | # ee_poses = np.expand_dims(ee_poses, axis=0) 45 | # avg_poses = TfUtils.get_avg_sRT(ee_poses)[0] 46 | # ee_prior[cat][obj][ee_name] = avg_poses 47 | # print(f'{cat}/{obj}/{ee_name}: {avg_poses}') 48 | # print("Saving ee pose prior to file") 49 | # np.save(save_path, ee_prior) 50 | # 51 | # def load_pose_prior(): 52 | # data_root = '/dataSSD/yunlong/dataspace/DatasetToolEE' 53 | # ee_prior = np.load(os.path.join(data_root, 'ee_pose_prior.npy'), allow_pickle=True).item() 54 | # return ee_prior 55 | # 56 | # if __name__ == '__main__': 57 | # extract_ee_pose_prior(meta_file="all_training_examples_ee_visible.txt") 58 | # extract_ee_pose_prior(meta_file="novel_examples_ee_visible.txt") 59 | # ee_prior = load_pose_prior() 60 | # print('done') 61 | 62 | import numpy as np 63 | import os 64 | from tqdm import tqdm 65 | -------------------------------------------------------------------------------- /src/toolee/launch/affordance_segmentation.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /src/toolee/launch/inference_service.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/toolee/launch/pose_estimation.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /src/toolee/mrcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/toolee/mrcnn/__init__.py -------------------------------------------------------------------------------- /src/toolee/mrcnn/dataset.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from detectron2.utils.logger import setup_logger 3 | 4 | setup_logger() 5 | 6 | import numpy as np 7 | import os 8 | from tqdm import tqdm 9 | 
import random 10 | import pickle 11 | 12 | from detectron2.data import MetadataCatalog, DatasetCatalog 13 | from pycocotools.mask import encode as mask_encode 14 | from detectron2.utils.visualizer import Visualizer 15 | from detectron2.structures import BoxMode 16 | 17 | from utils.file_utils import MetaUtils 18 | from configs.mrcnn_config import get_seg_id, get_all_ee_seg_names, get_config 19 | 20 | def get_dataset_list(data_root, meta_file_name='_examples_ee_visible.txt'): 21 | examples = np.loadtxt(os.path.join(data_root, meta_file_name), dtype=str, delimiter=',') 22 | return examples 23 | 24 | def dataset_visualization(meta_data, dataset, cfg): 25 | tmp_folder = os.path.join(os.path.dirname(cfg.data_root), 'mrcnn_result', "tmp") 26 | os.makedirs(tmp_folder, exist_ok=True) 27 | assert dataset in meta_data.name, f"dataset {dataset} not found" 28 | dataset_dicts = extract_datasets_dicts(dataset=dataset, cfg=cfg) 29 | os.makedirs(tmp_folder, exist_ok=True) 30 | for d in random.sample(dataset_dicts, 3): 31 | image_path = d["file_name"] 32 | file_name = os.path.basename(image_path) 33 | img = cv2.imread(image_path) 34 | visualizer = Visualizer(img[:, :, ::-1], metadata=meta_data, scale=0.5) 35 | out = visualizer.draw_dataset_dict(d) 36 | cv2.imwrite(filename=os.path.join(tmp_folder, f"vis_{file_name}.jpg"), img=out.get_image()[:, :, ::-1]) 37 | 38 | def extract_datasets_dicts(dataset='train', cfg=None): 39 | assert dataset in ['train', 'val', 'novel'], f"dataset {dataset} not found" 40 | dataset_file_name = f"{dataset}_examples_ee_visible.txt" 41 | data_root = cfg.data_root 42 | examples = get_dataset_list(data_root, dataset_file_name) 43 | 44 | dataset_list = [] 45 | for meta_file_name, _ in tqdm(examples, desc=f"loading {dataset} dataset"): 46 | meta_util = MetaUtils(data_root, meta_file_name) 47 | cat, _, _ = meta_util.get_cat_obj_id() 48 | height, width = meta_util.get_cam_hw() 49 | image_path = os.path.join(data_root, meta_util.image_path) 50 | record = {} 51 | 52 | record["file_name"] = image_path 53 | record["image_id"] = meta_file_name # str 54 | record["height"] = height 55 | record["width"] = width 56 | 57 | seg_array = meta_util.get_affordance_seg() 58 | ee_points_dict = meta_util.get_ee_points() 59 | if len(ee_points_dict) == 0: 60 | print(f"no ee points found in {meta_file_name}") 61 | 62 | ids = np.unique(seg_array) 63 | annotations = [] 64 | 65 | for ee_name, ee_point in ee_points_dict.items(): 66 | ee_id = get_seg_id(f"{cat}_{ee_name}") 67 | seg_id = ee_id + 3 68 | if seg_id not in ids: 69 | continue 70 | anno = {} 71 | bit_seg_array = seg_array == seg_id 72 | ys, xs = np.where(bit_seg_array == True) 73 | x_min, x_max = np.min(xs), np.max(xs) 74 | y_min, y_max = np.min(ys), np.max(ys) 75 | # bbox = [np.max([x_min, 0]), np.max([y_min, 0]), np.min([x_max, height]), np.min([y_max, width])] 76 | bbox = [x_min, y_min, x_max, y_max] 77 | anno["bbox"] = bbox 78 | anno["bbox_mode"] = BoxMode.XYXY_ABS 79 | anno["category_id"] = ee_id 80 | mask = mask_encode(np.asarray(bit_seg_array.astype(np.uint8), 81 | order="F")) # set cfg.INPUT.MASK_FORMAT to 'bitmask' if using the default data loader with such format. 
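# Editor's note (added comment, not in the original file): `mask_encode` returns a
# COCO run-length-encoding (RLE) dict, which Detectron2 consumes directly when
# cfg.INPUT.MASK_FORMAT is set to "bitmask" (as done in mrcnn/runner.py).
# A minimal round-trip sanity check, assuming `bit_seg_array` as built above:
#   from pycocotools.mask import decode as mask_decode
#   assert mask_decode(mask).astype(bool).sum() == bit_seg_array.sum()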
82 | anno['segmentation'] = mask 83 | # [x, y, v], v=1 means visible 2 means invisible 0 means not labeled 84 | # anno['keypoints'] = [int(ee_point[0]), int(ee_point[1]), 1] 85 | anno['iscrowd'] = 0 86 | annotations.append(anno) 87 | if len(ee_points_dict) != 0 and len(annotations)==0: 88 | print(f"no annotations found in {meta_file_name}") 89 | if len(ee_points_dict) != 0 and len(annotations) == 0: 90 | print(f"no annotations found in {meta_file_name}") 91 | record['annotations'] = annotations 92 | dataset_list.append(record) 93 | return dataset_list 94 | 95 | def save_dataset_dicts(cfg, data_dir): 96 | for dataset in ["train", 'val', 'novel']: 97 | dataset_path = os.path.join(data_dir, f"{dataset}_dataset_dicts.pkl") 98 | if not os.path.exists(dataset_path): 99 | dataset_list = extract_datasets_dicts(dataset, cfg) 100 | with open(dataset_path, 'wb') as f: 101 | pickle.dump(dataset_list, f) 102 | else: 103 | print(f"dataset {dataset} already exists, skip saving") 104 | 105 | def load_dataset_dicts(dataset, data_dir): 106 | dataset_path = os.path.join(data_dir, f"{dataset}_dataset_dicts.pkl") 107 | with open(dataset_path, 'rb') as f: 108 | dataset_list = pickle.load(f) 109 | return dataset_list 110 | 111 | def get_meta_data(dataset='val'): 112 | meta_data = MetadataCatalog.get("ToolEE/" + dataset) 113 | return meta_data 114 | 115 | def register_datasets(cfg): 116 | for dataset in ["train", 'val', 'novel']: 117 | DatasetCatalog.register("ToolEE/" + dataset, lambda d=dataset: load_dataset_dicts(dataset, cfg.data_dir)) 118 | ee_seg_names = get_all_ee_seg_names() 119 | meta_data = MetadataCatalog.get("ToolEE/" + dataset).set( 120 | thing_classes=ee_seg_names, 121 | ) 122 | vis = False 123 | if vis: 124 | dataset_visualization(meta_data=meta_data, dataset=dataset, cfg=cfg) 125 | 126 | if __name__ == '__main__': 127 | cfg = get_config() 128 | data_dir = cfg.data_dir 129 | os.makedirs(data_dir, exist_ok=True) 130 | save_dataset_dicts(cfg,data_dir) 131 | register_datasets(cfg) 132 | train_dict = load_dataset_dicts("train", data_dir) 133 | val_dict = load_dataset_dicts("val", data_dir) 134 | novel_dict = load_dataset_dicts("novel", data_dir) 135 | 136 | print(f"train dataset has {len(train_dict)} images") 137 | print(f"val dataset has {len(val_dict)} images") 138 | print(f"novel dataset has {len(novel_dict)} images") -------------------------------------------------------------------------------- /src/toolee/mrcnn/hook.py: -------------------------------------------------------------------------------- 1 | # refer to: https://gist.github.com/ortegatron/c0dad15e49c2b74de8bb09a5615d9f6b?permalink_comment_id=3941644 2 | import datetime 3 | import logging 4 | import time 5 | 6 | import detectron2.utils.comm as comm 7 | import numpy as np 8 | import torch 9 | from detectron2.engine.hooks import HookBase 10 | from detectron2.utils.logger import log_every_n_seconds 11 | 12 | 13 | class LossEvalHook(HookBase): 14 | def __init__(self, eval_period, model, data_loader): 15 | self._model = model 16 | self._period = eval_period 17 | self._data_loader = data_loader 18 | 19 | def _do_loss_eval(self): 20 | # Copying inference_on_dataset from evaluator.py 21 | total = len(self._data_loader) 22 | num_warmup = min(5, total - 1) 23 | 24 | start_time = time.perf_counter() 25 | total_compute_time = 0 26 | losses = [] 27 | for idx, inputs in enumerate(self._data_loader): 28 | if idx == num_warmup: 29 | start_time = time.perf_counter() 30 | total_compute_time = 0 31 | start_compute_time = time.perf_counter() 32 | if 
torch.cuda.is_available(): 33 | torch.cuda.synchronize() 34 | total_compute_time += time.perf_counter() - start_compute_time 35 | iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) 36 | seconds_per_img = total_compute_time / iters_after_start 37 | if idx >= num_warmup * 2 or seconds_per_img > 5: 38 | total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start 39 | eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) 40 | log_every_n_seconds( 41 | logging.INFO, 42 | "Loss on Validation done {}/{}. {:.4f} s / img. ETA={}".format( 43 | idx + 1, total, seconds_per_img, str(eta) 44 | ), 45 | n=5, 46 | ) 47 | loss_batch = self._get_loss(inputs) 48 | losses.append(loss_batch) 49 | mean_loss = np.mean(losses) 50 | self.trainer.storage.put_scalar('validation_loss', mean_loss) 51 | comm.synchronize() 52 | 53 | return losses 54 | 55 | def _get_loss(self, data): 56 | # How loss is calculated on train_loop 57 | metrics_dict = self._model(data) 58 | metrics_dict = { 59 | k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v) 60 | for k, v in metrics_dict.items() 61 | } 62 | total_losses_reduced = sum(loss for loss in metrics_dict.values()) 63 | return total_losses_reduced 64 | 65 | def after_step(self): 66 | next_iter = self.trainer.iter + 1 67 | is_final = next_iter == self.trainer.max_iter 68 | if is_final or (self._period > 0 and next_iter % self._period == 0): 69 | self._do_loss_eval() 70 | self.trainer.storage.put_scalars(timetest=12) -------------------------------------------------------------------------------- /src/toolee/mrcnn/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 3 | import sys 4 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 5 | 6 | from dataset import register_datasets 7 | from configs.mrcnn_config import get_config 8 | from runner import get_trainer, get_dtrn_cfg 9 | 10 | import random 11 | 12 | 13 | def train(cfg): 14 | register_datasets(cfg) 15 | dtrn_cfg = get_dtrn_cfg(cfg) 16 | trainer = get_trainer(dtrn_cfg) 17 | trainer.train() 18 | 19 | 20 | if __name__ == '__main__': 21 | cfg = get_config() 22 | random.seed(cfg.seed) 23 | train(cfg) -------------------------------------------------------------------------------- /src/toolee/mrcnn/runner.py: -------------------------------------------------------------------------------- 1 | # Some basic setup: 2 | # Setup detectron2 logger 3 | import cv2 4 | from detectron2.utils.logger import setup_logger 5 | setup_logger() 6 | 7 | # import some common libraries 8 | import os 9 | 10 | # import some common detectron2 utilities 11 | from detectron2 import model_zoo 12 | from detectron2.engine import DefaultTrainer 13 | 14 | from detectron2.engine import DefaultPredictor 15 | from detectron2.config import get_cfg 16 | from configs.mrcnn_config import get_config 17 | from detectron2.evaluation import COCOEvaluator 18 | from detectron2.data import build_detection_test_loader 19 | from detectron2.data import DatasetMapper 20 | 21 | from mrcnn.hook import LossEvalHook 22 | 23 | def get_predictor(ckpt_path, roi_threshold=0.7): 24 | dtrn_cfg = get_cfg() 25 | # model configuration 26 | dtrn_cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) 27 | # training configuration 28 | dtrn_cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 29 | dtrn_cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5 30 | dtrn_cfg.INPUT.MASK_FORMAT = 
"bitmask" 31 | dtrn_cfg.MODEL.WEIGHTS = ckpt_path 32 | dtrn_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = roi_threshold 33 | predictor = DefaultPredictor(dtrn_cfg) 34 | return predictor 35 | 36 | def get_dtrn_cfg(cfg): 37 | dtrn_cfg = get_cfg() 38 | # model configuration 39 | dtrn_cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) 40 | dtrn_cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url( 41 | "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") # Let training initialize from model zoo 42 | # training configuration 43 | dtrn_cfg.DATASETS.TRAIN = ("ToolEE/train",) 44 | dtrn_cfg.DATASETS.TEST = ("ToolEE/val", "ToolEE/novel") 45 | dtrn_cfg.DATALOADER.NUM_WORKERS = cfg.num_workers 46 | 47 | dtrn_cfg.SOLVER.IMS_PER_BATCH = cfg.batch_size 48 | dtrn_cfg.SOLVER.BASE_LR = cfg.lr # pick a good LR 49 | dtrn_cfg.SOLVER.MAX_ITER = cfg.max_iter 50 | dtrn_cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 51 | dtrn_cfg.MODEL.ROI_HEADS.NUM_CLASSES = cfg.num_classes 52 | dtrn_cfg.OUTPUT_DIR = os.path.join(os.path.dirname(cfg.data_root), "mrcnn_result", "output") 53 | dtrn_cfg.INPUT.MASK_FORMAT = "bitmask" 54 | dtrn_cfg.TEST.EVAL_PERIOD = cfg.eval_freq 55 | return dtrn_cfg 56 | 57 | class ToolEETainer(DefaultTrainer): 58 | def __init__(self, dtrn_cfg): 59 | self.dtrn_cfg = dtrn_cfg 60 | super(ToolEETainer, self).__init__(cfg=dtrn_cfg) 61 | self.resume_or_load(resume=False) 62 | 63 | 64 | def build_hooks(self): 65 | hooks = super().build_hooks() 66 | hooks.insert(-1,LossEvalHook( 67 | self.dtrn_cfg.TEST.EVAL_PERIOD, 68 | self.model, 69 | build_detection_test_loader( 70 | self.dtrn_cfg, 71 | self.dtrn_cfg.DATASETS.TEST[0], 72 | DatasetMapper(self.dtrn_cfg, True) 73 | ) 74 | )) 75 | return hooks 76 | 77 | @classmethod 78 | def build_evaluator(cls, dtrn_cfg, dataset_name, output_folder=None): 79 | if output_folder is None: 80 | output_folder = os.path.join(dtrn_cfg.OUTPUT_DIR, f"val{dataset_name}") 81 | return COCOEvaluator(dataset_name, output_dir=output_folder) 82 | 83 | def get_trainer(cfg): 84 | dtrn_cfg = get_dtrn_cfg(cfg) 85 | os.makedirs(dtrn_cfg.OUTPUT_DIR, exist_ok=True) 86 | trainer = ToolEETainer(dtrn_cfg) 87 | return trainer 88 | 89 | if __name__ == '__main__': 90 | cfg = get_config() 91 | get_trainer(cfg) -------------------------------------------------------------------------------- /src/toolee/networks/gf_algorithms/losses.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | 5 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 6 | 7 | 8 | def loss_fn_edm( 9 | model, 10 | data, 11 | marginal_prob_func, 12 | sde_fn, 13 | eps=1e-5, 14 | likelihood_weighting=False, 15 | P_mean=-1.2, 16 | P_std=1.2, 17 | sigma_data=1.4148, 18 | sigma_min=0.002, 19 | sigma_max=80, 20 | ): 21 | pts = data['zero_mean_pts'] 22 | y = data['zero_mean_gt_pose'] 23 | bs = pts.shape[0] 24 | 25 | # get noise n 26 | z = torch.randn_like(y) # [bs, pose_dim] 27 | # log_sigma_t = torch.randn([bs, 1], device=device) # [bs, 1] 28 | # sigma_t = (P_std * log_sigma_t + P_mean).exp() # [bs, 1] 29 | log_sigma_t = torch.rand([bs, 1], device=device) # [bs, 1] 30 | sigma_t = (math.log(sigma_min) + log_sigma_t * (math.log(sigma_max) - math.log(sigma_min))).exp() # [bs, 1] 31 | 32 | n = z * sigma_t 33 | 34 | perturbed_x = y + n # [bs, pose_dim] 35 | data['sampled_pose'] = perturbed_x 36 | data['t'] = sigma_t # t and sigma is interchangable in EDM 37 | data, output = model(data) # [bs, pose_dim] 38 | 39 | 
# set_trace() 40 | 41 | # same as VE 42 | loss_ = torch.mean(torch.sum(((output * sigma_t + z)**2).view(bs, -1), dim=-1)) 43 | 44 | return loss_ 45 | 46 | 47 | def loss_fn( 48 | model, 49 | data, 50 | marginal_prob_func, 51 | sde_fn, 52 | eps=1e-5, 53 | likelihood_weighting=False, 54 | teacher_model=None, 55 | pts_feat_teacher=None 56 | ): 57 | pts = data['zero_mean_pts'] 58 | gt_pose = data['zero_mean_gt_pose'] 59 | 60 | ''' get std ''' 61 | bs = pts.shape[0] 62 | random_t = torch.rand(bs, device=device) * (1. - eps) + eps # [bs, ] 63 | random_t = random_t.unsqueeze(-1) # [bs, 1] 64 | mu, std = marginal_prob_func(gt_pose, random_t) # [bs, pose_dim], [bs] 65 | std = std.view(-1, 1) # [bs, 1] 66 | 67 | ''' perturb data and get estimated score ''' 68 | z = torch.randn_like(gt_pose) # [bs, pose_dim] 69 | perturbed_x = mu + z * std # [bs, pose_dim] 70 | data['sampled_pose'] = perturbed_x 71 | data['t'] = random_t 72 | estimated_score = model(data) # [bs, pose_dim(6+3)] 73 | 74 | ''' get target score ''' 75 | if teacher_model is None: 76 | # theoretic estimation 77 | target_score = - z * std / (std ** 2) 78 | else: 79 | # distillation 80 | pts_feat_student = data['pts_feat'].clone() 81 | data['pts_feat'] = pts_feat_teacher 82 | target_score = teacher_model(data) 83 | data['pts_feat'] = pts_feat_student 84 | 85 | ''' loss weighting ''' 86 | loss_weighting = std ** 2 87 | # loss_all = loss_ + loss_weighting 88 | loss_ = torch.mean(torch.sum((loss_weighting * (estimated_score - target_score)**2).view(bs, -1), dim=-1)) 89 | 90 | return loss_ 91 | 92 | 93 | -------------------------------------------------------------------------------- /src/toolee/networks/gf_algorithms/score_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class ExponentialMovingAverage: 4 | """ 5 | Maintains (exponential) moving average of a set of parameters. 6 | """ 7 | 8 | def __init__(self, parameters, decay, use_num_updates=True): 9 | """ 10 | Args: 11 | parameters: Iterable of `torch.nn.Parameter`; usually the result of 12 | `model.parameters()`. 13 | decay: The exponential decay. 14 | use_num_updates: Whether to use number of updates when computing 15 | averages. 16 | """ 17 | if decay < 0.0 or decay > 1.0: 18 | raise ValueError('Decay must be between 0 and 1') 19 | self.decay = decay 20 | self.num_updates = 0 if use_num_updates else None 21 | self.shadow_params = [p.clone().detach() 22 | for p in parameters if p.requires_grad] 23 | self.collected_params = [] 24 | 25 | def update(self, parameters): 26 | """ 27 | Update currently maintained parameters. 28 | 29 | Call this every time the parameters are updated, such as the result of 30 | the `optimizer.step()` call. 31 | 32 | Args: 33 | parameters: Iterable of `torch.nn.Parameter`; usually the same set of 34 | parameters used to initialize this object. 35 | """ 36 | decay = self.decay 37 | if self.num_updates is not None: 38 | self.num_updates += 1 39 | decay = min(decay, (1 + self.num_updates) / (10 + self.num_updates)) 40 | one_minus_decay = 1.0 - decay 41 | with torch.no_grad(): 42 | parameters = [p for p in parameters if p.requires_grad] 43 | for s_param, param in zip(self.shadow_params, parameters): 44 | s_param.sub_(one_minus_decay * (s_param - param)) # only update the ema-params 45 | 46 | 47 | def copy_to(self, parameters): 48 | """ 49 | Copy current parameters into given collection of parameters. 
50 | 51 | Args: 52 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 53 | updated with the stored moving averages. 54 | """ 55 | parameters = [p for p in parameters if p.requires_grad] 56 | for s_param, param in zip(self.shadow_params, parameters): 57 | if param.requires_grad: 58 | param.data.copy_(s_param.data) 59 | 60 | def store(self, parameters): 61 | """ 62 | Save the current parameters for restoring later. 63 | 64 | Args: 65 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 66 | temporarily stored. 67 | """ 68 | self.collected_params = [param.clone() for param in parameters] 69 | 70 | def restore(self, parameters): 71 | """ 72 | Restore the parameters stored with the `store` method. 73 | Useful to validate the model with EMA parameters without affecting the 74 | original optimization process. Store the parameters before the 75 | `copy_to` method. After validation (or model saving), use this to 76 | restore the former parameters. 77 | 78 | Args: 79 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 80 | updated with the stored parameters. 81 | """ 82 | for c_param, param in zip(self.collected_params, parameters): 83 | param.data.copy_(c_param.data) 84 | 85 | def state_dict(self): 86 | return dict(decay=self.decay, num_updates=self.num_updates, 87 | shadow_params=self.shadow_params) 88 | 89 | def load_state_dict(self, state_dict): 90 | self.decay = state_dict['decay'] 91 | self.num_updates = state_dict['num_updates'] 92 | self.shadow_params = state_dict['shadow_params'] 93 | -------------------------------------------------------------------------------- /src/toolee/networks/gf_algorithms/sde.py: -------------------------------------------------------------------------------- 1 | # refer to this paper: https://openreview.net/pdf?id=PxTIG12RRHS 2 | # implement the SDEs with different mode 3 | import functools 4 | import os 5 | import sys 6 | 7 | import numpy as np 8 | import torch 9 | 10 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 11 | 12 | #----- VE SDE variance exploding stochastic differential equation----- 13 | #------------------ 14 | def ve_marginal_prob(x, t, sigma_min=0.01, sigma_max=90): 15 | std = sigma_min * (sigma_max / sigma_min) ** t 16 | mean = x 17 | return mean, std 18 | 19 | def ve_sde(t, sigma_min=0.01, sigma_max=90): 20 | sigma = sigma_min * (sigma_max / sigma_min) ** t 21 | drift_coeff = torch.tensor(0) 22 | diffusion_coeff = sigma * torch.sqrt(torch.tensor(2 * (np.log(sigma_max) - np.log(sigma_min)), device=t.device)) 23 | return drift_coeff, diffusion_coeff 24 | 25 | def ve_prior(shape, sigma_min=0.01, sigma_max=90, T=1.0): 26 | _, sigma_max_prior = ve_marginal_prob(None, T, sigma_min=sigma_min, sigma_max=sigma_max) 27 | return torch.randn(*shape) * sigma_max_prior 28 | 29 | #----- VP SDE Variance Preserving stochastic differential equation----- 30 | #------------------ 31 | def vp_marginal_prob(x, t, beta_0=0.1, beta_1=20): 32 | log_mean_coeff = -0.25 * t ** 2 * (beta_1 - beta_0) - 0.5 * t * beta_0 33 | mean = torch.exp(log_mean_coeff) * x 34 | std = torch.sqrt(1. - torch.exp(2. 
* log_mean_coeff)) 35 | return mean, std 36 | 37 | def vp_sde(t, beta_0=0.1, beta_1=20): 38 | beta_t = beta_0 + t * (beta_1 - beta_0) 39 | drift_coeff = -0.5 * beta_t 40 | diffusion_coeff = torch.sqrt(beta_t) 41 | return drift_coeff, diffusion_coeff 42 | 43 | def vp_prior(shape, beta_0=0.1, beta_1=20): 44 | return torch.randn(*shape) 45 | 46 | #----- sub-VP SDE ----- 47 | # a new type of SDEs that is always bounded by the VP SDE at every intermediate time step 48 | #---------------------- 49 | def subvp_marginal_prob(x, t, beta_0, beta_1): 50 | log_mean_coeff = -0.25 * t ** 2 * (beta_1 - beta_0) - 0.5 * t * beta_0 51 | mean = torch.exp(log_mean_coeff) * x 52 | std = 1 - torch.exp(2. * log_mean_coeff) 53 | return mean, std 54 | 55 | def subvp_sde(t, beta_0, beta_1): 56 | beta_t = beta_0 + t * (beta_1 - beta_0) 57 | drift_coeff = -0.5 * beta_t 58 | discount = 1. - torch.exp(-2 * beta_0 * t - (beta_1 - beta_0) * t ** 2) 59 | diffusion_coeff = torch.sqrt(beta_t * discount) 60 | return drift_coeff, diffusion_coeff 61 | 62 | def subvp_prior(shape, beta_0=0.1, beta_1=20): 63 | return torch.randn(*shape) 64 | 65 | #----- EDM SDE ----- 66 | # exponential decay model? 67 | #------------------ 68 | def edm_marginal_prob(x, t, sigma_min=0.002, sigma_max=80): 69 | std = t 70 | mean = x 71 | return mean, std 72 | 73 | def edm_sde(t, sigma_min=0.002, sigma_max=80): 74 | drift_coeff = torch.tensor(0) 75 | diffusion_coeff = torch.sqrt(2 * t) 76 | return drift_coeff, diffusion_coeff 77 | 78 | def edm_prior(shape, sigma_min=0.002, sigma_max=80): 79 | return torch.randn(*shape) * sigma_max 80 | 81 | def init_sde(sde_mode): 82 | # the SDE-related hyperparameters are copied from https://github.com/yang-song/score_sde_pytorch 83 | if sde_mode == 'edm': 84 | sigma_min = 0.002 85 | sigma_max = 80 86 | eps = 0.002 87 | prior_fn = functools.partial(edm_prior, sigma_min=sigma_min, sigma_max=sigma_max) 88 | marginal_prob_fn = functools.partial(edm_marginal_prob, sigma_min=sigma_min, sigma_max=sigma_max) 89 | sde_fn = functools.partial(edm_sde, sigma_min=sigma_min, sigma_max=sigma_max) 90 | T = sigma_max 91 | elif sde_mode == 've': 92 | sigma_min = 0.01 93 | sigma_max = 50 94 | eps = 1e-5 95 | marginal_prob_fn = functools.partial(ve_marginal_prob, sigma_min=sigma_min, sigma_max=sigma_max) 96 | sde_fn = functools.partial(ve_sde, sigma_min=sigma_min, sigma_max=sigma_max) 97 | T = 1.0 98 | prior_fn = functools.partial(ve_prior, sigma_min=sigma_min, sigma_max=sigma_max) 99 | elif sde_mode == 'vp': 100 | beta_0 = 0.1 101 | beta_1 = 20 102 | eps = 1e-3 103 | prior_fn = functools.partial(vp_prior, beta_0=beta_0, beta_1=beta_1) 104 | marginal_prob_fn = functools.partial(vp_marginal_prob, beta_0=beta_0, beta_1=beta_1) 105 | sde_fn = functools.partial(vp_sde, beta_0=beta_0, beta_1=beta_1) 106 | T = 1.0 107 | elif sde_mode == 'subvp': 108 | beta_0 = 0.1 109 | beta_1 = 20 110 | eps = 1e-3 111 | prior_fn = functools.partial(subvp_prior, beta_0=beta_0, beta_1=beta_1) 112 | marginal_prob_fn = functools.partial(subvp_marginal_prob, beta_0=beta_0, beta_1=beta_1) 113 | sde_fn = functools.partial(subvp_sde, beta_0=beta_0, beta_1=beta_1) 114 | T = 1.0 115 | else: 116 | raise NotImplementedError 117 | return prior_fn, marginal_prob_fn, sde_fn, eps, T 118 | 119 | -------------------------------------------------------------------------------- /src/toolee/networks/posenet.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | 6 | 
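# Editor's note (added comment): the path append below puts the package root
# (src/toolee) on sys.path so that the `networks.*` and `configs.*` imports
# resolve when this file is run directly as a script.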
sys.path.append(os.path.dirname(os.path.dirname(__file__))) 7 | from networks.pts_encoder.pointnets import PointNetfeat 8 | from networks.pts_encoder.pointnet2 import Pointnet2ClsMSG 9 | from networks.gf_algorithms.samplers import cond_ode_likelihood, cond_ode_sampler 10 | from networks.gf_algorithms.scorenet import PoseScoreNet 11 | from networks.gf_algorithms.sde import init_sde 12 | from configs.config import get_config 13 | 14 | 15 | # GF denote the generative function 16 | class GFObjectPose(nn.Module): 17 | def __init__(self, cfg, prior_fn, marginal_prob_fn, sde_fn, sampling_eps, T): 18 | super(GFObjectPose, self).__init__() 19 | 20 | self.cfg = cfg 21 | self.device = cfg.device 22 | self.is_testing = False 23 | 24 | ''' Load model, define SDE ''' 25 | # init SDE config 26 | self.prior_fn = prior_fn 27 | self.marginal_prob_fn = marginal_prob_fn 28 | self.sde_fn = sde_fn 29 | self.sampling_eps = sampling_eps 30 | self.T = T 31 | 32 | ''' encode pts ''' 33 | if self.cfg.pts_encoder == 'pointnet': 34 | self.pts_encoder = PointNetfeat(num_points=self.cfg.num_points, out_dim=1024) 35 | elif self.cfg.pts_encoder == 'pointnet2': 36 | self.pts_encoder = Pointnet2ClsMSG(0) 37 | elif self.cfg.pts_encoder == 'pointnet_and_pointnet2': 38 | self.pts_pointnet_encoder = PointNetfeat(num_points=self.cfg.num_points, out_dim=1024) 39 | self.pts_pointnet2_encoder = Pointnet2ClsMSG(0) 40 | self.fusion_layer = nn.Linear(2048, 1024) 41 | self.act = nn.ReLU() 42 | else: 43 | print(f"pts_encoder {self.cfg.pts_encoder} is not supported!") 44 | raise NotImplementedError 45 | 46 | ''' score network''' 47 | if self.cfg.posenet_mode == 'score': 48 | self.pose_score_net = PoseScoreNet(self.marginal_prob_fn, self.cfg.pose_mode, self.cfg.regression_head) 49 | 50 | def extract_pts_feature(self, data): 51 | """extract the input pointcloud feature 52 | 53 | Args: 54 | data (dict): batch example without pointcloud feature. {'pts': [bs, num_pts, 3], 'sampled_pose': [bs, pose_dim], 't': [bs, 1]} 55 | Returns: 56 | data (dict): batch example with pointcloud feature. {'pts': [bs, num_pts, 3], 'pts_feat': [bs, c], 'sampled_pose': [bs, pose_dim], 't': [bs, 1]} 57 | """ 58 | pts = data['pts'] 59 | if self.cfg.pts_encoder == 'pointnet': 60 | pts_feat = self.pts_encoder(pts.permute(0, 2, 1)) # -> (bs, 3, 1024) 61 | elif self.cfg.pts_encoder in ['pointnet2']: 62 | pts_feat = self.pts_encoder(pts) 63 | elif self.cfg.pts_encoder == 'pointnet_and_pointnet2': 64 | pts_pointnet_feat = self.pts_pointnet_encoder(pts.permute(0, 2, 1)) 65 | pts_pointnet2_feat = self.pts_pointnet2_encoder(pts) 66 | pts_feat = self.fusion_layer(torch.cat((pts_pointnet_feat, pts_pointnet2_feat), dim=-1)) 67 | pts_feat = self.act(pts_feat) 68 | else: 69 | print(f"pts_encoder {self.cfg.pts_encoder} is not supported!") 70 | raise NotImplementedError 71 | return pts_feat 72 | 73 | 74 | def sample(self, data, sampler, atol=1e-5, rtol=1e-5, snr=0.16, denoise=True, init_x=None, T0=None): 75 | assert sampler=='ode', f"the sampler {sampler} is not supported!" 
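# Editor's note (added comment): only the probability-flow ODE sampler is wired up
# here (the assert above rejects anything else). `T0`, when given, overrides the
# SDE time horizon self.T from init_sde(), so the reverse integration can start
# from an intermediate noise level (cf. the --T0 option in configs/config.py).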
76 | if sampler == 'ode': 77 | T0 = self.T if T0 is None else T0 78 | in_process_sample, res = cond_ode_sampler( 79 | score_model=self, 80 | data=data, 81 | prior=self.prior_fn, 82 | sde_coeff=self.sde_fn, 83 | atol=atol, 84 | rtol=rtol, 85 | device=self.device, 86 | eps=self.sampling_eps, 87 | T=T0, 88 | num_steps=self.cfg.sampling_steps, 89 | pose_mode=self.cfg.pose_mode, 90 | denoise=denoise, 91 | init_x=init_x 92 | ) 93 | else: 94 | print(f"sampler {sampler} is not supported!") 95 | raise NotImplementedError 96 | return in_process_sample, res 97 | 98 | 99 | def calc_likelihood(self, data, atol=1e-5, rtol=1e-5): 100 | latent_code, log_likelihoods = cond_ode_likelihood( 101 | score_model=self, 102 | data=data, 103 | prior=self.prior_fn, 104 | sde_coeff=self.sde_fn, 105 | marginal_prob_fn=self.marginal_prob_fn, 106 | atol=atol, 107 | rtol=rtol, 108 | device=self.device, 109 | eps=self.sampling_eps, 110 | num_steps=self.cfg.sampling_steps, 111 | pose_mode=self.cfg.pose_mode, 112 | ) 113 | return log_likelihoods 114 | 115 | 116 | def forward(self, data, mode='score', init_x=None, T0=None): 117 | ''' 118 | Args: 119 | data, dict { 120 | 'pts': [bs, num_pts, 3] 121 | 'pts_feat': [bs, c] 122 | 'sampled_pose': [bs, pose_dim] 123 | 't': [bs, 1] 124 | } 125 | ''' 126 | if mode == 'score': 127 | out_score = self.pose_score_net(data) 128 | return out_score 129 | elif mode == 'likelihood': 130 | likelihoods = self.calc_likelihood(data) 131 | return likelihoods 132 | elif mode == 'pts_feature': 133 | pts_feature = self.extract_pts_feature(data) 134 | return pts_feature 135 | elif mode == 'ode_sample': 136 | in_process_sample, res = self.sample(data, 'ode', init_x=init_x, T0=T0) 137 | return in_process_sample, res 138 | else: 139 | print(f"mode {mode} is not supported!") 140 | raise NotImplementedError 141 | 142 | def test(): 143 | def get_parameter_number(model): 144 | total_num = sum(p.numel() for p in model.parameters()) 145 | trainable_num = sum(p.numel() for p in model.parameters() if p.requires_grad) 146 | return {'Total': total_num, 'Trainable': trainable_num} 147 | cfg = get_config() 148 | prior_fn, marginal_prob_fn, sde_fn, sampling_eps, T = init_sde('ve') 149 | net = GFObjectPose(cfg, prior_fn, marginal_prob_fn, sde_fn, sampling_eps, T) 150 | net_parameters_num= get_parameter_number(net) 151 | print(net_parameters_num['Total'], net_parameters_num['Trainable']) 152 | if __name__ == '__main__': 153 | test() 154 | 155 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/.gitignore: -------------------------------------------------------------------------------- 1 | pointnet2/build/ 2 | pointnet2/dist/ 3 | pointnet2/pointnet2.egg-info/ 4 | __pycache__/ 5 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Shaoshuai Shi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice 
and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/README.md: -------------------------------------------------------------------------------- 1 | # Pointnet2.PyTorch 2 | 3 | * PyTorch implementation of [PointNet++](https://arxiv.org/abs/1706.02413) based on [erikwijmans/Pointnet2_PyTorch](https://github.com/erikwijmans/Pointnet2_PyTorch). 4 | * Faster than the original codes by re-implementing the CUDA operations. 5 | 6 | ## Installation 7 | ### Requirements 8 | * Linux (tested on Ubuntu 14.04/16.04) 9 | * Python 3.6+ 10 | * PyTorch 1.0 11 | 12 | ### Install 13 | Install this library by running the following command: 14 | 15 | ```shell 16 | cd pointnet2 17 | python setup.py install 18 | cd ../ 19 | ``` 20 | 21 | ## Examples 22 | Here I provide a simple example to use this library in the task of KITTI ourdoor foreground point cloud segmentation, and you could refer to the paper [PointRCNN](https://arxiv.org/abs/1812.04244) for the details of task description and foreground label generation. 23 | 24 | 1. Download the training data from [KITTI 3D object detection](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d) website and organize the downloaded files as follows: 25 | ``` 26 | Pointnet2.PyTorch 27 | ├── pointnet2 28 | ├── tools 29 | │ ├──data 30 | │ │ ├── KITTI 31 | │ │ │ ├── ImageSets 32 | │ │ │ ├── object 33 | │ │ │ │ ├──training 34 | │ │ │ │ ├──calib & velodyne & label_2 & image_2 35 | │ │ train_and_eval.py 36 | ``` 37 | 38 | 2. Run the following command to train and evaluate: 39 | ```shell 40 | cd tools 41 | python train_and_eval.py --batch_size 8 --epochs 100 --ckpt_save_interval 2 42 | ``` 43 | 44 | 45 | 46 | ## Project using this repo: 47 | * [PointRCNN](https://github.com/sshaoshuai/PointRCNN): 3D object detector from raw point cloud. 48 | 49 | ## Acknowledgement 50 | * [charlesq34/pointnet2](https://github.com/charlesq34/pointnet2): Paper author and official code repo. 51 | * [erikwijmans/Pointnet2_PyTorch](https://github.com/erikwijmans/Pointnet2_PyTorch): Initial work of PyTorch implementation of PointNet++. 
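## Quick sanity check

After `python setup.py install`, the raw CUDA ops are importable as the `pointnet2_cuda` extension declared in `setup.py`. Below is a minimal sketch of calling the ball-query op directly (argument order and shapes follow the comments in `ball_query_gpu.cu`; for real models you would normally go through the higher-level `pointnet2_modules` wrappers instead):

```python
import torch
import pointnet2_cuda

B, N, M, nsample, radius = 2, 1024, 128, 32, 0.4
xyz = torch.randn(B, N, 3, device="cuda").contiguous()                # source points (B, N, 3)
new_xyz = xyz[:, :M, :].contiguous()                                  # query centres (B, M, 3)
idx = torch.zeros(B, M, nsample, dtype=torch.int32, device="cuda")    # output indices (B, M, nsample)

# fills `idx` in place with the indices of up to `nsample` neighbours within `radius` of each centre
pointnet2_cuda.ball_query_wrapper(B, N, M, radius, nsample, new_xyz, xyz, idx)
print(idx.shape)  # torch.Size([2, 128, 32])
```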
52 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='pointnet2', 6 | ext_modules=[ 7 | CUDAExtension('pointnet2_cuda', [ 8 | 'src/pointnet2_api.cpp', 9 | 10 | 'src/ball_query.cpp', 11 | 'src/ball_query_gpu.cu', 12 | 'src/group_points.cpp', 13 | 'src/group_points_gpu.cu', 14 | 'src/interpolate.cpp', 15 | 'src/interpolate_gpu.cu', 16 | 'src/sampling.cpp', 17 | 'src/sampling_gpu.cu', 18 | ], 19 | extra_compile_args={'cxx': ['-g'], 20 | 'nvcc': ['-O2']}) 21 | ], 22 | cmdclass={'build_ext': BuildExtension} 23 | ) 24 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // #include 4 | #include 5 | #include 6 | #include "ball_query_gpu.h" 7 | #include 8 | #include 9 | 10 | // extern THCState *state; 11 | 12 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 13 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 14 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 15 | 16 | int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 17 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) { 18 | CHECK_INPUT(new_xyz_tensor); 19 | CHECK_INPUT(xyz_tensor); 20 | const float *new_xyz = new_xyz_tensor.data(); 21 | const float *xyz = xyz_tensor.data(); 22 | int *idx = idx_tensor.data(); 23 | 24 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 25 | // cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 26 | ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream); 27 | return 1; 28 | } -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "ball_query_gpu.h" 6 | #include "cuda_utils.h" 7 | 8 | 9 | __global__ void ball_query_kernel_fast(int b, int n, int m, float radius, int nsample, 10 | const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) { 11 | // new_xyz: (B, M, 3) 12 | // xyz: (B, N, 3) 13 | // output: 14 | // idx: (B, M, nsample) 15 | int bs_idx = blockIdx.y; 16 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 17 | if (bs_idx >= b || pt_idx >= m) return; 18 | 19 | new_xyz += bs_idx * m * 3 + pt_idx * 3; 20 | xyz += bs_idx * n * 3; 21 | idx += bs_idx * m * nsample + pt_idx * nsample; 22 | 23 | float radius2 = radius * radius; 24 | float new_x = new_xyz[0]; 25 | float new_y = new_xyz[1]; 26 | float new_z = new_xyz[2]; 27 | 28 | int cnt = 0; 29 | for (int k = 0; k < n; ++k) { 30 | float x = xyz[k * 3 + 0]; 31 | float y = xyz[k * 3 + 1]; 32 | float z = xyz[k * 3 + 2]; 33 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 34 | if (d2 < radius2){ 35 | if (cnt == 0){ 36 | for (int l = 0; l < nsample; ++l) { 37 | idx[l] = k; 38 | } 39 | } 40 | idx[cnt] = k; 41 | ++cnt; 42 | if (cnt >= nsample) break; 43 | 
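            // Note: when the first in-radius point is found (cnt == 0), its index is copied into all
            // `nsample` slots above, so query balls that contain fewer than `nsample` points return a
            // valid duplicated index rather than uninitialised memory.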
} 44 | } 45 | } 46 | 47 | 48 | void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, \ 49 | const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) { 50 | // new_xyz: (B, M, 3) 51 | // xyz: (B, N, 3) 52 | // output: 53 | // idx: (B, M, nsample) 54 | 55 | cudaError_t err; 56 | 57 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 58 | dim3 threads(THREADS_PER_BLOCK); 59 | 60 | ball_query_kernel_fast<<>>(b, n, m, radius, nsample, new_xyz, xyz, idx); 61 | // cudaDeviceSynchronize(); // for using printf in kernel function 62 | err = cudaGetLastError(); 63 | if (cudaSuccess != err) { 64 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 65 | exit(-1); 66 | } 67 | } -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/ball_query_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_GPU_H 2 | #define _BALL_QUERY_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 10 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor); 11 | 12 | void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, 13 | const float *xyz, const float *new_xyz, int *idx, cudaStream_t stream); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | 6 | #define TOTAL_THREADS 1024 7 | #define THREADS_PER_BLOCK 256 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | 10 | inline int opt_n_threads(int work_size) { 11 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 12 | 13 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | #endif 16 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | // #include 6 | #include "group_points_gpu.h" 7 | #include 8 | #include 9 | // extern THCState *state; 10 | 11 | 12 | int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 13 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { 14 | 15 | float *grad_points = grad_points_tensor.data(); 16 | const int *idx = idx_tensor.data(); 17 | const float *grad_out = grad_out_tensor.data(); 18 | 19 | // cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 20 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 21 | group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream); 22 | return 1; 23 | } 24 | 25 | 26 | int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, 27 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) { 28 | 29 | const float *points = points_tensor.data(); 30 | const int *idx = idx_tensor.data(); 31 | float *out = out_tensor.data(); 32 | 33 | // cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 34 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 
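    // Shapes follow group_points_gpu.cu: points (B, C, N), idx (B, npoints, nsample)
    // -> out (B, C, npoints, nsample); the launcher below writes `out` in place on `stream`.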
35 | group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream); 36 | return 1; 37 | } -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_utils.h" 5 | #include "group_points_gpu.h" 6 | 7 | 8 | __global__ void group_points_grad_kernel_fast(int b, int c, int n, int npoints, int nsample, 9 | const float *__restrict__ grad_out, const int *__restrict__ idx, float *__restrict__ grad_points) { 10 | // grad_out: (B, C, npoints, nsample) 11 | // idx: (B, npoints, nsample) 12 | // output: 13 | // grad_points: (B, C, N) 14 | int bs_idx = blockIdx.z; 15 | int c_idx = blockIdx.y; 16 | int index = blockIdx.x * blockDim.x + threadIdx.x; 17 | int pt_idx = index / nsample; 18 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; 19 | 20 | int sample_idx = index % nsample; 21 | grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 22 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 23 | 24 | atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0] , grad_out[0]); 25 | } 26 | 27 | void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 28 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) { 29 | // grad_out: (B, C, npoints, nsample) 30 | // idx: (B, npoints, nsample) 31 | // output: 32 | // grad_points: (B, C, N) 33 | cudaError_t err; 34 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 35 | dim3 threads(THREADS_PER_BLOCK); 36 | 37 | group_points_grad_kernel_fast<<>>(b, c, n, npoints, nsample, grad_out, idx, grad_points); 38 | 39 | err = cudaGetLastError(); 40 | if (cudaSuccess != err) { 41 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 42 | exit(-1); 43 | } 44 | } 45 | 46 | 47 | __global__ void group_points_kernel_fast(int b, int c, int n, int npoints, int nsample, 48 | const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) { 49 | // points: (B, C, N) 50 | // idx: (B, npoints, nsample) 51 | // output: 52 | // out: (B, C, npoints, nsample) 53 | int bs_idx = blockIdx.z; 54 | int c_idx = blockIdx.y; 55 | int index = blockIdx.x * blockDim.x + threadIdx.x; 56 | int pt_idx = index / nsample; 57 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; 58 | 59 | int sample_idx = index % nsample; 60 | 61 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 62 | int in_idx = bs_idx * c * n + c_idx * n + idx[0]; 63 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 64 | 65 | out[out_idx] = points[in_idx]; 66 | } 67 | 68 | 69 | void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 70 | const float *points, const int *idx, float *out, cudaStream_t stream) { 71 | // points: (B, C, N) 72 | // idx: (B, npoints, nsample) 73 | // output: 74 | // out: (B, C, npoints, nsample) 75 | cudaError_t err; 76 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 77 | dim3 threads(THREADS_PER_BLOCK); 78 | 79 | group_points_kernel_fast<<>>(b, c, n, npoints, nsample, points, idx, out); 80 | // cudaDeviceSynchronize(); // for using printf in kernel function 81 | 
err = cudaGetLastError(); 82 | if (cudaSuccess != err) { 83 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 84 | exit(-1); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/group_points_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUP_POINTS_GPU_H 2 | #define _GROUP_POINTS_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, 11 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 12 | 13 | void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 14 | const float *points, const int *idx, float *out, cudaStream_t stream); 15 | 16 | int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 17 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 18 | 19 | void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 20 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "interpolate_gpu.h" 12 | 13 | // extern THCState *state; 14 | 15 | 16 | void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, 17 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) { 18 | const float *unknown = unknown_tensor.data(); 19 | const float *known = known_tensor.data(); 20 | float *dist2 = dist2_tensor.data(); 21 | int *idx = idx_tensor.data(); 22 | 23 | // cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 24 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 25 | three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx, stream); 26 | } 27 | 28 | 29 | void three_interpolate_wrapper_fast(int b, int c, int m, int n, 30 | at::Tensor points_tensor, 31 | at::Tensor idx_tensor, 32 | at::Tensor weight_tensor, 33 | at::Tensor out_tensor) { 34 | 35 | const float *points = points_tensor.data(); 36 | const float *weight = weight_tensor.data(); 37 | float *out = out_tensor.data(); 38 | const int *idx = idx_tensor.data(); 39 | 40 | // cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 41 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 42 | three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out, stream); 43 | } 44 | 45 | void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, 46 | at::Tensor grad_out_tensor, 47 | at::Tensor idx_tensor, 48 | at::Tensor weight_tensor, 49 | at::Tensor grad_points_tensor) { 50 | 51 | const float *grad_out = grad_out_tensor.data(); 52 | const float *weight = weight_tensor.data(); 53 | float *grad_points = grad_points_tensor.data(); 54 | const int *idx = idx_tensor.data(); 55 | 56 | // cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 57 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 58 | three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points, stream); 59 | } 
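// Usage sketch for the two wrappers above (PointNet++ feature propagation): three_nn_wrapper
// fills dist2/idx, both (B, N, 3), with the squared distances and indices of the three nearest
// `known` points for every `unknown` point. The caller then builds interpolation weights
// (typically normalised inverse distances, e.g. w = 1 / (dist2 + 1e-8) rescaled to sum to 1 over
// the three neighbours) and passes them to three_interpolate_wrapper, whose kernel computes
// out[b, c, i] = w[b, i, 0] * points[b, c, idx[b, i, 0]] + w[b, i, 1] * points[b, c, idx[b, i, 1]]
//               + w[b, i, 2] * points[b, c, idx[b, i, 2]].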
-------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cuda_utils.h" 6 | #include "interpolate_gpu.h" 7 | 8 | 9 | __global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown, 10 | const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) { 11 | // unknown: (B, N, 3) 12 | // known: (B, M, 3) 13 | // output: 14 | // dist2: (B, N, 3) 15 | // idx: (B, N, 3) 16 | 17 | int bs_idx = blockIdx.y; 18 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (bs_idx >= b || pt_idx >= n) return; 20 | 21 | unknown += bs_idx * n * 3 + pt_idx * 3; 22 | known += bs_idx * m * 3; 23 | dist2 += bs_idx * n * 3 + pt_idx * 3; 24 | idx += bs_idx * n * 3 + pt_idx * 3; 25 | 26 | float ux = unknown[0]; 27 | float uy = unknown[1]; 28 | float uz = unknown[2]; 29 | 30 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 31 | int besti1 = 0, besti2 = 0, besti3 = 0; 32 | for (int k = 0; k < m; ++k) { 33 | float x = known[k * 3 + 0]; 34 | float y = known[k * 3 + 1]; 35 | float z = known[k * 3 + 2]; 36 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 37 | if (d < best1) { 38 | best3 = best2; besti3 = besti2; 39 | best2 = best1; besti2 = besti1; 40 | best1 = d; besti1 = k; 41 | } 42 | else if (d < best2) { 43 | best3 = best2; besti3 = besti2; 44 | best2 = d; besti2 = k; 45 | } 46 | else if (d < best3) { 47 | best3 = d; besti3 = k; 48 | } 49 | } 50 | dist2[0] = best1; dist2[1] = best2; dist2[2] = best3; 51 | idx[0] = besti1; idx[1] = besti2; idx[2] = besti3; 52 | } 53 | 54 | 55 | void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, 56 | const float *known, float *dist2, int *idx, cudaStream_t stream) { 57 | // unknown: (B, N, 3) 58 | // known: (B, M, 3) 59 | // output: 60 | // dist2: (B, N, 3) 61 | // idx: (B, N, 3) 62 | 63 | cudaError_t err; 64 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 65 | dim3 threads(THREADS_PER_BLOCK); 66 | 67 | three_nn_kernel_fast<<>>(b, n, m, unknown, known, dist2, idx); 68 | 69 | err = cudaGetLastError(); 70 | if (cudaSuccess != err) { 71 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 72 | exit(-1); 73 | } 74 | } 75 | 76 | 77 | __global__ void three_interpolate_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points, 78 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) { 79 | // points: (B, C, M) 80 | // idx: (B, N, 3) 81 | // weight: (B, N, 3) 82 | // output: 83 | // out: (B, C, N) 84 | 85 | int bs_idx = blockIdx.z; 86 | int c_idx = blockIdx.y; 87 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 88 | 89 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; 90 | 91 | weight += bs_idx * n * 3 + pt_idx * 3; 92 | points += bs_idx * c * m + c_idx * m; 93 | idx += bs_idx * n * 3 + pt_idx * 3; 94 | out += bs_idx * c * n + c_idx * n; 95 | 96 | out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]]; 97 | } 98 | 99 | void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, 100 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) { 101 | // points: (B, C, M) 102 | // idx: (B, N, 3) 103 | // weight: (B, N, 3) 104 | // output: 105 | 
// out: (B, C, N) 106 | 107 | cudaError_t err; 108 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 109 | dim3 threads(THREADS_PER_BLOCK); 110 | three_interpolate_kernel_fast<<>>(b, c, m, n, points, idx, weight, out); 111 | 112 | err = cudaGetLastError(); 113 | if (cudaSuccess != err) { 114 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 115 | exit(-1); 116 | } 117 | } 118 | 119 | 120 | __global__ void three_interpolate_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, 121 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) { 122 | // grad_out: (B, C, N) 123 | // weight: (B, N, 3) 124 | // output: 125 | // grad_points: (B, C, M) 126 | 127 | int bs_idx = blockIdx.z; 128 | int c_idx = blockIdx.y; 129 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 130 | 131 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; 132 | 133 | grad_out += bs_idx * c * n + c_idx * n + pt_idx; 134 | weight += bs_idx * n * 3 + pt_idx * 3; 135 | grad_points += bs_idx * c * m + c_idx * m; 136 | idx += bs_idx * n * 3 + pt_idx * 3; 137 | 138 | 139 | atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]); 140 | atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]); 141 | atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]); 142 | } 143 | 144 | void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, 145 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream) { 146 | // grad_out: (B, C, N) 147 | // weight: (B, N, 3) 148 | // output: 149 | // grad_points: (B, C, M) 150 | 151 | cudaError_t err; 152 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 153 | dim3 threads(THREADS_PER_BLOCK); 154 | three_interpolate_grad_kernel_fast<<>>(b, c, n, m, grad_out, idx, weight, grad_points); 155 | 156 | err = cudaGetLastError(); 157 | if (cudaSuccess != err) { 158 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 159 | exit(-1); 160 | } 161 | } -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/interpolate_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATE_GPU_H 2 | #define _INTERPOLATE_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, 11 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); 12 | 13 | void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, 14 | const float *known, float *dist2, int *idx, cudaStream_t stream); 15 | 16 | 17 | void three_interpolate_wrapper_fast(int b, int c, int m, int n, at::Tensor points_tensor, 18 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); 19 | 20 | void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, 21 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream); 22 | 23 | 24 | void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, at::Tensor grad_out_tensor, 25 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor); 26 | 27 | void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, 28 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream); 29 
| 30 | #endif 31 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/pointnet2_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "ball_query_gpu.h" 5 | #include "group_points_gpu.h" 6 | #include "sampling_gpu.h" 7 | #include "interpolate_gpu.h" 8 | 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("ball_query_wrapper", &ball_query_wrapper_fast, "ball_query_wrapper_fast"); 12 | 13 | m.def("group_points_wrapper", &group_points_wrapper_fast, "group_points_wrapper_fast"); 14 | m.def("group_points_grad_wrapper", &group_points_grad_wrapper_fast, "group_points_grad_wrapper_fast"); 15 | 16 | m.def("gather_points_wrapper", &gather_points_wrapper_fast, "gather_points_wrapper_fast"); 17 | m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper_fast, "gather_points_grad_wrapper_fast"); 18 | 19 | m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper"); 20 | 21 | m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast"); 22 | m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast"); 23 | m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast"); 24 | } 25 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | // #include 5 | 6 | #include "sampling_gpu.h" 7 | #include 8 | #include 9 | 10 | // extern THCState *state; 11 | 12 | 13 | int gather_points_wrapper_fast(int b, int c, int n, int npoints, 14 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){ 15 | const float *points = points_tensor.data(); 16 | const int *idx = idx_tensor.data(); 17 | float *out = out_tensor.data(); 18 | 19 | // cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 20 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 21 | gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream); 22 | return 1; 23 | } 24 | 25 | 26 | int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, 27 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { 28 | 29 | const float *grad_out = grad_out_tensor.data(); 30 | const int *idx = idx_tensor.data(); 31 | float *grad_points = grad_points_tensor.data(); 32 | 33 | // cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 34 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 35 | gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream); 36 | return 1; 37 | } 38 | 39 | 40 | int furthest_point_sampling_wrapper(int b, int n, int m, 41 | at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) { 42 | 43 | const float *points = points_tensor.data(); 44 | float *temp = temp_tensor.data(); 45 | int *idx = idx_tensor.data(); 46 | 47 | // cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 48 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 49 | furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream); 50 | return 1; 51 | } 52 | -------------------------------------------------------------------------------- 
/src/toolee/networks/pts_encoder/pointnet2_utils/pointnet2/src/sampling_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_GPU_H 2 | #define _SAMPLING_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | int gather_points_wrapper_fast(int b, int c, int n, int npoints, 10 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 11 | 12 | void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, 13 | const float *points, const int *idx, float *out, cudaStream_t stream); 14 | 15 | 16 | int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, 17 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 18 | 19 | void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, 20 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); 21 | 22 | 23 | int furthest_point_sampling_wrapper(int b, int n, int m, 24 | at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor); 25 | 26 | void furthest_point_sampling_kernel_launcher(int b, int n, int m, 27 | const float *dataset, float *temp, int *idxs, cudaStream_t stream); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/tools/_init_path.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '../')) 3 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnet2_utils/tools/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import sys 4 | sys.path.append('..') 5 | from pointnet2.pointnet2_modules import PointnetFPModule, PointnetSAModuleMSG 6 | import pointnet2.pytorch_utils as pt_utils 7 | 8 | 9 | def get_model(input_channels=0): 10 | return Pointnet2MSG(input_channels=input_channels) 11 | 12 | 13 | NPOINTS = [4096, 1024, 256, 64] 14 | RADIUS = [[0.1, 0.5], [0.5, 1.0], [1.0, 2.0], [2.0, 4.0]] 15 | NSAMPLE = [[16, 32], [16, 32], [16, 32], [16, 32]] 16 | MLPS = [[[16, 16, 32], [32, 32, 64]], [[64, 64, 128], [64, 96, 128]], 17 | [[128, 196, 256], [128, 196, 256]], [[256, 256, 512], [256, 384, 512]]] 18 | FP_MLPS = [[128, 128], [256, 256], [512, 512], [512, 512]] 19 | CLS_FC = [128] 20 | DP_RATIO = 0.5 21 | 22 | 23 | class Pointnet2MSG(nn.Module): 24 | def __init__(self, input_channels=6): 25 | super().__init__() 26 | 27 | self.SA_modules = nn.ModuleList() 28 | channel_in = input_channels 29 | 30 | skip_channel_list = [input_channels] 31 | for k in range(NPOINTS.__len__()): 32 | mlps = MLPS[k].copy() 33 | channel_out = 0 34 | for idx in range(mlps.__len__()): 35 | mlps[idx] = [channel_in] + mlps[idx] 36 | channel_out += mlps[idx][-1] 37 | 38 | self.SA_modules.append( 39 | PointnetSAModuleMSG( 40 | npoint=NPOINTS[k], 41 | radii=RADIUS[k], 42 | nsamples=NSAMPLE[k], 43 | mlps=mlps, 44 | use_xyz=True, 45 | bn=True 46 | ) 47 | ) 48 | skip_channel_list.append(channel_out) 49 | channel_in = channel_out 50 | 51 | self.FP_modules = nn.ModuleList() 52 | 53 | for k in range(FP_MLPS.__len__()): 54 | pre_channel = FP_MLPS[k + 1][-1] if k + 1 < len(FP_MLPS) else channel_out 55 | self.FP_modules.append( 56 | PointnetFPModule(mlp=[pre_channel + skip_channel_list[k]] + FP_MLPS[k]) 57 | ) 58 | 59 
| cls_layers = [] 60 | pre_channel = FP_MLPS[0][-1] 61 | for k in range(0, CLS_FC.__len__()): 62 | cls_layers.append(pt_utils.Conv1d(pre_channel, CLS_FC[k], bn=True)) 63 | pre_channel = CLS_FC[k] 64 | cls_layers.append(pt_utils.Conv1d(pre_channel, 1, activation=None)) 65 | cls_layers.insert(1, nn.Dropout(0.5)) 66 | self.cls_layer = nn.Sequential(*cls_layers) 67 | 68 | def _break_up_pc(self, pc): 69 | xyz = pc[..., 0:3].contiguous() 70 | features = ( 71 | pc[..., 3:].transpose(1, 2).contiguous() 72 | if pc.size(-1) > 3 else None 73 | ) 74 | 75 | return xyz, features 76 | 77 | def forward(self, pointcloud: torch.cuda.FloatTensor): 78 | xyz, features = self._break_up_pc(pointcloud) 79 | 80 | l_xyz, l_features = [xyz], [features] 81 | for i in range(len(self.SA_modules)): 82 | li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i]) 83 | 84 | print(li_xyz.shape, li_features.shape) 85 | 86 | l_xyz.append(li_xyz) 87 | l_features.append(li_features) 88 | 89 | for i in range(-1, -(len(self.FP_modules) + 1), -1): 90 | l_features[i - 1] = self.FP_modules[i]( 91 | l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i] 92 | ) 93 | 94 | pred_cls = self.cls_layer(l_features[0]).transpose(1, 2).contiguous() # (B, N, 1) 95 | return pred_cls 96 | 97 | if __name__ == '__main__': 98 | net = Pointnet2MSG(0).cuda() 99 | pts = torch.randn(2, 1024, 3).cuda() 100 | 101 | pre = net(pts) 102 | print(pre.shape) 103 | -------------------------------------------------------------------------------- /src/toolee/networks/pts_encoder/pointnets.py: -------------------------------------------------------------------------------- 1 | """refer to https://github.com/fxia22/pointnet.pytorch/blob/f0c2430b0b1529e3f76fb5d6cd6ca14be763d975/pointnet/model.py.""" 2 | 3 | from __future__ import print_function 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.parallel 7 | import torch.utils.data 8 | from torch.autograd import Variable 9 | from ipdb import set_trace 10 | import numpy as np 11 | import torch.nn.functional as F 12 | 13 | 14 | class STN3d(nn.Module): 15 | def __init__(self): 16 | super(STN3d, self).__init__() 17 | self.conv1 = torch.nn.Conv1d(3, 64, 1) 18 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 19 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 20 | self.fc1 = nn.Linear(1024, 512) 21 | self.fc2 = nn.Linear(512, 256) 22 | self.fc3 = nn.Linear(256, 9) 23 | self.relu = nn.ReLU() 24 | 25 | def forward(self, x): 26 | batchsize = x.size()[0] 27 | x = F.relu(self.conv1(x)) 28 | x = F.relu(self.conv2(x)) 29 | x = F.relu(self.conv3(x)) 30 | x = torch.max(x, 2, keepdim=True)[0] 31 | x = x.view(-1, 1024) 32 | 33 | x = F.relu(self.fc1(x)) 34 | x = F.relu(self.fc2(x)) 35 | x = self.fc3(x) 36 | 37 | iden = Variable(torch.tensor([1, 0, 0, 0, 1, 0, 0, 0, 1], dtype=torch.float32)).view(1, 9).repeat(batchsize, 1) 38 | if x.is_cuda: 39 | iden = iden.cuda() 40 | x = x + iden 41 | x = x.view(-1, 3, 3) 42 | return x 43 | 44 | 45 | class STNkd(nn.Module): 46 | def __init__(self, k=64): 47 | super(STNkd, self).__init__() 48 | self.conv1 = torch.nn.Conv1d(k, 64, 1) 49 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 50 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 51 | self.fc1 = nn.Linear(1024, 512) 52 | self.fc2 = nn.Linear(512, 256) 53 | self.fc3 = nn.Linear(256, k * k) 54 | self.relu = nn.ReLU() 55 | 56 | self.k = k 57 | 58 | def forward(self, x): 59 | batchsize = x.size()[0] 60 | x = F.relu(self.conv1(x)) 61 | x = F.relu(self.conv2(x)) 62 | x = F.relu(self.conv3(x)) 63 | x = torch.max(x, 2, keepdim=True)[0] 64 | x = 
x.view(-1, 1024) 65 | 66 | x = F.relu(self.fc1(x)) 67 | x = F.relu(self.fc2(x)) 68 | x = self.fc3(x) 69 | 70 | iden = ( 71 | Variable(torch.from_numpy(np.eye(self.k).flatten().astype(np.float32))) 72 | .view(1, self.k * self.k) 73 | .repeat(batchsize, 1) 74 | ) 75 | if x.is_cuda: 76 | iden = iden.cuda() 77 | x = x + iden 78 | x = x.view(-1, self.k, self.k) 79 | return x 80 | 81 | 82 | # NOTE: removed BN 83 | class PointNetfeat(nn.Module): 84 | def __init__(self, num_points, global_feat=True, in_dim=3, out_dim=1024, feature_transform=False, **args): 85 | super(PointNetfeat, self).__init__() 86 | self.num_points = num_points 87 | self.out_dim = out_dim 88 | self.feature_transform = feature_transform 89 | # self.stn = STN3d(in_dim=in_dim) 90 | self.stn = STNkd(k=in_dim) 91 | self.conv1 = torch.nn.Conv1d(in_dim, 64, 1) 92 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 93 | self.conv3 = torch.nn.Conv1d(128, 512, 1) 94 | self.conv4 = torch.nn.Conv1d(512, out_dim, 1) 95 | self.global_feat = global_feat 96 | if self.feature_transform: 97 | self.fstn = STNkd(k=64) 98 | 99 | def forward(self, x, **args): 100 | n_pts = x.shape[2] 101 | trans = self.stn(x) 102 | x = x.transpose(2, 1) 103 | x = torch.bmm(x, trans) 104 | x = x.transpose(2, 1) 105 | x = F.relu(self.conv1(x)) 106 | 107 | if self.feature_transform: 108 | trans_feat = self.fstn(x) 109 | x = x.transpose(2, 1) 110 | x = torch.bmm(x, trans_feat) 111 | x = x.transpose(2, 1) 112 | 113 | pointfeat = x 114 | x = F.relu(self.conv2(x)) 115 | x = F.relu(self.conv3(x)) 116 | x = self.conv4(x) 117 | x = torch.max(x, 2, keepdim=True)[0] 118 | x = x.view(-1, self.out_dim) 119 | if self.global_feat: 120 | return x 121 | else: 122 | x = x.view(-1, self.out_dim, 1).repeat(1, 1, n_pts) 123 | return torch.cat([x, pointfeat], 1) 124 | 125 | 126 | def feature_transform_regularizer(trans): 127 | d = trans.size()[1] 128 | batchsize = trans.size()[0] 129 | I = torch.eye(d)[None, :, :] 130 | if trans.is_cuda: 131 | I = I.cuda() 132 | loss = torch.mean(torch.norm(torch.bmm(trans, trans.transpose(2, 1)) - I, dim=(1, 2))) 133 | return loss 134 | 135 | 136 | if __name__ == "__main__": 137 | sim_data = Variable(torch.rand(32, 3, 2500)) 138 | trans = STN3d() 139 | out = trans(sim_data) 140 | print("stn", out.size()) 141 | print("loss", feature_transform_regularizer(out)) 142 | 143 | sim_data_64d = Variable(torch.rand(32, 64, 2500)) 144 | trans = STNkd(k=64) 145 | out = trans(sim_data_64d) 146 | print("stn64d", out.size()) 147 | print("loss", feature_transform_regularizer(out)) 148 | 149 | pointfeat_g = PointNetfeat(global_feat=True, num_points=2500) 150 | out = pointfeat_g(sim_data) 151 | print("global feat", out.size()) 152 | 153 | pointfeat = PointNetfeat(global_feat=False, num_points=2500) 154 | out = pointfeat(sim_data) 155 | print("point feat", out.size()) 156 | 157 | -------------------------------------------------------------------------------- /src/toolee/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | tool_ee 4 | 0.0.1 5 | The inference server of toolEE project 6 | 7 | Yunlong Wang 8 | 9 | MIT 10 | 11 | 12 | Yunlong Wang 13 | 14 | catkin 15 | 16 | sensor_msgs 17 | geometry_msgs 18 | rospy 19 | visualization_msgs 20 | tf 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /src/toolee/requirements.txt: -------------------------------------------------------------------------------- 1 | 
opencv-python==4.2.0.32 2 | scipy==1.4.1 3 | numpy==1.23.5 4 | tensorboardX==2.5.1 -------------------------------------------------------------------------------- /src/toolee/scripts/eval_single.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | python runners/evaluation_single.py \ 3 | --score_model_dir ScoreNet/ckpt_epoch2000.pth \ 4 | --score_model_dir ScoreNet/ckpt_epoch2000.pth \ 5 | --log_folder /dataSSD/yunlong/dataspace/training_logs_obj_pose \ 6 | --data_path /dataSSD/yunlong/dataspace/DatasetToolEE \ 7 | --eval_set test \ 8 | --result_dir /data/yunlong/training_logs_obj_pose/results \ 9 | --sampler_mode ode \ 10 | --max_eval_num 1000000 \ 11 | --percentage_data_for_test 1.0 \ 12 | --batch_size 200 \ 13 | --seed 0 \ 14 | --test_source val \ 15 | --eval_repeat_num 50 \ 16 | --T0 0.55 \ 17 | # --save_video \ 18 | -------------------------------------------------------------------------------- /src/toolee/scripts/inference_node.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | host_name = "tams110" 4 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 5 | # os.environ["ROS_MASTER_URI"] = f"http://{host_name}:11311" 6 | import sys 7 | import _pickle as cPickle 8 | import numpy as np 9 | from tqdm import tqdm 10 | 11 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 12 | from utils.misc import get_rot_matrix 13 | from networks.posenet_agent import PoseNet 14 | from configs.config import get_config 15 | from datasets.dataset_toolee import get_data_loaders_from_cfg, process_batch 16 | from utils.transform_utils import TfUtils 17 | import rospy 18 | from sensor_msgs.msg import Image 19 | from sensor_msgs.msg import PointCloud2 20 | import ros_numpy 21 | from toolee_infer.srv import PosePred 22 | import tf2_ros 23 | from geometry_msgs.msg import Pose 24 | 25 | 26 | def get_cfg(): 27 | ''' load config ''' 28 | cfg = get_config() 29 | epoch = 2000 30 | cfg.score_model_dir = f'ScoreNet/ckpt_epoch{epoch}.pth' 31 | cfg.task_type = 'ee_pose' 32 | cfg.use_symtr_prior = False 33 | cfg.regression_head = 'Rx_Ry_and_T_and_Symtr' # Rx_Ry_and_T, Rx_Ry_and_T_and_Symtr 34 | cfg.pose_mode = 'rot_matrix_symtr' # rot_matrix_symtr, rot_matrix 35 | cfg.log_folder = f"/dataSSD/yunlong/dataspace/training_logs_ee_pose_symtr" 36 | cfg.eval_repeat_num = 20 37 | cfg.eval_set = "test" # test, novel 38 | overwrite = False 39 | cfg.data_path = "/dataSSD/yunlong/dataspace/DatasetToolEE" 40 | cfg.sampler_mode = ["ode"] 41 | cfg.max_eval_num = 1000000 42 | cfg.percentage_data_for_test = 1.0 43 | cfg.batch_size = 200 44 | cfg.seed = 0 45 | cfg.T0 = 0.55 46 | cfg.num_gpu = 1 47 | return cfg 48 | 49 | class ToolEEPredictor(object): 50 | def __init__(self, cfg): 51 | self.cfg = cfg 52 | self.score_agent = None 53 | 54 | def init_model(self, ckpt_path=None): 55 | self.cfg.posenet_mode = 'score' 56 | if ckpt_path is None: 57 | ckpt_path = os.path.join(self.cfg.log_folder, f'results/ckpts/{cfg.score_model_dir}') 58 | assert os.path.exists(ckpt_path), f"ScoreNet checkpoint {ckpt_path} does not exist!" 59 | self.score_agent = PoseNet(cfg) 60 | self.score_agent.load_ckpt(model_dir=ckpt_path, model_path=True, load_model_only=True) 61 | 62 | def predict(self, data, is_pred_symtr=True): 63 | ''' predict poses ''' 64 | # data: [bs, 1024, 3] 65 | assert self.score_agent is not None, "ScoreNet model is not loaded!" 
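        # The score network draws cfg.eval_repeat_num pose hypotheses per sample; the resulting
        # [bs, repeat_num, 4, 4] transforms are averaged by TfUtils.get_avg_sRT into the single
        # pose returned below.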
66 | porcessed_data = process_batch( 67 | batch_sample=data, 68 | device=cfg.device, 69 | is_pred_symtr=is_pred_symtr 70 | ) 71 | # [bs, repeat_num, 4, 4], [bs, repeat_num, 9] 72 | pred_RTs, pred_poses, pred_symtrs = self.pred_pose_batch(porcessed_data) # poses (6+3)vector, RTs(4,4) matrix 73 | avg_pred_RT = TfUtils.get_avg_sRT(pred_RTs) 74 | return avg_pred_RT 75 | 76 | def pred_pose_batch(self, batch_sample): 77 | ''' inference poses ''' 78 | pred_symtrs = None 79 | assert self.score_agent is not None, "ScoreNet model is not loaded!" 80 | pred_pose, _, _, _ = self.score_agent.pred_func( 81 | data=batch_sample, 82 | repeat_num=cfg.eval_repeat_num, 83 | T0=cfg.T0, 84 | ) 85 | 86 | if pred_pose.shape[2] == 12: 87 | pred_symtrs = pred_pose[:, :, -3:] 88 | pred_symtrs = pred_symtrs.cpu().numpy() 89 | 90 | ''' Transfer predicted poses (6+3)vector to RTs(4,4) matrix ''' 91 | RTs_all = np.ones((pred_pose.shape[0], pred_pose.shape[1], 4, 4)) # [bs, repeat_num, 4, 4] 92 | for i in range(pred_pose.shape[1]): 93 | R = get_rot_matrix(pred_pose[:, i, :6]) 94 | T = pred_pose[:, i, 6:9] 95 | RTs = np.identity(4, dtype=float)[np.newaxis, ...].repeat(R.shape[0], 0) 96 | RTs[:, :3, :3] = R.cpu().numpy() 97 | RTs[:, :3, 3] = T.cpu().numpy() 98 | RTs_all[:, i, :, :] = RTs 99 | return RTs_all, pred_pose, pred_symtrs 100 | 101 | class PredictNode(object): 102 | def __init__(self, cfg): 103 | self.cfg = get_cfg() 104 | self.predictor = ToolEEPredictor(cfg) 105 | # ros pointscloud2 listener 106 | 107 | def start_service(self): 108 | ''' spin up the node ''' 109 | rospy.Service('pose_pred', PosePred, self.prediction_service) 110 | 111 | def prediction_service(self, pc2_msg): 112 | ''' process pointcloud ''' 113 | points = self.pc2_to_array(pc2_msg) 114 | pred_RT = self.predictor.predict(points) 115 | pose_msg = self.RT_to_Pose_msg(pred_RT) 116 | return pose_msg 117 | 118 | 119 | def array_to_pc2(self,points): 120 | pc2_msg = ros_numpy.point_cloud2.array_to_pointcloud2(points, rospy.Time.now(), "camera_link") 121 | return pc2_msg 122 | 123 | def pc2_to_array(self,pc2_msg): 124 | points = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) 125 | return points 126 | 127 | def RT_to_Pose_msg(self,RT): 128 | ''' convert RT to geometry_msgs/Pose msg''' 129 | trans,quat = TfUtils.decompose_tf_M(RT) 130 | pose_msg = Pose() 131 | pose_msg.position = list(trans) 132 | pose_msg.orientation = list(quat) 133 | return pose_msg 134 | 135 | def Pose_msg_to_RT(self,pose_msg): 136 | ''' convert geometry_msgs/Pose msg to RT''' 137 | RT = TfUtils.compose_tf_M(np.asarray(pose_msg.position),np.asarray(pose_msg.orientation)) 138 | 139 | def warm_up(self): 140 | ''' warm up the model ''' 141 | self.predictor.init_model() 142 | fake_points = np.zeros((1, 1024, 3)) 143 | self.predictor.predict(fake_points) 144 | 145 | if __name__ == '__main__': 146 | rospy.init_node('pose_prediction_node') 147 | cfg = get_cfg() 148 | pred_node = PredictNode(cfg) 149 | pred_node.warm_up() 150 | pred_node.start_service() 151 | rospy.spin() 152 | -------------------------------------------------------------------------------- /src/toolee/scripts/pose_pred_agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 5 | host_name = "tams110" 6 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 7 | os.environ["ROS_MASTER_URI"] = f"http://{host_name}:11311" 8 | import numpy as np 9 | import rospy 10 | from tool_ee.srv import PosePred 11 | from 
geometry_msgs.msg import Pose 12 | from utils.transform_utils import TfUtils 13 | from sensor_msgs import point_cloud2 14 | from sensor_msgs.msg import PointCloud2, PointField 15 | from std_msgs.msg import Header 16 | 17 | BIT_MOVE_16 = 2 ** 16 18 | BIT_MOVE_8 = 2 ** 8 19 | 20 | fields_xyz = [ 21 | PointField('x', 0, PointField.FLOAT32, 1), 22 | PointField('y', 4, PointField.FLOAT32, 1), 23 | PointField('z', 8, PointField.FLOAT32, 1), 24 | ] 25 | 26 | def points_array_to_pc2_msg(frame_id, points: np.ndarray): 27 | header = Header() 28 | header.frame_id = frame_id 29 | pc2_msg = point_cloud2.create_cloud(header, fields_xyz, points) 30 | pc2_msg.header.stamp = rospy.Time.now() 31 | return pc2_msg 32 | 33 | def pc2_msg_to_points_array(pc2_msg: PointCloud2): 34 | return np.array(list(point_cloud2.read_points(pc2_msg, field_names=("x", "y", "z")))) 35 | 36 | def array_RT_to_msg_Pose(RT): 37 | ''' convert RT to geometry_msgs/Pose msg''' 38 | trans, quat = TfUtils.decompose_tf_M(RT) 39 | pose_msg = Pose() 40 | pose_msg.position = list(trans) 41 | pose_msg.orientation = list(quat) 42 | return pose_msg 43 | 44 | def msg_Pose_to_array_RT(pose_msg): 45 | ''' convert geometry_msgs/Pose msg to RT''' 46 | RT = TfUtils.compose_tf_M(np.asarray(pose_msg.position), np.asarray(pose_msg.orientation)) 47 | return RT 48 | 49 | def call_pose_pred_service(points: np.ndarray): 50 | try: 51 | rospy.wait_for_service('pose_pred', timeout=10) 52 | rospy.loginfo('pose_pred service is available') 53 | except rospy.ROSException: 54 | rospy.logerr('pose_pred service is not available') 55 | return None 56 | try: 57 | pose_pred_service = rospy.ServiceProxy('pose_pred', PosePred) 58 | pc2_msg = points_array_to_pc2_msg(frame_id='camera_link', points=points) 59 | responce_msg = pose_pred_service(pc2_msg) 60 | pred_pose = responce_msg.pred_pose 61 | pos = np.asarray([ 62 | pred_pose.position.x, 63 | pred_pose.position.y, 64 | pred_pose.position.z 65 | ]) 66 | quat = np.asarray([ 67 | pred_pose.orientation.x, 68 | pred_pose.orientation.y, 69 | pred_pose.orientation.z, 70 | pred_pose.orientation.w 71 | ]) 72 | RT = TfUtils.compose_tf_M(trans=pos, quat=quat) 73 | angle = TfUtils.quaternion_to_anglexyz(quat) 74 | rospy.logdebug(f"Predicted result: \n translation: {pos}, \n Predicted angles: {angle}") 75 | return RT 76 | except rospy.ServiceException as e: 77 | print("Service call failed: %s" % e) 78 | 79 | if __name__ == '__main__': 80 | rospy.init_node('pose_prediction_node_test',log_level=rospy.DEBUG) 81 | fake_points = np.zeros((1024, 3)) 82 | call_pose_pred_service(fake_points) -------------------------------------------------------------------------------- /src/toolee/scripts/pose_pred_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 5 | host_name = "tams110" 6 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 7 | os.environ["ROS_MASTER_URI"] = f"http://{host_name}:11311" 8 | import numpy as np 9 | import rospy 10 | from tool_ee.srv import PosePred 11 | from geometry_msgs.msg import Pose 12 | from utils.transform_utils import TfUtils 13 | from sensor_msgs import point_cloud2 14 | from sensor_msgs.msg import PointCloud2, PointField 15 | from std_msgs.msg import Header 16 | 17 | BIT_MOVE_16 = 2 ** 16 18 | BIT_MOVE_8 = 2 ** 8 19 | 20 | fields_xyz = [ 21 | PointField('x', 0, PointField.FLOAT32, 1), 22 | PointField('y', 4, PointField.FLOAT32, 1), 23 | PointField('z', 8, PointField.FLOAT32, 1), 24 | ] 
25 | 26 | def points_array_to_pc2_msg(frame_id, points: np.ndarray): 27 | header = Header() 28 | header.frame_id = frame_id 29 | pc2_msg = point_cloud2.create_cloud(header, fields_xyz, points) 30 | pc2_msg.header.stamp = rospy.Time.now() 31 | return pc2_msg 32 | 33 | def pc2_msg_to_points_array(pc2_msg: PointCloud2): 34 | return np.array(list(point_cloud2.read_points(pc2_msg, field_names=("x", "y", "z")))) 35 | 36 | def array_RT_to_msg_Pose(RT): 37 | ''' convert RT to geometry_msgs/Pose msg''' 38 | trans, quat = TfUtils.decompose_tf_M(RT) 39 | pose_msg = Pose() 40 | pose_msg.position = list(trans) 41 | pose_msg.orientation = list(quat) 42 | return pose_msg 43 | 44 | def msg_Pose_to_array_RT(pose_msg): 45 | ''' convert geometry_msgs/Pose msg to RT''' 46 | RT = TfUtils.compose_tf_M(np.asarray(pose_msg.position), np.asarray(pose_msg.orientation)) 47 | return RT 48 | 49 | def call_pose_pred_service(points: np.ndarray): 50 | try: 51 | rospy.wait_for_service('pose_pred', timeout=10) 52 | rospy.loginfo('pose_pred service is available') 53 | except rospy.ROSException: 54 | rospy.logerr('pose_pred service is not available') 55 | return None 56 | try: 57 | pose_pred_service = rospy.ServiceProxy('pose_pred', PosePred) 58 | pc2_msg = points_array_to_pc2_msg(frame_id='camera_link', points=points) 59 | responce_msg = pose_pred_service(pc2_msg) 60 | pred_pose = responce_msg.pred_pose 61 | pos = np.asarray([ 62 | pred_pose.position.x, 63 | pred_pose.position.y, 64 | pred_pose.position.z 65 | ]) 66 | quat = np.asarray([ 67 | pred_pose.orientation.x, 68 | pred_pose.orientation.y, 69 | pred_pose.orientation.z, 70 | pred_pose.orientation.w 71 | ]) 72 | RT = TfUtils.compose_tf_M(trans=pos, quat=quat) 73 | angle = TfUtils.quaternion_to_anglexyz(quat) 74 | rospy.logdebug(f"Predicted result: \n translation: {pos}, \n Predicted angles: {angle}") 75 | return RT 76 | except rospy.ServiceException as e: 77 | print("Service call failed: %s" % e) 78 | 79 | if __name__ == '__main__': 80 | rospy.init_node('pose_prediction_node_test',log_level=rospy.DEBUG) 81 | fake_points = np.zeros((1024, 3)) 82 | call_pose_pred_service(fake_points) -------------------------------------------------------------------------------- /src/toolee/scripts/seg_pred_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 5 | host_name = "tams110" 6 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 7 | os.environ["ROS_MASTER_URI"] = f"http://{host_name}:11311" 8 | # os.environ["LD_PRELOAD"] = "/usr/lib/x86_64-linux-gnu/libtiff.so.5" 9 | # os.environ["LD_LIBRARY_PATH"] = "/homeL/1wang/workspace/toolee_ws/devel/lib:/opt/ros/noetic/lib/x86_64-linux-gnu:/opt/ros/noetic/lib" 10 | 11 | from utils.file_utils import MetaUtils 12 | import time 13 | import rospy 14 | import numpy as np 15 | from tool_ee.srv import SegPred, SegPredRequest 16 | from cv_bridge import CvBridge 17 | import cv2 18 | 19 | bridge = CvBridge() 20 | 21 | def call_seg_pred_service(rgb_img: np.ndarray, vis=True): 22 | 23 | try: 24 | rospy.wait_for_service('seg_pred', timeout=10) 25 | rospy.loginfo('seg_pred service is available') 26 | except rospy.ROSException: 27 | rospy.logerr('seg_pred service is not available') 28 | return None 29 | try: 30 | seg_pred_service = rospy.ServiceProxy('seg_pred', SegPred) 31 | 32 | request_msg = SegPredRequest(rgb=bridge.cv2_to_imgmsg(rgb_img, encoding='passthrough'), vis=vis) 33 | response_meg = seg_pred_service(request_msg) 34 
| seg_msg_list = response_meg.seg_list 35 | seg_name_list = response_meg.seg_name_list 36 | seg_masks = [] 37 | for idx, _ in enumerate(seg_name_list): 38 | img_msg = seg_msg_list[idx] 39 | seg_img = bridge.imgmsg_to_cv2(img_msg) 40 | seg_mask = np.zeros_like(seg_img, dtype=bool) 41 | seg_mask[seg_img == 255] = True 42 | seg_mask[seg_img == 0] = False 43 | seg_masks.append(seg_mask) 44 | return seg_masks, seg_name_list 45 | 46 | except rospy.ServiceException as e: 47 | print("Service call failed: %s" % e) 48 | 49 | 50 | if __name__ == '__main__': 51 | ''' 52 | if error happened with cv_bridge when using conda env, try the following command before run the python script: 53 | export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtiff.so.5 54 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/ros/noetic/lib 55 | ''' 56 | 57 | rospy.init_node('seg_prediction_node_test', log_level=rospy.DEBUG) 58 | # dataset_root = "/dataSSD/yunlong/dataspace/DatasetToolEE" 59 | # cat_name = "hammer_grip" # "hammer_grip", "screwdriver", "wrench" 60 | # obj_name = "hammer_02" 61 | # ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 62 | # # ids = [0, ] 63 | # for id in ids: 64 | # meta_name = f"meta_{cat_name}_{obj_name}_{id:04d}.yaml" 65 | # meta_path = os.path.join(dataset_root, cat_name, obj_name, meta_name) 66 | # meta_util = MetaUtils(data_root=dataset_root, meta_name=meta_path) 67 | # rgb_img = meta_util.get_image() 68 | # seg_img_gt = meta_util.get_affordance_seg() 69 | # call_seg_pred_service(rgb_img, vis=True) 70 | # 71 | # time.sleep(2) 72 | # print('done!') 73 | img = cv2.imread("/homeL/1wang/workspace/toolee_ws/src/toolee/rgb.png") 74 | rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 75 | call_seg_pred_service(rgb_img, vis=True) 76 | -------------------------------------------------------------------------------- /src/toolee/scripts/seg_pred_service.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import numpy as np 5 | 6 | host_name = "tams110" 7 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 8 | os.environ["ROS_MASTER_URI"] = f"http://{host_name}:11311" 9 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 10 | import rospy 11 | from tool_ee.srv import SegPred, SegPredResponse 12 | 13 | from configs.mrcnn_config import get_config 14 | from mrcnn.runner import get_predictor 15 | from cv_bridge import CvBridge,CvBridgeError 16 | import random 17 | import cv2 18 | from detectron2.utils.visualizer import Visualizer 19 | from mrcnn.dataset import get_meta_data 20 | from detectron2.utils.visualizer import ColorMode 21 | from sensor_msgs.msg import Image 22 | 23 | seg_id_map_valid = { 24 | 0: "hammer_grip_head1", 25 | 1: "hammer_grip_grip", 26 | 2: "screwdriver_head1", 27 | 3: "wrench_head1", 28 | } 29 | 30 | # the inference class 31 | class AffSegPrediction(object): 32 | def __init__(self): 33 | self.val_metadata = get_meta_data(dataset='val') 34 | self.train_metadata = get_meta_data(dataset='train') 35 | 36 | def init_model(self, ckpt_path=None): 37 | if ckpt_path is None: 38 | ckpt_path = "/dataSSD/yunlong/dataspace/mrcnn_result/output/model_0024999.pth" 39 | self.predictor = get_predictor(ckpt_path=ckpt_path,roi_threshold=0.7) 40 | rospy.loginfo('model initialized') 41 | 42 | def predict(self, img, visualize=True, region_filter=True): 43 | ''' predict the affordance segmentation ''' 44 | """ 45 | aff_seg_pred: [bs, H, W] 46 | classed_ids: [bs, num_instances] 47 | result_draw: [H, W, 3] 48 | """ 49 | 50 | 51 | pred_result = self.predictor(img) 52 | 
instances = pred_result['instances'].to('cpu') 53 | if region_filter: 54 | fesible_ids = [] 55 | h = img.shape[0] 56 | w = img.shape[1] 57 | fesible_region = np.zeros((h, w), dtype=bool) 58 | fesible_region[int(h / 3):int(h / 3 * 2), int(w / 3):int(w / 3 * 2)] = True 59 | for i in range(len(instances)): 60 | bbox = instances.pred_boxes.tensor[i].numpy() 61 | bbox_center = [int((bbox[0] + bbox[2])/2), int((bbox[1] + bbox[3]) / 2)] 62 | if fesible_region[bbox_center[1], bbox_center[0]]: 63 | fesible_ids.append(i) 64 | instances = instances[fesible_ids] 65 | pred_result['instances'] = instances 66 | classed_ids = instances.pred_classes.numpy() 67 | aff_seg_pred = instances.pred_masks.numpy() 68 | result_draw = None 69 | if visualize: 70 | 71 | result_draw = self.draw_result(img, instances) 72 | 73 | return pred_result, classed_ids, aff_seg_pred, result_draw 74 | 75 | def draw_result(self, img, instances, save_path=None): 76 | v = Visualizer( 77 | img[:, :, ::-1], 78 | metadata=self.val_metadata, 79 | scale=1.0, 80 | instance_mode=ColorMode.IMAGE 81 | # remove the colors of unsegmented pixels. This option is only available for segmentation models 82 | ) 83 | out = v.draw_instance_predictions(instances.to("cpu")) 84 | img_result = out.get_image()[:, :, ::-1] 85 | if save_path is not None: 86 | cv2.imwrite(filename=save_path, img=img_result) 87 | return img_result 88 | 89 | def warm_up(self): 90 | ''' warm up the model ''' 91 | rospy.loginfo('warming up the model') 92 | img = np.zeros((1920, 1080, 3)) 93 | self.predictor(img) 94 | 95 | # the ros node wrapper for the inference class 96 | class AffSegPredictionNode(): 97 | def __init__(self): 98 | self.bridge = CvBridge() 99 | self.aff_seg_pred = AffSegPrediction() 100 | self.aff_seg_pred.init_model() 101 | self.vis_pub = rospy.Publisher('seg_vis', Image, queue_size=1, latch=True) 102 | 103 | def warmup(self): 104 | ''' warm up the model ''' 105 | self.aff_seg_pred.warm_up() 106 | rospy.loginfo('affordance segmentation model warmed up') 107 | 108 | def start_service(self,service_name='seg_pred'): 109 | ''' spin up the node ''' 110 | rospy.Service(service_name, SegPred, self.prediction_service) 111 | 112 | def pub_result(self, img): 113 | ''' publish the visualization result ''' 114 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 115 | vis_img_msg = self.bridge.cv2_to_imgmsg(img, 'bgr8') 116 | self.vis_pub.publish(vis_img_msg) 117 | 118 | def prediction_service(self, msg): 119 | rospy.logdebug('received prediction request') 120 | vis = msg.vis 121 | try: 122 | rgb_img = self.bridge.imgmsg_to_cv2(msg.rgb, 'rgb8') 123 | except CvBridgeError as e: 124 | rospy.logerr(f'failed to convert the image message: {e}') 125 | return SegPredResponse() 126 | pred_result, classed_ids, aff_seg_pred, result_draw = self.aff_seg_pred.predict(rgb_img, visualize=vis) 127 | if vis: 128 | self.pub_result(result_draw) 129 | seg_list = [] 130 | seg_name_list = [] 131 | for idx, classed_id in enumerate(classed_ids): 132 | if classed_id not in seg_id_map_valid: 133 | continue 134 | # wrap the binary segmentation to the image message 135 | # aff_seg_pred: [bs, H, W] each value is (Ture/False) 136 | seg_img = np.zeros_like(aff_seg_pred[idx]) 137 | seg_img[aff_seg_pred[idx] == True] = 1 138 | seg_img = seg_img.astype(np.uint8) 139 | # seg_img = np.repeat(seg_img[:, :, np.newaxis], 3, axis=2) 140 | # cv_rgb_image = cv2.cvtColor(seg_img, cv2.COLOR_RGB2BGR) 141 | seg_msg = self.bridge.cv2_to_imgmsg(seg_img, encoding='mono8') 142 | seg_list.append(seg_msg) 143 | 
seg_name_list.append(seg_id_map_valid[classed_id]) 144 | rospy.logdebug(f'prediction: {len(seg_name_list)} segments are predicted') 145 | return SegPredResponse(seg_list=seg_list, seg_name_list=seg_name_list) 146 | 147 | # todo: finish the seg prediction service 148 | if __name__ == '__main__': 149 | # init the node 150 | rospy.init_node('seg_pred_node', log_level=rospy.DEBUG) 151 | seg_pred_node = AffSegPredictionNode() 152 | seg_pred_node.warmup() 153 | seg_pred_node.start_service('seg_pred') 154 | try: 155 | rospy.wait_for_service('seg_pred', timeout=10) 156 | rospy.loginfo('seg_pred service is available') 157 | except rospy.ROSException: 158 | rospy.logerr('seg_pred service not available') 159 | rospy.signal_shutdown('seg_pred service not available') 160 | rospy.spin() 161 | -------------------------------------------------------------------------------- /src/toolee/scripts/tensorboard.sh: -------------------------------------------------------------------------------- 1 | tensorboard --logdir ./results/logs/ --port 0505 --reload_interval 1 --samples_per_plugin images=999 -------------------------------------------------------------------------------- /src/toolee/scripts/test_affordance_seg.sh: -------------------------------------------------------------------------------- 1 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/ros/noetic/lib 2 | export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtiff.so.5 3 | -------------------------------------------------------------------------------- /src/toolee/scripts/train_score_ee_pose.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=1 2 | python runners/trainer.py \ 3 | --data_path /dataSSD/yunlong/dataspace/DatasetToolEE \ 4 | --log_folder /dataSSD/yunlong/dataspace/training_logs_ee_pose \ 5 | --log_dir ScoreNet \ 6 | --agent_type score \ 7 | --sampler_mode ode \ 8 | --batch_size 600 \ 9 | --sampling_steps 500 \ 10 | --eval_freq 1 \ 11 | --n_epochs 2000 \ 12 | --percentage_data_for_train 1.0 \ 13 | --percentage_data_for_test 1.0 \ 14 | --seed 0 \ 15 | --is_train \ 16 | --task_type ee_pose \ 17 | -------------------------------------------------------------------------------- /src/toolee/scripts/train_score_ee_pose_symtr.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | python runners/trainer.py \ 3 | --data_path /dataSSD/yunlong/dataspace/DatasetToolEE \ 4 | --log_folder /dataSSD/yunlong/dataspace/training_logs_ee_pose_symtr \ 5 | --percentage_data_for_val 0.1 \ 6 | --batch_size 200 \ 7 | --eval_batch_size 200 \ 8 | --log_dir ScoreNet \ 9 | --agent_type score \ 10 | --sampler_mode ode \ 11 | --sampling_steps 500 \ 12 | --eval_freq 1 \ 13 | --n_epochs 10000 \ 14 | --percentage_data_for_train 1.0 \ 15 | --percentage_data_for_test 1.0 \ 16 | --seed 0 \ 17 | --is_train \ 18 | --task_type ee_pose \ 19 | --regression_head Rx_Ry_and_T_and_Symtr \ 20 | --pose_mode rot_matrix_symtr \ 21 | -------------------------------------------------------------------------------- /src/toolee/scripts/train_score_obj_pose.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="1" 2 | python runners/trainer.py \ 3 | --data_path /dataSSD/yunlong/dataspace/DatasetToolEE \ 4 | --log_folder /dataSSD/yunlong/dataspace/training_logs_obj_pose \ 5 | --log_dir ScoreNet \ 6 | --agent_type score \ 7 | --sampler_mode ode \ 8 | --sampling_steps 500 \ 9 | --eval_freq 1 \ 10 | --n_epochs 
2000 \ 11 | --percentage_data_for_train 1.0 \ 12 | --percentage_data_for_test 1.0 \ 13 | --seed 0 \ 14 | --is_train \ 15 | --task_type obj_pose \ 16 | -------------------------------------------------------------------------------- /src/toolee/srv/PosePred.srv: -------------------------------------------------------------------------------- 1 | #request fields 2 | sensor_msgs/PointCloud2 pc2 3 | --- 4 | #response fields 5 | geometry_msgs/Pose pred_pose 6 | float32[] pred_symtrs -------------------------------------------------------------------------------- /src/toolee/srv/SegPred.srv: -------------------------------------------------------------------------------- 1 | #request fields 2 | sensor_msgs/Image rgb 3 | bool vis 4 | --- 5 | #response fields 6 | sensor_msgs/Image[] seg_list 7 | string[] seg_name_list -------------------------------------------------------------------------------- /src/toolee/utils/data_tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | from tf.transformations import compose_matrix, decompose_matrix 4 | import tf 5 | 6 | 7 | def sample_data(data, num_sample): 8 | """ data is in N x ... 9 | we want to keep num_samplexC of them. 10 | if N > num_sample, we will randomly keep num_sample of them. 11 | if N < num_sample, we will randomly duplicate samples. 12 | """ 13 | try: 14 | N = data.shape[0] 15 | if (N == num_sample): 16 | return data, range(N) 17 | elif (N > num_sample): 18 | sample = np.random.choice(N, num_sample) 19 | return data[sample, ...], sample 20 | else: 21 | # print(N) 22 | sample = np.random.choice(N, num_sample - N) 23 | dup_data = data[sample, ...] 24 | return np.concatenate([data, dup_data], 0), list(range(N)) + list(sample) 25 | except Exception as e: 26 | print(e) 27 | 28 | class TfUtils: 29 | @staticmethod 30 | def random_tf_M(N=None): 31 | trans_offset_limit = 0.2 32 | rnd_Ms = [] 33 | if N is None: 34 | trans_offset = np.random.uniform(low=-1, high=1, size=(3,)) * trans_offset_limit 35 | angle_offset = np.random.uniform(low=-1, high=1, size=(3,)) * np.pi 36 | rnd_M = TfUtils.pose_to_tf_M( 37 | translate=trans_offset, 38 | angles=angle_offset, 39 | ) 40 | return rnd_M 41 | else: 42 | for _ in range(N): 43 | trans_offset = np.random.uniform(low=-1, high=1, size=(3,)) * trans_offset_limit 44 | angle_offset = np.random.uniform(low=-1, high=1, size=(3,)) * np.pi 45 | rnd_M = TfUtils.pose_to_tf_M( 46 | translate=trans_offset, 47 | angles=angle_offset, 48 | ) 49 | rnd_Ms.append(np.expand_dims(rnd_M, axis=0)) 50 | rnd_Ms = np.concatenate(rnd_Ms, axis=0) 51 | return rnd_Ms 52 | 53 | 54 | 55 | @staticmethod 56 | def compose_tf_M(trans, angles=None, quat=None,scale=np.array([1,1,1])): 57 | # M = compose_matrix(scale, shear, angles, trans, persp) 58 | # sequence of each transform 59 | # angles: xyz 60 | if angles is None: 61 | angles = TfUtils.quaternion_to_anglexyz(quat) 62 | M = compose_matrix( 63 | scale=np.asarray(scale), 64 | shear=None, 65 | angles=np.asarray(angles), 66 | translate=np.asarray(trans), 67 | perspective=None 68 | ) 69 | return M 70 | 71 | @staticmethod 72 | def pose_to_tf_M(translate, angles=None,quat=None): 73 | # angles here is radians 74 | assert angles is not None or quat is not None, 'either angle or quat must be provide' 75 | if angles is None: 76 | angles = TfUtils.quaternion_to_anglexyz(quat) 77 | M = compose_matrix( 78 | scale=None, 79 | shear=None, 80 | angles=np.asarray(angles), 81 | translate=np.asarray(translate), 82 | 
perspective=None 83 | ) 84 | return M 85 | 86 | @staticmethod 87 | def tf_M_to_pose(M): 88 | scale, shear, angles, translate, perspective = decompose_matrix(M) 89 | quat = TfUtils.anglexyz_to_quaternion(angles) 90 | return translate, quat 91 | 92 | @staticmethod 93 | def apply_tf_M_to_point(M, point): 94 | return np.dot(M,np.append(point,1))[:-1] 95 | 96 | @staticmethod 97 | def anglexyz_to_quaternion(angles): 98 | return tf.transformations.quaternion_from_euler(angles[0], angles[1], angles[2],axes='sxyz') 99 | 100 | @staticmethod 101 | def quaternion_to_anglexyz(quaternion): 102 | return tf.transformations.euler_from_quaternion(quaternion,axes='sxyz') 103 | 104 | @staticmethod 105 | def decompose_tf_M(M): 106 | scale, shear, angles, trans, persp = decompose_matrix(M) 107 | quat = TfUtils.anglexyz_to_quaternion(angles) 108 | return np.asarray(trans), np.asarray(quat) 109 | 110 | @staticmethod 111 | def concat_tf_M(matrices): 112 | M = np.identity(4) 113 | for i in matrices: 114 | M = np.dot(M, i) 115 | return M 116 | 117 | @staticmethod 118 | def anglexyz_to_tf_M(anglexyz): 119 | return tf.transformations.euler_matrix(anglexyz[0], anglexyz[1], anglexyz[2], axes="sxyz") 120 | 121 | @staticmethod 122 | def tf_M_to_anglexyz(tf_M): 123 | return tf.transformations.euler_from_matrix(tf_M, axes="sxyz") 124 | 125 | @staticmethod 126 | def random_transform(points, ee_poses:list, obj_pose, rnd_M=None): 127 | obj_trans = obj_pose[:3, 3].T 128 | if rnd_M is None: 129 | rnd_M = TfUtils.random_tf_M() 130 | # randomize the points cloud 131 | points -= obj_trans 132 | ones = np.expand_dims(np.ones(points.shape[0]), axis=-1) 133 | points = np.concatenate([points, ones], axis=-1) 134 | points = points.T 135 | points = np.dot(rnd_M, points) 136 | points = points.T[:, :3] 137 | points += obj_trans 138 | 139 | # randomize the ee poses 140 | ee_poses_copy = ee_poses.copy() 141 | for idx, ee_pose in enumerate(ee_poses_copy): 142 | ee_pose[:3, 3] -= obj_pose[:3, 3] 143 | ee_pose = np.dot(rnd_M, ee_pose) 144 | ee_pose[:3, 3] += obj_pose[:3, 3] 145 | ee_poses[idx] = np.asarray(ee_pose) 146 | 147 | # randomize the obj pose 148 | new_obj_pose = obj_pose.copy() 149 | new_obj_pose[:3, 3] -= obj_pose[:3, 3] 150 | new_obj_pose = np.dot(rnd_M, new_obj_pose) 151 | new_obj_pose[:3, 3] += obj_pose[:3, 3] 152 | 153 | return np.asarray(points), ee_poses, np.asarray(new_obj_pose) -------------------------------------------------------------------------------- /src/toolee/utils/datasets_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | # get 2d coordinates from image size 4 | def get_2d_coord_np(width, height, low=0, high=1, fmt="CHW"): 5 | """ 6 | Args: 7 | width: 8 | height: 9 | Returns: 10 | xy: (2, height, width) 11 | """ 12 | # coords values are in [low, high] [0,1] or [-1,1] 13 | x = np.linspace(0, width-1, width, dtype=np.float32) 14 | y = np.linspace(0, height-1, height, dtype=np.float32) 15 | xy = np.asarray(np.meshgrid(x, y)) 16 | if fmt == "HWC": 17 | xy = xy.transpose(1, 2, 0) 18 | elif fmt == "CHW": 19 | pass 20 | else: 21 | raise ValueError(f"Unknown format: {fmt}") 22 | return xy 23 | 24 | 25 | def aug_bbox_DZI(hyper_params, bbox_xyxy, im_H, im_W): 26 | """Used for DZI, the augmented box is a square (maybe enlarged) 27 | Args: 28 | bbox_xyxy (np.ndarray): 29 | Returns: 30 | center: the bbox center of the augmented bbox, it has been randomly shifted 31 | scale: the side length of the augmented bbox, it has been randomly scaled 32 | 
""" 33 | x1, y1, x2, y2 = bbox_xyxy.copy() 34 | cx = 0.5 * (x1 + x2) # center x 35 | cy = 0.5 * (y1 + y2) # center y 36 | bh = y2 - y1 # bbox height 37 | bw = x2 - x1 # bbox width 38 | if hyper_params['DZI_TYPE'].lower() == "uniform": 39 | scale_ratio = 1 + hyper_params['DZI_SCALE_RATIO'] * (2 * np.random.random_sample() - 1) # (1,) range[1-0.25, 1+0.25] 40 | shift_ratio = hyper_params['DZI_SHIFT_RATIO'] * (2 * np.random.random_sample(2) - 1) # (2,) range[-0.25, 0.25] 41 | bbox_center = np.array([cx + bw * shift_ratio[0], cy + bh * shift_ratio[1]]) # (h/2, w/2) 42 | scale = max(y2 - y1, x2 - x1) * scale_ratio * hyper_params['DZI_PAD_SCALE'] # 43 | elif hyper_params['DZI_TYPE'].lower() == "roi10d": 44 | # shift (x1,y1), (x2,y2) by 15% in each direction 45 | _a = -0.15 46 | _b = 0.15 47 | x1 += bw * (np.random.rand() * (_b - _a) + _a) 48 | x2 += bw * (np.random.rand() * (_b - _a) + _a) 49 | y1 += bh * (np.random.rand() * (_b - _a) + _a) 50 | y2 += bh * (np.random.rand() * (_b - _a) + _a) 51 | x1 = min(max(x1, 0), im_W) 52 | x2 = min(max(x1, 0), im_W) 53 | y1 = min(max(y1, 0), im_H) 54 | y2 = min(max(y2, 0), im_H) 55 | bbox_center = np.array([0.5 * (x1 + x2), 0.5 * (y1 + y2)]) 56 | scale = max(y2 - y1, x2 - x1) * hyper_params['DZI_PAD_SCALE'] 57 | elif hyper_params['DZI_TYPE'].lower() == "truncnorm": 58 | raise NotImplementedError("DZI truncnorm not implemented yet.") 59 | else: 60 | # No DZI 61 | bbox_center = np.array([cx, cy]) # (w/2, h/2) 62 | scale = max(y2 - y1, x2 - x1) 63 | # make sure the scale is not over the image size 64 | scale = min(scale, max(im_H, im_W)) * 1.0 65 | return bbox_center, scale 66 | 67 | 68 | def aug_bbox_eval(bbox_xyxy, im_H, im_W): 69 | """Used for DZI, the augmented box is a square (maybe enlarged) 70 | Args: 71 | bbox_xyxy (np.ndarray): 72 | Returns: 73 | center, scale 74 | """ 75 | x1, y1, x2, y2 = bbox_xyxy.copy() 76 | cx = 0.5 * (x1 + x2) 77 | cy = 0.5 * (y1 + y2) 78 | bh = y2 - y1 79 | bw = x2 - x1 80 | bbox_center = np.array([cx, cy]) # (w/2, h/2) 81 | scale = max(y2 - y1, x2 - x1) 82 | scale = min(scale, max(im_H, im_W)) * 1.0 83 | return bbox_center, scale 84 | 85 | def crop_resize_by_warp_affine(img, center, scale, output_size, rot=0, interpolation=cv2.INTER_LINEAR): 86 | """ 87 | warp affine transformation, from one 2D coordinate to another 2D coordinate, linear transformation. 88 | Try to use this function to transform the image in bbox to a square image with self.img_size. 89 | with rotation=0 means not rotate the image, only zoom in/out and shift the image. 
90 | output_size: int or (w, h) 91 | NOTE: if img is (h,w,1), the output will be (h,w) 92 | """ 93 | if isinstance(scale, (int, float)): 94 | scale = (scale, scale) 95 | if isinstance(output_size, int): 96 | output_size = (output_size, output_size) 97 | # get the affine transformation matrix from image in bbox to square img with size self.img_size 98 | trans = get_affine_transform(center, scale, rot, output_size) 99 | # apply the above affine transformation matrix to the 2D coordinate map of the whole image 100 | dst_img = cv2.warpAffine(img, trans, (int(output_size[0]), int(output_size[1])), flags=interpolation) 101 | return dst_img 102 | 103 | def get_affine_transform(center, scale, rot, output_size, shift=np.array([0, 0], dtype=np.float32), inv=False): 104 | """ 105 | adapted from CenterNet: https://github.com/xingyizhou/CenterNet/blob/master/src/lib/utils/image.py 106 | center: ndarray: (cx, cy) 107 | scale: (w, h) 108 | rot: angle in deg 109 | output_size: int or (w, h) 110 | """ 111 | if isinstance(center, (tuple, list)): 112 | center = np.array(center, dtype=np.float32) 113 | 114 | if isinstance(scale, (int, float)): 115 | scale = np.array([scale, scale], dtype=np.float32) 116 | 117 | if isinstance(output_size, (int, float)): 118 | output_size = (output_size, output_size) 119 | 120 | 121 | scale_tmp = scale 122 | src_w = scale_tmp[0] 123 | dst_w = output_size[0] 124 | dst_h = output_size[1] 125 | 126 | rot_rad = np.pi * rot / 180 127 | # get the rotated src points 128 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 129 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 130 | 131 | # three coordinates pairs can determine a warp affine transformation 132 | src = np.zeros((3, 2), dtype=np.float32) 133 | dst = np.zeros((3, 2), dtype=np.float32) 134 | src[0, :] = center + scale_tmp * shift 135 | src[1, :] = center + src_dir + scale_tmp * shift 136 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 137 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 138 | 139 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 140 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 141 | 142 | if inv: 143 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 144 | else: 145 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 146 | 147 | return trans 148 | 149 | def get_dir(src_point, rot_rad): 150 | ''' 151 | get the direction of the point after rotation 152 | :param src_point: the src points to be rotated 153 | :param rot_rad: the rotation angle in redius 154 | :return: 155 | ''' 156 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 157 | 158 | src_result = [0, 0] 159 | src_result[0] = src_point[0] * cs - src_point[1] * sn 160 | src_result[1] = src_point[0] * sn + src_point[1] * cs 161 | 162 | return src_result 163 | 164 | def get_3rd_point(a, b): 165 | direct = a - b 166 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 167 | -------------------------------------------------------------------------------- /src/toolee/utils/genpose_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def get_pose_dim(pose_mode): 4 | assert pose_mode in ['rot_matrix_symtr','rot_matrix'] 5 | if pose_mode == 'rot_matrix_symtr': 6 | return 6+3+3 7 | elif pose_mode == 'rot_matrix': 8 | return 6+3 9 | else: 10 | raise NotImplementedError 11 | 12 | class TrainClock(object): 13 | """ Clock object to track epoch and step during training 14 | """ 15 | def __init__(self): 16 | self.epoch = 1 17 | self.minibatch = 0 18 | 
self.step = 0 19 | 20 | # one step 21 | def tick(self): 22 | self.minibatch += 1 23 | self.step += 1 24 | 25 | # one epoch 26 | def tock(self): 27 | self.epoch += 1 28 | self.minibatch = 0 29 | 30 | def make_checkpoint(self): 31 | return { 32 | 'epoch': self.epoch, 33 | 'minibatch': self.minibatch, 34 | 'step': self.step 35 | } 36 | 37 | def restore_checkpoint(self, clock_dict): 38 | self.epoch = clock_dict['epoch'] 39 | self.minibatch = clock_dict['minibatch'] 40 | self.step = clock_dict['step'] 41 | 42 | 43 | def merge_results(results_ori, results_new): 44 | if len(results_ori.keys()) == 0: 45 | return results_new 46 | else: 47 | results = { 48 | 'pred_pose': torch.cat([results_ori['pred_pose'], results_new['pred_pose']], dim=0), 49 | 'gt_pose': torch.cat([results_ori['gt_pose'], results_new['gt_pose']], dim=0), 50 | 'cls_id': torch.cat([results_ori['cls_id'], results_new['cls_id']], dim=0), 51 | # 'path': results_ori['path'] + results_new['path'], 52 | } 53 | return results 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/toolee/utils/misc.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import sys 4 | 5 | import numpy as np 6 | import torch 7 | 8 | sys.path.append('..') 9 | 10 | from scipy.spatial.transform import Rotation as R 11 | from utils.transforms import rotation_6d_to_matrix 12 | 13 | def exists_or_mkdir(path): 14 | if not os.path.exists(path): 15 | os.makedirs(path) 16 | return False 17 | else: 18 | return True 19 | 20 | def sample_data(data, num_sample): 21 | """ data is in N x ... 22 | we want to keep num_samplexC of them. 23 | if N > num_sample, we will randomly keep num_sample of them. 24 | if N < num_sample, we will randomly duplicate samples. 25 | """ 26 | N = data.shape[0] 27 | if (N == num_sample): 28 | return data, range(N) 29 | elif (N > num_sample): 30 | sample = np.random.choice(N, num_sample) 31 | return data[sample, ...], sample 32 | else: 33 | # print(N) 34 | sample = np.random.choice(N, num_sample-N) 35 | dup_data = data[sample, ...] 36 | return np.concatenate([data, dup_data], 0), list(range(N))+list(sample) 37 | 38 | def get_rot_matrix(batch_pose): 39 | """ 40 | 'rot_matrix' -> batch_pose [B, 6] 41 | 42 | Return: rot_matrix [B, 3, 3] 43 | """ 44 | 45 | rot_mat = rotation_6d_to_matrix(batch_pose).permute(0, 2, 1) 46 | return rot_mat 47 | 48 | def transform_batch_pts(batch_pts, batch_pose, pose_mode='rot_matrix', inverse_pose=False): 49 | """ 50 | Args: 51 | batch_pts [B, N, C], N is the number of points, and C [x, y, z, ...] 52 | batch_pose [B, C], [quat/rot_mat/euler, trans] 53 | pose_mode is from ['quat_wxyz', 'quat_xyzw', 'euler_xyz', 'rot_matrix'] 54 | if inverse_pose is true, the transformation will be inversed 55 | Returns: 56 | new_pts [B, N, C] 57 | """ 58 | assert pose_mode in ['rot_matrix', 'rot_matrix_symtr'], f"the rotation mode {pose_mode} is not supported!" 
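    # The steps below split batch_pose into a 6D rotation (first 6 dims) and a 3D translation,
    # rebuild a [B, 4, 4] homogeneous transform (optionally inverted via inverse_RT), and then
    # apply it to the xyz channels of batch_pts in homogeneous coordinates.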
59 | B = batch_pts.shape[0] 60 | rot = batch_pose[:, :6] 61 | loc = batch_pose[:, 6:9] 62 | 63 | 64 | rot_mat = get_rot_matrix(rot) 65 | if inverse_pose == True: 66 | rot_mat, loc = inverse_RT(rot_mat, loc) 67 | loc = loc[..., np.newaxis] 68 | 69 | trans_mat = torch.cat((rot_mat, loc), dim=2) 70 | trans_mat = torch.cat((trans_mat, torch.tile(torch.tensor([[0, 0, 0, 1]]).to(trans_mat.device), (B, 1, 1))), dim=1) 71 | 72 | new_pts = copy.deepcopy(batch_pts) 73 | padding = torch.ones([batch_pts.shape[0], batch_pts.shape[1], 1]).to(batch_pts.device) 74 | pts = torch.cat((batch_pts[:, :, :3], padding), dim=2) 75 | new_pts[:, :, :3] = torch.matmul(trans_mat.to(torch.float32), pts.permute(0, 2, 1)).permute(0, 2, 1)[:, :, :3] 76 | 77 | return new_pts 78 | 79 | def inverse_RT(batch_rot_mat, batch_trans): 80 | """ 81 | Args: 82 | batch_rot_mat [B, 3, 3] 83 | batch_trans [B, 3] 84 | Return: 85 | inversed_rot_mat [B, 3, 3] 86 | inversed_trans [B, 3] 87 | """ 88 | trans = batch_trans[..., np.newaxis] 89 | inversed_rot_mat = batch_rot_mat.permute(0, 2, 1) 90 | inversed_trans = - inversed_rot_mat @ trans 91 | return inversed_rot_mat, inversed_trans.squeeze(-1) 92 | 93 | """ https://arc.aiaa.org/doi/abs/10.2514/1.28949 """ 94 | """ https://stackoverflow.com/questions/12374087/average-of-multiple-quaternions """ 95 | """ http://tbirdal.blogspot.com/2019/10/i-allocate-this-post-to-providing.html """ 96 | 97 | def average_quaternion_batch(Q, weights=None): 98 | """calculate the average quaternion of the multiple quaternions 99 | Args: 100 | Q (tensor): [B, num_quaternions, 4] 101 | weights (tensor, optional): [B, num_quaternions]. Defaults to None. 102 | 103 | Returns: 104 | oriented_q_avg: average quaternion, [B, 4] 105 | """ 106 | 107 | if weights is None: 108 | weights = torch.ones((Q.shape[0], Q.shape[1]), device=Q.device) / Q.shape[1] 109 | A = torch.zeros((Q.shape[0], 4, 4), device=Q.device) 110 | weight_sum = torch.sum(weights, axis=-1) 111 | 112 | oriented_Q = ((Q[:, :, 0:1] > 0).float() - 0.5) * 2 * Q 113 | A = torch.einsum("abi,abk->abik", (oriented_Q, oriented_Q)) 114 | A = torch.sum(torch.einsum("abij,ab->abij", (A, weights)), 1) 115 | A /= weight_sum.reshape(A.shape[0], -1).unsqueeze(-1).repeat(1, 4, 4) 116 | 117 | q_avg = torch.linalg.eigh(A)[1][:, :, -1] 118 | oriented_q_avg = ((q_avg[:, 0:1] > 0).float() - 0.5) * 2 * q_avg 119 | return oriented_q_avg 120 | 121 | 122 | def average_quaternion_numpy(Q, W=None): 123 | if W is not None: 124 | Q *= W[:, None] 125 | eigvals, eigvecs = np.linalg.eig(Q.T@Q) 126 | return eigvecs[:, eigvals.argmax()] 127 | 128 | 129 | def normalize_rotation(rotation, rotation_mode): 130 | 131 | if rotation_mode == 'rot_matrix' or rotation_mode == 'rot_matrix_symtr': 132 | rot_matrix = get_rot_matrix(rotation) 133 | rotation[:, :3] = rot_matrix[:, :, 0] 134 | rotation[:, 3:6] = rot_matrix[:, :, 1] 135 | else: 136 | raise NotImplementedError 137 | return rotation 138 | 139 | 140 | if __name__ == '__main__': 141 | quat = torch.randn(2, 3, 4) 142 | quat = quat / torch.linalg.norm(quat, axis=-1).unsqueeze(-1) 143 | quat = average_quaternion_batch(quat) 144 | 145 | 146 | -------------------------------------------------------------------------------- /src/toolee/utils/operations_3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # since there are some installation problems with pytorch3d, we copy the following functions from pytorch3d 4 | 5 | def matrix_to_rotation_6d(matrix: torch.Tensor) -> torch.Tensor: 6 | """ 
7 | Copy from pytorch3d! 8 | Converts rotation matrices to 6D rotation representation by Zhou et al. [1] 9 | by dropping the last row. Note that 6D representation is not unique. 10 | Args: 11 | matrix: batch of rotation matrices of size (*, 3, 3) 12 | 13 | Returns: 14 | 6D rotation representation, of size (*, 6) 15 | 16 | [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H. 17 | On the Continuity of Rotation Representations in Neural Networks. 18 | IEEE Conference on Computer Vision and Pattern Recognition, 2019. 19 | Retrieved from http://arxiv.org/abs/1812.07035 20 | """ 21 | batch_dim = matrix.size()[:-2] 22 | return matrix[..., :2, :].clone().reshape(batch_dim + (6,)) 23 | 24 | def quaternion_to_matrix(quaternions: torch.Tensor) -> torch.Tensor: 25 | """ 26 | Copy from pytorch3d! 27 | Convert rotations given as quaternions to rotation matrices. 28 | 29 | Args: 30 | quaternions: quaternions with real part first, 31 | as tensor of shape (..., 4). 32 | 33 | Returns: 34 | Rotation matrices as tensor of shape (..., 3, 3). 35 | """ 36 | r, i, j, k = torch.unbind(quaternions, -1) 37 | # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. 38 | two_s = 2.0 / (quaternions * quaternions).sum(-1) 39 | 40 | o = torch.stack( 41 | ( 42 | 1 - two_s * (j * j + k * k), 43 | two_s * (i * j - k * r), 44 | two_s * (i * k + j * r), 45 | two_s * (i * j + k * r), 46 | 1 - two_s * (i * i + k * k), 47 | two_s * (j * k - i * r), 48 | two_s * (i * k - j * r), 49 | two_s * (j * k + i * r), 50 | 1 - two_s * (i * i + j * j), 51 | ), 52 | -1, 53 | ) 54 | return o.reshape(quaternions.shape[:-1] + (3, 3)) 55 | -------------------------------------------------------------------------------- /src/toolee/utils/pc2_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yl-wang996/ToolEENet/fd0130efdd03d5fe20b40808b1477fbbc722caf0/src/toolee/utils/pc2_utils.py -------------------------------------------------------------------------------- /src/toolee/utils/tracking_utils.py: -------------------------------------------------------------------------------- 1 | ''' modified from CAPTRA https://github.com/HalfSummer11/CAPTRA/tree/5d7d088c3de49389a90b5fae280e96409e7246c6 ''' 2 | 3 | import torch 4 | import copy 5 | import math 6 | from ipdb import set_trace 7 | 8 | def normalize(q): 9 | assert q.shape[-1] == 4 10 | norm = q.norm(dim=-1, keepdim=True) 11 | return q.div(norm) 12 | 13 | 14 | def matrix_to_unit_quaternion(matrix): 15 | assert matrix.shape[-1] == matrix.shape[-2] == 3 16 | if not isinstance(matrix, torch.Tensor): 17 | matrix = torch.tensor(matrix) 18 | 19 | trace = 1 + matrix[..., 0, 0] + matrix[..., 1, 1] + matrix[..., 2, 2] 20 | trace = torch.clamp(trace, min=0.) 
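    # For a proper rotation matrix, trace = 1 + m00 + m11 + m22 = 4*w^2, so r = sqrt(trace) = 2*w
    # and s = 1/(2*r) = 1/(4*w); the antisymmetric differences below (e.g. m21 - m12 = 4*w*x)
    # then recover x, y and z.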
21 | r = torch.sqrt(trace) 22 | s = 1.0 / (2 * r + 1e-7) 23 | w = 0.5 * r 24 | x = (matrix[..., 2, 1] - matrix[..., 1, 2])*s 25 | y = (matrix[..., 0, 2] - matrix[..., 2, 0])*s 26 | z = (matrix[..., 1, 0] - matrix[..., 0, 1])*s 27 | 28 | q = torch.stack((w, x, y, z), dim=-1) 29 | 30 | return normalize(q) 31 | 32 | 33 | def generate_random_quaternion(quaternion_shape): 34 | assert quaternion_shape[-1] == 4 35 | rand_norm = torch.randn(quaternion_shape) 36 | rand_q = normalize(rand_norm) 37 | return rand_q 38 | 39 | 40 | def jitter_quaternion(q, theta): #[Bs, 4], [Bs, 1] 41 | new_q = generate_random_quaternion(q.shape).to(q.device) 42 | dot_product = torch.sum(q*new_q, dim=-1, keepdim=True) # 43 | shape = (tuple(1 for _ in range(len(dot_product.shape) - 1)) + (4, )) 44 | q_orthogonal = normalize(new_q - q * dot_product.repeat(*shape)) 45 | # theta = 2arccos(|p.dot(q)|) 46 | # |p.dot(q)| = cos(theta/2) 47 | tile_theta = theta.repeat(shape) 48 | jittered_q = q*torch.cos(tile_theta/2) + q_orthogonal*torch.sin(tile_theta/2) 49 | 50 | return jittered_q 51 | 52 | 53 | def assert_normalized(q, atol=1e-3): 54 | assert q.shape[-1] == 4 55 | norm = q.norm(dim=-1) 56 | norm_check = (norm - 1.0).abs() 57 | try: 58 | assert torch.max(norm_check) < atol 59 | except: 60 | print("normalization failure: {}.".format(torch.max(norm_check))) 61 | return -1 62 | return 0 63 | 64 | 65 | def unit_quaternion_to_matrix(q): 66 | assert_normalized(q) 67 | w, x, y, z= torch.unbind(q, dim=-1) 68 | matrix = torch.stack(( 1 - 2*y*y - 2*z*z, 2*x*y - 2*z*w, 2*x*z + 2*y* w, 69 | 2*x*y + 2*z*w, 1 - 2*x*x - 2*z*z, 2*y*z - 2*x*w, 70 | 2*x*z - 2*y*w, 2*y*z + 2*x*w, 1 - 2*x*x -2*y*y), 71 | dim=-1) 72 | matrix_shape = list(matrix.shape)[:-1]+[3,3] 73 | return matrix.view(matrix_shape).contiguous() 74 | 75 | 76 | def noisy_rot_matrix(matrix, rad, type='normal'): 77 | if type == 'normal': 78 | theta = torch.abs(torch.randn_like(matrix[..., 0, 0])) * rad 79 | elif type == 'uniform': 80 | theta = torch.rand_like(matrix[..., 0, 0]) * rad 81 | quater = matrix_to_unit_quaternion(matrix) 82 | new_quater = jitter_quaternion(quater, theta.unsqueeze(-1)) 83 | new_mat = unit_quaternion_to_matrix(new_quater) 84 | return new_mat 85 | 86 | 87 | def add_noise_to_RT(RT, type='normal', r=5.0, t=0.03): 88 | rand_type = type # 'uniform' or 'normal' --> we use 'normal' 89 | 90 | def random_tensor(base): 91 | if rand_type == 'uniform': 92 | return torch.rand_like(base) * 2.0 - 1.0 93 | elif rand_type == 'normal': 94 | return torch.randn_like(base) 95 | new_RT = copy.deepcopy(RT) 96 | new_RT[:, :3, :3] = noisy_rot_matrix(RT[:, :3, :3], r/180*math.pi, type=rand_type).reshape(RT[:, :3, :3].shape) 97 | norm = random_tensor(RT[:, 0, 0]) * t # [B, P] 98 | direction = random_tensor(RT[:, :3, 3].squeeze(-1)) # [B, P, 3] 99 | direction = direction / torch.clamp(direction.norm(dim=-1, keepdim=True), min=1e-9) # [B, P, 3] unit vecs 100 | new_RT[:, :3, 3] = RT[:, :3, 3] + (direction * norm.unsqueeze(-1)) # [B, P, 3, 1] 101 | 102 | return new_RT 103 | 104 | -------------------------------------------------------------------------------- /src/toolee/utils/transform_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tf 3 | from tf.transformations import compose_matrix, decompose_matrix 4 | from utils import transforms 5 | import torch 6 | from utils.misc import average_quaternion_batch 7 | 8 | class TfUtils: 9 | 10 | @staticmethod 11 | def random_tf_M(): 12 | trans_offset_limit = 0.2 13 | 
trans_offset = np.random.uniform(low=-1, high=1, size=(3,)) * trans_offset_limit 14 | angle_offset = np.random.uniform(low=-1, high=1, size=(3,)) * np.pi 15 | rnd_M = TfUtils.pose_to_tf_M( 16 | translate=trans_offset, 17 | angles=angle_offset, 18 | ) 19 | return rnd_M 20 | 21 | @staticmethod 22 | def get_prior_pose(ee_prior_M, obj_M, scale): 23 | # ee_prior_M: the pose based on the obejct pose 24 | ee_trans, ee_quat = TfUtils.decompose_tf_M(ee_prior_M) 25 | ee_M = TfUtils.compose_tf_M(trans=ee_trans*scale, quat=ee_quat) 26 | ee_M = np.dot(obj_M, ee_M) 27 | return ee_M 28 | 29 | @staticmethod 30 | def rot_matrix_to_rotation_6d(rot_matrix) -> np.ndarray: 31 | """ 32 | Converts rotation matrices to 6D rotation representation by Zhou et al. [1] 33 | by dropping the last row. Note that 6D representation is not unique. 34 | Args: 35 | matrix: rotation matrices of size (3, 3) 36 | 37 | Returns: 38 | 6D rotation representation, of size (*, 6) 39 | 40 | [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H. 41 | On the Continuity of Rotation Representations in Neural Networks. 42 | IEEE Conference on Computer Vision and Pattern Recognition, 2019. 43 | Retrieved from http://arxiv.org/abs/1812.07035 44 | """ 45 | assert rot_matrix.shape == (3, 3) 46 | return np.reshape(rot_matrix[:2, :], newshape=(6,)) 47 | 48 | @staticmethod 49 | def matrix_to_9d_pose(matrix) -> np.ndarray: 50 | assert matrix.shape == (4, 4) 51 | rot_matrix = matrix[:3, :3] 52 | rot = TfUtils.rot_matrix_to_rotation_6d(rot_matrix) 53 | trans = matrix[:3, 3] 54 | return np.concatenate([rot,trans], axis=0) 55 | 56 | 57 | @staticmethod 58 | def compose_tf_M(trans, angles=None, quat=None,scale=np.array([1,1,1])): 59 | # M = compose_matrix(scale, shear, angles, trans, persp) 60 | # sequence of each transform 61 | # angles: xyz 62 | if angles is None: 63 | angles = TfUtils.quaternion_to_anglexyz(quat) 64 | M = compose_matrix( 65 | scale=np.asarray(scale), 66 | shear=None, 67 | angles=np.asarray(angles), 68 | translate=np.asarray(trans), 69 | perspective=None 70 | ) 71 | return M 72 | 73 | @staticmethod 74 | def pose_to_tf_M(translate, angles=None,quat=None): 75 | # angles here is radian 76 | assert angles is not None or quat is not None, 'either angle or quat must be provide' 77 | if angles is None: 78 | angles = TfUtils.quaternion_to_anglexyz(quat) 79 | M = compose_matrix( 80 | scale=None, 81 | shear=None, 82 | angles=np.asarray(angles), 83 | translate=np.asarray(translate), 84 | perspective=None 85 | ) 86 | return M 87 | 88 | @staticmethod 89 | def tf_M_to_pose(M): 90 | scale, shear, angles, translate, perspective = decompose_matrix(M) 91 | quat = TfUtils.anglexyz_to_quaternion(angles) 92 | return translate, quat 93 | 94 | @staticmethod 95 | def apply_tf_M_to_point(M, point): 96 | return np.dot(M,np.append(point,1))[:-1] 97 | 98 | @staticmethod 99 | def anglexyz_to_quaternion(angles): 100 | # quat: xyzw 101 | return tf.transformations.quaternion_from_euler(angles[0], angles[1], angles[2],axes='sxyz') 102 | 103 | @staticmethod 104 | def quaternion_to_anglexyz(quaternion): 105 | return tf.transformations.euler_from_quaternion(quaternion,axes='sxyz') # return angles in radian 106 | 107 | @staticmethod 108 | def decompose_tf_M(M): 109 | scale, shear, angles, trans, persp = decompose_matrix(M) 110 | quat = TfUtils.anglexyz_to_quaternion(angles) # xyzw 111 | return np.asarray(trans), np.asarray(quat) 112 | 113 | @staticmethod 114 | def concat_tf_M(matrices): 115 | M = np.identity(4) 116 | for i in matrices: 117 | M = np.dot(M, i) 118 | return M 
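    # Note on ordering: concat_tf_M above multiplies left to right (M = M1 @ M2 @ ... @ Mn), so
    # when the result is applied to a column-vector point, the last matrix in the list is the
    # first transform applied to that point.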
119 | 120 | @staticmethod 121 | def anglexyz_to_tf_M(anglexyz): 122 | return tf.transformations.euler_matrix(anglexyz[0], anglexyz[1], anglexyz[2], axes="sxyz") 123 | 124 | @staticmethod 125 | def tf_M_to_anglexyz(tf_M, axes="sxyz"): 126 | return tf.transformations.euler_from_matrix(tf_M, axes=axes) 127 | 128 | @staticmethod 129 | def get_avg_sRT(selected_sRT): 130 | ins_num = selected_sRT.shape[0] 131 | repeat_num = selected_sRT.shape[1] 132 | reshaped_selected_sRT = selected_sRT.reshape(ins_num * repeat_num, 4, 4) 133 | quat_wxyz = transforms.matrix_to_quaternion(torch.from_numpy(reshaped_selected_sRT[:, :3, :3])).cuda() 134 | quat_wxyz = torch.cat((quat_wxyz, torch.tensor(reshaped_selected_sRT[:, :3, 3]).to(quat_wxyz.device)), dim=-1) 135 | quat_wxyz = quat_wxyz.reshape(ins_num, repeat_num, -1) 136 | 137 | average_pred_pose = torch.zeros((quat_wxyz.shape[0], quat_wxyz.shape[-1])).to(quat_wxyz.device) 138 | average_pred_pose[:, :4] = average_quaternion_batch(quat_wxyz[:, :, :4]) 139 | average_pred_pose[:, 4:] = torch.mean(quat_wxyz[:, :, 4:], dim=1) 140 | average_sRT = np.identity(4)[np.newaxis, ...].repeat(ins_num, 0) 141 | average_sRT[:, :3, :3] = transforms.quaternion_to_matrix(average_pred_pose[:, :4]).cpu().numpy() 142 | average_sRT[:, :3, 3] = average_pred_pose[:, 4:].cpu().numpy() 143 | return average_sRT 144 | 145 | if __name__ == '__main__': 146 | t = [1, 2, 3] 147 | quat = TfUtils.anglexyz_to_quaternion([0, 0, 0]) # xyzw 148 | M = TfUtils.pose_to_tf_M( 149 | translate=t, 150 | quat=quat 151 | ) 152 | --------------------------------------------------------------------------------
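A minimal usage sketch of the DZI crop utilities in src/toolee/utils/datasets_utils.py: aug_bbox_DZI jitters a ground-truth bbox into a square ROI (center, scale) and crop_resize_by_warp_affine warps that ROI into a fixed-size square patch. The hyper-parameter values, the dummy image, and the assumption that this runs from src/toolee (so the utils package is importable) are illustrative only, not values taken from the project configs.

import numpy as np
import cv2
from utils.datasets_utils import aug_bbox_DZI, crop_resize_by_warp_affine

# Illustrative DZI settings (assumed values, not taken from the project configs).
hyper_params = {
    'DZI_TYPE': 'uniform',
    'DZI_SCALE_RATIO': 0.25,  # +/- 25% random scaling of the box side
    'DZI_SHIFT_RATIO': 0.25,  # +/- 25% random shift of the box center
    'DZI_PAD_SCALE': 1.5,     # enlarge the square box by 50%
}

img = np.zeros((480, 640, 3), dtype=np.uint8)       # dummy RGB image (H, W, 3)
bbox_xyxy = np.array([200.0, 150.0, 360.0, 330.0])  # some detected bbox (x1, y1, x2, y2)

# Randomly shifted/scaled square ROI around the bbox ...
center, scale = aug_bbox_DZI(hyper_params, bbox_xyxy, im_H=img.shape[0], im_W=img.shape[1])
# ... warped to a fixed 256x256 patch.
roi = crop_resize_by_warp_affine(img, center, scale, output_size=256)
print(roi.shape)  # (256, 256, 3)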
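A second sketch, assuming a CUDA-capable machine (TfUtils.get_avg_sRT moves tensors to the GPU internally) and that utils.transforms provides the quaternion helpers it imports: perturb ground-truth poses with add_noise_to_RT from utils/tracking_utils.py, then fuse repeated pose hypotheses into one averaged 4x4 pose per instance with TfUtils.get_avg_sRT from utils/transform_utils.py. The batch sizes and identity poses are placeholders.

import numpy as np
import torch
from utils.tracking_utils import add_noise_to_RT
from utils.transform_utils import TfUtils

# A batch of 4 identity poses [B, 4, 4], jittered with ~5 deg rotation noise
# and ~3 cm translation noise.
gt_RT = torch.eye(4).unsqueeze(0).repeat(4, 1, 1)
noisy_RT = add_noise_to_RT(gt_RT, type='normal', r=5.0, t=0.03)

# Two instances with 5 repeated pose hypotheses each, [ins_num, repeat_num, 4, 4],
# averaged into a single pose per instance (quaternion averaging + mean translation).
selected_sRT = np.tile(np.eye(4, dtype=np.float32), (2, 5, 1, 1))
avg_sRT = TfUtils.get_avg_sRT(selected_sRT)
print(noisy_RT.shape, avg_sRT.shape)  # torch.Size([4, 4, 4]) (2, 4, 4)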