├── Method ├── gym │ ├── __init__.py │ └── vlm_utils.py ├── position │ ├── __init__.py │ ├── .gitignore │ └── vlm_utils.py ├── vision │ ├── __init__.py │ ├── .gitignore │ ├── GroundedSAM │ │ ├── segment_anything │ │ │ ├── notebooks │ │ │ │ └── images │ │ │ │ │ ├── dog.jpg │ │ │ │ │ ├── truck.jpg │ │ │ │ │ └── groceries.jpg │ │ │ ├── .flake8 │ │ │ ├── segment_anything │ │ │ │ ├── utils │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── transforms.py │ │ │ │ │ └── onnx.py │ │ │ │ ├── modeling │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── common.py │ │ │ │ │ ├── mask_decoder.py │ │ │ │ │ └── sam.py │ │ │ │ ├── __init__.py │ │ │ │ ├── build_sam.py │ │ │ │ └── build_sam_hq.py │ │ │ ├── setup.cfg │ │ │ ├── setup.py │ │ │ ├── linter.sh │ │ │ ├── CONTRIBUTING.md │ │ │ ├── CODE_OF_CONDUCT.md │ │ │ ├── README.md │ │ │ └── scripts │ │ │ │ ├── export_onnx_model.py │ │ │ │ └── amg.py │ │ ├── .gitmodules │ │ ├── requirements.txt │ │ ├── .gitignore │ │ └── grounded_sam_demo.py │ ├── tranformation.py │ └── test_sam.py ├── isaacgym0 │ ├── .gitignore │ ├── config.yaml │ ├── utils.py │ └── asset_info.py ├── tasks ├── mask.png ├── test_image.png ├── utils │ ├── mesh.py │ ├── task_stat.py │ ├── vlm_utils.py │ └── get_assets.py ├── run_multiple.py ├── method_cfg.yaml ├── README.md ├── test_gym.py └── open6dor_gpt.py ├── assets ├── blender │ └── .gitignore ├── ckpts │ └── .gitignore ├── .gitignore ├── objects │ └── .gitignore ├── tasks │ ├── .gitignore │ └── task_refine_6dof_example │ │ └── behind │ │ └── 20240824-165044_no_interaction │ │ ├── isaac_render-rgb-0-0.png │ │ ├── isaac_render-rgb-0-1.png │ │ ├── isaac_render-rgb-0-2.png │ │ ├── isaac_render-rgb-0-3.png │ │ ├── isaac_render-rgb-0-4.png │ │ ├── gsam-gsam-mask-apple-0.npy │ │ ├── gsam-gsam-mask-apple-0.ply │ │ ├── gsam-gsam-mask-apple-0.png │ │ ├── gsam-gsam-mask-apple-1.npy │ │ ├── gsam-gsam-mask-apple-1.ply │ │ ├── gsam-gsam-mask-apple-1.png │ │ ├── gsam-gsam-mask-bottle-0.npy │ │ ├── gsam-gsam-mask-bottle-0.ply │ │ ├── gsam-gsam-mask-bottle-0.png │ │ ├── gsam-gsam-mask-bottle-1.npy │ │ ├── gsam-gsam-mask-bottle-1.ply │ │ ├── gsam-gsam-mask-bottle-1.png │ │ ├── isaac_render-depth-0-0.npy │ │ ├── isaac_render-depth-0-0.png │ │ ├── isaac_render-depth-0-1.npy │ │ ├── isaac_render-depth-0-1.png │ │ ├── isaac_render-depth-0-2.npy │ │ ├── isaac_render-depth-0-2.png │ │ ├── isaac_render-depth-0-3.npy │ │ ├── isaac_render-depth-0-3.png │ │ ├── isaac_render-depth-0-4.npy │ │ ├── isaac_render-depth-0-4.png │ │ ├── task_config_test.json │ │ └── task_config_new5.json └── robot │ └── franka_description │ ├── meshes │ ├── collision │ │ ├── finger.stl │ │ ├── hand.stl │ │ ├── link0.stl │ │ ├── link1.stl │ │ ├── link2.stl │ │ ├── link3.stl │ │ ├── link4.stl │ │ ├── link5.stl │ │ ├── link6.stl │ │ ├── link7.stl │ │ ├── stltoobj.bat │ │ ├── stltoobj.mlx │ │ └── finger.obj │ └── visual │ │ ├── daetoobj.mlx │ │ ├── daetoobj.bat │ │ ├── link1.mtl │ │ ├── link2.mtl │ │ ├── finger.mtl │ │ ├── link5.mtl │ │ ├── link4.mtl │ │ ├── link3.mtl │ │ ├── hand.mtl │ │ ├── link7.mtl │ │ ├── link0.mtl │ │ └── link6.mtl │ └── robots │ └── franka_panda.urdf ├── requirements.txt ├── images ├── teaser_final1.jpg ├── teaser_final1.pdf ├── overall_pipeline_final1.jpg └── overall_pipeline_final1.pdf ├── Benchmark ├── renderer │ ├── texture │ │ └── texture0.jpg │ └── run_Open6DOR_render.sh ├── .gitignore ├── bench_config.yaml ├── task_examples │ ├── rotation │ │ └── None │ │ │ └── mug_handle_left │ │ │ ├── 20240717-075819_no_interaction │ │ │ ├── before-rgb-0-0.png │ │ │ ├── before-rgb-0-1.png │ │ │ ├── 
before-rgb-0-2.png │ │ │ ├── before-rgb-0-3.png │ │ │ ├── task_config.json │ │ │ └── task_config_new.json │ │ │ └── 20240717-075911_no_interaction │ │ │ ├── before-rgb-0-0.png │ │ │ ├── before-rgb-0-1.png │ │ │ ├── before-rgb-0-2.png │ │ │ ├── before-rgb-0-3.png │ │ │ ├── task_config.json │ │ │ └── task_config_new.json │ ├── 6DoF │ │ └── behind │ │ │ └── Place_the_apple_behind_the_box_on_the_table.__upright │ │ │ └── 20240704-145831_no_interaction │ │ │ ├── before-rgb-0-0.png │ │ │ ├── before-rgb-0-1.png │ │ │ ├── before-rgb-0-2.png │ │ │ ├── before-rgb-0-3.png │ │ │ ├── task_config_new.json │ │ │ └── task_config.json │ └── position │ │ └── left │ │ └── Place_the_hammer_to_the_left_of_the_USB_on_the_table._ │ │ ├── 20240717-090658_no_interaction │ │ ├── before-rgb-0-0.png │ │ ├── before-rgb-0-1.png │ │ ├── before-rgb-0-2.png │ │ ├── before-rgb-0-3.png │ │ └── task_config.json │ │ └── 20240717-094704_no_interaction │ │ ├── before-rgb-0-0.png │ │ ├── before-rgb-0-1.png │ │ ├── before-rgb-0-2.png │ │ ├── before-rgb-0-3.png │ │ └── task_config.json ├── benchmark_catalogue │ └── error.txt ├── dataset │ └── objects │ │ └── scale.py ├── evaluation │ └── evaluator.py └── bench.py ├── .gitignore └── README.md /Method/gym/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Method/position/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Method/vision/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/blender/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/ckpts/.gitignore: -------------------------------------------------------------------------------- 1 | *pth -------------------------------------------------------------------------------- /assets/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | isaacgym/* -------------------------------------------------------------------------------- /Method/position/.gitignore: -------------------------------------------------------------------------------- 1 | openai_api.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | imageio 2 | bpy 3 | scipy 4 | -------------------------------------------------------------------------------- /Method/isaacgym0/.gitignore: -------------------------------------------------------------------------------- 1 | *.mp4 2 | *.png 3 | *.jpg -------------------------------------------------------------------------------- /Method/vision/.gitignore: -------------------------------------------------------------------------------- 1 | outputs/ 2 | segment-anything/ -------------------------------------------------------------------------------- /assets/objects/.gitignore: -------------------------------------------------------------------------------- 1 | objaverse_rescale/ 2 | ycb_16k_backup/ -------------------------------------------------------------------------------- /Method/tasks: 
-------------------------------------------------------------------------------- 1 | /home/haoran/Projects/Rearrangement/Open6DOR/Benchmark/tasks -------------------------------------------------------------------------------- /Method/mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Method/mask.png -------------------------------------------------------------------------------- /assets/tasks/.gitignore: -------------------------------------------------------------------------------- 1 | task_refine_6dof 2 | task_refine_rot_only 3 | task_refine_pos -------------------------------------------------------------------------------- /Method/test_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Method/test_image.png -------------------------------------------------------------------------------- /images/teaser_final1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/images/teaser_final1.jpg -------------------------------------------------------------------------------- /images/teaser_final1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/images/teaser_final1.pdf -------------------------------------------------------------------------------- /images/overall_pipeline_final1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/images/overall_pipeline_final1.jpg -------------------------------------------------------------------------------- /images/overall_pipeline_final1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/images/overall_pipeline_final1.pdf -------------------------------------------------------------------------------- /Benchmark/renderer/texture/texture0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/renderer/texture/texture0.jpg -------------------------------------------------------------------------------- /Benchmark/.gitignore: -------------------------------------------------------------------------------- 1 | dataset/objects/* 2 | *run_renderer.sh 3 | *.DS_Store 4 | tasks/ 5 | 6 | *error.txt 7 | evaluation/format.py 8 | -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/finger.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/finger.stl -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/hand.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/hand.stl -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/link0.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link0.stl -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/link1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link1.stl -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/link2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link2.stl -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/link3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link3.stl -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link4.stl -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link5.stl -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/link6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link6.stl -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/link7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link7.stl -------------------------------------------------------------------------------- /Benchmark/bench_config.yaml: -------------------------------------------------------------------------------- 1 | render: 2 | cam_quaternion: [0.0, 0.0, 0.0, 1.0] 3 | cam_translation: [0.0, 0.0, 1.0] 4 | background_material_id: 44 5 | env_map_id: 25 6 | -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/daetoobj.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/notebooks/images/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Method/vision/GroundedSAM/segment_anything/notebooks/images/dog.jpg 
-------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/notebooks/images/truck.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Method/vision/GroundedSAM/segment_anything/notebooks/images/truck.jpg -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/notebooks/images/groceries.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Method/vision/GroundedSAM/segment_anything/notebooks/images/groceries.jpg -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | [submodule "grounded-sam-osx"] 3 | path = grounded-sam-osx 4 | url = https://github.com/linjing7/grounded-sam-osx.git 5 | [submodule "VISAM"] 6 | path = VISAM 7 | url = https://github.com/BingfengYan/VISAM 8 | -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-0.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-1.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-2.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-3.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-4.png -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-0.png -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-1.png -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-2.png -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-3.png -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-0.png -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-1.png -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-2.png -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-3.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.npy -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.ply -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.npy -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.ply -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.npy -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.ply -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.npy -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.ply -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-0.npy -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-0.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-1.npy -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-1.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-2.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-2.npy -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-2.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-3.npy -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-3.png -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-4.npy -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-4.png -------------------------------------------------------------------------------- /Benchmark/renderer/run_Open6DOR_render.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mycount=0; 4 | while (( $mycount < 1)); do 5 | ./blender-2.93.3-linux-x64/blender material_lib_v2.blend --background --python open6dor_renderer.py -- $mycount; 6 | ((mycount=$mycount+1)); 7 | done; 8 | 9 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = W503, E203, E221, C901, C408, E741, C407, B017, F811, C101, EXE001, EXE002 3 | max-line-length = 100 4 | max-complexity = 18 5 | select = B,C,E,F,W,T4,B9 6 | per-file-ignores = 7 | **/__init__.py:F401,F403,E402 8 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/segment_anything/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /Method/isaacgym0/config.yaml: -------------------------------------------------------------------------------- 1 | SAVE_VIDEO: True 2 | SEED: 42 3 | STEPS: 1000 4 | num_envs: 256 5 | controller: ik 6 | 7 | # asset 8 | asset_root: ../assets 9 | asset_file: urdf/ycb/025_mug/025_mug_new.urdf 10 | 11 | # robot 12 | franka_asset_file: urdf/franka_description/robots/franka_panda.urdf -------------------------------------------------------------------------------- /Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-0.png -------------------------------------------------------------------------------- /Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-1.png -------------------------------------------------------------------------------- /Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-2.png -------------------------------------------------------------------------------- /Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-3.png -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/daetoobj.bat: -------------------------------------------------------------------------------- 1 | SET PATH=%PATH%;C:/Tools/Assimp/bin/x64/ 2 | forfiles /m *.dae /c "cmd /c assimp export @file @fname.obj --verbose --show-log -ptv" 3 | 4 | REM SET PATH=%PATH%;C:/Program Files/VCG/MeshLab/ 5 | REM forfiles /m *.dae /c "cmd /c meshlabserver -i @file -o @fname.obj -m vn vt -------------------------------------------------------------------------------- /Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-0.png -------------------------------------------------------------------------------- /Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-1.png -------------------------------------------------------------------------------- /Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-2.png -------------------------------------------------------------------------------- /Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-3.png -------------------------------------------------------------------------------- /Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-0.png -------------------------------------------------------------------------------- /Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-1.png -------------------------------------------------------------------------------- /Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-2.png -------------------------------------------------------------------------------- /Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-3.png -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/stltoobj.bat: -------------------------------------------------------------------------------- 1 | REM SET PATH=%PATH%;C:/Tools/Assimp/bin/x64/ 2 | REM forfiles /m *.dae /c "cmd /c assimp export @file @fname.obj --verbose --show-log -ptv" 3 | 4 | SET PATH=%PATH%;C:/Program Files/VCG/MeshLab/ 5 | forfiles /m *.stl /c "cmd /c meshlabserver -i @file -o @fname.obj -m vn -s stltoobj.mlx" -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/link1.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 1 3 | 4 | newmtl Part__Feature_001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 1.000000 1.000000 8 | Ks 0.062500 0.062500 0.062500 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/link2.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 1 3 | 4 | newmtl Part__Feature024 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 1.000000 1.000000 8 | Ks 0.125000 0.125000 0.125000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/requirements.txt: -------------------------------------------------------------------------------- 1 | addict 2 | diffusers 3 | gradio 4 | huggingface_hub 5 | matplotlib 6 | numpy 7 | onnxruntime 8 | opencv_python 9 | Pillow 10 | pycocotools 11 | PyYAML 12 | requests 13 | setuptools 14 | supervision 15 | termcolor 16 | timm 17 | torch 18 | torchvision 19 | transformers 20 | yapf 21 | nltk 22 | fairscale 23 | litellm 24 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=3 4 | include_trailing_comma=True 5 | known_standard_library=numpy,setuptools 6 | skip_glob=*/__init__.py 7 | known_myself=segment_anything 8 | known_third_party=matplotlib,cv2,torch,torchvision,pycocotools,onnx,black,isort 9 | no_lines_before=STDLIB,THIRDPARTY 10 | sections=FUTURE,STDLIB,THIRDPARTY,MYSELF,FIRSTPARTY,LOCALFOLDER 11 | default_section=FIRSTPARTY 12 | -------------------------------------------------------------------------------- /Method/utils/mesh.py: -------------------------------------------------------------------------------- 1 | import trimesh 2 | 3 | # Load a mesh from OBJ file 4 | mesh = trimesh.load('/home/haoran/Projects/Rearrangement/Open6DOR/Method/assets/objaverse_final_norm/69511a7fad2f42ee8c4b0579bbc8fec6/material.obj') 5 | 6 | # Translate mesh to its centroid 7 | mesh.apply_translation(-mesh.centroid) 8 | 9 | import pdb; pdb.set_trace() 10 | # Scale the mesh (1 unit here) 11 | 
scale_factor = 1.0 / mesh.bounding_box.extents.max() 12 | mesh.apply_scale(scale_factor) 13 | 14 | # save the new mesh to OBJ file 15 | mesh.export('output.obj') -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/segment_anything/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .sam import Sam 8 | from .image_encoder import ImageEncoderViT 9 | from .mask_decoder_hq import MaskDecoderHQ 10 | from .mask_decoder import MaskDecoder 11 | from .prompt_encoder import PromptEncoder 12 | from .transformer import TwoWayTransformer 13 | -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/finger.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Part__Feature001_006 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.901961 0.921569 0.929412 8 | Ks 0.250000 0.250000 0.250000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Part__Feature_007 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 0.250980 0.250980 0.250980 18 | Ks 0.250000 0.250000 0.250000 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 1.000000 21 | d 1.000000 22 | illum 2 23 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from setuptools import find_packages, setup 8 | 9 | setup( 10 | name="segment_anything", 11 | version="1.0", 12 | install_requires=[], 13 | packages=find_packages(exclude="notebooks"), 14 | extras_require={ 15 | "all": ["matplotlib", "pycocotools", "opencv-python", "onnx", "onnxruntime"], 16 | "dev": ["flake8", "isort", "black", "mypy"], 17 | }, 18 | ) 19 | -------------------------------------------------------------------------------- /Method/run_multiple.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | import argparse 3 | 4 | # add args 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('--n', type=int, default=100) 7 | #parser.add_argument('--f', type=str, default="python reconstruction/mesh_reconstruction.py") 8 | parser.add_argument('--f', type=str, default="python interaction.py --mode gen_task --task_root rot_banch_0717 ") 9 | # parser.add_argument('--f', type=str, default="python interaction.py --mode gen_task_pure_rot --task_root rot_banch_0717_pure_rot ") 10 | #parser.add_argument('--f', type=str, default="python overall_clip.py") 11 | 12 | 13 | 14 | args = parser.parse_args() 15 | 16 | for i in range(args.n): 17 | os.system(args.f) 18 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/segment_anything/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .build_sam import ( 8 | build_sam, 9 | build_sam_vit_h, 10 | build_sam_vit_l, 11 | build_sam_vit_b, 12 | sam_model_registry, 13 | ) 14 | from .build_sam_hq import ( 15 | build_sam_hq, 16 | build_sam_hq_vit_h, 17 | build_sam_hq_vit_l, 18 | build_sam_hq_vit_b, 19 | sam_hq_model_registry, 20 | ) 21 | from .predictor import SamPredictor 22 | from .automatic_mask_generator import SamAutomaticMaskGenerator 23 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | { 5 | black --version | grep -E "23\." > /dev/null 6 | } || { 7 | echo "Linter requires 'black==23.*' !" 8 | exit 1 9 | } 10 | 11 | ISORT_VERSION=$(isort --version-number) 12 | if [[ "$ISORT_VERSION" != 5.12* ]]; then 13 | echo "Linter requires isort==5.12.0 !" 14 | exit 1 15 | fi 16 | 17 | echo "Running isort ..." 18 | isort . --atomic 19 | 20 | echo "Running black ..." 21 | black -l 100 . 22 | 23 | echo "Running flake8 ..." 24 | if [ -x "$(command -v flake8)" ]; then 25 | flake8 . 26 | else 27 | python3 -m flake8 . 28 | fi 29 | 30 | echo "Running mypy..." 31 | 32 | mypy --exclude 'setup.py|notebooks' . 
33 | -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/link5.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 3 3 | 4 | newmtl Part__Feature_002_004_003 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 1.000000 1.000000 8 | Ks 0.015625 0.015625 0.015625 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Shell001_001_001_003 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 0.250000 0.250000 0.250000 18 | Ks 0.015625 0.015625 0.015625 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 1.000000 21 | d 1.000000 22 | illum 2 23 | 24 | newmtl Shell_001_001_003 25 | Ns -1.960784 26 | Ka 1.000000 1.000000 1.000000 27 | Kd 1.000000 1.000000 1.000000 28 | Ks 0.015625 0.015625 0.015625 29 | Ke 0.000000 0.000000 0.000000 30 | Ni 1.000000 31 | d 1.000000 32 | illum 2 33 | -------------------------------------------------------------------------------- /Method/method_cfg.yaml: -------------------------------------------------------------------------------- 1 | DEVICE: cuda:0 2 | INFERENCE_GSAM: True 3 | SAVE_RENDER: True 4 | VISUALIZE: True 5 | 6 | position: 7 | 8 | rotation: 9 | 10 | vision: 11 | sam_checkpoint_path: ../assets/ckpts/sam_vit_h_4b8939.pth 12 | grounded_checkpoint_path: ../assets/ckpts/groundingdino_swint_ogc.pth 13 | config_path: ./vision/GroundedSAM/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py 14 | box_threshold: 0.3 15 | text_threshold: 0.25 16 | sam_version: vit_h 17 | 18 | cam: 19 | vinv: [[ 0. , 1. , 0. , 0. ], 20 | [-0.9028605 , -0. , 0.42993355, -0. ], 21 | [ 0.42993355, -0. , 0.9028605 , -0. ], 22 | [ 1. , 0. , 1.2 , 1. ]] 23 | proj: [[ 1.7320507, 0. , 0. , 0. ], 24 | [ 0. , 2.5980759, 0. , 0. ], 25 | [ 0. , 0. , 0. , -1. ], 26 | [ 0. , 0. , 0.05 , 0. ]] -------------------------------------------------------------------------------- /Benchmark/benchmark_catalogue/error.txt: -------------------------------------------------------------------------------- 1 | "ae7142127dd84ebbbe7762368ace452c": { shoe->mug } 2 | 072-b no upright, wrong category(toy->glue gun) 3 | 019 trans 4 | 024 trans 5 | 040 trans 6 | 065-a trans 7 | 065-b trans 8 | 065-c trans 9 | 065-d trans 10 | 065-f trans 11 | 065-g trans 12 | 065-j trans 13 | d5a5f0a954f94bcea3168329d1605fe9: shoe->mu 14 | 048 hammer trans 15 | 033 trans 16 | 8a6cb4f7b0004f53830e270dc6e1ff1d handle_left/right xx(no handle) 17 | 025 trans 18 | rewrite "tip_left" and "tip_right"'s prompt 19 | f47fdcf9615d4e94a71e6731242a4c94 wierd mesh 20 | dbb07d13a33546f09ac8ca98b1ddef20 wallet has no clasp (instruction) 21 | 032 trans 22 | d9675ab05c39447baf27e19ea07d484e lighter pointing forth(facing the viewer)-instruction 23 | note! "forth" rotation equivalence 24 | note! "spout left" needs to be upright 25 | note! "cap" forth rotation equivalence 26 | note! 
"cap" forth rotation equivalence 27 | 022 wierd mesh 28 | 29 | blender no texture: 9660e0c0326b4f7386014e27717231ae, ycb 04 08 09, 5de830b2cccf4fe7a2e6b400abf26ca7 -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/link4.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 4 3 | 4 | newmtl Part__Feature001_001_003_001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 1.000000 1.000000 8 | Ks 0.007812 0.007812 0.007812 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Part__Feature002_001_003_001 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 0.250980 0.250980 0.250980 18 | Ks 0.007812 0.007812 0.007812 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 1.000000 21 | d 1.000000 22 | illum 2 23 | 24 | newmtl Part__Feature003_001_003_001 25 | Ns -1.960784 26 | Ka 1.000000 1.000000 1.000000 27 | Kd 1.000000 1.000000 1.000000 28 | Ks 0.007812 0.007812 0.007812 29 | Ke 0.000000 0.000000 0.000000 30 | Ni 1.000000 31 | d 1.000000 32 | illum 2 33 | 34 | newmtl Part__Feature_002_003_001 35 | Ns -1.960784 36 | Ka 1.000000 1.000000 1.000000 37 | Kd 1.000000 1.000000 1.000000 38 | Ks 0.007812 0.007812 0.007812 39 | Ke 0.000000 0.000000 0.000000 40 | Ni 1.000000 41 | d 1.000000 42 | illum 2 43 | -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/link3.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 4 3 | 4 | newmtl Part__Feature001_010_001_002.001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 1.000000 1.000000 8 | Ks 0.007812 0.007812 0.007812 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Part__Feature002_007_001_002.001 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 1.000000 1.000000 1.000000 18 | Ks 0.007812 0.007812 0.007812 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 1.000000 21 | d 1.000000 22 | illum 2 23 | 24 | newmtl Part__Feature003_004_001_002.001 25 | Ns -1.960784 26 | Ka 1.000000 1.000000 1.000000 27 | Kd 1.000000 1.000000 1.000000 28 | Ks 0.007812 0.007812 0.007812 29 | Ke 0.000000 0.000000 0.000000 30 | Ni 1.000000 31 | d 1.000000 32 | illum 2 33 | 34 | newmtl Part__Feature_001_001_001_002.001 35 | Ns -1.960784 36 | Ka 1.000000 1.000000 1.000000 37 | Kd 0.250980 0.250980 0.250980 38 | Ks 0.007812 0.007812 0.007812 39 | Ke 0.000000 0.000000 0.000000 40 | Ni 1.000000 41 | d 1.000000 42 | illum 2 43 | -------------------------------------------------------------------------------- /Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/task_config.json: -------------------------------------------------------------------------------- 1 | {"orientation": "left", "rotation": "None", "selected_obj_names": ["USB", "hammer"], "selected_urdfs": ["objaverse_final_norm/0a51815f3c0941ae8312fc6917173ed6/material_2.urdf", "objaverse_final_norm/8ed38a92668a425eb16da938622d9ace/material_2.urdf"], "target_obj_name": "hammer", "instruction": "Place the hammer to the left of the USB on the table. 
", "init_obj_pos": [[0.5523672103881836, -0.1767720878124237, 0.30958184599876404, -0.16768784821033478, -0.42019906640052795, 0.01495102047920227, 0.8916782140731812, 0.00046477484283968806, 0.0010078288614749908, -0.00030404693097807467, -0.10503458976745605, 0.03628098964691162, -0.002049945993348956], [0.5076466798782349, -0.05766259878873825, 0.30820930004119873, -0.5712552666664124, 0.4136405289173126, -0.41678178310394287, 0.5734648108482361, 0.001841548248194158, 0.003947087097913027, 0.005498047918081284, 0.7908462882041931, -0.034841056913137436, 0.027878539636731148]], "position_instruction": "Place the hammer to the left of the USB on the table. "} -------------------------------------------------------------------------------- /Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/task_config.json: -------------------------------------------------------------------------------- 1 | {"orientation": "left", "rotation": "None", "selected_obj_names": ["USB", "hammer"], "selected_urdfs": ["objaverse_final_norm/0a51815f3c0941ae8312fc6917173ed6/material_2.urdf", "objaverse_final_norm/35a76a67ea1c45edabbd5013de70d68d/material_2.urdf"], "target_obj_name": "hammer", "instruction": "Place the hammer to the left of the USB on the table. ", "init_obj_pos": [[0.5709131360054016, 0.2073042243719101, 0.3095809519290924, -0.17370298504829407, -0.4178505837917328, 0.0022908926475793123, 0.8917526602745056, -0.0003591739514376968, 0.0003141180204693228, -0.0003524061758071184, -0.03348350524902344, -0.04323001950979233, -0.00611852714791894], [0.4233412742614746, -0.10578499734401703, 0.32568830251693726, 0.0025873545091599226, 0.0003954840067308396, 0.12344525009393692, 0.9923479557037354, 0.0007402655319310725, -0.003524358617141843, -0.002587254624813795, 0.10105752944946289, 0.06055070459842682, 0.00236650463193655]], "position_instruction": "Place the hammer to the left of the USB on the table. 
"} -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/task_config.json: -------------------------------------------------------------------------------- 1 | {"orientation": "None", "rotation": "None", "selected_obj_names": ["mug"], "selected_urdfs": ["objaverse_final_norm/ca4f9a92cc2f4ee98fe9332db41bf7f7/material_2.urdf"], "target_obj_name": "mug", "instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "init_obj_pos": [[0.6550417542457581, 0.05568762868642807, 0.3321579694747925, 0.07643917948007584, 0.21541181206703186, -0.12756481766700745, 0.9651331901550293, -0.004337493795901537, 0.004771982319653034, -0.0002449209277983755, -0.10857345163822174, -0.09869785606861115, -0.002580456668511033]], "position_instruction": "", "rotation_instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "rotation_instruction_label": "handle_left"} -------------------------------------------------------------------------------- /Method/utils/task_stat.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 3 | paths = glob.glob('Method/output/rot_banch_0704/*/*/*/task_config.json') 4 | print("total tasks", len(paths)) 5 | position_tags = set([paths[i].split('/')[-4] for i in range(len(paths))]) 6 | print(position_tags) 7 | for position_tag in position_tags: 8 | print(position_tag, len([paths[i] for i in range(len(paths)) if paths[i].split('/')[-4] == position_tag])) 9 | paths = glob.glob('Method/output/rot_banch_0717/*/*/*/task_config.json') 10 | print("total tasks", len(paths)) 11 | position_tags = set([paths[i].split('/')[-4] for i in range(len(paths))]) 12 | print(position_tags) 13 | for position_tag in position_tags: 14 | print(position_tag, len([paths[i] for i in range(len(paths)) if paths[i].split('/')[-4] == position_tag])) 15 | paths = glob.glob('Method/output/rot_banch_0717_pure_rot/*/*/*/task_config.json') 16 | 17 | print("total tasks", len(paths)) 18 | position_tags = set([paths[i].split('/')[-4] for i in range(len(paths))]) 19 | print(position_tags) 20 | for position_tag in position_tags: 21 | print(position_tag, len([paths[i] for i in range(len(paths)) if paths[i].split('/')[-4] == position_tag])) -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/task_config.json: -------------------------------------------------------------------------------- 1 | {"orientation": "None", "rotation": "None", "selected_obj_names": ["mug"], "selected_urdfs": ["objaverse_final_norm/db9345f568e8499a9eac2577302b5f51/material_2.urdf"], "target_obj_name": "mug", "instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "init_obj_pos": [[0.6686422824859619, 0.11716754734516144, 0.34889549016952515, -0.006926149129867554, 0.25072675943374634, 
0.026660921052098274, 0.9676658511161804, -0.001081045251339674, 0.0014700093306601048, -0.0009055532282218337, -0.03115496225655079, -0.024703728035092354, 0.0006507631042040884]], "position_instruction": "", "rotation_instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "rotation_instruction_label": "handle_left"} -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/hand.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 5 3 | 4 | newmtl Part__Feature001_008_005 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.250980 0.250980 0.250980 8 | Ks 0.007812 0.007812 0.007812 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Part__Feature002_005_005 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 0.901961 0.921569 0.929412 18 | Ks 0.015625 0.015625 0.015625 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 1.000000 21 | d 1.000000 22 | illum 2 23 | 24 | newmtl Part__Feature005_001_005 25 | Ns -1.960784 26 | Ka 1.000000 1.000000 1.000000 27 | Kd 1.000000 1.000000 1.000000 28 | Ks 0.015625 0.015625 0.015625 29 | Ke 0.000000 0.000000 0.000000 30 | Ni 1.000000 31 | d 1.000000 32 | illum 2 33 | 34 | newmtl Part__Feature005_001_005_001 35 | Ns -1.960784 36 | Ka 1.000000 1.000000 1.000000 37 | Kd 0.901961 0.921569 0.929412 38 | Ks 0.015625 0.015625 0.015625 39 | Ke 0.000000 0.000000 0.000000 40 | Ni 1.000000 41 | d 1.000000 42 | illum 2 43 | 44 | newmtl Part__Feature_009_005 45 | Ns -1.960784 46 | Ka 1.000000 1.000000 1.000000 47 | Kd 0.250980 0.250980 0.250980 48 | Ks 0.015625 0.015625 0.015625 49 | Ke 0.000000 0.000000 0.000000 50 | Ni 1.000000 51 | d 1.000000 52 | illum 2 53 | -------------------------------------------------------------------------------- /Method/vision/tranformation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | 4 | 5 | 6 | def quaternion_to_matrix(q): 7 | """ 8 | Convert a quaternion into a 3x3 rotation matrix. 9 | """ 10 | qw, qx, qy, qz = q 11 | return np.array([ 12 | [1 - 2*qy*qy - 2*qz*qz, 2*qx*qy - 2*qz*qw, 2*qx*qz + 2*qy*qw], 13 | [2*qx*qy + 2*qz*qw, 1 - 2*qx*qx - 2*qz*qz, 2*qy*qz - 2*qx*qw], 14 | [2*qx*qz - 2*qy*qw, 2*qy*qz + 2*qx*qw, 1 - 2*qx*qx - 2*qy*qy] 15 | ]) 16 | 17 | def create_transformation_matrix(position, quaternion): 18 | """ 19 | Create a 4x4 transformation matrix from position and quaternion. 
20 | """ 21 | x, y, z = position 22 | q = quaternion 23 | 24 | rotation_matrix = quaternion_to_matrix(q) 25 | 26 | transformation_matrix = np.identity(4) 27 | transformation_matrix[:3, :3] = rotation_matrix 28 | transformation_matrix[:3, 3] = [x, y, z] 29 | 30 | return transformation_matrix 31 | 32 | config_path = "output/gym_outputs_task_gen_obja_0304_rot/center/Place_the_mouse_at_the_center_of_all_the_objects_on_the_table.__upright/20240630-202931_no_interaction/task_config.json" 33 | 34 | config = json.load(open(config_path, "r")) 35 | pos_s = config["init_obj_pos"] 36 | for pos in pos_s: 37 | position = pos[:3] 38 | quaternion = pos[3:7] # Example quaternion 39 | transformation_matrix = create_transformation_matrix(position, quaternion) 40 | 41 | print(transformation_matrix) 42 | -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/task_config_new.json: -------------------------------------------------------------------------------- 1 | {"orientation": "None", "rotation": "None", "selected_obj_names": ["mug"], "selected_urdfs": ["objaverse_final_norm/ca4f9a92cc2f4ee98fe9332db41bf7f7/material_2.urdf"], "target_obj_name": "mug", "instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "init_obj_pos": [[0.6550417542457581, 0.05568762868642807, 0.3321579694747925, 0.07643917948007584, 0.21541181206703186, -0.12756481766700745, 0.9651331901550293, -0.004337493795901537, 0.004771982319653034, -0.0002449209277983755, -0.10857345163822174, -0.09869785606861115, -0.002580456668511033]], "position_instruction": "", "rotation_instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "rotation_instruction_label": "handle_left", "obj_codes": ["ca4f9a92cc2f4ee98fe9332db41bf7f7"], "target_obj_code": "ca4f9a92cc2f4ee98fe9332db41bf7f7", "anno_target": {"category": "mug", "annotation": {" the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).": {"quat": [[0.5, -0.5, -0.5, 0.4999999701976776]], "stage": 1}}}} -------------------------------------------------------------------------------- /Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/task_config_new.json: -------------------------------------------------------------------------------- 1 | {"orientation": "None", "rotation": "None", "selected_obj_names": ["mug"], "selected_urdfs": ["objaverse_final_norm/db9345f568e8499a9eac2577302b5f51/material_2.urdf"], "target_obj_name": "mug", "instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "init_obj_pos": [[0.6686422824859619, 0.11716754734516144, 0.34889549016952515, -0.006926149129867554, 0.25072675943374634, 0.026660921052098274, 0.9676658511161804, -0.001081045251339674, 0.0014700093306601048, -0.0009055532282218337, -0.03115496225655079, -0.024703728035092354, 
0.0006507631042040884]], "position_instruction": "", "rotation_instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "rotation_instruction_label": "handle_left", "obj_codes": ["db9345f568e8499a9eac2577302b5f51"], "target_obj_code": "db9345f568e8499a9eac2577302b5f51", "anno_target": {"category": "mug", "annotation": {" the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).": {"quat": [[0.5, -0.5, -0.5, 0.4999999701976776]], "stage": 1}}}} -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to segment-anything 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints, using the `linter.sh` script in the project's root directory. Linting requires `black==23.*`, `isort==5.12.0`, `flake8`, and `mypy`. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to segment-anything, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. 
32 | -------------------------------------------------------------------------------- /Method/position/vlm_utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import requests 3 | 4 | # OpenAI API Key 5 | import os 6 | API_KEY = os.getenv("API_KEY") 7 | if API_KEY is None: 8 | raise ValueError("please set API_KEY environment variable by running `export API_KEY=XXXX`") 9 | # Function to encode the image 10 | def encode_image(image_path): 11 | with open(image_path, "rb") as image_file: 12 | return base64.b64encode(image_file.read()).decode('utf-8') 13 | 14 | def infer_path(prompt, path): 15 | # Getting the base64 string 16 | base64_image = encode_image(path) 17 | 18 | headers = { 19 | "Content-Type": "application/json", 20 | "Authorization": f"Bearer {API_KEY}" 21 | } 22 | 23 | payload = { 24 | "model": "gpt-4o", 25 | "messages": [ 26 | { 27 | "role": "user", 28 | "content": [ 29 | { 30 | "type": "text", 31 | "text": prompt 32 | }, 33 | { 34 | "type": "image_url", 35 | "image_url": { 36 | "url": f"data:image/jpeg;base64,{base64_image}" 37 | } 38 | } 39 | ] 40 | } 41 | ], 42 | "max_tokens": 300 43 | } 44 | 45 | response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) 46 | 47 | # print(response.json()) 48 | return response 49 | 50 | 51 | if __name__ == "__main__": 52 | prompt = "descripbe this image" 53 | path = "./vision/1.jpg" 54 | response = infer_path(prompt, path) 55 | print(response.json()) 56 | -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/stltoobj.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/segment_anything/modeling/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from typing import Type 11 | 12 | 13 | class MLPBlock(nn.Module): 14 | def __init__( 15 | self, 16 | embedding_dim: int, 17 | mlp_dim: int, 18 | act: Type[nn.Module] = nn.GELU, 19 | ) -> None: 20 | super().__init__() 21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim) 22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim) 23 | self.act = act() 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | return self.lin2(self.act(self.lin1(x))) 27 | 28 | 29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa 30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa 31 | class LayerNorm2d(nn.Module): 32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None: 33 | super().__init__() 34 | self.weight = nn.Parameter(torch.ones(num_channels)) 35 | self.bias = nn.Parameter(torch.zeros(num_channels)) 36 | self.eps = eps 37 | 38 | def forward(self, x: torch.Tensor) -> torch.Tensor: 39 | u = x.mean(1, keepdim=True) 40 | s = (x - u).pow(2).mean(1, keepdim=True) 41 | x = (x - u) / torch.sqrt(s + self.eps) 42 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 43 | return x 44 | -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/task_config_test.json: -------------------------------------------------------------------------------- 1 | {"position_tag": "behind", "rotation": "None", "selected_obj_names": ["bottle", "tissue box", "apple"], "selected_urdfs": ["ycb_16k_backup/006_mustard_bottle_google_16k/006_mustard_bottle_google_16k.urdf", "objaverse_rescale/dc4c91abf45342b4bb8822f50fa162b2/material_2.urdf", "objaverse_rescale/fbda0b25f41f40958ea984f460e4770b/material_2.urdf"], "target_obj_name": "apple", "instruction": "Place the apple behind the bottle on the table. We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.", "init_obj_pos": [[0.3738532066345215, 0.17327244579792023, 0.30287155508995056, 5.603695899480954e-05, -3.935253698728047e-05, -0.03753087669610977, 0.9992955327033997, 0.0029977706726640463, 0.001985779032111168, -0.0012033769162371755, -0.03269371762871742, 0.04539608955383301, -0.03798031061887741], [0.44172099232673645, -0.32238009572029114, 0.3753003478050232, 0.7060639262199402, -0.037992026656866074, -0.037284620106220245, 0.7061444520950317, -0.00012565749057102948, 0.0002828052965924144, 0.00027510791551321745, -0.005133399739861488, -0.002302509034052491, 0.0013929366832599044], [0.5476588606834412, -0.07213786244392395, 0.3492436110973358, 0.11362186074256897, 0.05067095533013344, -0.08851055055856705, 0.9882755279541016, 0.004178161732852459, -0.00013288251648191363, -0.000834679405670613, 0.0010649901814758778, 0.08433020859956741, -0.0004798930021934211]], "position_instruction": "Place the apple behind the bottle on the table. 
We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up."} -------------------------------------------------------------------------------- /Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/task_config_new.json: -------------------------------------------------------------------------------- 1 | {"orientation": "behind", "rotation": "None", "selected_obj_names": ["box", "apple"], "selected_urdfs": ["objaverse_final_norm/9660e0c0326b4f7386014e27717231ae/material_2.urdf", "objaverse_final_norm/f53d75bd123b40bca14d12d54286f432/material_2.urdf"], "target_obj_name": "apple", "instruction": "Place the apple behind the box on the table. We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.", "init_obj_pos": [[0.5763212442398071, 0.24244019389152527, 0.3158315122127533, 0.00011814905155915767, 3.0217168387025595e-05, 0.057858873158693314, 0.9983247518539429, 0.0005872970796190202, 0.00024345181009266526, 1.8670303688850254e-05, 0.0013161733513697982, -0.0011025663698092103, -0.001989496871829033], [0.4732729494571686, 0.19301258027553558, 0.34965574741363525, 0.08372167497873306, -0.015573234297335148, -0.0979083776473999, 0.9915453195571899, 0.004182836972177029, 0.0017127282917499542, -0.001595060108229518, -0.02539602667093277, 0.09032362699508667, 0.01703813299536705]], "position_instruction": "Place the apple behind the box on the table. ", "rotation_instruction": "We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.", "rotation_instruction_label": "upright", "obj_codes": ["9660e0c0326b4f7386014e27717231ae", "f53d75bd123b40bca14d12d54286f432"], "target_obj_code": "f53d75bd123b40bca14d12d54286f432", "anno_target": {"category": "apple", "annotation": {" the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.": {"quat": [[0.7071067690849304, 0.0, 0.0, 0.7071067690849304]], "stage": 1}}}} -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/link7.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 8 3 | 4 | newmtl Part__Mirroring001_004_002 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.250980 0.250980 0.250980 8 | Ks 0.015625 0.015625 0.015625 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Part__Mirroring002_004_001 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 0.250980 0.250980 0.250980 18 | Ks 0.031250 0.031250 0.031250 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 1.000000 21 | d 1.000000 22 | illum 2 23 | 24 | newmtl Part__Mirroring003_004_001 25 | Ns -1.960784 26 | Ka 1.000000 1.000000 1.000000 27 | Kd 0.250980 0.250980 0.250980 28 | Ks 0.031250 0.031250 0.031250 29 | Ke 0.000000 0.000000 0.000000 30 | Ni 1.000000 31 | d 1.000000 32 | illum 2 33 | 34 | newmtl Part__Mirroring004_004_002 35 | Ns -1.960784 36 | Ka 1.000000 1.000000 1.000000 37 | Kd 1.000000 1.000000 1.000000 38 | Ks 0.031250 0.031250 0.031250 39 | Ke 
0.000000 0.000000 0.000000 40 | Ni 1.000000 41 | d 1.000000 42 | illum 2 43 | 44 | newmtl Part__Mirroring005_004_001 45 | Ns -1.960784 46 | Ka 1.000000 1.000000 1.000000 47 | Kd 0.250980 0.250980 0.250980 48 | Ks 0.031250 0.031250 0.031250 49 | Ke 0.000000 0.000000 0.000000 50 | Ni 1.000000 51 | d 1.000000 52 | illum 2 53 | 54 | newmtl Part__Mirroring006_004_001 55 | Ns -1.960784 56 | Ka 1.000000 1.000000 1.000000 57 | Kd 0.250980 0.250980 0.250980 58 | Ks 0.031250 0.031250 0.031250 59 | Ke 0.000000 0.000000 0.000000 60 | Ni 1.000000 61 | d 1.000000 62 | illum 2 63 | 64 | newmtl Part__Mirroring007_004_001 65 | Ns -1.960784 66 | Ka 1.000000 1.000000 1.000000 67 | Kd 0.250980 0.250980 0.250980 68 | Ks 0.031250 0.031250 0.031250 69 | Ke 0.000000 0.000000 0.000000 70 | Ni 1.000000 71 | d 1.000000 72 | illum 2 73 | 74 | newmtl Part__Mirroring_004_001 75 | Ns -1.960784 76 | Ka 1.000000 1.000000 1.000000 77 | Kd 0.898039 0.917647 0.929412 78 | Ks 0.031250 0.031250 0.031250 79 | Ke 0.000000 0.000000 0.000000 80 | Ni 1.000000 81 | d 1.000000 82 | illum 2 83 | -------------------------------------------------------------------------------- /Method/utils/vlm_utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import requests 3 | 4 | # OpenAI API Key 5 | api_key = None 6 | # Function to encode the image 7 | def encode_image(image_path): 8 | with open(image_path, "rb") as image_file: 9 | return base64.b64encode(image_file.read()).decode('utf-8') 10 | 11 | def infer_path(prompt, path): 12 | # Getting the base64 string 13 | base64_image = encode_image(path) 14 | 15 | headers = { 16 | "Content-Type": "application/json", 17 | "Authorization": f"Bearer {api_key}" 18 | } 19 | 20 | payload = { 21 | "model": "gpt-4-vision-preview", 22 | "messages": [ 23 | { 24 | "role": "user", 25 | "content": [ 26 | { 27 | "type": "text", 28 | "text": prompt 29 | }, 30 | { 31 | "type": "image_url", 32 | "image_url": { 33 | "url": f"data:image/jpeg;base64,{base64_image}" 34 | } 35 | } 36 | ] 37 | } 38 | ], 39 | "max_tokens": 300 40 | } 41 | 42 | response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) 43 | 44 | # print(response.json()) 45 | return response 46 | 47 | 48 | if __name__ == "__main__": 49 | prompt = "descripbe this image" 50 | path = "imgs/bana_cup_gsam_cup.jpg" 51 | response = infer_path(prompt, path) 52 | print(response.json()) 53 | # prompt_path = "pure_prompt.txt" 54 | # import os 55 | # os.makedirs("GPT4V-pure", exist_ok=True) 56 | # import glob, json, os 57 | # paths = glob.glob("result/*.png") 58 | # prompt_ori = open(prompt_path, "r").read() 59 | # total = len(paths) 60 | # for i, path in enumerate(paths): 61 | # name = path.split("/")[-1].split(".")[0] 62 | # print(name, i , total) 63 | # save_path = f"GPT4V-pure/{name}_pure.json" 64 | # if os.path.exists(save_path): 65 | # continue 66 | # # prompt = prompt_ori + open(f"pure_GAPartNet/{name}_pure_GAPartNet.txt", "r").read() 67 | # prompt = prompt_ori 68 | # response = infer_path(prompt, path) 69 | # json.dump(response.json(), open(save_path, "w")) 70 | # # import pdb; pdb.set_trace() -------------------------------------------------------------------------------- /Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/task_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "orientation": "behind", 3 | "rotation": "None", 4 | 
"selected_obj_names": [ 5 | "box", 6 | "apple" 7 | ], 8 | "selected_urdfs": [ 9 | "objaverse_final_norm/9660e0c0326b4f7386014e27717231ae/material_2.urdf", 10 | "objaverse_final_norm/f53d75bd123b40bca14d12d54286f432/material_2.urdf" 11 | ], 12 | "target_obj_name": "apple", 13 | "instruction": "Place the apple behind the box on the table. We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.", 14 | "init_obj_pos": [ 15 | [ 16 | 0.5763212442398071, 17 | 0.24244019389152527, 18 | 0.3158315122127533, 19 | 0.00011814905155915767, 20 | 3.0217168387025595e-05, 21 | 0.057858873158693314, 22 | 0.9983247518539429, 23 | 0.0005872970796190202, 24 | 0.00024345181009266526, 25 | 1.8670303688850254e-05, 26 | 0.0013161733513697982, 27 | -0.0011025663698092103, 28 | -0.001989496871829033 29 | ], 30 | [ 31 | 0.4732729494571686, 32 | 0.19301258027553558, 33 | 0.34965574741363525, 34 | 0.08372167497873306, 35 | -0.015573234297335148, 36 | -0.0979083776473999, 37 | 0.9915453195571899, 38 | 0.004182836972177029, 39 | 0.0017127282917499542, 40 | -0.001595060108229518, 41 | -0.02539602667093277, 42 | 0.09032362699508667, 43 | 0.01703813299536705 44 | ] 45 | ], 46 | "position_instruction": "Place the apple behind the box on the table. ", 47 | "rotation_instruction": "We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.", 48 | "rotation_instruction_label": "upright" 49 | } -------------------------------------------------------------------------------- /Method/isaacgym0/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os, glob 4 | import argparse 5 | import imageio 6 | from PIL import Image 7 | from isaacgym.torch_utils import * 8 | import torch 9 | import math 10 | import yaml 11 | 12 | def images_to_video(image_folder, video_path, frame_size=(1920, 1080), fps=30): 13 | images = sorted([img for img in os.listdir(image_folder) if img.endswith(".png") or img.endswith(".jpg") or img.endswith(".jpeg")]) 14 | 15 | if not images: 16 | print("No images found in the specified directory!") 17 | return 18 | 19 | writer = imageio.get_writer(video_path, fps=fps) 20 | 21 | for image in images: 22 | img_path = os.path.join(image_folder, image) 23 | img = imageio.imread(img_path) 24 | 25 | if img.shape[1] > frame_size[0] or img.shape[0] > frame_size[1]: 26 | print("Warning: frame size is smaller than the one of the images.") 27 | print("Images will be resized to match frame size.") 28 | img = np.array(Image.fromarray(img).resize(frame_size)) 29 | 30 | writer.append_data(img) 31 | 32 | writer.close() 33 | print("Video created successfully!") 34 | 35 | def quat_axis(q, axis=0): 36 | basis_vec = torch.zeros(q.shape[0], 3, device=q.device) 37 | basis_vec[:, axis] = 1 38 | return quat_rotate(q, basis_vec) 39 | 40 | 41 | def orientation_error(desired, current): 42 | cc = quat_conjugate(current) 43 | q_r = quat_mul(desired, cc) 44 | return q_r[:, 0:3] * torch.sign(q_r[:, 3]).unsqueeze(-1) 45 | 46 | 47 | def cube_grasping_yaw(q, corners): 48 | """ returns horizontal rotation required to grasp cube """ 49 | rc = quat_rotate(q, corners) 50 | yaw = (torch.atan2(rc[:, 1], rc[:, 0]) - 0.25 * math.pi) % (0.5 * math.pi) 51 | theta = 0.5 * yaw 52 | w = theta.cos() 53 | x = 
torch.zeros_like(w) 54 | y = torch.zeros_like(w) 55 | z = theta.sin() 56 | yaw_quats = torch.stack([x, y, z, w], dim=-1) 57 | return yaw_quats 58 | 59 | def read_yaml_config(file_path): 60 | with open(file_path, 'r') as file: 61 | # Load the YAML file into a Python dictionary 62 | config = yaml.safe_load(file) 63 | return config -------------------------------------------------------------------------------- /Benchmark/dataset/objects/scale.py: -------------------------------------------------------------------------------- 1 | import trimesh 2 | import os 3 | import json 4 | import math 5 | 6 | 7 | mesh_path = '/Users/selina/Desktop/projects/ObjectPlacement/assets/mesh/final_norm' 8 | category_path = '/Users/selina/Desktop/projects/Open6DOR/Benchmark/benchmark_catalogue/category_dictionary.json' 9 | object_path = '/Users/selina/Desktop/projects/Open6DOR/Benchmark/benchmark_catalogue/object_dictionary_complete_0702.json' 10 | new_path = "/Users/selina/Desktop/projects/Open6DOR/Benchmark/dataset/objects/rescale" 11 | 12 | category_dict = json.load(open(category_path, 'r')) 13 | object_dict = json.load(open(object_path, 'r')) 14 | for root, dirs, files in os.walk(mesh_path): 15 | for dir in dirs: 16 | try: 17 | obj_dir = os.path.join(root, dir) 18 | obj_name = dir 19 | if obj_name not in object_dict: 20 | continue 21 | obj_cat = object_dict[obj_name]['category'] 22 | obj_scale = category_dict[obj_cat]['scale'] 23 | obj_mesh = trimesh.load(os.path.join(mesh_path, dir) + '/material.obj') 24 | 25 | obj_mesh.apply_translation(-obj_mesh.centroid) 26 | 27 | if obj_mesh.bounding_box.extents.max() < 0.1: 28 | print(f"Object {obj_name} is too small") 29 | continue 30 | scale_factor = 0.7 * math.sqrt(obj_scale) / obj_mesh.bounding_box.extents.max() 31 | 32 | obj_mesh.apply_scale(scale_factor) 33 | if not os.path.exists(os.path.join(new_path, dir)): 34 | os.makedirs(os.path.join(new_path, dir), exist_ok=False) 35 | obj_mesh.export(os.path.join(new_path, dir) + '/material.obj') 36 | except: 37 | import pdb; pdb.set_trace() 38 | 39 | 40 | break 41 | 42 | # # Load a mesh from OBJ file 43 | # mesh = trimesh.load('/Users/selina/Desktop/projects/Open6DOR/Benchmark/dataset/objects/rescale/c61227cac7224b86b43c53ac2a2b6ec7/material.obj') 44 | 45 | # # Translate mesh to its centroid 46 | # mesh.apply_translation(-mesh.centroid) 47 | 48 | # # Scale the mesh (1 unit here) 49 | # # scale_factor = 1.0 / mesh.bounding_box.extents.max() 50 | # print(mesh.bounding_box.extents.max()) 51 | # # mesh.apply_scale(scale_factor) 52 | 53 | # # # save the new mesh to OBJ file 54 | # # mesh.export('2ab18cb4ec8f4a1f8dec637602362054.obj') -------------------------------------------------------------------------------- /Method/gym/vlm_utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import requests 3 | import os 4 | 5 | # OpenAI API Key: never hard-code secrets in source; read the key from the environment 6 | api_key = os.getenv("API_KEY") 7 | if api_key is None: 8 | raise ValueError("please set API_KEY environment variable by running `export API_KEY=XXXX`") 9 | # Function to encode the image 10 | def encode_image(image_path): 11 | with open(image_path, "rb") as image_file: 12 | return base64.b64encode(image_file.read()).decode('utf-8') 13 | 14 | def infer_path(prompt, path): 15 | # Getting the base64 string 16 | base64_image = encode_image(path) 17 | 18 | headers = { 19 | "Content-Type": "application/json", 20 | "Authorization": 
f"Bearer {api_key}" 21 | } 22 | 23 | payload = { 24 | "model": "gpt-4-vision-preview", 25 | "messages": [ 26 | { 27 | "role": "user", 28 | "content": [ 29 | { 30 | "type": "text", 31 | "text": prompt 32 | }, 33 | { 34 | "type": "image_url", 35 | "image_url": { 36 | "url": f"data:image/jpeg;base64,{base64_image}" 37 | } 38 | } 39 | ] 40 | } 41 | ], 42 | "max_tokens": 300 43 | } 44 | 45 | response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) 46 | 47 | # print(response.json()) 48 | return response 49 | 50 | 51 | if __name__ == "__main__": 52 | prompt = "descripbe this image" 53 | path = "imgs/bana_cup_gsam_cup.jpg" 54 | response = infer_path(prompt, path) 55 | print(response.json()) 56 | # prompt_path = "pure_prompt.txt" 57 | # import os 58 | # os.makedirs("GPT4V-pure", exist_ok=True) 59 | # import glob, json, os 60 | # paths = glob.glob("result/*.png") 61 | # prompt_ori = open(prompt_path, "r").read() 62 | # total = len(paths) 63 | # for i, path in enumerate(paths): 64 | # name = path.split("/")[-1].split(".")[0] 65 | # print(name, i , total) 66 | # save_path = f"GPT4V-pure/{name}_pure.json" 67 | # if os.path.exists(save_path): 68 | # continue 69 | # # prompt = prompt_ori + open(f"pure_GAPartNet/{name}_pure_GAPartNet.txt", "r").read() 70 | # prompt = prompt_ori 71 | # response = infer_path(prompt, path) 72 | # json.dump(response.json(), open(save_path, "w")) 73 | # # import pdb; pdb.set_trace() -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/.gitignore: -------------------------------------------------------------------------------- 1 | old/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | # checkpoint 134 | *.pth 135 | outputs/ 136 | 137 | .idea/ 138 | -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/link0.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 12 3 | 4 | newmtl Face636_001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.901961 0.921569 0.929412 8 | Ks 0.125000 0.125000 0.125000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Part__Feature017_001 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 1.000000 1.000000 1.000000 18 | Ks 0.500000 0.500000 0.500000 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 1.000000 21 | d 1.000000 22 | illum 2 23 | 24 | newmtl Part__Feature018_001 25 | Ns -1.960784 26 | Ka 1.000000 1.000000 1.000000 27 | Kd 1.000000 1.000000 1.000000 28 | Ks 0.500000 0.500000 0.500000 29 | Ke 0.000000 0.000000 0.000000 30 | Ni 1.000000 31 | d 1.000000 32 | illum 2 33 | 34 | newmtl Part__Feature019_001 35 | Ns -1.960784 36 | Ka 1.000000 1.000000 1.000000 37 | Kd 1.000000 1.000000 1.000000 38 | Ks 0.125000 0.125000 0.125000 39 | Ke 0.000000 0.000000 0.000000 40 | Ni 1.000000 41 | d 1.000000 42 | illum 2 43 | 44 | newmtl Part__Feature022_001 45 | Ns -1.960784 46 | Ka 1.000000 1.000000 1.000000 47 | Kd 0.901961 0.921569 0.929412 48 | Ks 0.125000 0.125000 0.125000 49 | Ke 0.000000 0.000000 0.000000 50 | Ni 1.000000 51 | d 1.000000 52 | illum 2 53 | 54 | newmtl Part__Feature023_001 55 | Ns -1.960784 56 | Ka 1.000000 1.000000 1.000000 57 | Kd 0.250980 0.250980 0.250980 58 | Ks 0.125000 0.125000 0.125000 59 | Ke 0.000000 0.000000 0.000000 60 | Ni 1.000000 61 | d 1.000000 62 | illum 2 63 | 64 | newmtl Shell001_001 65 | Ns -1.960784 66 | Ka 1.000000 1.000000 1.000000 67 | Kd 0.250980 0.250980 0.250980 68 | Ks 0.125000 0.125000 0.125000 69 | Ke 0.000000 0.000000 0.000000 70 | Ni 1.000000 71 | d 1.000000 72 | illum 2 73 | 74 | newmtl Shell002_001 75 | Ns -1.960784 76 | Ka 1.000000 1.000000 1.000000 77 | Kd 0.901961 0.921569 0.929412 78 | Ks 0.125000 0.125000 0.125000 79 | Ke 0.000000 0.000000 0.000000 80 | Ni 1.000000 81 | d 1.000000 82 | illum 2 83 | 84 | newmtl Shell003_001 85 | Ns -1.960784 86 | Ka 1.000000 1.000000 1.000000 87 | Kd 0.901961 0.921569 0.929412 88 | Ks 0.125000 0.125000 0.125000 89 | Ke 0.000000 0.000000 0.000000 90 | Ni 1.000000 91 | d 1.000000 92 | illum 2 93 | 94 | newmtl Shell009_001 95 | Ns -1.960784 96 | Ka 1.000000 1.000000 1.000000 97 | Kd 0.250980 0.250980 0.250980 98 | Ks 0.125000 0.125000 0.125000 99 | Ke 0.000000 0.000000 0.000000 100 | Ni 1.000000 101 | d 1.000000 102 | illum 2 103 | 104 | newmtl Shell010_001 105 | Ns -1.960784 106 | Ka 1.000000 1.000000 1.000000 107 | Kd 0.901961 0.921569 0.929412 108 | Ks 0.125000 0.125000 0.125000 109 | Ke 0.000000 0.000000 0.000000 110 | Ni 1.000000 111 | d 1.000000 112 | 
illum 2 113 | 114 | newmtl Shell_001 115 | Ns -1.960784 116 | Ka 1.000000 1.000000 1.000000 117 | Kd 0.250980 0.250980 0.250980 118 | Ks 0.125000 0.125000 0.125000 119 | Ke 0.000000 0.000000 0.000000 120 | Ni 1.000000 121 | d 1.000000 122 | illum 2 123 | -------------------------------------------------------------------------------- /Method/README.md: -------------------------------------------------------------------------------- 1 | # Method Introduction 2 | 3 | 4 | 5 | ## Get Task 6 | A class that loads a task from a configuration file. It can be used to set up the simulation environment in IsaacGym, query task information, render, control the robot, etc. 7 | 8 | - _prepare_task: Load the simulation environment and gather task information 9 | 10 | - _init_gym: Initialize the gym environment 11 | 12 | - _setup_scene: Set up the scene 13 | 14 | - prepare_franka_asset: Load the Franka asset from `self.cfgs["asset"]["franka_asset_file"]` 15 | 16 | - _prepare_obj_assets: Load the object assets (table and objects) 17 | 18 | - _load_env: Load all assets into the environment and set up the scene 19 | 20 | - _init_observation: Initialize the observation space and the corresponding observation functions 21 | 22 | - refresh_observation: Get the observation dict from the environment 23 | 24 | - clean_up: Clean up the environment 25 | 26 | ## Open6DOR-GPT 27 | 28 | Install GroundedSAM: 29 | ``` 30 | cd Method/vision/GroundedSAM/GroundingDINO 31 | pip install -e . 32 | cd ../../../.. 33 | cd Method/vision/GroundedSAM/segment_anything 34 | pip install -e . 35 | cd ../../../.. 36 | ``` 37 | Additional system packages: 38 | ``` 39 | sudo apt update 40 | sudo apt install fonts-dejavu 41 | ``` 42 | 43 | If you encounter the error: 44 | ``` 45 | cannot import name 'split_torch_state_dict_into_shards' from 'huggingface_hub' 46 | ``` 47 | try: 48 | ``` 49 | pip install --upgrade huggingface_hub 50 | ``` 51 | 52 | The SAM checkpoint is available [here](https://huggingface.co/spaces/abhishek/StableSAM/resolve/main/sam_vit_h_4b8939.pth) 53 | 54 | The GroundingDINO checkpoint is available [here](https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth) 55 | 56 | ## Task Generation 57 | 58 | The core code for task generation is in `Method/interaction.py`. The task generator is responsible for generating tasks for Open6DOR. 59 | 60 | #### Position Track 61 | ```bash 62 | python interaction.py --mode gen_task --task_root debug_gen_task_pos 63 | ``` 64 | 65 | 66 | #### Rotation Track 67 | ```bash 68 | python interaction.py --mode gen_task_pure_rot --task_root debug_gen_task_rot 69 | ``` 70 | 71 | #### 6DoF Track 72 | ```bash 73 | python interaction.py --mode gen_task_rot --task_root debug_gen_task_6dof 74 | ``` 75 | 76 | #### Large Dataset Generation 77 | If you want to generate a large dataset, you can use the following command: 78 | ```bash 79 | python run_multiple.py --f "YOUR COMMAND" --n YOUR_RUN_TIMES 80 | ``` 81 | 82 | #### Change Parameters 83 | You can change the parameters in `Method/interaction.py` to generate different tasks. 
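The same parameters can also be overridden in code before the environment or task generator is built. Below is a minimal sketch, assuming the `asset` config keys used in `test_gym.py` (`position_noise`, `rotation_noise`); adjust the key names to match your `config.yaml`:
```python
# Minimal sketch: tweak generation parameters programmatically instead of editing config.yaml.
# Assumes the helpers and config keys used elsewhere in this repo (see test_gym.py).
from gym.utils import read_yaml_config

cfgs = read_yaml_config("config.yaml")
cfgs["asset"]["position_noise"] = [0.1, 0.15]  # shrink the random x/y placement range
cfgs["asset"]["rotation_noise"] = 0            # disable random initial rotation
# ...then pass `cfgs` to the environment, e.g. ObjectGym(cfgs, ...) as in test_gym.py
```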
84 | 85 | ##### Object Number 86 | ```python 87 | if orientation == "center": 88 | selected_obj_num = np.random.randint(4, 5) 89 | elif orientation == "between": 90 | selected_obj_num = np.random.randint(3, 5) 91 | else: 92 | selected_obj_num = np.random.randint(2, 5) 93 | ``` 94 | 95 | ##### Object Position 96 | In config.yaml, you can change the object position range: 97 | ```yaml 98 | assets: 99 | position_noise: [0.2, 0.25] # x and y position random range, depends on the table size 100 | ``` 101 | 102 | -------------------------------------------------------------------------------- /Method/utils/get_assets.py: -------------------------------------------------------------------------------- 1 | 2 | import json, glob 3 | 4 | def get_assets_info(dataset_names): 5 | urdf_paths = [] 6 | obj_name = [] 7 | uuids = [] 8 | if "ycb" in dataset_names: 9 | # all the ycb urdf data 10 | json_dict = json.load(open("../Benchmark/benchmark_catalogue/object_dictionary_complete_0702.json")) 11 | all_uuid = json_dict.keys() 12 | 13 | #ycb_urdf_paths = glob.glob("assets/ycb_16k_backup/*/*.urdf") 14 | ycb_urdf_paths = glob.glob("benchmark/mesh/ycb/*/*.urdf") 15 | ycb_names = [urdf_path.split("/")[-2] for urdf_path in ycb_urdf_paths] 16 | ycb_obj_name = [" ".join(name.split("_")[1:-2]) for name in ycb_names] 17 | ycb_uuid = [urdf_path.split("/")[-2].split("_")[0] for urdf_path in ycb_urdf_paths] 18 | 19 | valid_idx = [i for i in range(len(ycb_uuid)) if ycb_uuid[i] in all_uuid] 20 | 21 | ycb_uuids = [ycb_uuid[i] for i in valid_idx] 22 | ycb_urdf_paths = [ycb_urdf_paths[i] for i in valid_idx] 23 | ycb_obj_name = [" ".join(json_dict[ycb_uuid[i]]['category'].split("_")) for i in valid_idx] 24 | urdf_paths+=ycb_urdf_paths 25 | obj_name+=ycb_obj_name 26 | uuids += ycb_uuids 27 | if "objaverse" in dataset_names: 28 | json_dict = json.load(open("../Benchmark/benchmark_catalogue/object_dictionary_complete_0702.json")) 29 | 30 | all_uuid = json_dict.keys() 31 | # all the objaverse data 32 | objaverse_urdf_paths = glob.glob("assets/objaverse_final_norm/*/*_2.urdf") 33 | objaverse_obj_uuid = [path.split("/")[-2] for path in objaverse_urdf_paths] 34 | 35 | valid_idx = [i for i in range(len(objaverse_obj_uuid)) if objaverse_obj_uuid[i] in all_uuid] 36 | objaverse_obj_uuids = [objaverse_obj_uuid[i] for i in valid_idx] 37 | objaverse_urdf_paths = [objaverse_urdf_paths[i] for i in valid_idx] 38 | objaverse_obj_name = [" ".join(json_dict[objaverse_obj_uuid[i]]['category'].split("_")) for i in valid_idx] 39 | urdf_paths+=objaverse_urdf_paths 40 | obj_name+=objaverse_obj_name 41 | uuids+=objaverse_obj_uuids 42 | if "objaverse_old" in dataset_names: 43 | json_dict = json.load(open("category_dictionary.json")) 44 | 45 | all_uuid = [] 46 | for key in json_dict.keys(): all_uuid+=json_dict[key]["object_uuids"] 47 | # all the objaverse data 48 | objaverse_urdf_paths = glob.glob("benchmark/mesh/objaverse_final_norm/*/*_2.urdf") 49 | objaverse_names = [urdf_path.split("/")[-2] for urdf_path in objaverse_urdf_paths] 50 | objaverse_obj_name = [" ".join(name.split("_")[1:]) for name in objaverse_names] 51 | objaverse_obj_uuid = [name.split("_")[0] for name in objaverse_names] 52 | valid_idx = [i for i in range(len(objaverse_obj_uuid)) if objaverse_obj_uuid[i] in all_uuid] 53 | objaverse_urdf_paths = [objaverse_urdf_paths[i] for i in valid_idx] 54 | objaverse_obj_name = [objaverse_obj_name[i] for i in valid_idx] 55 | # import pdb; pdb.set_trace() 56 | urdf_paths+=objaverse_urdf_paths 57 | obj_name+=objaverse_obj_name 58 | return 
urdf_paths,obj_name,uuids -------------------------------------------------------------------------------- /Method/test_gym.py: -------------------------------------------------------------------------------- 1 | # exit() 2 | import sys 3 | import os 4 | sys.path = [os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))] + sys.path 5 | sys.path = [os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))] + sys.path 6 | 7 | # import gym 8 | from gym.object_gym import ObjectGym 9 | from gym.utils import read_yaml_config 10 | 11 | import json, glob, random 12 | 13 | tag = "handle_right" 14 | # tag = "upside" 15 | anno_path = f"/home/haoran/Projects/Rearrangement/Open6DOR/Benchmark/benchmark_catalogue/annotation/annotation_{tag}.json" 16 | # anno_path = f"/home/haoran/Projects/Rearrangement/ObjectPlacement/rotation_anno/annotation_upright_1_.json" 17 | save_root_ = f"/home/haoran/Projects/Rearrangement/Open6DOR/anno_test/anno_images-final_-{tag}" 18 | anno_data = json.load(open(anno_path, 'r')) 19 | anno_keys = list(anno_data.keys()) 20 | # import pdb; pdb.set_trace() 21 | random.shuffle(anno_keys) 22 | for anno in anno_keys: 23 | # print(anno["object_name"], anno["upright"]) 24 | anno_data_i = anno_data[anno]['annotation'] 25 | obj_id = anno 26 | save_root = f"{save_root_}/{tag}-{obj_id}" 27 | # if os.path.exists(f"/home/haoran/Projects/Rearrangement/ObjectPlacement/rotation_anno/anno_images/upright-{obj_id}/task_config-rgb-0-0.png"): 28 | if os.path.exists(f"{save_root_}/{tag}-{obj_id}/task_config-rgb-0-0.png"): 29 | continue 30 | cfgs = read_yaml_config("config.yaml") 31 | 32 | if len(obj_id) > 10: # objaverse 33 | cfgs["asset"]["asset_files"] = [f"objaverse_final_norm/{obj_id}/material_2.urdf"] 34 | else: 35 | path = glob.glob(f"assets/ycb_16k_backup/{obj_id}*/{obj_id}*.urdf")[0] 36 | path_r = "/".join(path.split("/")[-3:]) 37 | cfgs["asset"]["asset_files"] = [path_r] 38 | if len(list(anno_data_i.keys())) > 1: 39 | import pdb; pdb.set_trace() 40 | try: 41 | quat_anno = anno_data_i[list(anno_data_i.keys())[0]]["quat"] 42 | except: 43 | continue 44 | if anno_data_i[list(anno_data_i.keys())[0]]["stage"] != 1 and anno_data_i[list(anno_data_i.keys())[0]]["stage"] != 2: 45 | import pdb; pdb.set_trace() 46 | 47 | cfgs["asset"]["obj_pose_ps"] = [[0.5, 0, 0.4]] 48 | try: 49 | cfgs["asset"]["obj_pose_rs"] = [[quat_anno[0][0], quat_anno[0][1], quat_anno[0][2],quat_anno[0][3],]] 50 | except: 51 | cfgs["asset"]["obj_pose_rs"] = [[quat_anno[0], quat_anno[1], quat_anno[2],quat_anno[3],]] 52 | 53 | cfgs["asset"]["position_noise"] = [0, 0] 54 | cfgs["asset"]["rotation_noise"] = 0 55 | # cfgs["asset"]["asset_files"] = [obj_id] 56 | # cfgs["asset"]["asset_files"] = anno["object_name"] 57 | gym = ObjectGym(cfgs, None, None, pre_steps = 0) 58 | 59 | print(list(anno_data_i.keys())[0]) 60 | gym.refresh_observation(get_visual_obs=False) 61 | # save_root = f"/home/haoran/Projects/Rearrangement/ObjectPlacement/rotation_anno/anno_images2/upright-{obj_id}" 62 | 63 | os.makedirs(save_root, exist_ok=True) 64 | points_envs, colors_envs, rgb_envs, depth_envs ,seg_envs, ori_points_envs, ori_colors_envs, pixel2pointid, pointid2pixel = gym.refresh_observation(get_visual_obs=True) 65 | gym.save_render(rgb_envs=rgb_envs, depth_envs=None, ori_points_env=None, ori_colors_env=None, points=None, colors=None, save_dir = save_root, save_name = "task_config") 66 | 67 | # gym.run_steps(1000) 68 | # import pdb; pdb.set_trace() 69 | gym.clean_up() 70 | 
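# Note on the config overrides above: position_noise = [0, 0], rotation_noise = 0, and
# obj_pose_rs set to the annotated quaternion pin each object at its labelled orientation,
# so the saved task_config-rgb-*.png renders show the annotation exactly; a fresh ObjectGym
# is created and released via gym.clean_up() for every annotation entry in the loop.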
-------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/segment_anything/build_sam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer 12 | 13 | 14 | def build_sam_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam = build_sam_vit_h 25 | 26 | 27 | def build_sam_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | sam_model_registry = { 48 | "default": build_sam, 49 | "vit_h": build_sam, 50 | "vit_l": build_sam_vit_l, 51 | "vit_b": build_sam_vit_b, 52 | } 53 | 54 | 55 | def _build_sam( 56 | encoder_embed_dim, 57 | encoder_depth, 58 | encoder_num_heads, 59 | encoder_global_attn_indexes, 60 | checkpoint=None, 61 | ): 62 | prompt_embed_dim = 256 63 | image_size = 1024 64 | vit_patch_size = 16 65 | image_embedding_size = image_size // vit_patch_size 66 | sam = Sam( 67 | image_encoder=ImageEncoderViT( 68 | depth=encoder_depth, 69 | embed_dim=encoder_embed_dim, 70 | img_size=image_size, 71 | mlp_ratio=4, 72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 73 | num_heads=encoder_num_heads, 74 | patch_size=vit_patch_size, 75 | qkv_bias=True, 76 | use_rel_pos=True, 77 | global_attn_indexes=encoder_global_attn_indexes, 78 | window_size=14, 79 | out_chans=prompt_embed_dim, 80 | ), 81 | prompt_encoder=PromptEncoder( 82 | embed_dim=prompt_embed_dim, 83 | image_embedding_size=(image_embedding_size, image_embedding_size), 84 | input_image_size=(image_size, image_size), 85 | mask_in_chans=16, 86 | ), 87 | mask_decoder=MaskDecoder( 88 | num_multimask_outputs=3, 89 | transformer=TwoWayTransformer( 90 | depth=2, 91 | embedding_dim=prompt_embed_dim, 92 | mlp_dim=2048, 93 | num_heads=8, 94 | ), 95 | transformer_dim=prompt_embed_dim, 96 | iou_head_depth=3, 97 | iou_head_hidden_dim=256, 98 | ), 99 | pixel_mean=[123.675, 116.28, 103.53], 100 | pixel_std=[58.395, 57.12, 57.375], 101 | ) 102 | sam.eval() 103 | if checkpoint is not None: 104 | with open(checkpoint, "rb") as f: 105 | state_dict = torch.load(f) 106 | sam.load_state_dict(state_dict) 107 | return sam 108 | -------------------------------------------------------------------------------- /Method/open6dor_gpt.py: -------------------------------------------------------------------------------- 1 | import json, imageio 2 | from gym.utils import read_yaml_config, prepare_gsam_model 3 | import numpy as np 4 | 5 | class Open6DOR_GPT: 6 | def __init__(self, cfgs): 7 | self.cfgs = cfgs 8 | self.device = cfgs["DEVICE"] 9 | self._prepare_ckpts() 10 | 11 | def _prepare_ckpts(self): 12 | # prepare gsam model 13 
| if self.cfgs["INFERENCE_GSAM"]: 14 | self._grounded_dino_model, self._sam_predictor = prepare_gsam_model(device=self.device) 15 | 16 | self._box_threshold = 0.3 17 | self._text_threshold = 0.25 18 | else: 19 | self._grounded_dino_model, self._sam_predictor = None, None 20 | 21 | def inference_vlm(self, prompt, image_path, print_ans = False): 22 | from gym.vlm_utils import infer_path 23 | # prepare vlm model 24 | response = infer_path(prompt, image_path) 25 | while 'choices' not in response.json(): 26 | response = infer_path(prompt, image_path) 27 | ans = response.json()['choices'][0]['message']['content'] 28 | if print_ans: 29 | print(ans) 30 | return ans 31 | 32 | def inference_gsam(self, image: np.ndarray = None, image_path: str = None, prompt = None): 33 | from vision.grounded_sam_demo import prepare_GroundedSAM_for_inference, inference_one_image 34 | if image is not None: 35 | masks = inference_one_image(image[..., :3], self._grounded_dino_model, self._sam_predictor, box_threshold=self._box_threshold, text_threshold=self._text_threshold, text_prompt=prompt, device=self.device) 36 | elif image_path is not None: 37 | image = imageio.imread(image_path) 38 | masks = inference_one_image(image[..., :3], self._grounded_dino_model, self._sam_predictor, box_threshold=self._box_threshold, text_threshold=self._text_threshold, text_prompt=prompt, device=self.device) 39 | return masks, image 40 | 41 | def inference_task(self, task_cfgs): 42 | # prepare task data 43 | task_data = self.prepare_task_data(task_cfgs) 44 | 45 | # inference 46 | pred_pose = self.inference(task_data, self._grounded_dino_model, self._sam_predictor) 47 | 48 | return pred_pose 49 | 50 | def test_vlm(): 51 | cfgs = read_yaml_config("config.yaml") 52 | open6dor_gpt = Open6DOR_GPT(cfgs=cfgs) 53 | prompt = "hello gpt, describe the image" 54 | image_path = "test_image.png" 55 | print("The ans is: ", open6dor_gpt.inference_vlm(prompt, image_path, print_ans=True)) 56 | print("vlm test passed!") 57 | import pdb; pdb.set_trace() 58 | 59 | def test_gsam(): 60 | image_path = "test_image.png" 61 | cfgs = read_yaml_config("config.yaml") 62 | open6dor_gpt = Open6DOR_GPT(cfgs=cfgs) 63 | masks, _image = open6dor_gpt.inference_gsam(image_path = image_path, prompt="calculator") 64 | _image[masks[0][0].cpu().numpy().astype(bool)] = 0 65 | imageio.imwrite("test_mask.png", _image) 66 | print("The mask is saved as test_mask.png, check it!") 67 | import pdb; pdb.set_trace() 68 | 69 | if __name__ == "__main__": 70 | # test_gsam() 71 | 72 | test_vlm() 73 | 74 | cfgs = read_yaml_config("config.yaml") 75 | task_cfgs_path = "/home/haoran/Projects/Rearrangement/Open6DOR/Method/tasks/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/task_config_new2.json" 76 | with open(task_cfgs_path, "r") as f: task_cfgs = json.load(f) 77 | 78 | open6dor_gpt = Open6DOR_GPT(cfgs=cfgs, task_cfgs=task_cfgs) 79 | -------------------------------------------------------------------------------- /assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/task_config_new5.json: -------------------------------------------------------------------------------- 1 | { 2 | "selected_obj_names": [ 3 | "bottle", 4 | "tissue box", 5 | "apple" 6 | ], 7 | "selected_urdfs": [ 8 | "ycb_16k_backup/006_mustard_bottle_google_16k/006_mustard_bottle_google_16k.urdf", 9 | "objaverse_rescale/dc4c91abf45342b4bb8822f50fa162b2/material_2.urdf", 10 | "objaverse_rescale/fbda0b25f41f40958ea984f460e4770b/material_2.urdf" 11 | ], 12 | 
"target_obj_name": "apple", 13 | "instruction": "Place the apple behind the bottle on the table. We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.", 14 | "init_obj_pos": [ 15 | [ 16 | 0.3738566040992737, 17 | 0.17327724397182465, 18 | 0.3028668463230133, 19 | -9.934652553056367e-06, 20 | 5.676249202224426e-06, 21 | -0.03726901113986969, 22 | 0.9993051886558533, 23 | 0.00011460757377790287, 24 | -0.0007374841370619833, 25 | -0.00024315444170497358, 26 | 0.01065916009247303, 27 | 0.000735661422368139, 28 | 0.0003395920793991536 29 | ], 30 | [ 31 | 0.4417206645011902, 32 | -0.3223787248134613, 33 | 0.3753006160259247, 34 | 0.7060578465461731, 35 | -0.03799350559711456, 36 | -0.037282224744558334, 37 | 0.7061506509780884, 38 | 5.145368413650431e-05, 39 | -0.00020104726718273014, 40 | 0.00014684736379422247, 41 | 0.003485196502879262, 42 | 5.90651725360658e-05, 43 | -0.0011944533325731754 44 | ], 45 | [ 46 | 0.5476366281509399, 47 | -0.07213471084833145, 48 | 0.3492423892021179, 49 | 0.1136084571480751, 50 | 0.050451405346393585, 51 | -0.0884791761636734, 52 | 0.9882911443710327, 53 | 5.16880136274267e-05, 54 | 0.0044308979995548725, 55 | -0.001778011559508741, 56 | -0.08829422295093536, 57 | -0.0050054253078997135, 58 | 0.015157821588218212 59 | ] 60 | ], 61 | "position_instruction": "Place the apple behind the bottle on the table. ", 62 | "rotation_instruction": "We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.", 63 | "rotation_instruction_label": "upright", 64 | "obj_codes": [ 65 | "006", 66 | "dc4c91abf45342b4bb8822f50fa162b2", 67 | "fbda0b25f41f40958ea984f460e4770b" 68 | ], 69 | "target_obj_code": "fbda0b25f41f40958ea984f460e4770b", 70 | "anno_target": { 71 | "category": "apple", 72 | "annotation": { 73 | " the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.": { 74 | "quat": [ 75 | [ 76 | 0.7071067690849304, 77 | 0.0, 78 | 0.0, 79 | 0.7071067690849304 80 | ] 81 | ], 82 | "stage": 1, 83 | "axis": "z" 84 | } 85 | } 86 | }, 87 | "rot_tag_detail": "upright", 88 | "rot_tag_level": 0, 89 | "position_tag": "behind", 90 | "rotation_tag": "upright" 91 | } -------------------------------------------------------------------------------- /Method/vision/test_sam.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.pyplot as plt 4 | import cv2 5 | import sys 6 | sys.path.append("..") 7 | 8 | from segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator 9 | 10 | def show_anns(anns): 11 | if len(anns) == 0: 12 | return 13 | sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True) 14 | ax = plt.gca() 15 | ax.set_autoscale_on(False) 16 | 17 | img = np.ones((sorted_anns[0]['segmentation'].shape[0], sorted_anns[0]['segmentation'].shape[1], 4)) 18 | img[:,:,3] = 0 19 | for ann in sorted_anns: 20 | m = ann['segmentation'] 21 | color_mask = np.concatenate([np.random.random(3), [0.35]]) 22 | img[m] = color_mask 23 | ax.imshow(img) 24 | 25 | def show_mask(mask, ax, random_color=False): 26 | if random_color: 27 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 28 | else: 29 | color = np.array([30/255, 144/255, 
255/255, 0.6]) 30 | h, w = mask.shape[-2:] 31 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 32 | ax.imshow(mask_image) 33 | 34 | def show_points(coords, labels, ax, marker_size=375): 35 | pos_points = coords[labels==1] 36 | neg_points = coords[labels==0] 37 | ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 38 | ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 39 | 40 | def show_box(box, ax): 41 | x0, y0 = box[0], box[1] 42 | w, h = box[2] - box[0], box[3] - box[1] 43 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) 44 | 45 | image = cv2.imread('/home/haoran/Projects/ObjectPlacement/imgs/bana_cup.png') 46 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 47 | 48 | plt.figure(figsize=(10,10)) 49 | plt.imshow(image) 50 | plt.axis('on') 51 | plt.show() 52 | 53 | sam_checkpoint = "/home/haoran/Projects/ObjectPlacement/assets/ckpts/sam_vit_h_4b8939.pth" 54 | model_type = "vit_h" 55 | 56 | device = "cuda" 57 | 58 | sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) 59 | sam.to(device=device) 60 | 61 | ############## Demo1: Mask Generation ################ 62 | mask_generator = SamAutomaticMaskGenerator(sam) 63 | 64 | masks = mask_generator.generate(image) 65 | print(len(masks)) 66 | print(masks[0].keys()) 67 | plt.figure(figsize=(20,20)) 68 | plt.imshow(image) 69 | show_anns(masks) 70 | plt.axis('off') 71 | plt.show() 72 | 73 | 74 | ############## Demo2: Mask Prediction with Input Point ################ 75 | predictor = SamPredictor(sam) 76 | 77 | predictor.set_image(image) 78 | 79 | input_point = np.array([[500, 375]]) 80 | input_label = np.array([1]) 81 | 82 | 83 | masks, scores, logits = predictor.predict( 84 | point_coords=input_point, 85 | point_labels=input_label, 86 | multimask_output=True, 87 | ) 88 | 89 | for i, (mask, score) in enumerate(zip(masks, scores)): 90 | plt.figure(figsize=(10,10)) 91 | plt.imshow(image) 92 | show_mask(mask, plt.gca()) 93 | show_points(input_point, input_label, plt.gca()) 94 | plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18) 95 | plt.axis('off') 96 | plt.show() 97 | 98 | input_point = np.array([[500, 375], [1125, 625]]) 99 | input_label = np.array([1, 1]) 100 | 101 | mask_input = logits[np.argmax(scores), :, :] 102 | 103 | masks, _, _ = predictor.predict( 104 | point_coords=input_point, 105 | point_labels=input_label, 106 | mask_input=mask_input[None, :, :], 107 | multimask_output=False, 108 | ) -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team at . All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 
71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq 81 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/segment_anything/build_sam_hq.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoderHQ, PromptEncoder, Sam, TwoWayTransformer 12 | 13 | 14 | def build_sam_hq_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam_hq = build_sam_hq_vit_h 25 | 26 | 27 | def build_sam_hq_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_hq_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | sam_hq_model_registry = { 48 | "default": build_sam_hq_vit_h, 49 | "vit_h": build_sam_hq_vit_h, 50 | "vit_l": build_sam_hq_vit_l, 51 | "vit_b": build_sam_hq_vit_b, 52 | } 53 | 54 | 55 | def _build_sam( 56 | encoder_embed_dim, 57 | encoder_depth, 58 | encoder_num_heads, 59 | encoder_global_attn_indexes, 60 | checkpoint=None, 61 | ): 62 | prompt_embed_dim = 256 63 | image_size = 1024 64 | vit_patch_size = 16 65 | image_embedding_size = image_size // vit_patch_size 66 | sam = Sam( 67 | image_encoder=ImageEncoderViT( 68 | depth=encoder_depth, 69 | embed_dim=encoder_embed_dim, 70 | img_size=image_size, 71 | mlp_ratio=4, 72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 73 | num_heads=encoder_num_heads, 74 | patch_size=vit_patch_size, 75 | qkv_bias=True, 76 | use_rel_pos=True, 77 | global_attn_indexes=encoder_global_attn_indexes, 78 | window_size=14, 79 | out_chans=prompt_embed_dim, 80 | ), 81 | prompt_encoder=PromptEncoder( 82 | embed_dim=prompt_embed_dim, 83 | image_embedding_size=(image_embedding_size, image_embedding_size), 84 | input_image_size=(image_size, image_size), 85 | mask_in_chans=16, 86 | ), 87 | mask_decoder=MaskDecoderHQ( 88 | num_multimask_outputs=3, 89 | transformer=TwoWayTransformer( 90 | depth=2, 91 | embedding_dim=prompt_embed_dim, 92 | mlp_dim=2048, 93 | num_heads=8, 94 | ), 95 | transformer_dim=prompt_embed_dim, 96 | iou_head_depth=3, 97 | iou_head_hidden_dim=256, 98 | vit_dim=encoder_embed_dim, 99 | ), 100 | pixel_mean=[123.675, 116.28, 103.53], 101 | pixel_std=[58.395, 57.12, 57.375], 102 | ) 103 | # sam.eval() 104 | if checkpoint is not None: 105 | with open(checkpoint, "rb") as f: 106 | state_dict = torch.load(f) 107 | info = sam.load_state_dict(state_dict, strict=False) 108 | print(info) 109 | 
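# Freeze all pretrained SAM weights below; only the HQ-SAM additions (hf_token, hf_mlp, compress_vit_feat, embedding_encoder, embedding_maskfeature) keep requires_grad=True for fine-tuning.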
for n, p in sam.named_parameters(): 110 | if 'hf_token' not in n and 'hf_mlp' not in n and 'compress_vit_feat' not in n and 'embedding_encoder' not in n and 'embedding_maskfeature' not in n: 111 | p.requires_grad = False 112 | 113 | return sam -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.whl 2 | anno_test 3 | Benchmark/renderer/envmap_lib 4 | Benchmark/renderer/blender-2.93.3* 5 | Benchmark/renderer/material_lib_v2.blend 6 | Benchmark/dataset/objects/rescale/ 7 | output/ 8 | # assets/ 9 | output_new/ 10 | results_overall/ 11 | *.zip 12 | *.DS_Store 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | 19 | # C extensions 20 | *.so 21 | 22 | # Distribution / packaging 23 | .Python 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | share/python-wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | *.DS_Store 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | *.py,cover 64 | .hypothesis/ 65 | .pytest_cache/ 66 | cover/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | db.sqlite3-journal 77 | 78 | # Flask stuff: 79 | instance/ 80 | .webassets-cache 81 | 82 | # Scrapy stuff: 83 | .scrapy 84 | 85 | # Sphinx documentation 86 | docs/_build/ 87 | 88 | # PyBuilder 89 | .pybuilder/ 90 | target/ 91 | 92 | # Jupyter Notebook 93 | .ipynb_checkpoints 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | # For a library or package, you might want to ignore these files since the code is 101 | # intended to run in multiple environments; otherwise, check them in: 102 | # .python-version 103 | 104 | # pipenv 105 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 106 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 107 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 108 | # install all needed dependencies. 109 | #Pipfile.lock 110 | 111 | # poetry 112 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 113 | # This is especially recommended for binary packages to ensure reproducibility, and is more 114 | # commonly ignored for libraries. 115 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 116 | #poetry.lock 117 | 118 | # pdm 119 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 120 | #pdm.lock 121 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 122 | # in version control. 123 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 124 | .pdm.toml 125 | .pdm-python 126 | .pdm-build/ 127 | 128 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 129 | __pypackages__/ 130 | 131 | # Celery stuff 132 | celerybeat-schedule 133 | celerybeat.pid 134 | 135 | # SageMath parsed files 136 | *.sage.py 137 | 138 | # Environments 139 | .env 140 | .venv 141 | env/ 142 | venv/ 143 | ENV/ 144 | env.bak/ 145 | venv.bak/ 146 | 147 | # Spyder project settings 148 | .spyderproject 149 | .spyproject 150 | 151 | # Rope project settings 152 | .ropeproject 153 | 154 | # mkdocs documentation 155 | /site 156 | 157 | # mypy 158 | .mypy_cache/ 159 | .dmypy.json 160 | dmypy.json 161 | 162 | # Pyre type checker 163 | .pyre/ 164 | 165 | # pytype static type analyzer 166 | .pytype/ 167 | 168 | # Cython debug symbols 169 | cython_debug/ 170 | 171 | # PyCharm 172 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 173 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 174 | # and can be added to the global gitignore or merged into this file. For a more nuclear 175 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 176 | #.idea/ 177 | .DS_Store 178 | .DS_Store 179 | -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/visual/link6.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 17 3 | 4 | newmtl Face064_002_001_002_001 5 | Ns -1.960784 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 1.000000 0.000000 0.000000 8 | Ks 0.003906 0.003906 0.003906 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl Face065_002_001_002_001 15 | Ns -1.960784 16 | Ka 1.000000 1.000000 1.000000 17 | Kd 0.000000 1.000000 0.000000 18 | Ks 0.003906 0.003906 0.003906 19 | Ke 0.000000 0.000000 0.000000 20 | Ni 1.000000 21 | d 1.000000 22 | illum 2 23 | 24 | newmtl Face374_002_001_002_001 25 | Ns -1.960784 26 | Ka 1.000000 1.000000 1.000000 27 | Kd 1.000000 1.000000 1.000000 28 | Ks 0.003906 0.003906 0.003906 29 | Ke 0.000000 0.000000 0.000000 30 | Ni 1.000000 31 | d 1.000000 32 | illum 2 33 | 34 | newmtl Face539_002_001_002_001 35 | Ns -1.960784 36 | Ka 1.000000 1.000000 1.000000 37 | Kd 0.250980 0.250980 0.250980 38 | Ks 0.003906 0.003906 0.003906 39 | Ke 0.000000 0.000000 0.000000 40 | Ni 1.000000 41 | d 1.000000 42 | illum 2 43 | 44 | newmtl Part__Feature001_009_001_002_001 45 | Ns -1.960784 46 | Ka 1.000000 1.000000 1.000000 47 | Kd 0.250980 0.250980 0.250980 48 | Ks 0.003906 0.003906 0.003906 49 | Ke 0.000000 0.000000 0.000000 50 | Ni 1.000000 51 | d 1.000000 52 | illum 2 53 | 54 | newmtl Part__Feature002_006_001_002_001 55 | Ns -1.960784 56 | Ka 1.000000 1.000000 1.000000 57 | Kd 0.250980 0.250980 0.250980 58 | Ks 0.003906 0.003906 0.003906 59 | Ke 0.000000 0.000000 0.000000 60 | Ni 1.000000 61 | d 1.000000 62 | illum 2 63 | 64 | newmtl Shell002_002_001_002_001 65 | Ns -1.960784 66 | Ka 1.000000 1.000000 1.000000 67 | Kd 1.000000 1.000000 1.000000 68 | Ks 0.003906 0.003906 0.003906 69 | Ke 0.000000 0.000000 0.000000 70 | Ni 1.000000 71 | d 1.000000 72 | illum 2 73 | 74 | newmtl Shell003_002_001_002_001 75 | Ns -1.960784 76 | Ka 1.000000 1.000000 1.000000 77 | Kd 1.000000 1.000000 1.000000 78 | Ks 0.003906 0.003906 0.003906 79 | Ke 0.000000 0.000000 0.000000 80 | Ni 1.000000 81 | d 1.000000 82 | illum 2 83 | 84 | newmtl Shell004_001_001_002_001 85 | Ns -1.960784 86 | Ka 1.000000 1.000000 1.000000 87 | Kd 1.000000 
1.000000 1.000000 88 | Ks 0.003906 0.003906 0.003906 89 | Ke 0.000000 0.000000 0.000000 90 | Ni 1.000000 91 | d 1.000000 92 | illum 2 93 | 94 | newmtl Shell005_001_001_002_001 95 | Ns -1.960784 96 | Ka 1.000000 1.000000 1.000000 97 | Kd 1.000000 1.000000 1.000000 98 | Ks 0.003906 0.003906 0.003906 99 | Ke 0.000000 0.000000 0.000000 100 | Ni 1.000000 101 | d 1.000000 102 | illum 2 103 | 104 | newmtl Shell006_003_002_001 105 | Ns -1.960784 106 | Ka 1.000000 1.000000 1.000000 107 | Kd 0.901961 0.921569 0.929412 108 | Ks 0.015625 0.015625 0.015625 109 | Ke 0.000000 0.000000 0.000000 110 | Ni 1.000000 111 | d 1.000000 112 | illum 2 113 | 114 | newmtl Shell007_002_002_001 115 | Ns -1.960784 116 | Ka 1.000000 1.000000 1.000000 117 | Kd 0.250000 0.250000 0.250000 118 | Ks 0.003906 0.003906 0.003906 119 | Ke 0.000000 0.000000 0.000000 120 | Ni 1.000000 121 | d 1.000000 122 | illum 2 123 | 124 | newmtl Shell011_002_002_001 125 | Ns -1.960784 126 | Ka 1.000000 1.000000 1.000000 127 | Kd 1.000000 1.000000 1.000000 128 | Ks 0.003906 0.003906 0.003906 129 | Ke 0.000000 0.000000 0.000000 130 | Ni 1.000000 131 | d 1.000000 132 | illum 2 133 | 134 | newmtl Shell012_002_002_001 135 | Ns -1.960784 136 | Ka 1.000000 1.000000 1.000000 137 | Kd 1.000000 1.000000 1.000000 138 | Ks 0.003906 0.003906 0.003906 139 | Ke 0.000000 0.000000 0.000000 140 | Ni 1.000000 141 | d 1.000000 142 | illum 2 143 | 144 | newmtl Shell_003_001_002_001 145 | Ns -1.960784 146 | Ka 1.000000 1.000000 1.000000 147 | Kd 0.250980 0.250980 0.250980 148 | Ks 0.003906 0.003906 0.003906 149 | Ke 0.000000 0.000000 0.000000 150 | Ni 1.000000 151 | d 1.000000 152 | illum 2 153 | 154 | newmtl Union001_001_001_002_001 155 | Ns -1.960784 156 | Ka 1.000000 1.000000 1.000000 157 | Kd 0.039216 0.541176 0.780392 158 | Ks 0.003906 0.003906 0.003906 159 | Ke 0.000000 0.000000 0.000000 160 | Ni 1.000000 161 | d 1.000000 162 | illum 2 163 | 164 | newmtl Union_001_001_002_001 165 | Ns -1.960784 166 | Ka 1.000000 1.000000 1.000000 167 | Kd 0.039216 0.541176 0.780392 168 | Ks 0.003906 0.003906 0.003906 169 | Ke 0.000000 0.000000 0.000000 170 | Ni 1.000000 171 | d 1.000000 172 | illum 2 173 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/segment_anything/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | from torch.nn import functional as F 10 | from torchvision.transforms.functional import resize, to_pil_image # type: ignore 11 | 12 | from copy import deepcopy 13 | from typing import Tuple 14 | 15 | 16 | class ResizeLongestSide: 17 | """ 18 | Resizes images to longest side 'target_length', as well as provides 19 | methods for resizing coordinates and boxes. Provides methods for 20 | transforming both numpy array and batched torch tensors. 21 | """ 22 | 23 | def __init__(self, target_length: int) -> None: 24 | self.target_length = target_length 25 | 26 | def apply_image(self, image: np.ndarray) -> np.ndarray: 27 | """ 28 | Expects a numpy array with shape HxWxC in uint8 format. 
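Returns the image resized so that its longest side equals target_length, preserving the aspect ratio.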
29 | """ 30 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 31 | return np.array(resize(to_pil_image(image), target_size)) 32 | 33 | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 34 | """ 35 | Expects a numpy array of length 2 in the final dimension. Requires the 36 | original image size in (H, W) format. 37 | """ 38 | old_h, old_w = original_size 39 | new_h, new_w = self.get_preprocess_shape( 40 | original_size[0], original_size[1], self.target_length 41 | ) 42 | coords = deepcopy(coords).astype(float) 43 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 44 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 45 | return coords 46 | 47 | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 48 | """ 49 | Expects a numpy array shape Bx4. Requires the original image size 50 | in (H, W) format. 51 | """ 52 | boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size) 53 | return boxes.reshape(-1, 4) 54 | 55 | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor: 56 | """ 57 | Expects batched images with shape BxCxHxW and float format. This 58 | transformation may not exactly match apply_image. apply_image is 59 | the transformation expected by the model. 60 | """ 61 | # Expects an image in BCHW format. May not exactly match apply_image. 62 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 63 | return F.interpolate( 64 | image, target_size, mode="bilinear", align_corners=False, antialias=True 65 | ) 66 | 67 | def apply_coords_torch( 68 | self, coords: torch.Tensor, original_size: Tuple[int, ...] 69 | ) -> torch.Tensor: 70 | """ 71 | Expects a torch tensor with length 2 in the last dimension. Requires the 72 | original image size in (H, W) format. 73 | """ 74 | old_h, old_w = original_size 75 | new_h, new_w = self.get_preprocess_shape( 76 | original_size[0], original_size[1], self.target_length 77 | ) 78 | coords = deepcopy(coords).to(torch.float) 79 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 80 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 81 | return coords 82 | 83 | def apply_boxes_torch( 84 | self, boxes: torch.Tensor, original_size: Tuple[int, ...] 85 | ) -> torch.Tensor: 86 | """ 87 | Expects a torch tensor with shape Bx4. Requires the original image 88 | size in (H, W) format. 89 | """ 90 | boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size) 91 | return boxes.reshape(-1, 4) 92 | 93 | @staticmethod 94 | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]: 95 | """ 96 | Compute the output size given input size and target long side length. 
97 | """ 98 | scale = long_side_length * 1.0 / max(oldh, oldw) 99 | newh, neww = oldh * scale, oldw * scale 100 | neww = int(neww + 0.5) 101 | newh = int(newh + 0.5) 102 | return (newh, neww) 103 | -------------------------------------------------------------------------------- /assets/robot/franka_description/meshes/collision/finger.obj: -------------------------------------------------------------------------------- 1 | #### 2 | # 3 | # OBJ File Generated by Meshlab 4 | # 5 | #### 6 | # Object finger.obj 7 | # 8 | # Vertices: 52 9 | # Faces: 32 10 | # 11 | #### 12 | vn 0.999991 0.003723 -0.001919 13 | v 0.010360 0.026403 0.000155 14 | vn 0.019341 -0.997893 -0.061925 15 | v 0.010449 0.002583 0.000147 16 | vn -0.999568 -0.025962 0.013789 17 | v -0.010387 0.002534 0.000132 18 | vn -0.999606 -0.009503 0.026403 19 | v -0.010479 0.016102 0.018988 20 | vn -0.000579 0.001464 -0.999999 21 | v -0.010401 0.026309 0.000167 22 | vn -0.044737 0.976483 0.210900 23 | v -0.010389 0.025220 0.019188 24 | vn -0.871286 -0.490748 0.005227 25 | v -0.008730 -0.000024 0.036165 26 | vn 0.999861 0.006488 0.015354 27 | v 0.010400 0.025253 0.019037 28 | vn 0.377718 0.867563 0.323518 29 | v 0.005840 0.014274 0.053803 30 | vn 0.736099 -0.021564 0.676530 31 | v 0.008616 0.013989 0.051328 32 | vn 0.999373 -0.008600 0.034345 33 | v 0.010495 0.015103 0.018436 34 | vn 0.013041 -0.999896 -0.006124 35 | v 0.008693 -0.000133 0.050166 36 | vn -0.998603 -0.032800 0.041418 37 | v -0.008623 -0.000057 0.050953 38 | vn -0.588468 -0.017705 0.808327 39 | v -0.005481 -0.000091 0.053725 40 | vn 0.004085 -0.008700 0.999954 41 | v -0.005278 0.014293 0.053849 42 | vn -0.691057 -0.012018 0.722700 43 | v -0.007778 0.014218 0.052366 44 | vn -0.665951 0.690851 0.281486 45 | v -0.008841 0.013918 0.050589 46 | vn 0.736099 -0.021564 0.676530 47 | v 0.006138 -0.000021 0.053578 48 | vn -0.002818 0.998255 0.058981 49 | v 0.010360 0.026403 0.000155 50 | vn 0.000073 0.000898 -1.000000 51 | v 0.010360 0.026403 0.000155 52 | vn 0.999898 -0.012431 0.007036 53 | v 0.010449 0.002583 0.000147 54 | vn 0.000724 0.000331 -1.000000 55 | v 0.010449 0.002583 0.000147 56 | vn -0.871286 -0.490748 0.005227 57 | v -0.010387 0.002534 0.000132 58 | vn 0.002403 -0.997480 -0.070914 59 | v -0.010387 0.002534 0.000132 60 | vn 0.000073 0.000898 -1.000000 61 | v -0.010387 0.002534 0.000132 62 | vn -0.004486 0.998354 0.057168 63 | v -0.010401 0.026309 0.000167 64 | vn -0.999988 0.004662 -0.001626 65 | v -0.010401 0.026309 0.000167 66 | vn -0.665951 0.690851 0.281486 67 | v -0.010389 0.025220 0.019188 68 | vn -0.999597 0.009346 0.026807 69 | v -0.010389 0.025220 0.019188 70 | vn 0.006493 -0.999457 -0.032313 71 | v -0.008730 -0.000024 0.036165 72 | vn 0.377718 0.867563 0.323518 73 | v 0.010400 0.025253 0.019037 74 | vn -0.000242 0.983230 0.182372 75 | v 0.010400 0.025253 0.019037 76 | vn 0.665647 0.002096 0.746264 77 | v 0.005840 0.014274 0.053803 78 | vn 0.008418 -0.012115 0.999891 79 | v 0.005840 0.014274 0.053803 80 | vn 0.001757 0.953702 0.300749 81 | v 0.005840 0.014274 0.053803 82 | vn 0.377718 0.867563 0.323518 83 | v 0.008616 0.013989 0.051328 84 | vn 0.998361 0.003310 0.057136 85 | v 0.008616 0.013989 0.051328 86 | vn 0.798906 -0.045001 0.599770 87 | v 0.008693 -0.000133 0.050166 88 | vn 0.998687 -0.025065 0.044683 89 | v 0.008693 -0.000133 0.050166 90 | vn -0.769031 -0.017753 0.638965 91 | v -0.008623 -0.000057 0.050953 92 | vn -0.008996 -0.999957 -0.002185 93 | v -0.008623 -0.000057 0.050953 94 | vn -0.871286 -0.490748 0.005227 95 | v -0.008623 -0.000057 0.050953 96 
| vn 0.008418 -0.012115 0.999891 97 | v -0.005481 -0.000091 0.053725 98 | vn -0.002059 -0.999940 0.010793 99 | v -0.005481 -0.000091 0.053725 100 | vn -0.510143 -0.000217 0.860089 101 | v -0.005278 0.014293 0.053849 102 | vn -0.108731 0.943365 0.313433 103 | v -0.005278 0.014293 0.053849 104 | vn -0.665951 0.690851 0.281486 105 | v -0.007778 0.014218 0.052366 106 | vn -0.218924 0.920873 0.322590 107 | v -0.007778 0.014218 0.052366 108 | vn -0.858159 -0.000049 0.513385 109 | v -0.008841 0.013918 0.050589 110 | vn -0.998665 -0.002749 0.051583 111 | v -0.008841 0.013918 0.050589 112 | vn 0.006542 -0.999267 0.037718 113 | v 0.006138 -0.000021 0.053578 114 | vn 0.012751 -0.015529 0.999798 115 | v 0.006138 -0.000021 0.053578 116 | # 52 vertices, 0 vertices normals 117 | 118 | f 20//20 22//22 25//25 119 | f 3//3 4//4 27//27 120 | f 27//27 4//4 29//29 121 | f 2//2 30//30 24//24 122 | f 32//32 6//6 35//35 123 | f 25//25 5//5 20//20 124 | f 37//37 11//11 8//8 125 | f 11//11 39//39 21//21 126 | f 37//37 39//39 11//11 127 | f 42//42 23//23 7//7 128 | f 2//2 12//12 30//30 129 | f 12//12 44//44 30//30 130 | f 8//8 11//11 21//21 131 | f 8//8 21//21 1//1 132 | f 32//32 19//19 6//6 133 | f 6//6 46//46 35//35 134 | f 48//48 46//46 6//6 135 | f 40//40 14//14 16//16 136 | f 3//3 13//13 4//4 137 | f 31//31 9//9 36//36 138 | f 19//19 26//26 6//6 139 | f 4//4 50//50 29//29 140 | f 17//17 47//47 28//28 141 | f 34//34 43//43 52//52 142 | f 15//15 43//43 34//34 143 | f 12//12 51//51 44//44 144 | f 18//18 38//38 10//10 145 | f 44//44 41//41 30//30 146 | f 16//16 14//14 45//45 147 | f 13//13 50//50 4//4 148 | f 18//18 10//10 33//33 149 | f 16//16 49//49 40//40 150 | # 32 faces, 0 coords texture 151 | 152 | # End of File 153 | -------------------------------------------------------------------------------- /Benchmark/evaluation/evaluator.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains the evaluation metrics for Open6DOR Benchmark. 3 | We are currently refining the rotation eval section for fairer evaluation and easier comparison. 4 | Full version coming soon. 5 | """ 6 | import numpy as np 7 | import math 8 | from scipy.spatial.transform import Rotation as R 9 | 10 | 11 | 12 | def projection(rot_mat_A, rot_mat_B, axis): 13 | """ 14 | Project the relative rotation from A to B onto the axis. 
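The returned value is the angle, in degrees, between the axis and its image under the relative rotation B * A^(-1).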
15 | rot_mat: 3x3 rotation matrix 16 | A: ground truth rotation 17 | B: predicted rotation 18 | axis: 3x1 vector 19 | """ 20 | det = np.linalg.det(rot_mat_A) 21 | assert det != 0 # rotation matrix should have determinant +1 or -1 22 | v = np.linalg.inv(rot_mat_A) @ axis 23 | 24 | w = rot_mat_B @ v 25 | angle = np.arccos(np.dot(axis, w) / (np.linalg.norm(axis) * np.linalg.norm(w))) 26 | return np.degrees(angle) 27 | 28 | # quat_gt = [0.884556,-0.093848,-0.436286,0.135678] 29 | quat_gt = [-0.205673,-0.205673,-0.596955,0.772278] 30 | rot_gt = R.from_quat(quat_gt).as_matrix() 31 | # quat_pred = [0.972568,-0.128846,-0.164,0.103027] 32 | # quat_pred = [0.546952,-0.013245,-0.820748,0.16444] 33 | # quat_pred = [0.450043,-0.310077,-0.760036,0.351651] 34 | # quat_pred = [0.270194,-0.590044,-0.570659,0.503183] 35 | 36 | # quat_pred = [0.166216,-0.492937,-0.609121,0.59863] 37 | # quat_pred = [-0.058748,-0.690237,-0.377434,-0.377434] 38 | 39 | 40 | 41 | quat_pred = [0.107351,-0.684364,-0.220191,0.68676] 42 | rot_pred = R.from_quat(quat_pred).as_matrix() 43 | ax = "y" 44 | axis = ax 45 | if ax == "x": 46 | axis = np.array([1, 0, 0]) 47 | elif ax == "y": 48 | axis = np.array([0, 1, 0]) 49 | elif ax == "z": 50 | axis = np.array([0, 0, 1]) 51 | 52 | # if isinstance(axis, np.ndarray): 53 | # deviation = projection(rot_gt, rot_pred, axis) 54 | # print(f"Deviation along axis {axis}: {deviation}") 55 | 56 | 57 | def normalize_quat(quat): 58 | norm = math.sqrt(sum(q ** 2 for q in quat)) 59 | return [q / norm for q in quat] 60 | 61 | def angle_deviation(quat0, quat1): 62 | # Normalize the quaternions 63 | quat0 = normalize_quat(quat0) 64 | quat1 = normalize_quat(quat1) 65 | 66 | # Compute the dot product of the two quaternions 67 | dot_product = sum(q0 * q1 for q0, q1 in zip(quat0, quat1)) 68 | 69 | # Ensure the dot product is within the range [-1, 1] to avoid numerical errors 70 | dot_product = max(-1.0, min(1.0, dot_product)) 71 | 72 | # Compute the angle deviation (in radians) 73 | angle_deviation = 2 * math.acos(dot_product) 74 | 75 | # Convert the angle deviation to degrees 76 | angle_deviation_degrees = math.degrees(angle_deviation) 77 | 78 | return angle_deviation_degrees 79 | 80 | # # Example usage 81 | # quat0 = [0.7071, 0.0, 0.7071, 0.0] # Example quaternion 0 82 | # quat1 = [0.7, 0.0, 0.9, 0.0] # Example quaternion 1 83 | 84 | # angle_deviation = angle_deviation(quat0, quat1) 85 | # print(f"Angle deviation: {angle_deviation} degrees") 86 | 87 | 88 | 89 | def evaluate_rot(quat_gt, quat_pred): 90 | """ 91 | Evaluate the predicted rotation. 
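Returns the deviation in degrees: when the task defines a reference axis, the deviation of that axis direction between ground truth and prediction; otherwise the full quaternion angle difference.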
92 | task_id: str 93 | quat_pred: list of 4 floats 94 | """ 95 | # load the ground truth quaternion 96 | 97 | rot_gt = R.from_quat(quat_gt).as_matrix() 98 | rot_pred = R.from_quat(quat_pred).as_matrix() 99 | task_level = 0#TODO: load task level from the dataset 100 | obj_category = 0#TODO: load object category from the dataset 101 | if task_level == 0: 102 | ax = "z" 103 | elif task_level == 1: 104 | ax = "y" 105 | if obj_category in ["mug", "binder_clips", "toy", "wallet", "headphone"] : 106 | ax = "n" 107 | elif task_level == 2: 108 | ax = 0#TODO: load axis from the dataset 109 | else: 110 | raise ValueError(f"Invalid task level: {task_level}") 111 | axis = ax 112 | if ax == "x": 113 | axis = np.array([1, 0, 0]) 114 | elif ax == "y": 115 | axis = np.array([0, 1, 0]) 116 | elif ax == "z": 117 | axis = np.array([0, 0, 1]) 118 | 119 | deviation = -1 120 | if isinstance(axis, np.ndarray): 121 | deviation = projection(rot_gt, rot_pred, axis) 122 | else: 123 | deviation = angle_deviation(quat_gt, quat_pred) 124 | 125 | return deviation 126 | 127 | 128 | def evaluate_posi(sel_pos, tar_pos, mode): 129 | """ 130 | Evaluate the predicted position. 131 | """ 132 | if mode in ["left", "right", "front", "back", "behind", "top"]: 133 | if mode == "left": 134 | succ += sel_pos[1] > tar_pos[1] 135 | elif mode == "right": 136 | succ += sel_pos[1] < tar_pos[1] 137 | elif mode == "front": 138 | succ += sel_pos[0] > tar_pos[0] 139 | elif mode == "back" or mode == "behind": 140 | succ += sel_pos[0] < tar_pos[0] 141 | elif mode == "top": 142 | succ += sel_pos[2] <= tar_pos[2] 143 | elif mode == "between": 144 | max_sel_pos_x = np.max([sel_pos_1[0], sel_pos_2[0]]) 145 | max_sel_pos_y = np.max([sel_pos_1[1], sel_pos_2[1]]) 146 | min_sel_pos_x = np.min([sel_pos_1[0], sel_pos_2[0]]) 147 | min_sel_pos_y = np.min([sel_pos_1[1], sel_pos_2[1]]) 148 | tar_pos = result["final_obj_pos"][-1] 149 | succ += (min_sel_pos_x < tar_pos[0] < max_sel_pos_x) or (min_sel_pos_y < tar_pos[0] < max_sel_pos_y) 150 | elif mode == "center": 151 | max_sel_pos_x = np.max(sel_pos_all, axis=0)[0] 152 | min_sel_pos_x = np.min(sel_pos_all, axis=0)[0] 153 | max_sel_pos_y = np.max(sel_pos_all, axis=0)[1] 154 | min_sel_pos_y = np.min(sel_pos_all, axis=0)[1] 155 | succ += (min_sel_pos_x < tar_pos[0] < max_sel_pos_x) and (min_sel_pos_y < tar_pos[1] < max_sel_pos_y) -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/README.md: -------------------------------------------------------------------------------- 1 | # Segment Anything 2 | 3 | **[Meta AI Research, FAIR](https://ai.facebook.com/research/)** 4 | 5 | [Alexander Kirillov](https://alexander-kirillov.github.io/), [Eric Mintun](https://ericmintun.github.io/), [Nikhila Ravi](https://nikhilaravi.com/), [Hanzi Mao](https://hanzimao.me/), Chloe Rolland, Laura Gustafson, [Tete Xiao](https://tetexiao.com), [Spencer Whitehead](https://www.spencerwhitehead.com/), Alex Berg, Wan-Yen Lo, [Piotr Dollar](https://pdollar.github.io/), [Ross Girshick](https://www.rossgirshick.info/) 6 | 7 | [[`Paper`](https://ai.facebook.com/research/publications/segment-anything/)] [[`Project`](https://segment-anything.com/)] [[`Demo`](https://segment-anything.com/demo)] [[`Dataset`](https://segment-anything.com/dataset/index.html)] [[`Blog`](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/)] 8 | 9 | ![SAM design](assets/model_diagram.png?raw=true) 10 | 11 | The **Segment Anything Model (SAM)** produces high quality 
object masks from input prompts such as points or boxes, and it can be used to generate masks for all objects in an image. It has been trained on a [dataset](https://segment-anything.com/dataset/index.html) of 11 million images and 1.1 billion masks, and has strong zero-shot performance on a variety of segmentation tasks. 12 | 13 |


16 | 17 | 18 | ## Installation 19 | 20 | The code requires `python>=3.8`, as well as `pytorch>=1.7` and `torchvision>=0.8`. Please follow the instructions [here](https://pytorch.org/get-started/locally/) to install both PyTorch and TorchVision dependencies. Installing both PyTorch and TorchVision with CUDA support is strongly recommended. 21 | 22 | Install Segment Anything: 23 | 24 | ``` 25 | pip install git+https://github.com/facebookresearch/segment-anything.git 26 | ``` 27 | 28 | or clone the repository locally and install with 29 | 30 | ``` 31 | git clone git@github.com:facebookresearch/segment-anything.git 32 | cd segment-anything; pip install -e . 33 | ``` 34 | 35 | The following optional dependencies are necessary for mask post-processing, saving masks in COCO format, the example notebooks, and exporting the model in ONNX format. `jupyter` is also required to run the example notebooks. 36 | ``` 37 | pip install opencv-python pycocotools matplotlib onnxruntime onnx 38 | ``` 39 | 40 | 41 | ## Getting Started 42 | 43 | First download a [model checkpoint](#model-checkpoints). Then the model can be used in just a few lines to get masks from a given prompt: 44 | 45 | ``` 46 | from segment_anything import build_sam, SamPredictor 47 | predictor = SamPredictor(build_sam(checkpoint="<path/to/checkpoint>")) 48 | predictor.set_image(<your_image>) 49 | masks, _, _ = predictor.predict(<input_prompts>) 50 | ``` 51 | 52 | or generate masks for an entire image: 53 | 54 | ``` 55 | from segment_anything import build_sam, SamAutomaticMaskGenerator 56 | mask_generator = SamAutomaticMaskGenerator(build_sam(checkpoint="<path/to/checkpoint>")) 57 | masks = mask_generator.generate(<your_image>) 58 | ``` 59 | 60 | Additionally, masks can be generated for images from the command line: 61 | 62 | ``` 63 | python scripts/amg.py --checkpoint <path/to/checkpoint> --input <image_or_folder> --output <path/to/output> 64 | ``` 65 | 66 | See the example notebooks on [using SAM with prompts](/notebooks/predictor_example.ipynb) and [automatically generating masks](/notebooks/automatic_mask_generator_example.ipynb) for more details. 67 | 68 |
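For a fuller, runnable variant of the snippets above, mirroring how this repository's `Method/vision/test_sam.py` drives SAM (the image path, checkpoint filename, and CUDA device below are placeholder assumptions, not part of the original README):

```
import cv2
import numpy as np
from segment_anything import sam_model_registry, SamPredictor

# Placeholder paths -- point these at your own image and downloaded checkpoint.
sam = sam_model_registry["vit_h"](checkpoint="sam_vit_h_4b8939.pth")
sam.to(device="cuda")
predictor = SamPredictor(sam)

image = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)
predictor.set_image(image)

# A single positive point prompt in pixel coordinates.
masks, scores, logits = predictor.predict(
    point_coords=np.array([[500, 375]]),
    point_labels=np.array([1]),
    multimask_output=True,
)
```

With `multimask_output=True`, `masks` holds three candidate masks of shape (H, W), each with its own confidence score in `scores`.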


72 | 73 | ## ONNX Export 74 | 75 | SAM's lightweight mask decoder can be exported to ONNX format so that it can be run in any environment that supports ONNX runtime, such as in-browser as showcased in the [demo](https://segment-anything.com/demo). Export the model with 76 | 77 | ``` 78 | python scripts/export_onnx_model.py --checkpoint --output 79 | ``` 80 | 81 | See the [example notebook](https://github.com/facebookresearch/segment-anything/blob/main/notebooks/onnx_model_example.ipynb) for details on how to combine image preprocessing via SAM's backbone with mask prediction using the ONNX model. It is recommended to use the latest stable version of PyTorch for ONNX export. 82 | 83 | ## Model Checkpoints 84 | 85 | Three model versions of the model are available with different backbone sizes. These models can be instantiated by running 86 | ``` 87 | from segment_anything import sam_model_registry 88 | sam = sam_model_registry[""](checkpoint="") 89 | ``` 90 | Click the links below to download the checkpoint for the corresponding model name. The default model in bold can also be instantiated with `build_sam`, as in the examples in [Getting Started](#getting-started). 91 | 92 | * **`default` or `vit_h`: [ViT-H SAM model.](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth)** 93 | * `vit_l`: [ViT-L SAM model.](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth) 94 | * `vit_b`: [ViT-B SAM model.](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth) 95 | 96 | ## License 97 | The model is licensed under the [Apache 2.0 license](LICENSE). 98 | 99 | ## Contributing 100 | 101 | See [contributing](CONTRIBUTING.md) and the [code of conduct](CODE_OF_CONDUCT.md). 102 | 103 | ## Contributors 104 | 105 | The Segment Anything project was made possible with the help of many contributors (alphabetical): 106 | 107 | Aaron Adcock, Vaibhav Aggarwal, Morteza Behrooz, Cheng-Yang Fu, Ashley Gabriel, Ahuva Goldstand, Allen Goodman, Sumanth Gurram, Jiabo Hu, Somya Jain, Devansh Kukreja, Robert Kuo, Joshua Lane, Yanghao Li, Lilian Luong, Jitendra Malik, Mallika Malhotra, William Ngan, Omkar Parkhi, Nikhil Raina, Dirk Rowe, Neil Sejoor, Vanessa Stark, Bala Varadarajan, Bram Wasti, Zachary Winstrom 108 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/segment_anything/utils/onnx.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | from torch.nn import functional as F 10 | 11 | from typing import Tuple 12 | 13 | from ..modeling import Sam 14 | from .amg import calculate_stability_score 15 | 16 | 17 | class SamOnnxModel(nn.Module): 18 | """ 19 | This model should not be called directly, but is used in ONNX export. 20 | It combines the prompt encoder, mask decoder, and mask postprocessing of Sam, 21 | with some functions modified to enable model tracing. Also supports extra 22 | options controlling what information. See the ONNX export script for details. 
23 | """ 24 | 25 | def __init__( 26 | self, 27 | model: Sam, 28 | return_single_mask: bool, 29 | use_stability_score: bool = False, 30 | return_extra_metrics: bool = False, 31 | ) -> None: 32 | super().__init__() 33 | self.mask_decoder = model.mask_decoder 34 | self.model = model 35 | self.img_size = model.image_encoder.img_size 36 | self.return_single_mask = return_single_mask 37 | self.use_stability_score = use_stability_score 38 | self.stability_score_offset = 1.0 39 | self.return_extra_metrics = return_extra_metrics 40 | 41 | @staticmethod 42 | def resize_longest_image_size( 43 | input_image_size: torch.Tensor, longest_side: int 44 | ) -> torch.Tensor: 45 | input_image_size = input_image_size.to(torch.float32) 46 | scale = longest_side / torch.max(input_image_size) 47 | transformed_size = scale * input_image_size 48 | transformed_size = torch.floor(transformed_size + 0.5).to(torch.int64) 49 | return transformed_size 50 | 51 | def _embed_points(self, point_coords: torch.Tensor, point_labels: torch.Tensor) -> torch.Tensor: 52 | point_coords = point_coords + 0.5 53 | point_coords = point_coords / self.img_size 54 | point_embedding = self.model.prompt_encoder.pe_layer._pe_encoding(point_coords) 55 | point_labels = point_labels.unsqueeze(-1).expand_as(point_embedding) 56 | 57 | point_embedding = point_embedding * (point_labels != -1) 58 | point_embedding = point_embedding + self.model.prompt_encoder.not_a_point_embed.weight * ( 59 | point_labels == -1 60 | ) 61 | 62 | for i in range(self.model.prompt_encoder.num_point_embeddings): 63 | point_embedding = point_embedding + self.model.prompt_encoder.point_embeddings[ 64 | i 65 | ].weight * (point_labels == i) 66 | 67 | return point_embedding 68 | 69 | def _embed_masks(self, input_mask: torch.Tensor, has_mask_input: torch.Tensor) -> torch.Tensor: 70 | mask_embedding = has_mask_input * self.model.prompt_encoder.mask_downscaling(input_mask) 71 | mask_embedding = mask_embedding + ( 72 | 1 - has_mask_input 73 | ) * self.model.prompt_encoder.no_mask_embed.weight.reshape(1, -1, 1, 1) 74 | return mask_embedding 75 | 76 | def mask_postprocessing(self, masks: torch.Tensor, orig_im_size: torch.Tensor) -> torch.Tensor: 77 | masks = F.interpolate( 78 | masks, 79 | size=(self.img_size, self.img_size), 80 | mode="bilinear", 81 | align_corners=False, 82 | ) 83 | 84 | prepadded_size = self.resize_longest_image_size(orig_im_size, self.img_size) 85 | masks = masks[..., : int(prepadded_size[0]), : int(prepadded_size[1])] 86 | 87 | orig_im_size = orig_im_size.to(torch.int64) 88 | h, w = orig_im_size[0], orig_im_size[1] 89 | masks = F.interpolate(masks, size=(h, w), mode="bilinear", align_corners=False) 90 | return masks 91 | 92 | def select_masks( 93 | self, masks: torch.Tensor, iou_preds: torch.Tensor, num_points: int 94 | ) -> Tuple[torch.Tensor, torch.Tensor]: 95 | # Determine if we should return the multiclick mask or not from the number of points. 96 | # The reweighting is used to avoid control flow. 
97 | score_reweight = torch.tensor( 98 | [[1000] + [0] * (self.model.mask_decoder.num_mask_tokens - 1)] 99 | ).to(iou_preds.device) 100 | score = iou_preds + (num_points - 2.5) * score_reweight 101 | best_idx = torch.argmax(score, dim=1) 102 | masks = masks[torch.arange(masks.shape[0]), best_idx, :, :].unsqueeze(1) 103 | iou_preds = iou_preds[torch.arange(masks.shape[0]), best_idx].unsqueeze(1) 104 | 105 | return masks, iou_preds 106 | 107 | @torch.no_grad() 108 | def forward( 109 | self, 110 | image_embeddings: torch.Tensor, 111 | point_coords: torch.Tensor, 112 | point_labels: torch.Tensor, 113 | mask_input: torch.Tensor, 114 | has_mask_input: torch.Tensor, 115 | orig_im_size: torch.Tensor, 116 | ): 117 | sparse_embedding = self._embed_points(point_coords, point_labels) 118 | dense_embedding = self._embed_masks(mask_input, has_mask_input) 119 | 120 | masks, scores = self.model.mask_decoder.predict_masks( 121 | image_embeddings=image_embeddings, 122 | image_pe=self.model.prompt_encoder.get_dense_pe(), 123 | sparse_prompt_embeddings=sparse_embedding, 124 | dense_prompt_embeddings=dense_embedding, 125 | ) 126 | 127 | if self.use_stability_score: 128 | scores = calculate_stability_score( 129 | masks, self.model.mask_threshold, self.stability_score_offset 130 | ) 131 | 132 | if self.return_single_mask: 133 | masks, scores = self.select_masks(masks, scores, point_coords.shape[1]) 134 | 135 | upscaled_masks = self.mask_postprocessing(masks, orig_im_size) 136 | 137 | if self.return_extra_metrics: 138 | stability_scores = calculate_stability_score( 139 | upscaled_masks, self.model.mask_threshold, self.stability_score_offset 140 | ) 141 | areas = (upscaled_masks > self.model.mask_threshold).sum(-1).sum(-1) 142 | return upscaled_masks, scores, stability_scores, areas, masks 143 | 144 | return upscaled_masks, scores, masks 145 | -------------------------------------------------------------------------------- /Benchmark/bench.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import imageio.v2 as imageio 4 | import os 5 | import argparse 6 | from evaluation import evaluator 7 | import yaml 8 | 9 | mesh_root = "meshes" 10 | def load_task(task_path, bench_config): 11 | # task_config 12 | task_config = json.load(open(task_path, 'r')) 13 | 14 | # task_instruction 15 | task_instruction = task_config["instruction"] 16 | print("instruction:", task_instruction) 17 | 18 | # task_image 19 | if bench_config["image_mode"] == "GIVEN_IMAGE_ISAACGYM": 20 | image_path = task_path.replace("task_config.json", "before-rgb-0-0.png") 21 | task_image = imageio.imread(image_path) 22 | 23 | elif bench_config["image_mode"] == "GIVEN_IMAGE_BLENDER": 24 | pass 25 | 26 | elif bench_config["image_mode"] == "RENDER_IMAGE_ISAACGYM": 27 | from ..Method.interaction import init_gym 28 | gym, cfgs, task_config_now= init_gym(task_config, index=i, random_task=True, no_position = True) 29 | 30 | points_envs, colors_envs, rgb_envs, depth_envs ,seg_envs, ori_points_envs, ori_colors_envs, \ 31 | pixel2pointid, pointid2pixel = gym.refresh_observation(get_visual_obs=True) 32 | 33 | task_image = colors_envs[0] 34 | 35 | elif bench_config["image_mode"] == "RENDER_IMAGE_BLENDER": 36 | from renderer import open6dor_renderer 37 | task_image = None 38 | output_root_path = bench_config["output_path"] 39 | obj_paths = task_config["selected_urdfs"] 40 | obj_ids = [path.split("/")[-2] for path in obj_paths] 41 | 42 | init_poses = task_config["init_obj_pos"] 43 | obj_poses = {} 44 | 
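# Each init_obj_pos entry is a 13-D Isaac Gym rigid-body state: position (3), quaternion in (x, y, z, w) order (4), then linear and angular velocity (3 + 3); only the first seven values (the pose) are used for rendering.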
45 | for i in range(len(obj_ids)): 46 | pos = init_poses[i] 47 | id = obj_ids[i] 48 | position = pos[:3] 49 | quaternion = pos[3:7] 50 | transformation_matrix = open6dor_renderer.create_transformation_matrix(position, quaternion) 51 | obj_poses[id] = transformation_matrix 52 | task_id = "my_test" 53 | script = generate_shell_script(output_root_path, task_id, obj_paths, init_poses, 54 | bench_config["background_material_id"], bench_config["env_map_id"], 55 | bench_config["cam_quaternion"], bench_config["cam_translation"]) 56 | # run shell script 57 | os.system(f"bash {script}") 58 | 59 | return task_config, task_instruction, task_image 60 | 61 | def generate_shell_script(output_root_path, task_id, obj_paths, init_poses, 62 | background_material_id, env_map_id, cam_quaternion, cam_translation): 63 | script_name = "renderer/run_renderer.sh" 64 | command = "cd renderer\n" 65 | command += f"./blender-2.93.3-linux-x64/blender material_lib_v2.blend --background --python open6dor_renderer.py -- \\\n" 66 | command += f" --output_root_path {output_root_path} \\\n" 67 | command += f" --task_id {task_id} \\\n" 68 | command += f" --obj_paths {' '.join(obj_paths)} \\\n" 69 | init_obj_pos_flat = ' '.join(map(str, [item for sublist in init_poses for item in sublist])) 70 | command += f" --init_obj_pos {init_obj_pos_flat} \\\n" 71 | command += f" --background_material_id {background_material_id} \\\n" 72 | command += f" --env_map_id {env_map_id} \\\n" 73 | command += f" --cam_quaternion {' '.join(map(str, cam_quaternion))} \\\n" 74 | command += f" --cam_translation {' '.join(map(str, cam_translation))}\n" 75 | 76 | shell_file_content = f"#!/bin/bash\n\n{command}" 77 | 78 | with open(script_name, "w") as shell_file: 79 | shell_file.write(shell_file_content) 80 | 81 | print(f"Shell script {script_name} generated successfully.") 82 | print("=============================================") 83 | 84 | return script_name 85 | 86 | def eval_task(cfgs, pred_pose, use_rot = False): 87 | if use_rot: 88 | pred_rot = pred_pose["rotation"] 89 | rot_gt = list(cfgs['anno_target']['annotation'].values())[0]["quat"] 90 | rot_deviation = evaluator.evaluate_rot(rot_gt, pred_rot) 91 | print(f"Rotation deviation: {rot_deviation} degrees") 92 | 93 | pos_bases = cfgs['init_obj_pos'] 94 | pred_pos = pred_pose["position"] 95 | pos_eval = evaluator.evaluate_posi(pred_pos, pos_bases, "behind") 96 | 97 | return rot_deviation, pos_eval 98 | 99 | def method_template(cfgs, task_instruction, task_image): 100 | pred_pose = { 101 | "position": [0,0,0], 102 | "rotation": [0,0,0,0] 103 | } 104 | return pred_pose 105 | 106 | if __name__ == "__main__": 107 | 108 | parser = argparse.ArgumentParser(description="Benchmarking script for task evaluation") 109 | parser.add_argument("--mode", type=str, choices=["load_test", "eval"], help="Path to the task configuration file") 110 | parser.add_argument("--task_data", type=str, default="6dof", help="path set or single path to the task configuration file") 111 | parser.add_argument("--image_mode", type=str, default="GIVEN_IMAGE_ISAACGYM", help="Image mode") 112 | parser.add_argument("--output_path", type=str, default="../output/test", help="Path to the output directory") 113 | 114 | _args = parser.parse_args() 115 | 116 | render_configs = yaml.load(open("bench_config.yaml", 'r'), Loader=yaml.FullLoader) 117 | import pdb; pdb.set_trace() 118 | # merge the two configs 119 | bench_config = {**_args.__dict__, **render_configs} 120 | if bench_config["task_data"] == "6dof": 121 | task_paths = 
glob.glob('tasks/6DoF/*/*/*/task_config_new2.json') 122 | elif bench_config["task_data"] == "position": 123 | task_paths = glob.glob('tasks/position/*/*/*/task_config_new2.json') 124 | elif bench_config["task_data"] == "rotation": 125 | task_paths = glob.glob('tasks/rotation/*/*/*/task_config_new2.json') 126 | else: 127 | task_paths = [bench_config["task_data"]] 128 | 129 | if bench_config["mode"] == "load_test": 130 | for task_path in task_paths: 131 | task_config, task_instruction, task_image = load_task(task_path, bench_config) 132 | 133 | elif bench_config["mode"] == "eval": 134 | USE_ROT = False if bench_config["task_data"] == "position" else True 135 | for task_path in task_paths: 136 | task_config = json.load(open(task_path, 'r')) 137 | task_config, task_instruction, task_image = load_task(task_path, bench_config) 138 | pred_pose = method_template(task_config, task_instruction, task_image) 139 | eval_task(task_config, pred_pose, use_rot = USE_ROT) 140 | -------------------------------------------------------------------------------- /Method/isaacgym0/asset_info.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | 10 | 11 | Asset and Environment Information 12 | --------------------------------- 13 | Demonstrates introspection capabilities of the gym api at the asset and environment levels 14 | - Once an asset is loaded its properties can be queried 15 | - Assets in environments can be queried and their current states be retrieved 16 | """ 17 | 18 | import os 19 | from isaacgym import gymapi 20 | from isaacgym import gymutil 21 | 22 | 23 | def print_asset_info(asset, name): 24 | print("======== Asset info %s: ========" % (name)) 25 | num_bodies = gym.get_asset_rigid_body_count(asset) 26 | num_joints = gym.get_asset_joint_count(asset) 27 | num_dofs = gym.get_asset_dof_count(asset) 28 | print("Got %d bodies, %d joints, and %d DOFs" % 29 | (num_bodies, num_joints, num_dofs)) 30 | 31 | # Iterate through bodies 32 | print("Bodies:") 33 | for i in range(num_bodies): 34 | name = gym.get_asset_rigid_body_name(asset, i) 35 | print(" %2d: '%s'" % (i, name)) 36 | 37 | # Iterate through joints 38 | print("Joints:") 39 | for i in range(num_joints): 40 | name = gym.get_asset_joint_name(asset, i) 41 | type = gym.get_asset_joint_type(asset, i) 42 | type_name = gym.get_joint_type_string(type) 43 | print(" %2d: '%s' (%s)" % (i, name, type_name)) 44 | 45 | # iterate through degrees of freedom (DOFs) 46 | print("DOFs:") 47 | for i in range(num_dofs): 48 | name = gym.get_asset_dof_name(asset, i) 49 | type = gym.get_asset_dof_type(asset, i) 50 | type_name = gym.get_dof_type_string(type) 51 | print(" %2d: '%s' (%s)" % (i, name, type_name)) 52 | 53 | 54 | def print_actor_info(gym, env, actor_handle): 55 | 56 | name = gym.get_actor_name(env, actor_handle) 57 | 58 | body_names = gym.get_actor_rigid_body_names(env, actor_handle) 59 | body_dict = gym.get_actor_rigid_body_dict(env, actor_handle) 60 | 61 | joint_names = gym.get_actor_joint_names(env, actor_handle) 62 | joint_dict = gym.get_actor_joint_dict(env, actor_handle) 63 | 64 | 
dof_names = gym.get_actor_dof_names(env, actor_handle) 65 | dof_dict = gym.get_actor_dof_dict(env, actor_handle) 66 | 67 | print() 68 | print("===== Actor: %s =======================================" % name) 69 | 70 | print("\nBodies") 71 | print(body_names) 72 | print(body_dict) 73 | 74 | print("\nJoints") 75 | print(joint_names) 76 | print(joint_dict) 77 | 78 | print("\n Degrees Of Freedom (DOFs)") 79 | print(dof_names) 80 | print(dof_dict) 81 | print() 82 | 83 | # Get body state information 84 | body_states = gym.get_actor_rigid_body_states( 85 | env, actor_handle, gymapi.STATE_ALL) 86 | 87 | # Print some state slices 88 | print("Poses from Body State:") 89 | print(body_states['pose']) # print just the poses 90 | 91 | print("\nVelocities from Body State:") 92 | print(body_states['vel']) # print just the velocities 93 | print() 94 | 95 | # iterate through bodies and print name and position 96 | body_positions = body_states['pose']['p'] 97 | for i in range(len(body_names)): 98 | print("Body '%s' has position" % body_names[i], body_positions[i]) 99 | 100 | print("\nDOF states:") 101 | 102 | # get DOF states 103 | dof_states = gym.get_actor_dof_states(env, actor_handle, gymapi.STATE_ALL) 104 | 105 | # print some state slices 106 | # Print all states for each degree of freedom 107 | print(dof_states) 108 | print() 109 | 110 | # iterate through DOFs and print name and position 111 | dof_positions = dof_states['pos'] 112 | for i in range(len(dof_names)): 113 | print("DOF '%s' has position" % dof_names[i], dof_positions[i]) 114 | 115 | 116 | # initialize gym 117 | gym = gymapi.acquire_gym() 118 | 119 | # parse arguments 120 | args = gymutil.parse_arguments(description="Asset and Environment Information") 121 | 122 | # create simulation context 123 | sim_params = gymapi.SimParams() 124 | 125 | sim_params.use_gpu_pipeline = False 126 | if args.use_gpu_pipeline: 127 | print("WARNING: Forcing CPU pipeline.") 128 | 129 | sim = gym.create_sim(args.compute_device_id, args.graphics_device_id, args.physics_engine, sim_params) 130 | 131 | if sim is None: 132 | print("*** Failed to create sim") 133 | quit() 134 | 135 | # Print out the working directory 136 | # helpful in determining the relative location that assets will be loaded from 137 | print("Working directory: %s" % os.getcwd()) 138 | 139 | # Path where assets are searched, relative to the current working directory 140 | asset_root = "../../assets" 141 | 142 | # List of assets that will be loaded, both URDF and MJCF files are supported 143 | asset_files = ["urdf/cartpole.urdf", 144 | "urdf/franka_description/robots/franka_panda.urdf", 145 | "mjcf/nv_ant.xml"] 146 | asset_names = ["cartpole", "franka", "ant"] 147 | loaded_assets = [] 148 | 149 | # Load the assets and ensure that we are successful 150 | for asset in asset_files: 151 | print("Loading asset '%s' from '%s'" % (asset, asset_root)) 152 | 153 | current_asset = gym.load_asset(sim, asset_root, asset) 154 | 155 | if current_asset is None: 156 | print("*** Failed to load asset '%s'" % (asset, asset_root)) 157 | quit() 158 | loaded_assets.append(current_asset) 159 | 160 | for i in range(len(loaded_assets)): 161 | print() 162 | print_asset_info(loaded_assets[i], asset_names[i]) 163 | 164 | # Setup environment spacing 165 | spacing = 2.0 166 | lower = gymapi.Vec3(-spacing, 0.0, -spacing) 167 | upper = gymapi.Vec3(spacing, spacing, spacing) 168 | 169 | # Create one environment 170 | env = gym.create_env(sim, lower, upper, 1) 171 | 172 | # Add actors to environment 173 | pose = gymapi.Transform() 
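# Stack the assets two units apart along z, each rotated -90 degrees about the x-axis (gymapi.Quat takes (x, y, z, w)).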
174 | for i in range(len(loaded_assets)): 175 | pose.p = gymapi.Vec3(0.0, 0.0, i * 2) 176 | pose.r = gymapi.Quat(-0.707107, 0.0, 0.0, 0.707107) 177 | gym.create_actor(env, loaded_assets[i], pose, asset_names[i], -1, -1) 178 | 179 | print("=== Environment info: ================================================") 180 | 181 | actor_count = gym.get_actor_count(env) 182 | print("%d actors total" % actor_count) 183 | 184 | # Iterate through all actors for the environment 185 | for i in range(actor_count): 186 | actor_handle = gym.get_actor_handle(env, i) 187 | print_actor_info(gym, env, actor_handle) 188 | 189 | # Cleanup the simulator 190 | gym.destroy_sim(sim) 191 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/scripts/export_onnx_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | 9 | from segment_anything import build_sam, build_sam_vit_b, build_sam_vit_l 10 | from segment_anything.utils.onnx import SamOnnxModel 11 | 12 | import argparse 13 | import warnings 14 | 15 | try: 16 | import onnxruntime # type: ignore 17 | 18 | onnxruntime_exists = True 19 | except ImportError: 20 | onnxruntime_exists = False 21 | 22 | parser = argparse.ArgumentParser( 23 | description="Export the SAM prompt encoder and mask decoder to an ONNX model." 24 | ) 25 | 26 | parser.add_argument( 27 | "--checkpoint", type=str, required=True, help="The path to the SAM model checkpoint." 28 | ) 29 | 30 | parser.add_argument( 31 | "--output", type=str, required=True, help="The filename to save the ONNX model to." 32 | ) 33 | 34 | parser.add_argument( 35 | "--model-type", 36 | type=str, 37 | default="default", 38 | help="In ['default', 'vit_b', 'vit_l']. Which type of SAM model to export.", 39 | ) 40 | 41 | parser.add_argument( 42 | "--return-single-mask", 43 | action="store_true", 44 | help=( 45 | "If true, the exported ONNX model will only return the best mask, " 46 | "instead of returning multiple masks. For high resolution images " 47 | "this can improve runtime when upscaling masks is expensive." 48 | ), 49 | ) 50 | 51 | parser.add_argument( 52 | "--opset", 53 | type=int, 54 | default=17, 55 | help="The ONNX opset version to use. Must be >=11", 56 | ) 57 | 58 | parser.add_argument( 59 | "--quantize-out", 60 | type=str, 61 | default=None, 62 | help=( 63 | "If set, will quantize the model and save it with this name. " 64 | "Quantization is performed with quantize_dynamic from onnxruntime.quantization.quantize." 65 | ), 66 | ) 67 | 68 | parser.add_argument( 69 | "--gelu-approximate", 70 | action="store_true", 71 | help=( 72 | "Replace GELU operations with approximations using tanh. Useful " 73 | "for some runtimes that have slow or unimplemented erf ops, used in GELU." 74 | ), 75 | ) 76 | 77 | parser.add_argument( 78 | "--use-stability-score", 79 | action="store_true", 80 | help=( 81 | "Replaces the model's predicted mask quality score with the stability " 82 | "score calculated on the low resolution masks using an offset of 1.0.
" 83 | ), 84 | ) 85 | 86 | parser.add_argument( 87 | "--return-extra-metrics", 88 | action="store_true", 89 | help=( 90 | "The model will return five results: (masks, scores, stability_scores, " 91 | "areas, low_res_logits) instead of the usual three. This can be " 92 | "significantly slower for high resolution outputs." 93 | ), 94 | ) 95 | 96 | 97 | def run_export( 98 | model_type: str, 99 | checkpoint: str, 100 | output: str, 101 | opset: int, 102 | return_single_mask: bool, 103 | gelu_approximate: bool = False, 104 | use_stability_score: bool = False, 105 | return_extra_metrics=False, 106 | ): 107 | print("Loading model...") 108 | if model_type == "vit_b": 109 | sam = build_sam_vit_b(checkpoint) 110 | elif model_type == "vit_l": 111 | sam = build_sam_vit_l(checkpoint) 112 | else: 113 | sam = build_sam(checkpoint) 114 | 115 | onnx_model = SamOnnxModel( 116 | model=sam, 117 | return_single_mask=return_single_mask, 118 | use_stability_score=use_stability_score, 119 | return_extra_metrics=return_extra_metrics, 120 | ) 121 | 122 | if gelu_approximate: 123 | for n, m in onnx_model.named_modules(): 124 | if isinstance(m, torch.nn.GELU): 125 | m.approximate = "tanh" 126 | 127 | dynamic_axes = { 128 | "point_coords": {1: "num_points"}, 129 | "point_labels": {1: "num_points"}, 130 | } 131 | 132 | embed_dim = sam.prompt_encoder.embed_dim 133 | embed_size = sam.prompt_encoder.image_embedding_size 134 | mask_input_size = [4 * x for x in embed_size] 135 | dummy_inputs = { 136 | "image_embeddings": torch.randn(1, embed_dim, *embed_size, dtype=torch.float), 137 | "point_coords": torch.randint(low=0, high=1024, size=(1, 5, 2), dtype=torch.float), 138 | "point_labels": torch.randint(low=0, high=4, size=(1, 5), dtype=torch.float), 139 | "mask_input": torch.randn(1, 1, *mask_input_size, dtype=torch.float), 140 | "has_mask_input": torch.tensor([1], dtype=torch.float), 141 | "orig_im_size": torch.tensor([1500, 2250], dtype=torch.float), 142 | } 143 | 144 | _ = onnx_model(**dummy_inputs) 145 | 146 | output_names = ["masks", "iou_predictions", "low_res_masks"] 147 | 148 | with warnings.catch_warnings(): 149 | warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) 150 | warnings.filterwarnings("ignore", category=UserWarning) 151 | with open(output, "wb") as f: 152 | print(f"Exporing onnx model to {output}...") 153 | torch.onnx.export( 154 | onnx_model, 155 | tuple(dummy_inputs.values()), 156 | f, 157 | export_params=True, 158 | verbose=False, 159 | opset_version=opset, 160 | do_constant_folding=True, 161 | input_names=list(dummy_inputs.keys()), 162 | output_names=output_names, 163 | dynamic_axes=dynamic_axes, 164 | ) 165 | 166 | if onnxruntime_exists: 167 | ort_inputs = {k: to_numpy(v) for k, v in dummy_inputs.items()} 168 | ort_session = onnxruntime.InferenceSession(output) 169 | _ = ort_session.run(None, ort_inputs) 170 | print("Model has successfully been run with ONNXRuntime.") 171 | 172 | 173 | def to_numpy(tensor): 174 | return tensor.cpu().numpy() 175 | 176 | 177 | if __name__ == "__main__": 178 | args = parser.parse_args() 179 | run_export( 180 | model_type=args.model_type, 181 | checkpoint=args.checkpoint, 182 | output=args.output, 183 | opset=args.opset, 184 | return_single_mask=args.return_single_mask, 185 | gelu_approximate=args.gelu_approximate, 186 | use_stability_score=args.use_stability_score, 187 | return_extra_metrics=args.return_extra_metrics, 188 | ) 189 | 190 | if args.quantize_out is not None: 191 | assert onnxruntime_exists, "onnxruntime is required to quantize the model." 
192 | from onnxruntime.quantization import QuantType # type: ignore 193 | from onnxruntime.quantization.quantize import quantize_dynamic # type: ignore 194 | 195 | print(f"Quantizing model and writing to {args.quantize_out}...") 196 | quantize_dynamic( 197 | model_input=args.output, 198 | model_output=args.quantize_out, 199 | optimize_model=True, 200 | per_channel=False, 201 | reduce_range=False, 202 | weight_type=QuantType.QUInt8, 203 | ) 204 | print("Done!") 205 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/segment_anything/modeling/mask_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | from torch import nn 9 | from torch.nn import functional as F 10 | 11 | from typing import List, Tuple, Type 12 | 13 | from .common import LayerNorm2d 14 | 15 | 16 | class MaskDecoder(nn.Module): 17 | def __init__( 18 | self, 19 | *, 20 | transformer_dim: int, 21 | transformer: nn.Module, 22 | num_multimask_outputs: int = 3, 23 | activation: Type[nn.Module] = nn.GELU, 24 | iou_head_depth: int = 3, 25 | iou_head_hidden_dim: int = 256, 26 | ) -> None: 27 | """ 28 | Predicts masks given an image and prompt embeddings, using a 29 | transformer architecture. 30 | 31 | Arguments: 32 | transformer_dim (int): the channel dimension of the transformer 33 | transformer (nn.Module): the transformer used to predict masks 34 | num_multimask_outputs (int): the number of masks to predict 35 | when disambiguating masks 36 | activation (nn.Module): the type of activation to use when 37 | upscaling masks 38 | iou_head_depth (int): the depth of the MLP used to predict 39 | mask quality 40 | iou_head_hidden_dim (int): the hidden dimension of the MLP 41 | used to predict mask quality 42 | """ 43 | super().__init__() 44 | self.transformer_dim = transformer_dim 45 | self.transformer = transformer 46 | 47 | self.num_multimask_outputs = num_multimask_outputs 48 | 49 | self.iou_token = nn.Embedding(1, transformer_dim) 50 | self.num_mask_tokens = num_multimask_outputs + 1 51 | self.mask_tokens = nn.Embedding(self.num_mask_tokens, transformer_dim) 52 | 53 | self.output_upscaling = nn.Sequential( 54 | nn.ConvTranspose2d(transformer_dim, transformer_dim // 4, kernel_size=2, stride=2), 55 | LayerNorm2d(transformer_dim // 4), 56 | activation(), 57 | nn.ConvTranspose2d(transformer_dim // 4, transformer_dim // 8, kernel_size=2, stride=2), 58 | activation(), 59 | ) 60 | self.output_hypernetworks_mlps = nn.ModuleList( 61 | [ 62 | MLP(transformer_dim, transformer_dim, transformer_dim // 8, 3) 63 | for i in range(self.num_mask_tokens) 64 | ] 65 | ) 66 | 67 | self.iou_prediction_head = MLP( 68 | transformer_dim, iou_head_hidden_dim, self.num_mask_tokens, iou_head_depth 69 | ) 70 | 71 | def forward( 72 | self, 73 | image_embeddings: torch.Tensor, 74 | image_pe: torch.Tensor, 75 | sparse_prompt_embeddings: torch.Tensor, 76 | dense_prompt_embeddings: torch.Tensor, 77 | multimask_output: bool, 78 | hq_token_only: bool, 79 | interm_embeddings: torch.Tensor, 80 | ) -> Tuple[torch.Tensor, torch.Tensor]: 81 | """ 82 | Predict masks given image and prompt embeddings. 
83 | 84 | Arguments: 85 | image_embeddings (torch.Tensor): the embeddings from the image encoder 86 | image_pe (torch.Tensor): positional encoding with the shape of image_embeddings 87 | sparse_prompt_embeddings (torch.Tensor): the embeddings of the points and boxes 88 | dense_prompt_embeddings (torch.Tensor): the embeddings of the mask inputs 89 | multimask_output (bool): Whether to return multiple masks or a single 90 | mask. 91 | 92 | Returns: 93 | torch.Tensor: batched predicted masks 94 | torch.Tensor: batched predictions of mask quality 95 | """ 96 | masks, iou_pred = self.predict_masks( 97 | image_embeddings=image_embeddings, 98 | image_pe=image_pe, 99 | sparse_prompt_embeddings=sparse_prompt_embeddings, 100 | dense_prompt_embeddings=dense_prompt_embeddings, 101 | ) 102 | 103 | # Select the correct mask or masks for output 104 | if multimask_output: 105 | mask_slice = slice(1, None) 106 | else: 107 | mask_slice = slice(0, 1) 108 | masks = masks[:, mask_slice, :, :] 109 | iou_pred = iou_pred[:, mask_slice] 110 | 111 | # Prepare output 112 | return masks, iou_pred 113 | 114 | def predict_masks( 115 | self, 116 | image_embeddings: torch.Tensor, 117 | image_pe: torch.Tensor, 118 | sparse_prompt_embeddings: torch.Tensor, 119 | dense_prompt_embeddings: torch.Tensor, 120 | ) -> Tuple[torch.Tensor, torch.Tensor]: 121 | """Predicts masks. See 'forward' for more details.""" 122 | # Concatenate output tokens 123 | output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0) 124 | output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1) 125 | tokens = torch.cat((output_tokens, sparse_prompt_embeddings), dim=1) 126 | 127 | # Expand per-image data in batch direction to be per-mask 128 | src = torch.repeat_interleave(image_embeddings, tokens.shape[0], dim=0) 129 | src = src + dense_prompt_embeddings 130 | pos_src = torch.repeat_interleave(image_pe, tokens.shape[0], dim=0) 131 | b, c, h, w = src.shape 132 | 133 | # Run the transformer 134 | hs, src = self.transformer(src, pos_src, tokens) 135 | iou_token_out = hs[:, 0, :] 136 | mask_tokens_out = hs[:, 1 : (1 + self.num_mask_tokens), :] 137 | 138 | # Upscale mask embeddings and predict masks using the mask tokens 139 | src = src.transpose(1, 2).view(b, c, h, w) 140 | upscaled_embedding = self.output_upscaling(src) 141 | hyper_in_list: List[torch.Tensor] = [] 142 | for i in range(self.num_mask_tokens): 143 | hyper_in_list.append(self.output_hypernetworks_mlps[i](mask_tokens_out[:, i, :])) 144 | hyper_in = torch.stack(hyper_in_list, dim=1) 145 | b, c, h, w = upscaled_embedding.shape 146 | masks = (hyper_in @ upscaled_embedding.view(b, c, h * w)).view(b, -1, h, w) 147 | 148 | # Generate mask quality predictions 149 | iou_pred = self.iou_prediction_head(iou_token_out) 150 | 151 | return masks, iou_pred 152 | 153 | 154 | # Lightly adapted from 155 | # https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py # noqa 156 | class MLP(nn.Module): 157 | def __init__( 158 | self, 159 | input_dim: int, 160 | hidden_dim: int, 161 | output_dim: int, 162 | num_layers: int, 163 | sigmoid_output: bool = False, 164 | ) -> None: 165 | super().__init__() 166 | self.num_layers = num_layers 167 | h = [hidden_dim] * (num_layers - 1) 168 | self.layers = nn.ModuleList( 169 | nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]) 170 | ) 171 | self.sigmoid_output = sigmoid_output 172 | 173 | def forward(self, x): 174 | for i, layer in 
enumerate(self.layers): 175 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 176 | if self.sigmoid_output: 177 | x = F.sigmoid(x) 178 | return x -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/segment_anything/modeling/sam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | from torch import nn 9 | from torch.nn import functional as F 10 | 11 | from typing import Any, Dict, List, Tuple 12 | 13 | from .image_encoder import ImageEncoderViT 14 | from .mask_decoder import MaskDecoder 15 | from .prompt_encoder import PromptEncoder 16 | 17 | 18 | class Sam(nn.Module): 19 | mask_threshold: float = 0.0 20 | image_format: str = "RGB" 21 | 22 | def __init__( 23 | self, 24 | image_encoder: ImageEncoderViT, 25 | prompt_encoder: PromptEncoder, 26 | mask_decoder: MaskDecoder, 27 | pixel_mean: List[float] = [123.675, 116.28, 103.53], 28 | pixel_std: List[float] = [58.395, 57.12, 57.375], 29 | ) -> None: 30 | """ 31 | SAM predicts object masks from an image and input prompts. 32 | 33 | Arguments: 34 | image_encoder (ImageEncoderViT): The backbone used to encode the 35 | image into image embeddings that allow for efficient mask prediction. 36 | prompt_encoder (PromptEncoder): Encodes various types of input prompts. 37 | mask_decoder (MaskDecoder): Predicts masks from the image embeddings 38 | and encoded prompts. 39 | pixel_mean (list(float)): Mean values for normalizing pixels in the input image. 40 | pixel_std (list(float)): Std values for normalizing pixels in the input image. 41 | """ 42 | super().__init__() 43 | self.image_encoder = image_encoder 44 | self.prompt_encoder = prompt_encoder 45 | self.mask_decoder = mask_decoder 46 | self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1), False) 47 | self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False) 48 | 49 | @property 50 | def device(self) -> Any: 51 | return self.pixel_mean.device 52 | 53 | @torch.no_grad() 54 | def forward( 55 | self, 56 | batched_input: List[Dict[str, Any]], 57 | multimask_output: bool, 58 | ) -> List[Dict[str, torch.Tensor]]: 59 | """ 60 | Predicts masks end-to-end from provided images and prompts. 61 | If prompts are not known in advance, using SamPredictor is 62 | recommended over calling the model directly. 63 | 64 | Arguments: 65 | batched_input (list(dict)): A list over input images, each a 66 | dictionary with the following keys. A prompt key can be 67 | excluded if it is not present. 68 | 'image': The image as a torch tensor in 3xHxW format, 69 | already transformed for input to the model. 70 | 'original_size': (tuple(int, int)) The original size of 71 | the image before transformation, as (H, W). 72 | 'point_coords': (torch.Tensor) Batched point prompts for 73 | this image, with shape BxNx2. Already transformed to the 74 | input frame of the model. 75 | 'point_labels': (torch.Tensor) Batched labels for point prompts, 76 | with shape BxN. 77 | 'boxes': (torch.Tensor) Batched box inputs, with shape Bx4. 78 | Already transformed to the input frame of the model. 79 | 'mask_inputs': (torch.Tensor) Batched mask inputs to the model, 80 | in the form Bx1xHxW. 
81 | multimask_output (bool): Whether the model should predict multiple 82 | disambiguating masks, or return a single mask. 83 | 84 | Returns: 85 | (list(dict)): A list over input images, where each element is 86 | as dictionary with the following keys. 87 | 'masks': (torch.Tensor) Batched binary mask predictions, 88 | with shape BxCxHxW, where B is the number of input promts, 89 | C is determiend by multimask_output, and (H, W) is the 90 | original size of the image. 91 | 'iou_predictions': (torch.Tensor) The model's predictions 92 | of mask quality, in shape BxC. 93 | 'low_res_logits': (torch.Tensor) Low resolution logits with 94 | shape BxCxHxW, where H=W=256. Can be passed as mask input 95 | to subsequent iterations of prediction. 96 | """ 97 | input_images = torch.stack([self.preprocess(x["image"]) for x in batched_input], dim=0) 98 | image_embeddings = self.image_encoder(input_images) 99 | 100 | outputs = [] 101 | for image_record, curr_embedding in zip(batched_input, image_embeddings): 102 | if "point_coords" in image_record: 103 | points = (image_record["point_coords"], image_record["point_labels"]) 104 | else: 105 | points = None 106 | sparse_embeddings, dense_embeddings = self.prompt_encoder( 107 | points=points, 108 | boxes=image_record.get("boxes", None), 109 | masks=image_record.get("mask_inputs", None), 110 | ) 111 | low_res_masks, iou_predictions = self.mask_decoder( 112 | image_embeddings=curr_embedding.unsqueeze(0), 113 | image_pe=self.prompt_encoder.get_dense_pe(), 114 | sparse_prompt_embeddings=sparse_embeddings, 115 | dense_prompt_embeddings=dense_embeddings, 116 | multimask_output=multimask_output, 117 | ) 118 | masks = self.postprocess_masks( 119 | low_res_masks, 120 | input_size=image_record["image"].shape[-2:], 121 | original_size=image_record["original_size"], 122 | ) 123 | masks = masks > self.mask_threshold 124 | outputs.append( 125 | { 126 | "masks": masks, 127 | "iou_predictions": iou_predictions, 128 | "low_res_logits": low_res_masks, 129 | } 130 | ) 131 | return outputs 132 | 133 | def postprocess_masks( 134 | self, 135 | masks: torch.Tensor, 136 | input_size: Tuple[int, ...], 137 | original_size: Tuple[int, ...], 138 | ) -> torch.Tensor: 139 | """ 140 | Remove padding and upscale masks to the original image size. 141 | 142 | Arguments: 143 | masks (torch.Tensor): Batched masks from the mask_decoder, 144 | in BxCxHxW format. 145 | input_size (tuple(int, int)): The size of the image input to the 146 | model, in (H, W) format. Used to remove padding. 147 | original_size (tuple(int, int)): The original size of the image 148 | before resizing for input to the model, in (H, W) format. 149 | 150 | Returns: 151 | (torch.Tensor): Batched masks in BxCxHxW format, where (H, W) 152 | is given by original_size. 
153 | """ 154 | masks = F.interpolate( 155 | masks, 156 | (self.image_encoder.img_size, self.image_encoder.img_size), 157 | mode="bilinear", 158 | align_corners=False, 159 | ) 160 | masks = masks[..., : input_size[0], : input_size[1]] 161 | masks = F.interpolate(masks, original_size, mode="bilinear", align_corners=False) 162 | return masks 163 | 164 | def preprocess(self, x: torch.Tensor) -> torch.Tensor: 165 | """Normalize pixel values and pad to a square input.""" 166 | # Normalize colors 167 | x = (x - self.pixel_mean) / self.pixel_std 168 | 169 | # Pad 170 | h, w = x.shape[-2:] 171 | padh = self.image_encoder.img_size - h 172 | padw = self.image_encoder.img_size - w 173 | x = F.pad(x, (0, padw, 0, padh)) 174 | return x 175 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/segment_anything/scripts/amg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 # type: ignore 8 | 9 | from segment_anything import SamAutomaticMaskGenerator, sam_model_registry 10 | 11 | import argparse 12 | import json 13 | import os 14 | from typing import Any, Dict, List 15 | 16 | parser = argparse.ArgumentParser( 17 | description=( 18 | "Runs automatic mask generation on an input image or directory of images, " 19 | "and outputs masks as either PNGs or COCO-style RLEs. Requires open-cv, " 20 | "as well as pycocotools if saving in RLE format." 21 | ) 22 | ) 23 | 24 | parser.add_argument( 25 | "--input", 26 | type=str, 27 | required=True, 28 | help="Path to either a single input image or folder of images.", 29 | ) 30 | 31 | parser.add_argument( 32 | "--output", 33 | type=str, 34 | required=True, 35 | help=( 36 | "Path to the directory where masks will be output. Output will be either a folder " 37 | "of PNGs per image or a single json with COCO-style masks." 38 | ), 39 | ) 40 | 41 | parser.add_argument( 42 | "--model-type", 43 | type=str, 44 | default="default", 45 | help="The type of model to load, in ['default', 'vit_l', 'vit_b']", 46 | ) 47 | 48 | parser.add_argument( 49 | "--checkpoint", 50 | type=str, 51 | required=True, 52 | help="The path to the SAM checkpoint to use for mask generation.", 53 | ) 54 | 55 | parser.add_argument("--device", type=str, default="cuda", help="The device to run generation on.") 56 | 57 | parser.add_argument( 58 | "--convert-to-rle", 59 | action="store_true", 60 | help=( 61 | "Save masks as COCO RLEs in a single json instead of as a folder of PNGs. " 62 | "Requires pycocotools." 
63 | ), 64 | ) 65 | 66 | amg_settings = parser.add_argument_group("AMG Settings") 67 | 68 | amg_settings.add_argument( 69 | "--points-per-side", 70 | type=int, 71 | default=None, 72 | help="Generate masks by sampling a grid over the image with this many points to a side.", 73 | ) 74 | 75 | amg_settings.add_argument( 76 | "--points-per-batch", 77 | type=int, 78 | default=None, 79 | help="How many input points to process simultaneously in one batch.", 80 | ) 81 | 82 | amg_settings.add_argument( 83 | "--pred-iou-thresh", 84 | type=float, 85 | default=None, 86 | help="Exclude masks with a predicted score from the model that is lower than this threshold.", 87 | ) 88 | 89 | amg_settings.add_argument( 90 | "--stability-score-thresh", 91 | type=float, 92 | default=None, 93 | help="Exclude masks with a stability score lower than this threshold.", 94 | ) 95 | 96 | amg_settings.add_argument( 97 | "--stability-score-offset", 98 | type=float, 99 | default=None, 100 | help="Larger values perturb the mask more when measuring stability score.", 101 | ) 102 | 103 | amg_settings.add_argument( 104 | "--box-nms-thresh", 105 | type=float, 106 | default=None, 107 | help="The overlap threshold for excluding a duplicate mask.", 108 | ) 109 | 110 | amg_settings.add_argument( 111 | "--crop-n-layers", 112 | type=int, 113 | default=None, 114 | help=( 115 | "If >0, mask generation is run on smaller crops of the image to generate more masks. " 116 | "The value sets how many different scales to crop at." 117 | ), 118 | ) 119 | 120 | amg_settings.add_argument( 121 | "--crop-nms-thresh", 122 | type=float, 123 | default=None, 124 | help="The overlap threshold for excluding duplicate masks across different crops.", 125 | ) 126 | 127 | amg_settings.add_argument( 128 | "--crop-overlap-ratio", 129 | type=int, 130 | default=None, 131 | help="Larger numbers mean image crops will overlap more.", 132 | ) 133 | 134 | amg_settings.add_argument( 135 | "--crop-n-points-downscale-factor", 136 | type=int, 137 | default=None, 138 | help="The number of points-per-side in each layer of crop is reduced by this factor.", 139 | ) 140 | 141 | amg_settings.add_argument( 142 | "--min-mask-region-area", 143 | type=int, 144 | default=None, 145 | help=( 146 | "Disconnected mask regions or holes with area smaller than this value " 147 | "in pixels are removed by postprocessing." 
148 | ), 149 | ) 150 | 151 | 152 | def write_masks_to_folder(masks: List[Dict[str, Any]], path: str) -> None: 153 | header = "id,area,bbox_x0,bbox_y0,bbox_w,bbox_h,point_input_x,point_input_y,predicted_iou,stability_score,crop_box_x0,crop_box_y0,crop_box_w,crop_box_h" # noqa 154 | metadata = [header] 155 | for i, mask_data in enumerate(masks): 156 | mask = mask_data["segmentation"] 157 | filename = f"{i}.png" 158 | cv2.imwrite(os.path.join(path, filename), mask * 255) 159 | mask_metadata = [ 160 | str(i), 161 | str(mask_data["area"]), 162 | *[str(x) for x in mask_data["bbox"]], 163 | *[str(x) for x in mask_data["point_coords"][0]], 164 | str(mask_data["predicted_iou"]), 165 | str(mask_data["stability_score"]), 166 | *[str(x) for x in mask_data["crop_box"]], 167 | ] 168 | row = ",".join(mask_metadata) 169 | metadata.append(row) 170 | metadata_path = os.path.join(path, "metadata.csv") 171 | with open(metadata_path, "w") as f: 172 | f.write("\n".join(metadata)) 173 | 174 | return 175 | 176 | 177 | def get_amg_kwargs(args): 178 | amg_kwargs = { 179 | "points_per_side": args.points_per_side, 180 | "points_per_batch": args.points_per_batch, 181 | "pred_iou_thresh": args.pred_iou_thresh, 182 | "stability_score_thresh": args.stability_score_thresh, 183 | "stability_score_offset": args.stability_score_offset, 184 | "box_nms_thresh": args.box_nms_thresh, 185 | "crop_n_layers": args.crop_n_layers, 186 | "crop_nms_thresh": args.crop_nms_thresh, 187 | "crop_overlap_ratio": args.crop_overlap_ratio, 188 | "crop_n_points_downscale_factor": args.crop_n_points_downscale_factor, 189 | "min_mask_region_area": args.min_mask_region_area, 190 | } 191 | amg_kwargs = {k: v for k, v in amg_kwargs.items() if v is not None} 192 | return amg_kwargs 193 | 194 | 195 | def main(args: argparse.Namespace) -> None: 196 | print("Loading model...") 197 | sam = sam_model_registry[args.model_type](checkpoint=args.checkpoint) 198 | _ = sam.to(device=args.device) 199 | output_mode = "coco_rle" if args.convert_to_rle else "binary_mask" 200 | amg_kwargs = get_amg_kwargs(args) 201 | generator = SamAutomaticMaskGenerator(sam, output_mode=output_mode, **amg_kwargs) 202 | 203 | if not os.path.isdir(args.input): 204 | targets = [args.input] 205 | else: 206 | targets = [ 207 | f for f in os.listdir(args.input) if not os.path.isdir(os.path.join(args.input, f)) 208 | ] 209 | targets = [os.path.join(args.input, f) for f in targets] 210 | 211 | os.makedirs(args.output, exist_ok=True) 212 | 213 | for t in targets: 214 | print(f"Processing '{t}'...") 215 | image = cv2.imread(t) 216 | if image is None: 217 | print(f"Could not load '{t}' as an image, skipping...") 218 | continue 219 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 220 | 221 | masks = generator.generate(image) 222 | 223 | base = os.path.basename(t) 224 | base = os.path.splitext(base)[0] 225 | save_base = os.path.join(args.output, base) 226 | if output_mode == "binary_mask": 227 | os.makedirs(save_base, exist_ok=False) 228 | write_masks_to_folder(masks, save_base) 229 | else: 230 | save_file = save_base + ".json" 231 | with open(save_file, "w") as f: 232 | json.dump(masks, f) 233 | print("Done!") 234 | 235 | 236 | if __name__ == "__main__": 237 | args = parser.parse_args() 238 | main(args) 239 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | Open6DOR: Benchmarking Open-instruction 6-DoF Object Rearrangement and A VLM-based Approach 3 | 4 | IROS 2024 5 | 6 | 7 |
8 | 9 | 10 | 11 | Paper arXiv 12 | 13 | Project Page 14 |
15 |

16 | 17 | 18 | ![Teaser](./images/teaser_final1.jpg) 19 | This is the official repository of [Open6DOR: Benchmarking Open-instruction 6-DoF Object Rearrangement and A VLM-based Approach](https://pku-epic.github.io/Open6DOR/). In this work, we pioneer the construction of a benchmark and approach for table-top Open-instruction 6-DoF Object Rearrangement (Open6DOR). Specifically, we collect a synthetic dataset of 200+ objects and carefully design 2400+ Open6DOR tasks. These tasks are divided into the Position-track, Rotation-track, and 6-DoF-track for evaluating different embodied agents on predicting the positions and rotations of target objects. Besides, we also propose a VLM-based approach for Open6DOR, named Open6DOR-GPT, which empowers GPT-4V with 3D awareness and simulation assistance while exploiting its strengths in generalizability and instruction-following. We compare existing embodied agents with Open6DOR-GPT on the proposed Open6DOR benchmark and find that Open6DOR-GPT achieves state-of-the-art performance. We further demonstrate the impressive performance of Open6DOR-GPT in diverse real-world experiments. 20 | We plan to release the final version of the benchmark, along with our refined method, in early September, and we recommend waiting until then to download the dataset. 21 | 22 | ## News 23 | - The method has been updated to use the GPT-4o API; see the Method folder. 24 | 25 | ## Benchmark 26 | The Open6DOR Benchmark is specifically designed for table-top Open6DOR tasks within a simulation environment. Our dataset encompasses 200+ high-quality objects, forming diverse scenes and totaling 2400+ diverse tasks. All tasks are carefully configured and accompanied by detailed annotations. To ensure comprehensive evaluation, we provide three specialized benchmark tracks: the Rotation-track Benchmark ($B_r$), the Position-track Benchmark ($B_p$), and the 6-DoF-track Benchmark ($B_\text{6DoF}$). 27 | In this repository, we provide: 28 | - A dataset of diverse objects 29 | - 2400+ Open6DOR tasks with detailed annotations 30 | - A set of evaluation metrics for each track of tasks 31 | 32 | 33 | ### Installation 34 | **Environment Setup** 35 | 36 | We recommend using a Linux system for better compatibility with our modules (including Blender and Isaac Gym). 37 | ``` 38 | # Clone the repository 39 | git clone git@github.com:Selina2023/Open6DOR.git 40 | cd Open6DOR 41 | # Create an environment 42 | conda create -n Open6DOR python=3.9 43 | # Install dependencies 44 | pip install -r requirements.txt 45 | ``` 46 | 47 | 48 | **Dataset Downloads** 49 | 50 | Refer to the **File Structure** section below for specific file locations. 51 | - Download the [object datasets](https://drive.google.com/drive/folders/1Gm30OtQWRb5NitIdnLSJlfLdAG_rWHQX?usp=sharing) and uncompress. 52 | - Download the [task datasets](https://drive.google.com/drive/folders/11o2I20Q8uJrSXO_JvnbH7dEoR43V9fKa?usp=sharing) and uncompress. (The refined version will be released along with our paper.) 53 | 54 | **Rendering Dependencies** 55 | 56 | - Download [Blender 2.93.3 (Linux x64)](https://download.blender.org/release/Blender2.93/blender-2.93.3-linux-x64.tar.xz) and uncompress. 57 | - Download the [environment map asset](https://drive.google.com/file/d/1qbXc-fT04GcLqZX6D1WhbEtQo_Uav-FL/view?usp=sharing) and uncompress. 58 | - Download the [.blend file](https://drive.google.com/file/d/1Rg9fHn9D9RcNt1XFTvHP-RRa73lgzspF/view?usp=sharing) and uncompress. 59 | - Install the Python packages (NumPy, etc.)
into the Blender built-in Python environment. 60 | ``` 61 | cd Benchmark/renderer/blender-2.93.3-linux-x64/2.93/python/bin 62 | ./python3.9 -m ensurepip 63 | ./python3.9 -m pip install --upgrade pip --user 64 | ./python3.9 -m pip install numpy --user 65 | ``` 66 | 67 | 68 | 69 | **File Structure** 70 | 71 | After downloading the datasets, organize the file structure as follows: 72 | 73 | ``` 74 | Benchmark 75 | ├── benchmark_catalogue 76 | │   ├── annotation 77 | │   │   └── ... 78 | │   ├── category_dictionary.json 79 | │   └── ... 80 | ├── dataset 81 | │   ├── objects 82 | │   │   ├── objaverse_rescale 83 | │   │   └── ycb 84 | │   └── tasks 85 | │       ├── 6DoF_track 86 | │       ├── position_track 87 | │       └── rotation_track 88 | ├── evaluation 89 | │   └── evaluator.py 90 | ├── renderer 91 | │   ├── blender-2.93.3-linux-x64 92 | │   ├── envmap_lib 93 | │   │   ├── abandoned_factory_canteen_01_1k.hdr 94 | │   │   └── ... 95 | │   ├── texture 96 | │   │   └── texture0.jpg 97 | │   ├── material_lib_v2.blend 98 | │   ├── modify_material.py 99 | │   └── open6dor_renderer.py 100 | ├── task_examples 101 | │   ├── 6DoF 102 | │   ├── position 103 | │   └── rotation 104 | └── bench.py 105 | 106 | ``` 107 | 108 | ### Usage 109 | Along with the dataset, we provide several functions for visualization and evaluation of the tasks: 110 | - To load a task example, run the following command (you may change `--image_mode` to `RENDER_IMAGE_BLENDER` or other modes): 111 | ``` 112 | cd Benchmark 113 | python bench.py load_task --task_path ./task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/task_config.json --image_mode GIVEN_IMAGE_ISAACGYM --output_path ./output/test 114 | ``` 115 | For personalized rendering, you may try arbitrary camera positions and background settings: 116 | ``` 117 | python bench.py load_task --task_path ./task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/task_config.json --image_mode RENDER_IMAGE_BLENDER --cam_quaternion 0 0 0.0 1.0 --cam_translation 0.0 0.0 4 --background_material_id 44 --env_map_id 25 118 | ``` 119 | - To evaluate a task, run the following command (you need to fill the predicted pose into a JSON file; an illustrative `pred_pose.json` sketch is given after the Method section below): 120 | 121 | ``` 122 | python bench.py eval_task --task_id my_test --pred_pose path/to/pred_pose.json 123 | ``` 124 | 125 | - Besides evaluating the numerical results of the pose prediction directly, we provide another set of metrics where users are allowed to control the robot arm and interact with the simulation environment. This evaluation is based solely on the final pose of the target object after execution. To do this, run the following command (currently not available): 126 | 127 | ``` 128 | python interaction.py 129 | ``` 130 | 136 | 137 | ## Method 138 | ![Method](./images/overall_pipeline_final1.jpg) 139 | By incorporating 3D awareness and simulation assistance, we effectively tackle the Open6DOR task through a decomposed approach. 140 | Specifically, Open6DOR-GPT takes an RGB-D image and an instruction as input and outputs the corresponding robot motion trajectory. First, the preprocessing module extracts the object names and masks. Then, the position and rotation modules predict the position and rotation of the target object in a decoupled way. Finally, the planning module generates a trajectory for execution. 141 | 142 | See the README in the `Method` folder.
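
For intuition, here is a minimal sketch of how the decomposed pipeline above could be wired together. The function names, signatures, and return values are illustrative placeholders only and do not correspond to the actual APIs in the `Method` folder.

```
# Illustrative sketch of the decomposed Open6DOR-GPT pipeline described above.
# All names and values are hypothetical placeholders, not the real implementation.

def preprocess(rgb, depth, instruction):
    # Extract object names from the instruction and per-object masks from the image
    # (the repository ships a GroundedSAM-based module under Method/vision for this step).
    objects = {"target": "apple", "anchor": "box"}
    masks = {"apple": None, "box": None}
    return objects, masks

def predict_position(objects, masks, depth, instruction):
    # Position module: ground the spatial relation (e.g. "behind", "left of")
    # into a target (x, y, z) location on the table.
    return (0.10, -0.20, 0.05)

def predict_rotation(objects, masks, instruction):
    # Rotation module: resolve the orientation constraint (e.g. "upright") into a
    # target quaternion (x, y, z, w); it runs independently of the position module.
    return (0.0, 0.0, 0.0, 1.0)

def plan_trajectory(position, rotation):
    # Planning module: turn the predicted 6-DoF goal into a motion trajectory.
    return [("pick", "target"), ("place", (position, rotation))]

def open6dor_gpt(rgb, depth, instruction):
    objects, masks = preprocess(rgb, depth, instruction)
    position = predict_position(objects, masks, depth, instruction)
    rotation = predict_rotation(objects, masks, instruction)
    return plan_trajectory(position, rotation)
```

With these stubs, calling `open6dor_gpt(None, None, "Place the apple behind the box, upright")` simply returns a dummy trajectory; the real modules in `Method/` replace each stub.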
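
As mentioned in the Usage section, the `eval_task` command expects a predicted-pose JSON file via `--pred_pose`. The exact schema is defined by `Benchmark/evaluation/evaluator.py` and is not documented here, so the snippet below is only an assumed, illustrative layout (a predicted position plus a quaternion) showing the kind of file you would write:

```
import json

# Hypothetical example of a predicted-pose file for `bench.py eval_task`.
# The field names and conventions are assumptions for illustration; check
# Benchmark/evaluation/evaluator.py for the schema it actually expects.
pred_pose = {
    "position": [0.10, -0.20, 0.05],   # predicted object position (x, y, z)
    "rotation": [0.0, 0.0, 0.0, 1.0],  # predicted object quaternion (x, y, z, w)
}

with open("path/to/pred_pose.json", "w") as f:
    json.dump(pred_pose, f, indent=2)
```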
143 | 144 | 153 | 154 | 155 | 156 | 160 | 161 | ## Contact 162 | For further details or questions, please feel free to contact us: 163 | - [Yufei Ding](https://selina2023.github.io/): selina@stu.pku.edu.cn 164 | - [Haoran Geng](https://geng-haoran.github.io/): ghr@berkeley.edu 165 | - [He Wang](https://hughw19.github.io/): hewang@pku.edu.cn 166 | -------------------------------------------------------------------------------- /Method/vision/GroundedSAM/grounded_sam_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import copy 4 | 5 | import numpy as np 6 | import json 7 | import torch 8 | from PIL import Image, ImageDraw, ImageFont 9 | 10 | # Grounding DINO 11 | import GroundingDINO.groundingdino.datasets.transforms as T 12 | from GroundingDINO.groundingdino.models import build_model 13 | from GroundingDINO.groundingdino.util import box_ops 14 | from GroundingDINO.groundingdino.util.slconfig import SLConfig 15 | from GroundingDINO.groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap 16 | 17 | # segment anything 18 | from segment_anything.segment_anything import ( 19 | sam_model_registry, 20 | sam_hq_model_registry, 21 | SamPredictor 22 | ) 23 | import cv2 24 | import numpy as np 25 | import matplotlib.pyplot as plt 26 | 27 | 28 | def load_image(image_path): 29 | # load image 30 | image_pil = Image.open(image_path).convert("RGB") # load image 31 | 32 | transform = T.Compose( 33 | [ 34 | T.RandomResize([800], max_size=1333), 35 | T.ToTensor(), 36 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 37 | ] 38 | ) 39 | image, _ = transform(image_pil, None) # 3, h, w 40 | return image_pil, image 41 | 42 | 43 | def load_model(model_config_path, model_checkpoint_path, device): 44 | args = SLConfig.fromfile(model_config_path) 45 | args.device = device 46 | model = build_model(args) 47 | checkpoint = torch.load(model_checkpoint_path, map_location="cpu") 48 | load_res = model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False) 49 | print(load_res) 50 | _ = model.eval() 51 | return model 52 | 53 | 54 | def get_grounding_output(model, image, caption, box_threshold, text_threshold, with_logits=True, device="cpu"): 55 | caption = caption.lower() 56 | caption = caption.strip() 57 | if not caption.endswith("."): 58 | caption = caption + "." 
59 | model = model.to(device) 60 | image = image.to(device) 61 | with torch.no_grad(): 62 | outputs = model(image[None], captions=[caption]) 63 | logits = outputs["pred_logits"].cpu().sigmoid()[0] # (nq, 256) 64 | boxes = outputs["pred_boxes"].cpu()[0] # (nq, 4) 65 | logits.shape[0] 66 | 67 | # filter output 68 | logits_filt = logits.clone() 69 | boxes_filt = boxes.clone() 70 | filt_mask = logits_filt.max(dim=1)[0] > box_threshold 71 | logits_filt = logits_filt[filt_mask] # num_filt, 256 72 | boxes_filt = boxes_filt[filt_mask] # num_filt, 4 73 | logits_filt.shape[0] 74 | 75 | # get phrase 76 | tokenlizer = model.tokenizer 77 | tokenized = tokenlizer(caption) 78 | # build pred 79 | pred_phrases = [] 80 | for logit, box in zip(logits_filt, boxes_filt): 81 | pred_phrase = get_phrases_from_posmap(logit > text_threshold, tokenized, tokenlizer) 82 | if with_logits: 83 | pred_phrases.append(pred_phrase + f"({str(logit.max().item())[:4]})") 84 | else: 85 | pred_phrases.append(pred_phrase) 86 | 87 | return boxes_filt, pred_phrases 88 | 89 | def show_mask(mask, ax, random_color=False): 90 | if random_color: 91 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 92 | else: 93 | color = np.array([30/255, 144/255, 255/255, 0.6]) 94 | h, w = mask.shape[-2:] 95 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 96 | ax.imshow(mask_image) 97 | 98 | 99 | def show_box(box, ax, label): 100 | x0, y0 = box[0], box[1] 101 | w, h = box[2] - box[0], box[3] - box[1] 102 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) 103 | ax.text(x0, y0, label) 104 | 105 | 106 | def save_mask_data(output_dir, mask_list, box_list, label_list): 107 | value = 0 # 0 for background 108 | 109 | mask_img = torch.zeros(mask_list.shape[-2:]) 110 | for idx, mask in enumerate(mask_list): 111 | mask_img[mask.cpu().numpy()[0] == True] = value + idx + 1 112 | plt.figure(figsize=(10, 10)) 113 | plt.imshow(mask_img.numpy()) 114 | plt.axis('off') 115 | plt.savefig(os.path.join(output_dir, 'mask.jpg'), bbox_inches="tight", dpi=300, pad_inches=0.0) 116 | 117 | json_data = [{ 118 | 'value': value, 119 | 'label': 'background' 120 | }] 121 | for label, box in zip(label_list, box_list): 122 | value += 1 123 | name, logit = label.split('(') 124 | logit = logit[:-1] # the last is ')' 125 | json_data.append({ 126 | 'value': value, 127 | 'label': name, 128 | 'logit': float(logit), 129 | 'box': box.numpy().tolist(), 130 | }) 131 | with open(os.path.join(output_dir, 'mask.json'), 'w') as f: 132 | json.dump(json_data, f) 133 | 134 | 135 | if __name__ == "__main__": 136 | 137 | parser = argparse.ArgumentParser("Grounded-Segment-Anything Demo", add_help=True) 138 | parser.add_argument("--config", type=str, required=True, help="path to config file") 139 | parser.add_argument( 140 | "--grounded_checkpoint", type=str, required=True, help="path to checkpoint file" 141 | ) 142 | parser.add_argument( 143 | "--sam_version", type=str, default="vit_h", required=False, help="SAM ViT version: vit_b / vit_l / vit_h" 144 | ) 145 | parser.add_argument( 146 | "--sam_checkpoint", type=str, required=False, help="path to sam checkpoint file" 147 | ) 148 | parser.add_argument( 149 | "--sam_hq_checkpoint", type=str, default=None, help="path to sam-hq checkpoint file" 150 | ) 151 | parser.add_argument( 152 | "--use_sam_hq", action="store_true", help="using sam-hq for prediction" 153 | ) 154 | parser.add_argument("--input_image", type=str, required=True, help="path to image file") 155 | 
parser.add_argument("--text_prompt", type=str, required=True, help="text prompt") 156 | parser.add_argument( 157 | "--output_dir", "-o", type=str, default="outputs", required=True, help="output directory" 158 | ) 159 | 160 | parser.add_argument("--box_threshold", type=float, default=0.3, help="box threshold") 161 | parser.add_argument("--text_threshold", type=float, default=0.25, help="text threshold") 162 | 163 | parser.add_argument("--device", type=str, default="cpu", help="running on cpu only!, default=False") 164 | args = parser.parse_args() 165 | 166 | # cfg 167 | config_file = args.config # change the path of the model config file 168 | grounded_checkpoint = args.grounded_checkpoint # change the path of the model 169 | sam_version = args.sam_version 170 | sam_checkpoint = args.sam_checkpoint 171 | sam_hq_checkpoint = args.sam_hq_checkpoint 172 | use_sam_hq = args.use_sam_hq 173 | image_path = args.input_image 174 | text_prompt = args.text_prompt 175 | output_dir = args.output_dir 176 | box_threshold = args.box_threshold 177 | text_threshold = args.text_threshold 178 | device = args.device 179 | 180 | # make dir 181 | os.makedirs(output_dir, exist_ok=True) 182 | # load image 183 | image_pil, image = load_image(image_path) 184 | # load model 185 | model = load_model(config_file, grounded_checkpoint, device=device) 186 | 187 | # visualize raw image 188 | image_pil.save(os.path.join(output_dir, "raw_image.jpg")) 189 | 190 | # run grounding dino model 191 | boxes_filt, pred_phrases = get_grounding_output( 192 | model, image, text_prompt, box_threshold, text_threshold, device=device 193 | ) 194 | 195 | # initialize SAM 196 | if use_sam_hq: 197 | predictor = SamPredictor(sam_hq_model_registry[sam_version](checkpoint=sam_hq_checkpoint).to(device)) 198 | else: 199 | predictor = SamPredictor(sam_model_registry[sam_version](checkpoint=sam_checkpoint).to(device)) 200 | image = cv2.imread(image_path) 201 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 202 | predictor.set_image(image) 203 | 204 | size = image_pil.size 205 | H, W = size[1], size[0] 206 | for i in range(boxes_filt.size(0)): 207 | boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H]) 208 | boxes_filt[i][:2] -= boxes_filt[i][2:] / 2 209 | boxes_filt[i][2:] += boxes_filt[i][:2] 210 | 211 | boxes_filt = boxes_filt.cpu() 212 | transformed_boxes = predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2]).to(device) 213 | 214 | masks, _, _ = predictor.predict_torch( 215 | point_coords = None, 216 | point_labels = None, 217 | boxes = transformed_boxes.to(device), 218 | multimask_output = False, 219 | ) 220 | 221 | # draw output image 222 | plt.figure(figsize=(10, 10)) 223 | plt.imshow(image) 224 | for mask in masks: 225 | show_mask(mask.cpu().numpy(), plt.gca(), random_color=True) 226 | for box, label in zip(boxes_filt, pred_phrases): 227 | show_box(box.numpy(), plt.gca(), label) 228 | 229 | plt.axis('off') 230 | plt.savefig( 231 | os.path.join(output_dir, "grounded_sam_output.jpg"), 232 | bbox_inches="tight", dpi=300, pad_inches=0.0 233 | ) 234 | 235 | save_mask_data(output_dir, masks, boxes_filt, pred_phrases) 236 | -------------------------------------------------------------------------------- /assets/robot/franka_description/robots/franka_panda.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 
| 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 177 | 178 | 181 | 182 | 183 | 184 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | --------------------------------------------------------------------------------