├── Method
├── gym
│ ├── __init__.py
│ └── vlm_utils.py
├── position
│ ├── __init__.py
│ ├── .gitignore
│ └── vlm_utils.py
├── vision
│ ├── __init__.py
│ ├── .gitignore
│ ├── GroundedSAM
│ │ ├── segment_anything
│ │ │ ├── notebooks
│ │ │ │ └── images
│ │ │ │ │ ├── dog.jpg
│ │ │ │ │ ├── truck.jpg
│ │ │ │ │ └── groceries.jpg
│ │ │ ├── .flake8
│ │ │ ├── segment_anything
│ │ │ │ ├── utils
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── transforms.py
│ │ │ │ │ └── onnx.py
│ │ │ │ ├── modeling
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── common.py
│ │ │ │ │ ├── mask_decoder.py
│ │ │ │ │ └── sam.py
│ │ │ │ ├── __init__.py
│ │ │ │ ├── build_sam.py
│ │ │ │ └── build_sam_hq.py
│ │ │ ├── setup.cfg
│ │ │ ├── setup.py
│ │ │ ├── linter.sh
│ │ │ ├── CONTRIBUTING.md
│ │ │ ├── CODE_OF_CONDUCT.md
│ │ │ ├── README.md
│ │ │ └── scripts
│ │ │ │ ├── export_onnx_model.py
│ │ │ │ └── amg.py
│ │ ├── .gitmodules
│ │ ├── requirements.txt
│ │ ├── .gitignore
│ │ └── grounded_sam_demo.py
│ ├── tranformation.py
│ └── test_sam.py
├── isaacgym0
│ ├── .gitignore
│ ├── config.yaml
│ ├── utils.py
│ └── asset_info.py
├── tasks
├── mask.png
├── test_image.png
├── utils
│ ├── mesh.py
│ ├── task_stat.py
│ ├── vlm_utils.py
│ └── get_assets.py
├── run_multiple.py
├── method_cfg.yaml
├── README.md
├── test_gym.py
└── open6dor_gpt.py
├── assets
├── blender
│ └── .gitignore
├── ckpts
│ └── .gitignore
├── .gitignore
├── objects
│ └── .gitignore
├── tasks
│ ├── .gitignore
│ └── task_refine_6dof_example
│ │ └── behind
│ │ └── 20240824-165044_no_interaction
│ │ ├── isaac_render-rgb-0-0.png
│ │ ├── isaac_render-rgb-0-1.png
│ │ ├── isaac_render-rgb-0-2.png
│ │ ├── isaac_render-rgb-0-3.png
│ │ ├── isaac_render-rgb-0-4.png
│ │ ├── gsam-gsam-mask-apple-0.npy
│ │ ├── gsam-gsam-mask-apple-0.ply
│ │ ├── gsam-gsam-mask-apple-0.png
│ │ ├── gsam-gsam-mask-apple-1.npy
│ │ ├── gsam-gsam-mask-apple-1.ply
│ │ ├── gsam-gsam-mask-apple-1.png
│ │ ├── gsam-gsam-mask-bottle-0.npy
│ │ ├── gsam-gsam-mask-bottle-0.ply
│ │ ├── gsam-gsam-mask-bottle-0.png
│ │ ├── gsam-gsam-mask-bottle-1.npy
│ │ ├── gsam-gsam-mask-bottle-1.ply
│ │ ├── gsam-gsam-mask-bottle-1.png
│ │ ├── isaac_render-depth-0-0.npy
│ │ ├── isaac_render-depth-0-0.png
│ │ ├── isaac_render-depth-0-1.npy
│ │ ├── isaac_render-depth-0-1.png
│ │ ├── isaac_render-depth-0-2.npy
│ │ ├── isaac_render-depth-0-2.png
│ │ ├── isaac_render-depth-0-3.npy
│ │ ├── isaac_render-depth-0-3.png
│ │ ├── isaac_render-depth-0-4.npy
│ │ ├── isaac_render-depth-0-4.png
│ │ ├── task_config_test.json
│ │ └── task_config_new5.json
└── robot
│ └── franka_description
│ ├── meshes
│ ├── collision
│ │ ├── finger.stl
│ │ ├── hand.stl
│ │ ├── link0.stl
│ │ ├── link1.stl
│ │ ├── link2.stl
│ │ ├── link3.stl
│ │ ├── link4.stl
│ │ ├── link5.stl
│ │ ├── link6.stl
│ │ ├── link7.stl
│ │ ├── stltoobj.bat
│ │ ├── stltoobj.mlx
│ │ └── finger.obj
│ └── visual
│ │ ├── daetoobj.mlx
│ │ ├── daetoobj.bat
│ │ ├── link1.mtl
│ │ ├── link2.mtl
│ │ ├── finger.mtl
│ │ ├── link5.mtl
│ │ ├── link4.mtl
│ │ ├── link3.mtl
│ │ ├── hand.mtl
│ │ ├── link7.mtl
│ │ ├── link0.mtl
│ │ └── link6.mtl
│ └── robots
│ └── franka_panda.urdf
├── requirements.txt
├── images
├── teaser_final1.jpg
├── teaser_final1.pdf
├── overall_pipeline_final1.jpg
└── overall_pipeline_final1.pdf
├── Benchmark
├── renderer
│ ├── texture
│ │ └── texture0.jpg
│ └── run_Open6DOR_render.sh
├── .gitignore
├── bench_config.yaml
├── task_examples
│ ├── rotation
│ │ └── None
│ │ │ └── mug_handle_left
│ │ │ ├── 20240717-075819_no_interaction
│ │ │ ├── before-rgb-0-0.png
│ │ │ ├── before-rgb-0-1.png
│ │ │ ├── before-rgb-0-2.png
│ │ │ ├── before-rgb-0-3.png
│ │ │ ├── task_config.json
│ │ │ └── task_config_new.json
│ │ │ └── 20240717-075911_no_interaction
│ │ │ ├── before-rgb-0-0.png
│ │ │ ├── before-rgb-0-1.png
│ │ │ ├── before-rgb-0-2.png
│ │ │ ├── before-rgb-0-3.png
│ │ │ ├── task_config.json
│ │ │ └── task_config_new.json
│ ├── 6DoF
│ │ └── behind
│ │ │ └── Place_the_apple_behind_the_box_on_the_table.__upright
│ │ │ └── 20240704-145831_no_interaction
│ │ │ ├── before-rgb-0-0.png
│ │ │ ├── before-rgb-0-1.png
│ │ │ ├── before-rgb-0-2.png
│ │ │ ├── before-rgb-0-3.png
│ │ │ ├── task_config_new.json
│ │ │ └── task_config.json
│ └── position
│ │ └── left
│ │ └── Place_the_hammer_to_the_left_of_the_USB_on_the_table._
│ │ ├── 20240717-090658_no_interaction
│ │ ├── before-rgb-0-0.png
│ │ ├── before-rgb-0-1.png
│ │ ├── before-rgb-0-2.png
│ │ ├── before-rgb-0-3.png
│ │ └── task_config.json
│ │ └── 20240717-094704_no_interaction
│ │ ├── before-rgb-0-0.png
│ │ ├── before-rgb-0-1.png
│ │ ├── before-rgb-0-2.png
│ │ ├── before-rgb-0-3.png
│ │ └── task_config.json
├── benchmark_catalogue
│ └── error.txt
├── dataset
│ └── objects
│ │ └── scale.py
├── evaluation
│ └── evaluator.py
└── bench.py
├── .gitignore
└── README.md
/Method/gym/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Method/position/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Method/vision/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assets/blender/.gitignore:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assets/ckpts/.gitignore:
--------------------------------------------------------------------------------
1 | *pth
--------------------------------------------------------------------------------
/assets/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | isaacgym/*
--------------------------------------------------------------------------------
/Method/position/.gitignore:
--------------------------------------------------------------------------------
1 | openai_api.py
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | imageio
2 | bpy
3 | scipy
4 |
--------------------------------------------------------------------------------
/Method/isaacgym0/.gitignore:
--------------------------------------------------------------------------------
1 | *.mp4
2 | *.png
3 | *.jpg
--------------------------------------------------------------------------------
/Method/vision/.gitignore:
--------------------------------------------------------------------------------
1 | outputs/
2 | segment-anything/
--------------------------------------------------------------------------------
/assets/objects/.gitignore:
--------------------------------------------------------------------------------
1 | objaverse_rescale/
2 | ycb_16k_backup/
--------------------------------------------------------------------------------
/Method/tasks:
--------------------------------------------------------------------------------
1 | /home/haoran/Projects/Rearrangement/Open6DOR/Benchmark/tasks
--------------------------------------------------------------------------------
/Method/mask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Method/mask.png
--------------------------------------------------------------------------------
/assets/tasks/.gitignore:
--------------------------------------------------------------------------------
1 | task_refine_6dof
2 | task_refine_rot_only
3 | task_refine_pos
--------------------------------------------------------------------------------
/Method/test_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Method/test_image.png
--------------------------------------------------------------------------------
/images/teaser_final1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/images/teaser_final1.jpg
--------------------------------------------------------------------------------
/images/teaser_final1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/images/teaser_final1.pdf
--------------------------------------------------------------------------------
/images/overall_pipeline_final1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/images/overall_pipeline_final1.jpg
--------------------------------------------------------------------------------
/images/overall_pipeline_final1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/images/overall_pipeline_final1.pdf
--------------------------------------------------------------------------------
/Benchmark/renderer/texture/texture0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/renderer/texture/texture0.jpg
--------------------------------------------------------------------------------
/Benchmark/.gitignore:
--------------------------------------------------------------------------------
1 | dataset/objects/*
2 | *run_renderer.sh
3 | *.DS_Store
4 | tasks/
5 |
6 | *error.txt
7 | evaluation/format.py
8 |
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/finger.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/finger.stl
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/hand.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/hand.stl
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/link0.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link0.stl
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/link1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link1.stl
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/link2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link2.stl
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/link3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link3.stl
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/link4.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link4.stl
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/link5.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link5.stl
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/link6.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link6.stl
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/link7.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/robot/franka_description/meshes/collision/link7.stl
--------------------------------------------------------------------------------
/Benchmark/bench_config.yaml:
--------------------------------------------------------------------------------
1 | render:
2 |   cam_quaternion: [0.0, 0.0, 0.0, 1.0]
3 |   cam_translation: [0.0, 0.0, 1.0]
4 |   background_material_id: 44
5 |   env_map_id: 25
6 |
--------------------------------------------------------------------------------
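The render block above keeps the camera pose under a single render: key. Below is a minimal loading sketch (not part of the repository) that turns cam_quaternion and cam_translation into a 4x4 camera-to-world matrix; the xyzw quaternion ordering is an assumption.

import numpy as np
import yaml
from scipy.spatial.transform import Rotation

with open("Benchmark/bench_config.yaml") as f:
    render_cfg = yaml.safe_load(f)["render"]

# Build a homogeneous camera-to-world transform from the config entries.
cam_to_world = np.eye(4)
cam_to_world[:3, :3] = Rotation.from_quat(render_cfg["cam_quaternion"]).as_matrix()  # xyzw
cam_to_world[:3, 3] = render_cfg["cam_translation"]
print(cam_to_world)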
/assets/robot/franka_description/meshes/visual/daetoobj.mlx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/notebooks/images/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Method/vision/GroundedSAM/segment_anything/notebooks/images/dog.jpg
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/notebooks/images/truck.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Method/vision/GroundedSAM/segment_anything/notebooks/images/truck.jpg
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/notebooks/images/groceries.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Method/vision/GroundedSAM/segment_anything/notebooks/images/groceries.jpg
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/.gitmodules:
--------------------------------------------------------------------------------
1 |
2 | [submodule "grounded-sam-osx"]
3 |     path = grounded-sam-osx
4 |     url = https://github.com/linjing7/grounded-sam-osx.git
5 | [submodule "VISAM"]
6 |     path = VISAM
7 |     url = https://github.com/BingfengYan/VISAM
8 |
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-0.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-1.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-2.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-3.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-rgb-0-4.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-0.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-1.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-2.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/before-rgb-0-3.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-0.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-1.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-2.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/before-rgb-0-3.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.npy
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.ply:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.ply
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-0.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.npy
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.ply:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.ply
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-apple-1.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.npy
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.ply:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.ply
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-0.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.npy
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.ply:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.ply
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/gsam-gsam-mask-bottle-1.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-0.npy
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-0.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-1.npy
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-1.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-2.npy
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-2.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-3.npy
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-3.png
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-4.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-4.npy
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/isaac_render-depth-0-4.png
--------------------------------------------------------------------------------
/Benchmark/renderer/run_Open6DOR_render.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Render each scene index by launching Blender in background mode with the
4 | # Open6DOR renderer script; arguments after "--" are passed to that script.
5 | mycount=0;
6 | while (( $mycount < 1 )); do
7 |     ./blender-2.93.3-linux-x64/blender material_lib_v2.blend --background --python open6dor_renderer.py -- $mycount;
8 |     ((mycount=$mycount+1));
9 | done;
--------------------------------------------------------------------------------
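Blender leaves everything after "--" untouched in sys.argv for the invoked Python script. Since open6dor_renderer.py itself is not included in this dump, the snippet below is only a minimal sketch of how such a script might read the scene index passed by the shell loop above.

import sys

# Everything after "--" on the Blender command line is forwarded to the script.
argv = sys.argv
user_args = argv[argv.index("--") + 1:] if "--" in argv else []
scene_index = int(user_args[0]) if user_args else 0
print("rendering scene index", scene_index)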
/Method/vision/GroundedSAM/segment_anything/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = W503, E203, E221, C901, C408, E741, C407, B017, F811, C101, EXE001, EXE002
3 | max-line-length = 100
4 | max-complexity = 18
5 | select = B,C,E,F,W,T4,B9
6 | per-file-ignores =
7 |     **/__init__.py:F401,F403,E402
8 |
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/segment_anything/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
--------------------------------------------------------------------------------
/Method/isaacgym0/config.yaml:
--------------------------------------------------------------------------------
1 | SAVE_VIDEO: True
2 | SEED: 42
3 | STEPS: 1000
4 | num_envs: 256
5 | controller: ik
6 |
7 | # asset
8 | asset_root: ../assets
9 | asset_file: urdf/ycb/025_mug/025_mug_new.urdf
10 |
11 | # robot
12 | franka_asset_file: urdf/franka_description/robots/franka_panda.urdf
--------------------------------------------------------------------------------
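The config above stores flat simulation settings plus asset paths relative to a shared asset root. Since utils.py is not shown here, the loader below is only an assumed sketch of how the file might be consumed.

import os
import yaml

with open("Method/isaacgym0/config.yaml") as f:
    cfg = yaml.safe_load(f)

# Resolve the object and robot URDFs relative to the shared asset root.
object_urdf = os.path.join(cfg["asset_root"], cfg["asset_file"])
franka_urdf = os.path.join(cfg["asset_root"], cfg["franka_asset_file"])
print(cfg["num_envs"], cfg["controller"], object_urdf, franka_urdf)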
/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-0.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-1.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-2.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/before-rgb-0-3.png
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/daetoobj.bat:
--------------------------------------------------------------------------------
1 | SET PATH=%PATH%;C:/Tools/Assimp/bin/x64/
2 | forfiles /m *.dae /c "cmd /c assimp export @file @fname.obj --verbose --show-log -ptv"
3 |
4 | REM SET PATH=%PATH%;C:/Program Files/VCG/MeshLab/
5 | REM forfiles /m *.dae /c "cmd /c meshlabserver -i @file -o @fname.obj -m vn vt
--------------------------------------------------------------------------------
/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-0.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-1.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-2.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/before-rgb-0-3.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-0.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-1.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-2.png
--------------------------------------------------------------------------------
/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Selina2023/Open6DOR/HEAD/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/before-rgb-0-3.png
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/stltoobj.bat:
--------------------------------------------------------------------------------
1 | REM SET PATH=%PATH%;C:/Tools/Assimp/bin/x64/
2 | REM forfiles /m *.dae /c "cmd /c assimp export @file @fname.obj --verbose --show-log -ptv"
3 |
4 | SET PATH=%PATH%;C:/Program Files/VCG/MeshLab/
5 | forfiles /m *.stl /c "cmd /c meshlabserver -i @file -o @fname.obj -m vn -s stltoobj.mlx"
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/link1.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 1
3 |
4 | newmtl Part__Feature_001
5 | Ns -1.960784
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 1.000000 1.000000 1.000000
8 | Ks 0.062500 0.062500 0.062500
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/link2.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 1
3 |
4 | newmtl Part__Feature024
5 | Ns -1.960784
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 1.000000 1.000000 1.000000
8 | Ks 0.125000 0.125000 0.125000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/requirements.txt:
--------------------------------------------------------------------------------
1 | addict
2 | diffusers
3 | gradio
4 | huggingface_hub
5 | matplotlib
6 | numpy
7 | onnxruntime
8 | opencv_python
9 | Pillow
10 | pycocotools
11 | PyYAML
12 | requests
13 | setuptools
14 | supervision
15 | termcolor
16 | timm
17 | torch
18 | torchvision
19 | transformers
20 | yapf
21 | nltk
22 | fairscale
23 | litellm
24 |
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/setup.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | line_length=100
3 | multi_line_output=3
4 | include_trailing_comma=True
5 | known_standard_library=numpy,setuptools
6 | skip_glob=*/__init__.py
7 | known_myself=segment_anything
8 | known_third_party=matplotlib,cv2,torch,torchvision,pycocotools,onnx,black,isort
9 | no_lines_before=STDLIB,THIRDPARTY
10 | sections=FUTURE,STDLIB,THIRDPARTY,MYSELF,FIRSTPARTY,LOCALFOLDER
11 | default_section=FIRSTPARTY
12 |
--------------------------------------------------------------------------------
/Method/utils/mesh.py:
--------------------------------------------------------------------------------
1 | import trimesh
2 | 
3 | # Load a mesh from an OBJ file
4 | mesh = trimesh.load('/home/haoran/Projects/Rearrangement/Open6DOR/Method/assets/objaverse_final_norm/69511a7fad2f42ee8c4b0579bbc8fec6/material.obj')
5 | 
6 | # Center the mesh at the origin by subtracting its centroid
7 | mesh.apply_translation(-mesh.centroid)
8 | 
9 | # Normalize the mesh so its largest bounding-box extent is 1 unit
10 | scale_factor = 1.0 / mesh.bounding_box.extents.max()
11 | mesh.apply_scale(scale_factor)
12 | 
13 | # Save the normalized mesh to a new OBJ file
14 | mesh.export('output.obj')
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/segment_anything/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from .sam import Sam
8 | from .image_encoder import ImageEncoderViT
9 | from .mask_decoder_hq import MaskDecoderHQ
10 | from .mask_decoder import MaskDecoder
11 | from .prompt_encoder import PromptEncoder
12 | from .transformer import TwoWayTransformer
13 |
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/finger.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Part__Feature001_006
5 | Ns -1.960784
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.901961 0.921569 0.929412
8 | Ks 0.250000 0.250000 0.250000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl Part__Feature_007
15 | Ns -1.960784
16 | Ka 1.000000 1.000000 1.000000
17 | Kd 0.250980 0.250980 0.250980
18 | Ks 0.250000 0.250000 0.250000
19 | Ke 0.000000 0.000000 0.000000
20 | Ni 1.000000
21 | d 1.000000
22 | illum 2
23 |
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from setuptools import find_packages, setup
8 |
9 | setup(
10 |     name="segment_anything",
11 |     version="1.0",
12 |     install_requires=[],
13 |     packages=find_packages(exclude="notebooks"),
14 |     extras_require={
15 |         "all": ["matplotlib", "pycocotools", "opencv-python", "onnx", "onnxruntime"],
16 |         "dev": ["flake8", "isort", "black", "mypy"],
17 |     },
18 | )
19 |
--------------------------------------------------------------------------------
/Method/run_multiple.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | 
4 | # Command-line arguments: how many times to run, and which command to run.
5 | parser = argparse.ArgumentParser()
6 | parser.add_argument('--n', type=int, default=100)
7 | #parser.add_argument('--f', type=str, default="python reconstruction/mesh_reconstruction.py")
8 | parser.add_argument('--f', type=str, default="python interaction.py --mode gen_task --task_root rot_banch_0717 ")
9 | # parser.add_argument('--f', type=str, default="python interaction.py --mode gen_task_pure_rot --task_root rot_banch_0717_pure_rot ")
10 | #parser.add_argument('--f', type=str, default="python overall_clip.py")
11 | 
12 | args = parser.parse_args()
13 | 
14 | # Launch the command n times in sequence.
15 | for i in range(args.n):
16 |     os.system(args.f)
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/segment_anything/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from .build_sam import (
8 |     build_sam,
9 |     build_sam_vit_h,
10 |     build_sam_vit_l,
11 |     build_sam_vit_b,
12 |     sam_model_registry,
13 | )
14 | from .build_sam_hq import (
15 |     build_sam_hq,
16 |     build_sam_hq_vit_h,
17 |     build_sam_hq_vit_l,
18 |     build_sam_hq_vit_b,
19 |     sam_hq_model_registry,
20 | )
21 | from .predictor import SamPredictor
22 | from .automatic_mask_generator import SamAutomaticMaskGenerator
23 |
--------------------------------------------------------------------------------
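These are the standard segment-anything entry points. A minimal usage sketch (not taken from this repository) that loads the vit_h checkpoint referenced in Method/method_cfg.yaml and predicts a mask from a single box prompt; the image path and box coordinates are placeholders, and the checkpoint path assumes the same relative layout as method_cfg.yaml.

import cv2
import numpy as np
from segment_anything import sam_model_registry, SamPredictor

sam = sam_model_registry["vit_h"](checkpoint="../assets/ckpts/sam_vit_h_4b8939.pth")
sam.to("cuda:0")
predictor = SamPredictor(sam)

# set_image expects an RGB uint8 HxWx3 array.
image = cv2.cvtColor(cv2.imread("Method/test_image.png"), cv2.COLOR_BGR2RGB)
predictor.set_image(image)

# Single box prompt in XYXY pixel coordinates (placeholder values).
masks, scores, _ = predictor.predict(box=np.array([100, 100, 300, 300]),
                                     multimask_output=False)
print(masks.shape, scores)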
/Method/vision/GroundedSAM/segment_anything/linter.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 |
4 | {
5 |   black --version | grep -E "23\." > /dev/null
6 | } || {
7 |   echo "Linter requires 'black==23.*' !"
8 |   exit 1
9 | }
10 |
11 | ISORT_VERSION=$(isort --version-number)
12 | if [[ "$ISORT_VERSION" != 5.12* ]]; then
13 |   echo "Linter requires isort==5.12.0 !"
14 |   exit 1
15 | fi
16 |
17 | echo "Running isort ..."
18 | isort . --atomic
19 |
20 | echo "Running black ..."
21 | black -l 100 .
22 |
23 | echo "Running flake8 ..."
24 | if [ -x "$(command -v flake8)" ]; then
25 | flake8 .
26 | else
27 | python3 -m flake8 .
28 | fi
29 |
30 | echo "Running mypy..."
31 |
32 | mypy --exclude 'setup.py|notebooks' .
33 |
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/link5.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 3
3 |
4 | newmtl Part__Feature_002_004_003
5 | Ns -1.960784
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 1.000000 1.000000 1.000000
8 | Ks 0.015625 0.015625 0.015625
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl Shell001_001_001_003
15 | Ns -1.960784
16 | Ka 1.000000 1.000000 1.000000
17 | Kd 0.250000 0.250000 0.250000
18 | Ks 0.015625 0.015625 0.015625
19 | Ke 0.000000 0.000000 0.000000
20 | Ni 1.000000
21 | d 1.000000
22 | illum 2
23 |
24 | newmtl Shell_001_001_003
25 | Ns -1.960784
26 | Ka 1.000000 1.000000 1.000000
27 | Kd 1.000000 1.000000 1.000000
28 | Ks 0.015625 0.015625 0.015625
29 | Ke 0.000000 0.000000 0.000000
30 | Ni 1.000000
31 | d 1.000000
32 | illum 2
33 |
--------------------------------------------------------------------------------
/Method/method_cfg.yaml:
--------------------------------------------------------------------------------
1 | DEVICE: cuda:0
2 | INFERENCE_GSAM: True
3 | SAVE_RENDER: True
4 | VISUALIZE: True
5 |
6 | position:
7 |
8 | rotation:
9 |
10 | vision:
11 |   sam_checkpoint_path: ../assets/ckpts/sam_vit_h_4b8939.pth
12 |   grounded_checkpoint_path: ../assets/ckpts/groundingdino_swint_ogc.pth
13 |   config_path: ./vision/GroundedSAM/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py
14 |   box_threshold: 0.3
15 |   text_threshold: 0.25
16 |   sam_version: vit_h
17 | 
18 | cam:
19 |   vinv: [[ 0. , 1. , 0. , 0. ],
20 |          [-0.9028605 , -0. , 0.42993355, -0. ],
21 |          [ 0.42993355, -0. , 0.9028605 , -0. ],
22 |          [ 1. , 0. , 1.2 , 1. ]]
23 |   proj: [[ 1.7320507, 0. , 0. , 0. ],
24 |          [ 0. , 2.5980759, 0. , 0. ],
25 |          [ 0. , 0. , 0. , -1. ],
26 |          [ 0. , 0. , 0.05 , 0. ]]
--------------------------------------------------------------------------------
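The cam block stores what look like an Isaac Gym inverse view matrix (vinv) and projection matrix (proj). As a hedged sketch of how such matrices are typically used (this mirrors the deprojection pattern from Isaac Gym's camera examples, not code from this repo), a rendered depth image can be lifted to world-space points; the depth file name below is shortened from the assets/tasks example above.

import numpy as np
import yaml

with open("Method/method_cfg.yaml") as f:
    cam = yaml.safe_load(f)["cam"]
proj = np.array(cam["proj"])
vinv = np.array(cam["vinv"])

depth = np.load("isaac_render-depth-0-0.npy")   # HxW depth buffer (placeholder path)
H, W = depth.shape
fu, fv = 2.0 / proj[0, 0], 2.0 / proj[1, 1]

# Normalized image-plane coordinates, following the Isaac Gym deprojection example.
u = -(np.arange(W) - W / 2) / W
v = (np.arange(H) - H / 2) / H
uu, vv = np.meshgrid(u, v)

pts_cam = np.stack([depth * fu * uu, depth * fv * vv, depth, np.ones_like(depth)], axis=-1)
pts_world = pts_cam.reshape(-1, 4) @ vinv       # row vectors times inverse view matrix
print(pts_world[:, :3].shape)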
/Benchmark/benchmark_catalogue/error.txt:
--------------------------------------------------------------------------------
1 | "ae7142127dd84ebbbe7762368ace452c": { shoe->mug }
2 | 072-b no upright, wrong category(toy->glue gun)
3 | 019 trans
4 | 024 trans
5 | 040 trans
6 | 065-a trans
7 | 065-b trans
8 | 065-c trans
9 | 065-d trans
10 | 065-f trans
11 | 065-g trans
12 | 065-j trans
13 | d5a5f0a954f94bcea3168329d1605fe9: shoe->mu
14 | 048 hammer trans
15 | 033 trans
16 | 8a6cb4f7b0004f53830e270dc6e1ff1d handle_left/right xx(no handle)
17 | 025 trans
18 | rewrite "tip_left" and "tip_right"'s prompt
19 | f47fdcf9615d4e94a71e6731242a4c94 weird mesh
20 | dbb07d13a33546f09ac8ca98b1ddef20 wallet has no clasp (instruction)
21 | 032 trans
22 | d9675ab05c39447baf27e19ea07d484e lighter pointing forth(facing the viewer)-instruction
23 | note! "forth" rotation equivalence
24 | note! "spout left" needs to be upright
25 | note! "cap" forth rotation equivalence
26 | note! "cap" forth rotation equivalence
27 | 022 weird mesh
28 |
29 | blender no texture: 9660e0c0326b4f7386014e27717231ae, ycb 04 08 09, 5de830b2cccf4fe7a2e6b400abf26ca7
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/link4.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 4
3 |
4 | newmtl Part__Feature001_001_003_001
5 | Ns -1.960784
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 1.000000 1.000000 1.000000
8 | Ks 0.007812 0.007812 0.007812
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl Part__Feature002_001_003_001
15 | Ns -1.960784
16 | Ka 1.000000 1.000000 1.000000
17 | Kd 0.250980 0.250980 0.250980
18 | Ks 0.007812 0.007812 0.007812
19 | Ke 0.000000 0.000000 0.000000
20 | Ni 1.000000
21 | d 1.000000
22 | illum 2
23 |
24 | newmtl Part__Feature003_001_003_001
25 | Ns -1.960784
26 | Ka 1.000000 1.000000 1.000000
27 | Kd 1.000000 1.000000 1.000000
28 | Ks 0.007812 0.007812 0.007812
29 | Ke 0.000000 0.000000 0.000000
30 | Ni 1.000000
31 | d 1.000000
32 | illum 2
33 |
34 | newmtl Part__Feature_002_003_001
35 | Ns -1.960784
36 | Ka 1.000000 1.000000 1.000000
37 | Kd 1.000000 1.000000 1.000000
38 | Ks 0.007812 0.007812 0.007812
39 | Ke 0.000000 0.000000 0.000000
40 | Ni 1.000000
41 | d 1.000000
42 | illum 2
43 |
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/link3.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 4
3 |
4 | newmtl Part__Feature001_010_001_002.001
5 | Ns -1.960784
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 1.000000 1.000000 1.000000
8 | Ks 0.007812 0.007812 0.007812
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl Part__Feature002_007_001_002.001
15 | Ns -1.960784
16 | Ka 1.000000 1.000000 1.000000
17 | Kd 1.000000 1.000000 1.000000
18 | Ks 0.007812 0.007812 0.007812
19 | Ke 0.000000 0.000000 0.000000
20 | Ni 1.000000
21 | d 1.000000
22 | illum 2
23 |
24 | newmtl Part__Feature003_004_001_002.001
25 | Ns -1.960784
26 | Ka 1.000000 1.000000 1.000000
27 | Kd 1.000000 1.000000 1.000000
28 | Ks 0.007812 0.007812 0.007812
29 | Ke 0.000000 0.000000 0.000000
30 | Ni 1.000000
31 | d 1.000000
32 | illum 2
33 |
34 | newmtl Part__Feature_001_001_001_002.001
35 | Ns -1.960784
36 | Ka 1.000000 1.000000 1.000000
37 | Kd 0.250980 0.250980 0.250980
38 | Ks 0.007812 0.007812 0.007812
39 | Ke 0.000000 0.000000 0.000000
40 | Ni 1.000000
41 | d 1.000000
42 | illum 2
43 |
--------------------------------------------------------------------------------
/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-090658_no_interaction/task_config.json:
--------------------------------------------------------------------------------
1 | {"orientation": "left", "rotation": "None", "selected_obj_names": ["USB", "hammer"], "selected_urdfs": ["objaverse_final_norm/0a51815f3c0941ae8312fc6917173ed6/material_2.urdf", "objaverse_final_norm/8ed38a92668a425eb16da938622d9ace/material_2.urdf"], "target_obj_name": "hammer", "instruction": "Place the hammer to the left of the USB on the table. ", "init_obj_pos": [[0.5523672103881836, -0.1767720878124237, 0.30958184599876404, -0.16768784821033478, -0.42019906640052795, 0.01495102047920227, 0.8916782140731812, 0.00046477484283968806, 0.0010078288614749908, -0.00030404693097807467, -0.10503458976745605, 0.03628098964691162, -0.002049945993348956], [0.5076466798782349, -0.05766259878873825, 0.30820930004119873, -0.5712552666664124, 0.4136405289173126, -0.41678178310394287, 0.5734648108482361, 0.001841548248194158, 0.003947087097913027, 0.005498047918081284, 0.7908462882041931, -0.034841056913137436, 0.027878539636731148]], "position_instruction": "Place the hammer to the left of the USB on the table. "}
--------------------------------------------------------------------------------
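Each task_config.json stores one 13-value entry per object in init_obj_pos. Below is a minimal parsing sketch; the split into position, xyzw quaternion, and linear/angular velocity follows the Isaac Gym 13-dimensional root-state layout, which is an assumption rather than something the file documents.

import json

with open("task_config.json") as f:   # path is a placeholder
    task = json.load(f)

print(task["instruction"])
for name, state in zip(task["selected_obj_names"], task["init_obj_pos"]):
    pos, quat = state[0:3], state[3:7]
    lin_vel, ang_vel = state[7:10], state[10:13]
    print(name, "pos:", pos, "quat (xyzw):", quat)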
/Benchmark/task_examples/position/left/Place_the_hammer_to_the_left_of_the_USB_on_the_table._/20240717-094704_no_interaction/task_config.json:
--------------------------------------------------------------------------------
1 | {"orientation": "left", "rotation": "None", "selected_obj_names": ["USB", "hammer"], "selected_urdfs": ["objaverse_final_norm/0a51815f3c0941ae8312fc6917173ed6/material_2.urdf", "objaverse_final_norm/35a76a67ea1c45edabbd5013de70d68d/material_2.urdf"], "target_obj_name": "hammer", "instruction": "Place the hammer to the left of the USB on the table. ", "init_obj_pos": [[0.5709131360054016, 0.2073042243719101, 0.3095809519290924, -0.17370298504829407, -0.4178505837917328, 0.0022908926475793123, 0.8917526602745056, -0.0003591739514376968, 0.0003141180204693228, -0.0003524061758071184, -0.03348350524902344, -0.04323001950979233, -0.00611852714791894], [0.4233412742614746, -0.10578499734401703, 0.32568830251693726, 0.0025873545091599226, 0.0003954840067308396, 0.12344525009393692, 0.9923479557037354, 0.0007402655319310725, -0.003524358617141843, -0.002587254624813795, 0.10105752944946289, 0.06055070459842682, 0.00236650463193655]], "position_instruction": "Place the hammer to the left of the USB on the table. "}
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/task_config.json:
--------------------------------------------------------------------------------
1 | {"orientation": "None", "rotation": "None", "selected_obj_names": ["mug"], "selected_urdfs": ["objaverse_final_norm/ca4f9a92cc2f4ee98fe9332db41bf7f7/material_2.urdf"], "target_obj_name": "mug", "instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "init_obj_pos": [[0.6550417542457581, 0.05568762868642807, 0.3321579694747925, 0.07643917948007584, 0.21541181206703186, -0.12756481766700745, 0.9651331901550293, -0.004337493795901537, 0.004771982319653034, -0.0002449209277983755, -0.10857345163822174, -0.09869785606861115, -0.002580456668511033]], "position_instruction": "", "rotation_instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "rotation_instruction_label": "handle_left"}
--------------------------------------------------------------------------------
/Method/utils/task_stat.py:
--------------------------------------------------------------------------------
1 | import glob
2 | 
3 | # Count generated task_config.json files per position tag for each output batch.
4 | batch_roots = [
5 |     'Method/output/rot_banch_0704',
6 |     'Method/output/rot_banch_0717',
7 |     'Method/output/rot_banch_0717_pure_rot',
8 | ]
9 | for root in batch_roots:
10 |     paths = glob.glob(f'{root}/*/*/*/task_config.json')
11 |     print("total tasks", len(paths))
12 |     position_tags = set(path.split('/')[-4] for path in paths)
13 |     print(position_tags)
14 |     for position_tag in position_tags:
15 |         print(position_tag, len([path for path in paths if path.split('/')[-4] == position_tag]))
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/task_config.json:
--------------------------------------------------------------------------------
1 | {"orientation": "None", "rotation": "None", "selected_obj_names": ["mug"], "selected_urdfs": ["objaverse_final_norm/db9345f568e8499a9eac2577302b5f51/material_2.urdf"], "target_obj_name": "mug", "instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "init_obj_pos": [[0.6686422824859619, 0.11716754734516144, 0.34889549016952515, -0.006926149129867554, 0.25072675943374634, 0.026660921052098274, 0.9676658511161804, -0.001081045251339674, 0.0014700093306601048, -0.0009055532282218337, -0.03115496225655079, -0.024703728035092354, 0.0006507631042040884]], "position_instruction": "", "rotation_instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "rotation_instruction_label": "handle_left"}
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/hand.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 5
3 |
4 | newmtl Part__Feature001_008_005
5 | Ns -1.960784
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.250980 0.250980 0.250980
8 | Ks 0.007812 0.007812 0.007812
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl Part__Feature002_005_005
15 | Ns -1.960784
16 | Ka 1.000000 1.000000 1.000000
17 | Kd 0.901961 0.921569 0.929412
18 | Ks 0.015625 0.015625 0.015625
19 | Ke 0.000000 0.000000 0.000000
20 | Ni 1.000000
21 | d 1.000000
22 | illum 2
23 |
24 | newmtl Part__Feature005_001_005
25 | Ns -1.960784
26 | Ka 1.000000 1.000000 1.000000
27 | Kd 1.000000 1.000000 1.000000
28 | Ks 0.015625 0.015625 0.015625
29 | Ke 0.000000 0.000000 0.000000
30 | Ni 1.000000
31 | d 1.000000
32 | illum 2
33 |
34 | newmtl Part__Feature005_001_005_001
35 | Ns -1.960784
36 | Ka 1.000000 1.000000 1.000000
37 | Kd 0.901961 0.921569 0.929412
38 | Ks 0.015625 0.015625 0.015625
39 | Ke 0.000000 0.000000 0.000000
40 | Ni 1.000000
41 | d 1.000000
42 | illum 2
43 |
44 | newmtl Part__Feature_009_005
45 | Ns -1.960784
46 | Ka 1.000000 1.000000 1.000000
47 | Kd 0.250980 0.250980 0.250980
48 | Ks 0.015625 0.015625 0.015625
49 | Ke 0.000000 0.000000 0.000000
50 | Ni 1.000000
51 | d 1.000000
52 | illum 2
53 |
--------------------------------------------------------------------------------
/Method/vision/tranformation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import json
3 |
4 |
5 |
6 | def quaternion_to_matrix(q):
7 | """
8 |     Convert a quaternion, given in (qw, qx, qy, qz) order, into a 3x3 rotation matrix.
9 | """
10 | qw, qx, qy, qz = q
11 | return np.array([
12 | [1 - 2*qy*qy - 2*qz*qz, 2*qx*qy - 2*qz*qw, 2*qx*qz + 2*qy*qw],
13 | [2*qx*qy + 2*qz*qw, 1 - 2*qx*qx - 2*qz*qz, 2*qy*qz - 2*qx*qw],
14 | [2*qx*qz - 2*qy*qw, 2*qy*qz + 2*qx*qw, 1 - 2*qx*qx - 2*qy*qy]
15 | ])
16 |
17 | def create_transformation_matrix(position, quaternion):
18 | """
19 | Create a 4x4 transformation matrix from position and quaternion.
20 | """
21 | x, y, z = position
22 | q = quaternion
23 |
24 | rotation_matrix = quaternion_to_matrix(q)
25 |
26 | transformation_matrix = np.identity(4)
27 | transformation_matrix[:3, :3] = rotation_matrix
28 | transformation_matrix[:3, 3] = [x, y, z]
29 |
30 | return transformation_matrix
31 |
32 | config_path = "output/gym_outputs_task_gen_obja_0304_rot/center/Place_the_mouse_at_the_center_of_all_the_objects_on_the_table.__upright/20240630-202931_no_interaction/task_config.json"
33 |
34 | config = json.load(open(config_path, "r"))
35 | pos_s = config["init_obj_pos"]
36 | for pos in pos_s:
37 | position = pos[:3]
38 |     quaternion = pos[3:7]  # 4-element quaternion stored right after the xyz position in init_obj_pos
39 | transformation_matrix = create_transformation_matrix(position, quaternion)
40 |
41 | print(transformation_matrix)
42 |
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075911_no_interaction/task_config_new.json:
--------------------------------------------------------------------------------
1 | {"orientation": "None", "rotation": "None", "selected_obj_names": ["mug"], "selected_urdfs": ["objaverse_final_norm/ca4f9a92cc2f4ee98fe9332db41bf7f7/material_2.urdf"], "target_obj_name": "mug", "instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "init_obj_pos": [[0.6550417542457581, 0.05568762868642807, 0.3321579694747925, 0.07643917948007584, 0.21541181206703186, -0.12756481766700745, 0.9651331901550293, -0.004337493795901537, 0.004771982319653034, -0.0002449209277983755, -0.10857345163822174, -0.09869785606861115, -0.002580456668511033]], "position_instruction": "", "rotation_instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "rotation_instruction_label": "handle_left", "obj_codes": ["ca4f9a92cc2f4ee98fe9332db41bf7f7"], "target_obj_code": "ca4f9a92cc2f4ee98fe9332db41bf7f7", "anno_target": {"category": "mug", "annotation": {" the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).": {"quat": [[0.5, -0.5, -0.5, 0.4999999701976776]], "stage": 1}}}}
--------------------------------------------------------------------------------
/Benchmark/task_examples/rotation/None/mug_handle_left/20240717-075819_no_interaction/task_config_new.json:
--------------------------------------------------------------------------------
1 | {"orientation": "None", "rotation": "None", "selected_obj_names": ["mug"], "selected_urdfs": ["objaverse_final_norm/db9345f568e8499a9eac2577302b5f51/material_2.urdf"], "target_obj_name": "mug", "instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "init_obj_pos": [[0.6686422824859619, 0.11716754734516144, 0.34889549016952515, -0.006926149129867554, 0.25072675943374634, 0.026660921052098274, 0.9676658511161804, -0.001081045251339674, 0.0014700093306601048, -0.0009055532282218337, -0.03115496225655079, -0.024703728035092354, 0.0006507631042040884]], "position_instruction": "", "rotation_instruction": "Please pick up the object and place it to specify the rotation of the object after placement: the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).", "rotation_instruction_label": "handle_left", "obj_codes": ["db9345f568e8499a9eac2577302b5f51"], "target_obj_code": "db9345f568e8499a9eac2577302b5f51", "anno_target": {"category": "mug", "annotation": {" the position of the object is reasonable and accords with commonsense, and that the handle of the object is on the left(pointing towards left).": {"quat": [[0.5, -0.5, -0.5, 0.4999999701976776]], "stage": 1}}}}
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to segment-anything
2 | We want to make contributing to this project as easy and transparent as
3 | possible.
4 |
5 | ## Pull Requests
6 | We actively welcome your pull requests.
7 |
8 | 1. Fork the repo and create your branch from `main`.
9 | 2. If you've added code that should be tested, add tests.
10 | 3. If you've changed APIs, update the documentation.
11 | 4. Ensure the test suite passes.
12 | 5. Make sure your code lints, using the `linter.sh` script in the project's root directory. Linting requires `black==23.*`, `isort==5.12.0`, `flake8`, and `mypy`.
13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA").
14 |
15 | ## Contributor License Agreement ("CLA")
16 | In order to accept your pull request, we need you to submit a CLA. You only need
17 | to do this once to work on any of Facebook's open source projects.
18 |
19 | Complete your CLA here: <https://code.facebook.com/cla>
20 |
21 | ## Issues
22 | We use GitHub issues to track public bugs. Please ensure your description is
23 | clear and has sufficient instructions to be able to reproduce the issue.
24 |
25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
26 | disclosure of security bugs. In those cases, please go through the process
27 | outlined on that page and do not file a public issue.
28 |
29 | ## License
30 | By contributing to segment-anything, you agree that your contributions will be licensed
31 | under the LICENSE file in the root directory of this source tree.
32 |
--------------------------------------------------------------------------------
/Method/position/vlm_utils.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import requests
3 |
4 | # OpenAI API Key
5 | import os
6 | API_KEY = os.getenv("API_KEY")
7 | if API_KEY is None:
8 | raise ValueError("please set API_KEY environment variable by running `export API_KEY=XXXX`")
9 | # Function to encode the image
10 | def encode_image(image_path):
11 | with open(image_path, "rb") as image_file:
12 | return base64.b64encode(image_file.read()).decode('utf-8')
13 |
14 | def infer_path(prompt, path):
15 | # Getting the base64 string
16 | base64_image = encode_image(path)
17 |
18 | headers = {
19 | "Content-Type": "application/json",
20 | "Authorization": f"Bearer {API_KEY}"
21 | }
22 |
23 | payload = {
24 | "model": "gpt-4o",
25 | "messages": [
26 | {
27 | "role": "user",
28 | "content": [
29 | {
30 | "type": "text",
31 | "text": prompt
32 | },
33 | {
34 | "type": "image_url",
35 | "image_url": {
36 | "url": f"data:image/jpeg;base64,{base64_image}"
37 | }
38 | }
39 | ]
40 | }
41 | ],
42 | "max_tokens": 300
43 | }
44 |
45 | response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
46 |
47 | # print(response.json())
48 | return response
49 |
50 |
51 | if __name__ == "__main__":
52 |     prompt = "describe this image"
53 | path = "./vision/1.jpg"
54 | response = infer_path(prompt, path)
55 | print(response.json())
56 |
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/stltoobj.mlx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/segment_anything/modeling/common.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import torch
8 | import torch.nn as nn
9 |
10 | from typing import Type
11 |
12 |
13 | class MLPBlock(nn.Module):
14 | def __init__(
15 | self,
16 | embedding_dim: int,
17 | mlp_dim: int,
18 | act: Type[nn.Module] = nn.GELU,
19 | ) -> None:
20 | super().__init__()
21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim)
22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim)
23 | self.act = act()
24 |
25 | def forward(self, x: torch.Tensor) -> torch.Tensor:
26 | return self.lin2(self.act(self.lin1(x)))
27 |
28 |
29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa
30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa
31 | class LayerNorm2d(nn.Module):
32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
33 | super().__init__()
34 | self.weight = nn.Parameter(torch.ones(num_channels))
35 | self.bias = nn.Parameter(torch.zeros(num_channels))
36 | self.eps = eps
37 |
38 | def forward(self, x: torch.Tensor) -> torch.Tensor:
39 | u = x.mean(1, keepdim=True)
40 | s = (x - u).pow(2).mean(1, keepdim=True)
41 | x = (x - u) / torch.sqrt(s + self.eps)
42 | x = self.weight[:, None, None] * x + self.bias[:, None, None]
43 | return x
44 |
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/task_config_test.json:
--------------------------------------------------------------------------------
1 | {"position_tag": "behind", "rotation": "None", "selected_obj_names": ["bottle", "tissue box", "apple"], "selected_urdfs": ["ycb_16k_backup/006_mustard_bottle_google_16k/006_mustard_bottle_google_16k.urdf", "objaverse_rescale/dc4c91abf45342b4bb8822f50fa162b2/material_2.urdf", "objaverse_rescale/fbda0b25f41f40958ea984f460e4770b/material_2.urdf"], "target_obj_name": "apple", "instruction": "Place the apple behind the bottle on the table. We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.", "init_obj_pos": [[0.3738532066345215, 0.17327244579792023, 0.30287155508995056, 5.603695899480954e-05, -3.935253698728047e-05, -0.03753087669610977, 0.9992955327033997, 0.0029977706726640463, 0.001985779032111168, -0.0012033769162371755, -0.03269371762871742, 0.04539608955383301, -0.03798031061887741], [0.44172099232673645, -0.32238009572029114, 0.3753003478050232, 0.7060639262199402, -0.037992026656866074, -0.037284620106220245, 0.7061444520950317, -0.00012565749057102948, 0.0002828052965924144, 0.00027510791551321745, -0.005133399739861488, -0.002302509034052491, 0.0013929366832599044], [0.5476588606834412, -0.07213786244392395, 0.3492436110973358, 0.11362186074256897, 0.05067095533013344, -0.08851055055856705, 0.9882755279541016, 0.004178161732852459, -0.00013288251648191363, -0.000834679405670613, 0.0010649901814758778, 0.08433020859956741, -0.0004798930021934211]], "position_instruction": "Place the apple behind the bottle on the table. We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up."}
--------------------------------------------------------------------------------
/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/task_config_new.json:
--------------------------------------------------------------------------------
1 | {"orientation": "behind", "rotation": "None", "selected_obj_names": ["box", "apple"], "selected_urdfs": ["objaverse_final_norm/9660e0c0326b4f7386014e27717231ae/material_2.urdf", "objaverse_final_norm/f53d75bd123b40bca14d12d54286f432/material_2.urdf"], "target_obj_name": "apple", "instruction": "Place the apple behind the box on the table. We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.", "init_obj_pos": [[0.5763212442398071, 0.24244019389152527, 0.3158315122127533, 0.00011814905155915767, 3.0217168387025595e-05, 0.057858873158693314, 0.9983247518539429, 0.0005872970796190202, 0.00024345181009266526, 1.8670303688850254e-05, 0.0013161733513697982, -0.0011025663698092103, -0.001989496871829033], [0.4732729494571686, 0.19301258027553558, 0.34965574741363525, 0.08372167497873306, -0.015573234297335148, -0.0979083776473999, 0.9915453195571899, 0.004182836972177029, 0.0017127282917499542, -0.001595060108229518, -0.02539602667093277, 0.09032362699508667, 0.01703813299536705]], "position_instruction": "Place the apple behind the box on the table. ", "rotation_instruction": "We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.", "rotation_instruction_label": "upright", "obj_codes": ["9660e0c0326b4f7386014e27717231ae", "f53d75bd123b40bca14d12d54286f432"], "target_obj_code": "f53d75bd123b40bca14d12d54286f432", "anno_target": {"category": "apple", "annotation": {" the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.": {"quat": [[0.7071067690849304, 0.0, 0.0, 0.7071067690849304]], "stage": 1}}}}
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/link7.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 8
3 |
4 | newmtl Part__Mirroring001_004_002
5 | Ns -1.960784
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.250980 0.250980 0.250980
8 | Ks 0.015625 0.015625 0.015625
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl Part__Mirroring002_004_001
15 | Ns -1.960784
16 | Ka 1.000000 1.000000 1.000000
17 | Kd 0.250980 0.250980 0.250980
18 | Ks 0.031250 0.031250 0.031250
19 | Ke 0.000000 0.000000 0.000000
20 | Ni 1.000000
21 | d 1.000000
22 | illum 2
23 |
24 | newmtl Part__Mirroring003_004_001
25 | Ns -1.960784
26 | Ka 1.000000 1.000000 1.000000
27 | Kd 0.250980 0.250980 0.250980
28 | Ks 0.031250 0.031250 0.031250
29 | Ke 0.000000 0.000000 0.000000
30 | Ni 1.000000
31 | d 1.000000
32 | illum 2
33 |
34 | newmtl Part__Mirroring004_004_002
35 | Ns -1.960784
36 | Ka 1.000000 1.000000 1.000000
37 | Kd 1.000000 1.000000 1.000000
38 | Ks 0.031250 0.031250 0.031250
39 | Ke 0.000000 0.000000 0.000000
40 | Ni 1.000000
41 | d 1.000000
42 | illum 2
43 |
44 | newmtl Part__Mirroring005_004_001
45 | Ns -1.960784
46 | Ka 1.000000 1.000000 1.000000
47 | Kd 0.250980 0.250980 0.250980
48 | Ks 0.031250 0.031250 0.031250
49 | Ke 0.000000 0.000000 0.000000
50 | Ni 1.000000
51 | d 1.000000
52 | illum 2
53 |
54 | newmtl Part__Mirroring006_004_001
55 | Ns -1.960784
56 | Ka 1.000000 1.000000 1.000000
57 | Kd 0.250980 0.250980 0.250980
58 | Ks 0.031250 0.031250 0.031250
59 | Ke 0.000000 0.000000 0.000000
60 | Ni 1.000000
61 | d 1.000000
62 | illum 2
63 |
64 | newmtl Part__Mirroring007_004_001
65 | Ns -1.960784
66 | Ka 1.000000 1.000000 1.000000
67 | Kd 0.250980 0.250980 0.250980
68 | Ks 0.031250 0.031250 0.031250
69 | Ke 0.000000 0.000000 0.000000
70 | Ni 1.000000
71 | d 1.000000
72 | illum 2
73 |
74 | newmtl Part__Mirroring_004_001
75 | Ns -1.960784
76 | Ka 1.000000 1.000000 1.000000
77 | Kd 0.898039 0.917647 0.929412
78 | Ks 0.031250 0.031250 0.031250
79 | Ke 0.000000 0.000000 0.000000
80 | Ni 1.000000
81 | d 1.000000
82 | illum 2
83 |
--------------------------------------------------------------------------------
/Method/utils/vlm_utils.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import os
3 | import requests
4 | # OpenAI API Key, read from the environment (`export API_KEY=...`)
5 | api_key = os.getenv("API_KEY")
6 | # Function to encode the image
7 | def encode_image(image_path):
8 | with open(image_path, "rb") as image_file:
9 | return base64.b64encode(image_file.read()).decode('utf-8')
10 |
11 | def infer_path(prompt, path):
12 | # Getting the base64 string
13 | base64_image = encode_image(path)
14 |
15 | headers = {
16 | "Content-Type": "application/json",
17 | "Authorization": f"Bearer {api_key}"
18 | }
19 |
20 | payload = {
21 | "model": "gpt-4-vision-preview",
22 | "messages": [
23 | {
24 | "role": "user",
25 | "content": [
26 | {
27 | "type": "text",
28 | "text": prompt
29 | },
30 | {
31 | "type": "image_url",
32 | "image_url": {
33 | "url": f"data:image/jpeg;base64,{base64_image}"
34 | }
35 | }
36 | ]
37 | }
38 | ],
39 | "max_tokens": 300
40 | }
41 |
42 | response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
43 |
44 | # print(response.json())
45 | return response
46 |
47 |
48 | if __name__ == "__main__":
49 |     prompt = "describe this image"
50 | path = "imgs/bana_cup_gsam_cup.jpg"
51 | response = infer_path(prompt, path)
52 | print(response.json())
53 | # prompt_path = "pure_prompt.txt"
54 | # import os
55 | # os.makedirs("GPT4V-pure", exist_ok=True)
56 | # import glob, json, os
57 | # paths = glob.glob("result/*.png")
58 | # prompt_ori = open(prompt_path, "r").read()
59 | # total = len(paths)
60 | # for i, path in enumerate(paths):
61 | # name = path.split("/")[-1].split(".")[0]
62 | # print(name, i , total)
63 | # save_path = f"GPT4V-pure/{name}_pure.json"
64 | # if os.path.exists(save_path):
65 | # continue
66 | # # prompt = prompt_ori + open(f"pure_GAPartNet/{name}_pure_GAPartNet.txt", "r").read()
67 | # prompt = prompt_ori
68 | # response = infer_path(prompt, path)
69 | # json.dump(response.json(), open(save_path, "w"))
70 | # # import pdb; pdb.set_trace()
--------------------------------------------------------------------------------
/Benchmark/task_examples/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/task_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "orientation": "behind",
3 | "rotation": "None",
4 | "selected_obj_names": [
5 | "box",
6 | "apple"
7 | ],
8 | "selected_urdfs": [
9 | "objaverse_final_norm/9660e0c0326b4f7386014e27717231ae/material_2.urdf",
10 | "objaverse_final_norm/f53d75bd123b40bca14d12d54286f432/material_2.urdf"
11 | ],
12 | "target_obj_name": "apple",
13 | "instruction": "Place the apple behind the box on the table. We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.",
14 | "init_obj_pos": [
15 | [
16 | 0.5763212442398071,
17 | 0.24244019389152527,
18 | 0.3158315122127533,
19 | 0.00011814905155915767,
20 | 3.0217168387025595e-05,
21 | 0.057858873158693314,
22 | 0.9983247518539429,
23 | 0.0005872970796190202,
24 | 0.00024345181009266526,
25 | 1.8670303688850254e-05,
26 | 0.0013161733513697982,
27 | -0.0011025663698092103,
28 | -0.001989496871829033
29 | ],
30 | [
31 | 0.4732729494571686,
32 | 0.19301258027553558,
33 | 0.34965574741363525,
34 | 0.08372167497873306,
35 | -0.015573234297335148,
36 | -0.0979083776473999,
37 | 0.9915453195571899,
38 | 0.004182836972177029,
39 | 0.0017127282917499542,
40 | -0.001595060108229518,
41 | -0.02539602667093277,
42 | 0.09032362699508667,
43 | 0.01703813299536705
44 | ]
45 | ],
46 | "position_instruction": "Place the apple behind the box on the table. ",
47 | "rotation_instruction": "We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.",
48 | "rotation_instruction_label": "upright"
49 | }
--------------------------------------------------------------------------------
/Method/isaacgym0/utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import os, glob
4 | import argparse
5 | import imageio
6 | from PIL import Image
7 | from isaacgym.torch_utils import *
8 | import torch
9 | import math
10 | import yaml
11 |
12 | def images_to_video(image_folder, video_path, frame_size=(1920, 1080), fps=30):
13 | images = sorted([img for img in os.listdir(image_folder) if img.endswith(".png") or img.endswith(".jpg") or img.endswith(".jpeg")])
14 |
15 | if not images:
16 | print("No images found in the specified directory!")
17 | return
18 |
19 | writer = imageio.get_writer(video_path, fps=fps)
20 |
21 | for image in images:
22 | img_path = os.path.join(image_folder, image)
23 | img = imageio.imread(img_path)
24 |
25 | if img.shape[1] > frame_size[0] or img.shape[0] > frame_size[1]:
26 |             print("Warning: the requested frame size is smaller than the image size.")
27 |             print("Images will be resized to match the frame size.")
28 | img = np.array(Image.fromarray(img).resize(frame_size))
29 |
30 | writer.append_data(img)
31 |
32 | writer.close()
33 | print("Video created successfully!")
34 |
35 | def quat_axis(q, axis=0):
36 | basis_vec = torch.zeros(q.shape[0], 3, device=q.device)
37 | basis_vec[:, axis] = 1
38 | return quat_rotate(q, basis_vec)
39 |
40 |
41 | def orientation_error(desired, current):
42 | cc = quat_conjugate(current)
43 | q_r = quat_mul(desired, cc)
44 | return q_r[:, 0:3] * torch.sign(q_r[:, 3]).unsqueeze(-1)
45 |
46 |
47 | def cube_grasping_yaw(q, corners):
48 | """ returns horizontal rotation required to grasp cube """
49 | rc = quat_rotate(q, corners)
50 | yaw = (torch.atan2(rc[:, 1], rc[:, 0]) - 0.25 * math.pi) % (0.5 * math.pi)
51 | theta = 0.5 * yaw
52 | w = theta.cos()
53 | x = torch.zeros_like(w)
54 | y = torch.zeros_like(w)
55 | z = theta.sin()
56 | yaw_quats = torch.stack([x, y, z, w], dim=-1)
57 | return yaw_quats
58 |
59 | def read_yaml_config(file_path):
60 | with open(file_path, 'r') as file:
61 | # Load the YAML file into a Python dictionary
62 | config = yaml.safe_load(file)
63 | return config
--------------------------------------------------------------------------------
/Benchmark/dataset/objects/scale.py:
--------------------------------------------------------------------------------
1 | import trimesh
2 | import os
3 | import json
4 | import math
5 |
6 |
7 | mesh_path = '/Users/selina/Desktop/projects/ObjectPlacement/assets/mesh/final_norm'
8 | category_path = '/Users/selina/Desktop/projects/Open6DOR/Benchmark/benchmark_catalogue/category_dictionary.json'
9 | object_path = '/Users/selina/Desktop/projects/Open6DOR/Benchmark/benchmark_catalogue/object_dictionary_complete_0702.json'
10 | new_path = "/Users/selina/Desktop/projects/Open6DOR/Benchmark/dataset/objects/rescale"
11 |
12 | category_dict = json.load(open(category_path, 'r'))
13 | object_dict = json.load(open(object_path, 'r'))
14 | for root, dirs, files in os.walk(mesh_path):
15 | for dir in dirs:
16 | try:
17 | obj_dir = os.path.join(root, dir)
18 | obj_name = dir
19 | if obj_name not in object_dict:
20 | continue
21 | obj_cat = object_dict[obj_name]['category']
22 | obj_scale = category_dict[obj_cat]['scale']
23 | obj_mesh = trimesh.load(os.path.join(mesh_path, dir) + '/material.obj')
24 |
25 | obj_mesh.apply_translation(-obj_mesh.centroid)
26 |
27 | if obj_mesh.bounding_box.extents.max() < 0.1:
28 | print(f"Object {obj_name} is too small")
29 | continue
30 | scale_factor = 0.7 * math.sqrt(obj_scale) / obj_mesh.bounding_box.extents.max()
31 |
32 | obj_mesh.apply_scale(scale_factor)
33 | if not os.path.exists(os.path.join(new_path, dir)):
34 | os.makedirs(os.path.join(new_path, dir), exist_ok=False)
35 | obj_mesh.export(os.path.join(new_path, dir) + '/material.obj')
36 | except:
37 | import pdb; pdb.set_trace()
38 |
39 |
40 | break
41 |
42 | # # Load a mesh from OBJ file
43 | # mesh = trimesh.load('/Users/selina/Desktop/projects/Open6DOR/Benchmark/dataset/objects/rescale/c61227cac7224b86b43c53ac2a2b6ec7/material.obj')
44 |
45 | # # Translate mesh to its centroid
46 | # mesh.apply_translation(-mesh.centroid)
47 |
48 | # # Scale the mesh (1 unit here)
49 | # # scale_factor = 1.0 / mesh.bounding_box.extents.max()
50 | # print(mesh.bounding_box.extents.max())
51 | # # mesh.apply_scale(scale_factor)
52 |
53 | # # # save the new mesh to OBJ file
54 | # # mesh.export('2ab18cb4ec8f4a1f8dec637602362054.obj')
--------------------------------------------------------------------------------
/Method/gym/vlm_utils.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import os
3 | import requests
4 | 
5 | # OpenAI API Key: read from the environment; never hard-code secrets in the repository
6 | api_key = os.getenv("API_KEY")
7 | if api_key is None:
8 |     raise ValueError("please set API_KEY environment variable by running `export API_KEY=XXXX`")
9 | # Function to encode the image
10 | def encode_image(image_path):
11 | with open(image_path, "rb") as image_file:
12 | return base64.b64encode(image_file.read()).decode('utf-8')
13 |
14 | def infer_path(prompt, path):
15 | # Getting the base64 string
16 | base64_image = encode_image(path)
17 |
18 | headers = {
19 | "Content-Type": "application/json",
20 | "Authorization": f"Bearer {api_key}"
21 | }
22 |
23 | payload = {
24 | "model": "gpt-4-vision-preview",
25 | "messages": [
26 | {
27 | "role": "user",
28 | "content": [
29 | {
30 | "type": "text",
31 | "text": prompt
32 | },
33 | {
34 | "type": "image_url",
35 | "image_url": {
36 | "url": f"data:image/jpeg;base64,{base64_image}"
37 | }
38 | }
39 | ]
40 | }
41 | ],
42 | "max_tokens": 300
43 | }
44 |
45 | response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
46 |
47 | # print(response.json())
48 | return response
49 |
50 |
51 | if __name__ == "__main__":
52 |     prompt = "describe this image"
53 | path = "imgs/bana_cup_gsam_cup.jpg"
54 | response = infer_path(prompt, path)
55 | print(response.json())
56 | # prompt_path = "pure_prompt.txt"
57 | # import os
58 | # os.makedirs("GPT4V-pure", exist_ok=True)
59 | # import glob, json, os
60 | # paths = glob.glob("result/*.png")
61 | # prompt_ori = open(prompt_path, "r").read()
62 | # total = len(paths)
63 | # for i, path in enumerate(paths):
64 | # name = path.split("/")[-1].split(".")[0]
65 | # print(name, i , total)
66 | # save_path = f"GPT4V-pure/{name}_pure.json"
67 | # if os.path.exists(save_path):
68 | # continue
69 | # # prompt = prompt_ori + open(f"pure_GAPartNet/{name}_pure_GAPartNet.txt", "r").read()
70 | # prompt = prompt_ori
71 | # response = infer_path(prompt, path)
72 | # json.dump(response.json(), open(save_path, "w"))
73 | # # import pdb; pdb.set_trace()
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/.gitignore:
--------------------------------------------------------------------------------
1 | old/
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | pip-wheel-metadata/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 | db.sqlite3
64 | db.sqlite3-journal
65 |
66 | # Flask stuff:
67 | instance/
68 | .webassets-cache
69 |
70 | # Scrapy stuff:
71 | .scrapy
72 |
73 | # Sphinx documentation
74 | docs/_build/
75 |
76 | # PyBuilder
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | .python-version
88 |
89 | # pipenv
90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
93 | # install all needed dependencies.
94 | #Pipfile.lock
95 |
96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
97 | __pypackages__/
98 |
99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 |
103 | # SageMath parsed files
104 | *.sage.py
105 |
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 |
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 |
119 | # Rope project settings
120 | .ropeproject
121 |
122 | # mkdocs documentation
123 | /site
124 |
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 |
130 | # Pyre type checker
131 | .pyre/
132 |
133 | # checkpoint
134 | *.pth
135 | outputs/
136 |
137 | .idea/
138 |
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/link0.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 12
3 |
4 | newmtl Face636_001
5 | Ns -1.960784
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.901961 0.921569 0.929412
8 | Ks 0.125000 0.125000 0.125000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl Part__Feature017_001
15 | Ns -1.960784
16 | Ka 1.000000 1.000000 1.000000
17 | Kd 1.000000 1.000000 1.000000
18 | Ks 0.500000 0.500000 0.500000
19 | Ke 0.000000 0.000000 0.000000
20 | Ni 1.000000
21 | d 1.000000
22 | illum 2
23 |
24 | newmtl Part__Feature018_001
25 | Ns -1.960784
26 | Ka 1.000000 1.000000 1.000000
27 | Kd 1.000000 1.000000 1.000000
28 | Ks 0.500000 0.500000 0.500000
29 | Ke 0.000000 0.000000 0.000000
30 | Ni 1.000000
31 | d 1.000000
32 | illum 2
33 |
34 | newmtl Part__Feature019_001
35 | Ns -1.960784
36 | Ka 1.000000 1.000000 1.000000
37 | Kd 1.000000 1.000000 1.000000
38 | Ks 0.125000 0.125000 0.125000
39 | Ke 0.000000 0.000000 0.000000
40 | Ni 1.000000
41 | d 1.000000
42 | illum 2
43 |
44 | newmtl Part__Feature022_001
45 | Ns -1.960784
46 | Ka 1.000000 1.000000 1.000000
47 | Kd 0.901961 0.921569 0.929412
48 | Ks 0.125000 0.125000 0.125000
49 | Ke 0.000000 0.000000 0.000000
50 | Ni 1.000000
51 | d 1.000000
52 | illum 2
53 |
54 | newmtl Part__Feature023_001
55 | Ns -1.960784
56 | Ka 1.000000 1.000000 1.000000
57 | Kd 0.250980 0.250980 0.250980
58 | Ks 0.125000 0.125000 0.125000
59 | Ke 0.000000 0.000000 0.000000
60 | Ni 1.000000
61 | d 1.000000
62 | illum 2
63 |
64 | newmtl Shell001_001
65 | Ns -1.960784
66 | Ka 1.000000 1.000000 1.000000
67 | Kd 0.250980 0.250980 0.250980
68 | Ks 0.125000 0.125000 0.125000
69 | Ke 0.000000 0.000000 0.000000
70 | Ni 1.000000
71 | d 1.000000
72 | illum 2
73 |
74 | newmtl Shell002_001
75 | Ns -1.960784
76 | Ka 1.000000 1.000000 1.000000
77 | Kd 0.901961 0.921569 0.929412
78 | Ks 0.125000 0.125000 0.125000
79 | Ke 0.000000 0.000000 0.000000
80 | Ni 1.000000
81 | d 1.000000
82 | illum 2
83 |
84 | newmtl Shell003_001
85 | Ns -1.960784
86 | Ka 1.000000 1.000000 1.000000
87 | Kd 0.901961 0.921569 0.929412
88 | Ks 0.125000 0.125000 0.125000
89 | Ke 0.000000 0.000000 0.000000
90 | Ni 1.000000
91 | d 1.000000
92 | illum 2
93 |
94 | newmtl Shell009_001
95 | Ns -1.960784
96 | Ka 1.000000 1.000000 1.000000
97 | Kd 0.250980 0.250980 0.250980
98 | Ks 0.125000 0.125000 0.125000
99 | Ke 0.000000 0.000000 0.000000
100 | Ni 1.000000
101 | d 1.000000
102 | illum 2
103 |
104 | newmtl Shell010_001
105 | Ns -1.960784
106 | Ka 1.000000 1.000000 1.000000
107 | Kd 0.901961 0.921569 0.929412
108 | Ks 0.125000 0.125000 0.125000
109 | Ke 0.000000 0.000000 0.000000
110 | Ni 1.000000
111 | d 1.000000
112 | illum 2
113 |
114 | newmtl Shell_001
115 | Ns -1.960784
116 | Ka 1.000000 1.000000 1.000000
117 | Kd 0.250980 0.250980 0.250980
118 | Ks 0.125000 0.125000 0.125000
119 | Ke 0.000000 0.000000 0.000000
120 | Ni 1.000000
121 | d 1.000000
122 | illum 2
123 |
--------------------------------------------------------------------------------
/Method/README.md:
--------------------------------------------------------------------------------
1 | # Method Introduction
2 |
3 |
4 |
5 | ## Get Task
6 | A class that loads a task from a configuration file. It can be used to set up the simulation environment in IsaacGym, query task information, render observations, and control the robot. A minimal usage sketch is appended at the end of this README.
7 |
8 | - _prepare_task: Load simulation env and get task information
9 |
10 | - _init_gym: Initialize gym env
11 |
12 | - _setup_scene: Set up scene
13 |
14 | - prepare_franka_asset: load the Franka asset from `self.cfgs["asset"]["franka_asset_file"]`
15 | 
16 | - _prepare_obj_assets: Load the object assets (table and objects)
17 |
18 | - _load_env: load all assets to env and set up scene
19 |
20 | - _init_observation: Initialize observation space and corresponding observation functions
21 |
22 | - refresh_observation: get observation dict from env
23 |
24 | - clean_up: clean up env
25 |
26 | ## Open6DOR-GPT
27 |
28 | GroundedSAM:
29 | ```bash
30 | cd Method/vision/GroundedSAM/GroundingDINO
31 | pip install -e .
32 | cd ../../../..
33 | cd Method/vision/GroundedSAM/segment_anything
34 | pip install -e .
35 | cd ../../../..
36 | ```
37 | Extensions:
38 | ```bash
39 | sudo apt update
40 | sudo apt install fonts-dejavu
41 | ```
42 |
43 | If you encounter the error:
44 | ```
45 | cannot import name 'split_torch_state_dict_into_shards' from 'huggingface_hub'
46 | ```
47 | try:
48 | ```bash
49 | pip install --upgrade huggingface_hub
50 | ```
51 |
52 | Download the SAM checkpoint [here](https://huggingface.co/spaces/abhishek/StableSAM/resolve/main/sam_vit_h_4b8939.pth).
53 | 
54 | Download the GroundingDINO checkpoint [here](https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth).
55 |
56 | ## Task Generation
57 |
58 | The core code for task generation is in `Method/interaction.py`. The task generator is responsible for generating tasks for Open6DOR.
59 |
60 | #### Position Track
61 | ```bash
62 | python interaction.py --mode gen_task --task_root debug_gen_task_pos
63 | ```
64 |
65 |
66 | #### Rotation Track
67 | ```bash
68 | python interaction.py --mode gen_task_pure_rot --task_root debug_gen_task_rot
69 | ```
70 |
71 | #### 6DoF Track
72 | ```bash
73 | python interaction.py --mode gen_task_rot --task_root debug_gen_task_6dof
74 | ```
75 |
76 | #### Large Dataset Generation
77 | If you want to generate a large dataset, you can use the following command:
78 | ```bash
79 | python run_multiple.py --f "YOUR COMMAND" --n YOUR_RUN_TIMES
80 | ```
81 |
82 | #### Change Parameters
83 | You can change the parameters in `Method/interaction.py` to generate different tasks.
84 |
85 | ##### Object Number
86 | ```python
87 | if orientation == "center":
88 | selected_obj_num = np.random.randint(4, 5)
89 | elif orientation == "between":
90 | selected_obj_num = np.random.randint(3, 5)
91 | else:
92 | selected_obj_num = np.random.randint(2, 5)
93 | ```
94 |
95 | ##### Object Position
96 | In `config.yaml`, you can change the object position range:
97 | ```yaml
98 | assets:
99 | position_noise: [0.2, 0.25] # x and y position random range, depends on the table size
100 | ```
101 |
102 |
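103 | ## Minimal Usage Sketch
104 | 
105 | The sketch below shows one way to load a generated `task_config.json` and bring it up in the simulator. It is only an illustration: the config path is a placeholder, the two `None` constructor arguments simply mirror the call in `Method/test_gym.py`, and wiring the task's `selected_urdfs` into `cfgs["asset"]["asset_files"]` follows what `test_gym.py` does for single objects rather than a documented API.
106 | 
107 | ```python
108 | import json
109 | 
110 | # Run from the Method/ directory so the local gym package (not OpenAI gym) is imported.
111 | from gym.object_gym import ObjectGym
112 | from gym.utils import read_yaml_config
113 | 
114 | # Base simulator settings (table, camera, noise ranges, ...)
115 | cfgs = read_yaml_config("config.yaml")
116 | 
117 | # Load a generated task and point the simulator at its objects (placeholder path).
118 | task_cfgs = json.load(open("path/to/task_config.json", "r"))
119 | cfgs["asset"]["asset_files"] = task_cfgs["selected_urdfs"]
120 | 
121 | # The two None arguments follow the call in test_gym.py.
122 | gym = ObjectGym(cfgs, None, None, pre_steps=0)
123 | 
124 | # Returns RGB, depth, segmentation and point-cloud observations (see test_gym.py for the full tuple).
125 | obs = gym.refresh_observation(get_visual_obs=True)
126 | gym.clean_up()
127 | ```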
--------------------------------------------------------------------------------
/Method/utils/get_assets.py:
--------------------------------------------------------------------------------
1 |
2 | import json, glob
3 |
4 | def get_assets_info(dataset_names):
5 | urdf_paths = []
6 | obj_name = []
7 | uuids = []
8 | if "ycb" in dataset_names:
9 | # all the ycb urdf data
10 | json_dict = json.load(open("../Benchmark/benchmark_catalogue/object_dictionary_complete_0702.json"))
11 | all_uuid = json_dict.keys()
12 |
13 | #ycb_urdf_paths = glob.glob("assets/ycb_16k_backup/*/*.urdf")
14 | ycb_urdf_paths = glob.glob("benchmark/mesh/ycb/*/*.urdf")
15 | ycb_names = [urdf_path.split("/")[-2] for urdf_path in ycb_urdf_paths]
16 | ycb_obj_name = [" ".join(name.split("_")[1:-2]) for name in ycb_names]
17 | ycb_uuid = [urdf_path.split("/")[-2].split("_")[0] for urdf_path in ycb_urdf_paths]
18 |
19 | valid_idx = [i for i in range(len(ycb_uuid)) if ycb_uuid[i] in all_uuid]
20 |
21 | ycb_uuids = [ycb_uuid[i] for i in valid_idx]
22 | ycb_urdf_paths = [ycb_urdf_paths[i] for i in valid_idx]
23 | ycb_obj_name = [" ".join(json_dict[ycb_uuid[i]]['category'].split("_")) for i in valid_idx]
24 | urdf_paths+=ycb_urdf_paths
25 | obj_name+=ycb_obj_name
26 | uuids += ycb_uuids
27 | if "objaverse" in dataset_names:
28 | json_dict = json.load(open("../Benchmark/benchmark_catalogue/object_dictionary_complete_0702.json"))
29 |
30 | all_uuid = json_dict.keys()
31 | # all the objaverse data
32 | objaverse_urdf_paths = glob.glob("assets/objaverse_final_norm/*/*_2.urdf")
33 | objaverse_obj_uuid = [path.split("/")[-2] for path in objaverse_urdf_paths]
34 |
35 | valid_idx = [i for i in range(len(objaverse_obj_uuid)) if objaverse_obj_uuid[i] in all_uuid]
36 | objaverse_obj_uuids = [objaverse_obj_uuid[i] for i in valid_idx]
37 | objaverse_urdf_paths = [objaverse_urdf_paths[i] for i in valid_idx]
38 | objaverse_obj_name = [" ".join(json_dict[objaverse_obj_uuid[i]]['category'].split("_")) for i in valid_idx]
39 | urdf_paths+=objaverse_urdf_paths
40 | obj_name+=objaverse_obj_name
41 | uuids+=objaverse_obj_uuids
42 | if "objaverse_old" in dataset_names:
43 | json_dict = json.load(open("category_dictionary.json"))
44 |
45 | all_uuid = []
46 | for key in json_dict.keys(): all_uuid+=json_dict[key]["object_uuids"]
47 | # all the objaverse data
48 | objaverse_urdf_paths = glob.glob("benchmark/mesh/objaverse_final_norm/*/*_2.urdf")
49 | objaverse_names = [urdf_path.split("/")[-2] for urdf_path in objaverse_urdf_paths]
50 | objaverse_obj_name = [" ".join(name.split("_")[1:]) for name in objaverse_names]
51 | objaverse_obj_uuid = [name.split("_")[0] for name in objaverse_names]
52 | valid_idx = [i for i in range(len(objaverse_obj_uuid)) if objaverse_obj_uuid[i] in all_uuid]
53 | objaverse_urdf_paths = [objaverse_urdf_paths[i] for i in valid_idx]
54 | objaverse_obj_name = [objaverse_obj_name[i] for i in valid_idx]
55 | # import pdb; pdb.set_trace()
56 | urdf_paths+=objaverse_urdf_paths
57 | obj_name+=objaverse_obj_name
58 | return urdf_paths,obj_name,uuids
--------------------------------------------------------------------------------
/Method/test_gym.py:
--------------------------------------------------------------------------------
1 | # exit()
2 | import sys
3 | import os
4 | sys.path = [os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))] + sys.path
5 | sys.path = [os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))] + sys.path
6 |
7 | # import gym
8 | from gym.object_gym import ObjectGym
9 | from gym.utils import read_yaml_config
10 |
11 | import json, glob, random
12 |
13 | tag = "handle_right"
14 | # tag = "upside"
15 | anno_path = f"/home/haoran/Projects/Rearrangement/Open6DOR/Benchmark/benchmark_catalogue/annotation/annotation_{tag}.json"
16 | # anno_path = f"/home/haoran/Projects/Rearrangement/ObjectPlacement/rotation_anno/annotation_upright_1_.json"
17 | save_root_ = f"/home/haoran/Projects/Rearrangement/Open6DOR/anno_test/anno_images-final_-{tag}"
18 | anno_data = json.load(open(anno_path, 'r'))
19 | anno_keys = list(anno_data.keys())
20 | # import pdb; pdb.set_trace()
21 | random.shuffle(anno_keys)
22 | for anno in anno_keys:
23 | # print(anno["object_name"], anno["upright"])
24 | anno_data_i = anno_data[anno]['annotation']
25 | obj_id = anno
26 | save_root = f"{save_root_}/{tag}-{obj_id}"
27 | # if os.path.exists(f"/home/haoran/Projects/Rearrangement/ObjectPlacement/rotation_anno/anno_images/upright-{obj_id}/task_config-rgb-0-0.png"):
28 | if os.path.exists(f"{save_root_}/{tag}-{obj_id}/task_config-rgb-0-0.png"):
29 | continue
30 | cfgs = read_yaml_config("config.yaml")
31 |
32 | if len(obj_id) > 10: # objaverse
33 | cfgs["asset"]["asset_files"] = [f"objaverse_final_norm/{obj_id}/material_2.urdf"]
34 | else:
35 | path = glob.glob(f"assets/ycb_16k_backup/{obj_id}*/{obj_id}*.urdf")[0]
36 | path_r = "/".join(path.split("/")[-3:])
37 | cfgs["asset"]["asset_files"] = [path_r]
38 | if len(list(anno_data_i.keys())) > 1:
39 | import pdb; pdb.set_trace()
40 | try:
41 | quat_anno = anno_data_i[list(anno_data_i.keys())[0]]["quat"]
42 | except:
43 | continue
44 | if anno_data_i[list(anno_data_i.keys())[0]]["stage"] != 1 and anno_data_i[list(anno_data_i.keys())[0]]["stage"] != 2:
45 | import pdb; pdb.set_trace()
46 |
47 | cfgs["asset"]["obj_pose_ps"] = [[0.5, 0, 0.4]]
48 | try:
49 | cfgs["asset"]["obj_pose_rs"] = [[quat_anno[0][0], quat_anno[0][1], quat_anno[0][2],quat_anno[0][3],]]
50 | except:
51 | cfgs["asset"]["obj_pose_rs"] = [[quat_anno[0], quat_anno[1], quat_anno[2],quat_anno[3],]]
52 |
53 | cfgs["asset"]["position_noise"] = [0, 0]
54 | cfgs["asset"]["rotation_noise"] = 0
55 | # cfgs["asset"]["asset_files"] = [obj_id]
56 | # cfgs["asset"]["asset_files"] = anno["object_name"]
57 | gym = ObjectGym(cfgs, None, None, pre_steps = 0)
58 |
59 | print(list(anno_data_i.keys())[0])
60 | gym.refresh_observation(get_visual_obs=False)
61 | # save_root = f"/home/haoran/Projects/Rearrangement/ObjectPlacement/rotation_anno/anno_images2/upright-{obj_id}"
62 |
63 | os.makedirs(save_root, exist_ok=True)
64 | points_envs, colors_envs, rgb_envs, depth_envs ,seg_envs, ori_points_envs, ori_colors_envs, pixel2pointid, pointid2pixel = gym.refresh_observation(get_visual_obs=True)
65 | gym.save_render(rgb_envs=rgb_envs, depth_envs=None, ori_points_env=None, ori_colors_env=None, points=None, colors=None, save_dir = save_root, save_name = "task_config")
66 |
67 | # gym.run_steps(1000)
68 | # import pdb; pdb.set_trace()
69 | gym.clean_up()
70 |
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/segment_anything/build_sam.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import torch
8 |
9 | from functools import partial
10 |
11 | from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer
12 |
13 |
14 | def build_sam_vit_h(checkpoint=None):
15 | return _build_sam(
16 | encoder_embed_dim=1280,
17 | encoder_depth=32,
18 | encoder_num_heads=16,
19 | encoder_global_attn_indexes=[7, 15, 23, 31],
20 | checkpoint=checkpoint,
21 | )
22 |
23 |
24 | build_sam = build_sam_vit_h
25 |
26 |
27 | def build_sam_vit_l(checkpoint=None):
28 | return _build_sam(
29 | encoder_embed_dim=1024,
30 | encoder_depth=24,
31 | encoder_num_heads=16,
32 | encoder_global_attn_indexes=[5, 11, 17, 23],
33 | checkpoint=checkpoint,
34 | )
35 |
36 |
37 | def build_sam_vit_b(checkpoint=None):
38 | return _build_sam(
39 | encoder_embed_dim=768,
40 | encoder_depth=12,
41 | encoder_num_heads=12,
42 | encoder_global_attn_indexes=[2, 5, 8, 11],
43 | checkpoint=checkpoint,
44 | )
45 |
46 |
47 | sam_model_registry = {
48 | "default": build_sam,
49 | "vit_h": build_sam,
50 | "vit_l": build_sam_vit_l,
51 | "vit_b": build_sam_vit_b,
52 | }
53 |
54 |
55 | def _build_sam(
56 | encoder_embed_dim,
57 | encoder_depth,
58 | encoder_num_heads,
59 | encoder_global_attn_indexes,
60 | checkpoint=None,
61 | ):
62 | prompt_embed_dim = 256
63 | image_size = 1024
64 | vit_patch_size = 16
65 | image_embedding_size = image_size // vit_patch_size
66 | sam = Sam(
67 | image_encoder=ImageEncoderViT(
68 | depth=encoder_depth,
69 | embed_dim=encoder_embed_dim,
70 | img_size=image_size,
71 | mlp_ratio=4,
72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6),
73 | num_heads=encoder_num_heads,
74 | patch_size=vit_patch_size,
75 | qkv_bias=True,
76 | use_rel_pos=True,
77 | global_attn_indexes=encoder_global_attn_indexes,
78 | window_size=14,
79 | out_chans=prompt_embed_dim,
80 | ),
81 | prompt_encoder=PromptEncoder(
82 | embed_dim=prompt_embed_dim,
83 | image_embedding_size=(image_embedding_size, image_embedding_size),
84 | input_image_size=(image_size, image_size),
85 | mask_in_chans=16,
86 | ),
87 | mask_decoder=MaskDecoder(
88 | num_multimask_outputs=3,
89 | transformer=TwoWayTransformer(
90 | depth=2,
91 | embedding_dim=prompt_embed_dim,
92 | mlp_dim=2048,
93 | num_heads=8,
94 | ),
95 | transformer_dim=prompt_embed_dim,
96 | iou_head_depth=3,
97 | iou_head_hidden_dim=256,
98 | ),
99 | pixel_mean=[123.675, 116.28, 103.53],
100 | pixel_std=[58.395, 57.12, 57.375],
101 | )
102 | sam.eval()
103 | if checkpoint is not None:
104 | with open(checkpoint, "rb") as f:
105 | state_dict = torch.load(f)
106 | sam.load_state_dict(state_dict)
107 | return sam
108 |
--------------------------------------------------------------------------------
/Method/open6dor_gpt.py:
--------------------------------------------------------------------------------
1 | import json, imageio
2 | from gym.utils import read_yaml_config, prepare_gsam_model
3 | import numpy as np
4 |
5 | class Open6DOR_GPT:
6 | def __init__(self, cfgs):
7 | self.cfgs = cfgs
8 | self.device = cfgs["DEVICE"]
9 | self._prepare_ckpts()
10 |
11 | def _prepare_ckpts(self):
12 | # prepare gsam model
13 | if self.cfgs["INFERENCE_GSAM"]:
14 | self._grounded_dino_model, self._sam_predictor = prepare_gsam_model(device=self.device)
15 |
16 | self._box_threshold = 0.3
17 | self._text_threshold = 0.25
18 | else:
19 | self._grounded_dino_model, self._sam_predictor = None, None
20 |
21 | def inference_vlm(self, prompt, image_path, print_ans = False):
22 | from gym.vlm_utils import infer_path
23 | # prepare vlm model
24 | response = infer_path(prompt, image_path)
25 | while 'choices' not in response.json():
26 | response = infer_path(prompt, image_path)
27 | ans = response.json()['choices'][0]['message']['content']
28 | if print_ans:
29 | print(ans)
30 | return ans
31 |
32 | def inference_gsam(self, image: np.ndarray = None, image_path: str = None, prompt = None):
33 | from vision.grounded_sam_demo import prepare_GroundedSAM_for_inference, inference_one_image
34 | if image is not None:
35 | masks = inference_one_image(image[..., :3], self._grounded_dino_model, self._sam_predictor, box_threshold=self._box_threshold, text_threshold=self._text_threshold, text_prompt=prompt, device=self.device)
36 | elif image_path is not None:
37 | image = imageio.imread(image_path)
38 | masks = inference_one_image(image[..., :3], self._grounded_dino_model, self._sam_predictor, box_threshold=self._box_threshold, text_threshold=self._text_threshold, text_prompt=prompt, device=self.device)
39 | return masks, image
40 |
41 | def inference_task(self, task_cfgs):
42 | # prepare task data
43 | task_data = self.prepare_task_data(task_cfgs)
44 |
45 | # inference
46 | pred_pose = self.inference(task_data, self._grounded_dino_model, self._sam_predictor)
47 |
48 | return pred_pose
49 |
50 | def test_vlm():
51 | cfgs = read_yaml_config("config.yaml")
52 | open6dor_gpt = Open6DOR_GPT(cfgs=cfgs)
53 | prompt = "hello gpt, describe the image"
54 | image_path = "test_image.png"
55 | print("The ans is: ", open6dor_gpt.inference_vlm(prompt, image_path, print_ans=True))
56 | print("vlm test passed!")
57 | import pdb; pdb.set_trace()
58 |
59 | def test_gsam():
60 | image_path = "test_image.png"
61 | cfgs = read_yaml_config("config.yaml")
62 | open6dor_gpt = Open6DOR_GPT(cfgs=cfgs)
63 | masks, _image = open6dor_gpt.inference_gsam(image_path = image_path, prompt="calculator")
64 | _image[masks[0][0].cpu().numpy().astype(bool)] = 0
65 | imageio.imwrite("test_mask.png", _image)
66 | print("The mask is saved as test_mask.png, check it!")
67 | import pdb; pdb.set_trace()
68 |
69 | if __name__ == "__main__":
70 | # test_gsam()
71 |
72 | test_vlm()
73 |
74 | cfgs = read_yaml_config("config.yaml")
75 | task_cfgs_path = "/home/haoran/Projects/Rearrangement/Open6DOR/Method/tasks/6DoF/behind/Place_the_apple_behind_the_box_on_the_table.__upright/20240704-145831_no_interaction/task_config_new2.json"
76 | with open(task_cfgs_path, "r") as f: task_cfgs = json.load(f)
77 |
78 |     open6dor_gpt = Open6DOR_GPT(cfgs=cfgs)  # the constructor takes only cfgs; pass task_cfgs to inference_task instead
79 |
--------------------------------------------------------------------------------
/assets/tasks/task_refine_6dof_example/behind/20240824-165044_no_interaction/task_config_new5.json:
--------------------------------------------------------------------------------
1 | {
2 | "selected_obj_names": [
3 | "bottle",
4 | "tissue box",
5 | "apple"
6 | ],
7 | "selected_urdfs": [
8 | "ycb_16k_backup/006_mustard_bottle_google_16k/006_mustard_bottle_google_16k.urdf",
9 | "objaverse_rescale/dc4c91abf45342b4bb8822f50fa162b2/material_2.urdf",
10 | "objaverse_rescale/fbda0b25f41f40958ea984f460e4770b/material_2.urdf"
11 | ],
12 | "target_obj_name": "apple",
13 | "instruction": "Place the apple behind the bottle on the table. We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.",
14 | "init_obj_pos": [
15 | [
16 | 0.3738566040992737,
17 | 0.17327724397182465,
18 | 0.3028668463230133,
19 | -9.934652553056367e-06,
20 | 5.676249202224426e-06,
21 | -0.03726901113986969,
22 | 0.9993051886558533,
23 | 0.00011460757377790287,
24 | -0.0007374841370619833,
25 | -0.00024315444170497358,
26 | 0.01065916009247303,
27 | 0.000735661422368139,
28 | 0.0003395920793991536
29 | ],
30 | [
31 | 0.4417206645011902,
32 | -0.3223787248134613,
33 | 0.3753006160259247,
34 | 0.7060578465461731,
35 | -0.03799350559711456,
36 | -0.037282224744558334,
37 | 0.7061506509780884,
38 | 5.145368413650431e-05,
39 | -0.00020104726718273014,
40 | 0.00014684736379422247,
41 | 0.003485196502879262,
42 | 5.90651725360658e-05,
43 | -0.0011944533325731754
44 | ],
45 | [
46 | 0.5476366281509399,
47 | -0.07213471084833145,
48 | 0.3492423892021179,
49 | 0.1136084571480751,
50 | 0.050451405346393585,
51 | -0.0884791761636734,
52 | 0.9882911443710327,
53 | 5.16880136274267e-05,
54 | 0.0044308979995548725,
55 | -0.001778011559508741,
56 | -0.08829422295093536,
57 | -0.0050054253078997135,
58 | 0.015157821588218212
59 | ]
60 | ],
61 | "position_instruction": "Place the apple behind the bottle on the table. ",
62 | "rotation_instruction": "We also need to specify the rotation of the object after placement: the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.",
63 | "rotation_instruction_label": "upright",
64 | "obj_codes": [
65 | "006",
66 | "dc4c91abf45342b4bb8822f50fa162b2",
67 | "fbda0b25f41f40958ea984f460e4770b"
68 | ],
69 | "target_obj_code": "fbda0b25f41f40958ea984f460e4770b",
70 | "anno_target": {
71 | "category": "apple",
72 | "annotation": {
73 | " the object is placed upright on the table and corresponds with how humans usually place the object, bottom down and top up.": {
74 | "quat": [
75 | [
76 | 0.7071067690849304,
77 | 0.0,
78 | 0.0,
79 | 0.7071067690849304
80 | ]
81 | ],
82 | "stage": 1,
83 | "axis": "z"
84 | }
85 | }
86 | },
87 | "rot_tag_detail": "upright",
88 | "rot_tag_level": 0,
89 | "position_tag": "behind",
90 | "rotation_tag": "upright"
91 | }
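Each `init_obj_pos` entry above is a flat row of 13 floats. Below is a minimal parsing sketch, assuming the layout that `Benchmark/bench.py` relies on (first three values position, next four a quaternion, the remaining values velocity-like terms); the file path is a hypothetical local copy of the config shown above:

```python
import json

# Hypothetical local copy of the task_config_new5.json shown above.
with open("task_config_new5.json", "r") as f:
    task_cfg = json.load(f)

for name, row in zip(task_cfg["selected_obj_names"], task_cfg["init_obj_pos"]):
    position = row[:3]      # x, y, z
    quaternion = row[3:7]   # x, y, z, w (ordering assumed from how bench.py slices it)
    residual = row[7:]      # remaining six values, presumably linear/angular velocity
    print(name, position, quaternion)
```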
--------------------------------------------------------------------------------
/Method/vision/test_sam.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import matplotlib.pyplot as plt
4 | import cv2
5 | import sys
6 | sys.path.append("..")
7 |
8 | from segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator
9 |
10 | def show_anns(anns):
11 | if len(anns) == 0:
12 | return
13 | sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
14 | ax = plt.gca()
15 | ax.set_autoscale_on(False)
16 |
17 | img = np.ones((sorted_anns[0]['segmentation'].shape[0], sorted_anns[0]['segmentation'].shape[1], 4))
18 | img[:,:,3] = 0
19 | for ann in sorted_anns:
20 | m = ann['segmentation']
21 | color_mask = np.concatenate([np.random.random(3), [0.35]])
22 | img[m] = color_mask
23 | ax.imshow(img)
24 |
25 | def show_mask(mask, ax, random_color=False):
26 | if random_color:
27 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
28 | else:
29 | color = np.array([30/255, 144/255, 255/255, 0.6])
30 | h, w = mask.shape[-2:]
31 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
32 | ax.imshow(mask_image)
33 |
34 | def show_points(coords, labels, ax, marker_size=375):
35 | pos_points = coords[labels==1]
36 | neg_points = coords[labels==0]
37 | ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
38 | ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
39 |
40 | def show_box(box, ax):
41 | x0, y0 = box[0], box[1]
42 | w, h = box[2] - box[0], box[3] - box[1]
43 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2))
44 |
45 | image = cv2.imread('/home/haoran/Projects/ObjectPlacement/imgs/bana_cup.png')
46 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
47 |
48 | plt.figure(figsize=(10,10))
49 | plt.imshow(image)
50 | plt.axis('on')
51 | plt.show()
52 |
53 | sam_checkpoint = "/home/haoran/Projects/ObjectPlacement/assets/ckpts/sam_vit_h_4b8939.pth"
54 | model_type = "vit_h"
55 |
56 | device = "cuda"
57 |
58 | sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
59 | sam.to(device=device)
60 |
61 | ############## Demo1: Mask Generation ################
62 | mask_generator = SamAutomaticMaskGenerator(sam)
63 |
64 | masks = mask_generator.generate(image)
65 | print(len(masks))
66 | print(masks[0].keys())
67 | plt.figure(figsize=(20,20))
68 | plt.imshow(image)
69 | show_anns(masks)
70 | plt.axis('off')
71 | plt.show()
72 |
73 |
74 | ############## Demo2: Mask Prediction with Input Point ################
75 | predictor = SamPredictor(sam)
76 |
77 | predictor.set_image(image)
78 |
79 | input_point = np.array([[500, 375]])
80 | input_label = np.array([1])
81 |
82 |
83 | masks, scores, logits = predictor.predict(
84 | point_coords=input_point,
85 | point_labels=input_label,
86 | multimask_output=True,
87 | )
88 |
89 | for i, (mask, score) in enumerate(zip(masks, scores)):
90 | plt.figure(figsize=(10,10))
91 | plt.imshow(image)
92 | show_mask(mask, plt.gca())
93 | show_points(input_point, input_label, plt.gca())
94 | plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18)
95 | plt.axis('off')
96 | plt.show()
97 |
98 | input_point = np.array([[500, 375], [1125, 625]])
99 | input_label = np.array([1, 1])
100 |
101 | mask_input = logits[np.argmax(scores), :, :]
102 |
103 | masks, _, _ = predictor.predict(
104 | point_coords=input_point,
105 | point_labels=input_label,
106 | mask_input=mask_input[None, :, :],
107 | multimask_output=False,
108 | )
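The script above ends after the refined single-mask prediction without displaying it. A small continuation sketch, assuming it is appended to the end of test_sam.py so that `image`, `masks`, `input_point`, `input_label`, and the plotting helpers defined above are in scope:

```python
# Continuation of test_sam.py: visualize and save the refined single-mask
# prediction produced by the second predict() call above.
plt.figure(figsize=(10, 10))
plt.imshow(image)
show_mask(masks[0], plt.gca())
show_points(input_point, input_label, plt.gca())
plt.axis('off')
plt.savefig("refined_mask.png", bbox_inches="tight")
plt.show()
```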
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to make participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 |
55 | This Code of Conduct also applies outside the project spaces when there is a
56 | reasonable belief that an individual's behavior may have a negative impact on
57 | the project or its community.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported by contacting the project team at <opensource-conduct@fb.com>. All
63 | complaints will be reviewed and investigated and will result in a response that
64 | is deemed necessary and appropriate to the circumstances. The project team is
65 | obligated to maintain confidentiality with regard to the reporter of an incident.
66 | Further details of specific enforcement policies may be posted separately.
67 |
68 | Project maintainers who do not follow or enforce the Code of Conduct in good
69 | faith may face temporary or permanent repercussions as determined by other
70 | members of the project's leadership.
71 |
72 | ## Attribution
73 |
74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
76 |
77 | [homepage]: https://www.contributor-covenant.org
78 |
79 | For answers to common questions about this code of conduct, see
80 | https://www.contributor-covenant.org/faq
81 |
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/segment_anything/build_sam_hq.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import torch
8 |
9 | from functools import partial
10 |
11 | from .modeling import ImageEncoderViT, MaskDecoderHQ, PromptEncoder, Sam, TwoWayTransformer
12 |
13 |
14 | def build_sam_hq_vit_h(checkpoint=None):
15 | return _build_sam(
16 | encoder_embed_dim=1280,
17 | encoder_depth=32,
18 | encoder_num_heads=16,
19 | encoder_global_attn_indexes=[7, 15, 23, 31],
20 | checkpoint=checkpoint,
21 | )
22 |
23 |
24 | build_sam_hq = build_sam_hq_vit_h
25 |
26 |
27 | def build_sam_hq_vit_l(checkpoint=None):
28 | return _build_sam(
29 | encoder_embed_dim=1024,
30 | encoder_depth=24,
31 | encoder_num_heads=16,
32 | encoder_global_attn_indexes=[5, 11, 17, 23],
33 | checkpoint=checkpoint,
34 | )
35 |
36 |
37 | def build_sam_hq_vit_b(checkpoint=None):
38 | return _build_sam(
39 | encoder_embed_dim=768,
40 | encoder_depth=12,
41 | encoder_num_heads=12,
42 | encoder_global_attn_indexes=[2, 5, 8, 11],
43 | checkpoint=checkpoint,
44 | )
45 |
46 |
47 | sam_hq_model_registry = {
48 | "default": build_sam_hq_vit_h,
49 | "vit_h": build_sam_hq_vit_h,
50 | "vit_l": build_sam_hq_vit_l,
51 | "vit_b": build_sam_hq_vit_b,
52 | }
53 |
54 |
55 | def _build_sam(
56 | encoder_embed_dim,
57 | encoder_depth,
58 | encoder_num_heads,
59 | encoder_global_attn_indexes,
60 | checkpoint=None,
61 | ):
62 | prompt_embed_dim = 256
63 | image_size = 1024
64 | vit_patch_size = 16
65 | image_embedding_size = image_size // vit_patch_size
66 | sam = Sam(
67 | image_encoder=ImageEncoderViT(
68 | depth=encoder_depth,
69 | embed_dim=encoder_embed_dim,
70 | img_size=image_size,
71 | mlp_ratio=4,
72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6),
73 | num_heads=encoder_num_heads,
74 | patch_size=vit_patch_size,
75 | qkv_bias=True,
76 | use_rel_pos=True,
77 | global_attn_indexes=encoder_global_attn_indexes,
78 | window_size=14,
79 | out_chans=prompt_embed_dim,
80 | ),
81 | prompt_encoder=PromptEncoder(
82 | embed_dim=prompt_embed_dim,
83 | image_embedding_size=(image_embedding_size, image_embedding_size),
84 | input_image_size=(image_size, image_size),
85 | mask_in_chans=16,
86 | ),
87 | mask_decoder=MaskDecoderHQ(
88 | num_multimask_outputs=3,
89 | transformer=TwoWayTransformer(
90 | depth=2,
91 | embedding_dim=prompt_embed_dim,
92 | mlp_dim=2048,
93 | num_heads=8,
94 | ),
95 | transformer_dim=prompt_embed_dim,
96 | iou_head_depth=3,
97 | iou_head_hidden_dim=256,
98 | vit_dim=encoder_embed_dim,
99 | ),
100 | pixel_mean=[123.675, 116.28, 103.53],
101 | pixel_std=[58.395, 57.12, 57.375],
102 | )
103 | # sam.eval()
104 | if checkpoint is not None:
105 | with open(checkpoint, "rb") as f:
106 | state_dict = torch.load(f)
107 | info = sam.load_state_dict(state_dict, strict=False)
108 | print(info)
109 | for n, p in sam.named_parameters():
110 | if 'hf_token' not in n and 'hf_mlp' not in n and 'compress_vit_feat' not in n and 'embedding_encoder' not in n and 'embedding_maskfeature' not in n:
111 | p.requires_grad = False
112 |
113 | return sam
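A brief usage sketch for the registry above; the checkpoint path is hypothetical (SAM-HQ weights are distributed separately from the standard SAM checkpoints), and note that `_build_sam` freezes everything except the HQ-specific modules:

```python
import torch
from segment_anything.build_sam_hq import sam_hq_model_registry

# Hypothetical checkpoint path for the ViT-H SAM-HQ weights.
sam_hq = sam_hq_model_registry["vit_h"](checkpoint="assets/ckpts/sam_hq_vit_h.pth")
sam_hq.to("cuda" if torch.cuda.is_available() else "cpu")

# Only the HQ-specific parameters (hf_token, hf_mlp, compress_vit_feat,
# embedding_encoder, embedding_maskfeature) remain trainable.
trainable = [n for n, p in sam_hq.named_parameters() if p.requires_grad]
print(len(trainable), "trainable parameter tensors")
```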
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.whl
2 | anno_test
3 | Benchmark/renderer/envmap_lib
4 | Benchmark/renderer/blender-2.93.3*
5 | Benchmark/renderer/material_lib_v2.blend
6 | Benchmark/dataset/objects/rescale/
7 | output/
8 | # assets/
9 | output_new/
10 | results_overall/
11 | *.zip
12 | *.DS_Store
13 | # Byte-compiled / optimized / DLL files
14 | __pycache__/
15 | *.py[cod]
16 | *$py.class
17 |
18 |
19 | # C extensions
20 | *.so
21 |
22 | # Distribution / packaging
23 | .Python
24 | build/
25 | develop-eggs/
26 | dist/
27 | downloads/
28 | eggs/
29 | .eggs/
30 | lib/
31 | lib64/
32 | parts/
33 | sdist/
34 | var/
35 | wheels/
36 | share/python-wheels/
37 | *.egg-info/
38 | .installed.cfg
39 | *.egg
40 | MANIFEST
41 | *.DS_Store
42 |
43 | # PyInstaller
44 | # Usually these files are written by a python script from a template
45 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
46 | *.manifest
47 | *.spec
48 |
49 | # Installer logs
50 | pip-log.txt
51 | pip-delete-this-directory.txt
52 |
53 | # Unit test / coverage reports
54 | htmlcov/
55 | .tox/
56 | .nox/
57 | .coverage
58 | .coverage.*
59 | .cache
60 | nosetests.xml
61 | coverage.xml
62 | *.cover
63 | *.py,cover
64 | .hypothesis/
65 | .pytest_cache/
66 | cover/
67 |
68 | # Translations
69 | *.mo
70 | *.pot
71 |
72 | # Django stuff:
73 | *.log
74 | local_settings.py
75 | db.sqlite3
76 | db.sqlite3-journal
77 |
78 | # Flask stuff:
79 | instance/
80 | .webassets-cache
81 |
82 | # Scrapy stuff:
83 | .scrapy
84 |
85 | # Sphinx documentation
86 | docs/_build/
87 |
88 | # PyBuilder
89 | .pybuilder/
90 | target/
91 |
92 | # Jupyter Notebook
93 | .ipynb_checkpoints
94 |
95 | # IPython
96 | profile_default/
97 | ipython_config.py
98 |
99 | # pyenv
100 | # For a library or package, you might want to ignore these files since the code is
101 | # intended to run in multiple environments; otherwise, check them in:
102 | # .python-version
103 |
104 | # pipenv
105 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
106 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
107 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
108 | # install all needed dependencies.
109 | #Pipfile.lock
110 |
111 | # poetry
112 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
113 | # This is especially recommended for binary packages to ensure reproducibility, and is more
114 | # commonly ignored for libraries.
115 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
116 | #poetry.lock
117 |
118 | # pdm
119 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
120 | #pdm.lock
121 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
122 | # in version control.
123 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
124 | .pdm.toml
125 | .pdm-python
126 | .pdm-build/
127 |
128 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
129 | __pypackages__/
130 |
131 | # Celery stuff
132 | celerybeat-schedule
133 | celerybeat.pid
134 |
135 | # SageMath parsed files
136 | *.sage.py
137 |
138 | # Environments
139 | .env
140 | .venv
141 | env/
142 | venv/
143 | ENV/
144 | env.bak/
145 | venv.bak/
146 |
147 | # Spyder project settings
148 | .spyderproject
149 | .spyproject
150 |
151 | # Rope project settings
152 | .ropeproject
153 |
154 | # mkdocs documentation
155 | /site
156 |
157 | # mypy
158 | .mypy_cache/
159 | .dmypy.json
160 | dmypy.json
161 |
162 | # Pyre type checker
163 | .pyre/
164 |
165 | # pytype static type analyzer
166 | .pytype/
167 |
168 | # Cython debug symbols
169 | cython_debug/
170 |
171 | # PyCharm
172 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174 | # and can be added to the global gitignore or merged into this file. For a more nuclear
175 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176 | #.idea/
177 | .DS_Store
178 | .DS_Store
179 |
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/visual/link6.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 17
3 |
4 | newmtl Face064_002_001_002_001
5 | Ns -1.960784
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 1.000000 0.000000 0.000000
8 | Ks 0.003906 0.003906 0.003906
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl Face065_002_001_002_001
15 | Ns -1.960784
16 | Ka 1.000000 1.000000 1.000000
17 | Kd 0.000000 1.000000 0.000000
18 | Ks 0.003906 0.003906 0.003906
19 | Ke 0.000000 0.000000 0.000000
20 | Ni 1.000000
21 | d 1.000000
22 | illum 2
23 |
24 | newmtl Face374_002_001_002_001
25 | Ns -1.960784
26 | Ka 1.000000 1.000000 1.000000
27 | Kd 1.000000 1.000000 1.000000
28 | Ks 0.003906 0.003906 0.003906
29 | Ke 0.000000 0.000000 0.000000
30 | Ni 1.000000
31 | d 1.000000
32 | illum 2
33 |
34 | newmtl Face539_002_001_002_001
35 | Ns -1.960784
36 | Ka 1.000000 1.000000 1.000000
37 | Kd 0.250980 0.250980 0.250980
38 | Ks 0.003906 0.003906 0.003906
39 | Ke 0.000000 0.000000 0.000000
40 | Ni 1.000000
41 | d 1.000000
42 | illum 2
43 |
44 | newmtl Part__Feature001_009_001_002_001
45 | Ns -1.960784
46 | Ka 1.000000 1.000000 1.000000
47 | Kd 0.250980 0.250980 0.250980
48 | Ks 0.003906 0.003906 0.003906
49 | Ke 0.000000 0.000000 0.000000
50 | Ni 1.000000
51 | d 1.000000
52 | illum 2
53 |
54 | newmtl Part__Feature002_006_001_002_001
55 | Ns -1.960784
56 | Ka 1.000000 1.000000 1.000000
57 | Kd 0.250980 0.250980 0.250980
58 | Ks 0.003906 0.003906 0.003906
59 | Ke 0.000000 0.000000 0.000000
60 | Ni 1.000000
61 | d 1.000000
62 | illum 2
63 |
64 | newmtl Shell002_002_001_002_001
65 | Ns -1.960784
66 | Ka 1.000000 1.000000 1.000000
67 | Kd 1.000000 1.000000 1.000000
68 | Ks 0.003906 0.003906 0.003906
69 | Ke 0.000000 0.000000 0.000000
70 | Ni 1.000000
71 | d 1.000000
72 | illum 2
73 |
74 | newmtl Shell003_002_001_002_001
75 | Ns -1.960784
76 | Ka 1.000000 1.000000 1.000000
77 | Kd 1.000000 1.000000 1.000000
78 | Ks 0.003906 0.003906 0.003906
79 | Ke 0.000000 0.000000 0.000000
80 | Ni 1.000000
81 | d 1.000000
82 | illum 2
83 |
84 | newmtl Shell004_001_001_002_001
85 | Ns -1.960784
86 | Ka 1.000000 1.000000 1.000000
87 | Kd 1.000000 1.000000 1.000000
88 | Ks 0.003906 0.003906 0.003906
89 | Ke 0.000000 0.000000 0.000000
90 | Ni 1.000000
91 | d 1.000000
92 | illum 2
93 |
94 | newmtl Shell005_001_001_002_001
95 | Ns -1.960784
96 | Ka 1.000000 1.000000 1.000000
97 | Kd 1.000000 1.000000 1.000000
98 | Ks 0.003906 0.003906 0.003906
99 | Ke 0.000000 0.000000 0.000000
100 | Ni 1.000000
101 | d 1.000000
102 | illum 2
103 |
104 | newmtl Shell006_003_002_001
105 | Ns -1.960784
106 | Ka 1.000000 1.000000 1.000000
107 | Kd 0.901961 0.921569 0.929412
108 | Ks 0.015625 0.015625 0.015625
109 | Ke 0.000000 0.000000 0.000000
110 | Ni 1.000000
111 | d 1.000000
112 | illum 2
113 |
114 | newmtl Shell007_002_002_001
115 | Ns -1.960784
116 | Ka 1.000000 1.000000 1.000000
117 | Kd 0.250000 0.250000 0.250000
118 | Ks 0.003906 0.003906 0.003906
119 | Ke 0.000000 0.000000 0.000000
120 | Ni 1.000000
121 | d 1.000000
122 | illum 2
123 |
124 | newmtl Shell011_002_002_001
125 | Ns -1.960784
126 | Ka 1.000000 1.000000 1.000000
127 | Kd 1.000000 1.000000 1.000000
128 | Ks 0.003906 0.003906 0.003906
129 | Ke 0.000000 0.000000 0.000000
130 | Ni 1.000000
131 | d 1.000000
132 | illum 2
133 |
134 | newmtl Shell012_002_002_001
135 | Ns -1.960784
136 | Ka 1.000000 1.000000 1.000000
137 | Kd 1.000000 1.000000 1.000000
138 | Ks 0.003906 0.003906 0.003906
139 | Ke 0.000000 0.000000 0.000000
140 | Ni 1.000000
141 | d 1.000000
142 | illum 2
143 |
144 | newmtl Shell_003_001_002_001
145 | Ns -1.960784
146 | Ka 1.000000 1.000000 1.000000
147 | Kd 0.250980 0.250980 0.250980
148 | Ks 0.003906 0.003906 0.003906
149 | Ke 0.000000 0.000000 0.000000
150 | Ni 1.000000
151 | d 1.000000
152 | illum 2
153 |
154 | newmtl Union001_001_001_002_001
155 | Ns -1.960784
156 | Ka 1.000000 1.000000 1.000000
157 | Kd 0.039216 0.541176 0.780392
158 | Ks 0.003906 0.003906 0.003906
159 | Ke 0.000000 0.000000 0.000000
160 | Ni 1.000000
161 | d 1.000000
162 | illum 2
163 |
164 | newmtl Union_001_001_002_001
165 | Ns -1.960784
166 | Ka 1.000000 1.000000 1.000000
167 | Kd 0.039216 0.541176 0.780392
168 | Ks 0.003906 0.003906 0.003906
169 | Ke 0.000000 0.000000 0.000000
170 | Ni 1.000000
171 | d 1.000000
172 | illum 2
173 |
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/segment_anything/utils/transforms.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import numpy as np
8 | import torch
9 | from torch.nn import functional as F
10 | from torchvision.transforms.functional import resize, to_pil_image # type: ignore
11 |
12 | from copy import deepcopy
13 | from typing import Tuple
14 |
15 |
16 | class ResizeLongestSide:
17 | """
18 | Resizes images to longest side 'target_length', as well as provides
19 | methods for resizing coordinates and boxes. Provides methods for
20 | transforming both numpy array and batched torch tensors.
21 | """
22 |
23 | def __init__(self, target_length: int) -> None:
24 | self.target_length = target_length
25 |
26 | def apply_image(self, image: np.ndarray) -> np.ndarray:
27 | """
28 | Expects a numpy array with shape HxWxC in uint8 format.
29 | """
30 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length)
31 | return np.array(resize(to_pil_image(image), target_size))
32 |
33 | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
34 | """
35 | Expects a numpy array of length 2 in the final dimension. Requires the
36 | original image size in (H, W) format.
37 | """
38 | old_h, old_w = original_size
39 | new_h, new_w = self.get_preprocess_shape(
40 | original_size[0], original_size[1], self.target_length
41 | )
42 | coords = deepcopy(coords).astype(float)
43 | coords[..., 0] = coords[..., 0] * (new_w / old_w)
44 | coords[..., 1] = coords[..., 1] * (new_h / old_h)
45 | return coords
46 |
47 | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
48 | """
49 | Expects a numpy array shape Bx4. Requires the original image size
50 | in (H, W) format.
51 | """
52 | boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size)
53 | return boxes.reshape(-1, 4)
54 |
55 | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor:
56 | """
57 | Expects batched images with shape BxCxHxW and float format. This
58 | transformation may not exactly match apply_image. apply_image is
59 | the transformation expected by the model.
60 | """
61 | # Expects an image in BCHW format. May not exactly match apply_image.
 62 |         target_size = self.get_preprocess_shape(image.shape[2], image.shape[3], self.target_length)
63 | return F.interpolate(
64 | image, target_size, mode="bilinear", align_corners=False, antialias=True
65 | )
66 |
67 | def apply_coords_torch(
68 | self, coords: torch.Tensor, original_size: Tuple[int, ...]
69 | ) -> torch.Tensor:
70 | """
71 | Expects a torch tensor with length 2 in the last dimension. Requires the
72 | original image size in (H, W) format.
73 | """
74 | old_h, old_w = original_size
75 | new_h, new_w = self.get_preprocess_shape(
76 | original_size[0], original_size[1], self.target_length
77 | )
78 | coords = deepcopy(coords).to(torch.float)
79 | coords[..., 0] = coords[..., 0] * (new_w / old_w)
80 | coords[..., 1] = coords[..., 1] * (new_h / old_h)
81 | return coords
82 |
83 | def apply_boxes_torch(
84 | self, boxes: torch.Tensor, original_size: Tuple[int, ...]
85 | ) -> torch.Tensor:
86 | """
87 | Expects a torch tensor with shape Bx4. Requires the original image
88 | size in (H, W) format.
89 | """
90 | boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size)
91 | return boxes.reshape(-1, 4)
92 |
93 | @staticmethod
94 | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]:
95 | """
96 | Compute the output size given input size and target long side length.
97 | """
98 | scale = long_side_length * 1.0 / max(oldh, oldw)
99 | newh, neww = oldh * scale, oldw * scale
100 | neww = int(neww + 0.5)
101 | newh = int(newh + 0.5)
102 | return (newh, neww)
103 |
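A small worked example of the resize arithmetic above, assuming the package import path implied by this file's location and SAM's usual target length of 1024:

```python
import numpy as np
from segment_anything.utils.transforms import ResizeLongestSide

tf = ResizeLongestSide(target_length=1024)

# A 480x640 (H x W) image: the longest side (640) is scaled to 1024,
# so the preprocessed shape is (768, 1024).
print(tf.get_preprocess_shape(480, 640, 1024))            # -> (768, 1024)

# Coordinates scale by the same per-axis factors (1024/640 = 1.6 here).
coords = np.array([[320.0, 240.0]])                       # (x, y) in the original image
print(tf.apply_coords(coords, original_size=(480, 640)))  # -> [[512., 384.]]
```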
--------------------------------------------------------------------------------
/assets/robot/franka_description/meshes/collision/finger.obj:
--------------------------------------------------------------------------------
1 | ####
2 | #
3 | # OBJ File Generated by Meshlab
4 | #
5 | ####
6 | # Object finger.obj
7 | #
8 | # Vertices: 52
9 | # Faces: 32
10 | #
11 | ####
12 | vn 0.999991 0.003723 -0.001919
13 | v 0.010360 0.026403 0.000155
14 | vn 0.019341 -0.997893 -0.061925
15 | v 0.010449 0.002583 0.000147
16 | vn -0.999568 -0.025962 0.013789
17 | v -0.010387 0.002534 0.000132
18 | vn -0.999606 -0.009503 0.026403
19 | v -0.010479 0.016102 0.018988
20 | vn -0.000579 0.001464 -0.999999
21 | v -0.010401 0.026309 0.000167
22 | vn -0.044737 0.976483 0.210900
23 | v -0.010389 0.025220 0.019188
24 | vn -0.871286 -0.490748 0.005227
25 | v -0.008730 -0.000024 0.036165
26 | vn 0.999861 0.006488 0.015354
27 | v 0.010400 0.025253 0.019037
28 | vn 0.377718 0.867563 0.323518
29 | v 0.005840 0.014274 0.053803
30 | vn 0.736099 -0.021564 0.676530
31 | v 0.008616 0.013989 0.051328
32 | vn 0.999373 -0.008600 0.034345
33 | v 0.010495 0.015103 0.018436
34 | vn 0.013041 -0.999896 -0.006124
35 | v 0.008693 -0.000133 0.050166
36 | vn -0.998603 -0.032800 0.041418
37 | v -0.008623 -0.000057 0.050953
38 | vn -0.588468 -0.017705 0.808327
39 | v -0.005481 -0.000091 0.053725
40 | vn 0.004085 -0.008700 0.999954
41 | v -0.005278 0.014293 0.053849
42 | vn -0.691057 -0.012018 0.722700
43 | v -0.007778 0.014218 0.052366
44 | vn -0.665951 0.690851 0.281486
45 | v -0.008841 0.013918 0.050589
46 | vn 0.736099 -0.021564 0.676530
47 | v 0.006138 -0.000021 0.053578
48 | vn -0.002818 0.998255 0.058981
49 | v 0.010360 0.026403 0.000155
50 | vn 0.000073 0.000898 -1.000000
51 | v 0.010360 0.026403 0.000155
52 | vn 0.999898 -0.012431 0.007036
53 | v 0.010449 0.002583 0.000147
54 | vn 0.000724 0.000331 -1.000000
55 | v 0.010449 0.002583 0.000147
56 | vn -0.871286 -0.490748 0.005227
57 | v -0.010387 0.002534 0.000132
58 | vn 0.002403 -0.997480 -0.070914
59 | v -0.010387 0.002534 0.000132
60 | vn 0.000073 0.000898 -1.000000
61 | v -0.010387 0.002534 0.000132
62 | vn -0.004486 0.998354 0.057168
63 | v -0.010401 0.026309 0.000167
64 | vn -0.999988 0.004662 -0.001626
65 | v -0.010401 0.026309 0.000167
66 | vn -0.665951 0.690851 0.281486
67 | v -0.010389 0.025220 0.019188
68 | vn -0.999597 0.009346 0.026807
69 | v -0.010389 0.025220 0.019188
70 | vn 0.006493 -0.999457 -0.032313
71 | v -0.008730 -0.000024 0.036165
72 | vn 0.377718 0.867563 0.323518
73 | v 0.010400 0.025253 0.019037
74 | vn -0.000242 0.983230 0.182372
75 | v 0.010400 0.025253 0.019037
76 | vn 0.665647 0.002096 0.746264
77 | v 0.005840 0.014274 0.053803
78 | vn 0.008418 -0.012115 0.999891
79 | v 0.005840 0.014274 0.053803
80 | vn 0.001757 0.953702 0.300749
81 | v 0.005840 0.014274 0.053803
82 | vn 0.377718 0.867563 0.323518
83 | v 0.008616 0.013989 0.051328
84 | vn 0.998361 0.003310 0.057136
85 | v 0.008616 0.013989 0.051328
86 | vn 0.798906 -0.045001 0.599770
87 | v 0.008693 -0.000133 0.050166
88 | vn 0.998687 -0.025065 0.044683
89 | v 0.008693 -0.000133 0.050166
90 | vn -0.769031 -0.017753 0.638965
91 | v -0.008623 -0.000057 0.050953
92 | vn -0.008996 -0.999957 -0.002185
93 | v -0.008623 -0.000057 0.050953
94 | vn -0.871286 -0.490748 0.005227
95 | v -0.008623 -0.000057 0.050953
96 | vn 0.008418 -0.012115 0.999891
97 | v -0.005481 -0.000091 0.053725
98 | vn -0.002059 -0.999940 0.010793
99 | v -0.005481 -0.000091 0.053725
100 | vn -0.510143 -0.000217 0.860089
101 | v -0.005278 0.014293 0.053849
102 | vn -0.108731 0.943365 0.313433
103 | v -0.005278 0.014293 0.053849
104 | vn -0.665951 0.690851 0.281486
105 | v -0.007778 0.014218 0.052366
106 | vn -0.218924 0.920873 0.322590
107 | v -0.007778 0.014218 0.052366
108 | vn -0.858159 -0.000049 0.513385
109 | v -0.008841 0.013918 0.050589
110 | vn -0.998665 -0.002749 0.051583
111 | v -0.008841 0.013918 0.050589
112 | vn 0.006542 -0.999267 0.037718
113 | v 0.006138 -0.000021 0.053578
114 | vn 0.012751 -0.015529 0.999798
115 | v 0.006138 -0.000021 0.053578
116 | # 52 vertices, 0 vertices normals
117 |
118 | f 20//20 22//22 25//25
119 | f 3//3 4//4 27//27
120 | f 27//27 4//4 29//29
121 | f 2//2 30//30 24//24
122 | f 32//32 6//6 35//35
123 | f 25//25 5//5 20//20
124 | f 37//37 11//11 8//8
125 | f 11//11 39//39 21//21
126 | f 37//37 39//39 11//11
127 | f 42//42 23//23 7//7
128 | f 2//2 12//12 30//30
129 | f 12//12 44//44 30//30
130 | f 8//8 11//11 21//21
131 | f 8//8 21//21 1//1
132 | f 32//32 19//19 6//6
133 | f 6//6 46//46 35//35
134 | f 48//48 46//46 6//6
135 | f 40//40 14//14 16//16
136 | f 3//3 13//13 4//4
137 | f 31//31 9//9 36//36
138 | f 19//19 26//26 6//6
139 | f 4//4 50//50 29//29
140 | f 17//17 47//47 28//28
141 | f 34//34 43//43 52//52
142 | f 15//15 43//43 34//34
143 | f 12//12 51//51 44//44
144 | f 18//18 38//38 10//10
145 | f 44//44 41//41 30//30
146 | f 16//16 14//14 45//45
147 | f 13//13 50//50 4//4
148 | f 18//18 10//10 33//33
149 | f 16//16 49//49 40//40
150 | # 32 faces, 0 coords texture
151 |
152 | # End of File
153 |
--------------------------------------------------------------------------------
/Benchmark/evaluation/evaluator.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains the evaluation metrics for Open6DOR Benchmark.
3 | We are currently refining the rotation eval section for fairer evaluation and easier comparison.
4 | Full version coming soon.
5 | """
6 | import numpy as np
7 | import math
8 | from scipy.spatial.transform import Rotation as R
9 |
10 |
11 |
12 | def projection(rot_mat_A, rot_mat_B, axis):
13 | """
14 | Project the relative rotation from A to B onto the axis.
15 | rot_mat: 3x3 rotation matrix
16 | A: ground truth rotation
17 | B: predicted rotation
18 | axis: 3x1 vector
19 | """
20 | det = np.linalg.det(rot_mat_A)
 21 |     assert det != 0  # a valid rotation matrix is invertible (det = +1 for proper rotations)
22 | v = np.linalg.inv(rot_mat_A) @ axis
23 |
24 | w = rot_mat_B @ v
25 | angle = np.arccos(np.dot(axis, w) / (np.linalg.norm(axis) * np.linalg.norm(w)))
26 | return np.degrees(angle)
27 |
28 | # quat_gt = [0.884556,-0.093848,-0.436286,0.135678]
29 | quat_gt = [-0.205673,-0.205673,-0.596955,0.772278]
30 | rot_gt = R.from_quat(quat_gt).as_matrix()
31 | # quat_pred = [0.972568,-0.128846,-0.164,0.103027]
32 | # quat_pred = [0.546952,-0.013245,-0.820748,0.16444]
33 | # quat_pred = [0.450043,-0.310077,-0.760036,0.351651]
34 | # quat_pred = [0.270194,-0.590044,-0.570659,0.503183]
35 |
36 | # quat_pred = [0.166216,-0.492937,-0.609121,0.59863]
37 | # quat_pred = [-0.058748,-0.690237,-0.377434,-0.377434]
38 |
39 |
40 |
41 | quat_pred = [0.107351,-0.684364,-0.220191,0.68676]
42 | rot_pred = R.from_quat(quat_pred).as_matrix()
43 | ax = "y"
44 | axis = ax
45 | if ax == "x":
46 | axis = np.array([1, 0, 0])
47 | elif ax == "y":
48 | axis = np.array([0, 1, 0])
49 | elif ax == "z":
50 | axis = np.array([0, 0, 1])
51 |
52 | # if isinstance(axis, np.ndarray):
53 | # deviation = projection(rot_gt, rot_pred, axis)
54 | # print(f"Deviation along axis {axis}: {deviation}")
55 |
56 |
57 | def normalize_quat(quat):
58 | norm = math.sqrt(sum(q ** 2 for q in quat))
59 | return [q / norm for q in quat]
60 |
61 | def angle_deviation(quat0, quat1):
62 | # Normalize the quaternions
63 | quat0 = normalize_quat(quat0)
64 | quat1 = normalize_quat(quat1)
65 |
66 | # Compute the dot product of the two quaternions
67 | dot_product = sum(q0 * q1 for q0, q1 in zip(quat0, quat1))
68 |
69 | # Ensure the dot product is within the range [-1, 1] to avoid numerical errors
70 | dot_product = max(-1.0, min(1.0, dot_product))
71 |
72 | # Compute the angle deviation (in radians)
73 | angle_deviation = 2 * math.acos(dot_product)
74 |
75 | # Convert the angle deviation to degrees
76 | angle_deviation_degrees = math.degrees(angle_deviation)
77 |
78 | return angle_deviation_degrees
79 |
80 | # # Example usage
81 | # quat0 = [0.7071, 0.0, 0.7071, 0.0] # Example quaternion 0
82 | # quat1 = [0.7, 0.0, 0.9, 0.0] # Example quaternion 1
83 |
84 | # angle_deviation = angle_deviation(quat0, quat1)
85 | # print(f"Angle deviation: {angle_deviation} degrees")
86 |
87 |
88 |
89 | def evaluate_rot(quat_gt, quat_pred):
90 | """
91 | Evaluate the predicted rotation.
92 | task_id: str
93 | quat_pred: list of 4 floats
94 | """
95 | # load the ground truth quaternion
96 |
97 | rot_gt = R.from_quat(quat_gt).as_matrix()
98 | rot_pred = R.from_quat(quat_pred).as_matrix()
 99 |     task_level = 0  # TODO: load task level from the dataset
100 |     obj_category = 0  # TODO: load object category from the dataset
101 | if task_level == 0:
102 | ax = "z"
103 | elif task_level == 1:
104 | ax = "y"
105 | if obj_category in ["mug", "binder_clips", "toy", "wallet", "headphone"] :
106 | ax = "n"
107 | elif task_level == 2:
108 |         ax = 0  # TODO: load axis from the dataset
109 | else:
110 | raise ValueError(f"Invalid task level: {task_level}")
111 | axis = ax
112 | if ax == "x":
113 | axis = np.array([1, 0, 0])
114 | elif ax == "y":
115 | axis = np.array([0, 1, 0])
116 | elif ax == "z":
117 | axis = np.array([0, 0, 1])
118 |
119 | deviation = -1
120 | if isinstance(axis, np.ndarray):
121 | deviation = projection(rot_gt, rot_pred, axis)
122 | else:
123 | deviation = angle_deviation(quat_gt, quat_pred)
124 |
125 | return deviation
126 |
127 |
128 | def evaluate_posi(sel_pos, tar_pos, mode):
129 |     """
130 |     Evaluate the predicted position against the reference position(s).
131 |     For the directional modes, sel_pos and tar_pos are single [x, y, z] positions;
132 |     for "between" / "center", sel_pos is expected to hold the positions of the
133 |     reference objects (two for "between", all of them for "center").
134 |     Returns 1 (success) or 0 (failure).
135 |     """
136 |     succ = 0
137 |     if mode in ["left", "right", "front", "back", "behind", "top"]:
138 |         if mode == "left":
139 |             succ += sel_pos[1] > tar_pos[1]
140 |         elif mode == "right":
141 |             succ += sel_pos[1] < tar_pos[1]
142 |         elif mode == "front":
143 |             succ += sel_pos[0] > tar_pos[0]
144 |         elif mode == "back" or mode == "behind":
145 |             succ += sel_pos[0] < tar_pos[0]
146 |         elif mode == "top":
147 |             succ += sel_pos[2] <= tar_pos[2]
148 |     elif mode == "between":
149 |         sel_pos_1, sel_pos_2 = sel_pos[0], sel_pos[1]
150 |         max_sel_pos_x = np.max([sel_pos_1[0], sel_pos_2[0]])
151 |         max_sel_pos_y = np.max([sel_pos_1[1], sel_pos_2[1]])
152 |         min_sel_pos_x = np.min([sel_pos_1[0], sel_pos_2[0]])
153 |         min_sel_pos_y = np.min([sel_pos_1[1], sel_pos_2[1]])
154 |         succ += (min_sel_pos_x < tar_pos[0] < max_sel_pos_x) or (min_sel_pos_y < tar_pos[1] < max_sel_pos_y)
155 |     elif mode == "center":
156 |         sel_pos_all = np.asarray(sel_pos)
157 |         max_sel_pos_x = np.max(sel_pos_all, axis=0)[0]
158 |         min_sel_pos_x = np.min(sel_pos_all, axis=0)[0]
159 |         max_sel_pos_y = np.max(sel_pos_all, axis=0)[1]
160 |         min_sel_pos_y = np.min(sel_pos_all, axis=0)[1]
161 |         succ += (min_sel_pos_x < tar_pos[0] < max_sel_pos_x) and (min_sel_pos_y < tar_pos[1] < max_sel_pos_y)
162 |     return succ
/Method/vision/GroundedSAM/segment_anything/README.md:
--------------------------------------------------------------------------------
1 | # Segment Anything
2 |
3 | **[Meta AI Research, FAIR](https://ai.facebook.com/research/)**
4 |
5 | [Alexander Kirillov](https://alexander-kirillov.github.io/), [Eric Mintun](https://ericmintun.github.io/), [Nikhila Ravi](https://nikhilaravi.com/), [Hanzi Mao](https://hanzimao.me/), Chloe Rolland, Laura Gustafson, [Tete Xiao](https://tetexiao.com), [Spencer Whitehead](https://www.spencerwhitehead.com/), Alex Berg, Wan-Yen Lo, [Piotr Dollar](https://pdollar.github.io/), [Ross Girshick](https://www.rossgirshick.info/)
6 |
7 | [[`Paper`](https://ai.facebook.com/research/publications/segment-anything/)] [[`Project`](https://segment-anything.com/)] [[`Demo`](https://segment-anything.com/demo)] [[`Dataset`](https://segment-anything.com/dataset/index.html)] [[`Blog`](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/)]
8 |
9 | 
10 |
11 | The **Segment Anything Model (SAM)** produces high quality object masks from input prompts such as points or boxes, and it can be used to generate masks for all objects in an image. It has been trained on a [dataset](https://segment-anything.com/dataset/index.html) of 11 million images and 1.1 billion masks, and has strong zero-shot performance on a variety of segmentation tasks.
12 |
13 |
14 |
15 |
16 |
17 |
18 | ## Installation
19 |
20 | The code requires `python>=3.8`, as well as `pytorch>=1.7` and `torchvision>=0.8`. Please follow the instructions [here](https://pytorch.org/get-started/locally/) to install both PyTorch and TorchVision dependencies. Installing both PyTorch and TorchVision with CUDA support is strongly recommended.
21 |
22 | Install Segment Anything:
23 |
24 | ```
25 | pip install git+https://github.com/facebookresearch/segment-anything.git
26 | ```
27 |
28 | or clone the repository locally and install with
29 |
30 | ```
31 | git clone git@github.com:facebookresearch/segment-anything.git
32 | cd segment-anything; pip install -e .
33 | ```
34 |
35 | The following optional dependencies are necessary for mask post-processing, saving masks in COCO format, the example notebooks, and exporting the model in ONNX format. `jupyter` is also required to run the example notebooks.
36 | ```
37 | pip install opencv-python pycocotools matplotlib onnxruntime onnx
38 | ```
39 |
40 |
41 | ## Getting Started
42 |
43 | First download a [model checkpoint](#model-checkpoints). Then the model can be used in just a few lines to get masks from a given prompt:
44 |
45 | ```
46 | from segment_anything import build_sam, SamPredictor
 47 | predictor = SamPredictor(build_sam(checkpoint="<path/to/checkpoint>"))
 48 | predictor.set_image(<your_image>)
 49 | masks, _, _ = predictor.predict(<input_prompts>)
50 | ```
51 |
52 | or generate masks for an entire image:
53 |
54 | ```
55 | from segment_anything import build_sam, SamAutomaticMaskGenerator
 56 | mask_generator = SamAutomaticMaskGenerator(build_sam(checkpoint="<path/to/checkpoint>"))
 57 | masks = mask_generator.generate(<your_image>)
58 | ```
59 |
60 | Additionally, masks can be generated for images from the command line:
61 |
62 | ```
 63 | python scripts/amg.py --checkpoint <path/to/checkpoint> --input <image_or_folder> --output <path/to/output>
64 | ```
65 |
66 | See the examples notebooks on [using SAM with prompts](/notebooks/predictor_example.ipynb) and [automatically generating masks](/notebooks/automatic_mask_generator_example.ipynb) for more details.
67 |
68 |
69 |
70 |
71 |
72 |
73 | ## ONNX Export
74 |
75 | SAM's lightweight mask decoder can be exported to ONNX format so that it can be run in any environment that supports ONNX runtime, such as in-browser as showcased in the [demo](https://segment-anything.com/demo). Export the model with
76 |
77 | ```
 78 | python scripts/export_onnx_model.py --checkpoint <path/to/checkpoint> --output <path/to/output>
79 | ```
80 |
81 | See the [example notebook](https://github.com/facebookresearch/segment-anything/blob/main/notebooks/onnx_model_example.ipynb) for details on how to combine image preprocessing via SAM's backbone with mask prediction using the ONNX model. It is recommended to use the latest stable version of PyTorch for ONNX export.
82 |
83 | ## Model Checkpoints
84 |
 85 | Three versions of the model are available with different backbone sizes. These models can be instantiated by running
86 | ```
87 | from segment_anything import sam_model_registry
 88 | sam = sam_model_registry["<model_type>"](checkpoint="<path/to/checkpoint>")
89 | ```
90 | Click the links below to download the checkpoint for the corresponding model name. The default model in bold can also be instantiated with `build_sam`, as in the examples in [Getting Started](#getting-started).
91 |
92 | * **`default` or `vit_h`: [ViT-H SAM model.](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth)**
93 | * `vit_l`: [ViT-L SAM model.](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth)
94 | * `vit_b`: [ViT-B SAM model.](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth)
95 |
96 | ## License
97 | The model is licensed under the [Apache 2.0 license](LICENSE).
98 |
99 | ## Contributing
100 |
101 | See [contributing](CONTRIBUTING.md) and the [code of conduct](CODE_OF_CONDUCT.md).
102 |
103 | ## Contributors
104 |
105 | The Segment Anything project was made possible with the help of many contributors (alphabetical):
106 |
107 | Aaron Adcock, Vaibhav Aggarwal, Morteza Behrooz, Cheng-Yang Fu, Ashley Gabriel, Ahuva Goldstand, Allen Goodman, Sumanth Gurram, Jiabo Hu, Somya Jain, Devansh Kukreja, Robert Kuo, Joshua Lane, Yanghao Li, Lilian Luong, Jitendra Malik, Mallika Malhotra, William Ngan, Omkar Parkhi, Nikhil Raina, Dirk Rowe, Neil Sejoor, Vanessa Stark, Bala Varadarajan, Bram Wasti, Zachary Winstrom
108 |
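Beyond the point-prompt snippets in Getting Started above, the predictor also accepts box prompts. A minimal sketch, assuming a downloaded ViT-H checkpoint, a CUDA device, and the bundled notebooks/images/truck.jpg (any RGB image works):

```python
import numpy as np
import cv2
from segment_anything import sam_model_registry, SamPredictor

# Placeholder paths: any RGB image and the downloaded ViT-H checkpoint.
image = cv2.cvtColor(cv2.imread("notebooks/images/truck.jpg"), cv2.COLOR_BGR2RGB)
sam = sam_model_registry["vit_h"](checkpoint="sam_vit_h_4b8939.pth").to("cuda")

predictor = SamPredictor(sam)
predictor.set_image(image)

# Box prompt in XYXY pixel coordinates (the values here are arbitrary).
box = np.array([100, 100, 400, 400])
masks, scores, _ = predictor.predict(box=box[None, :], multimask_output=False)
print(masks.shape, scores)  # (1, H, W) boolean mask and its predicted IoU
```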
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/segment_anything/utils/onnx.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import torch
8 | import torch.nn as nn
9 | from torch.nn import functional as F
10 |
11 | from typing import Tuple
12 |
13 | from ..modeling import Sam
14 | from .amg import calculate_stability_score
15 |
16 |
17 | class SamOnnxModel(nn.Module):
18 | """
19 | This model should not be called directly, but is used in ONNX export.
20 | It combines the prompt encoder, mask decoder, and mask postprocessing of Sam,
21 | with some functions modified to enable model tracing. Also supports extra
 22 |     options controlling what information is returned. See the ONNX export script for details.
23 | """
24 |
25 | def __init__(
26 | self,
27 | model: Sam,
28 | return_single_mask: bool,
29 | use_stability_score: bool = False,
30 | return_extra_metrics: bool = False,
31 | ) -> None:
32 | super().__init__()
33 | self.mask_decoder = model.mask_decoder
34 | self.model = model
35 | self.img_size = model.image_encoder.img_size
36 | self.return_single_mask = return_single_mask
37 | self.use_stability_score = use_stability_score
38 | self.stability_score_offset = 1.0
39 | self.return_extra_metrics = return_extra_metrics
40 |
41 | @staticmethod
42 | def resize_longest_image_size(
43 | input_image_size: torch.Tensor, longest_side: int
44 | ) -> torch.Tensor:
45 | input_image_size = input_image_size.to(torch.float32)
46 | scale = longest_side / torch.max(input_image_size)
47 | transformed_size = scale * input_image_size
48 | transformed_size = torch.floor(transformed_size + 0.5).to(torch.int64)
49 | return transformed_size
50 |
51 | def _embed_points(self, point_coords: torch.Tensor, point_labels: torch.Tensor) -> torch.Tensor:
52 | point_coords = point_coords + 0.5
53 | point_coords = point_coords / self.img_size
54 | point_embedding = self.model.prompt_encoder.pe_layer._pe_encoding(point_coords)
55 | point_labels = point_labels.unsqueeze(-1).expand_as(point_embedding)
56 |
57 | point_embedding = point_embedding * (point_labels != -1)
58 | point_embedding = point_embedding + self.model.prompt_encoder.not_a_point_embed.weight * (
59 | point_labels == -1
60 | )
61 |
62 | for i in range(self.model.prompt_encoder.num_point_embeddings):
63 | point_embedding = point_embedding + self.model.prompt_encoder.point_embeddings[
64 | i
65 | ].weight * (point_labels == i)
66 |
67 | return point_embedding
68 |
69 | def _embed_masks(self, input_mask: torch.Tensor, has_mask_input: torch.Tensor) -> torch.Tensor:
70 | mask_embedding = has_mask_input * self.model.prompt_encoder.mask_downscaling(input_mask)
71 | mask_embedding = mask_embedding + (
72 | 1 - has_mask_input
73 | ) * self.model.prompt_encoder.no_mask_embed.weight.reshape(1, -1, 1, 1)
74 | return mask_embedding
75 |
76 | def mask_postprocessing(self, masks: torch.Tensor, orig_im_size: torch.Tensor) -> torch.Tensor:
77 | masks = F.interpolate(
78 | masks,
79 | size=(self.img_size, self.img_size),
80 | mode="bilinear",
81 | align_corners=False,
82 | )
83 |
84 | prepadded_size = self.resize_longest_image_size(orig_im_size, self.img_size)
85 | masks = masks[..., : int(prepadded_size[0]), : int(prepadded_size[1])]
86 |
87 | orig_im_size = orig_im_size.to(torch.int64)
88 | h, w = orig_im_size[0], orig_im_size[1]
89 | masks = F.interpolate(masks, size=(h, w), mode="bilinear", align_corners=False)
90 | return masks
91 |
92 | def select_masks(
93 | self, masks: torch.Tensor, iou_preds: torch.Tensor, num_points: int
94 | ) -> Tuple[torch.Tensor, torch.Tensor]:
95 | # Determine if we should return the multiclick mask or not from the number of points.
96 | # The reweighting is used to avoid control flow.
97 | score_reweight = torch.tensor(
98 | [[1000] + [0] * (self.model.mask_decoder.num_mask_tokens - 1)]
99 | ).to(iou_preds.device)
100 | score = iou_preds + (num_points - 2.5) * score_reweight
101 | best_idx = torch.argmax(score, dim=1)
102 | masks = masks[torch.arange(masks.shape[0]), best_idx, :, :].unsqueeze(1)
103 | iou_preds = iou_preds[torch.arange(masks.shape[0]), best_idx].unsqueeze(1)
104 |
105 | return masks, iou_preds
106 |
107 | @torch.no_grad()
108 | def forward(
109 | self,
110 | image_embeddings: torch.Tensor,
111 | point_coords: torch.Tensor,
112 | point_labels: torch.Tensor,
113 | mask_input: torch.Tensor,
114 | has_mask_input: torch.Tensor,
115 | orig_im_size: torch.Tensor,
116 | ):
117 | sparse_embedding = self._embed_points(point_coords, point_labels)
118 | dense_embedding = self._embed_masks(mask_input, has_mask_input)
119 |
120 | masks, scores = self.model.mask_decoder.predict_masks(
121 | image_embeddings=image_embeddings,
122 | image_pe=self.model.prompt_encoder.get_dense_pe(),
123 | sparse_prompt_embeddings=sparse_embedding,
124 | dense_prompt_embeddings=dense_embedding,
125 | )
126 |
127 | if self.use_stability_score:
128 | scores = calculate_stability_score(
129 | masks, self.model.mask_threshold, self.stability_score_offset
130 | )
131 |
132 | if self.return_single_mask:
133 | masks, scores = self.select_masks(masks, scores, point_coords.shape[1])
134 |
135 | upscaled_masks = self.mask_postprocessing(masks, orig_im_size)
136 |
137 | if self.return_extra_metrics:
138 | stability_scores = calculate_stability_score(
139 | upscaled_masks, self.model.mask_threshold, self.stability_score_offset
140 | )
141 | areas = (upscaled_masks > self.model.mask_threshold).sum(-1).sum(-1)
142 | return upscaled_masks, scores, stability_scores, areas, masks
143 |
144 | return upscaled_masks, scores, masks
145 |
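A hedged export sketch for the wrapper above; the dummy input shapes mirror what the companion scripts/export_onnx_model.py builds (256-dim embeddings on a 64x64 grid for the 1024-pixel input, a padded point prompt, and a 256x256 low-resolution mask input), and the checkpoint path and opset value are placeholders:

```python
import torch
from segment_anything import sam_model_registry
from segment_anything.utils.onnx import SamOnnxModel

# Hypothetical checkpoint path.
sam = sam_model_registry["vit_h"](checkpoint="sam_vit_h_4b8939.pth")
onnx_model = SamOnnxModel(sam, return_single_mask=True)

# Dummy inputs matching the forward() signature above.
dummy_inputs = {
    "image_embeddings": torch.randn(1, 256, 64, 64, dtype=torch.float),
    "point_coords": torch.randint(0, 1024, (1, 5, 2), dtype=torch.float),
    "point_labels": torch.randint(0, 4, (1, 5), dtype=torch.float),
    "mask_input": torch.randn(1, 1, 256, 256, dtype=torch.float),
    "has_mask_input": torch.tensor([1], dtype=torch.float),
    "orig_im_size": torch.tensor([1500, 2250], dtype=torch.float),
}

with open("sam_decoder.onnx", "wb") as f:
    torch.onnx.export(
        onnx_model, tuple(dummy_inputs.values()), f,
        input_names=list(dummy_inputs.keys()),
        output_names=["masks", "iou_predictions", "low_res_masks"],
        opset_version=17,
    )
```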
--------------------------------------------------------------------------------
/Benchmark/bench.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import json
3 | import imageio.v2 as imageio
4 | import os
5 | import argparse
6 | from evaluation import evaluator
7 | import yaml
8 |
9 | mesh_root = "meshes"
10 | def load_task(task_path, bench_config):
11 | # task_config
12 | task_config = json.load(open(task_path, 'r'))
13 |
14 | # task_instruction
15 | task_instruction = task_config["instruction"]
16 | print("instruction:", task_instruction)
17 |
18 | # task_image
19 | if bench_config["image_mode"] == "GIVEN_IMAGE_ISAACGYM":
 20 |         image_path = os.path.join(os.path.dirname(task_path), "before-rgb-0-0.png")
21 | task_image = imageio.imread(image_path)
22 |
23 | elif bench_config["image_mode"] == "GIVEN_IMAGE_BLENDER":
24 | pass
25 |
26 | elif bench_config["image_mode"] == "RENDER_IMAGE_ISAACGYM":
27 | from ..Method.interaction import init_gym
 28 |         gym, cfgs, task_config_now = init_gym(task_config, index=i, random_task=True, no_position=True)
29 |
30 | points_envs, colors_envs, rgb_envs, depth_envs ,seg_envs, ori_points_envs, ori_colors_envs, \
31 | pixel2pointid, pointid2pixel = gym.refresh_observation(get_visual_obs=True)
32 |
33 | task_image = colors_envs[0]
34 |
35 | elif bench_config["image_mode"] == "RENDER_IMAGE_BLENDER":
36 | from renderer import open6dor_renderer
37 | task_image = None
38 | output_root_path = bench_config["output_path"]
39 | obj_paths = task_config["selected_urdfs"]
40 | obj_ids = [path.split("/")[-2] for path in obj_paths]
41 |
42 | init_poses = task_config["init_obj_pos"]
43 | obj_poses = {}
44 |
45 | for i in range(len(obj_ids)):
46 | pos = init_poses[i]
47 | id = obj_ids[i]
48 | position = pos[:3]
49 | quaternion = pos[3:7]
50 | transformation_matrix = open6dor_renderer.create_transformation_matrix(position, quaternion)
51 | obj_poses[id] = transformation_matrix
52 | task_id = "my_test"
53 | script = generate_shell_script(output_root_path, task_id, obj_paths, init_poses,
54 | bench_config["background_material_id"], bench_config["env_map_id"],
55 | bench_config["cam_quaternion"], bench_config["cam_translation"])
56 | # run shell script
57 | os.system(f"bash {script}")
58 |
59 | return task_config, task_instruction, task_image
60 |
61 | def generate_shell_script(output_root_path, task_id, obj_paths, init_poses,
62 | background_material_id, env_map_id, cam_quaternion, cam_translation):
63 | script_name = "renderer/run_renderer.sh"
64 | command = "cd renderer\n"
65 | command += f"./blender-2.93.3-linux-x64/blender material_lib_v2.blend --background --python open6dor_renderer.py -- \\\n"
66 | command += f" --output_root_path {output_root_path} \\\n"
67 | command += f" --task_id {task_id} \\\n"
68 | command += f" --obj_paths {' '.join(obj_paths)} \\\n"
69 | init_obj_pos_flat = ' '.join(map(str, [item for sublist in init_poses for item in sublist]))
70 | command += f" --init_obj_pos {init_obj_pos_flat} \\\n"
71 | command += f" --background_material_id {background_material_id} \\\n"
72 | command += f" --env_map_id {env_map_id} \\\n"
73 | command += f" --cam_quaternion {' '.join(map(str, cam_quaternion))} \\\n"
74 | command += f" --cam_translation {' '.join(map(str, cam_translation))}\n"
75 |
76 | shell_file_content = f"#!/bin/bash\n\n{command}"
77 |
78 | with open(script_name, "w") as shell_file:
79 | shell_file.write(shell_file_content)
80 |
81 | print(f"Shell script {script_name} generated successfully.")
82 | print("=============================================")
83 |
84 | return script_name
85 |
 86 | def eval_task(cfgs, pred_pose, use_rot=False):
 87 |     rot_deviation = None  # stays None when rotation is not evaluated
 88 |     if use_rot:
 89 |         pred_rot = pred_pose["rotation"]
 90 |         rot_gt = list(cfgs['anno_target']['annotation'].values())[0]["quat"]
 91 |         rot_deviation = evaluator.evaluate_rot(rot_gt, pred_rot)
 92 |         print(f"Rotation deviation: {rot_deviation} degrees")
93 | pos_bases = cfgs['init_obj_pos']
94 | pred_pos = pred_pose["position"]
95 | pos_eval = evaluator.evaluate_posi(pred_pos, pos_bases, "behind")
96 |
97 | return rot_deviation, pos_eval
98 |
99 | def method_template(cfgs, task_instruction, task_image):
100 | pred_pose = {
101 | "position": [0,0,0],
102 | "rotation": [0,0,0,0]
103 | }
104 | return pred_pose
105 |
106 | if __name__ == "__main__":
107 |
108 | parser = argparse.ArgumentParser(description="Benchmarking script for task evaluation")
109 | parser.add_argument("--mode", type=str, choices=["load_test", "eval"], help="Path to the task configuration file")
110 | parser.add_argument("--task_data", type=str, default="6dof", help="path set or single path to the task configuration file")
111 | parser.add_argument("--image_mode", type=str, default="GIVEN_IMAGE_ISAACGYM", help="Image mode")
112 | parser.add_argument("--output_path", type=str, default="../output/test", help="Path to the output directory")
113 |
114 | _args = parser.parse_args()
115 |
116 | render_configs = yaml.load(open("bench_config.yaml", 'r'), Loader=yaml.FullLoader)
117 | import pdb; pdb.set_trace()
118 | # merge the two configs
119 | bench_config = {**_args.__dict__, **render_configs}
120 | if bench_config["task_data"] == "6dof":
121 | task_paths = glob.glob('tasks/6DoF/*/*/*/task_config_new2.json')
122 | elif bench_config["task_data"] == "position":
123 | task_paths = glob.glob('tasks/position/*/*/*/task_config_new2.json')
124 | elif bench_config["task_data"] == "rotation":
125 | task_paths = glob.glob('tasks/rotation/*/*/*/task_config_new2.json')
126 | else:
127 | task_paths = [bench_config["task_data"]]
128 |
129 | if bench_config["mode"] == "load_test":
130 | for task_path in task_paths:
131 | task_config, task_instruction, task_image = load_task(task_path, bench_config)
132 |
133 | elif bench_config["mode"] == "eval":
134 | USE_ROT = False if bench_config["task_data"] == "position" else True
135 | for task_path in task_paths:
136 | task_config = json.load(open(task_path, 'r'))
137 | task_config, task_instruction, task_image = load_task(task_path, bench_config)
138 | pred_pose = method_template(task_config, task_instruction, task_image)
139 | eval_task(task_config, pred_pose, use_rot = USE_ROT)
140 |
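`method_template` above is the hook where a method under evaluation plugs in. A purely illustrative stand-in (it just echoes the target object's initial pose from the task config, using keys visible in the task_config JSON earlier in this dump) can be swapped in for `method_template` inside the eval loop:

```python
def my_method(cfgs, task_instruction, task_image):
    """Illustrative stand-in for method_template: echo the target object's
    initial pose from the task config instead of predicting a new one."""
    target_idx = cfgs["selected_obj_names"].index(cfgs["target_obj_name"])
    init = cfgs["init_obj_pos"][target_idx]
    return {
        "position": init[:3],   # x, y, z
        "rotation": init[3:7],  # quaternion
    }
```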
--------------------------------------------------------------------------------
/Method/isaacgym0/asset_info.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 |
4 | NVIDIA CORPORATION and its licensors retain all intellectual property
5 | and proprietary rights in and to this software, related documentation
6 | and any modifications thereto. Any use, reproduction, disclosure or
7 | distribution of this software and related documentation without an express
8 | license agreement from NVIDIA CORPORATION is strictly prohibited.
9 |
10 |
11 | Asset and Environment Information
12 | ---------------------------------
13 | Demonstrates introspection capabilities of the gym api at the asset and environment levels
14 | - Once an asset is loaded its properties can be queried
15 | - Assets in environments can be queried and their current states be retrieved
16 | """
17 |
18 | import os
19 | from isaacgym import gymapi
20 | from isaacgym import gymutil
21 |
22 |
23 | def print_asset_info(asset, name):
24 | print("======== Asset info %s: ========" % (name))
25 | num_bodies = gym.get_asset_rigid_body_count(asset)
26 | num_joints = gym.get_asset_joint_count(asset)
27 | num_dofs = gym.get_asset_dof_count(asset)
28 | print("Got %d bodies, %d joints, and %d DOFs" %
29 | (num_bodies, num_joints, num_dofs))
30 |
31 | # Iterate through bodies
32 | print("Bodies:")
33 | for i in range(num_bodies):
34 | name = gym.get_asset_rigid_body_name(asset, i)
35 | print(" %2d: '%s'" % (i, name))
36 |
37 | # Iterate through joints
38 | print("Joints:")
39 | for i in range(num_joints):
40 | name = gym.get_asset_joint_name(asset, i)
41 | type = gym.get_asset_joint_type(asset, i)
42 | type_name = gym.get_joint_type_string(type)
43 | print(" %2d: '%s' (%s)" % (i, name, type_name))
44 |
45 | # iterate through degrees of freedom (DOFs)
46 | print("DOFs:")
47 | for i in range(num_dofs):
48 | name = gym.get_asset_dof_name(asset, i)
49 | type = gym.get_asset_dof_type(asset, i)
50 | type_name = gym.get_dof_type_string(type)
51 | print(" %2d: '%s' (%s)" % (i, name, type_name))
52 |
53 |
54 | def print_actor_info(gym, env, actor_handle):
55 |
56 | name = gym.get_actor_name(env, actor_handle)
57 |
58 | body_names = gym.get_actor_rigid_body_names(env, actor_handle)
59 | body_dict = gym.get_actor_rigid_body_dict(env, actor_handle)
60 |
61 | joint_names = gym.get_actor_joint_names(env, actor_handle)
62 | joint_dict = gym.get_actor_joint_dict(env, actor_handle)
63 |
64 | dof_names = gym.get_actor_dof_names(env, actor_handle)
65 | dof_dict = gym.get_actor_dof_dict(env, actor_handle)
66 |
67 | print()
68 | print("===== Actor: %s =======================================" % name)
69 |
70 | print("\nBodies")
71 | print(body_names)
72 | print(body_dict)
73 |
74 | print("\nJoints")
75 | print(joint_names)
76 | print(joint_dict)
77 |
78 | print("\n Degrees Of Freedom (DOFs)")
79 | print(dof_names)
80 | print(dof_dict)
81 | print()
82 |
83 | # Get body state information
84 | body_states = gym.get_actor_rigid_body_states(
85 | env, actor_handle, gymapi.STATE_ALL)
86 |
87 | # Print some state slices
88 | print("Poses from Body State:")
89 | print(body_states['pose']) # print just the poses
90 |
91 | print("\nVelocities from Body State:")
92 | print(body_states['vel']) # print just the velocities
93 | print()
94 |
95 | # iterate through bodies and print name and position
96 | body_positions = body_states['pose']['p']
97 | for i in range(len(body_names)):
98 | print("Body '%s' has position" % body_names[i], body_positions[i])
99 |
100 | print("\nDOF states:")
101 |
102 | # get DOF states
103 | dof_states = gym.get_actor_dof_states(env, actor_handle, gymapi.STATE_ALL)
104 |
105 | # print some state slices
106 | # Print all states for each degree of freedom
107 | print(dof_states)
108 | print()
109 |
110 | # iterate through DOFs and print name and position
111 | dof_positions = dof_states['pos']
112 | for i in range(len(dof_names)):
113 | print("DOF '%s' has position" % dof_names[i], dof_positions[i])
114 |
115 |
116 | # initialize gym
117 | gym = gymapi.acquire_gym()
118 |
119 | # parse arguments
120 | args = gymutil.parse_arguments(description="Asset and Environment Information")
121 |
122 | # create simulation context
123 | sim_params = gymapi.SimParams()
124 |
125 | sim_params.use_gpu_pipeline = False
126 | if args.use_gpu_pipeline:
127 | print("WARNING: Forcing CPU pipeline.")
128 |
129 | sim = gym.create_sim(args.compute_device_id, args.graphics_device_id, args.physics_engine, sim_params)
130 |
131 | if sim is None:
132 | print("*** Failed to create sim")
133 | quit()
134 |
135 | # Print out the working directory
136 | # helpful in determining the relative location that assets will be loaded from
137 | print("Working directory: %s" % os.getcwd())
138 |
139 | # Path where assets are searched, relative to the current working directory
140 | asset_root = "../../assets"
141 |
142 | # List of assets that will be loaded, both URDF and MJCF files are supported
143 | asset_files = ["urdf/cartpole.urdf",
144 | "urdf/franka_description/robots/franka_panda.urdf",
145 | "mjcf/nv_ant.xml"]
146 | asset_names = ["cartpole", "franka", "ant"]
147 | loaded_assets = []
148 |
149 | # Load the assets and ensure that we are successful
150 | for asset in asset_files:
151 | print("Loading asset '%s' from '%s'" % (asset, asset_root))
152 |
153 | current_asset = gym.load_asset(sim, asset_root, asset)
154 |
155 | if current_asset is None:
156 | print("*** Failed to load asset '%s'" % (asset, asset_root))
157 | quit()
158 | loaded_assets.append(current_asset)
159 |
160 | for i in range(len(loaded_assets)):
161 | print()
162 | print_asset_info(loaded_assets[i], asset_names[i])
163 |
164 | # Setup environment spacing
165 | spacing = 2.0
166 | lower = gymapi.Vec3(-spacing, 0.0, -spacing)
167 | upper = gymapi.Vec3(spacing, spacing, spacing)
168 |
169 | # Create one environment
170 | env = gym.create_env(sim, lower, upper, 1)
171 |
172 | # Add actors to environment
173 | pose = gymapi.Transform()
174 | for i in range(len(loaded_assets)):
175 | pose.p = gymapi.Vec3(0.0, 0.0, i * 2)
176 | pose.r = gymapi.Quat(-0.707107, 0.0, 0.0, 0.707107)
177 | gym.create_actor(env, loaded_assets[i], pose, asset_names[i], -1, -1)
178 |
179 | print("=== Environment info: ================================================")
180 |
181 | actor_count = gym.get_actor_count(env)
182 | print("%d actors total" % actor_count)
183 |
184 | # Iterate through all actors for the environment
185 | for i in range(actor_count):
186 | actor_handle = gym.get_actor_handle(env, i)
187 | print_actor_info(gym, env, actor_handle)
188 |
189 | # Cleanup the simulator
190 | gym.destroy_sim(sim)
191 |
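As a companion to the name and state introspection above, here is a minimal sketch of querying DOF limits at the asset level. The helper is illustrative only (not part of the file above) and assumes Isaac Gym's get_asset_dof_properties, which returns a structured array with fields such as 'hasLimits', 'lower', and 'upper':

# Illustrative sketch: print DOF limits for a loaded asset.
# Assumes gym.get_asset_dof_properties returns a structured numpy array with
# 'hasLimits', 'lower', and 'upper' fields, as in the standard Isaac Gym API.
def print_asset_dof_limits(gym, asset, name):
    props = gym.get_asset_dof_properties(asset)
    print("======== DOF limits %s: ========" % name)
    for i in range(gym.get_asset_dof_count(asset)):
        dof_name = gym.get_asset_dof_name(asset, i)
        if props["hasLimits"][i]:
            print("  %2d: '%s' in [%.3f, %.3f]" % (i, dof_name, props["lower"][i], props["upper"][i]))
        else:
            print("  %2d: '%s' has no limits" % (i, dof_name))

Such a helper could be called alongside print_asset_info in the loop over loaded_assets.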
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/scripts/export_onnx_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import torch
8 |
9 | from segment_anything import build_sam, build_sam_vit_b, build_sam_vit_l
10 | from segment_anything.utils.onnx import SamOnnxModel
11 |
12 | import argparse
13 | import warnings
14 |
15 | try:
16 | import onnxruntime # type: ignore
17 |
18 | onnxruntime_exists = True
19 | except ImportError:
20 | onnxruntime_exists = False
21 |
22 | parser = argparse.ArgumentParser(
23 | description="Export the SAM prompt encoder and mask decoder to an ONNX model."
24 | )
25 |
26 | parser.add_argument(
27 | "--checkpoint", type=str, required=True, help="The path to the SAM model checkpoint."
28 | )
29 |
30 | parser.add_argument(
31 | "--output", type=str, required=True, help="The filename to save the ONNX model to."
32 | )
33 |
34 | parser.add_argument(
35 | "--model-type",
36 | type=str,
37 | default="default",
38 | help="In ['default', 'vit_b', 'vit_l']. Which type of SAM model to export.",
39 | )
40 |
41 | parser.add_argument(
42 | "--return-single-mask",
43 | action="store_true",
44 | help=(
45 | "If true, the exported ONNX model will only return the best mask, "
46 | "instead of returning multiple masks. For high resolution images "
47 | "this can improve runtime when upscaling masks is expensive."
48 | ),
49 | )
50 |
51 | parser.add_argument(
52 | "--opset",
53 | type=int,
54 | default=17,
55 | help="The ONNX opset version to use. Must be >=11",
56 | )
57 |
58 | parser.add_argument(
59 | "--quantize-out",
60 | type=str,
61 | default=None,
62 | help=(
63 | "If set, will quantize the model and save it with this name. "
64 | "Quantization is performed with quantize_dynamic from onnxruntime.quantization.quantize."
65 | ),
66 | )
67 |
68 | parser.add_argument(
69 | "--gelu-approximate",
70 | action="store_true",
71 | help=(
72 | "Replace GELU operations with approximations using tanh. Useful "
73 | "for some runtimes that have slow or unimplemented erf ops, used in GELU."
74 | ),
75 | )
76 |
77 | parser.add_argument(
78 | "--use-stability-score",
79 | action="store_true",
80 | help=(
81 | "Replaces the model's predicted mask quality score with the stability "
82 | "score calculated on the low resolution masks using an offset of 1.0. "
83 | ),
84 | )
85 |
86 | parser.add_argument(
87 | "--return-extra-metrics",
88 | action="store_true",
89 | help=(
90 | "The model will return five results: (masks, scores, stability_scores, "
91 | "areas, low_res_logits) instead of the usual three. This can be "
92 | "significantly slower for high resolution outputs."
93 | ),
94 | )
95 |
96 |
97 | def run_export(
98 | model_type: str,
99 | checkpoint: str,
100 | output: str,
101 | opset: int,
102 | return_single_mask: bool,
103 | gelu_approximate: bool = False,
104 | use_stability_score: bool = False,
105 | return_extra_metrics=False,
106 | ):
107 | print("Loading model...")
108 | if model_type == "vit_b":
109 | sam = build_sam_vit_b(checkpoint)
110 | elif model_type == "vit_l":
111 | sam = build_sam_vit_l(checkpoint)
112 | else:
113 | sam = build_sam(checkpoint)
114 |
115 | onnx_model = SamOnnxModel(
116 | model=sam,
117 | return_single_mask=return_single_mask,
118 | use_stability_score=use_stability_score,
119 | return_extra_metrics=return_extra_metrics,
120 | )
121 |
122 | if gelu_approximate:
123 | for n, m in onnx_model.named_modules():
124 | if isinstance(m, torch.nn.GELU):
125 | m.approximate = "tanh"
126 |
127 | dynamic_axes = {
128 | "point_coords": {1: "num_points"},
129 | "point_labels": {1: "num_points"},
130 | }
131 |
132 | embed_dim = sam.prompt_encoder.embed_dim
133 | embed_size = sam.prompt_encoder.image_embedding_size
134 | mask_input_size = [4 * x for x in embed_size]
135 | dummy_inputs = {
136 | "image_embeddings": torch.randn(1, embed_dim, *embed_size, dtype=torch.float),
137 | "point_coords": torch.randint(low=0, high=1024, size=(1, 5, 2), dtype=torch.float),
138 | "point_labels": torch.randint(low=0, high=4, size=(1, 5), dtype=torch.float),
139 | "mask_input": torch.randn(1, 1, *mask_input_size, dtype=torch.float),
140 | "has_mask_input": torch.tensor([1], dtype=torch.float),
141 | "orig_im_size": torch.tensor([1500, 2250], dtype=torch.float),
142 | }
143 |
144 | _ = onnx_model(**dummy_inputs)
145 |
146 | output_names = ["masks", "iou_predictions", "low_res_masks"]
147 |
148 | with warnings.catch_warnings():
149 | warnings.filterwarnings("ignore", category=torch.jit.TracerWarning)
150 | warnings.filterwarnings("ignore", category=UserWarning)
151 | with open(output, "wb") as f:
152 | print(f"Exporing onnx model to {output}...")
153 | torch.onnx.export(
154 | onnx_model,
155 | tuple(dummy_inputs.values()),
156 | f,
157 | export_params=True,
158 | verbose=False,
159 | opset_version=opset,
160 | do_constant_folding=True,
161 | input_names=list(dummy_inputs.keys()),
162 | output_names=output_names,
163 | dynamic_axes=dynamic_axes,
164 | )
165 |
166 | if onnxruntime_exists:
167 | ort_inputs = {k: to_numpy(v) for k, v in dummy_inputs.items()}
168 | ort_session = onnxruntime.InferenceSession(output)
169 | _ = ort_session.run(None, ort_inputs)
170 | print("Model has successfully been run with ONNXRuntime.")
171 |
172 |
173 | def to_numpy(tensor):
174 | return tensor.cpu().numpy()
175 |
176 |
177 | if __name__ == "__main__":
178 | args = parser.parse_args()
179 | run_export(
180 | model_type=args.model_type,
181 | checkpoint=args.checkpoint,
182 | output=args.output,
183 | opset=args.opset,
184 | return_single_mask=args.return_single_mask,
185 | gelu_approximate=args.gelu_approximate,
186 | use_stability_score=args.use_stability_score,
187 | return_extra_metrics=args.return_extra_metrics,
188 | )
189 |
190 | if args.quantize_out is not None:
191 | assert onnxruntime_exists, "onnxruntime is required to quantize the model."
192 | from onnxruntime.quantization import QuantType # type: ignore
193 | from onnxruntime.quantization.quantize import quantize_dynamic # type: ignore
194 |
195 | print(f"Quantizing model and writing to {args.quantize_out}...")
196 | quantize_dynamic(
197 | model_input=args.output,
198 | model_output=args.quantize_out,
199 | optimize_model=True,
200 | per_channel=False,
201 | reduce_range=False,
202 | weight_type=QuantType.QUInt8,
203 | )
204 | print("Done!")
205 |
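For reference, a minimal sketch of driving the exported decoder with onnxruntime, using the same input names and dtypes as dummy_inputs above. The model path is a placeholder, the embedding shape assumes the default ViT-H configuration (256 channels at 64x64), and real image embeddings would come from the SAM image encoder rather than random noise:

# Illustrative sketch: run the exported decoder ONNX model with onnxruntime.
# "sam_decoder.onnx" is a placeholder path; the random embeddings stand in for
# the output of sam.image_encoder on a real image.
import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession("sam_decoder.onnx")
inputs = {
    "image_embeddings": np.random.randn(1, 256, 64, 64).astype(np.float32),
    "point_coords": np.array([[[400.0, 300.0]]], dtype=np.float32),
    "point_labels": np.array([[1]], dtype=np.float32),
    "mask_input": np.zeros((1, 1, 256, 256), dtype=np.float32),
    "has_mask_input": np.zeros(1, dtype=np.float32),
    "orig_im_size": np.array([1500, 2250], dtype=np.float32),
}
masks, iou_predictions, low_res_masks = session.run(None, inputs)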
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/segment_anything/modeling/mask_decoder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import torch
8 | from torch import nn
9 | from torch.nn import functional as F
10 |
11 | from typing import List, Tuple, Type
12 |
13 | from .common import LayerNorm2d
14 |
15 |
16 | class MaskDecoder(nn.Module):
17 | def __init__(
18 | self,
19 | *,
20 | transformer_dim: int,
21 | transformer: nn.Module,
22 | num_multimask_outputs: int = 3,
23 | activation: Type[nn.Module] = nn.GELU,
24 | iou_head_depth: int = 3,
25 | iou_head_hidden_dim: int = 256,
26 | ) -> None:
27 | """
28 | Predicts masks given an image and prompt embeddings, using a
29 | transformer architecture.
30 |
31 | Arguments:
32 | transformer_dim (int): the channel dimension of the transformer
33 | transformer (nn.Module): the transformer used to predict masks
34 | num_multimask_outputs (int): the number of masks to predict
35 | when disambiguating masks
36 | activation (nn.Module): the type of activation to use when
37 | upscaling masks
38 | iou_head_depth (int): the depth of the MLP used to predict
39 | mask quality
40 | iou_head_hidden_dim (int): the hidden dimension of the MLP
41 | used to predict mask quality
42 | """
43 | super().__init__()
44 | self.transformer_dim = transformer_dim
45 | self.transformer = transformer
46 |
47 | self.num_multimask_outputs = num_multimask_outputs
48 |
49 | self.iou_token = nn.Embedding(1, transformer_dim)
50 | self.num_mask_tokens = num_multimask_outputs + 1
51 | self.mask_tokens = nn.Embedding(self.num_mask_tokens, transformer_dim)
52 |
53 | self.output_upscaling = nn.Sequential(
54 | nn.ConvTranspose2d(transformer_dim, transformer_dim // 4, kernel_size=2, stride=2),
55 | LayerNorm2d(transformer_dim // 4),
56 | activation(),
57 | nn.ConvTranspose2d(transformer_dim // 4, transformer_dim // 8, kernel_size=2, stride=2),
58 | activation(),
59 | )
60 | self.output_hypernetworks_mlps = nn.ModuleList(
61 | [
62 | MLP(transformer_dim, transformer_dim, transformer_dim // 8, 3)
63 | for i in range(self.num_mask_tokens)
64 | ]
65 | )
66 |
67 | self.iou_prediction_head = MLP(
68 | transformer_dim, iou_head_hidden_dim, self.num_mask_tokens, iou_head_depth
69 | )
70 |
71 | def forward(
72 | self,
73 | image_embeddings: torch.Tensor,
74 | image_pe: torch.Tensor,
75 | sparse_prompt_embeddings: torch.Tensor,
76 | dense_prompt_embeddings: torch.Tensor,
77 | multimask_output: bool,
78 | hq_token_only: bool = False,
79 | interm_embeddings: torch.Tensor = None,
80 | ) -> Tuple[torch.Tensor, torch.Tensor]:
81 | """
82 | Predict masks given image and prompt embeddings.
83 |
84 | Arguments:
85 | image_embeddings (torch.Tensor): the embeddings from the image encoder
86 | image_pe (torch.Tensor): positional encoding with the shape of image_embeddings
87 | sparse_prompt_embeddings (torch.Tensor): the embeddings of the points and boxes
88 | dense_prompt_embeddings (torch.Tensor): the embeddings of the mask inputs
89 | multimask_output (bool): Whether to return multiple masks or a single
90 | mask.
91 |
92 | Returns:
93 | torch.Tensor: batched predicted masks
94 | torch.Tensor: batched predictions of mask quality
95 | """
96 | masks, iou_pred = self.predict_masks(
97 | image_embeddings=image_embeddings,
98 | image_pe=image_pe,
99 | sparse_prompt_embeddings=sparse_prompt_embeddings,
100 | dense_prompt_embeddings=dense_prompt_embeddings,
101 | )
102 |
103 | # Select the correct mask or masks for output
104 | if multimask_output:
105 | mask_slice = slice(1, None)
106 | else:
107 | mask_slice = slice(0, 1)
108 | masks = masks[:, mask_slice, :, :]
109 | iou_pred = iou_pred[:, mask_slice]
110 |
111 | # Prepare output
112 | return masks, iou_pred
113 |
114 | def predict_masks(
115 | self,
116 | image_embeddings: torch.Tensor,
117 | image_pe: torch.Tensor,
118 | sparse_prompt_embeddings: torch.Tensor,
119 | dense_prompt_embeddings: torch.Tensor,
120 | ) -> Tuple[torch.Tensor, torch.Tensor]:
121 | """Predicts masks. See 'forward' for more details."""
122 | # Concatenate output tokens
123 | output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
124 | output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1)
125 | tokens = torch.cat((output_tokens, sparse_prompt_embeddings), dim=1)
126 |
127 | # Expand per-image data in batch direction to be per-mask
128 | src = torch.repeat_interleave(image_embeddings, tokens.shape[0], dim=0)
129 | src = src + dense_prompt_embeddings
130 | pos_src = torch.repeat_interleave(image_pe, tokens.shape[0], dim=0)
131 | b, c, h, w = src.shape
132 |
133 | # Run the transformer
134 | hs, src = self.transformer(src, pos_src, tokens)
135 | iou_token_out = hs[:, 0, :]
136 | mask_tokens_out = hs[:, 1 : (1 + self.num_mask_tokens), :]
137 |
138 | # Upscale mask embeddings and predict masks using the mask tokens
139 | src = src.transpose(1, 2).view(b, c, h, w)
140 | upscaled_embedding = self.output_upscaling(src)
141 | hyper_in_list: List[torch.Tensor] = []
142 | for i in range(self.num_mask_tokens):
143 | hyper_in_list.append(self.output_hypernetworks_mlps[i](mask_tokens_out[:, i, :]))
144 | hyper_in = torch.stack(hyper_in_list, dim=1)
145 | b, c, h, w = upscaled_embedding.shape
146 | masks = (hyper_in @ upscaled_embedding.view(b, c, h * w)).view(b, -1, h, w)
147 |
148 | # Generate mask quality predictions
149 | iou_pred = self.iou_prediction_head(iou_token_out)
150 |
151 | return masks, iou_pred
152 |
153 |
154 | # Lightly adapted from
155 | # https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py # noqa
156 | class MLP(nn.Module):
157 | def __init__(
158 | self,
159 | input_dim: int,
160 | hidden_dim: int,
161 | output_dim: int,
162 | num_layers: int,
163 | sigmoid_output: bool = False,
164 | ) -> None:
165 | super().__init__()
166 | self.num_layers = num_layers
167 | h = [hidden_dim] * (num_layers - 1)
168 | self.layers = nn.ModuleList(
169 | nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])
170 | )
171 | self.sigmoid_output = sigmoid_output
172 |
173 | def forward(self, x):
174 | for i, layer in enumerate(self.layers):
175 | x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
176 | if self.sigmoid_output:
177 | x = F.sigmoid(x)
178 | return x
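A small usage sketch of the MLP head defined above, showing the shape contract used by output_hypernetworks_mlps: transformer_dim in, transformer_dim // 8 out, over three layers. The dimension value 256 is an assumption matching the default SAM decoder, and the snippet presumes the MLP class above is in scope:

# Illustrative sketch: map a batch of mask tokens to hypernetwork weights.
import torch

transformer_dim = 256  # assumed value, as in the default SAM decoder
mlp = MLP(transformer_dim, transformer_dim, transformer_dim // 8, num_layers=3)
tokens = torch.randn(2, transformer_dim)   # a batch of two mask tokens
weights = mlp(tokens)
print(weights.shape)                       # torch.Size([2, 32])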
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/segment_anything/modeling/sam.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import torch
8 | from torch import nn
9 | from torch.nn import functional as F
10 |
11 | from typing import Any, Dict, List, Tuple
12 |
13 | from .image_encoder import ImageEncoderViT
14 | from .mask_decoder import MaskDecoder
15 | from .prompt_encoder import PromptEncoder
16 |
17 |
18 | class Sam(nn.Module):
19 | mask_threshold: float = 0.0
20 | image_format: str = "RGB"
21 |
22 | def __init__(
23 | self,
24 | image_encoder: ImageEncoderViT,
25 | prompt_encoder: PromptEncoder,
26 | mask_decoder: MaskDecoder,
27 | pixel_mean: List[float] = [123.675, 116.28, 103.53],
28 | pixel_std: List[float] = [58.395, 57.12, 57.375],
29 | ) -> None:
30 | """
31 | SAM predicts object masks from an image and input prompts.
32 |
33 | Arguments:
34 | image_encoder (ImageEncoderViT): The backbone used to encode the
35 | image into image embeddings that allow for efficient mask prediction.
36 | prompt_encoder (PromptEncoder): Encodes various types of input prompts.
37 | mask_decoder (MaskDecoder): Predicts masks from the image embeddings
38 | and encoded prompts.
39 | pixel_mean (list(float)): Mean values for normalizing pixels in the input image.
40 | pixel_std (list(float)): Std values for normalizing pixels in the input image.
41 | """
42 | super().__init__()
43 | self.image_encoder = image_encoder
44 | self.prompt_encoder = prompt_encoder
45 | self.mask_decoder = mask_decoder
46 | self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1), False)
47 | self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False)
48 |
49 | @property
50 | def device(self) -> Any:
51 | return self.pixel_mean.device
52 |
53 | @torch.no_grad()
54 | def forward(
55 | self,
56 | batched_input: List[Dict[str, Any]],
57 | multimask_output: bool,
58 | ) -> List[Dict[str, torch.Tensor]]:
59 | """
60 | Predicts masks end-to-end from provided images and prompts.
61 | If prompts are not known in advance, using SamPredictor is
62 | recommended over calling the model directly.
63 |
64 | Arguments:
65 | batched_input (list(dict)): A list over input images, each a
66 | dictionary with the following keys. A prompt key can be
67 | excluded if it is not present.
68 | 'image': The image as a torch tensor in 3xHxW format,
69 | already transformed for input to the model.
70 | 'original_size': (tuple(int, int)) The original size of
71 | the image before transformation, as (H, W).
72 | 'point_coords': (torch.Tensor) Batched point prompts for
73 | this image, with shape BxNx2. Already transformed to the
74 | input frame of the model.
75 | 'point_labels': (torch.Tensor) Batched labels for point prompts,
76 | with shape BxN.
77 | 'boxes': (torch.Tensor) Batched box inputs, with shape Bx4.
78 | Already transformed to the input frame of the model.
79 | 'mask_inputs': (torch.Tensor) Batched mask inputs to the model,
80 | in the form Bx1xHxW.
81 | multimask_output (bool): Whether the model should predict multiple
82 | disambiguating masks, or return a single mask.
83 |
84 | Returns:
85 | (list(dict)): A list over input images, where each element is
86 | a dictionary with the following keys.
87 | 'masks': (torch.Tensor) Batched binary mask predictions,
88 | with shape BxCxHxW, where B is the number of input prompts,
89 | C is determined by multimask_output, and (H, W) is the
90 | original size of the image.
91 | 'iou_predictions': (torch.Tensor) The model's predictions
92 | of mask quality, in shape BxC.
93 | 'low_res_logits': (torch.Tensor) Low resolution logits with
94 | shape BxCxHxW, where H=W=256. Can be passed as mask input
95 | to subsequent iterations of prediction.
96 | """
97 | input_images = torch.stack([self.preprocess(x["image"]) for x in batched_input], dim=0)
98 | image_embeddings = self.image_encoder(input_images)
99 |
100 | outputs = []
101 | for image_record, curr_embedding in zip(batched_input, image_embeddings):
102 | if "point_coords" in image_record:
103 | points = (image_record["point_coords"], image_record["point_labels"])
104 | else:
105 | points = None
106 | sparse_embeddings, dense_embeddings = self.prompt_encoder(
107 | points=points,
108 | boxes=image_record.get("boxes", None),
109 | masks=image_record.get("mask_inputs", None),
110 | )
111 | low_res_masks, iou_predictions = self.mask_decoder(
112 | image_embeddings=curr_embedding.unsqueeze(0),
113 | image_pe=self.prompt_encoder.get_dense_pe(),
114 | sparse_prompt_embeddings=sparse_embeddings,
115 | dense_prompt_embeddings=dense_embeddings,
116 | multimask_output=multimask_output,
117 | )
118 | masks = self.postprocess_masks(
119 | low_res_masks,
120 | input_size=image_record["image"].shape[-2:],
121 | original_size=image_record["original_size"],
122 | )
123 | masks = masks > self.mask_threshold
124 | outputs.append(
125 | {
126 | "masks": masks,
127 | "iou_predictions": iou_predictions,
128 | "low_res_logits": low_res_masks,
129 | }
130 | )
131 | return outputs
132 |
133 | def postprocess_masks(
134 | self,
135 | masks: torch.Tensor,
136 | input_size: Tuple[int, ...],
137 | original_size: Tuple[int, ...],
138 | ) -> torch.Tensor:
139 | """
140 | Remove padding and upscale masks to the original image size.
141 |
142 | Arguments:
143 | masks (torch.Tensor): Batched masks from the mask_decoder,
144 | in BxCxHxW format.
145 | input_size (tuple(int, int)): The size of the image input to the
146 | model, in (H, W) format. Used to remove padding.
147 | original_size (tuple(int, int)): The original size of the image
148 | before resizing for input to the model, in (H, W) format.
149 |
150 | Returns:
151 | (torch.Tensor): Batched masks in BxCxHxW format, where (H, W)
152 | is given by original_size.
153 | """
154 | masks = F.interpolate(
155 | masks,
156 | (self.image_encoder.img_size, self.image_encoder.img_size),
157 | mode="bilinear",
158 | align_corners=False,
159 | )
160 | masks = masks[..., : input_size[0], : input_size[1]]
161 | masks = F.interpolate(masks, original_size, mode="bilinear", align_corners=False)
162 | return masks
163 |
164 | def preprocess(self, x: torch.Tensor) -> torch.Tensor:
165 | """Normalize pixel values and pad to a square input."""
166 | # Normalize colors
167 | x = (x - self.pixel_mean) / self.pixel_std
168 |
169 | # Pad
170 | h, w = x.shape[-2:]
171 | padh = self.image_encoder.img_size - h
172 | padw = self.image_encoder.img_size - w
173 | x = F.pad(x, (0, padw, 0, padh))
174 | return x
175 |
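A sketch of constructing the batched_input described in the forward docstring and calling the model with a single box prompt. The checkpoint and image paths are placeholders; build_sam and ResizeLongestSide are the helpers shipped in this segment_anything package:

# Illustrative sketch: call Sam.forward with one box prompt on one image.
# "sam_vit_h.pth" and "test_image.png" are placeholder paths.
import cv2
import torch
from segment_anything import build_sam
from segment_anything.utils.transforms import ResizeLongestSide

sam = build_sam("sam_vit_h.pth").eval()
transform = ResizeLongestSide(sam.image_encoder.img_size)

image = cv2.cvtColor(cv2.imread("test_image.png"), cv2.COLOR_BGR2RGB)
input_image = transform.apply_image(image)                      # resized for the model
image_tensor = torch.as_tensor(input_image).permute(2, 0, 1).contiguous()

boxes = torch.tensor([[100.0, 100.0, 400.0, 400.0]])            # xyxy in original coordinates
batched_input = [{
    "image": image_tensor,
    "original_size": image.shape[:2],
    "boxes": transform.apply_boxes_torch(boxes, image.shape[:2]),
}]
outputs = sam(batched_input, multimask_output=False)
print(outputs[0]["masks"].shape)  # B x 1 x H x W boolean masks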
--------------------------------------------------------------------------------
/Method/vision/GroundedSAM/segment_anything/scripts/amg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | import cv2 # type: ignore
8 |
9 | from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
10 |
11 | import argparse
12 | import json
13 | import os
14 | from typing import Any, Dict, List
15 |
16 | parser = argparse.ArgumentParser(
17 | description=(
18 | "Runs automatic mask generation on an input image or directory of images, "
19 | "and outputs masks as either PNGs or COCO-style RLEs. Requires open-cv, "
20 | "as well as pycocotools if saving in RLE format."
21 | )
22 | )
23 |
24 | parser.add_argument(
25 | "--input",
26 | type=str,
27 | required=True,
28 | help="Path to either a single input image or folder of images.",
29 | )
30 |
31 | parser.add_argument(
32 | "--output",
33 | type=str,
34 | required=True,
35 | help=(
36 | "Path to the directory where masks will be output. Output will be either a folder "
37 | "of PNGs per image or a single json with COCO-style masks."
38 | ),
39 | )
40 |
41 | parser.add_argument(
42 | "--model-type",
43 | type=str,
44 | default="default",
45 | help="The type of model to load, in ['default', 'vit_l', 'vit_b']",
46 | )
47 |
48 | parser.add_argument(
49 | "--checkpoint",
50 | type=str,
51 | required=True,
52 | help="The path to the SAM checkpoint to use for mask generation.",
53 | )
54 |
55 | parser.add_argument("--device", type=str, default="cuda", help="The device to run generation on.")
56 |
57 | parser.add_argument(
58 | "--convert-to-rle",
59 | action="store_true",
60 | help=(
61 | "Save masks as COCO RLEs in a single json instead of as a folder of PNGs. "
62 | "Requires pycocotools."
63 | ),
64 | )
65 |
66 | amg_settings = parser.add_argument_group("AMG Settings")
67 |
68 | amg_settings.add_argument(
69 | "--points-per-side",
70 | type=int,
71 | default=None,
72 | help="Generate masks by sampling a grid over the image with this many points to a side.",
73 | )
74 |
75 | amg_settings.add_argument(
76 | "--points-per-batch",
77 | type=int,
78 | default=None,
79 | help="How many input points to process simultaneously in one batch.",
80 | )
81 |
82 | amg_settings.add_argument(
83 | "--pred-iou-thresh",
84 | type=float,
85 | default=None,
86 | help="Exclude masks with a predicted score from the model that is lower than this threshold.",
87 | )
88 |
89 | amg_settings.add_argument(
90 | "--stability-score-thresh",
91 | type=float,
92 | default=None,
93 | help="Exclude masks with a stability score lower than this threshold.",
94 | )
95 |
96 | amg_settings.add_argument(
97 | "--stability-score-offset",
98 | type=float,
99 | default=None,
100 | help="Larger values perturb the mask more when measuring stability score.",
101 | )
102 |
103 | amg_settings.add_argument(
104 | "--box-nms-thresh",
105 | type=float,
106 | default=None,
107 | help="The overlap threshold for excluding a duplicate mask.",
108 | )
109 |
110 | amg_settings.add_argument(
111 | "--crop-n-layers",
112 | type=int,
113 | default=None,
114 | help=(
115 | "If >0, mask generation is run on smaller crops of the image to generate more masks. "
116 | "The value sets how many different scales to crop at."
117 | ),
118 | )
119 |
120 | amg_settings.add_argument(
121 | "--crop-nms-thresh",
122 | type=float,
123 | default=None,
124 | help="The overlap threshold for excluding duplicate masks across different crops.",
125 | )
126 |
127 | amg_settings.add_argument(
128 | "--crop-overlap-ratio",
129 | type=float,
130 | default=None,
131 | help="Larger numbers mean image crops will overlap more.",
132 | )
133 |
134 | amg_settings.add_argument(
135 | "--crop-n-points-downscale-factor",
136 | type=int,
137 | default=None,
138 | help="The number of points-per-side in each layer of crop is reduced by this factor.",
139 | )
140 |
141 | amg_settings.add_argument(
142 | "--min-mask-region-area",
143 | type=int,
144 | default=None,
145 | help=(
146 | "Disconnected mask regions or holes with area smaller than this value "
147 | "in pixels are removed by postprocessing."
148 | ),
149 | )
150 |
151 |
152 | def write_masks_to_folder(masks: List[Dict[str, Any]], path: str) -> None:
153 | header = "id,area,bbox_x0,bbox_y0,bbox_w,bbox_h,point_input_x,point_input_y,predicted_iou,stability_score,crop_box_x0,crop_box_y0,crop_box_w,crop_box_h" # noqa
154 | metadata = [header]
155 | for i, mask_data in enumerate(masks):
156 | mask = mask_data["segmentation"]
157 | filename = f"{i}.png"
158 | cv2.imwrite(os.path.join(path, filename), mask * 255)
159 | mask_metadata = [
160 | str(i),
161 | str(mask_data["area"]),
162 | *[str(x) for x in mask_data["bbox"]],
163 | *[str(x) for x in mask_data["point_coords"][0]],
164 | str(mask_data["predicted_iou"]),
165 | str(mask_data["stability_score"]),
166 | *[str(x) for x in mask_data["crop_box"]],
167 | ]
168 | row = ",".join(mask_metadata)
169 | metadata.append(row)
170 | metadata_path = os.path.join(path, "metadata.csv")
171 | with open(metadata_path, "w") as f:
172 | f.write("\n".join(metadata))
173 |
174 | return
175 |
176 |
177 | def get_amg_kwargs(args):
178 | amg_kwargs = {
179 | "points_per_side": args.points_per_side,
180 | "points_per_batch": args.points_per_batch,
181 | "pred_iou_thresh": args.pred_iou_thresh,
182 | "stability_score_thresh": args.stability_score_thresh,
183 | "stability_score_offset": args.stability_score_offset,
184 | "box_nms_thresh": args.box_nms_thresh,
185 | "crop_n_layers": args.crop_n_layers,
186 | "crop_nms_thresh": args.crop_nms_thresh,
187 | "crop_overlap_ratio": args.crop_overlap_ratio,
188 | "crop_n_points_downscale_factor": args.crop_n_points_downscale_factor,
189 | "min_mask_region_area": args.min_mask_region_area,
190 | }
191 | amg_kwargs = {k: v for k, v in amg_kwargs.items() if v is not None}
192 | return amg_kwargs
193 |
194 |
195 | def main(args: argparse.Namespace) -> None:
196 | print("Loading model...")
197 | sam = sam_model_registry[args.model_type](checkpoint=args.checkpoint)
198 | _ = sam.to(device=args.device)
199 | output_mode = "coco_rle" if args.convert_to_rle else "binary_mask"
200 | amg_kwargs = get_amg_kwargs(args)
201 | generator = SamAutomaticMaskGenerator(sam, output_mode=output_mode, **amg_kwargs)
202 |
203 | if not os.path.isdir(args.input):
204 | targets = [args.input]
205 | else:
206 | targets = [
207 | f for f in os.listdir(args.input) if not os.path.isdir(os.path.join(args.input, f))
208 | ]
209 | targets = [os.path.join(args.input, f) for f in targets]
210 |
211 | os.makedirs(args.output, exist_ok=True)
212 |
213 | for t in targets:
214 | print(f"Processing '{t}'...")
215 | image = cv2.imread(t)
216 | if image is None:
217 | print(f"Could not load '{t}' as an image, skipping...")
218 | continue
219 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
220 |
221 | masks = generator.generate(image)
222 |
223 | base = os.path.basename(t)
224 | base = os.path.splitext(base)[0]
225 | save_base = os.path.join(args.output, base)
226 | if output_mode == "binary_mask":
227 | os.makedirs(save_base, exist_ok=False)
228 | write_masks_to_folder(masks, save_base)
229 | else:
230 | save_file = save_base + ".json"
231 | with open(save_file, "w") as f:
232 | json.dump(masks, f)
233 | print("Done!")
234 |
235 |
236 | if __name__ == "__main__":
237 | args = parser.parse_args()
238 | main(args)
239 |
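The same generator can also be used programmatically, mirroring main() above. A minimal sketch with placeholder checkpoint and image paths and a couple of the AMG settings exposed by the CLI:

# Illustrative sketch: programmatic automatic mask generation.
# "sam_vit_h.pth" and "test_image.png" are placeholder paths.
import cv2
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry

sam = sam_model_registry["default"](checkpoint="sam_vit_h.pth")
sam.to(device="cuda")
generator = SamAutomaticMaskGenerator(sam, pred_iou_thresh=0.88, min_mask_region_area=100)

image = cv2.cvtColor(cv2.imread("test_image.png"), cv2.COLOR_BGR2RGB)
masks = generator.generate(image)  # list of dicts with 'segmentation', 'area', 'bbox', ...
print(len(masks), "masks generated")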
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |