├── __init__.py
├── imgs
│   ├── P01_01_frame_0000003682.jpg
│   ├── P01_01_frame_0000019463.jpg
│   ├── P01_01_frame_0000049183.jpg
│   ├── P01_01_frame_0000091442.jpg
│   ├── P04_02_frame_0000000946.jpg
│   ├── P04_02_frame_0000003062.jpg
│   ├── P04_02_frame_0000005376.jpg
│   ├── P04_02_frame_0000011581.jpg
│   ├── P04_05_frame_0000111070.png
│   ├── P04_12_frame_0000008119.png
│   ├── P04_02_frame_0000016034 take.png
│   ├── P04_02_frame_0000065888 cut.png
│   ├── P04_04_frame_0000006974 dry.png
│   ├── P04_02_frame_0000033785 insert.png
│   ├── P04_02_frame_0000065888 turn-on.png
│   ├── P04_21_frame_0000006463 close.png
│   ├── P04_21_frame_0000006463 insert.png
│   ├── P04_04_frame_0000006974 turn-off.png
│   ├── Screenshot from 2022-12-13 10-31-56.png
│   ├── Screenshot from 2022-12-14 15-54-23.png
│   ├── Screenshot from 2022-12-14 16-28-24.png
│   ├── Screenshot from 2022-12-14 16-31-39.png
│   └── Screenshot from 2022-12-14 16-36-05.png
├── P03_EPIC_100_example
│   ├── rgb
│   │   ├── P03_101_frame_0000000157.jpg
│   │   ├── P03_101_frame_0000000217.jpg
│   │   ├── P03_101_frame_0000000280.jpg
│   │   ├── P03_101_frame_0000000318.jpg
│   │   ├── P03_101_frame_0000000388.jpg
│   │   ├── P03_101_frame_0000000426.jpg
│   │   ├── P03_101_frame_0000000471.jpg
│   │   ├── P03_101_frame_0000000530.jpg
│   │   ├── P03_101_frame_0000000563.jpg
│   │   └── P03_101_frame_0000000626.jpg
│   ├── VISOR_masks
│   │   ├── P03_101_frame_0000000157.png
│   │   ├── P03_101_frame_0000000217.png
│   │   ├── P03_101_frame_0000000280.png
│   │   ├── P03_101_frame_0000000318.png
│   │   ├── P03_101_frame_0000000388.png
│   │   ├── P03_101_frame_0000000426.png
│   │   ├── P03_101_frame_0000000471.png
│   │   ├── P03_101_frame_0000000530.png
│   │   ├── P03_101_frame_0000000563.png
│   │   └── P03_101_frame_0000000626.png
│   ├── easy_EPIC_Aff
│   │   ├── P03_101_frame_0000000157.pkl
│   │   ├── P03_101_frame_0000000217.pkl
│   │   ├── P03_101_frame_0000000280.pkl
│   │   ├── P03_101_frame_0000000318.pkl
│   │   ├── P03_101_frame_0000000388.pkl
│   │   ├── P03_101_frame_0000000426.pkl
│   │   ├── P03_101_frame_0000000471.pkl
│   │   ├── P03_101_frame_0000000530.pkl
│   │   ├── P03_101_frame_0000000563.pkl
│   │   └── P03_101_frame_0000000626.pkl
│   ├── COLMAP_masks
│   │   ├── P03_101_frame_0000000157.jpg.png
│   │   ├── P03_101_frame_0000000217.jpg.png
│   │   ├── P03_101_frame_0000000280.jpg.png
│   │   ├── P03_101_frame_0000000318.jpg.png
│   │   ├── P03_101_frame_0000000388.jpg.png
│   │   ├── P03_101_frame_0000000426.jpg.png
│   │   ├── P03_101_frame_0000000471.jpg.png
│   │   ├── P03_101_frame_0000000530.jpg.png
│   │   ├── P03_101_frame_0000000563.jpg.png
│   │   └── P03_101_frame_0000000626.jpg.png
│   ├── complex_EPIC_Aff
│   │   ├── P03_101_frame_0000000157.pkl
│   │   ├── P03_101_frame_0000000217.pkl
│   │   ├── P03_101_frame_0000000280.pkl
│   │   ├── P03_101_frame_0000000318.pkl
│   │   ├── P03_101_frame_0000000388.pkl
│   │   ├── P03_101_frame_0000000426.pkl
│   │   ├── P03_101_frame_0000000471.pkl
│   │   ├── P03_101_frame_0000000530.pkl
│   │   ├── P03_101_frame_0000000563.pkl
│   │   └── P03_101_frame_0000000626.pkl
│   └── 3D_output_aff
│       ├── affordances_P03_101_frame_0000000157.pkl
│       ├── affordances_P03_101_frame_0000000217.pkl
│       ├── affordances_P03_101_frame_0000000280.pkl
│       ├── affordances_P03_101_frame_0000000318.pkl
│       ├── affordances_P03_101_frame_0000000388.pkl
│       ├── affordances_P03_101_frame_0000000426.pkl
│       ├── affordances_P03_101_frame_0000000471.pkl
│       ├── affordances_P03_101_frame_0000000530.pkl
│       ├── affordances_P03_101_frame_0000000563.pkl
│       └── affordances_P03_101_frame_0000000626.pkl
├── data_egom.py
├── read_cameras_colmap.py
├── data.py
├── README.md
├── inference_v2.py
├── utils_read_annotations.py
├── project_from_3D_to_2D.py
└── read_write_model.py
/__init__.py:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/imgs/P01_01_frame_0000003682.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P01_01_frame_0000003682.jpg
--------------------------------------------------------------------------------
/imgs/P01_01_frame_0000019463.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P01_01_frame_0000019463.jpg
--------------------------------------------------------------------------------
/imgs/P01_01_frame_0000049183.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P01_01_frame_0000049183.jpg
--------------------------------------------------------------------------------
/imgs/P01_01_frame_0000091442.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P01_01_frame_0000091442.jpg
--------------------------------------------------------------------------------
/imgs/P04_02_frame_0000000946.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000000946.jpg
--------------------------------------------------------------------------------
/imgs/P04_02_frame_0000003062.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000003062.jpg
--------------------------------------------------------------------------------
/imgs/P04_02_frame_0000005376.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000005376.jpg
--------------------------------------------------------------------------------
/imgs/P04_02_frame_0000011581.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000011581.jpg
--------------------------------------------------------------------------------
/imgs/P04_05_frame_0000111070.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_05_frame_0000111070.png
--------------------------------------------------------------------------------
/imgs/P04_12_frame_0000008119.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_12_frame_0000008119.png
--------------------------------------------------------------------------------
/imgs/P04_02_frame_0000016034 take.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000016034 take.png
--------------------------------------------------------------------------------
/imgs/P04_02_frame_0000065888 cut.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000065888 cut.png
--------------------------------------------------------------------------------
/imgs/P04_04_frame_0000006974 dry.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_04_frame_0000006974 dry.png
--------------------------------------------------------------------------------
/imgs/P04_02_frame_0000033785 insert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000033785 insert.png
--------------------------------------------------------------------------------
/imgs/P04_02_frame_0000065888 turn-on.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000065888 turn-on.png
--------------------------------------------------------------------------------
/imgs/P04_21_frame_0000006463 close.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_21_frame_0000006463 close.png
--------------------------------------------------------------------------------
/imgs/P04_21_frame_0000006463 insert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_21_frame_0000006463 insert.png
--------------------------------------------------------------------------------
/imgs/P04_04_frame_0000006974 turn-off.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_04_frame_0000006974 turn-off.png
--------------------------------------------------------------------------------
/imgs/Screenshot from 2022-12-13 10-31-56.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/Screenshot from 2022-12-13 10-31-56.png
--------------------------------------------------------------------------------
/imgs/Screenshot from 2022-12-14 15-54-23.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/Screenshot from 2022-12-14 15-54-23.png
--------------------------------------------------------------------------------
/imgs/Screenshot from 2022-12-14 16-28-24.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/Screenshot from 2022-12-14 16-28-24.png
--------------------------------------------------------------------------------
/imgs/Screenshot from 2022-12-14 16-31-39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/Screenshot from 2022-12-14 16-31-39.png
--------------------------------------------------------------------------------
/imgs/Screenshot from 2022-12-14 16-36-05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/Screenshot from 2022-12-14 16-36-05.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/rgb/P03_101_frame_0000000157.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000157.jpg
--------------------------------------------------------------------------------
/P03_EPIC_100_example/rgb/P03_101_frame_0000000217.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000217.jpg
--------------------------------------------------------------------------------
/P03_EPIC_100_example/rgb/P03_101_frame_0000000280.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000280.jpg
--------------------------------------------------------------------------------
/P03_EPIC_100_example/rgb/P03_101_frame_0000000318.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000318.jpg
--------------------------------------------------------------------------------
/P03_EPIC_100_example/rgb/P03_101_frame_0000000388.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000388.jpg
--------------------------------------------------------------------------------
/P03_EPIC_100_example/rgb/P03_101_frame_0000000426.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000426.jpg
--------------------------------------------------------------------------------
/P03_EPIC_100_example/rgb/P03_101_frame_0000000471.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000471.jpg
--------------------------------------------------------------------------------
/P03_EPIC_100_example/rgb/P03_101_frame_0000000530.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000530.jpg
--------------------------------------------------------------------------------
/P03_EPIC_100_example/rgb/P03_101_frame_0000000563.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000563.jpg
--------------------------------------------------------------------------------
/P03_EPIC_100_example/rgb/P03_101_frame_0000000626.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000626.jpg
--------------------------------------------------------------------------------
/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000157.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000157.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000217.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000217.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000280.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000280.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000318.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000318.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000388.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000388.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000426.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000426.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000471.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000471.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000530.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000530.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000563.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000563.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000626.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000626.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000157.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000157.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000217.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000217.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000280.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000280.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000318.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000318.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000388.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000388.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000426.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000426.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000471.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000471.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000530.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000530.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000563.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000563.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000626.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000626.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000157.jpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000157.jpg.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000217.jpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000217.jpg.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000280.jpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000280.jpg.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000318.jpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000318.jpg.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000388.jpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000388.jpg.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000426.jpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000426.jpg.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000471.jpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000471.jpg.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000530.jpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000530.jpg.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000563.jpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000563.jpg.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000626.jpg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000626.jpg.png
--------------------------------------------------------------------------------
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000157.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000157.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000217.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000217.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000280.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000280.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000318.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000318.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000388.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000388.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000426.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000426.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000471.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000471.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000530.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000530.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000563.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000563.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000626.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000626.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000157.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000157.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000217.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000217.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000280.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000280.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000318.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000318.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000388.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000388.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000426.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000426.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000471.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000471.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000530.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000530.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000563.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000563.pkl
--------------------------------------------------------------------------------
/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000626.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000626.pkl
--------------------------------------------------------------------------------
/data_egom.py:
--------------------------------------------------------------------------------
import os
import collections
import glob

import cv2
import numpy as np
import torch.utils.data as data

from read_write_model import read_model
from utils_read_annotations import EP100_and_VISOR_annotations


Camera = collections.namedtuple("Camera", ["id", "model", "width", "height", "params"])


class VideoSequentialDataset(data.Dataset):
    """Dataset that iterates over the sequential frames of a kitchen video."""

    def __init__(self, data_path, kitchen, height, width, frame_idxs):
        super(VideoSequentialDataset, self).__init__()
        self.colmap_poses = os.path.join(data_path, kitchen, 'colmap')
        self.masks = os.path.join(data_path, kitchen, 'selected_plus_guided_masks')
        self.rgb = os.path.join(data_path, kitchen, 'selected_plus_guided_rgb')

        self.filenames = self.read_directory()
        self.height = height
        self.width = width
        self.colors = self.get_colormap()
        self.VISOR_path = '...'
        self.EP100_and_VISOR_reader = EP100_and_VISOR_annotations(self.VISOR_path, self.rgb, kitchen)
        self.frame_idxs = frame_idxs

        # Read the COLMAP reconstruction (cameras, registered images, 3D points)
        # and keep the pinhole intrinsics of the single reconstructed camera.
        self.cameras_Colmap, self.imgs_Colmap, self.pts_Colmap = read_model(self.colmap_poses, ext=".txt")
        self.fx = self.cameras_Colmap[1].params[0]
        self.fy = self.cameras_Colmap[1].params[1]
        self.cx = self.cameras_Colmap[1].params[2]
        self.cy = self.cameras_Colmap[1].params[3]

    def read_directory(self):
        paths = glob.glob(os.path.join(self.rgb, '*.jpg'))
        paths.sort()
        return paths

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, index):
        inputs = {}
        full_filename = self.filenames[index]
        for i in self.frame_idxs:
            # Note: assumes index + i stays within the sequence bounds
            inputs[("color", i)] = self.get_color(self.filenames[index + i])
        inputs["full_filename"] = full_filename
        inputs["filename"] = full_filename.split('/')[-1]
        # The sequence id is the first two '_'-separated fields of the filename,
        # e.g. 'P03_101_frame_0000000157.jpg' -> 'P03_101'
        sequence = full_filename.split('/')[-1].split('_')[0:2]
        inputs['sequence'] = '_'.join(sequence)
        inputs["subset"] = 'train'
        inputs["aff_annotation"], inputs["EP100_annotation"], inputs['VISOR_annotation'] = \
            self.EP100_and_VISOR_reader.affordance_hotspot(inputs["filename"], inputs['subset'], inputs['sequence'])
        inputs["exists_affordance"] = self.check_exists_affordance(inputs["aff_annotation"])
        return inputs

    def check_exists_affordance(self, aff_annotation):
        if aff_annotation is not None:  # We have an annotation in EPIC-100
            if len(aff_annotation['interacting_objects']) > 0:  # The IoU is above the threshold
                return True
        return False

    def get_color(self, filename):
        img = cv2.imread(filename)
        return img

    def get_mask(self, filename):
        mask = cv2.imread(filename.replace('sampled_rgb', 'sampled_masks').replace('.jpg', '.png'),
                          cv2.IMREAD_GRAYSCALE)
        return mask

    def get_colormap(self, N=256, normalized=False):
        """Build the PASCAL VOC colour map as a {class_id: [r, g, b]} dictionary."""
        def bitget(byteval, idx):
            return ((byteval & (1 << idx)) != 0)

        dtype = 'float32' if normalized else 'uint8'
        cmap = np.zeros((N, 3), dtype=dtype)
        for i in range(N):
            r = g = b = 0
            c = i
            for j in range(8):
                r = r | (bitget(c, 0) << (7 - j))
                g = g | (bitget(c, 1) << (7 - j))
                b = b | (bitget(c, 2) << (7 - j))
                c = c >> 3
            cmap[i] = np.array([r, g, b])

        cmap = cmap / 255 if normalized else cmap
        cmap_dict = {}
        for i in range(N):
            cmap_dict[i] = [cmap[i, 0], cmap[i, 1], cmap[i, 2]]
        return cmap_dict
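
# Minimal usage sketch (hypothetical data_path; the kitchen folder must contain
# the 'colmap' and 'selected_plus_guided_*' directories expected above):
if __name__ == '__main__':
    dataset = VideoSequentialDataset('/path/to/EPIC_Aff', 'P03_EPIC_100', 480, 854, frame_idxs=[0])
    sample = dataset[0]
    print(sample['filename'], sample['sequence'], sample['exists_affordance'])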
--------------------------------------------------------------------------------
/read_cameras_colmap.py:
--------------------------------------------------------------------------------
import collections
import os

import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d  # only needed for the commented-out visualization below

Camera = collections.namedtuple(
    "Camera", ["id", "model", "width", "height", "params"])

def qvec2rotmat(qvec):
    """Quaternion (w, x, y, z, Hamilton convention) to rotation matrix."""
    return np.array([
        [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
         2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
         2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
        [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
         1 - 2 * qvec[1]**2 - 2 * qvec[3]**2,
         2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
        [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
         2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
         1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]])

def read_cameras_text(path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)
    """
    cameras = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                camera_id = int(elems[0])
                model = elems[1]
                width = int(elems[2])
                height = int(elems[3])
                params = np.array(tuple(map(float, elems[4:])))
                cameras[camera_id] = Camera(id=camera_id, model=model,
                                            width=width, height=height,
                                            params=params)
    return cameras

def read_images_txt(images_path):
    if not os.path.exists(images_path):
        raise Exception(f"No such file: {images_path}")

    with open(images_path, 'r') as f:
        lines = f.readlines()

    if len(lines) < 2:
        raise Exception(f"Invalid images.txt file: {images_path}")

    # images.txt starts with 4 comment lines; after that, each image takes two
    # lines (pose line + 2D points line), so we iterate over every other line.
    contents = lines[4:]

    img_ids = []
    img_names = []
    t_poses = []
    R_poses = []

    for content in contents[::2]:
        content_items = content.strip().split(' ')
        img_id = content_items[0]
        q_wxyz = np.array(content_items[1:5], dtype=np.float32)  # COLMAP stores wxyz
        t_xyz = np.array(content_items[5:8], dtype=np.float32)
        # Transform the quaternion into a rotation matrix (Hamilton convention),
        # then invert the world-to-camera pose to get the camera-to-world pose
        R = qvec2rotmat(q_wxyz)
        t = -R.T @ t_xyz
        R = R.T
        img_name = content_items[9]

        img_ids.append(img_id)
        img_names.append(img_name)
        t_poses.append(t)
        R_poses.append(R)

    return img_ids, img_names, t_poses, R_poses

def plot_cameras_colmap(img_names, R_poses, t_poses):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    for i in range(len(img_names)):
        T = np.column_stack((R_poses[i], t_poses[i]))
        T = np.vstack((T, (0, 0, 0, 1)))
        cam_pos = T[:3, 3]
        ax.scatter(cam_pos[0], cam_pos[1], cam_pos[2], c='r', marker='o')
        # Label each camera with its index in the sequence
        ax.text(cam_pos[0], cam_pos[1], cam_pos[2], str(i), size=10, zorder=1, color='k')
    plt.show()


camera = read_cameras_text('/home/lmur/Documents/Monodepth/sequences/P02_101_colmap/cameras.txt')
cam = camera[1]

if cam.model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"):
    fx = cam.params[0]
    fy = cam.params[1]
    cx = cam.params[2]
    cy = cam.params[3]
else:
    raise ValueError(f"Unsupported camera model: {cam.model}")

# intrinsics
K_int = np.identity(3)
K_int[0, 0] = fx
K_int[1, 1] = fy
K_int[0, 2] = cx
K_int[1, 2] = cy
K_inv = np.linalg.inv(K_int)

img_ids, img_names, t_poses, R_poses = read_images_txt('.../P02_101_part_1/sparse/images.txt')
plot_cameras_colmap(img_names, R_poses, t_poses)
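
# Illustrative use of K_inv (an addition for this sketch, not part of the original
# script): a pixel (u, v) back-projects to the ray K_inv @ [u, v, 1] in camera
# coordinates; scaling that ray by a depth z gives the 3D point.
ray = K_inv @ np.array([cx, cy, 1.0])
print('Ray through the principal point:', ray)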

# Optional Open3D visualization of the recovered camera poses:
"""
visor = o3d.visualization.Visualizer()
visor.create_window()
for i in range(len(img_names)):
    T = np.column_stack((R_poses[i], t_poses[i]))
    T = np.vstack((T, (0, 0, 0, 1)))
    axis = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5)
    axis.transform(T)
    visor.add_geometry(axis)
    visor.poll_events()
    visor.update_renderer()
visor.run()
"""
--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
import os
import pickle

import cv2
import numpy as np
from scipy.stats import multivariate_normal

from utils.valid_interactions import colormap_interactions


# Dataset class that loads an image and its corresponding pickle file of labels
class Ego_Metric_training_dataset():
    def __init__(self, Ego_Metric_dataset_path):
        self.main_dir = Ego_Metric_dataset_path
        self.samples_txt = os.path.join(self.main_dir, 'samples.txt')
        self.img_dir = 'selected_plus_guided_rgb'
        self.label_2d = '2d_output_labels'
        self.label_3d = 'aff_on_3d'
        self.valid_verbs = ['take', 'remove', 'put', 'insert', 'throw', 'wash', 'dry', 'open', 'turn-on',
                            'close', 'turn-off', 'mix', 'fill', 'add', 'cut', 'peel', 'empty',
                            'shake', 'squeeze', 'press', 'cook', 'move', 'adjust', 'eat',
                            'drink', 'apply', 'sprinkle', 'fold', 'sort', 'clean', 'slice', 'pick']
        self.height = 480
        self.width = 854
        self.size = 500  # side of the square window holding one Gaussian kernel
        self.samples = self.obtain_samples()
        self.pos = self.get_pos_for_gaussian()
        self.gaussian = self.get_gaussian()
        self.colormap_interactions = colormap_interactions

    def obtain_samples(self):
        samples = []
        for kitchen in os.listdir(self.main_dir):
            if kitchen != 'samples.txt':
                if not os.path.exists(os.path.join(self.main_dir, kitchen, self.label_2d)):
                    continue
                for sample in os.listdir(os.path.join(self.main_dir, kitchen, self.label_2d)):
                    sample_id = sample.split('.')[0]
                    samples.append(kitchen + '/' + sample_id)
        return samples

    def __len__(self):
        return len(self.samples)

    def get_pos_for_gaussian(self):
        x, y = np.mgrid[0:self.width:1, 0:self.height:1]
        pos = np.empty(x.shape + (2,))
        pos[:, :, 0] = x
        pos[:, :, 1] = y
        return pos

    def get_gaussian(self):
        # Precompute a single Gaussian kernel; it is later pasted (clipped at the
        # image borders) around every interaction point instead of evaluating a
        # multivariate normal per point.
        x, y = np.mgrid[0:self.size:1, 0:self.size:1]
        pos = np.empty(x.shape + (2,))
        pos[:, :, 0] = x
        pos[:, :, 1] = y
        gaussian = multivariate_normal(mean=[self.size // 2, self.size // 2], cov=np.eye(2) * 1000)
        return gaussian.pdf(pos)

    def get_masks_from_pickle(self, data):
        # Cluster the interaction points by verb
        interaction_coordinates = {}
        verbs_data = data['verbs']
        points_data = data['points']
        for verb, point in zip(verbs_data, points_data):
            interaction_coordinates.setdefault(verb, []).append(point)

        # Draw the hotspots of the clusters: one binary mask per valid verb, so
        # that the channel index c always matches the index in self.valid_verbs
        masks = np.zeros((len(self.valid_verbs), self.height, self.width))
        for c, verb_class in enumerate(self.valid_verbs):
            if verb_class in interaction_coordinates:
                prob_sum = np.zeros((self.width, self.height))
                for coords in interaction_coordinates[verb_class]:
                    point = coords[0:2].astype(int)
                    prob = np.zeros((self.width, self.height))

                    # Paste the precomputed Gaussian kernel centred on the point,
                    # clipping the window at the image borders
                    if (self.width - point[0]) > self.size // 2:
                        gauss_right = self.size
                        prob_right = point[0] + self.size // 2
                    else:
                        gauss_right = self.width - point[0] + self.size // 2
                        prob_right = self.width
                    if point[0] > self.size // 2:
                        gauss_left = 0
                        prob_left = point[0] - self.size // 2
                    else:
                        gauss_left = self.size // 2 - point[0]
                        prob_left = 0
                    if (self.height - point[1]) > self.size // 2:
                        gauss_bottom = self.size
                        prob_bottom = point[1] + self.size // 2
                    else:
                        gauss_bottom = self.height - point[1] + self.size // 2
                        prob_bottom = self.height
                    if point[1] > self.size // 2:
                        gauss_top = 0
                        prob_top = point[1] - self.size // 2
                    else:
                        gauss_top = self.size // 2 - point[1]
                        prob_top = 0
                    prob[int(prob_left):int(prob_right), int(prob_top):int(prob_bottom)] = \
                        self.gaussian[int(gauss_left):int(gauss_right), int(gauss_top):int(gauss_bottom)]
                    prob_sum += prob

                prob_sum = (prob_sum / np.max(prob_sum)).T
                prob_sum[prob_sum < 0.25] = 0   # If prob_sum < 0.25, set it to 0
                prob_sum[prob_sum >= 0.25] = 1  # If prob_sum >= 0.25, set it to 1
                masks[c, :, :] = prob_sum
        return masks

    def visualize(self, img, masks, selected_verb):
        img_copy = img.copy()
        selected_verb_idx = self.valid_verbs.index(selected_verb)
        selected_mask = masks[selected_verb_idx, :, :]
        selected_mask_2 = selected_mask[:, :, np.newaxis].astype(np.uint8)
        color = np.array(self.colormap_interactions[selected_verb]).reshape(1, 3)
        prob_paint = (selected_mask_2 @ color).astype(np.uint8)
        img_copy = cv2.addWeighted(img_copy, 1.0, prob_paint, 1.0, 0)
        cv2.imwrite(os.path.join('/home/lmur/Desktop/EGO_METRIC_Dataset_v3/Kitchens/P04_EPIC_55/show/img.png'), img_copy)

    def __getitem__(self, idx):
        kitchen, sample_id = self.samples[idx].split('/')
        # Load the image
        img_path = os.path.join(self.main_dir, kitchen, self.img_dir, sample_id + '.jpg')
        img = cv2.imread(img_path)
        # Load the labels and turn the labelled points into per-verb binary masks
        label_2d_path = os.path.join(self.main_dir, kitchen, self.label_2d, sample_id + '.pkl')
        with open(label_2d_path, 'rb') as f:
            data_2d = pickle.load(f)
        masks = self.get_masks_from_pickle(data_2d)
        return img, masks


data = Ego_Metric_training_dataset('...')
img, masks = data[15]
# data.visualize(img, masks, 'cut')
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Epic-Aff Dataset

This is the dataset introduced in the ICCV 2023 conference paper **Multi-label affordance mapping from egocentric vision** 🎉🎉, by Lorenzo Mur-Labadia, Ruben Martinez-Cantin and Josechu Guerrero Campo from the University of Zaragoza.
Please do not hesitate to send any questions to *lmur@unizar.es* ✉️

## Dataset creation: automatic annotations

The EPIC-Aff dataset is a new dataset built on Epic Kitchens 100 and Epic Kitchens VISOR, containing **automatic annotations with multi-label segmentation masks for the interaction hotspots**, generated by the intersection of both datasets. We provide **38,335** images in two different versions of the dataset (easy EPIC-Aff with 20 classes and complex EPIC-Aff with 50 classes). The annotations represent the hotspots in space that afford an action, extracted from the past interactions performed on that region and the current scene context (present objects). Please refer to the paper for more information.

The total size of the dataset is 15 GB, which we have split by data type. We also provide an example sequence in `P03_EPIC_100_example`. The full dataset can be downloaded [here](https://zenodo.org/record/8162678).

- **Images** 📸: we already provide the images extracted from the videos of Epic Kitchens 100 at 480x854 resolution. This avoids downloading the roughly 700 GB of that dense dataset. [link](https://zenodo.org/record/8162678/files/EPIC_Aff_images.zip?download=1)

- **Annotations in 3D** 📝: in pickle format, we provide a dictionary with the COLMAP data (camera pose, camera intrinsics and keypoints), the distribution of the interacting objects, the annotation of the interaction, and the distribution of the neutral objects. We encourage the research community to use this data to develop new tasks like goal path planning. A loading sketch is shown below this list. [link](https://zenodo.org/record/8162678/files/EPIC_Aff_3D_output.zip?download=1)

- **Affordance annotations in 2D** 📝: we have already run project_from_3D_to_2D.py for all the sequences in order to provide a pickle dictionary with the location of the interaction points for the afforded actions (see the loading sketch below this list). We provide two versions of the dataset:
  - Easy EPIC-Aff (20 classes): [link](https://zenodo.org/record/8162678/files/EPIC_Aff_20_classes_2d_output_labels.zip?download=1)
  - Complex EPIC-Aff (50 classes): [link](https://zenodo.org/record/8162678/files/EPIC_Aff_50_classes_2d_output_labels.zip?download=1)

- **VISOR masks** 🎭: the semantic masks with the active objects, which we consider dynamic. In order to obtain the dynamic masks for COLMAP, we select the dynamic and static objects. [link](https://zenodo.org/record/8162678/files/EPIC_Aff_masks_from_VISOR.zip?download=1)

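A minimal loading sketch for both annotation types. The `verbs`/`points` keys follow the 2D-label format consumed by data.py; for the 3D pickles we simply print the available keys, since the exact field names should be checked against the download:

```python
import pickle

# 2D affordance labels: interaction points and their afforded verbs
with open('P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000157.pkl', 'rb') as f:
    labels_2d = pickle.load(f)
print(labels_2d['verbs'])   # one afforded verb per point
print(labels_2d['points'])  # pixel coordinates of the interaction points

# 3D annotations: a dictionary with the COLMAP data and the object distributions
with open('P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000157.pkl', 'rb') as f:
    annotations_3d = pickle.load(f)
print(annotations_3d.keys())
```
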
We detail the procedure for extracting multi-label affordance regions below.

### 1. Detect the spatial localization of the interaction

On one hand, we use the narration annotations of Epic Kitchens 100 to obtain the semantics of the interaction (e.g. "cut onion"). Then, we use the masks provided by EPIC VISOR to discover the location of that interaction, placed at the center of the intersection between the respective hand/glove mask and the interacting-object mask. This provides an understanding of where the interaction occurs at that time step.
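
A minimal sketch of this localization step, with a hypothetical helper that assumes the hand and object segmentations are given as binary masks:

```python
import numpy as np

def interaction_point(hand_mask: np.ndarray, object_mask: np.ndarray):
    """Center of the hand/object mask intersection, or None if they do not touch."""
    contact = np.logical_and(hand_mask, object_mask)
    if not contact.any():
        return None
    ys, xs = np.nonzero(contact)
    return float(xs.mean()), float(ys.mean())  # (x, y) in pixels
```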
### 2. Lift everything to 3D

In a second stage, using Structure-from-Motion (COLMAP), we get the camera pose and the global localization of the interaction in 3D space. This creates a historical distribution of all the actions taken in that environment, cross-linked across different episodes. In the following images, we show in blue the different camera poses, in grey the COLMAP keypoints, and the different locations where the interactions occur. For each specific physical kitchen, we accumulate all the EPIC videos where the agent interacted. Note that for some sequences the EPIC-55 and EPIC-100 recordings come from different kitchens, while for others it is the same environment.

This creates a 3D representation with all the past interactions performed in that environment.
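
A sketch of the lifting operation, assuming pinhole intrinsics `(fx, fy, cx, cy)`, a metric depth `z` at the interaction pixel, and the camera-to-world pose `(R, t)` recovered as in read_cameras_colmap.py:

```python
import numpy as np

def lift_to_world(u, v, z, fx, fy, cx, cy, R, t):
    """Back-project pixel (u, v) at metric depth z into world coordinates."""
    p_cam = np.array([(u - cx) * z / fx, (v - cy) * z / fy, z])
    return R @ p_cam + t  # camera-to-world transform
```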

### 3. Reproject from 3D to 2D to obtain the affordances

Using the camera intrinsic matrix and the camera pose provided in the "3D_output" directories, we reproject all the past interactions by running *project_from_3D_to_2D.py*. Since the affordances are all the possible actions for the agent depending on the context, we filter the past interactions by the current distribution of the objects at each time step. For that, we use the VISOR annotations for the active objects, and we assume a constant distribution of the passive objects (cupboard, oven, hob, fridge), since their distribution does not change with time. For example, although the VISOR annotation does not detect any "active cupboard", if we have opened a cupboard in the past at that location, it means that there is an inactive cupboard there. Therefore, we should report that past interaction as an affordance, since it is a possible action associated with that 3D region.
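
The reprojection is the inverse of the lifting step above; a minimal sketch under the same assumptions:

```python
import numpy as np

def project_to_image(p_world, fx, fy, cx, cy, R, t):
    """Project a 3D world point into pixel coordinates (u, v)."""
    p_cam = R.T @ (p_world - t)  # world-to-camera transform
    u = fx * p_cam[0] / p_cam[2] + cx
    v = fy * p_cam[1] / p_cam[2] + cy
    return u, v
```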

We show some images of different affordances. Each point represents the location of a past interaction whose interacting objects are present.
Finally, we apply a Gaussian heatmap to each afforded action in order to create a potential interaction region. We show, respectively: takeable, insertable, cuttable and dryable. Note that at inference we assume a positive affordance label when the Gaussian heatmap is greater than 0.25.
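
This mirrors the logic of `get_masks_from_pickle` in data.py; condensed (and without the border-clipping optimization of the real loader), it amounts to:

```python
import numpy as np
from scipy.stats import multivariate_normal

def heatmap_mask(points, height=480, width=854, threshold=0.25):
    """Sum a Gaussian per interaction point, normalize, and threshold."""
    yy, xx = np.mgrid[0:height, 0:width]
    grid = np.dstack([xx, yy])
    heat = np.zeros((height, width))
    for (x, y) in points:
        heat += multivariate_normal(mean=[x, y], cov=np.eye(2) * 1000).pdf(grid)
    heat /= heat.max()
    return (heat >= threshold).astype(np.uint8)
```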
*Note*: the files in the *2D_output_labels* directories only contain the pixel points with the affordances and their semantic labels. When you run data.py, the dataloader includes a function that builds the Gaussian heatmaps efficiently on the fly. This avoids loading the *N* masks from disk.

## Dataset pipeline
We also share the code of the dataset extraction pipeline, and we encourage the research community to apply it in other scenarios.
--------------------------------------------------------------------------------
/inference_v2.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import torch
4 | import data_egom
5 | import glob
6 | import matplotlib.pyplot as plt
7 | import open3d as o3d
8 | import pickle
9 | import cv2
10 | import time
11 |
12 |
13 | class Inference:
14 | def __init__(self):
15 | self.height = 480
16 | self.width = 854
17 | self.frame_idxs = [0]
18 | self.data_path = '...'
19 | self.kitchen = 'P03_EPIC_100'
20 |
21 | self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
22 | self.dataset = data_egom.VideoSequentialDataset(self.data_path, self.kitchen, self.height, self.width, self.frame_idxs)
23 | self.palette = self.dataset.colors
24 | self.output_dir = os.path.join(self.data_path, self.kitchen, '3D_output')
25 | if not os.path.exists(self.output_dir):
26 | os.mkdir(self.output_dir)
27 | self.output_dir_2d = os.path.join(self.data_path, self.kitchen, 'aff_on_2d')
28 | if not os.path.exists(self.output_dir_2d):
29 | os.mkdir(self.output_dir_2d)
30 |
31 | self.alpha = 0.6
32 | self.depth_model_type = "DPT_Hybrid" #"DPT_Large"
33 | self.depth_model = torch.hub.load("intel-isl/MiDaS", self.depth_model_type)
34 | self.depth_model.to(self.device)
35 | self.depth_model.eval()
36 | self.depth_transforms = torch.hub.load("intel-isl/MiDaS", "transforms").dpt_transform
37 |
38 |
39 | def depth_extractor(self, img, filename):
40 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
41 | input_batch = self.depth_transforms(img).to(self.device)
42 | with torch.no_grad():
43 | prediction = self.depth_model(input_batch)
44 | prediction = torch.nn.functional.interpolate(
45 | prediction.unsqueeze(1),
46 | size=img.shape[:2],
47 | mode="bicubic",
48 | align_corners=False,).squeeze()
49 | disparity = prediction.cpu().numpy()
50 | depth = 1 / disparity
51 | return depth
52 |
53 | def paint_affordance_hotpots(self, points, label): #SIMPLIFY THIS FUNCTION
54 | mask = np.zeros((self.height, self.width))
55 | aff = label
56 | aff_center = np.array(aff['affordance_center'])
57 | cv2.circle(mask, (int(aff_center[0]), int(aff_center[1])), 0, 1, -1)
58 | mask = mask.astype(bool)
59 | points = points[mask]
60 |
61 | painting_color = np.array(self.palette[label['verb_id']])
62 | img = (np.ones((self.height, self.width, 3)) * painting_color)[mask]
63 | return points, img
64 |
65 | def obtain_rgbd(self, depth, scale):
66 | z = depth * scale
67 | x = (np.tile(np.arange(self.width), (self.height, 1)) - self.dataset.cx) * z / self.dataset.fx
68 | y = (np.tile(np.arange(self.height), (self.width, 1)).T - self.dataset.cy) * z / self.dataset.fy
69 | points = np.stack([x, y, z], axis=2) #h, w, 3
70 | return points
71 |
72 | def new_scale_SfM_depth(self, depth, colmap_depths, colmap_coords):
73 | SfM_depth, NN_depth = [], []
74 | for kypt in range(len(colmap_coords)):
75 | SfM_depth.append(colmap_depths[kypt]) #Interpretation 1 of the depth: La distancia entre el plano de la camara y el plano paralelo que corta el punto en 3D
76 | # Change order in coords, from XY to YX!!!
77 | u_interp = colmap_coords[kypt, 1] % 1
78 | v_interp = colmap_coords[kypt, 0] % 1
79 | u = int(colmap_coords[kypt, 1])
80 | v = int(colmap_coords[kypt, 0])
81 | if u < self.width - 1 and v < self.height - 1:
82 | interpolated_NN_depth = (1 - u_interp) * (1 - v_interp) * depth[v, u] + u_interp * (1 - v_interp) * depth[v, u + 1] + (1 - u_interp) * v_interp * depth[v + 1, u] + u_interp * v_interp * depth[v + 1, u + 1]
83 | NN_depth.append(interpolated_NN_depth)
84 | if u > self.width:
85 | print('Warning: keypoint u coordinate out of bounds!', u)
86 | if v > self.height:
87 | print('Warning: keypoint v coordinate out of bounds!', v)
88 | local_scale = np.median(np.array(SfM_depth)) / np.median(np.array(NN_depth))
89 | return local_scale
90 |
91 | def image_to_extrinsics(self, img):
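   | #COLMAP stores the world-to-camera transform (qvec, tvec); inverting it gives
   | #the camera-to-world rotation R = Rc^T and the camera center t = -Rc^T @ tc.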
92 | Rc, tc = img.qvec2rotmat(), img.tvec
93 | t = -Rc.T @ tc
94 | R = Rc.T
95 | return R, t
96 |
97 | def paint_aff_on_2D(self, img, frame_dict, keypoints):
98 | label = frame_dict["aff_annotation"]
99 | img_name = frame_dict['filename']
100 | ep100_label = frame_dict['EP100_annotation']
101 | visors_objects = frame_dict['VISOR_annotation']
102 | font = cv2.FONT_HERSHEY_PLAIN
103 | c = 0
104 | if label is not None:
105 | for h in range(len(label['hands'])):
106 | hand_bbox = label['hands'][h]['hand_bbox']
107 | cv2.rectangle(img, (hand_bbox[0], hand_bbox[1]), (hand_bbox[2], hand_bbox[3]), color=(0, 255, 0), thickness=2)
108 | for o in range(len(label['neutral_objects'])):
109 | obj_bbox = label['neutral_objects'][o]['noun_bbox']
110 | cv2.rectangle(img, (obj_bbox[0], obj_bbox[1]), (obj_bbox[2], obj_bbox[3]), color=(0, 255, 255), thickness=2)
111 | for aff_o in range(len(label['interacting_objects'])):
112 | obj_bbox = label['interacting_objects'][aff_o]['noun_bbox']
113 | cv2.rectangle(img, (obj_bbox[0], obj_bbox[1]), (obj_bbox[2], obj_bbox[3]), color=(255, 0, 0), thickness=5)
114 | x_center, y_center = label['interacting_objects'][aff_o]['affordance_center']
115 | cv2.circle(img, (int(x_center), int(y_center)), radius=10, color=(255, 0, 0), thickness=15)
116 | text = 'The ' + label['interacting_objects'][aff_o]['hand'] + ' is ' + label['interacting_objects'][aff_o]['verb'] + ' the ' + label['interacting_objects'][aff_o]['noun']
117 | cv2.putText(img, text, (10, 30 * (c + 1)), font, 1.5, (0, 255, 0), 2, cv2.LINE_AA)
118 | c += 1
119 | if keypoints is not None:
120 | for kp in range(len(keypoints)):
121 | cv2.circle(img, (int(keypoints[kp, 1]), int(keypoints[kp, 0])), radius=1, color=(255, 255, 255), thickness=1)
122 | if ep100_label is not None:
123 | for i in range(len(ep100_label)):
124 | text = 'EP100 origi is: ' + ep100_label[i]['non_remapped_noun'] + ' remapped: ' + ep100_label[i]['noun'] + ' The verb ' + ep100_label[i]['verb']
125 | cv2.putText(img, text, (10, 100 + i*20), font, 1.5, (255,0,0), 2, cv2.LINE_AA)
126 | for i in range(len(visors_objects)):
127 | cv2.putText(img, 'Object given by VISOR ' + visors_objects[i], (10, 110 + 30 * (i + 1)), font, 1.5, (0,0,255), 2, cv2.LINE_AA)
128 | cv2.imwrite(os.path.join(self.output_dir_2d, img_name), img)
129 |
130 |
131 | def run(self):
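   | #Pipeline: for every frame, look up its COLMAP pose, collect the visible SfM
   | #keypoints and their depths, predict a MiDaS depth map, rescale it with the
   | #SfM depths, back-project the 2D affordance centers to 3D world points, and
   | #save everything to a pickle; finally, cameras, keypoints and affordance
   | #points are visualized with Open3D.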
132 | all_abs_depth, all_abs_colors, cameras, all_keypoints, all_rgb_keypoints = [], [], [], [], []
133 | global_counter = 0
134 | for i in range(len(self.dataset)):
135 | output_all = {}
136 | frame_dict = self.dataset[i]
137 |
138 | print('---------We are analyzing frame', i, '--------- corresponding to image', frame_dict['filename'], '---------')
139 | try:
140 | v = next(v for v in self.dataset.imgs_Colmap.values() if v.name == frame_dict['filename'])
141 | except StopIteration:
142 | if frame_dict['exists_affordance']:
143 | global_counter += 1
144 | print('We lost the camera pose for this image')
145 | continue
146 |
147 | R, t = self.image_to_extrinsics(v) #Location of camera with respect to the world
148 | cameras.append(t)
149 | label_t = frame_dict["aff_annotation"]
150 | colmap_coords = None
151 | output_all['EGOMETRIC_label'] = {'affordance_labels': []}
152 | if frame_dict['exists_affordance']:
153 | colmap_depths = np.array([(v.qvec2rotmat() @ self.dataset.pts_Colmap[p3d].xyz + v.tvec)[2] for p3d in v.point3D_ids[v.point3D_ids > -1]]) #Keypoint depths: z after transforming to camera coordinates
154 | colmap_coords = np.array([v.xys[np.where(v.point3D_ids == p3d)][0, ::-1] for p3d in v.point3D_ids[v.point3D_ids > -1]]) #2D keypoint locations, reversed from (x, y) to (y, x)
155 | colmap_keypoints = np.array([self.dataset.pts_Colmap[p3d].xyz for p3d in v.point3D_ids[v.point3D_ids > -1]]) #3D keypoints in world coordinates
156 | colmap_rgb = np.array([self.dataset.pts_Colmap[p3d].rgb for p3d in v.point3D_ids[v.point3D_ids > -1]]) #Keypoint RGB colors
157 | colmap_rgb = self.alpha * colmap_rgb + (1 - self.alpha) * 255
158 | all_keypoints.append(colmap_keypoints)
159 | all_rgb_keypoints.append(colmap_rgb)
160 | depth = self.depth_extractor(frame_dict[('color', 0)], frame_dict['filename']) #Relative (unscaled) depth map predicted by MiDaS
161 | local_scale = self.new_scale_SfM_depth(depth, colmap_depths, colmap_coords)
162 | rescaled_rgbd = self.obtain_rgbd(depth, local_scale)
163 |
164 | for aff in range(len(label_t['interacting_objects'])):
165 | rel_points, rel_colors = self.paint_affordance_hotspots(rescaled_rgbd, label_t['interacting_objects'][aff])
166 | abs_points = np.dot(R, rel_points.reshape(-1, 3).T).T + t
167 | abs_colors = np.reshape(rel_colors, (-1, 3))
170 | all_abs_depth.append(abs_points)
171 | all_abs_colors.append(abs_colors)
172 | dict_aff = {'3D_aff_points': abs_points,
173 | '3D_aff_colors': abs_colors,
174 | 'aff_noun': label_t['interacting_objects'][aff]['noun'],
175 | 'aff_noun_id': label_t['interacting_objects'][aff]['noun_id'],
176 | 'aff_verb': label_t['interacting_objects'][aff]['verb'],
177 | 'aff_verb_id': label_t['interacting_objects'][aff]['verb_id']}
178 | output_all['EGOMETRIC_label']['affordance_labels'].append(dict_aff)
185 | #self.paint_aff_on_2D(frame_dict[('color', 0)], frame_dict, colmap_coords)
186 | output_all['colmap'] = {}
187 | output_all['colmap']['keypoints_3D'] = colmap_keypoints
188 | output_all['colmap']['keypoints_rgb'] = colmap_rgb
189 | output_all['colmap']['keypoints_2D'] = colmap_coords
190 | output_all['colmap']['R_pos'] = R #Camera-to-world rotation matrix
191 | output_all['colmap']['t_pos'] = t #Camera center in world coordinates
192 | output_all['VISOR'] = {}
193 | if label_t is not None:
194 | output_all['VISOR']['neutral_objects'] = label_t['neutral_objects']
195 | output_all['VISOR']['hands'] = label_t['hands']
196 | output_all['VISOR']['interacting_objects'] = label_t['interacting_objects']
197 | output_all['EPIC_100'] = frame_dict['EP100_annotation']
198 | output_all['filename'] = frame_dict['filename'] #Name of the image
199 | output_all['sequence'] = frame_dict['sequence'] #Name of the sequence
200 |
201 | #Save the output_all dictionary to a pickle file
202 | output_filename = os.path.join(self.output_dir, 'affordances_' + output_all['filename'].split('.')[0] +'.pkl')
204 | with open(output_filename, 'wb') as f:
205 | pickle.dump(output_all, f)
206 |
207 |
208 | #Aggregate the camera poses, keypoints and affordance points across all frames
209 | cameras = np.array(cameras)
210 | keypoints = np.concatenate(all_keypoints, axis=0)
211 | abs_depth = np.concatenate(all_abs_depth, axis=0)
212 | abs_colors = np.concatenate(all_abs_colors, axis=0)
213 | rgb_keypoints = np.concatenate(all_rgb_keypoints, axis=0)
214 | print(keypoints.shape, abs_depth.shape, abs_colors.shape, rgb_keypoints.shape)
215 |
222 |
223 | #Plot the camera pose and the sparse point cloud with Open3D
224 | pcd_plot = o3d.geometry.PointCloud()
226 | pcd_plot.points = o3d.utility.Vector3dVector(abs_depth)
227 | pcd_plot.colors = o3d.utility.Vector3dVector(abs_colors / 255.0)
228 | cameras_plot = o3d.geometry.PointCloud()
229 | cameras_plot.points = o3d.utility.Vector3dVector(cameras)
230 | cameras_plot.colors = o3d.utility.Vector3dVector(np.array([[0, 0, 1] for i in range(cameras.shape[0])]))
231 | keypoints_plot = o3d.geometry.PointCloud()
232 | keypoints_plot.points = o3d.utility.Vector3dVector(keypoints)
233 | keypoints_plot.colors = o3d.utility.Vector3dVector(rgb_keypoints / 255.0)
234 | o3d.visualization.draw_geometries([cameras_plot, pcd_plot, keypoints_plot], height = 800, width = 1200)
235 |
236 |
237 |
238 | inf = Inference()
239 | inf.run()
240 |
241 |
242 |
243 |
244 |
--------------------------------------------------------------------------------
/utils_read_annotations.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import numpy as np
4 | import ijson
5 | import json
6 | import matplotlib.pyplot as plt
7 | import pandas as pd
8 | import time
9 |
10 |
11 | class EP100_and_VISOR_annotations():
12 | def __init__(self, VISOR_path, img_dir, kitchen):
13 | #Visor dataset
14 | self.VISOR_json_dir_dense = os.path.join(VISOR_path, 'Interpolations-DenseAnnotations', 'train')
15 | self.VISOR_json_dir_sparse = os.path.join(VISOR_path, 'GroundTruth-SparseAnnotations', 'annotations', 'train')
16 | self.kitchen = kitchen.split('_')[0]
17 |
18 | self.all_dense_VISOR_jsons = {}
19 | self.all_sparse_VISOR_jsons = {}
20 | for root, dirs, files in os.walk(self.VISOR_json_dir_dense):
21 | for file in files:
22 | kitchen_name = file.split('_')[0]
23 | sequence_name = file.split('_')[1]
24 | if kitchen_name == self.kitchen and file.endswith('.json'):
25 | dense_file = os.path.join(self.VISOR_json_dir_dense, file)
26 | sparse_file = os.path.join(self.VISOR_json_dir_sparse, file)[:-20] + '.json'
27 | self.all_sparse_VISOR_jsons[kitchen_name + '_' + sequence_name] = sparse_file
28 | self.all_dense_VISOR_jsons[kitchen_name + '_' + sequence_name] = dense_file
29 | print(self.all_sparse_VISOR_jsons)
30 | self.hands = ['left hand', 'hand:left', 'right hand', 'hand:right']
31 |
32 | #Read the EPIC-Kitchen 100 narration
33 | self.EPIC_100_pkl = os.path.join(VISOR_path, 'EPIC_100_train.pkl')
34 | self.EPIC_100_narration = pd.read_pickle(self.EPIC_100_pkl)
35 |
36 | #Dictionary to remap the VISOR and EPIC-100 classes
37 | self.EPIC_100_nouns = os.path.join(VISOR_path, 'EPIC_100_noun_classes_v2.csv')
38 | self.EPIC_100_nouns = pd.read_csv(self.EPIC_100_nouns)
39 |
40 | #Directory with the sampled images, which we have their colmap poses
41 | self.img_dir = img_dir
42 |
43 | def affordance_hotspot(self, img_name, subset, sequence):
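   | #Matches VISOR entities against the EPIC-100 narration active at this frame:
   | #hands are collected directly; an object becomes 'interacting' when its noun
   | #appears in the narration and its bbox overlaps the hand bbox, in which case
   | #the affordance center is the center of that intersection.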
44 | #Output dictionary with the bounding boxes of the interacting hands and objects
45 | output = {'neutral_objects': [], 'interacting_objects': [], 'hands': []}
46 | VISOR_active_objects_list = []
47 | frame_id = int(img_name.split('.')[0].split('_')[-1])
48 | EP100_narration_list = self.read_EPIC_100_annot(frame_id, sequence)
49 | if EP100_narration_list is not None: #If there is a narration for the frame
50 | print('Reading annotations for', sequence, img_name)
51 | VISOR_active_objects, divisor = self.read_VISOR_annot(img_name, subset, sequence) #Read the VISOR annotations
52 | if VISOR_active_objects is not None: #If there is a VISOR annotation for the frame
53 | for narration in range(len(EP100_narration_list)): #We can have multiple narrations for the same frame
54 | EP100_narration = EP100_narration_list[narration] #Read the EPIC-100 narration
55 | for e_idx, entity in enumerate(VISOR_active_objects): #Read the VISOR annotations
56 | VISOR_active_objects_list.append(entity['name']) #To show later the active objects in the image
57 | if entity['name'] in self.hands:
58 | hand_bbox = self.get_bbox_from_segment(entity['segments']) #Add the bounding box of the hand
59 | output['hands'].append({'hand': entity['name'], 'hand_bbox': tuple([int(item / divisor) for item in hand_bbox])})
60 | for e_idx2, entity_2 in enumerate(VISOR_active_objects): #VISOR annotations use 'name'; after remapping we call it 'noun', consistent with EP100
61 | entity_2_name = self.remap_VISOR_annot(entity_2)['noun']
62 | if entity_2_name in self.hands:
63 | continue
64 | elif entity_2_name in EP100_narration['noun']:
65 | obj_bbox = self.get_bbox_from_segment(entity_2['segments'])
66 | cond_aff_intersect, aff_bbox = self.get_intersection_bbox(hand_bbox, obj_bbox)
67 | if cond_aff_intersect:
68 | x_center, y_center = self.get_bbox_center(aff_bbox)
69 | output['interacting_objects'].append({'hand': entity['name'],
70 | 'verb': EP100_narration['verb'],
71 | 'verb_id': EP100_narration['verb_id'],
72 | 'noun': entity_2_name,
73 | 'noun_id': EP100_narration['noun_id'],
74 | 'noun_bbox': tuple([int(item / divisor) for item in obj_bbox]),
75 | 'hand_bbox': tuple([int(item / divisor) for item in hand_bbox]),
76 | 'affordance_bbox': tuple([int(item / divisor) for item in aff_bbox]),
77 | 'affordance_center': (int(x_center / divisor), int(y_center / divisor))})
78 | print('There is an interaction!!!:))')
79 | else:
80 | output['neutral_objects'].append({'noun': entity_2_name, 'noun_bbox': tuple([int(item / divisor) for item in obj_bbox])})
81 | else:
82 | output['neutral_objects'].append({'noun': entity_2_name, 'noun_bbox': tuple([int(item / divisor) for item in self.get_bbox_from_segment(entity_2['segments'])])})
83 | #TODO: if there are no interacting objects, add the interaction at the center of the hand bounding box
84 | #if len(output['interacting_objects']) == 0:
85 |
86 | else:
87 | output = None
88 | return output, EP100_narration_list, VISOR_active_objects_list
89 |
90 | def affordance_hotspot_visual(self, img_name, subset, sequence):
91 | #Output dictionary with the bounding boxes of the interacting hands and objects
92 | output = {'neutral_objects': [], 'interacting_objects': [], 'hands': []}
93 | self.img_path = os.path.join(self.img_dir, img_name + '.jpg')
94 | frame_id = int(img_name.split('_')[-1])
95 | VISOR_active_objects, divisor = self.read_VISOR_annot(img_name, subset, sequence)
96 | EP100_narration_list = self.read_EPIC_100_annot(frame_id, sequence)
97 | self.img_show = cv2.imread(self.img_path)
98 | for narration in range(len(EP100_narration_list)):
99 | EP100_narration = EP100_narration_list[narration]
100 | for e_idx, entity in enumerate(VISOR_active_objects):
101 | if entity['name'] in self.hands:
102 | hand_bbox = self.get_bbox_from_segment(entity['segments'])
103 | output['hands'].append({'hand': entity['name'], 'hand_bbox': hand_bbox})
104 | cv2.rectangle(self.img_show, (hand_bbox[0], hand_bbox[1]), (hand_bbox[2], hand_bbox[3]), color=(0, 0, 255), thickness=10)
105 | for e_idx2, entity_2 in enumerate(VISOR_active_objects):
106 | entity_2_name = self.remap_VISOR_annot(entity_2)['noun']
107 | if entity_2_name in self.hands:
108 | continue
109 | elif entity_2_name in EP100_narration['noun']:
110 | obj_bbox = self.get_bbox_from_segment(entity_2['segments'])
111 | cv2.rectangle(self.img_show, (obj_bbox[0], obj_bbox[1]), (obj_bbox[2], obj_bbox[3]), color=(0, 255, 0), thickness=10)
112 | if self.get_intersection_bbox(hand_bbox, obj_bbox)[0]:
113 | aff_bbox = self.get_intersection_bbox(hand_bbox, obj_bbox)[1]
114 | x_center, y_center = self.get_bbox_center(aff_bbox)
115 | output['interacting_objects'].append({'hand': entity['name'], 'verb': EP100_narration['verb'], 'object': entity_2_name, 'object_bbox': obj_bbox, 'hand_bbox': hand_bbox, 'affordance_bbox': aff_bbox, 'affordance_center': (x_center, y_center)})
116 | cv2.circle(self.img_show, (int(x_center), int(y_center)), radius=10, color=(255, 0, 0), thickness=15)
117 | else:
118 | output['neutral_objects'].append({'object': entity_2_name, 'object_bbox': obj_bbox})
119 | else:
120 | obj_bbox = self.get_bbox_from_segment(entity_2['segments'])
121 | output['neutral_objects'].append({'object': entity_2_name, 'object_bbox': obj_bbox})
122 | cv2.rectangle(self.img_show, (obj_bbox[0], obj_bbox[1]), (obj_bbox[2], obj_bbox[3]), color=(0, 255, 255), thickness=2)
123 | cv2.imwrite('.../affordance_hotspot3.jpg', self.img_show)
124 | sampled_mask = os.path.join('.../dense_masks', sequence, img_name + '.png')
125 | sampled_mask = cv2.imread(sampled_mask)
126 | cv2.imwrite('.../active_object_masks3.jpg', sampled_mask)
127 | return output
128 |
129 |
130 | def read_VISOR_annot(self, img_name, subset, sequence):
131 | print('Trying the sparse VISOR annotations')
132 | VISOR_filename = self.all_sparse_VISOR_jsons[sequence]
133 | the_annotation, divisor = None, 1 #Default divisor; avoids an unbound local when neither file has the frame
134 | with open(VISOR_filename, 'r') as f:
135 | VISOR_annot = ijson.items(f, 'video_annotations.item')
136 | for entity in VISOR_annot:
137 | if entity['image']['name'].split('.')[0] == img_name.split('.')[0]:
138 | the_annotation = entity['annotations']
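   | #Sparse VISOR annotations are at a higher resolution than our 854x480 frames;
   | #2.25 (= 1080/480) rescales their coordinates down (assuming 1920x1080 sources)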
139 | divisor = 2.25
140 | break
141 | if the_annotation is None:
142 | print('Trying the dense VISOR annotations')
143 | VISOR_filename = self.all_dense_VISOR_jsons[sequence]
144 | with open(VISOR_filename, 'r') as f:
145 | VISOR_annot = ijson.items(f, 'video_annotations.item')
146 | for entity in VISOR_annot:
147 | if entity['image']['name'].split('.')[0] == img_name.split('.')[0]:
148 | the_annotation = entity['annotations']
149 | divisor = 1
150 | break
151 | return the_annotation, divisor
152 |
153 | def remap_VISOR_annot(self, visor_annot):
154 | visor_noun_class = visor_annot['class_id']
155 | remapped = self.EPIC_100_nouns[self.EPIC_100_nouns['id'] == visor_noun_class]
156 | full_visor_annot = {'noun_id': remapped['id'].values[0],
157 | 'noun': remapped['key'].values[0],
158 | 'category': remapped['category'].values[0],
159 | 'non_remapped_noun': visor_annot['name']}
160 | return full_visor_annot
161 |
162 | def read_EPIC_100_annot(self, frame_id, sequence):
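   | #Returns every EPIC-100 narration whose [start_frame, stop_frame] interval
   | #contains frame_id, with noun classes remapped through the EPIC-100 noun table.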
163 | df = self.EPIC_100_narration
164 | df = df[df['video_id'] == sequence]
165 | df = df.reset_index(drop=True)
166 | EP_100_narration = df[(df['start_frame'] <= frame_id) & (df['stop_frame'] >= frame_id)]
167 | if len(EP_100_narration) == 0:
168 | return None
169 | list_annotations = []
170 | for i in range(len(EP_100_narration)):
171 | EP_100_narration_noun = EP_100_narration['noun_class'].values[i]
172 | remapped = self.EPIC_100_nouns[self.EPIC_100_nouns['id'] == EP_100_narration_noun]
173 | narration_annot = {'noun_id': remapped['id'].values[0],
174 | 'noun': remapped['key'].values[0],
175 | 'category': remapped['category'].values[0],
176 | 'non_remapped_noun': EP_100_narration['noun'].values[i],
177 | 'verb': EP_100_narration['verb'].values[i],
178 | 'verb_id': EP_100_narration['verb_class'].values[i]}
179 | list_annotations.append(narration_annot)
180 | return list_annotations
181 |
182 | def get_bbox_from_segment(self, annot):
183 | mask_clean = []
184 | for mask in annot:
185 | if len(mask) == 0: continue
186 | mask = np.array(mask, dtype=np.int32)
187 | mask_clean.append(mask)
188 | bbox = self.get_bbox(mask_clean)
189 | x1, y1, x2, y2 = bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]
190 | return x1, y1, x2, y2
191 |
192 | def get_bbox(self, masks):
193 | '''
194 | Get bbox for object masks (one object may have more than one component). Returns:
195 | bbox: [x, y, width, height]
196 | '''
197 | g_xmin, g_ymin, g_xmax, g_ymax = 10000, 10000, 0, 0
198 | for mask in masks:
199 | if len(mask) == 0: continue
200 | mask = np.array(mask)
201 | xmin, xmax = np.min(mask[:,0]), np.max(mask[:,0])
202 | ymin, ymax = np.min(mask[:,1]), np.max(mask[:,1])
203 |
204 | g_xmin = min(g_xmin, xmin)
205 | g_xmax = max(g_xmax, xmax)
206 | g_ymin = min(g_ymin, ymin)
207 | g_ymax = max(g_ymax, ymax)
208 |
209 | bbox = [int(g_xmin), int(g_ymin), int(g_xmax - g_xmin), int(g_ymax - g_ymin)]
210 | return bbox
211 |
212 | def get_intersection_bbox(self, hand_bbox, obj_bbox):
213 | x1, y1, x2, y2 = hand_bbox
214 | x3, y3, x4, y4 = obj_bbox
215 | x_left = max(x1, x3)
216 | y_top = max(y1, y3)
217 | x_right = min(x2, x4)
218 | y_bottom = min(y2, y4)
219 | intersection_bbox = [x_left, y_top, x_right, y_bottom]
220 | if x_right < x_left or y_bottom < y_top:
221 | return False, None
222 | else:
223 | return True, intersection_bbox
224 |
225 | def get_bbox_center(self, bbox):
226 | x1, y1, x2, y2 = bbox #Get the center of the affordance hotspot
227 | x_center = x1 + (x2 - x1)/2
228 | y_center = y1 + (y2 - y1)/2
229 | return x_center, y_center
230 |
--------------------------------------------------------------------------------
/project_from_3D_to_2D.py:
--------------------------------------------------------------------------------
1 | # Description: This script is used to project 3D points to 2D image plane
2 |
3 | import numpy as np
4 | import os
5 | import cv2
6 | import pickle
7 | from read_write_model import read_model
8 | import pandas as pd
9 | import torch
10 | from scipy.stats import multivariate_normal
11 | import time
12 | from utils.valid_interactions import valid_interactions, colormap_interactions
13 |
14 | class Reproject_data():
15 | def __init__(self):
16 | #Initialize directories
17 | self.verbs_EP100_csv = '.../EPIC_100_verb_classes.csv'
18 | self.verbs_EP100_csv = pd.read_csv(self.verbs_EP100_csv)
19 | self.sequence_dir = '.../P04_EPIC_55'
20 | self.labels_dir = os.path.join(self.sequence_dir, '3D_output')
21 | self.colmap_poses = os.path.join(self.sequence_dir, 'colmap')
22 | self.imgs_dir = os.path.join(self.sequence_dir, 'selected_plus_guided_rgb')
23 | self.output_to_show = os.path.join(self.sequence_dir, 'output_to_show')
24 | if not os.path.exists(self.output_to_show):
25 | os.makedirs(self.output_to_show)
26 | self.output_labels_2d = os.path.join(self.sequence_dir, '2d_output_labels')
27 | if not os.path.exists(self.output_labels_2d):
28 | os.makedirs(self.output_labels_2d)
29 | self.output_clusters = os.path.join(self.sequence_dir, 'output_clusters_v2')
30 | if not os.path.exists(self.output_clusters):
31 | os.makedirs(self.output_clusters)
32 |
33 | #32 valid verbs
34 | self.valid_interactions = ['take', 'remove', 'put', 'insert', 'throw', 'wash', 'dry', 'open', 'turn-on',
35 | 'close', 'turn-off', 'mix', 'fill', 'add', 'cut', 'peel', 'empty',
36 | 'shake', 'squeeze', 'press', 'cook', 'move', 'adjust', 'eat',
37 | 'drink', 'apply', 'sprinkle', 'fold', 'sort', 'clean', 'slice', 'pick']
38 | self.valid_interactions_2 = valid_interactions
39 |
40 | self.colormap_interactions = colormap_interactions
41 |
42 |
43 | #Read the intrinsic parameters of the sequence
44 | self.cameras_Colmap, self.imgs_Colmap, self.pts_Colmap = read_model(self.colmap_poses, ext=".txt")
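   | #Assumes camera 1 uses COLMAP's PINHOLE model, whose params are [fx, fy, cx, cy]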
45 | self.fx = self.cameras_Colmap[1].params[0]
46 | self.fy = self.cameras_Colmap[1].params[1]
47 | self.cx = self.cameras_Colmap[1].params[2]
48 | self.cy = self.cameras_Colmap[1].params[3]
49 | self.projection_matrix = self.get_projection_matrix()
50 | self.height = 480
51 | self.width = 854
52 | self.size = 500
53 | self.gaussian = self.get_gaussian()
54 | self.read_3D_points()
55 | print('the length of the 3D points is: ', self.points_coord.shape)
56 |
57 |
58 | def get_projection_matrix(self):
59 | # Get the projection matrix
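   | #3x4 intrinsic projection matrix K [I|0]; the last column stays zero because
   | #the points are already in camera coordinates when we project them.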
60 | projection_matrix = np.zeros((3, 4))
61 | projection_matrix[0, 0] = self.fx
62 | projection_matrix[1, 1] = self.fy
63 | projection_matrix[0, 2] = self.cx
64 | projection_matrix[1, 2] = self.cy
65 | projection_matrix[2, 2] = 1
66 | return projection_matrix
67 |
68 | def get_camera_pose(self, data):
69 | #Invert the stored camera-to-world pose (R_pos, t_pos) into the world-to-camera transform used for projection
70 | t = data['colmap']['t_pos']
71 | R = data['colmap']['R_pos']
72 | t_c = (-R.T @ t).reshape(3,1)
73 | R_c = R.T
74 | return t_c, R_c
75 |
76 | def remap_verb_EP100(self, data):
77 | ep_verb_class = data['aff_verb_id']
78 | remapped = self.verbs_EP100_csv[self.verbs_EP100_csv['id'] == ep_verb_class]
79 | remapped_verb_str = remapped['key'].values[0]
80 | remapped_verb_id = remapped['id'].values[0]
81 | return remapped_verb_str, remapped_verb_id
82 |
83 | def read_3D_points(self):
84 | points, rgb_points = [], []
85 | self.verb_str, self.verb_id, self.noun_str, self.noun_id = [], [], [], []
86 | #Iterate over all the files in the directory sequence_dir
87 | for root, dirs, files in os.walk(self.labels_dir):
88 | for file in files:
89 | if file.endswith('.pkl'):
90 | pkl = open(os.path.join(root, file), 'rb') #Open a pickle file
91 | data = pickle.load(pkl) #Load the pickle file
92 | for i in range(len(data['EGOMETRIC_label']['affordance_labels'])):
93 | points.append(data['EGOMETRIC_label']['affordance_labels'][i]['3D_aff_points'])
94 | rgb_points.append(data['EGOMETRIC_label']['affordance_labels'][i]['3D_aff_colors'])
95 | remap_verb_str, remap_verb_id = self.remap_verb_EP100(data['EGOMETRIC_label']['affordance_labels'][i])
96 | self.verb_str.append(remap_verb_str)
97 | self.verb_id.append(remap_verb_id)
98 | self.noun_str.append(data['EGOMETRIC_label']['affordance_labels'][i]['aff_noun'])
99 | self.noun_id.append(data['EGOMETRIC_label']['affordance_labels'][i]['aff_noun_id'])
100 | pkl.close()
101 |
102 | print(len(points), len(rgb_points), len(self.verb_str), len(self.verb_id), len(self.noun_str), len(self.noun_id))
103 | self.points_coord = np.concatenate(points, axis=0)
104 | self.points_rgb = np.concatenate(rgb_points, axis=0)
105 |
106 | def object_detector_gt(self, visor_annot):
107 | #Static objects which are always present in the scene
108 | objects_in_scene = ['drawer', 'fridge', 'microwave', 'oven', 'sink',
109 | 'hob', 'kettle', 'maker:coffee', 'dishwasher',
110 | 'machine:washing', 'floor', 'table', 'rubbish']
111 | #Add the dynamic objects observed in this frame
112 |
113 | if len(visor_annot) > 0:
114 | for i in range(len(visor_annot['neutral_objects'])):
115 | objects_in_scene.append(visor_annot['neutral_objects'][i]['noun'])
116 | for i in range(len(visor_annot['interacting_objects'])):
117 | objects_in_scene.append(visor_annot['interacting_objects'][i]['noun'])
118 | return objects_in_scene
119 |
120 | def reproject_points(self, points_in_camera):
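   | #Appends a homogeneous coordinate, applies the 3x4 projection matrix and
   | #perspective-divides by z to obtain pixel coordinates.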
121 | points_in_camera = np.append(points_in_camera, np.ones((1, points_in_camera.shape[1])), axis=0)
122 | reprojected_points = np.dot(self.projection_matrix, points_in_camera)
123 | reprojected_points = reprojected_points / reprojected_points[2]
124 | return reprojected_points
125 |
126 | def filter_reprojected_points(self, reprojected_points, present_objects, img_name):
127 | # Filter the reprojected points by the localization and the semantic noun
128 | self.good_reprojected_points, self.good_reprojected_rgb, self.good_verbs, self.good_nouns, self.good_interactions = [], [], [], [], []
129 | for i in range(reprojected_points.shape[1]):
130 | point = reprojected_points[:, i]
131 | if point[0] >= 0 and point[0] < self.width and point[1] >= 0 and point[1] < self.height:
132 | if self.noun_str[i] in present_objects and (self.verb_str[i]) in self.valid_interactions:
133 | self.good_reprojected_points.append(point)
134 | self.good_reprojected_rgb.append(self.points_rgb[i])
135 | self.good_verbs.append(self.verb_str[i])
136 | self.good_nouns.append(self.noun_str[i])
137 | self.good_interactions.append(self.verb_str[i] + ' ' + self.noun_str[i])
138 | #Save all in a json file
139 | img_name = img_name.split('.')[0]
140 | output_2d = {}
141 | output_2d['points'] = self.good_reprojected_points
142 | output_2d['rgb'] = self.good_reprojected_rgb
143 | output_2d['verbs'] = self.good_verbs
144 | output_2d['nouns'] = self.good_nouns
145 | output_2d['verb plus noun'] = self.good_interactions
146 | output_filename = os.path.join(self.output_labels_2d, img_name +'.pkl')
147 | with open(output_filename, 'wb') as f:
148 | pickle.dump(output_2d, f)
149 | print('we are saving the 2d labels in: ', output_filename, 'with a len', len(self.good_reprojected_points))
150 |
151 | def cluster_interactions(self):
152 | #Cluster the interactions
153 | self.interaction_clusters = []
154 | self.interaction_coordinates = {}
155 | for i in range(len(self.good_verbs)): #Cluster by verb (previously clustered by the full verb+noun interaction)
156 | if self.good_verbs[i] not in self.interaction_clusters:
157 | self.interaction_clusters.append(self.good_verbs[i])
158 | for i in range(len(self.interaction_clusters)):
159 | self.interaction_coordinates[self.interaction_clusters[i]] = []
160 | for i in range(len(self.good_verbs)):
161 | self.interaction_coordinates[self.good_verbs[i]].append(self.good_reprojected_points[i])
162 |
163 | def paint_points(self, img, img_name):
164 | img_copy = img.copy()
165 | img_name = img_name.split('.')[0]
166 | font = cv2.FONT_HERSHEY_PLAIN
167 | for i in range(len(self.good_reprojected_points)):
168 | point = self.good_reprojected_points[i]
169 | rgb_point = self.good_reprojected_rgb[i]
170 | text = self.good_verbs[i] + ' ' + self.good_nouns[i]
171 | cv2.circle(img_copy, (int(point[0]), int(point[1])), 3, (int(rgb_point[0]), int(rgb_point[1]), int(rgb_point[2])), -1)
172 | cv2.putText(img_copy, text, (int(point[0]), int(point[1])), font, 1, (255,0,0), 1, cv2.LINE_AA)
173 | cv2.imwrite(os.path.join(self.output_to_show, img_name + '.png'), img_copy)
174 |
175 | def paint_clusters(self, img, img_name):
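   | #For every verb cluster, stamp a fixed-size Gaussian kernel at each reprojected
   | #point (clipping the window at the image borders), normalize the accumulated
   | #map, threshold it, colorize it with the cluster's colormap entry and
   | #alpha-blend it onto the frame.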
176 | img_copy = img.copy()
177 | img_name = img_name.split('.')[0]
178 | font = cv2.FONT_HERSHEY_PLAIN
179 | #Draw the hotspots of the clusters
180 | for i in range(len(self.interaction_clusters)):
181 | cluster = self.interaction_clusters[i]
182 | prob_sum = np.zeros((self.width, self.height))
183 | print('prob_sum shape (width, height):', prob_sum.shape) #Built as (width, height); transposed to image layout before painting
184 | for j in range(len(self.interaction_coordinates[cluster])):
185 | point = self.interaction_coordinates[cluster][j][0:2].astype(int)
186 | prob = np.zeros((self.width, self.height))
187 |
188 | if (self.width - point[0]) > self.size // 2:
189 | gauss_right = self.size
190 | prob_right = point[0] + self.size // 2
191 | else:
192 | gauss_right = self.width - point[0] + self.size // 2
193 | prob_right = self.width
194 |
195 | if point[0] > self.size // 2:
196 | gauss_left = 0
197 | prob_left = point[0] - self.size // 2
198 | else:
199 | gauss_left = self.size // 2 - point[0]
200 | prob_left = 0
201 |
202 | if (self.height - point[1]) > self.size // 2:
203 | gauss_bottom = self.size
204 | prob_bottom = point[1] + self.size // 2
205 | else:
206 | gauss_bottom = self.height - point[1] + self.size // 2
207 | prob_bottom = self.height
208 |
209 | if point[1] > self.size // 2:
210 | gauss_top = 0
211 | prob_top = point[1] - self.size // 2
212 | else:
213 | gauss_top = self.size // 2 - point[1]
214 | prob_top = 0
215 |
216 | prob[int(prob_left):int(prob_right),int(prob_top):int(prob_bottom)] = self.gaussian[int(gauss_left):int(gauss_right),int(gauss_top):int(gauss_bottom)]
217 | prob_sum += prob
218 |
219 | prob_sum = (prob_sum / np.max(prob_sum)).T
220 | #Zero-out low-probability regions (threshold 0.25)
221 | prob_sum[prob_sum < 0.25] = 0
222 | print('prob_sum shape after transpose:', prob_sum.shape)
223 | #prob_sum[prob_sum > 0.25] = 1
224 | prob_paint = np.expand_dims((prob_sum), axis=2)
225 | print('prob_paint shape:', prob_paint.shape)
226 | color = np.array(self.colormap_interactions[cluster]).reshape(1, 3)
227 | prob_paint = (prob_paint @ color).astype(np.uint8)
228 | print('colored prob_paint shape:', prob_paint.shape, 'image shape:', img_copy.shape)
232 | img_copy = cv2.addWeighted(img_copy, 0.5, prob_paint, 2.0, 0)
233 | cv2.imwrite(os.path.join(self.output_clusters, img_name + ' ' + cluster + '.png'), img_copy)
234 | print('Saved image', os.path.join(self.output_clusters, img_name + ' ' + cluster + '.png'))
235 | img_copy = cv2.imread(os.path.join(self.imgs_dir, img_name + '.jpg'))
236 | #Draw the text of the clusters for a better visualization
237 | for i in range(len(self.interaction_clusters)):
238 | cluster = self.interaction_clusters[i]
239 | for j in range(len(self.interaction_coordinates[cluster])):
240 | if j == 0:
241 | point = self.interaction_coordinates[cluster][j]
242 | cv2.putText(img_copy, cluster, (int(point[0]), int(point[1])), font, 3, (255,0,0), 3, cv2.LINE_AA)
243 | cv2.imwrite(os.path.join(self.output_clusters, img_name + '------' + '.png'), img_copy)
244 |
245 | def get_gaussian(self):
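   | #Isotropic 2D Gaussian (cov = 1000*I, std ~ 31.6 px) evaluated on a
   | #size x size grid; used as the stamp kernel in paint_clusters.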
246 | x, y = np.mgrid[0:self.size:1, 0:self.size:1]
247 | pos = np.empty(x.shape + (2,))
248 | pos[:, :, 0] = x
249 | pos[:, :, 1] = y
250 | gaussian = multivariate_normal(mean=[self.size//2, self.size//2], cov=np.eye(2)*1000)
251 | return gaussian.pdf(pos)
252 |
253 | def examine_sequence(self):
254 | c = 0
255 | for root, dirs, files in os.walk(self.labels_dir):
256 | for file in files:
257 | if file.endswith('.pkl'):
258 | pkl = open(os.path.join(root, file), 'rb')
259 | print('Processing file', os.path.join(root, file))
260 | data = pickle.load(pkl)
261 |
262 | img = cv2.imread(os.path.join(self.imgs_dir, data['filename']))
263 |
264 | t_c, R_c = self.get_camera_pose(data)
265 | points_in_camera = R_c @ self.points_coord.T + t_c
266 | objects_in_scene = self.object_detector_gt(data['VISOR'])
267 |
268 | reprojected_points = self.reproject_points(points_in_camera)
269 | self.filter_reprojected_points(reprojected_points, objects_in_scene, data['filename'])
270 | #self.paint_points(img, data['filename'])
271 | self.cluster_interactions()
272 | self.paint_clusters(img, data['filename'])
273 | pkl.close()
275 | c += 1
276 | if c % 100 == 0:
277 | print(c, 'images processed')
278 |
279 |
280 |
281 |
282 | reproject = Reproject_data()
283 | reproject.examine_sequence()
284 |
--------------------------------------------------------------------------------
/read_write_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022, ETH Zurich and UNC Chapel Hill.
2 | # All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # * Redistributions of source code must retain the above copyright
8 | # notice, this list of conditions and the following disclaimer.
9 | #
10 | # * Redistributions in binary form must reproduce the above copyright
11 | # notice, this list of conditions and the following disclaimer in the
12 | # documentation and/or other materials provided with the distribution.
13 | #
14 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
15 | # its contributors may be used to endorse or promote products derived
16 | # from this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
22 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 | # POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
31 |
32 | import os
33 | import collections
34 | import numpy as np
35 | import struct
36 | import argparse
37 |
38 |
39 | CameraModel = collections.namedtuple(
40 | "CameraModel", ["model_id", "model_name", "num_params"])
41 | Camera = collections.namedtuple(
42 | "Camera", ["id", "model", "width", "height", "params"])
43 | BaseImage = collections.namedtuple(
44 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
45 | Point3D = collections.namedtuple(
46 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])
47 |
48 |
49 | class Image(BaseImage):
50 | def qvec2rotmat(self):
51 | return qvec2rotmat(self.qvec)
52 |
53 |
54 | CAMERA_MODELS = {
55 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
56 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
57 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
58 | CameraModel(model_id=3, model_name="RADIAL", num_params=5),
59 | CameraModel(model_id=4, model_name="OPENCV", num_params=8),
60 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
61 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
62 | CameraModel(model_id=7, model_name="FOV", num_params=5),
63 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
64 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
65 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12)
66 | }
67 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model)
68 | for camera_model in CAMERA_MODELS])
69 | CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
70 | for camera_model in CAMERA_MODELS])
71 |
72 |
73 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
74 | """Read and unpack the next bytes from a binary file.
75 | :param fid:
76 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
77 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
78 | :param endian_character: Any of {@, =, <, >, !}
79 | :return: Tuple of read and unpacked values.
80 | """
81 | data = fid.read(num_bytes)
82 | return struct.unpack(endian_character + format_char_sequence, data)
83 |
84 |
85 | def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
86 | """pack and write to a binary file.
87 | :param fid:
88 | :param data: data to send, if multiple elements are sent at the same time,
89 | they should be encapsuled either in a list or a tuple
90 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
91 | should be the same length as the data list or tuple
92 | :param endian_character: Any of {@, =, <, >, !}
93 | """
94 | if isinstance(data, (list, tuple)):
95 | bytes = struct.pack(endian_character + format_char_sequence, *data)
96 | else:
97 | bytes = struct.pack(endian_character + format_char_sequence, data)
98 | fid.write(bytes)
99 |
100 |
101 | def read_cameras_text(path):
102 | """
103 | see: src/base/reconstruction.cc
104 | void Reconstruction::WriteCamerasText(const std::string& path)
105 | void Reconstruction::ReadCamerasText(const std::string& path)
106 | """
107 | cameras = {}
108 | with open(path, "r") as fid:
109 | while True:
110 | line = fid.readline()
111 | if not line:
112 | break
113 | line = line.strip()
114 | if len(line) > 0 and line[0] != "#":
115 | elems = line.split()
116 | camera_id = int(elems[0])
117 | model = elems[1]
118 | width = int(elems[2])
119 | height = int(elems[3])
120 | params = np.array(tuple(map(float, elems[4:])))
121 | cameras[camera_id] = Camera(id=camera_id, model=model,
122 | width=width, height=height,
123 | params=params)
124 | return cameras
125 |
126 |
127 | def read_cameras_binary(path_to_model_file):
128 | """
129 | see: src/base/reconstruction.cc
130 | void Reconstruction::WriteCamerasBinary(const std::string& path)
131 | void Reconstruction::ReadCamerasBinary(const std::string& path)
132 | """
133 | cameras = {}
134 | with open(path_to_model_file, "rb") as fid:
135 | num_cameras = read_next_bytes(fid, 8, "Q")[0]
136 | for _ in range(num_cameras):
137 | camera_properties = read_next_bytes(
138 | fid, num_bytes=24, format_char_sequence="iiQQ")
139 | camera_id = camera_properties[0]
140 | model_id = camera_properties[1]
141 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
142 | width = camera_properties[2]
143 | height = camera_properties[3]
144 | num_params = CAMERA_MODEL_IDS[model_id].num_params
145 | params = read_next_bytes(fid, num_bytes=8*num_params,
146 | format_char_sequence="d"*num_params)
147 | cameras[camera_id] = Camera(id=camera_id,
148 | model=model_name,
149 | width=width,
150 | height=height,
151 | params=np.array(params))
152 | assert len(cameras) == num_cameras
153 | return cameras
154 |
155 |
156 | def write_cameras_text(cameras, path):
157 | """
158 | see: src/base/reconstruction.cc
159 | void Reconstruction::WriteCamerasText(const std::string& path)
160 | void Reconstruction::ReadCamerasText(const std::string& path)
161 | """
162 | HEADER = "# Camera list with one line of data per camera:\n" + \
163 | "# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n" + \
164 | "# Number of cameras: {}\n".format(len(cameras))
165 | with open(path, "w") as fid:
166 | fid.write(HEADER)
167 | for _, cam in cameras.items():
168 | to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params]
169 | line = " ".join([str(elem) for elem in to_write])
170 | fid.write(line + "\n")
171 |
172 |
173 | def write_cameras_binary(cameras, path_to_model_file):
174 | """
175 | see: src/base/reconstruction.cc
176 | void Reconstruction::WriteCamerasBinary(const std::string& path)
177 | void Reconstruction::ReadCamerasBinary(const std::string& path)
178 | """
179 | with open(path_to_model_file, "wb") as fid:
180 | write_next_bytes(fid, len(cameras), "Q")
181 | for _, cam in cameras.items():
182 | model_id = CAMERA_MODEL_NAMES[cam.model].model_id
183 | camera_properties = [cam.id,
184 | model_id,
185 | cam.width,
186 | cam.height]
187 | write_next_bytes(fid, camera_properties, "iiQQ")
188 | for p in cam.params:
189 | write_next_bytes(fid, float(p), "d")
190 | return cameras
191 |
192 |
193 | def read_images_text(path):
194 | """
195 | see: src/base/reconstruction.cc
196 | void Reconstruction::ReadImagesText(const std::string& path)
197 | void Reconstruction::WriteImagesText(const std::string& path)
198 | """
199 | images = {}
200 | with open(path, "r") as fid:
201 | while True:
202 | line = fid.readline()
203 | if not line:
204 | break
205 | line = line.strip()
206 | if len(line) > 0 and line[0] != "#":
207 | elems = line.split()
208 | image_id = int(elems[0])
209 | qvec = np.array(tuple(map(float, elems[1:5])))
210 | tvec = np.array(tuple(map(float, elems[5:8])))
211 | camera_id = int(elems[8])
212 | image_name = elems[9]
213 | elems = fid.readline().split()
214 | xys = np.column_stack([tuple(map(float, elems[0::3])),
215 | tuple(map(float, elems[1::3]))])
216 | point3D_ids = np.array(tuple(map(int, elems[2::3])))
217 | images[image_id] = Image(
218 | id=image_id, qvec=qvec, tvec=tvec,
219 | camera_id=camera_id, name=image_name,
220 | xys=xys, point3D_ids=point3D_ids)
221 | return images
222 |
223 |
224 | def read_images_binary(path_to_model_file):
225 | """
226 | see: src/base/reconstruction.cc
227 | void Reconstruction::ReadImagesBinary(const std::string& path)
228 | void Reconstruction::WriteImagesBinary(const std::string& path)
229 | """
230 | images = {}
231 | with open(path_to_model_file, "rb") as fid:
232 | num_reg_images = read_next_bytes(fid, 8, "Q")[0]
233 | for _ in range(num_reg_images):
234 | binary_image_properties = read_next_bytes(
235 | fid, num_bytes=64, format_char_sequence="idddddddi")
236 | image_id = binary_image_properties[0]
237 | qvec = np.array(binary_image_properties[1:5])
238 | tvec = np.array(binary_image_properties[5:8])
239 | camera_id = binary_image_properties[8]
240 | image_name = ""
241 | current_char = read_next_bytes(fid, 1, "c")[0]
242 | while current_char != b"\x00": # look for the ASCII 0 entry
243 | image_name += current_char.decode("utf-8")
244 | current_char = read_next_bytes(fid, 1, "c")[0]
245 | num_points2D = read_next_bytes(fid, num_bytes=8,
246 | format_char_sequence="Q")[0]
247 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D,
248 | format_char_sequence="ddq"*num_points2D)
249 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
250 | tuple(map(float, x_y_id_s[1::3]))])
251 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
252 | images[image_id] = Image(
253 | id=image_id, qvec=qvec, tvec=tvec,
254 | camera_id=camera_id, name=image_name,
255 | xys=xys, point3D_ids=point3D_ids)
256 | return images
257 |
258 |
259 | def write_images_text(images, path):
260 | """
261 | see: src/base/reconstruction.cc
262 | void Reconstruction::ReadImagesText(const std::string& path)
263 | void Reconstruction::WriteImagesText(const std::string& path)
264 | """
265 | if len(images) == 0:
266 | mean_observations = 0
267 | else:
268 | mean_observations = sum((len(img.point3D_ids) for _, img in images.items()))/len(images)
269 | HEADER = "# Image list with two lines of data per image:\n" + \
270 | "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n" + \
271 | "# POINTS2D[] as (X, Y, POINT3D_ID)\n" + \
272 | "# Number of images: {}, mean observations per image: {}\n".format(len(images), mean_observations)
273 |
274 | with open(path, "w") as fid:
275 | fid.write(HEADER)
276 | for _, img in images.items():
277 | image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
278 | first_line = " ".join(map(str, image_header))
279 | fid.write(first_line + "\n")
280 |
281 | points_strings = []
282 | for xy, point3D_id in zip(img.xys, img.point3D_ids):
283 | points_strings.append(" ".join(map(str, [*xy, point3D_id])))
284 | fid.write(" ".join(points_strings) + "\n")
285 |
286 |
287 | def write_images_binary(images, path_to_model_file):
288 | """
289 | see: src/base/reconstruction.cc
290 | void Reconstruction::ReadImagesBinary(const std::string& path)
291 | void Reconstruction::WriteImagesBinary(const std::string& path)
292 | """
293 | with open(path_to_model_file, "wb") as fid:
294 | write_next_bytes(fid, len(images), "Q")
295 | for _, img in images.items():
296 | write_next_bytes(fid, img.id, "i")
297 | write_next_bytes(fid, img.qvec.tolist(), "dddd")
298 | write_next_bytes(fid, img.tvec.tolist(), "ddd")
299 | write_next_bytes(fid, img.camera_id, "i")
300 | for char in img.name:
301 | write_next_bytes(fid, char.encode("utf-8"), "c")
302 | write_next_bytes(fid, b"\x00", "c")
303 | write_next_bytes(fid, len(img.point3D_ids), "Q")
304 | for xy, p3d_id in zip(img.xys, img.point3D_ids):
305 | write_next_bytes(fid, [*xy, p3d_id], "ddq")
306 |
307 |
308 | def read_points3D_text(path):
309 | """
310 | see: src/base/reconstruction.cc
311 | void Reconstruction::ReadPoints3DText(const std::string& path)
312 | void Reconstruction::WritePoints3DText(const std::string& path)
313 | """
314 | points3D = {}
315 | with open(path, "r") as fid:
316 | while True:
317 | line = fid.readline()
318 | if not line:
319 | break
320 | line = line.strip()
321 | if len(line) > 0 and line[0] != "#":
322 | elems = line.split()
323 | point3D_id = int(elems[0])
324 | xyz = np.array(tuple(map(float, elems[1:4])))
325 | rgb = np.array(tuple(map(int, elems[4:7])))
326 | error = float(elems[7])
327 | image_ids = np.array(tuple(map(int, elems[8::2])))
328 | point2D_idxs = np.array(tuple(map(int, elems[9::2])))
329 | points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb,
330 | error=error, image_ids=image_ids,
331 | point2D_idxs=point2D_idxs)
332 | return points3D
333 |
334 |
335 | def read_points3D_binary(path_to_model_file):
336 | """
337 | see: src/base/reconstruction.cc
338 | void Reconstruction::ReadPoints3DBinary(const std::string& path)
339 | void Reconstruction::WritePoints3DBinary(const std::string& path)
340 | """
341 | points3D = {}
342 | with open(path_to_model_file, "rb") as fid:
343 | num_points = read_next_bytes(fid, 8, "Q")[0]
344 | for _ in range(num_points):
345 | binary_point_line_properties = read_next_bytes(
346 | fid, num_bytes=43, format_char_sequence="QdddBBBd")
347 | point3D_id = binary_point_line_properties[0]
348 | xyz = np.array(binary_point_line_properties[1:4])
349 | rgb = np.array(binary_point_line_properties[4:7])
350 | error = np.array(binary_point_line_properties[7])
351 | track_length = read_next_bytes(
352 | fid, num_bytes=8, format_char_sequence="Q")[0]
353 | track_elems = read_next_bytes(
354 | fid, num_bytes=8*track_length,
355 | format_char_sequence="ii"*track_length)
356 | image_ids = np.array(tuple(map(int, track_elems[0::2])))
357 | point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
358 | points3D[point3D_id] = Point3D(
359 | id=point3D_id, xyz=xyz, rgb=rgb,
360 | error=error, image_ids=image_ids,
361 | point2D_idxs=point2D_idxs)
362 | return points3D
363 |
364 |
365 | def write_points3D_text(points3D, path):
366 | """
367 | see: src/base/reconstruction.cc
368 | void Reconstruction::ReadPoints3DText(const std::string& path)
369 | void Reconstruction::WritePoints3DText(const std::string& path)
370 | """
371 | if len(points3D) == 0:
372 | mean_track_length = 0
373 | else:
374 | mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items()))/len(points3D)
375 | HEADER = "# 3D point list with one line of data per point:\n" + \
376 | "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" + \
377 | "# Number of points: {}, mean track length: {}\n".format(len(points3D), mean_track_length)
378 |
379 | with open(path, "w") as fid:
380 | fid.write(HEADER)
381 | for _, pt in points3D.items():
382 | point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
383 | fid.write(" ".join(map(str, point_header)) + " ")
384 | track_strings = []
385 | for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
386 | track_strings.append(" ".join(map(str, [image_id, point2D])))
387 | fid.write(" ".join(track_strings) + "\n")
388 |
389 |
390 | def write_points3D_binary(points3D, path_to_model_file):
391 | """
392 | see: src/base/reconstruction.cc
393 | void Reconstruction::ReadPoints3DBinary(const std::string& path)
394 | void Reconstruction::WritePoints3DBinary(const std::string& path)
395 | """
396 | with open(path_to_model_file, "wb") as fid:
397 | write_next_bytes(fid, len(points3D), "Q")
398 | for _, pt in points3D.items():
399 | write_next_bytes(fid, pt.id, "Q")
400 | write_next_bytes(fid, pt.xyz.tolist(), "ddd")
401 | write_next_bytes(fid, pt.rgb.tolist(), "BBB")
402 | write_next_bytes(fid, pt.error, "d")
403 | track_length = pt.image_ids.shape[0]
404 | write_next_bytes(fid, track_length, "Q")
405 | for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
406 | write_next_bytes(fid, [image_id, point2D_id], "ii")
407 |
408 |
409 | def detect_model_format(path, ext):
410 | if os.path.isfile(os.path.join(path, "cameras" + ext)) and \
411 | os.path.isfile(os.path.join(path, "images" + ext)) and \
412 | os.path.isfile(os.path.join(path, "points3D" + ext)):
413 | print("Detected model format: '" + ext + "'")
414 | return True
415 |
416 | return False
417 |
418 |
419 | def read_model(path, ext=""):
420 | # try to detect the extension automatically
421 | if ext == "":
422 | if detect_model_format(path, ".bin"):
423 | ext = ".bin"
424 | elif detect_model_format(path, ".txt"):
425 | ext = ".txt"
426 | else:
427 | print("Provide model format: '.bin' or '.txt'")
428 | return
429 |
430 | if ext == ".txt":
431 | cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
432 | images = read_images_text(os.path.join(path, "images" + ext))
433 | points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
434 | else:
435 | cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
436 | images = read_images_binary(os.path.join(path, "images" + ext))
437 | points3D = read_points3D_binary(os.path.join(path, "points3D") + ext)
438 | return cameras, images, points3D
439 |
440 |
441 | def write_model(cameras, images, points3D, path, ext=".bin"):
442 | if ext == ".txt":
443 | write_cameras_text(cameras, os.path.join(path, "cameras" + ext))
444 | write_images_text(images, os.path.join(path, "images" + ext))
445 | write_points3D_text(points3D, os.path.join(path, "points3D") + ext)
446 | else:
447 | write_cameras_binary(cameras, os.path.join(path, "cameras" + ext))
448 | write_images_binary(images, os.path.join(path, "images" + ext))
449 | write_points3D_binary(points3D, os.path.join(path, "points3D") + ext)
450 | return cameras, images, points3D
451 |
452 |
453 | def qvec2rotmat(qvec):
454 | return np.array([
455 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
456 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
457 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
458 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
459 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2,
460 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
461 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
462 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
463 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]])
464 |
465 |
466 | def rotmat2qvec(R):
467 | Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
468 | K = np.array([
469 | [Rxx - Ryy - Rzz, 0, 0, 0],
470 | [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
471 | [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
472 | [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
473 | eigvals, eigvecs = np.linalg.eigh(K)
474 | qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
475 | if qvec[0] < 0:
476 | qvec *= -1
477 | return qvec
478 |
479 |
480 | def main():
481 | parser = argparse.ArgumentParser(description="Read and write COLMAP binary and text models")
482 | parser.add_argument("--input_model", help="path to input model folder")
483 | parser.add_argument("--input_format", choices=[".bin", ".txt"],
484 | help="input model format", default="")
485 | parser.add_argument("--output_model",
486 | help="path to output model folder")
487 | parser.add_argument("--output_format", choices=[".bin", ".txt"],
488 | help="output model format", default=".txt")
489 | args = parser.parse_args()
490 |
491 | cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format)
492 |
493 | print("num_cameras:", len(cameras))
494 | print("num_images:", len(images))
495 | print("num_points3D:", len(points3D))
496 |
497 | if args.output_model is not None:
498 | write_model(cameras, images, points3D, path=args.output_model, ext=args.output_format)
499 |
500 |
501 | if __name__ == "__main__":
502 | main()
503 |
--------------------------------------------------------------------------------