├── __init__.py ├── imgs ├── P01_01_frame_0000003682.jpg ├── P01_01_frame_0000019463.jpg ├── P01_01_frame_0000049183.jpg ├── P01_01_frame_0000091442.jpg ├── P04_02_frame_0000000946.jpg ├── P04_02_frame_0000003062.jpg ├── P04_02_frame_0000005376.jpg ├── P04_02_frame_0000011581.jpg ├── P04_05_frame_0000111070.png ├── P04_12_frame_0000008119.png ├── P04_02_frame_0000016034 take.png ├── P04_02_frame_0000065888 cut.png ├── P04_04_frame_0000006974 dry.png ├── P04_02_frame_0000033785 insert.png ├── P04_02_frame_0000065888 turn-on.png ├── P04_21_frame_0000006463 close.png ├── P04_21_frame_0000006463 insert.png ├── P04_04_frame_0000006974 turn-off.png ├── Screenshot from 2022-12-13 10-31-56.png ├── Screenshot from 2022-12-14 15-54-23.png ├── Screenshot from 2022-12-14 16-28-24.png ├── Screenshot from 2022-12-14 16-31-39.png └── Screenshot from 2022-12-14 16-36-05.png ├── P03_EPIC_100_example ├── rgb │ ├── P03_101_frame_0000000157.jpg │ ├── P03_101_frame_0000000217.jpg │ ├── P03_101_frame_0000000280.jpg │ ├── P03_101_frame_0000000318.jpg │ ├── P03_101_frame_0000000388.jpg │ ├── P03_101_frame_0000000426.jpg │ ├── P03_101_frame_0000000471.jpg │ ├── P03_101_frame_0000000530.jpg │ ├── P03_101_frame_0000000563.jpg │ └── P03_101_frame_0000000626.jpg ├── VISOR_masks │ ├── P03_101_frame_0000000157.png │ ├── P03_101_frame_0000000217.png │ ├── P03_101_frame_0000000280.png │ ├── P03_101_frame_0000000318.png │ ├── P03_101_frame_0000000388.png │ ├── P03_101_frame_0000000426.png │ ├── P03_101_frame_0000000471.png │ ├── P03_101_frame_0000000530.png │ ├── P03_101_frame_0000000563.png │ └── P03_101_frame_0000000626.png ├── easy_EPIC_Aff │ ├── P03_101_frame_0000000157.pkl │ ├── P03_101_frame_0000000217.pkl │ ├── P03_101_frame_0000000280.pkl │ ├── P03_101_frame_0000000318.pkl │ ├── P03_101_frame_0000000388.pkl │ ├── P03_101_frame_0000000426.pkl │ ├── P03_101_frame_0000000471.pkl │ ├── P03_101_frame_0000000530.pkl │ ├── P03_101_frame_0000000563.pkl │ └── P03_101_frame_0000000626.pkl ├── COLMAP_masks │ ├── P03_101_frame_0000000157.jpg.png │ ├── P03_101_frame_0000000217.jpg.png │ ├── P03_101_frame_0000000280.jpg.png │ ├── P03_101_frame_0000000318.jpg.png │ ├── P03_101_frame_0000000388.jpg.png │ ├── P03_101_frame_0000000426.jpg.png │ ├── P03_101_frame_0000000471.jpg.png │ ├── P03_101_frame_0000000530.jpg.png │ ├── P03_101_frame_0000000563.jpg.png │ └── P03_101_frame_0000000626.jpg.png ├── complex_EPIC_Aff │ ├── P03_101_frame_0000000157.pkl │ ├── P03_101_frame_0000000217.pkl │ ├── P03_101_frame_0000000280.pkl │ ├── P03_101_frame_0000000318.pkl │ ├── P03_101_frame_0000000388.pkl │ ├── P03_101_frame_0000000426.pkl │ ├── P03_101_frame_0000000471.pkl │ ├── P03_101_frame_0000000530.pkl │ ├── P03_101_frame_0000000563.pkl │ └── P03_101_frame_0000000626.pkl └── 3D_output_aff │ ├── affordances_P03_101_frame_0000000157.pkl │ ├── affordances_P03_101_frame_0000000217.pkl │ ├── affordances_P03_101_frame_0000000280.pkl │ ├── affordances_P03_101_frame_0000000318.pkl │ ├── affordances_P03_101_frame_0000000388.pkl │ ├── affordances_P03_101_frame_0000000426.pkl │ ├── affordances_P03_101_frame_0000000471.pkl │ ├── affordances_P03_101_frame_0000000530.pkl │ ├── affordances_P03_101_frame_0000000563.pkl │ └── affordances_P03_101_frame_0000000626.pkl ├── data_egom.py ├── read_cameras_colmap.py ├── data.py ├── README.md ├── inference_v2.py ├── utils_read_annotations.py ├── project_from_3D_to_2D.py └── read_write_model.py /__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /imgs/P01_01_frame_0000003682.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P01_01_frame_0000003682.jpg -------------------------------------------------------------------------------- /imgs/P01_01_frame_0000019463.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P01_01_frame_0000019463.jpg -------------------------------------------------------------------------------- /imgs/P01_01_frame_0000049183.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P01_01_frame_0000049183.jpg -------------------------------------------------------------------------------- /imgs/P01_01_frame_0000091442.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P01_01_frame_0000091442.jpg -------------------------------------------------------------------------------- /imgs/P04_02_frame_0000000946.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000000946.jpg -------------------------------------------------------------------------------- /imgs/P04_02_frame_0000003062.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000003062.jpg -------------------------------------------------------------------------------- /imgs/P04_02_frame_0000005376.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000005376.jpg -------------------------------------------------------------------------------- /imgs/P04_02_frame_0000011581.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000011581.jpg -------------------------------------------------------------------------------- /imgs/P04_05_frame_0000111070.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_05_frame_0000111070.png -------------------------------------------------------------------------------- /imgs/P04_12_frame_0000008119.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_12_frame_0000008119.png -------------------------------------------------------------------------------- /imgs/P04_02_frame_0000016034 take.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000016034 take.png -------------------------------------------------------------------------------- /imgs/P04_02_frame_0000065888 cut.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000065888 cut.png -------------------------------------------------------------------------------- /imgs/P04_04_frame_0000006974 dry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_04_frame_0000006974 dry.png -------------------------------------------------------------------------------- /imgs/P04_02_frame_0000033785 insert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000033785 insert.png -------------------------------------------------------------------------------- /imgs/P04_02_frame_0000065888 turn-on.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_02_frame_0000065888 turn-on.png -------------------------------------------------------------------------------- /imgs/P04_21_frame_0000006463 close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_21_frame_0000006463 close.png -------------------------------------------------------------------------------- /imgs/P04_21_frame_0000006463 insert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_21_frame_0000006463 insert.png -------------------------------------------------------------------------------- /imgs/P04_04_frame_0000006974 turn-off.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/P04_04_frame_0000006974 turn-off.png -------------------------------------------------------------------------------- /imgs/Screenshot from 2022-12-13 10-31-56.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/Screenshot from 2022-12-13 10-31-56.png -------------------------------------------------------------------------------- /imgs/Screenshot from 2022-12-14 15-54-23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/Screenshot from 2022-12-14 15-54-23.png -------------------------------------------------------------------------------- /imgs/Screenshot from 2022-12-14 16-28-24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/Screenshot from 2022-12-14 16-28-24.png -------------------------------------------------------------------------------- /imgs/Screenshot from 2022-12-14 16-31-39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/Screenshot from 2022-12-14 16-31-39.png -------------------------------------------------------------------------------- /imgs/Screenshot from 2022-12-14 16-36-05.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/imgs/Screenshot from 2022-12-14 16-36-05.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/rgb/P03_101_frame_0000000157.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000157.jpg -------------------------------------------------------------------------------- /P03_EPIC_100_example/rgb/P03_101_frame_0000000217.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000217.jpg -------------------------------------------------------------------------------- /P03_EPIC_100_example/rgb/P03_101_frame_0000000280.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000280.jpg -------------------------------------------------------------------------------- /P03_EPIC_100_example/rgb/P03_101_frame_0000000318.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000318.jpg -------------------------------------------------------------------------------- /P03_EPIC_100_example/rgb/P03_101_frame_0000000388.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000388.jpg -------------------------------------------------------------------------------- /P03_EPIC_100_example/rgb/P03_101_frame_0000000426.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000426.jpg -------------------------------------------------------------------------------- /P03_EPIC_100_example/rgb/P03_101_frame_0000000471.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000471.jpg -------------------------------------------------------------------------------- /P03_EPIC_100_example/rgb/P03_101_frame_0000000530.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000530.jpg -------------------------------------------------------------------------------- /P03_EPIC_100_example/rgb/P03_101_frame_0000000563.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000563.jpg -------------------------------------------------------------------------------- /P03_EPIC_100_example/rgb/P03_101_frame_0000000626.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/rgb/P03_101_frame_0000000626.jpg -------------------------------------------------------------------------------- /P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000157.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000157.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000217.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000217.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000280.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000280.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000318.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000318.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000388.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000388.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000426.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000426.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000471.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000471.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000530.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000530.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000563.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000563.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000626.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/VISOR_masks/P03_101_frame_0000000626.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000157.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000157.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000217.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000217.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000280.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000280.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000318.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000318.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000388.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000388.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000426.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000426.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000471.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000471.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000530.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000530.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000563.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000563.pkl 
-------------------------------------------------------------------------------- /P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000626.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/easy_EPIC_Aff/P03_101_frame_0000000626.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000157.jpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000157.jpg.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000217.jpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000217.jpg.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000280.jpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000280.jpg.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000318.jpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000318.jpg.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000388.jpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000388.jpg.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000426.jpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000426.jpg.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000471.jpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000471.jpg.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000530.jpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000530.jpg.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000563.jpg.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000563.jpg.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000626.jpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/COLMAP_masks/P03_101_frame_0000000626.jpg.png -------------------------------------------------------------------------------- /P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000157.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000157.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000217.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000217.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000280.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000280.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000318.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000318.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000388.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000388.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000426.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000426.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000471.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000471.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000530.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000530.pkl -------------------------------------------------------------------------------- 
/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000563.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000563.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000626.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/complex_EPIC_Aff/P03_101_frame_0000000626.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000157.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000157.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000217.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000217.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000280.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000280.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000318.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000318.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000388.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000388.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000426.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000426.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000471.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000471.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000530.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000530.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000563.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000563.pkl -------------------------------------------------------------------------------- /P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000626.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmur98/epic_kitchens_affordances/HEAD/P03_EPIC_100_example/3D_output_aff/affordances_P03_101_frame_0000000626.pkl -------------------------------------------------------------------------------- /data_egom.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | import copy 5 | from PIL import Image # using pillow-simd for increased speed 6 | import PIL.Image as pil 7 | import torch 8 | import torch.utils.data as data 9 | from torchvision import transforms 10 | import glob 11 | import collections 12 | import cv2 13 | import open3d as o3d 14 | import pandas as pd 15 | from read_write_model import read_model 16 | from utils_read_annotations import EP100_and_VISOR_annotations 17 | 18 | 19 | Camera = collections.namedtuple("Camera", ["id", "model", "width", "height", "params"]) 20 | 21 | class VideoSequentialDataset(data.Dataset): 22 | """Superclass for sequential images dataloaders 23 | """ 24 | def __init__(self, data_path, kitchen, height, width, frame_idxs): 25 | super(VideoSequentialDataset, self).__init__() 26 | self.colmap_poses = os.path.join(data_path, kitchen,'colmap') 27 | self.masks = os.path.join(data_path, kitchen, 'selected_plus_guided_masks') 28 | self.rgb = os.path.join(data_path, kitchen, 'selected_plus_guided_rgb') 29 | 30 | self.filenames = self.read_directory() 31 | self.height = height 32 | self.width = width 33 | self.colors = self.get_colormap() 34 | self.VISOR_path = '...' 
35 | self.EP100_and_VISOR_reader = EP100_and_VISOR_annotations(self.VISOR_path, self.rgb, kitchen) 36 | self.frame_idxs = frame_idxs 37 | 38 | self.cameras_Colmap, self.imgs_Colmap, self.pts_Colmap = read_model(self.colmap_poses, ext=".txt") 39 | self.fx = self.cameras_Colmap[1].params[0] 40 | self.fy = self.cameras_Colmap[1].params[1] 41 | self.cx = self.cameras_Colmap[1].params[2] 42 | self.cy = self.cameras_Colmap[1].params[3] 43 | 44 | 45 | def read_directory(self): 46 | paths = glob.glob(os.path.join(self.rgb, '*.jpg')) 47 | paths.sort() 48 | return paths 49 | 50 | def __len__(self): 51 | return len(self.filenames) 52 | 53 | def __getitem__(self, index): 54 | inputs = {} 55 | full_filename = self.filenames[index] 56 | for i in self.frame_idxs: 57 | inputs[("color", i)] = self.get_color(self.filenames[index + i]) 58 | inputs["full_filename"] = full_filename 59 | inputs["filename"] = full_filename.split('/')[-1] 60 | 61 | sequence = full_filename.split('/')[-1].split('_')[0:2] 62 | #Join the two string elements of the list with a '_' in the middle 63 | inputs['sequence'] = '_'.join(sequence) 64 | inputs["subset"] = 'train' 65 | inputs["aff_annotation"], inputs["EP100_annotation"], inputs['VISOR_annotation'] = self.EP100_and_VISOR_reader.affordance_hotspot(inputs["filename"], inputs['subset'], inputs['sequence']) 66 | inputs["exists_affordance"] = self.check_exists_affordance(inputs["aff_annotation"]) 67 | return inputs 68 | 69 | def check_exists_affordance(self, aff_annotation): 70 | if aff_annotation is not None: #We have an annotation on EP100 71 | if len(aff_annotation['interacting_objects']) > 0: #The IoU is above the threshold 72 | return True 73 | return False 74 | 75 | def get_color(self, filename): 76 | img = cv2.imread(filename) 77 | return img 78 | 79 | def get_mask(self, filename): 80 | mask = cv2.imread(filename.replace('sampled_rgb', 'sampled_masks').replace('.jpg', '.png'), cv2.IMREAD_GRAYSCALE) 81 | return mask 82 | 83 | 84 | def get_colormap(self, N=256, normalized = False): 85 | def bitget(byteval, idx): 86 | return ((byteval & (1 << idx)) != 0) 87 | 88 | dtype = 'float32' if normalized else 'uint8' 89 | cmap = np.zeros((N, 3), dtype=dtype) 90 | for i in range(N): 91 | r = g = b = 0 92 | c = i 93 | for j in range(8): 94 | r = r | (bitget(c, 0) << 7-j) 95 | g = g | (bitget(c, 1) << 7-j) 96 | b = b | (bitget(c, 2) << 7-j) 97 | c = c >> 3 98 | cmap[i] = np.array([r, g, b]) 99 | 100 | cmap = cmap/255 if normalized else cmap 101 | cmap_dict = {} 102 | for i in range(N): 103 | cmap_dict[i] = [cmap[i,0], cmap[i,1], cmap[i, 2]] 104 | return cmap_dict 105 | 106 | -------------------------------------------------------------------------------- /read_cameras_colmap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import matplotlib.pyplot as plt 4 | import collections 5 | import open3d as o3d 6 | 7 | Camera = collections.namedtuple( 8 | "Camera", ["id", "model", "width", "height", "params"]) 9 | 10 | def qvec2rotmat(qvec): 11 | return np.array([ 12 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 13 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 14 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 15 | [2 | qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 16 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 17 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 18 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 19 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 20 | 1 - 2 *
qvec[1]**2 - 2 * qvec[2]**2]]) 21 | 22 | def read_cameras_text(path): 23 | """ 24 | see: src/base/reconstruction.cc 25 | void Reconstruction::WriteCamerasText(const std::string& path) 26 | void Reconstruction::ReadCamerasText(const std::string& path) 27 | """ 28 | cameras = {} 29 | with open(path, "r") as fid: 30 | while True: 31 | line = fid.readline() 32 | if not line: 33 | break 34 | line = line.strip() 35 | if len(line) > 0 and line[0] != "#": 36 | elems = line.split() 37 | camera_id = int(elems[0]) 38 | model = elems[1] 39 | width = int(elems[2]) 40 | height = int(elems[3]) 41 | params = np.array(tuple(map(float, elems[4:]))) 42 | cameras[camera_id] = Camera(id=camera_id, model=model, 43 | width=width, height=height, 44 | params=params) 45 | return cameras 46 | 47 | def read_images_txt(images_path): 48 | if not os.path.exists(images_path): 49 | raise Exception(f"No such file : {images_path}") 50 | 51 | with open(images_path, 'r') as f: 52 | lines = f.readlines() 53 | 54 | if len(lines) < 2: 55 | raise Exception(f"Invalid images.txt file : {images_path}") 56 | 57 | comments = lines[:4] 58 | contents = lines[4:] 59 | 60 | img_ids = [] 61 | img_names = [] 62 | t_poses = [] 63 | R_poses = [] 64 | 65 | 66 | for img_idx, content in enumerate(contents[::2]): 67 | content_items = content.split(' ') 68 | img_id = content_items[0] 69 | q_wxyz = np.array(content_items[1:5], dtype=np.float32) # colmap uses wxyz 70 | t_xyz = np.array(content_items[5:8], dtype=np.float32) 71 | #Transform a quaternion into a rotation matrix following Hamilton convention 72 | R = qvec2rotmat(q_wxyz) 73 | t = -R.T @ t_xyz 74 | R = R.T 75 | img_name = content_items[9] 76 | 77 | img_ids.append(img_id) 78 | img_names.append(img_name) 79 | t_poses.append(t) 80 | R_poses.append(R) 81 | 82 | return img_ids, img_names, t_poses, R_poses 83 | 84 | def plot_cameras_colmap(img_names, R_poses, t_poses): 85 | fig = plt.figure() 86 | ax = fig.add_subplot(111, projection='3d') 87 | for i in range(len(img_names)): 88 | T = np.column_stack((R_poses[i], t_poses[i])) 89 | T = np.vstack((T, (0, 0, 0, 1))) 90 | cam_pos = T[:3, 3] 91 | ax.scatter(cam_pos[0], cam_pos[1], cam_pos[2], c='r', marker='o') 92 | #Add a text in each point of the scatter plot 93 | ax.text(cam_pos[0], cam_pos[1], cam_pos[2], str(i), size=10, zorder=1, color='k') 94 | plt.show() 95 | 96 | 97 | 98 | camera = read_cameras_text('/home/lmur/Documents/Monodepth/sequences/P02_101_colmap/cameras.txt') 99 | cam = camera[1] 100 | 101 | if cam.model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"): 102 | fx = cam.params[0] 103 | fy = cam.params[1] 104 | cx = cam.params[2] 105 | cy = cam.params[3] 106 | 107 | # intrinsics 108 | K_int = np.identity(3) 109 | K_int[0, 0] = fx 110 | K_int[1, 1] = fy 111 | K_int[0, 2] = cx 112 | K_int[1, 2] = cy 113 | K_inv = np.linalg.inv(K_int) 114 | 115 | img_ids, img_names, t_poses, R_poses = read_images_txt('.../P02_101_part_1/sparse/images.txt') 116 | plot_cameras_colmap(img_names, R_poses, t_poses) 117 | 118 | """ 119 | visor = o3d.visualization.Visualizer() 120 | visor.create_window() 121 | for i in range(len(img_names)): 122 | T = np.column_stack((R_poses[i], t_poses[i])) 123 | T = np.vstack((T, (0, 0, 0, 1))) 124 | axis = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5) 125 | axis.transform(T) 126 | visor.add_geometry(axis) 127 | visor.poll_events() 128 | visor.update_renderer() 129 | visor.run() 130 | """ 131 | 132 | 133 | -------------------------------------------------------------------------------- /data.py:
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | import os 4 | from PIL import Image 5 | from scipy.stats import multivariate_normal 6 | import time 7 | import cv2 8 | from utils.valid_interactions import colormap_interactions 9 | #Create a dataset class to load an image and its corresponding pickle file 10 | 11 | class Ego_Metric_training_dataset(): 12 | def __init__(self, Ego_Metric_dataset_path): 13 | self.main_dir = Ego_Metric_dataset_path 14 | self.samples_txt = os.path.join(self.main_dir, 'samples.txt') 15 | self.img_dir = 'selected_plus_guided_rgb' 16 | self.label_2d = '2d_output_labels' 17 | self.label_3d = 'aff_on_3d' 18 | self.valid_verbs = ['take', 'remove', 'put', 'insert', 'throw', 'wash', 'dry', 'open', 'turn-on', 19 | 'close', 'turn-off', 'mix', 'fill', 'add', 'cut', 'peel', 'empty', 20 | 'shake', 'squeeze', 'press', 'cook', 'move', 'adjust', 'eat', 21 | 'drink', 'apply', 'sprinkle', 'fold', 'sort', 'clean', 'slice', 'pick'] 22 | self.height = 480 23 | self.width = 854 24 | self.size = 500 25 | self.samples = self.obtain_samples() 26 | self.pos = self.get_pos_for_gaussian() 27 | self.gaussian = self.get_gaussian() 28 | self.colormap_interactions = colormap_interactions 29 | 30 | def obtain_samples(self): 31 | samples = [] 32 | for kitchen in os.listdir(self.main_dir): 33 | if kitchen != 'samples.txt': 34 | if not os.path.exists(os.path.join(self.main_dir, kitchen, self.label_2d)): 35 | continue 36 | for sample in os.listdir(os.path.join(self.main_dir, kitchen, self.label_2d)): 37 | sample_id = sample.split('.')[0] 38 | samples.append(kitchen + '/' + sample_id) 39 | return samples 40 | 41 | def __len__(self): 42 | return len(self.samples) 43 | 44 | def get_pos_for_gaussian(self): 45 | x, y = np.mgrid[0:self.width:1, 0:self.height:1] 46 | pos = np.empty(x.shape + (2,)) 47 | pos[:, :, 0] = x 48 | pos[:, :, 1] = y 49 | return pos 50 | 51 | def get_gaussian(self): 52 | x, y = np.mgrid[0:self.size:1, 0:self.size:1] 53 | pos = np.empty(x.shape + (2,)) 54 | pos[:, :, 0] = x 55 | pos[:, :, 1] = y 56 | gaussian = multivariate_normal(mean=[self.size//2, self.size//2], cov=np.eye(2)*1000) 57 | return gaussian.pdf(pos) 58 | 59 | def get_masks_from_pickle(self, data): 60 | #Cluster the interactions 61 | interaction_clusters = [] 62 | interaction_coordinates = {} 63 | verbs_data = data['verbs'] 64 | points_data = data['points'] 65 | for i in range(len(verbs_data)): #Before good_interactions 66 | if verbs_data[i] not in interaction_clusters: 67 | interaction_clusters.append(verbs_data[i]) 68 | for i in range(len(interaction_clusters)): 69 | interaction_coordinates[interaction_clusters[i]] = [] 70 | for i in range(len(verbs_data)): 71 | interaction_coordinates[verbs_data[i]].append(points_data[i]) 72 | 73 | #Draw the hotspots of the clusters 74 | c = 0 75 | masks = np.zeros((len(self.valid_verbs), self.height, self.width)) 76 | for verb_class in self.valid_verbs: 77 | if verb_class in interaction_coordinates.keys(): 78 | prob_sum = np.zeros((self.width, self.height)) 79 | for j in range(len(interaction_coordinates[verb_class])): 80 | point = interaction_coordinates[verb_class][j][0:2].astype(int) 81 | prob = np.zeros((self.width, self.height)) 82 | 83 | if (self.width - point[0]) > self.size // 2: 84 | gauss_right = self.size 85 | prob_right = point[0] + self.size // 2 86 | else: 87 | gauss_right = self.width - point[0] + self.size // 2 88 | prob_right = self.width 89 | if point[0] > self.size // 2: 90 | gauss_left 
= 0 91 | prob_left = point[0] - self.size // 2 92 | else: 93 | gauss_left = self.size // 2 - point[0] 94 | prob_left = 0 95 | if (self.height - point[1]) > self.size // 2: 96 | gauss_bottom = self.size 97 | prob_bottom = point[1] + self.size // 2 98 | else: 99 | gauss_bottom = self.height - point[1] + self.size // 2 100 | prob_bottom = self.height 101 | if point[1] > self.size // 2: 102 | gauss_top = 0 103 | prob_top = point[1] - self.size // 2 104 | else: 105 | gauss_top = self.size // 2 - point[1] 106 | prob_top = 0 107 | prob[int(prob_left):int(prob_right),int(prob_top):int(prob_bottom)] = self.gaussian[int(gauss_left):int(gauss_right),int(gauss_top):int(gauss_bottom)] 108 | prob_sum += prob 109 | 110 | prob_sum = (prob_sum / np.max(prob_sum)).T 111 | prob_sum[prob_sum < 0.25] = 0 #If prob_sum < 0.25, set it to 0 112 | prob_sum[prob_sum >= 0.25] = 1 #If prob_sum >= 0.25, set it to 1 113 | masks[c, :, :] = prob_sum 114 | c += 1 115 | return masks 116 | 117 | def visualize(self, img, masks, selected_verb): 118 | img_copy = img.copy() 119 | selected_verb_idx = self.valid_verbs.index(selected_verb) 120 | selected_mask = masks[selected_verb_idx, :, :] 121 | selected_mask_2 = selected_mask[:, :, np.newaxis].astype(np.uint8) 122 | color = np.array(self.colormap_interactions[selected_verb]).reshape(1, 3) 123 | prob_paint = (selected_mask_2 @ color).astype(np.uint8) 124 | img_copy = cv2.addWeighted(img_copy, 1.0, prob_paint, 1.0, 0) 125 | cv2.imwrite(os.path.join('/home/lmur/Desktop/EGO_METRIC_Dataset_v3/Kitchens/P04_EPIC_55/show/img.png'), img_copy) 126 | 127 | 128 | def __getitem__(self, idx): 129 | kitchen, sample_id = self.samples[idx].split('/') 130 | #Load the image 131 | img_path = os.path.join(self.main_dir, kitchen, self.img_dir, sample_id + '.jpg') 132 | img = cv2.imread(img_path) 133 | #Load the labels 134 | label_2d_path = os.path.join(self.main_dir, kitchen, self.label_2d, sample_id + '.pkl') 135 | with open(label_2d_path, 'rb') as f: 136 | data_2d = pickle.load(f) 137 | masks = self.get_masks_from_pickle(data_2d) 138 | return img, masks 139 | 140 | data = Ego_Metric_training_dataset('...') 141 | img, masks = data[15] 142 | #data.visualize(img, masks, 'cut') 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Epic-Aff Dataset 2 | 3 | This is the dataset introduced in the ICCV 2023 paper **Multi-label affordance mapping from egocentric vision** 🎉🎉, by Lorenzo Mur-Labadia, Ruben Martinez-Cantin and Josechu Guerrero Campo from the University of Zaragoza. Please do not hesitate to send any questions to *lmur@unizar.es* ✉️ 5 | 6 | ## Dataset creation: automatic annotations 7 | 8 | The EPIC-Aff dataset is a new dataset built on EPIC Kitchens 100 and EPIC Kitchens VISOR, containing **automatic annotations with multi-label segmentation masks for the interaction hotspots**, generated by the intersection of both datasets. We provide **38,335** images in two different versions of the dataset (easy EPIC-Aff with 20 classes and complex EPIC-Aff with 50 classes). The annotations represent the hotspots in space that afford an action, extracted from the past interactions performed in that region and the current scene context (the present objects). Please refer to the paper for more information. 9 | 10 | The total size of the dataset is 15 GB, which we have split by data type.
We also provide an example sequence in P03_EPIC_100_example. The full dataset can be downloaded [here](https://zenodo.org/record/8162678). 11 | 12 | - **Images** 📸: we provide the images already extracted from the videos of EPIC Kitchens 100 at 480x854 resolution. This avoids downloading the approximately 700 GB of that dense dataset. [link](https://zenodo.org/record/8162678/files/EPIC_Aff_images.zip?download=1) 13 | 14 | - **Annotations in 3D** 📝: in pickle format, we provide a dictionary with the COLMAP data (camera pose, camera intrinsics and keypoints), the distribution of the interacting objects, the annotation of the interaction, and the distribution of the neutral objects. We encourage the research community to use this data to develop new tasks such as goal path planning. [link](https://zenodo.org/record/8162678/files/EPIC_Aff_3D_output.zip?download=1) 15 | 16 | - **Affordance annotations in 2D** 📝: we have already run project_from_3D_to_2D.py for all the sequences in order to provide a pickle dictionary with the location of the interaction points for the afforded actions. We provide two versions of the dataset: 17 | - Easy EPIC-Aff (20 classes): [link](https://zenodo.org/record/8162678/files/EPIC_Aff_20_classes_2d_output_labels.zip?download=1) 18 | - Complex EPIC-Aff (50 classes): [link](https://zenodo.org/record/8162678/files/EPIC_Aff_50_classes_2d_output_labels.zip?download=1) 19 | 20 | - **VISOR masks** 🎭: the semantic masks with the active objects, which we consider dynamic. In order to obtain the dynamic masks for COLMAP, we select the dynamic and static objects. [link](https://zenodo.org/record/8162678/files/EPIC_Aff_masks_from_VISOR.zip?download=1) 21 | 22 | Below we detail the procedure for extracting multi-label affordance regions. 23 | 24 | ### 1. Detect the spatial localization of the interaction 25 | 26 | First, we use the narration annotations of EPIC Kitchens 100 to obtain the semantics of the interaction (e.g., "cut onion"). Then, we use the masks provided by EPIC VISOR to discover the location of that interaction, placed at the center of the intersection between the respective hand/glove and the interacting object. This tells us where the interaction occurs at that time step (a sketch of this computation follows the images below). 27 | 28 |

29 | 30 | 31 | 32 |

33 |

34 | 35 | 36 | 37 |

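To make the geometry of this step concrete, here is a minimal sketch (our own illustration, not code from this repository) that computes the interaction point as the centroid of the hand/object mask intersection, assuming the two binary masks have already been decoded from the VISOR annotations:

```python
import numpy as np

def interaction_center(hand_mask, object_mask):
    """Centroid (x, y) of the hand/object overlap, or None if they do not touch."""
    overlap = np.logical_and(hand_mask > 0, object_mask > 0)
    ys, xs = np.nonzero(overlap)
    if xs.size == 0:
        return None  # no hand/object contact in this frame
    return float(xs.mean()), float(ys.mean())
```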
38 | 39 | ### 2. Lift everything to 3D 40 | 41 | In a second stage, using Structure from Motion (COLMAP), we obtain the camera pose and the global localization of the interaction in 3D space. This creates a historical distribution of all the actions taken in that environment, cross-linked across different episodes (a minimal sketch of this lifting step follows the images below). In the following images, we show in blue the different camera poses, in grey the COLMAP keypoints, and the different locations where the interactions occur. For each specific physical kitchen, we accumulated all the EPIC videos where the agent interacted. Note that for some sequences the EPIC-55 and EPIC-100 recordings come from different environments, while for others it is the same environment. 42 | 43 |

44 | 45 | 46 |

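As a minimal sketch of this lifting step (the names here are illustrative; it mirrors `obtain_rgbd` and `image_to_extrinsics` in inference_v2.py), an interaction pixel is back-projected with its metric depth and the COLMAP intrinsics, then mapped to world coordinates with the camera-to-world pose:

```python
import numpy as np

def pixel_to_world(u, v, depth, fx, fy, cx, cy, R, t):
    """Back-project pixel (u, v) at metric depth, then map camera -> world.

    (R, t) is the camera-to-world pose, i.e. R = R_colmap.T and
    t = -R_colmap.T @ t_colmap, as computed in image_to_extrinsics.
    """
    p_cam = np.array([(u - cx) * depth / fx,   # X in camera coordinates
                      (v - cy) * depth / fy,   # Y in camera coordinates
                      depth])                  # Z in camera coordinates
    return R @ p_cam + t
```

In inference_v2.py the metric depth itself comes from a MiDaS prediction, rescaled with the median depth of the COLMAP keypoints.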
47 | 48 | This creates a 3D representation with all the past interactions performed in that environment. 49 | 50 | ### 3. Reproject from 3D to 2D to obtain the affordances 51 | 52 | Using the camera intrinsic matrix and the camera pose provided in the "3D_output" directories, we reproject all the past interactions by running *"project_from_3D_to_2D.py"* (a minimal sketch of this projection follows the images below). Since the affordances are all the possible actions available to the agent depending on the context, we filter the past interactions by the current distribution of the objects at each time step. For that, we use the VISOR annotations for the active objects, and we assume a constant distribution of the passive objects (cupboard, oven, hob, fridge), since their distribution does not change over time. For example, although the VISOR annotation does not detect any "active cupboard", if we opened a cupboard at that location in the past, it means that there is an inactive cupboard there. Therefore, we should report that past interaction as an affordance, since it is a possible action associated with that 3D region. 53 | 54 | We show some images of different affordances. Each point represents the location of a past interaction whose interacting objects are present. 55 | 56 |

57 | 58 | 59 |

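A minimal sketch of this projection (illustrative names; project_from_3D_to_2D.py is the reference implementation). Since the stored `R_pos`/`t_pos` are camera-to-world, they are first inverted back to COLMAP's world-to-camera convention:

```python
import numpy as np

def world_to_pixel(X_world, fx, fy, cx, cy, R_pos, t_pos):
    """Project a 3D world point into the image of the camera with pose (R_pos, t_pos)."""
    R_wc = R_pos.T              # world -> camera rotation
    t_wc = -R_pos.T @ t_pos     # world -> camera translation
    p_cam = R_wc @ X_world + t_wc
    if p_cam[2] <= 0:
        return None             # the point is behind the camera
    u = fx * p_cam[0] / p_cam[2] + cx
    v = fy * p_cam[1] / p_cam[2] + cy
    return u, v                 # keep only points falling inside the image
```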
60 | 61 | Finally, we apply a Gaussian heatmap to each afforded action in order to create a potential interaction region (see the sketch after the images below). We show, respectively: takeable, insertable, cuttable and dryable. Note that at inference we assign a positive affordance label wherever the Gaussian heatmap is greater than 0.25. 62 | 63 |

64 | 65 | 66 | 67 | 68 |

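As a minimal sketch of how such a heatmap can be built from a *2D_output_labels* pickle (a simplified, unoptimized version of `get_masks_from_pickle` in data.py, assuming the pickle stores parallel `'verbs'` and `'points'` lists):

```python
import pickle
import numpy as np
from scipy.stats import multivariate_normal

HEIGHT, WIDTH = 480, 854

def verb_hotspot_mask(label_path, verb, cov=1000.0, threshold=0.25):
    """Binary hotspot mask for one afforded action, thresholded at 0.25."""
    with open(label_path, 'rb') as f:
        data = pickle.load(f)
    xs, ys = np.mgrid[0:WIDTH:1, 0:HEIGHT:1]
    pos = np.dstack((xs, ys))                      # (WIDTH, HEIGHT, 2) pixel grid
    heat = np.zeros((WIDTH, HEIGHT))
    for v, point in zip(data['verbs'], data['points']):
        if v == verb:                              # one Gaussian per past interaction point
            center = np.asarray(point)[:2]
            heat += multivariate_normal(mean=center, cov=np.eye(2) * cov).pdf(pos)
    if heat.max() > 0:
        heat /= heat.max()                         # normalize to [0, 1]
    return (heat.T >= threshold).astype(np.uint8)  # (HEIGHT, WIDTH) binary mask
```

data.py avoids evaluating a full-resolution Gaussian per point by cropping a precomputed kernel, which is considerably faster; the 0.25 threshold matches the inference rule described above.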
69 | 70 | *Note*: the files in the *2D_output_labels* directories only contain the pixel points with the affordances and their semantic labels. When you run data.py, the dataloader incorporates a function that builds the Gaussian heatmaps in an efficient way. This avoids loading the *N* masks. 71 | 72 | ## Dataset pipeline 73 | We also share the code of the dataset extraction pipeline, and we encourage the research community to apply it in other scenarios. 74 | -------------------------------------------------------------------------------- /inference_v2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import data_egom 5 | import glob 6 | import matplotlib.pyplot as plt 7 | import open3d as o3d 8 | import pickle 9 | import cv2 10 | import time 11 | 12 | 13 | class Inference: 14 | def __init__(self): 15 | self.height = 480 16 | self.width = 854 17 | self.frame_idxs = [0] 18 | self.data_path = '...' 19 | self.kitchen = 'P03_EPIC_100' 20 | 21 | self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 22 | self.dataset = data_egom.VideoSequentialDataset(self.data_path, self.kitchen, self.height, self.width, self.frame_idxs) 23 | self.palette = self.dataset.colors 24 | self.output_dir = os.path.join(self.data_path, self.kitchen, '3D_output') 25 | if not os.path.exists(self.output_dir): 26 | os.mkdir(self.output_dir) 27 | self.output_dir_2d = os.path.join(self.data_path, self.kitchen, 'aff_on_2d') 28 | if not os.path.exists(self.output_dir_2d): 29 | os.mkdir(self.output_dir_2d) 30 | 31 | self.alpha = 0.6 32 | self.depth_model_type = "DPT_Hybrid" #"DPT_Large" 33 | self.depth_model = torch.hub.load("intel-isl/MiDaS", self.depth_model_type) 34 | self.depth_model.to(self.device) 35 | self.depth_model.eval() 36 | self.depth_transforms = torch.hub.load("intel-isl/MiDaS", "transforms").dpt_transform 37 | 38 | 39 | def depth_extractor(self, img, filename): 40 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 41 | input_batch = self.depth_transforms(img).to(self.device) 42 | with torch.no_grad(): 43 | prediction = self.depth_model(input_batch) 44 | prediction = torch.nn.functional.interpolate( 45 | prediction.unsqueeze(1), 46 | size=img.shape[:2], 47 | mode="bicubic", 48 | align_corners=False,).squeeze() 49 | disparity = prediction.cpu().numpy() 50 | depth = 1 / disparity 51 | return depth 52 | 53 | def paint_affordance_hotpots(self, points, label): #SIMPLIFY THIS FUNCTION 54 | mask = np.zeros((self.height, self.width)) 55 | aff = label 56 | aff_center = np.array(aff['affordance_center']) 57 | cv2.circle(mask, (int(aff_center[0]), int(aff_center[1])), 0, 1, -1) 58 | mask = mask.astype(bool) 59 | points = points[mask] 60 | 61 | painting_color = np.array(self.palette[label['verb_id']]) 62 | img = (np.ones((self.height, self.width, 3)) * painting_color)[mask] 63 | return points, img 64 | 65 | def obtain_rgbd(self, depth, scale): 66 | z = depth * scale 67 | x = (np.tile(np.arange(self.width), (self.height, 1)) - self.dataset.cx) * z / self.dataset.fx 68 | y = (np.tile(np.arange(self.height), (self.width, 1)).T - self.dataset.cy) * z / self.dataset.fy 69 | points = np.stack([x, y, z], axis=2) #h, w, 3 70 | return points 71 | 72 | def new_scale_SfM_depth(self, depth, colmap_depths, colmap_coords): 73 | SfM_depth, NN_depth = [], [] 74 | for kypt in range(len(colmap_coords)): 75 | SfM_depth.append(colmap_depths[kypt]) #Interpretation 1 of the depth: the distance between the camera plane and the
parallel plane that passes through the 3D point 76 | # Change order in coords, from XY to YX!!! 77 | u_interp = colmap_coords[kypt, 1] % 1 78 | v_interp = colmap_coords[kypt, 0] % 1 79 | u = int(colmap_coords[kypt, 1]) 80 | v = int(colmap_coords[kypt, 0]) 81 | if u < self.width - 1 and v < self.height - 1: 82 | interpolated_NN_depth = (1 - u_interp) * (1 - v_interp) * depth[v, u] + u_interp * (1 - v_interp) * depth[v, u + 1] + (1 - u_interp) * v_interp * depth[v + 1, u] + u_interp * v_interp * depth[v + 1, u + 1] 83 | NN_depth.append(interpolated_NN_depth) 84 | if u > self.width: 85 | print('warning 1 !!!', u) 86 | if v > self.height: 87 | print('warning 2 !!!', v) 88 | local_scale = np.median(np.array(SfM_depth)) / np.median(np.array(NN_depth)) 89 | return local_scale 90 | 91 | def image_to_extrinsics(self, img): 92 | Rc, tc = img.qvec2rotmat(), img.tvec 93 | t = -Rc.T @ tc 94 | R = Rc.T 95 | return R, t 96 | 97 | def paint_aff_on_2D(self, img, frame_dict, keypoints): 98 | label = frame_dict["aff_annotation"] 99 | img_name = frame_dict['filename'] 100 | ep100_label = frame_dict['EP100_annotation'] 101 | visors_objects = frame_dict['VISOR_annotation'] 102 | font = cv2.FONT_HERSHEY_PLAIN 103 | c = 0 104 | if label is not None: 105 | for h in range(len(label['hands'])): 106 | hand_bbox = label['hands'][h]['hand_bbox'] 107 | cv2.rectangle(img, (hand_bbox[0], hand_bbox[1]), (hand_bbox[2], hand_bbox[3]), color=(0, 255, 0), thickness=2) 108 | for o in range(len(label['neutral_objects'])): 109 | obj_bbox = label['neutral_objects'][o]['noun_bbox'] 110 | cv2.rectangle(img, (obj_bbox[0], obj_bbox[1]), (obj_bbox[2], obj_bbox[3]), color=(0, 255, 255), thickness=2) 111 | for aff_o in range(len(label['interacting_objects'])): 112 | obj_bbox = label['interacting_objects'][aff_o]['noun_bbox'] 113 | cv2.rectangle(img, (obj_bbox[0], obj_bbox[1]), (obj_bbox[2], obj_bbox[3]), color=(255, 0, 0), thickness=5) 114 | x_center, y_center = label['interacting_objects'][aff_o]['affordance_center'] 115 | cv2.circle(img, (int(x_center), int(y_center)), radius=10, color=(255, 0, 0), thickness=15) 116 | text = 'The ' + label['interacting_objects'][aff_o]['hand'] + ' is ' + label['interacting_objects'][aff_o]['verb'] + ' the ' + label['interacting_objects'][aff_o]['noun'] 117 | cv2.putText(img, text, (10, 30 * (c + 1)), font, 1.5, (0, 255, 0), 2, cv2.LINE_AA) 118 | c += 1 119 | if keypoints is not None: 120 | for kp in range(len(keypoints)): 121 | cv2.circle(img, (int(keypoints[kp, 1]), int(keypoints[kp, 0])), radius=1, color=(255, 255, 255), thickness=1) 122 | if ep100_label is not None: 123 | for i in range(len(ep100_label)): 124 | text = 'EP100 original is: ' + ep100_label[i]['non_remapped_noun'] + ' remapped: ' + ep100_label[i]['noun'] + ' The verb ' + ep100_label[i]['verb'] 125 | cv2.putText(img, text, (10, 100 + i*20), font, 1.5, (255,0,0), 2, cv2.LINE_AA) 126 | for i in range(len(visors_objects)): 127 | cv2.putText(img, 'Object given by VISOR ' + visors_objects[i], (10, 110 + 30 * (i + 1)), font, 1.5, (0,0,255), 2, cv2.LINE_AA) 128 | cv2.imwrite(os.path.join(self.output_dir_2d, img_name), img) 129 | 130 | 131 | def run(self): 132 | all_abs_depth, all_abs_colors, cameras, all_keypoints, all_rgb_keypoints = [], [], [], [], [] 133 | global_counter = 0 134 | for i in range(len(self.dataset)): 135 | output_all = {} 136 | frame_dict = self.dataset[i] 137 | 138 | print('---------We are analyzing the frame', i, '--------- corresponding to the image', frame_dict['filename'], '---------') 139 | try: 140 | v = next(v for v in
--------------------------------------------------------------------------------
/inference_v2.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import torch
4 | import data_egom
5 | import glob
6 | import matplotlib.pyplot as plt
7 | import open3d as o3d
8 | import pickle
9 | import cv2
10 | import time
11 | 
12 | 
13 | class Inference:
14 |     def __init__(self):
15 |         self.height = 480
16 |         self.width = 854
17 |         self.frame_idxs = [0]
18 |         self.data_path = '...'
19 |         self.kitchen = 'P03_EPIC_100'
20 | 
21 |         self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
22 |         self.dataset = data_egom.VideoSequentialDataset(self.data_path, self.kitchen, self.height, self.width, self.frame_idxs)
23 |         self.palette = self.dataset.colors
24 |         self.output_dir = os.path.join(self.data_path, self.kitchen, '3D_output')
25 |         if not os.path.exists(self.output_dir):
26 |             os.mkdir(self.output_dir)
27 |         self.output_dir_2d = os.path.join(self.data_path, self.kitchen, 'aff_on_2d')
28 |         if not os.path.exists(self.output_dir_2d):
29 |             os.mkdir(self.output_dir_2d)
30 | 
31 |         self.alpha = 0.6
32 |         self.depth_model_type = "DPT_Hybrid" #"DPT_Large"
33 |         self.depth_model = torch.hub.load("intel-isl/MiDaS", self.depth_model_type)
34 |         self.depth_model.to(self.device)
35 |         self.depth_model.eval()
36 |         self.depth_transforms = torch.hub.load("intel-isl/MiDaS", "transforms").dpt_transform
37 | 
38 | 
39 |     def depth_extractor(self, img, filename):
40 |         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
41 |         input_batch = self.depth_transforms(img).to(self.device)
42 |         with torch.no_grad():
43 |             prediction = self.depth_model(input_batch)
44 |             prediction = torch.nn.functional.interpolate(
45 |                 prediction.unsqueeze(1),
46 |                 size=img.shape[:2],
47 |                 mode="bicubic",
48 |                 align_corners=False,).squeeze()
49 |         disparity = prediction.cpu().numpy()
50 |         depth = 1 / disparity
51 |         return depth
52 | 
53 |     def paint_affordance_hotpots(self, points, label): #TODO: simplify this function
54 |         mask = np.zeros((self.height, self.width))
55 |         aff = label
56 |         aff_center = np.array(aff['affordance_center'])
57 |         cv2.circle(mask, (int(aff_center[0]), int(aff_center[1])), 0, 1, -1) #radius 0 marks a single pixel
58 |         mask = mask.astype(bool)
59 |         points = points[mask]
60 | 
61 |         painting_color = np.array(self.palette[label['verb_id']])
62 |         img = (np.ones((self.height, self.width, 3)) * painting_color)[mask]
63 |         return points, img
64 | 
65 |     def obtain_rgbd(self, depth, scale):
66 |         z = depth * scale
67 |         x = (np.tile(np.arange(self.width), (self.height, 1)) - self.dataset.cx) * z / self.dataset.fx
68 |         y = (np.tile(np.arange(self.height), (self.width, 1)).T - self.dataset.cy) * z / self.dataset.fy
69 |         points = np.stack([x, y, z], axis=2) #h, w, 3
70 |         return points
71 | 
72 |     def new_scale_SfM_depth(self, depth, colmap_depths, colmap_coords):
73 |         SfM_depth, NN_depth = [], []
74 |         for kypt in range(len(colmap_coords)):
75 |             SfM_depth.append(colmap_depths[kypt]) #Interpretation 1 of the depth: the distance between the camera plane and the parallel plane that passes through the 3D point
76 |             # Change order in coords, from XY to YX!
77 |             u_interp = colmap_coords[kypt, 1] % 1
78 |             v_interp = colmap_coords[kypt, 0] % 1
79 |             u = int(colmap_coords[kypt, 1])
80 |             v = int(colmap_coords[kypt, 0])
81 |             if u < self.width - 1 and v < self.height - 1:
82 |                 interpolated_NN_depth = (1 - u_interp) * (1 - v_interp) * depth[v, u] + u_interp * (1 - v_interp) * depth[v, u + 1] + (1 - u_interp) * v_interp * depth[v + 1, u] + u_interp * v_interp * depth[v + 1, u + 1]
83 |                 NN_depth.append(interpolated_NN_depth)
84 |             if u > self.width:
85 |                 print('Warning: u coordinate out of bounds!', u)
86 |             if v > self.height:
87 |                 print('Warning: v coordinate out of bounds!', v)
88 |         local_scale = np.median(np.array(SfM_depth)) / np.median(np.array(NN_depth))
89 |         return local_scale
90 | 
91 |     def image_to_extrinsics(self, img):
92 |         Rc, tc = img.qvec2rotmat(), img.tvec
93 |         t = -Rc.T @ tc
94 |         R = Rc.T
95 |         return R, t
96 | 
97 |     def paint_aff_on_2D(self, img, frame_dict, keypoints):
98 |         label = frame_dict["aff_annotation"]
99 |         img_name = frame_dict['filename']
100 |         ep100_label = frame_dict['EP100_annotation']
101 |         visors_objects = frame_dict['VISOR_annotation']
102 |         font = cv2.FONT_HERSHEY_PLAIN
103 |         c = 0
104 |         if label is not None:
105 |             for h in range(len(label['hands'])):
106 |                 hand_bbox = label['hands'][h]['hand_bbox']
107 |                 cv2.rectangle(img, (hand_bbox[0], hand_bbox[1]), (hand_bbox[2], hand_bbox[3]), color=(0, 255, 0), thickness=2)
108 |             for o in range(len(label['neutral_objects'])):
109 |                 obj_bbox = label['neutral_objects'][o]['noun_bbox']
110 |                 cv2.rectangle(img, (obj_bbox[0], obj_bbox[1]), (obj_bbox[2], obj_bbox[3]), color=(0, 255, 255), thickness=2)
111 |             for aff_o in range(len(label['interacting_objects'])):
112 |                 obj_bbox = label['interacting_objects'][aff_o]['noun_bbox']
113 |                 cv2.rectangle(img, (obj_bbox[0], obj_bbox[1]), (obj_bbox[2], obj_bbox[3]), color=(255, 0, 0), thickness=5)
114 |                 x_center, y_center = label['interacting_objects'][aff_o]['affordance_center']
115 |                 cv2.circle(img, (int(x_center), int(y_center)), radius=10, color=(255, 0, 0), thickness=15)
116 |                 text = 'The ' + label['interacting_objects'][aff_o]['hand'] + ' is ' + label['interacting_objects'][aff_o]['verb'] + ' the ' + label['interacting_objects'][aff_o]['noun']
117 |                 cv2.putText(img, text, (10, 30 * (c + 1)), font, 1.5, (0, 255, 0), 2, cv2.LINE_AA)
118 |                 c += 1
119 |         if keypoints is not None:
120 |             for kp in range(len(keypoints)):
121 |                 cv2.circle(img, (int(keypoints[kp, 1]), int(keypoints[kp, 0])), radius=1, color=(255, 255, 255), thickness=1)
122 |         if ep100_label is not None:
123 |             for i in range(len(ep100_label)):
124 |                 text = 'EP100 original is: ' + ep100_label[i]['non_remapped_noun'] + ' remapped: ' + ep100_label[i]['noun'] + ' The verb ' + ep100_label[i]['verb']
125 |                 cv2.putText(img, text, (10, 100 + i*20), font, 1.5, (255,0,0), 2, cv2.LINE_AA)
126 |         for i in range(len(visors_objects)):
127 |             cv2.putText(img, 'Object given by VISOR ' + visors_objects[i], (10, 110 + 30 * (i + 1)), font, 1.5, (0,0,255), 2, cv2.LINE_AA)
128 |         cv2.imwrite(os.path.join(self.output_dir_2d, img_name), img)
129 | 
130 | 
131 |     def run(self):
132 |         all_abs_depth, all_abs_colors, cameras, all_keypoints, all_rgb_keypoints = [], [], [], [], []
133 |         global_counter = 0
134 |         for i in range(len(self.dataset)):
135 |             output_all = {}
136 |             frame_dict = self.dataset[i]
137 | 
138 |             print('---------We are analyzing the frame', i, '--------- corresponding to the image', frame_dict['filename'], '---------')
139 |             try:
140 |                 v = next(v for v in self.dataset.imgs_Colmap.values() if v.name == frame_dict['filename'])
141 |             except StopIteration:
142 |                 if frame_dict['exists_affordance']:
143 |                     global_counter += 1
144 |                 print('We lost the camera pose for this image')
145 |                 continue
146 | 
147 |             R, t = self.image_to_extrinsics(v) #Location of the camera with respect to the world
148 |             cameras.append(t)
149 |             label_t = frame_dict["aff_annotation"]
150 |             colmap_coords = None
151 |             output_all['EGOMETRIC_label'] = {'affordance_labels': []}
152 |             if frame_dict['exists_affordance']:
153 |                 colmap_depths = np.array([(v.qvec2rotmat() @ self.dataset.pts_Colmap[p3d].xyz + v.tvec)[2] for p3d in v.point3D_ids[v.point3D_ids > -1]]) #Depth of the keypoints after transforming them to camera coordinates
154 |                 colmap_coords = np.array([v.xys[np.where(v.point3D_ids == p3d)][0, ::-1] for p3d in v.point3D_ids[v.point3D_ids > -1]]) #2D image coordinates of the keypoints (reversed to YX order)
155 |                 colmap_keypoints = np.array([self.dataset.pts_Colmap[p3d].xyz for p3d in v.point3D_ids[v.point3D_ids > -1]]) #3D keypoints in absolute (world) coordinates
156 |                 colmap_rgb = np.array([self.dataset.pts_Colmap[p3d].rgb for p3d in v.point3D_ids[v.point3D_ids > -1]]) #RGB colors of the keypoints
157 |                 colmap_rgb = self.alpha * colmap_rgb + (1 - self.alpha) * 255
158 |                 all_keypoints.append(colmap_keypoints)
159 |                 all_rgb_keypoints.append(colmap_rgb)
160 |                 depth = self.depth_extractor(frame_dict[('color', 0)], frame_dict['filename']) #Depth map in image coordinates (relative, up to scale!)
161 |                 local_scale = self.new_scale_SfM_depth(depth, colmap_depths, colmap_coords)
162 |                 rescaled_rgbd = self.obtain_rgbd(depth, local_scale)
163 | 
164 |                 for aff in range(len(label_t['interacting_objects'])):
165 |                     rel_points, rel_colors = self.paint_affordance_hotpots(rescaled_rgbd, label_t['interacting_objects'][aff])
166 |                     abs_points = np.dot(R, rel_points.reshape(-1, 3).T).T + t
167 |                     abs_colors = np.reshape(rel_colors, (-1, 3))
168 |                     #abs_points = np.concatenate((abs_points, abs_points + np.random.randn(20, 3) * 0.1), axis=0)
169 |                     #abs_colors = np.concatenate((abs_colors, abs_colors + np.random.randn(20, 3) * 0), axis=0)
170 |                     all_abs_depth.append(abs_points)
171 |                     all_abs_colors.append(abs_colors)
172 |                     dict_aff = {'3D_aff_points': abs_points,
173 |                                 '3D_aff_colors': abs_colors,
174 |                                 'aff_noun': label_t['interacting_objects'][aff]['noun'],
175 |                                 'aff_noun_id': label_t['interacting_objects'][aff]['noun_id'],
176 |                                 'aff_verb': label_t['interacting_objects'][aff]['verb'],
177 |                                 'aff_verb_id': label_t['interacting_objects'][aff]['verb_id']}
178 |                     output_all['EGOMETRIC_label']['affordance_labels'].append(dict_aff)
179 |                     #output_all['EGOMETRIC_label'][aff]['aff_' + str(aff)] = abs_points
180 |                     #output_all['EGOMETRIC_label'][aff]['aff_rgb_' + str(aff)] = abs_colors
181 |                     #output_all['EGOMETRIC_label'][aff]['aff_noun_' + str(aff)] = label_t['interacting_objects'][aff]['noun']
182 |                     #output_all['EGOMETRIC_label'][aff]['aff_noun_id_' + str(aff)] = label_t['interacting_objects'][aff]['noun_id']
183 |                     #output_all['EGOMETRIC_label'][aff]['aff_verb_' + str(aff)] = label_t['interacting_objects'][aff]['verb']
184 |                     #output_all['EGOMETRIC_label'][aff]['aff_verb_id_' + str(aff)] = label_t['interacting_objects'][aff]['verb_id']
185 |             #self.paint_aff_on_2D(frame_dict[('color', 0)], frame_dict, colmap_coords)
186 |             output_all['colmap'] = {}
187 |             output_all['colmap']['keypoints_3D'] = colmap_keypoints
188 |             output_all['colmap']['keypoints_rgb'] = colmap_rgb
189 |             output_all['colmap']['keypoints_2D'] = colmap_coords
190 |             output_all['colmap']['R_pos'] = R #Rotation matrix of the camera
191 |             output_all['colmap']['t_pos'] = t #Translation vector of the camera
192 |             output_all['VISOR'] = {}
193 |             if label_t is not None:
194 |                 output_all['VISOR']['neutral_objects'] = label_t['neutral_objects']
195 |                 output_all['VISOR']['hands'] = label_t['hands']
196 |                 output_all['VISOR']['interacting_objects'] = label_t['interacting_objects']
197 |             output_all['EPIC_100'] = frame_dict['EP100_annotation']
198 |             output_all['filename'] = frame_dict['filename'] #Name of the image
199 |             output_all['sequence'] = frame_dict['sequence'] #Name of the sequence
200 | 
201 |             #Save the output_all in a pickle file
202 |             output_filename = os.path.join(self.output_dir, 'affordances_' + output_all['filename'].split('.')[0] +'.pkl')
203 |             #With pickle
204 |             with open(output_filename, 'wb') as f:
205 |                 pickle.dump(output_all, f)
206 | 
207 | 
208 |         #Plot the camera poses and the sparse point cloud (Matplotlib version commented out below)
209 |         cameras = np.array(cameras)
210 |         keypoints = np.concatenate(all_keypoints, axis=0)
211 |         abs_depth = np.concatenate(all_abs_depth, axis=0)
212 |         abs_colors = np.concatenate(all_abs_colors, axis=0)
213 |         rgb_keypoints = np.concatenate(all_rgb_keypoints, axis=0)
214 |         print(keypoints.shape, abs_depth.shape, abs_colors.shape, rgb_keypoints.shape)
215 | 
216 |         #fig = plt.figure()
217 |         #ax = fig.add_subplot(projection='3d')
218 |         #ax.scatter(keypoints[:, 0], keypoints[:, 1], keypoints[:, 2], c='r')
219 |         #ax.scatter(cameras[:, 0], cameras[:, 1], cameras[:, 2], c='b')
220 |         #ax.scatter(origin[0], origin[1], origin[2], c='k')
221 |         #plt.show()
222 | 
223 |         #Plot the camera poses and the sparse point cloud with Open3D
224 |         pcd_plot = o3d.geometry.PointCloud()
225 |         #Draw these points bigger
226 |         pcd_plot.points = o3d.utility.Vector3dVector(abs_depth)
227 |         pcd_plot.colors = o3d.utility.Vector3dVector(abs_colors / 255.0)
228 |         cameras_plot = o3d.geometry.PointCloud()
229 |         cameras_plot.points = o3d.utility.Vector3dVector(cameras)
230 |         cameras_plot.colors = o3d.utility.Vector3dVector(np.array([[0, 0, 1] for i in range(cameras.shape[0])]))
231 |         keypoints_plot = o3d.geometry.PointCloud()
232 |         keypoints_plot.points = o3d.utility.Vector3dVector(keypoints)
233 |         keypoints_plot.colors = o3d.utility.Vector3dVector(rgb_keypoints / 255.0)
234 |         o3d.visualization.draw_geometries([cameras_plot, pcd_plot, keypoints_plot], height = 800, width = 1200)
235 | 
236 | 
237 | 
238 | inf = Inference()
239 | inf.run()
240 | 
241 | 
242 | 
243 | 
244 | 
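245 | # Illustrative sketch of the scale alignment performed in new_scale_SfM_depth above:
246 | # the relative (up-to-scale) depth map predicted by MiDaS is rescaled so that its
247 | # median at the COLMAP keypoints matches the median of the metric SfM depths.
248 | # The arrays below are dummy stand-ins for the real inputs.
249 | #
250 | #   rel_depth_at_kypts = np.array([0.60, 0.45, 0.80])  # MiDaS depth sampled at the keypoints
251 | #   sfm_depth_at_kypts = np.array([1.20, 0.90, 1.50])  # metric depths from COLMAP
252 | #   scale = np.median(sfm_depth_at_kypts) / np.median(rel_depth_at_kypts)
253 | #   metric_depth_map = depth * scale                   # depth: the full H x W relative map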
--------------------------------------------------------------------------------
/utils_read_annotations.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import numpy as np
4 | import ijson
5 | import json
6 | import matplotlib.pyplot as plt
7 | import pandas as pd
8 | import time
9 | 
10 | 
11 | class EP100_and_VISOR_annotations():
12 |     def __init__(self, VISOR_path, img_dir, kitchen):
13 |         #VISOR dataset
14 |         self.VISOR_json_dir_dense = os.path.join(VISOR_path, 'Interpolations-DenseAnnotations', 'train')
15 |         self.VISOR_json_dir_sparse = os.path.join(VISOR_path, 'GroundTruth-SparseAnnotations', 'annotations', 'train')
16 |         self.kitchen = kitchen.split('_')[0]
17 | 
18 |         self.all_dense_VISOR_jsons = {}
19 |         self.all_sparse_VISOR_jsons = {}
20 |         for root, dirs, files in os.walk(self.VISOR_json_dir_dense):
21 |             for file in files:
22 |                 kitchen_name = file.split('_')[0]
23 |                 sequence_name = file.split('_')[1]
24 |                 if kitchen_name == self.kitchen and file.endswith('.json'):
25 |                     dense_file = os.path.join(self.VISOR_json_dir_dense, file)
26 |                     sparse_file = os.path.join(self.VISOR_json_dir_sparse, file)[:-20] + '.json'
27 |                     self.all_sparse_VISOR_jsons[kitchen_name + '_' + sequence_name] = sparse_file
28 |                     self.all_dense_VISOR_jsons[kitchen_name + '_' + sequence_name] = dense_file
29 |         print(self.all_sparse_VISOR_jsons)
30 |         self.hands = ['left hand', 'hand:left', 'right hand', 'hand:right']
31 | 
32 |         #Read the EPIC-Kitchens 100 narrations
33 |         self.EPIC_100_pkl = os.path.join(VISOR_path, 'EPIC_100_train.pkl')
34 |         self.EPIC_100_narration = pd.read_pickle(self.EPIC_100_pkl)
35 | 
36 |         #Dictionary to remap the VISOR and EPIC-100 classes
37 |         self.EPIC_100_nouns = os.path.join(VISOR_path, 'EPIC_100_noun_classes_v2.csv')
38 |         self.EPIC_100_nouns = pd.read_csv(self.EPIC_100_nouns)
39 | 
40 |         #Directory with the sampled images for which we have COLMAP poses
41 |         self.img_dir = img_dir
42 | 
43 |     def affordance_hotspot(self, img_name, subset, sequence):
44 |         #Output dictionary with the bounding boxes of the interacting hands and objects
45 |         output = {'neutral_objects': [], 'interacting_objects': [], 'hands': []}
46 |         VISOR_active_objects_list = []
47 |         frame_id = int(img_name.split('.')[0].split('_')[-1])
48 |         EP100_narration_list = self.read_EPIC_100_annot(frame_id, sequence)
49 |         if EP100_narration_list is not None: #If there is a narration for the frame
50 |             print('Reading the annotations of', sequence, img_name)
51 |             VISOR_active_objects, divisor = self.read_VISOR_annot(img_name, subset, sequence) #Read the VISOR annotations
52 |             if VISOR_active_objects is not None: #If there is a VISOR annotation for the frame
53 |                 for narration in range(len(EP100_narration_list)): #We can have multiple narrations for the same frame
54 |                     EP100_narration = EP100_narration_list[narration] #Read the EPIC-100 narration
55 |                     for e_idx, entity in enumerate(VISOR_active_objects): #Read the VISOR annotations
56 |                         VISOR_active_objects_list.append(entity['name']) #To show later the active objects in the image
57 |                         if entity['name'] in self.hands:
58 |                             hand_bbox = self.get_bbox_from_segment(entity['segments']) #Add the bounding box of the hand
59 |                             output['hands'].append({'hand': entity['name'], 'hand_bbox': tuple([int(item / divisor) for item in hand_bbox])})
60 |                             for e_idx2, entity_2 in enumerate(VISOR_active_objects): #VISOR annotations use 'name'; after remapping we call it 'noun', as in EP100
61 |                                 entity_2_name = self.remap_VISOR_annot(entity_2)['noun']
62 |                                 if entity_2_name in self.hands:
63 |                                     continue
64 |                                 elif entity_2_name in EP100_narration['noun']:
65 |                                     obj_bbox = self.get_bbox_from_segment(entity_2['segments'])
66 |                                     cond_aff_intersect, aff_bbox = self.get_intersection_bbox(hand_bbox, obj_bbox)
67 |                                     if cond_aff_intersect:
68 |                                         x_center, y_center = self.get_bbox_center(aff_bbox)
69 |                                         output['interacting_objects'].append({'hand': entity['name'],
70 |                                                                               'verb': EP100_narration['verb'],
71 |                                                                               'verb_id': EP100_narration['verb_id'],
72 |                                                                               'noun': entity_2_name,
73 |                                                                               'noun_id': EP100_narration['noun_id'],
74 |                                                                               'noun_bbox': tuple([int(item / divisor) for item in obj_bbox]),
75 |                                                                               'hand_bbox': tuple([int(item / divisor) for item in hand_bbox]),
76 |                                                                               'affordance_bbox': tuple([int(item / divisor) for item in aff_bbox]),
77 |                                                                               'affordance_center': (int(x_center / divisor), int(y_center / divisor))})
78 |                                         print('There is an interaction!')
79 |                                     else:
80 |                                         output['neutral_objects'].append({'noun': entity_2_name, 'noun_bbox': tuple([int(item / divisor) for item in obj_bbox])})
81 |                                 else:
82 |                                     output['neutral_objects'].append({'noun': entity_2_name, 'noun_bbox': tuple([int(item / divisor) for item in self.get_bbox_from_segment(entity_2['segments'])])})
83 |                 #TODO: if there are no interacting objects, add the interaction at the center of the hand bounding box
84 |                 #if len(output['interacting_objects']) == 0:
85 | 
86 |         else:
87 |             output = None
88 |         return output, EP100_narration_list, VISOR_active_objects_list
89 | 
90 |     def affordance_hotspot_visual(self, img_name, sequence):
91 |         #Output dictionary with the bounding boxes of the interacting hands and objects
92 |         output = {'neutral_objects': [], 'interacting_objects': [], 'hands': []}
93 |         self.img_path = os.path.join(self.img_dir, img_name + '.jpg')
94 |         frame_id = int(img_name.split('_')[-1])
95 |         VISOR_active_objects, _ = self.read_VISOR_annot(img_name, None, sequence)
96 |         EP100_narration_list = self.read_EPIC_100_annot(frame_id, sequence)
97 |         self.img_show = cv2.imread(self.img_path)
98 |         for narration in range(len(EP100_narration_list)):
99 |             EP100_narration = EP100_narration_list[narration]
100 |             for e_idx, entity in enumerate(VISOR_active_objects):
101 |                 if entity['name'] in self.hands:
102 |                     hand_bbox = self.get_bbox_from_segment(entity['segments'])
103 |                     output['hands'].append({'hand': entity['name'], 'hand_bbox': hand_bbox})
104 |                     cv2.rectangle(self.img_show, (hand_bbox[0], hand_bbox[1]), (hand_bbox[2], hand_bbox[3]), color=(0, 0, 255), thickness=10)
105 |                     for e_idx2, entity_2 in enumerate(VISOR_active_objects):
106 |                         entity_2_name = self.remap_VISOR_annot(entity_2)['noun']
107 |                         if entity_2_name in self.hands:
108 |                             continue
109 |                         elif entity_2_name in EP100_narration['noun']:
110 |                             obj_bbox = self.get_bbox_from_segment(entity_2['segments'])
111 |                             cv2.rectangle(self.img_show, (obj_bbox[0], obj_bbox[1]), (obj_bbox[2], obj_bbox[3]), color=(0, 255, 0), thickness=10)
112 |                             if self.get_intersection_bbox(hand_bbox, obj_bbox)[0]:
113 |                                 aff_bbox = self.get_intersection_bbox(hand_bbox, obj_bbox)[1]
114 |                                 x_center, y_center = self.get_bbox_center(aff_bbox)
115 |                                 output['interacting_objects'].append({'hand': entity['name'], 'verb': EP100_narration['verb'], 'object': entity_2_name, 'object_bbox': obj_bbox, 'hand_bbox': hand_bbox, 'affordance_bbox': aff_bbox, 'affordance_center': (x_center, y_center)})
116 |                                 cv2.circle(self.img_show, (int(x_center), int(y_center)), radius=10, color=(255, 0, 0), thickness=15)
117 |                             else:
118 |                                 output['neutral_objects'].append({'object': entity_2_name, 'object_bbox': obj_bbox})
119 |                         else:
120 |                             obj_bbox = self.get_bbox_from_segment(entity_2['segments'])
121 |                             output['neutral_objects'].append({'object': entity_2_name, 'object_bbox': obj_bbox})
122 |                             cv2.rectangle(self.img_show, (obj_bbox[0], obj_bbox[1]), (obj_bbox[2], obj_bbox[3]), color=(0, 255, 255), thickness=2)
123 |         cv2.imwrite('.../affordance_hotspot3.jpg', self.img_show)
124 |         sampled_mask = os.path.join('.../dense_masks', sequence, img_name + '.png')
125 |         sampled_mask = cv2.imread(sampled_mask)
126 |         cv2.imwrite('.../active_object_masks3.jpg', sampled_mask)
127 |         return output
128 | 
129 | 
130 |     def read_VISOR_annot(self, img_name, subset, sequence):
131 |         print('Trying the sparse annotations')
132 |         VISOR_filename = self.all_sparse_VISOR_jsons[sequence]
133 |         the_annotation, divisor = None, 1
134 |         with open(VISOR_filename, 'r') as f:
135 |             VISOR_annot = ijson.items(f, 'video_annotations.item')
136 |             for entity in VISOR_annot:
137 |                 if entity['image']['name'].split('.')[0] == img_name.split('.')[0]:
138 |                     the_annotation = entity['annotations']
139 |                     divisor = 2.25
140 |                     break
141 |         if the_annotation is None:
142 |             print('Trying the dense annotations')
143 |             VISOR_filename = self.all_dense_VISOR_jsons[sequence]
144 |             with open(VISOR_filename, 'r') as f:
145 |                 VISOR_annot = ijson.items(f, 'video_annotations.item')
146 |                 for entity in VISOR_annot:
147 |                     if entity['image']['name'].split('.')[0] == img_name.split('.')[0]:
148 |                         the_annotation = entity['annotations']
149 |                         divisor = 1
150 |                         break
151 |         return the_annotation, divisor
152 | 
153 |     def remap_VISOR_annot(self, visor_annot):
154 |         visor_noun_class = visor_annot['class_id']
155 |         remapped = self.EPIC_100_nouns[self.EPIC_100_nouns['id'] == visor_noun_class]
156 |         full_visor_annot = {'noun_id': remapped['id'].values[0],
157 |                             'noun': remapped['key'].values[0],
158 |                             'category': remapped['category'].values[0],
159 |                             'non_remapped_noun': visor_annot['name']}
160 |         return full_visor_annot
161 | 
162 |     def read_EPIC_100_annot(self, frame_id, sequence):
163 |         df = self.EPIC_100_narration
164 |         df = df[df['video_id'] == sequence]
165 |         df = df.reset_index(drop=True)
166 |         EP_100_narration = df[(df['start_frame'] <= frame_id) & (df['stop_frame'] >= frame_id)]
167 |         if len(EP_100_narration) == 0:
168 |             return None
169 |         list_annotations = []
170 |         for i in range(len(EP_100_narration)):
171 |             EP_100_narration_noun = EP_100_narration['noun_class'].values[i]
172 |             remapped = self.EPIC_100_nouns[self.EPIC_100_nouns['id'] == EP_100_narration_noun]
173 |             narration_annot = {'noun_id': remapped['id'].values[0],
174 |                                'noun': remapped['key'].values[0],
175 |                                'category': remapped['category'].values[0],
176 |                                'non_remapped_noun': EP_100_narration['noun'].values[i],
177 |                                'verb': EP_100_narration['verb'].values[i],
178 |                                'verb_id': EP_100_narration['verb_class'].values[i]}
179 |             list_annotations.append(narration_annot)
180 |         return list_annotations
181 | 
182 |     def get_bbox_from_segment(self, annot):
183 |         mask_clean = []
184 |         for mask in annot:
185 |             if len(mask) == 0: continue
186 |             mask = np.array(mask, dtype=np.int32)
187 |             mask_clean.append(mask)
188 |         bbox = self.get_bbox(mask_clean)
189 |         x1, y1, x2, y2 = bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]
190 |         return x1, y1, x2, y2
191 | 
192 |     def get_bbox(self, masks):
193 |         '''
194 |         Get bbox for object masks (one object may have >1 components). Returns:
195 |         bbox: [x, y, width, height]
196 |         '''
197 |         g_xmin, g_ymin, g_xmax, g_ymax = 10000, 10000, 0, 0
198 |         for mask in masks:
199 |             if len(mask) == 0: continue
200 |             mask = np.array(mask)
201 |             xmin, xmax = np.min(mask[:,0]), np.max(mask[:,0])
202 |             ymin, ymax = np.min(mask[:,1]), np.max(mask[:,1])
203 | 
204 |             g_xmin = min(g_xmin, xmin)
205 |             g_xmax = max(g_xmax, xmax)
206 |             g_ymin = min(g_ymin, ymin)
207 |             g_ymax = max(g_ymax, ymax)
208 | 
209 |         bbox = [int(g_xmin), int(g_ymin), int(g_xmax - g_xmin), int(g_ymax - g_ymin)]
210 |         return bbox
211 | 
212 |     def get_intersection_bbox(self, hand_bbox, obj_bbox):
213 |         x1, y1, x2, y2 = hand_bbox
214 |         x3, y3, x4, y4 = obj_bbox
215 |         x_left = max(x1, x3)
216 |         y_top = max(y1, y3)
217 |         x_right = min(x2, x4)
218 |         y_bottom = min(y2, y4)
219 |         intersection_bbox = [x_left, y_top, x_right, y_bottom]
220 |         if x_right < x_left or y_bottom < y_top:
221 |             return False, None
222 |         else:
223 |             return True, intersection_bbox
224 | 
225 |     def get_bbox_center(self, bbox):
226 |         x1, y1, x2, y2 = bbox #Get the center of the affordance hotspot
227 |         x_center = x1 + (x2 - x1)/2
228 |         y_center = y1 + (y2 - y1)/2
229 |         return x_center, y_center
230 | 
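231 | # Usage sketch (illustrative: the paths are placeholders and the image and
232 | # sequence names are dummy values):
233 | #
234 | #   annotations = EP100_and_VISOR_annotations('/path/to/VISOR', '/path/to/rgb', 'P03_101')
235 | #   output, narrations, active_objects = annotations.affordance_hotspot(
236 | #       'P03_101_frame_0000000157.jpg', 'train', 'P03_101')
237 | #   # output['interacting_objects'] holds one entry per hand-object intersection, with
238 | #   # the verb/noun labels and the affordance center in pixel coordinates.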
--------------------------------------------------------------------------------
/project_from_3D_to_2D.py:
--------------------------------------------------------------------------------
1 | # Description: This script is used to project the 3D points to the 2D image plane
2 | 
3 | import numpy as np
4 | import os
5 | import cv2
6 | import pickle
7 | from read_write_model import read_model
8 | import pandas as pd
9 | import torch
10 | from scipy.stats import multivariate_normal
11 | import time
12 | from utils.valid_interactions import valid_interactions, colormap_interactions
13 | 
14 | class Reproject_data():
15 |     def __init__(self):
16 |         #Initialize directories
17 |         self.verbs_EP100_csv = '.../EPIC_100_verb_classes.csv'
18 |         self.verbs_EP100_csv = pd.read_csv(self.verbs_EP100_csv)
19 |         self.sequence_dir = '.../P04_EPIC_55'
20 |         self.labels_dir = os.path.join(self.sequence_dir, '3D_output')
21 |         self.colmap_poses = os.path.join(self.sequence_dir, 'colmap')
22 |         self.imgs_dir = os.path.join(self.sequence_dir, 'selected_plus_guided_rgb')
23 |         self.output_to_show = os.path.join(self.sequence_dir, 'output_to_show')
24 |         if not os.path.exists(self.output_to_show):
25 |             os.makedirs(self.output_to_show)
26 |         self.output_labels_2d = os.path.join(self.sequence_dir, '2d_output_labels')
27 |         if not os.path.exists(self.output_labels_2d):
28 |             os.makedirs(self.output_labels_2d)
29 |         self.output_clusters = os.path.join(self.sequence_dir, 'output_clusters_v2')
30 |         if not os.path.exists(self.output_clusters):
31 |             os.makedirs(self.output_clusters)
32 | 
33 |         #32 valid verbs
34 |         self.valid_interactions = ['take', 'remove', 'put', 'insert', 'throw', 'wash', 'dry', 'open', 'turn-on',
35 |                                    'close', 'turn-off', 'mix', 'fill', 'add', 'cut', 'peel', 'empty',
36 |                                    'shake', 'squeeze', 'press', 'cook', 'move', 'adjust', 'eat',
37 |                                    'drink', 'apply', 'sprinkle', 'fold', 'sort', 'clean', 'slice', 'pick']
38 |         self.valid_interactions_2 = valid_interactions
39 | 
40 |         self.colormap_interactions = colormap_interactions
41 | 
42 | 
43 |         #Read the intrinsic parameters of the sequence
44 |         self.cameras_Colmap, self.imgs_Colmap, self.pts_Colmap = read_model(self.colmap_poses, ext=".txt")
45 |         self.fx = self.cameras_Colmap[1].params[0]
46 |         self.fy = self.cameras_Colmap[1].params[1]
47 |         self.cx = self.cameras_Colmap[1].params[2]
48 |         self.cy = self.cameras_Colmap[1].params[3]
49 |         self.projection_matrix = self.get_projection_matrix()
50 |         self.height = 480
51 |         self.width = 854
52 |         self.size = 500
53 |         self.gaussian = self.get_gaussian()
54 |         self.read_3D_points()
55 |         print('the length of the 3D points is: ', self.points_coord.shape)
56 | 
57 | 
58 |     def get_projection_matrix(self):
59 |         # Get the projection matrix
60 |         projection_matrix = np.zeros((3, 4))
61 |         projection_matrix[0, 0] = self.fx
62 |         projection_matrix[1, 1] = self.fy
63 |         projection_matrix[0, 2] = self.cx
64 |         projection_matrix[1, 2] = self.cy
65 |         projection_matrix[2, 2] = 1
66 |         return projection_matrix
67 | 
68 |     def get_camera_pose(self, data):
69 |         #Invert the stored camera-to-world pose to get the world-to-camera rotation and translation
70 |         t = data['colmap']['t_pos']
71 |         R = data['colmap']['R_pos']
72 |         t_c = (-R.T @ t).reshape(3,1)
73 |         R_c = R.T
74 |         return t_c, R_c
75 | 
76 |     def remap_verb_EP100(self, data):
77 |         ep_verb_class = data['aff_verb_id']
78 |         remapped = self.verbs_EP100_csv[self.verbs_EP100_csv['id'] == ep_verb_class]
79 |         remapped_verb_str = remapped['key'].values[0]
80 |         remapped_verb_id = remapped['id'].values[0]
81 |         return remapped_verb_str, remapped_verb_id
82 | 
83 |     def read_3D_points(self):
84 |         points, rgb_points = [], []
85 |         self.verb_str, self.verb_id, self.noun_str, self.noun_id = [], [], [], []
86 |         #Iterate over all the files in the labels directory
87 |         for root, dirs, files in os.walk(self.labels_dir):
88 |             for file in files:
89 |                 if file.endswith('.pkl'):
90 |                     pkl = open(os.path.join(root, file), 'rb') #Open a pickle file
91 |                     data = pickle.load(pkl) #Load the pickle file
92 |                     for i in range(len(data['EGOMETRIC_label']['affordance_labels'])):
93 |                         points.append(data['EGOMETRIC_label']['affordance_labels'][i]['3D_aff_points'])
94 |                         rgb_points.append(data['EGOMETRIC_label']['affordance_labels'][i]['3D_aff_colors'])
95 |                         remap_verb_str, remap_verb_id = self.remap_verb_EP100(data['EGOMETRIC_label']['affordance_labels'][i])
96 |                         self.verb_str.append(remap_verb_str)
97 |                         self.verb_id.append(remap_verb_id)
98 |                         self.noun_str.append(data['EGOMETRIC_label']['affordance_labels'][i]['aff_noun'])
99 |                         self.noun_id.append(data['EGOMETRIC_label']['affordance_labels'][i]['aff_noun_id'])
100 |                     pkl.close()
101 | 
102 |         print(len(points), len(rgb_points), len(self.verb_str), len(self.verb_id), len(self.noun_str), len(self.noun_id))
103 |         self.points_coord = np.concatenate(points, axis=0)
104 |         self.points_rgb = np.concatenate(rgb_points, axis=0)
105 | 
106 |     def object_detector_gt(self, visor_annot):
107 |         #Static objects which are always present in the scene
108 |         objects_in_scene = ['drawer', 'fridge', 'microwave', 'oven', 'sink',
109 |                             'hob', 'kettle', 'maker:coffee', 'dishwasher',
110 |                             'machine:washing', 'floor', 'table', 'rubbish']
111 |         #We add the dynamic objects
112 | 
113 |         if len(visor_annot) > 0:
114 |             for i in range(len(visor_annot['neutral_objects'])):
115 |                 objects_in_scene.append(visor_annot['neutral_objects'][i]['noun'])
116 |             for i in range(len(visor_annot['interacting_objects'])):
117 |                 objects_in_scene.append(visor_annot['interacting_objects'][i]['noun'])
118 |         return objects_in_scene
119 | 
120 |     def reproject_points(self, points_in_camera):
121 |         points_in_camera = np.append(points_in_camera, np.ones((1, points_in_camera.shape[1])), axis=0)
122 |         reprojected_points = np.dot(self.projection_matrix, points_in_camera)
123 |         reprojected_points = reprojected_points / reprojected_points[2]
124 |         return reprojected_points
125 | 
126 |     def filter_reprojected_points(self, reprojected_points, present_objects, img_name):
127 |         # Filter the reprojected points by their image location and their semantic noun
128 |         self.good_reprojected_points, self.good_reprojected_rgb, self.good_verbs, self.good_nouns, self.good_interactions = [], [], [], [], []
129 |         for i in range(reprojected_points.shape[1]):
130 |             point = reprojected_points[:, i]
131 |             if point[0] >= 0 and point[0] <= self.width and point[1] >= 0 and point[1] <= self.height:
132 |                 if self.noun_str[i] in present_objects and (self.verb_str[i]) in self.valid_interactions:
133 |                     self.good_reprojected_points.append(point)
134 |                     self.good_reprojected_rgb.append(self.points_rgb[i])
135 |                     self.good_verbs.append(self.verb_str[i])
136 |                     self.good_nouns.append(self.noun_str[i])
137 |                     self.good_interactions.append(self.verb_str[i] + ' ' + self.noun_str[i])
138 |         #Save everything in a pickle file
139 |         img_name = img_name.split('.')[0]
140 |         output_2d = {}
141 |         output_2d['points'] = self.good_reprojected_points
142 |         output_2d['rgb'] = self.good_reprojected_rgb
143 |         output_2d['verbs'] = self.good_verbs
144 |         output_2d['nouns'] = self.good_nouns
145 |         output_2d['verb plus noun'] = self.good_interactions
146 |         output_filename = os.path.join(self.output_labels_2d, img_name +'.pkl')
147 |         with open(output_filename, 'wb') as f:
148 |             pickle.dump(output_2d, f)
149 |         print('We are saving the 2D labels in', output_filename, 'with', len(self.good_reprojected_points), 'points')
150 | 
151 |     def cluster_interactions(self):
152 |         #Cluster the interactions
153 |         self.interaction_clusters = []
154 |         self.interaction_coordinates = {}
155 |         for i in range(len(self.good_verbs)): #Clustered by verb (previously by the full verb-plus-noun interaction)
156 |             if self.good_verbs[i] not in self.interaction_clusters:
157 |                 self.interaction_clusters.append(self.good_verbs[i])
158 |         for i in range(len(self.interaction_clusters)):
159 |             self.interaction_coordinates[self.interaction_clusters[i]] = []
160 |         for i in range(len(self.good_verbs)):
161 |             self.interaction_coordinates[self.good_verbs[i]].append(self.good_reprojected_points[i])
162 | 
163 |     def paint_points(self, img, img_name):
164 |         img_copy = img.copy()
165 |         img_name = img_name.split('.')[0]
166 |         font = cv2.FONT_HERSHEY_PLAIN
167 |         for i in range(len(self.good_reprojected_points)):
168 |             point = self.good_reprojected_points[i]
169 |             rgb_point = self.good_reprojected_rgb[i]
170 |             text = self.good_verbs[i] + ' ' + self.good_nouns[i]
171 |             cv2.circle(img_copy, (int(point[0]), int(point[1])), 3, (int(rgb_point[0]), int(rgb_point[1]), int(rgb_point[2])), -1)
172 |             cv2.putText(img_copy, text, (int(point[0]), int(point[1])), font, 1, (255,0,0), 1, cv2.LINE_AA)
173 |         cv2.imwrite(os.path.join(self.output_to_show, img_name + '.png'), img_copy)
174 | 
175 |     def paint_clusters(self, img, img_name):
176 |         img_copy = img.copy()
177 |         img_name = img_name.split('.')[0]
178 |         font = cv2.FONT_HERSHEY_PLAIN
179 |         #Draw the hotspots of the clusters
180 |         for i in range(len(self.interaction_clusters)):
181 |             cluster = self.interaction_clusters[i]
182 |             prob_sum = np.zeros((self.width, self.height))
183 |             print('prob_sum shape:', prob_sum.shape)
184 |             for j in range(len(self.interaction_coordinates[cluster])):
185 |                 point = self.interaction_coordinates[cluster][j][0:2].astype(int)
186 |                 prob = np.zeros((self.width, self.height))
187 | 
188 |                 if (self.width - point[0]) > self.size // 2:
189 |                     gauss_right = self.size
190 |                     prob_right = point[0] + self.size // 2
191 |                 else:
192 |                     gauss_right = self.width - point[0] + self.size // 2
193 |                     prob_right = self.width
194 | 
195 |                 if point[0] > self.size // 2:
196 |                     gauss_left = 0
197 |                     prob_left = point[0] - self.size // 2
198 |                 else:
199 |                     gauss_left = self.size // 2 - point[0]
200 |                     prob_left = 0
201 | 
202 |                 if (self.height - point[1]) > self.size // 2:
203 |                     gauss_bottom = self.size
204 |                     prob_bottom = point[1] + self.size // 2
205 |                 else:
206 |                     gauss_bottom = self.height - point[1] + self.size // 2
207 |                     prob_bottom = self.height
208 | 
209 |                 if point[1] > self.size // 2:
210 |                     gauss_top = 0
211 |                     prob_top = point[1] - self.size // 2
212 |                 else:
213 |                     gauss_top = self.size // 2 - point[1]
214 |                     prob_top = 0
215 | 
216 |                 prob[int(prob_left):int(prob_right),int(prob_top):int(prob_bottom)] = self.gaussian[int(gauss_left):int(gauss_right),int(gauss_top):int(gauss_bottom)]
217 |                 prob_sum += prob
218 | 
219 |             prob_sum = (prob_sum / np.max(prob_sum)).T
220 |             #If prob_sum < 0.25, set it to 0
221 |             prob_sum[prob_sum < 0.25] = 0
222 |             print('prob_sum shape after the transpose:', prob_sum.shape)
223 |             #prob_sum[prob_sum > 0.25] = 1
224 |             prob_paint = np.expand_dims((prob_sum), axis=2)
225 |             print('prob_paint shape:', prob_paint.shape)
226 |             color = np.array(self.colormap_interactions[cluster]).reshape(1, 3)
227 |             print(prob_paint.shape)
228 |             prob_paint = (prob_paint @ color).astype(np.uint8)
229 |             print(prob_paint.shape)
230 |             print(img_copy.shape)
231 |             print(color.shape)
232 |             img_copy = cv2.addWeighted(img_copy, 0.5, prob_paint, 2.0, 0)
233 |             cv2.imwrite(os.path.join(self.output_clusters, img_name + ' ' + cluster + '.png'), img_copy)
234 |             print('Saved image', os.path.join(self.output_clusters, img_name + ' ' + cluster + '.png'))
235 |             img_copy = cv2.imread(os.path.join(self.imgs_dir, img_name + '.jpg'))
236 |         #Draw the text of the clusters for a better visualization
237 |         for i in range(len(self.interaction_clusters)):
238 |             cluster = self.interaction_clusters[i]
239 |             for j in range(len(self.interaction_coordinates[cluster])):
240 |                 if j == 0:
241 |                     point = self.interaction_coordinates[cluster][j]
242 |                     cv2.putText(img_copy, cluster, (int(point[0]), int(point[1])), font, 3, (255,0,0), 3, cv2.LINE_AA)
243 |         cv2.imwrite(os.path.join(self.output_clusters, img_name + '------' + '.png'), img_copy)
244 | 
245 |     def get_gaussian(self):
246 |         x, y = np.mgrid[0:self.size:1, 0:self.size:1]
247 |         pos = np.empty(x.shape + (2,))
248 |         pos[:, :, 0] = x
249 |         pos[:, :, 1] = y
250 |         gaussian = multivariate_normal(mean=[self.size//2, self.size//2], cov=np.eye(2)*1000)
251 |         return gaussian.pdf(pos)
252 | 
253 |     def examine_sequence(self):
254 |         c = 0
255 |         for root, dirs, files in os.walk(self.labels_dir):
256 |             for file in files:
257 |                 if file.endswith('.pkl'):
258 |                     pkl = open(os.path.join(root, file), 'rb')
259 |                     print('Processing file', os.path.join(root, file))
260 |                     data = pickle.load(pkl)
261 | 
262 |                     img = cv2.imread(os.path.join(self.imgs_dir, data['filename']))
263 | 
264 |                     t_c, R_c = self.get_camera_pose(data)
265 |                     points_in_camera = R_c @ self.points_coord.T + t_c
266 |                     objects_in_scene = self.object_detector_gt(data['VISOR'])
267 | 
268 |                     reprojected_points = self.reproject_points(points_in_camera)
269 |                     self.filter_reprojected_points(reprojected_points, objects_in_scene, data['filename'])
270 |                     #self.paint_points(img, data['filename'])
271 |                     self.cluster_interactions()
272 |                     self.paint_clusters(img, data['filename'])
273 |                     pkl.close()
274 |                     break
275 |                 c += 1
276 |                 if c % 100 == 0:
277 |                     print(c, 'images processed')
278 | 
279 | 
280 | 
281 | 
282 | reproject = Reproject_data()
283 | reproject.examine_sequence()
284 | 
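285 | # Minimal sketch of the projection convention used above, with dummy values:
286 | # a world point goes to camera coordinates with R_c = R.T and t_c = -R.T @ t
287 | # (see get_camera_pose), and is then projected with the pinhole intrinsics.
288 | #
289 | #   R, t = np.eye(3), np.zeros(3)           # camera-to-world pose of one frame
290 | #   fx = fy = 500.0; cx, cy = 427.0, 240.0  # illustrative intrinsics
291 | #   X_world = np.array([0.2, 0.1, 1.5])     # one 3D point
292 | #   X_cam = R.T @ (X_world - t)             # world -> camera
293 | #   u = fx * X_cam[0] / X_cam[2] + cx
294 | #   v = fy * X_cam[1] / X_cam[2] + cy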
-------------------------------------------------------------------------------- /read_write_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022, ETH Zurich and UNC Chapel Hill. 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # 10 | # * Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # 14 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of 15 | # its contributors may be used to endorse or promote products derived 16 | # from this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 22 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | # POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Author: Johannes L. 
Schoenberger (jsch-at-demuc-dot-de)
31 | 
32 | import os
33 | import collections
34 | import numpy as np
35 | import struct
36 | import argparse
37 | 
38 | 
39 | CameraModel = collections.namedtuple(
40 |     "CameraModel", ["model_id", "model_name", "num_params"])
41 | Camera = collections.namedtuple(
42 |     "Camera", ["id", "model", "width", "height", "params"])
43 | BaseImage = collections.namedtuple(
44 |     "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
45 | Point3D = collections.namedtuple(
46 |     "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])
47 | 
48 | 
49 | class Image(BaseImage):
50 |     def qvec2rotmat(self):
51 |         return qvec2rotmat(self.qvec)
52 | 
53 | 
54 | CAMERA_MODELS = {
55 |     CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
56 |     CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
57 |     CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
58 |     CameraModel(model_id=3, model_name="RADIAL", num_params=5),
59 |     CameraModel(model_id=4, model_name="OPENCV", num_params=8),
60 |     CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
61 |     CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
62 |     CameraModel(model_id=7, model_name="FOV", num_params=5),
63 |     CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
64 |     CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
65 |     CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12)
66 | }
67 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model)
68 |                          for camera_model in CAMERA_MODELS])
69 | CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
70 |                            for camera_model in CAMERA_MODELS])
71 | 
72 | 
73 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
74 |     """Read and unpack the next bytes from a binary file.
75 |     :param fid:
76 |     :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
77 |     :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
78 |     :param endian_character: Any of {@, =, <, >, !}
79 |     :return: Tuple of read and unpacked values.
80 |     """
81 |     data = fid.read(num_bytes)
82 |     return struct.unpack(endian_character + format_char_sequence, data)
83 | 
84 | 
85 | def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
86 |     """pack and write to a binary file.
87 |     :param fid:
88 |     :param data: data to send, if multiple elements are sent at the same time,
89 |     they should be encapsulated either in a list or a tuple
90 |     :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
91 | should be the same length as the data list or tuple 92 | :param endian_character: Any of {@, =, <, >, !} 93 | """ 94 | if isinstance(data, (list, tuple)): 95 | bytes = struct.pack(endian_character + format_char_sequence, *data) 96 | else: 97 | bytes = struct.pack(endian_character + format_char_sequence, data) 98 | fid.write(bytes) 99 | 100 | 101 | def read_cameras_text(path): 102 | """ 103 | see: src/base/reconstruction.cc 104 | void Reconstruction::WriteCamerasText(const std::string& path) 105 | void Reconstruction::ReadCamerasText(const std::string& path) 106 | """ 107 | cameras = {} 108 | with open(path, "r") as fid: 109 | while True: 110 | line = fid.readline() 111 | if not line: 112 | break 113 | line = line.strip() 114 | if len(line) > 0 and line[0] != "#": 115 | elems = line.split() 116 | camera_id = int(elems[0]) 117 | model = elems[1] 118 | width = int(elems[2]) 119 | height = int(elems[3]) 120 | params = np.array(tuple(map(float, elems[4:]))) 121 | cameras[camera_id] = Camera(id=camera_id, model=model, 122 | width=width, height=height, 123 | params=params) 124 | return cameras 125 | 126 | 127 | def read_cameras_binary(path_to_model_file): 128 | """ 129 | see: src/base/reconstruction.cc 130 | void Reconstruction::WriteCamerasBinary(const std::string& path) 131 | void Reconstruction::ReadCamerasBinary(const std::string& path) 132 | """ 133 | cameras = {} 134 | with open(path_to_model_file, "rb") as fid: 135 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 136 | for _ in range(num_cameras): 137 | camera_properties = read_next_bytes( 138 | fid, num_bytes=24, format_char_sequence="iiQQ") 139 | camera_id = camera_properties[0] 140 | model_id = camera_properties[1] 141 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 142 | width = camera_properties[2] 143 | height = camera_properties[3] 144 | num_params = CAMERA_MODEL_IDS[model_id].num_params 145 | params = read_next_bytes(fid, num_bytes=8*num_params, 146 | format_char_sequence="d"*num_params) 147 | cameras[camera_id] = Camera(id=camera_id, 148 | model=model_name, 149 | width=width, 150 | height=height, 151 | params=np.array(params)) 152 | assert len(cameras) == num_cameras 153 | return cameras 154 | 155 | 156 | def write_cameras_text(cameras, path): 157 | """ 158 | see: src/base/reconstruction.cc 159 | void Reconstruction::WriteCamerasText(const std::string& path) 160 | void Reconstruction::ReadCamerasText(const std::string& path) 161 | """ 162 | HEADER = "# Camera list with one line of data per camera:\n" + \ 163 | "# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n" + \ 164 | "# Number of cameras: {}\n".format(len(cameras)) 165 | with open(path, "w") as fid: 166 | fid.write(HEADER) 167 | for _, cam in cameras.items(): 168 | to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params] 169 | line = " ".join([str(elem) for elem in to_write]) 170 | fid.write(line + "\n") 171 | 172 | 173 | def write_cameras_binary(cameras, path_to_model_file): 174 | """ 175 | see: src/base/reconstruction.cc 176 | void Reconstruction::WriteCamerasBinary(const std::string& path) 177 | void Reconstruction::ReadCamerasBinary(const std::string& path) 178 | """ 179 | with open(path_to_model_file, "wb") as fid: 180 | write_next_bytes(fid, len(cameras), "Q") 181 | for _, cam in cameras.items(): 182 | model_id = CAMERA_MODEL_NAMES[cam.model].model_id 183 | camera_properties = [cam.id, 184 | model_id, 185 | cam.width, 186 | cam.height] 187 | write_next_bytes(fid, camera_properties, "iiQQ") 188 | for p in cam.params: 189 | write_next_bytes(fid, 
float(p), "d") 190 | return cameras 191 | 192 | 193 | def read_images_text(path): 194 | """ 195 | see: src/base/reconstruction.cc 196 | void Reconstruction::ReadImagesText(const std::string& path) 197 | void Reconstruction::WriteImagesText(const std::string& path) 198 | """ 199 | images = {} 200 | with open(path, "r") as fid: 201 | while True: 202 | line = fid.readline() 203 | if not line: 204 | break 205 | line = line.strip() 206 | if len(line) > 0 and line[0] != "#": 207 | elems = line.split() 208 | image_id = int(elems[0]) 209 | qvec = np.array(tuple(map(float, elems[1:5]))) 210 | tvec = np.array(tuple(map(float, elems[5:8]))) 211 | camera_id = int(elems[8]) 212 | image_name = elems[9] 213 | elems = fid.readline().split() 214 | xys = np.column_stack([tuple(map(float, elems[0::3])), 215 | tuple(map(float, elems[1::3]))]) 216 | point3D_ids = np.array(tuple(map(int, elems[2::3]))) 217 | images[image_id] = Image( 218 | id=image_id, qvec=qvec, tvec=tvec, 219 | camera_id=camera_id, name=image_name, 220 | xys=xys, point3D_ids=point3D_ids) 221 | return images 222 | 223 | 224 | def read_images_binary(path_to_model_file): 225 | """ 226 | see: src/base/reconstruction.cc 227 | void Reconstruction::ReadImagesBinary(const std::string& path) 228 | void Reconstruction::WriteImagesBinary(const std::string& path) 229 | """ 230 | images = {} 231 | with open(path_to_model_file, "rb") as fid: 232 | num_reg_images = read_next_bytes(fid, 8, "Q")[0] 233 | for _ in range(num_reg_images): 234 | binary_image_properties = read_next_bytes( 235 | fid, num_bytes=64, format_char_sequence="idddddddi") 236 | image_id = binary_image_properties[0] 237 | qvec = np.array(binary_image_properties[1:5]) 238 | tvec = np.array(binary_image_properties[5:8]) 239 | camera_id = binary_image_properties[8] 240 | image_name = "" 241 | current_char = read_next_bytes(fid, 1, "c")[0] 242 | while current_char != b"\x00": # look for the ASCII 0 entry 243 | image_name += current_char.decode("utf-8") 244 | current_char = read_next_bytes(fid, 1, "c")[0] 245 | num_points2D = read_next_bytes(fid, num_bytes=8, 246 | format_char_sequence="Q")[0] 247 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, 248 | format_char_sequence="ddq"*num_points2D) 249 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), 250 | tuple(map(float, x_y_id_s[1::3]))]) 251 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 252 | images[image_id] = Image( 253 | id=image_id, qvec=qvec, tvec=tvec, 254 | camera_id=camera_id, name=image_name, 255 | xys=xys, point3D_ids=point3D_ids) 256 | return images 257 | 258 | 259 | def write_images_text(images, path): 260 | """ 261 | see: src/base/reconstruction.cc 262 | void Reconstruction::ReadImagesText(const std::string& path) 263 | void Reconstruction::WriteImagesText(const std::string& path) 264 | """ 265 | if len(images) == 0: 266 | mean_observations = 0 267 | else: 268 | mean_observations = sum((len(img.point3D_ids) for _, img in images.items()))/len(images) 269 | HEADER = "# Image list with two lines of data per image:\n" + \ 270 | "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n" + \ 271 | "# POINTS2D[] as (X, Y, POINT3D_ID)\n" + \ 272 | "# Number of images: {}, mean observations per image: {}\n".format(len(images), mean_observations) 273 | 274 | with open(path, "w") as fid: 275 | fid.write(HEADER) 276 | for _, img in images.items(): 277 | image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name] 278 | first_line = " ".join(map(str, image_header)) 279 | fid.write(first_line + "\n") 280 
| 281 | points_strings = [] 282 | for xy, point3D_id in zip(img.xys, img.point3D_ids): 283 | points_strings.append(" ".join(map(str, [*xy, point3D_id]))) 284 | fid.write(" ".join(points_strings) + "\n") 285 | 286 | 287 | def write_images_binary(images, path_to_model_file): 288 | """ 289 | see: src/base/reconstruction.cc 290 | void Reconstruction::ReadImagesBinary(const std::string& path) 291 | void Reconstruction::WriteImagesBinary(const std::string& path) 292 | """ 293 | with open(path_to_model_file, "wb") as fid: 294 | write_next_bytes(fid, len(images), "Q") 295 | for _, img in images.items(): 296 | write_next_bytes(fid, img.id, "i") 297 | write_next_bytes(fid, img.qvec.tolist(), "dddd") 298 | write_next_bytes(fid, img.tvec.tolist(), "ddd") 299 | write_next_bytes(fid, img.camera_id, "i") 300 | for char in img.name: 301 | write_next_bytes(fid, char.encode("utf-8"), "c") 302 | write_next_bytes(fid, b"\x00", "c") 303 | write_next_bytes(fid, len(img.point3D_ids), "Q") 304 | for xy, p3d_id in zip(img.xys, img.point3D_ids): 305 | write_next_bytes(fid, [*xy, p3d_id], "ddq") 306 | 307 | 308 | def read_points3D_text(path): 309 | """ 310 | see: src/base/reconstruction.cc 311 | void Reconstruction::ReadPoints3DText(const std::string& path) 312 | void Reconstruction::WritePoints3DText(const std::string& path) 313 | """ 314 | points3D = {} 315 | with open(path, "r") as fid: 316 | while True: 317 | line = fid.readline() 318 | if not line: 319 | break 320 | line = line.strip() 321 | if len(line) > 0 and line[0] != "#": 322 | elems = line.split() 323 | point3D_id = int(elems[0]) 324 | xyz = np.array(tuple(map(float, elems[1:4]))) 325 | rgb = np.array(tuple(map(int, elems[4:7]))) 326 | error = float(elems[7]) 327 | image_ids = np.array(tuple(map(int, elems[8::2]))) 328 | point2D_idxs = np.array(tuple(map(int, elems[9::2]))) 329 | points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb, 330 | error=error, image_ids=image_ids, 331 | point2D_idxs=point2D_idxs) 332 | return points3D 333 | 334 | 335 | def read_points3D_binary(path_to_model_file): 336 | """ 337 | see: src/base/reconstruction.cc 338 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 339 | void Reconstruction::WritePoints3DBinary(const std::string& path) 340 | """ 341 | points3D = {} 342 | with open(path_to_model_file, "rb") as fid: 343 | num_points = read_next_bytes(fid, 8, "Q")[0] 344 | for _ in range(num_points): 345 | binary_point_line_properties = read_next_bytes( 346 | fid, num_bytes=43, format_char_sequence="QdddBBBd") 347 | point3D_id = binary_point_line_properties[0] 348 | xyz = np.array(binary_point_line_properties[1:4]) 349 | rgb = np.array(binary_point_line_properties[4:7]) 350 | error = np.array(binary_point_line_properties[7]) 351 | track_length = read_next_bytes( 352 | fid, num_bytes=8, format_char_sequence="Q")[0] 353 | track_elems = read_next_bytes( 354 | fid, num_bytes=8*track_length, 355 | format_char_sequence="ii"*track_length) 356 | image_ids = np.array(tuple(map(int, track_elems[0::2]))) 357 | point2D_idxs = np.array(tuple(map(int, track_elems[1::2]))) 358 | points3D[point3D_id] = Point3D( 359 | id=point3D_id, xyz=xyz, rgb=rgb, 360 | error=error, image_ids=image_ids, 361 | point2D_idxs=point2D_idxs) 362 | return points3D 363 | 364 | 365 | def write_points3D_text(points3D, path): 366 | """ 367 | see: src/base/reconstruction.cc 368 | void Reconstruction::ReadPoints3DText(const std::string& path) 369 | void Reconstruction::WritePoints3DText(const std::string& path) 370 | """ 371 | if len(points3D) == 0: 
372 | mean_track_length = 0 373 | else: 374 | mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items()))/len(points3D) 375 | HEADER = "# 3D point list with one line of data per point:\n" + \ 376 | "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" + \ 377 | "# Number of points: {}, mean track length: {}\n".format(len(points3D), mean_track_length) 378 | 379 | with open(path, "w") as fid: 380 | fid.write(HEADER) 381 | for _, pt in points3D.items(): 382 | point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error] 383 | fid.write(" ".join(map(str, point_header)) + " ") 384 | track_strings = [] 385 | for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs): 386 | track_strings.append(" ".join(map(str, [image_id, point2D]))) 387 | fid.write(" ".join(track_strings) + "\n") 388 | 389 | 390 | def write_points3D_binary(points3D, path_to_model_file): 391 | """ 392 | see: src/base/reconstruction.cc 393 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 394 | void Reconstruction::WritePoints3DBinary(const std::string& path) 395 | """ 396 | with open(path_to_model_file, "wb") as fid: 397 | write_next_bytes(fid, len(points3D), "Q") 398 | for _, pt in points3D.items(): 399 | write_next_bytes(fid, pt.id, "Q") 400 | write_next_bytes(fid, pt.xyz.tolist(), "ddd") 401 | write_next_bytes(fid, pt.rgb.tolist(), "BBB") 402 | write_next_bytes(fid, pt.error, "d") 403 | track_length = pt.image_ids.shape[0] 404 | write_next_bytes(fid, track_length, "Q") 405 | for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs): 406 | write_next_bytes(fid, [image_id, point2D_id], "ii") 407 | 408 | 409 | def detect_model_format(path, ext): 410 | if os.path.isfile(os.path.join(path, "cameras" + ext)) and \ 411 | os.path.isfile(os.path.join(path, "images" + ext)) and \ 412 | os.path.isfile(os.path.join(path, "points3D" + ext)): 413 | print("Detected model format: '" + ext + "'") 414 | return True 415 | 416 | return False 417 | 418 | 419 | def read_model(path, ext=""): 420 | # try to detect the extension automatically 421 | if ext == "": 422 | if detect_model_format(path, ".bin"): 423 | ext = ".bin" 424 | elif detect_model_format(path, ".txt"): 425 | ext = ".txt" 426 | else: 427 | print("Provide model format: '.bin' or '.txt'") 428 | return 429 | 430 | if ext == ".txt": 431 | cameras = read_cameras_text(os.path.join(path, "cameras" + ext)) 432 | images = read_images_text(os.path.join(path, "images" + ext)) 433 | points3D = read_points3D_text(os.path.join(path, "points3D") + ext) 434 | else: 435 | cameras = read_cameras_binary(os.path.join(path, "cameras" + ext)) 436 | images = read_images_binary(os.path.join(path, "images" + ext)) 437 | points3D = read_points3D_binary(os.path.join(path, "points3D") + ext) 438 | return cameras, images, points3D 439 | 440 | 441 | def write_model(cameras, images, points3D, path, ext=".bin"): 442 | if ext == ".txt": 443 | write_cameras_text(cameras, os.path.join(path, "cameras" + ext)) 444 | write_images_text(images, os.path.join(path, "images" + ext)) 445 | write_points3D_text(points3D, os.path.join(path, "points3D") + ext) 446 | else: 447 | write_cameras_binary(cameras, os.path.join(path, "cameras" + ext)) 448 | write_images_binary(images, os.path.join(path, "images" + ext)) 449 | write_points3D_binary(points3D, os.path.join(path, "points3D") + ext) 450 | return cameras, images, points3D 451 | 452 | 453 | def qvec2rotmat(qvec): 454 | return np.array([ 455 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 456 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * 
qvec[3],
457 |          2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
458 |         [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
459 |          1 - 2 * qvec[1]**2 - 2 * qvec[3]**2,
460 |          2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
461 |         [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
462 |          2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
463 |          1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]])
464 | 
465 | 
466 | def rotmat2qvec(R):
467 |     Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
468 |     K = np.array([
469 |         [Rxx - Ryy - Rzz, 0, 0, 0],
470 |         [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
471 |         [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
472 |         [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
473 |     eigvals, eigvecs = np.linalg.eigh(K)
474 |     qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
475 |     if qvec[0] < 0:
476 |         qvec *= -1
477 |     return qvec
478 | 
479 | 
480 | def main():
481 |     parser = argparse.ArgumentParser(description="Read and write COLMAP binary and text models")
482 |     parser.add_argument("--input_model", help="path to input model folder")
483 |     parser.add_argument("--input_format", choices=[".bin", ".txt"],
484 |                         help="input model format", default="")
485 |     parser.add_argument("--output_model",
486 |                         help="path to output model folder")
487 |     parser.add_argument("--output_format", choices=[".bin", ".txt"],
488 |                         help="output model format", default=".txt")
489 |     args = parser.parse_args()
490 | 
491 |     cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format)
492 | 
493 |     print("num_cameras:", len(cameras))
494 |     print("num_images:", len(images))
495 |     print("num_points3D:", len(points3D))
496 | 
497 |     if args.output_model is not None:
498 |         write_model(cameras, images, points3D, path=args.output_model, ext=args.output_format)
499 | 
500 | 
501 | if __name__ == "__main__":
502 |     main()
503 | 
--------------------------------------------------------------------------------
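For reference, a minimal sketch of how the COLMAP model files are loaded with read_write_model.py to recover camera poses (the model path below is a placeholder):

```python
from read_write_model import read_model, qvec2rotmat

# Read a reconstruction exported as text (cameras.txt, images.txt, points3D.txt)
cameras, images, points3D = read_model('/path/to/colmap/model', ext='.txt')
for img in images.values():
    R, t = qvec2rotmat(img.qvec), img.tvec  # world-to-camera rotation and translation
    camera_center = -R.T @ t                # camera position in world coordinates
```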