├── stereodemo
│   ├── __main__.py
│   ├── __init__.py
│   ├── utils.py
│   ├── method_dist_depth.py
│   ├── chang_realtime_stereo_onnx.py
│   ├── methods.py
│   ├── method_chang_realtime_stereo.py
│   ├── oakd_source.py
│   ├── method_opencv_bm.py
│   ├── method_hitnet.py
│   ├── method_sttr.py
│   ├── main.py
│   ├── method_raft_stereo.py
│   ├── method_cre_stereo.py
│   └── visualizer.py
├── datasets
│   ├── opencv-sample
│   │   ├── README.md
│   │   ├── aloe_left.jpg
│   │   ├── aloe_right.jpg
│   │   └── stereodemo_calibration.json
│   ├── drivingstereo
│   │   ├── README.md
│   │   ├── 2018-07-11-14-48-52
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-14-50-10-570_left.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-14-57-53-937_left.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-02-03-700_left.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-02-29-915_left.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-26-56-946_left.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-14-50-10-570_right.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-14-57-53-937_right.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-02-03-700_right.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-02-29-915_right.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-26-56-946_right.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-34-51-679_right.jpg
│   │   │   ├── stereodemo_calibration.json
│   │   │   └── 2018-07-11-14-48-52.txt
│   │   └── convert_kitti_calib.py
│   ├── kitti2015
│   │   ├── README.md
│   │   ├── kitti_000046_left.png
│   │   ├── kitti_000046_right.png
│   │   └── stereo_calibration.json
│   ├── sceneflow
│   │   ├── README.md
│   │   ├── driving_left.png
│   │   ├── monkaa_left.png
│   │   ├── monkaa_right.png
│   │   ├── driving_right.png
│   │   ├── flyingthings_left.png
│   │   ├── flyingthings_right.png
│   │   └── stereodemo_calibration.json
│   ├── oak-d
│   │   ├── cycles_left.png
│   │   ├── desk_left.png
│   │   ├── desk_right.png
│   │   ├── selfie_left.png
│   │   ├── stairs_left.png
│   │   ├── toy_left.png
│   │   ├── toy_right.png
│   │   ├── corridor_left.png
│   │   ├── corridor_right.png
│   │   ├── cycles_right.png
│   │   ├── donkey_toy_left.png
│   │   ├── donkey_toy_right.png
│   │   ├── kid_bedroom_left.png
│   │   ├── living_room_left.png
│   │   ├── pov_hands_left.png
│   │   ├── pov_hands_right.png
│   │   ├── selfie_right.png
│   │   ├── stairs_right.png
│   │   ├── bedroom_chair_left.png
│   │   ├── bedroom_chair_right.png
│   │   ├── kid_bedroom_right.png
│   │   ├── living_room_right.png
│   │   ├── pov_controllers_left.png
│   │   ├── pov_controllers_right.png
│   │   ├── README.md
│   │   └── stereodemo_calibration.json
│   ├── eth3d_lowres
│   │   ├── electro_2l
│   │   │   ├── im0.png
│   │   │   ├── im1.png
│   │   │   ├── calib.txt
│   │   │   ├── cameras.txt
│   │   │   ├── stereodemo_calibration.json
│   │   │   └── images.txt
│   │   ├── forest_2s
│   │   │   ├── im0.png
│   │   │   ├── im1.png
│   │   │   ├── calib.txt
│   │   │   ├── stereodemo_calibration.json
│   │   │   ├── cameras.txt
│   │   │   └── images.txt
│   │   ├── playground_1l
│   │   │   ├── im0.png
│   │   │   ├── im1.png
│   │   │   ├── calib.txt
│   │   │   ├── cameras.txt
│   │   │   ├── stereodemo_calibration.json
│   │   │   └── images.txt
│   │   ├── delivery_area_1l
│   │   │   ├── im0.png
│   │   │   ├── im1.png
│   │   │   ├── cameras.txt
│   │   │   ├── stereodemo_calibration.json
│   │   │   ├── calib.txt
│   │   │   └── images.txt
│   │   ├── delivery_area_2l
│   │   │   ├── im0.png
│   │   │   ├── im1.png
│   │   │   ├── cameras.txt
│   │   │   ├── stereodemo_calibration.json
│   │   │   ├── calib.txt
│   │   │   └── images.txt
│   │   ├── README.md
│   │   └── convert_calib_txt.py
│   └── middlebury_2014
│       ├── Piano-imperfect
│       │   ├── im0.png
│       │   ├── im1.png
│       │   ├── stereodemo_calibration.json
│       │   └── calib.txt
│       ├── Playtable-imperfect
│       │   ├── im0.png
│       │   ├── im1.png
│       │   ├── stereodemo_calibration.json
│       │   └── calib.txt
│       ├── README.md
│       └── convert_calib_txt.py
├── .gitignore
├── .gitattributes
├── pyproject.toml
├── setup.py
├── CONTRIBUTING.md
├── MANIFEST.in
├── .github
│   └── workflows
│       └── unit_tests.yml
├── LICENSE
├── .vscode
│   └── launch.json
├── setup.cfg
├── tests
│   └── test_methods.py
├── tools
│   ├── capture_oakd_frames.py
│   └── chang_realtimestereo_to_torchscript_onnx.py
└── README.md

/stereodemo/__main__.py:
--------------------------------------------------------------------------------
1 | from . import main
2 | main()
3 | 
--------------------------------------------------------------------------------
/datasets/opencv-sample/README.md:
--------------------------------------------------------------------------------
1 | From OpenCV samples/data.
2 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.egg-info/
3 | imgui.ini
4 | build/
5 | 
--------------------------------------------------------------------------------
/stereodemo/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.6.2"
2 | 
3 | from .main import main
4 | 
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.png filter=lfs diff=lfs merge=lfs -text
2 | *.jpg filter=lfs diff=lfs merge=lfs -text
--------------------------------------------------------------------------------
/datasets/drivingstereo/README.md:
--------------------------------------------------------------------------------
1 | Tiny subset of the data of https://drivingstereo-dataset.github.io/
2 | 
--------------------------------------------------------------------------------
/datasets/kitti2015/README.md:
--------------------------------------------------------------------------------
1 | Sample image taken from http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=stereo
--------------------------------------------------------------------------------
/datasets/sceneflow/README.md:
--------------------------------------------------------------------------------
1 | Sample subset of the data of https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html.
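
Each dataset folder above ships a `stereodemo_calibration.json` describing the rectified pinhole model (`fx`, `fy`, `cx0`, `cx1`, `cy`, `baseline_meters`, plus a display `depth_range`). Inside stereodemo these files are parsed by `Calibration.from_json` in `stereodemo/methods.py`, which is not reproduced in this listing; the sketch below is only an illustration of the file format and of the standard pinhole depth-from-disparity relation, and its helper names are hypothetical.

```python
# Illustrative only: read one of the per-dataset stereodemo_calibration.json files
# and apply the standard pinhole relation depth = fx * baseline / disparity.
# The helper names here are hypothetical; stereodemo's own loader lives in
# stereodemo/methods.py, which is not part of this listing.
import json
from pathlib import Path

import numpy as np

def load_calibration(path: Path) -> dict:
    """Read a stereodemo_calibration.json file into a plain dict."""
    return json.loads(path.read_text())

def depth_from_disparity(disparity_px: np.ndarray, calib: dict) -> np.ndarray:
    """Convert a disparity map (pixels) to depth (meters) with the pinhole model."""
    with np.errstate(divide="ignore"):
        depth = calib["fx"] * calib["baseline_meters"] / disparity_px
    depth[disparity_px <= 0.0] = 0.0  # invalid disparities carry no depth
    return depth

if __name__ == "__main__":
    calib = load_calibration(Path("datasets/sceneflow/stereodemo_calibration.json"))
    fake_disparity = np.full((calib["height"], calib["width"]), 32.0, dtype=np.float32)
    # With fx = 1050 and baseline = 0.065 m, a 32 px disparity maps to ~2.13 m.
    print(depth_from_disparity(fake_disparity, calib).max())
```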
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | -------------------------------------------------------------------------------- /datasets/oak-d/cycles_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6ad108cf43caee84384ffb3c769c0b74ad18d7d6d44d907df3847b0ed9174606 3 | size 166889 4 | -------------------------------------------------------------------------------- /datasets/oak-d/desk_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:28f75bba24ea9ea9ef8be7908506cd2725258d59c88413eff39dca1587c82e55 3 | size 133928 4 | -------------------------------------------------------------------------------- /datasets/oak-d/desk_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:99ae5151ba34e57672cde8e752954ed26129ddddbbda0128e551c60e8ba50cf3 3 | size 144049 4 | -------------------------------------------------------------------------------- /datasets/oak-d/selfie_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d8da968d0e5e7311f9f414b807d8d9109aa5adf04ae500a8f4daa540a9281709 3 | size 152113 4 | -------------------------------------------------------------------------------- /datasets/oak-d/stairs_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3c9c4ff9b10b14ce86c6469ec3ffc2fd738b9eb93ecec6c9d53cd61c97b7d494 3 | size 160253 4 | -------------------------------------------------------------------------------- /datasets/oak-d/toy_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eb5c44e65417565bb47f633cac4e2c0868e302fdd79de511988de04bef942a3f 3 | size 178829 4 | -------------------------------------------------------------------------------- /datasets/oak-d/toy_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:31d0ad52198e61c8724d2ebefcab10d8a0f3682ae36136655eee90ddadea7ec1 3 | size 184079 4 | -------------------------------------------------------------------------------- /datasets/oak-d/corridor_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d51b47b938221309204e7c4c06a361f9f8e28796a396d345ae289583e5d10c38 3 | size 154357 4 | -------------------------------------------------------------------------------- /datasets/oak-d/corridor_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:45627c4f244e12b4b9f184559ab8aa97ec6ec931696dd8ca8bb460c4fab1d126 3 | size 166458 4 | -------------------------------------------------------------------------------- /datasets/oak-d/cycles_right.png: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:814ae281dabc268e64455543dd0d893ab50ea9d406efbeccf199aa0c12310059 3 | size 174756 4 | -------------------------------------------------------------------------------- /datasets/oak-d/donkey_toy_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:06929b3c9d5bd2291e30d967a90604286f7eb4f34daa4348870701c9761c809a 3 | size 178114 4 | -------------------------------------------------------------------------------- /datasets/oak-d/donkey_toy_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0ab2889b84473af6154b2f21d9d65e21b396a73befff61571da91911dd5e8982 3 | size 189676 4 | -------------------------------------------------------------------------------- /datasets/oak-d/kid_bedroom_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fed5cf79e1ee101e6e023b69645b067561f58d61b95f2deeebe735fa28d6bdf9 3 | size 140337 4 | -------------------------------------------------------------------------------- /datasets/oak-d/living_room_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1ced83ec3c4d871237ff56eed9633dbcb080f42b0e934a8a2eea1caafac7efc4 3 | size 168843 4 | -------------------------------------------------------------------------------- /datasets/oak-d/pov_hands_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4a3de50ed7931b55f6fa753af4c100d8bd2ee4d2a09ff10069c8d0ad042d0c3f 3 | size 152547 4 | -------------------------------------------------------------------------------- /datasets/oak-d/pov_hands_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:65cf300ba84b9e9b55120070527f4371a468bed98cf2af59222eef49d90efd2d 3 | size 160353 4 | -------------------------------------------------------------------------------- /datasets/oak-d/selfie_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f04614b90fa69c677d92ede5ebcae27c74971e25b5c3070972466421ce3dc0e3 3 | size 157385 4 | -------------------------------------------------------------------------------- /datasets/oak-d/stairs_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:57ac719b1263b545822f153e27a986f5acdd59cdf321006dfe14defe6a1c03c6 3 | size 169966 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/driving_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2ee3cce23b9653d1462bbc9a36830b9260c53657b95260c87a107a4b6b5937d1 3 | size 828498 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/monkaa_left.png: -------------------------------------------------------------------------------- 1 | version 
https://git-lfs.github.com/spec/v1 2 | oid sha256:1c82639c8f625ccfa7dc828d711251172d10c4f095311ad3881b7d2136b730a1 3 | size 640939 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/monkaa_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:78fec711927efede0fd7600e9c3db9a7433bcb270767f3d4336c6243ed89471d 3 | size 640633 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1638d1f8480896f565ee16e241155a4bb59ca7ca4ddbf82be4bb25040a9eb0e8 3 | size 279050 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8173d90519f89e0c781a2d0e5fcce010477b52497931fdf28a4d1d1f484a2c33 3 | size 274822 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0413f4ae94162fec588a60579c9169aa12a679cb378861ca74b47de60ffa9e1d 3 | size 309666 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9e28a2ee1f6b98967fdb78f5cf528ee7bc1860e4068ef2283070b4b8b26fac47 3 | size 298187 4 | -------------------------------------------------------------------------------- /datasets/kitti2015/kitti_000046_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1bd9d1630c7e4960f573abc3657dd9431e1f29b5cd971041ee154cbdf2bad639 3 | size 849417 4 | -------------------------------------------------------------------------------- /datasets/oak-d/bedroom_chair_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bbec94c081186e5c0ed3f90c05e3e69cbece694f6679dd4aa453751d47fad8f7 3 | size 146806 4 | -------------------------------------------------------------------------------- /datasets/oak-d/bedroom_chair_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6d6d34efdc1decbb45aa3e4adfe9a15a7aba976370f8b1de040c9e256c9765c9 3 | size 157006 4 | -------------------------------------------------------------------------------- /datasets/oak-d/kid_bedroom_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e95e3d9db6466d5ff9d30d702c879b52d8ecdfcc7cddcaab6053cf30015772ee 3 | size 150044 4 | -------------------------------------------------------------------------------- /datasets/oak-d/living_room_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:a6d6523bb2bb604c2c7e6964a38bf776d98948582ecca8ad893c0dea0071781d 3 | size 176791 4 | -------------------------------------------------------------------------------- /datasets/oak-d/pov_controllers_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:27a038def97f08b1aee0b4ac7521cc2c1e4c1962760dd6ad4ae3ba7c72e8337e 3 | size 151662 4 | -------------------------------------------------------------------------------- /datasets/oak-d/pov_controllers_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1cd1fb2ff3c1596b702b14c190e03d5c6260aee7bc6dde80f3847dda3e04cb4c 3 | size 162293 4 | -------------------------------------------------------------------------------- /datasets/opencv-sample/aloe_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:cce5736808efe80d9f04b118dbb978c344d4345672b332718c3e039a3eeb8eee 3 | size 315069 4 | -------------------------------------------------------------------------------- /datasets/opencv-sample/aloe_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9b23100df31a846bc6e6a6545563b2b4120b948c9835c7d36cde00af77f4503e 3 | size 315113 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/driving_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1dbb9097ae55fc27c4075b5cd5c4218b9537bb2546e06699275257fd259c8bb1 3 | size 841296 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/flyingthings_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fd14fa89b443fffb712db3b47ab7e3196ae930b41a81bcd2f0daa99d615599de 3 | size 708478 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c958f6978ce162c28fbfe1ff49b0af07f9e89ec45149d73ef315894f9f9dd685 3 | size 333941 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b62431c3cfed78b3289f35c7e1dd715bc8f6f6118c94966a244c6b6c5ceb32bd 3 | size 308788 4 | -------------------------------------------------------------------------------- /datasets/kitti2015/kitti_000046_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:165a81149d82f5ec22b95262f05d7242a083fb581ec8ec56f78b7f89e2c40af5 3 | size 808120 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/flyingthings_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:952b0752dc649da9733d24f8c82dc6467616f84d91a44fdd781f06610b737649 3 | size 707543 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:634ff100be6d81ff4b08d7d67949069572ad9f2da51b7bdf651ecb44a41c7879 3 | size 299256 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f543cd3612fd6cd327fc47d77502964660b7234af6330e77d726745e7632c92e 3 | size 296079 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f9c9785b75f968d3fd158814d06754e2efe717df2dc390f4b81993885a793107 3 | size 307909 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:898b4c703593b7bf567786267401abdec5dc44b600302bfb83db3bebebcad0e6 3 | size 304937 4 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Piano-imperfect/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:53eff04d91f3beeb959aac4c3967f041dcdf8ab1c519767bec81036e1c7f4514 3 | size 1557561 4 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Piano-imperfect/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f8e65d06b9fac56aa2568576f66640c95a94b27fadf02dbf60f5903ad5188002 3 | size 1568960 4 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Playtable-imperfect/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e4bc32a5a23ed1683a8c9adccdd8c17b9031c56927fe5d2e3d6a301047651e02 3 | size 1772235 4 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Playtable-imperfect/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:31eb7ac8ccf6b90ca4b2f497083b78abcc1bc32103f63b2a18f152259ba613a4 3 | size 1764462 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[712.53 0 369.856; 0 712.53 239.634; 0 0 1] 2 | cam1=[712.53 0 369.856; 0 712.53 239.634; 0 0 1] 3 | doffs=0 4 | baseline=59.61 5 | width=715 6 | height=440 7 | ndisp=715 8 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/stereodemo_calibration.json: 
-------------------------------------------------------------------------------- 1 | {"width": 715, "height": 440, "fx": 712.53, "fy": 712.53, "cx0": 369.856, "cx1": 369.856, "cy": 239.634, "baseline_meters": 0.05961, "depth_range": [1.0, 20.0]} -------------------------------------------------------------------------------- /datasets/eth3d_lowres/README.md: -------------------------------------------------------------------------------- 1 | Subset of the low-res dataset from ETH3d. 2 | 3 | https://www.eth3d.net/datasets#low-res-two-view 4 | 5 | The calibration files were converted to json with the `convert_calib_txt.py` script. 6 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[543.757 0 520.961; 0 543.757 293.208; 0 0 1] 2 | cam1=[543.757 0 520.961; 0 543.757 293.208; 0 0 1] 3 | doffs=0 4 | baseline=59.8202 5 | width=927 6 | height=489 7 | ndisp=927 8 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/cameras.txt: -------------------------------------------------------------------------------- 1 | # Camera list with one line of data per camera: 2 | # CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[] 3 | # Number of cameras: 1 4 | 0 PINHOLE 927 489 543.757 543.757 520.961 293.208 5 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/cameras.txt: -------------------------------------------------------------------------------- 1 | # Camera list with one line of data per camera: 2 | # CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[] 3 | # Number of cameras: 1 4 | 0 PINHOLE 715 440 712.53 712.53 369.856 239.634 5 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/cameras.txt: -------------------------------------------------------------------------------- 1 | # Camera list with one line of data per camera: 2 | # CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[] 3 | # Number of cameras: 1 4 | 0 PINHOLE 942 489 541.764 541.764 553.869 232.396 5 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | {"width": 942, "height": 489, "fx": 541.764, "fy": 541.764, "cx0": 553.869, "cx1": 553.869, "cy": 232.396, "baseline_meters": 0.0599101, "depth_range": [1.0, 20.0]} -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/cameras.txt: -------------------------------------------------------------------------------- 1 | # Camera list with one line of data per camera: 2 | # CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[] 3 | # Number of cameras: 1 4 | 0 PINHOLE 942 489 541.764 541.764 553.682 232.397 5 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | {"width": 942, "height": 489, "fx": 541.764, "fy": 541.764, "cx0": 553.682, "cx1": 553.682, "cy": 232.397, "baseline_meters": 0.0598896, "depth_range": [1.0, 20.0]} -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/calib.txt: 
-------------------------------------------------------------------------------- 1 | cam0=[542.019 0 541.836; 0 542.019 255.198; 0 0 1] 2 | cam1=[542.019 0 541.836; 0 542.019 255.198; 0 0 1] 3 | doffs=0 4 | baseline=59.5549 5 | width=941 6 | height=490 7 | ndisp=941 8 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/cameras.txt: -------------------------------------------------------------------------------- 1 | # Camera list with one line of data per camera: 2 | # CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[] 3 | # Number of cameras: 1 4 | 0 PINHOLE 941 490 542.019 542.019 541.836 255.198 5 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-50-10-570_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:27e327ae2f5994558c7e704ecb40ce9ea4b8035a01a85122d120e2bcb3299886 3 | size 610121 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-57-53-937_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1c0951ff514b2e2c906c32c4a84633883e28a56856a506f5af82d26948c14605 3 | size 627374 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-02-03-700_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:df68948ba90d0ab36dc18c89bc575d02c5f8ff26007cc47400486bbf8620ba62 3 | size 700217 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-02-29-915_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8f76491357534b8ae434088103520ed6559ae0e9cafa30118e58054ce1db777a 3 | size 741667 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-26-56-946_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ed3ea0fbdedd93f04f8d3eca4e9407f80970612bedb1b43170f397f33e15f3fa 3 | size 465940 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[541.764 0 553.869; 0 541.764 232.396; 0 0 1] 2 | cam1=[541.764 0 553.869; 0 541.764 232.396; 0 0 1] 3 | doffs=0 4 | baseline=59.9101 5 | width=942 6 | height=489 7 | ndisp=942 8 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[541.764 0 553.682; 0 541.764 232.397; 0 0 1] 2 | cam1=[541.764 0 553.682; 0 541.764 232.397; 0 0 1] 3 | doffs=0 4 | baseline=59.8896 5 | width=942 6 | height=489 7 | ndisp=942 8 | 
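
The `stereodemo_calibration.json` files next to these ETH3D `calib.txt` files are generated by `datasets/eth3d_lowres/convert_calib_txt.py` (reproduced further down). For `delivery_area_2l` the mapping amounts to copying the shared intrinsics and converting the baseline from millimeters to meters; a minimal sketch, using only the numbers visible in its `calib.txt` above and its JSON counterpart:

```python
# Illustrative sketch of what convert_calib_txt.py emits for delivery_area_2l.
# All numbers come from the calib.txt above; the ETH3D baselines are given in mm.
cam0 = [[541.764, 0.0, 553.682],
        [0.0, 541.764, 232.397],
        [0.0, 0.0, 1.0]]
baseline_mm = 59.8896

calibration = {
    "width": 942,
    "height": 489,
    "fx": cam0[0][0],
    "fy": cam0[1][1],
    "cx0": cam0[0][2],
    "cx1": cam0[0][2],  # cam0 and cam1 share intrinsics in these rectified pairs
    "cy": cam0[1][2],
    "baseline_meters": baseline_mm * 1e-3,  # -> 0.0598896
    "depth_range": [1.0, 20.0],             # default used by the conversion script
}
```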
-------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | {"width": 927, "height": 489, "fx": 543.757, "fy": 543.757, "cx0": 520.961, "cx1": 520.961, "cy": 293.208, "baseline_meters": 0.059820200000000004, "depth_range": [1.0, 20.0]} -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | {"width": 941, "height": 490, "fx": 542.019, "fy": 542.019, "cx0": 541.836, "cx1": 541.836, "cy": 255.198, "baseline_meters": 0.05955490000000001, "depth_range": [1.0, 20.0]} -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import site 2 | import sys 3 | site.ENABLE_USER_SITE = "--user" in sys.argv[1:] 4 | 5 | # Everything is defined in setup.cfg, added this file only 6 | # to support editable mode. 7 | import setuptools 8 | setuptools.setup() 9 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Making a new release 2 | 3 | - Bump the version numbers in `setup.cfg` and `stereodemo/__init__.py` 4 | 5 | ``` 6 | ./build_release.sh 7 | twine upload dist/* 8 | ``` 9 | 10 | Username is always `__token__` 11 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-50-10-570_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:96cbbc87e2607a771195010629cd1c8812d721d6291cda1e64c4f8e8143c4b75 3 | size 608992 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-57-53-937_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:07848e6e6019dcf3c7a5eae2b519045f731147c5ea65441433a37bd4989f3d48 3 | size 629669 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-02-03-700_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d4dca91f9a5fcbc758552baf58e6619701c7baa9ad9d5d56d1c9525711b082f8 3 | size 702221 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-02-29-915_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fe508c208583fd1fb9696763a3ed401f48cfbc54a63af774cf9b2ad67dda5979 3 | size 724551 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-26-56-946_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 
2 | oid sha256:70805b98791a546a9881de2571cd647766b5f84bee4fd918ecba1957fb92cd78 3 | size 456132 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-34-51-679_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8f9eb47de20fa4d383ffd693781483ac53cb30c4fad1890b8abeba32c4bdae44 3 | size 666677 4 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/README.md: -------------------------------------------------------------------------------- 1 | Small subset of the data of https://vision.middlebury.edu/stereo/data/scenes2014/ 2 | 3 | The images were converted to jpg since it's only meant to be used for visual evaluation, not accurate metrics. 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 960, 3 | "height": 540, 4 | "fx": 1050.0, 5 | "fy": 1050.0, 6 | "cx0": 479.5, 7 | "cx1": 479.5, 8 | "cy": 269.5, 9 | "baseline_meters": 0.065, 10 | "depth_range": [0.1, 5.0] 11 | } 12 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Piano-imperfect/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 1280, 3 | "height": 871, 4 | "fx": 1291.206, 5 | "fy": 1291.206, 6 | "cx0": 644.564, 7 | "cx1": 701.304, 8 | "cy": 431.367, 9 | "baseline_meters": 0.178089, 10 | "depth_range": [1,3] 11 | } 12 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Playtable-imperfect/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 1280, 3 | "height": 872, 4 | "fx": 1102.096, 5 | "fy": 1102.096, 6 | "cx0": 537.248, 7 | "cx1": 587.958, 8 | "cy": 440.703, 9 | "baseline_meters": 0.193006, 10 | "depth_range": [1,3] 11 | } 12 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Piano-imperfect/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[2852.758 0 1424.085; 0 2852.758 953.053; 0 0 1] 2 | cam1=[2852.758 0 1549.445; 0 2852.758 953.053; 0 0 1] 3 | doffs=125.36 4 | baseline=178.089 5 | width=2828 6 | height=1924 7 | ndisp=260 8 | isint=0 9 | vmin=36 10 | vmax=218 11 | dyavg=0.408 12 | dymax=1.923 13 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Playtable-imperfect/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[2341.955 0 1141.652; 0 2341.955 936.494; 0 0 1] 2 | cam1=[2341.955 0 1249.412; 0 2341.955 936.494; 0 0 1] 3 | doffs=107.76 4 | baseline=193.006 5 | width=2720 6 | height=1852 7 | ndisp=290 8 | isint=0 9 | vmin=27 10 | vmax=271 11 | dyavg=0.962 12 | dymax=2.665 13 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 881, 3 | "height": 400, 4 | "fx": 1003.556, 5 | "fy": 1003.556, 6 | "cx0": 455.689, 
7 | "cx1": 455.689, 8 | "cy": 197.6634, 9 | "baseline_meters": 0.5446133834145297, 10 | "depth_range": [10.0, 100.0] 11 | } 12 | -------------------------------------------------------------------------------- /datasets/oak-d/README.md: -------------------------------------------------------------------------------- 1 | These images were captured with my OAK-D Lite camera with the medium resolution setting (640x480). 2 | 3 | Rectification was done on device with the factory values. 4 | 5 | Note that there is a slightly annoying border in the left images that comes from the rectification. The disparity in that area should be ignored. 6 | -------------------------------------------------------------------------------- /datasets/kitti2015/stereo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment": "Not the actual calibration, just made it up for a nice display.", 3 | "width": 1242, 4 | "height": 375, 5 | "fx": 994, 6 | "fy": 994, 7 | "cx0": 621, 8 | "cx1": 621, 9 | "cy": 187.5, 10 | "baseline_meters": 0.54, 11 | "depth_range": [1.0, 100.0] 12 | } 13 | -------------------------------------------------------------------------------- /datasets/opencv-sample/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment": "Calibration made up manually for the results to look good.", 3 | "width": 1282, 4 | "height": 1110, 5 | "fx": 1025.6, 6 | "fy": 1025.6, 7 | "cx0": 641, 8 | "cx1": 641, 9 | "cy": 555, 10 | "baseline_meters": 0.0599101, 11 | "depth_range": [0.3, 2.0] 12 | } 13 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/images.txt: -------------------------------------------------------------------------------- 1 | # Image list with two lines of data per image: 2 | # IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME 3 | # POINTS2D[] as (X, Y, POINT3D_ID) 4 | # Number of images: 2 5 | 0 0.310023 0.347029 0.635813 -0.615791 1.20075 -3.54645 0.615411 0 im0.png 6 | 7 | 1 0.310023 0.347029 0.635813 -0.615791 1.14093 -3.54645 0.615411 0 im1.png 8 | 9 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/images.txt: -------------------------------------------------------------------------------- 1 | # Image list with two lines of data per image: 2 | # IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME 3 | # POINTS2D[] as (X, Y, POINT3D_ID) 4 | # Number of images: 2 5 | 0 0.549136 0.52558 0.458054 -0.460872 0.191419 1.06806 1.32414 0 im0.png 6 | 7 | 1 0.549136 0.52558 0.458054 -0.460872 0.131864 1.06806 1.32414 0 im1.png 8 | 9 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/images.txt: -------------------------------------------------------------------------------- 1 | # Image list with two lines of data per image: 2 | # IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME 3 | # POINTS2D[] as (X, Y, POINT3D_ID) 4 | # Number of images: 2 5 | 0 0.64809 0.711282 -0.193094 0.191761 2.74638 -2.59869 -8.02903 0 im0.png 6 | 7 | 1 0.64809 0.711282 -0.193094 0.191761 2.68647 -2.59869 -8.02903 0 im1.png 8 | 9 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/images.txt: -------------------------------------------------------------------------------- 1 | # Image list with two lines of data per 
image: 2 | # IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME 3 | # POINTS2D[] as (X, Y, POINT3D_ID) 4 | # Number of images: 2 5 | 0 0.658038 0.694861 0.20901 -0.20117 -3.19666 -2.90788 -7.30784 0 im0.png 6 | 7 | 1 0.658038 0.694861 0.20901 -0.20117 -3.25655 -2.90788 -7.30784 0 im1.png 8 | 9 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/images.txt: -------------------------------------------------------------------------------- 1 | # Image list with two lines of data per image: 2 | # IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME 3 | # POINTS2D[] as (X, Y, POINT3D_ID) 4 | # Number of images: 2 5 | 0 -0.21362 -0.394959 0.711046 -0.541098 -0.874382 1.30323 -2.45671 0 im0.png 6 | 7 | 1 -0.21362 -0.394959 0.711046 -0.541098 -0.933992 1.30323 -2.45671 0 im1.png 8 | 9 | -------------------------------------------------------------------------------- /datasets/oak-d/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 640, 3 | "height": 480, 4 | "baseline_meters": 0.075, 5 | "fx": 451.0344543457031, 6 | "fy": 451.0344543457031, 7 | "cx0": 299.03839111328125, 8 | "cx1": 299.03839111328125, 9 | "cy": 255.16502380371094, 10 | "depth_range": [1.0, 5.0], 11 | "left_image_rect_normalized": [0, 0.0417, 0.9547, 1] 12 | } 13 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | recursive-exclude * .vscode 4 | recursive-exclude * .github 5 | recursive-exclude * __pycache__ 6 | recursive-exclude datasets/drivingstereo * 7 | recursive-exclude datasets/eth3d_lowres * 8 | recursive-exclude datasets/kitti2015 * 9 | recursive-exclude datasets/middlebury_2014 * 10 | recursive-exclude datasets/opencv-sample * 11 | recursive-exclude datasets/sceneflow * 12 | -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | on: [push, pull_request, workflow_dispatch] 3 | jobs: 4 | build: 5 | runs-on: ${{ matrix.os }} 6 | strategy: 7 | matrix: 8 | os: ['ubuntu-latest', 'windows-latest', 'macos-latest'] 9 | steps: 10 | - uses: actions/checkout@v4 11 | with: 12 | lfs: true 13 | - uses: actions/setup-python@v5 14 | with: 15 | python-version: '3.11' 16 | - name: Install the library 17 | run: | 18 | pip install . 19 | - name: Run the unit tests 20 | run: | 21 | python3 tests/test_methods.py 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Nicolas Burrus 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | 8 | // { 9 | // "name": "Python: chang_realtimestereo_to_torchscript_onnx.py", 10 | // "type": "python", 11 | // "request": "launch", 12 | // "program": "${workspaceFolder}/tools/chang_realtimestereo_to_torchscript_onnx.py", 13 | // "cwd": "${workspaceFolder}", 14 | // "console": "integratedTerminal", 15 | // "justMyCode": false, 16 | // "args": [ 17 | // "../RealtimeStereo/", 18 | // "models/pretrained_Kitti2015_realtime.tar" 19 | // ], 20 | // }, 21 | 22 | { 23 | "name": "Python: Module", 24 | "type": "python", 25 | "request": "launch", 26 | "module": "stereodemo", 27 | "justMyCode": false, 28 | "args": [ 29 | "datasets", 30 | "--models-path", "models" 31 | ] 32 | }, 33 | ] 34 | } -------------------------------------------------------------------------------- /stereodemo/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | 5 | import tempfile 6 | import urllib.request 7 | import shutil 8 | import sys 9 | 10 | def download_model (url: str, model_path: Path): 11 | filename = model_path.name 12 | with tempfile.TemporaryDirectory() as d: 13 | tmp_file_path = Path(d) / filename 14 | print (f"Downloading {filename} from {url} to {model_path}...") 15 | urllib.request.urlretrieve(url, tmp_file_path) 16 | shutil.move (tmp_file_path, model_path) 17 | 18 | def pad_width (size: int, multiple: int): 19 | return 0 if size % multiple == 0 else multiple - (size%multiple) 20 | 21 | class ImagePadder: 22 | def __init__(self, multiple, mode): 23 | self.multiple = multiple 24 | self.mode = mode 25 | 26 | def pad (self, im: np.ndarray): 27 | # H,W,C 28 | rows = im.shape[0] 29 | cols = im.shape[1] 30 | self.rows_to_pad = pad_width(rows, self.multiple) 31 | self.cols_to_pad = pad_width(cols, self.multiple) 32 | if self.rows_to_pad == 0 and self.cols_to_pad == 0: 33 | return im 34 | return np.pad (im, ((0, self.rows_to_pad), (0, self.cols_to_pad), (0, 0)), mode=self.mode) 35 | 36 | def unpad (self, im: np.ndarray): 37 | w = im.shape[1] - self.cols_to_pad 38 | h = im.shape[0] - self.rows_to_pad 39 | return im[:h, :w, :] 40 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = stereodemo 3 | version = 0.6.2 4 | author = Nicolas Burrus 5 | author_email = nicolas@burrus.name 6 | description = Compare various stereo depth estimation algorithms on image files or with an OAK-D camera. 
7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/nburrus/stereodemo 10 | project_urls = 11 | Bug Tracker = https://github.com/nburrus/stereodemo/issues 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | License :: OSI Approved :: MIT License 15 | Operating System :: OS Independent 16 | 17 | [options] 18 | packages = find: 19 | python_requires = >=3.8 20 | # This might be conflicting with options.package_data and the MANIFEST.in 21 | # https://stackoverflow.com/questions/7522250/how-to-include-package-data-with-setuptools-distutils 22 | include_package_data = False 23 | setup_requires = setuptools_git 24 | install_requires = 25 | numpy 26 | onnxruntime >= 1.10.0; sys_platform == "darwin" 27 | onnxruntime-gpu >= 1.10.0; sys_platform != "darwin" 28 | opencv-python 29 | open3d >= 0.15.1 30 | torch >= 1.11.0 # previous version untested, might work? 31 | torchvision 32 | 33 | [options.entry_points] 34 | console_scripts = 35 | stereodemo = stereodemo:main 36 | 37 | [options.package_data] 38 | # This relies on a symlink to datasets existing under stereodemo/ 39 | # This is done by build_release.sh 40 | stereodemo = datasets/oak-d/*.png, datasets/oak-d/*.json 41 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/convert_calib_txt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | import sys 6 | import json 7 | import re 8 | 9 | import numpy as np 10 | 11 | @dataclass 12 | class Calibration: 13 | width: int 14 | height: int 15 | fx: float 16 | fy: float 17 | cx0: float 18 | cx1: float 19 | cy: float 20 | baseline_meters: float 21 | 22 | def to_json(self): 23 | return json.dumps(self.__dict__) 24 | 25 | def from_json(json_str): 26 | d = json.loads(json_str) 27 | return Calibration(**d) 28 | 29 | # parse numpy array from a string [a b c ; d e f ; g h i] 30 | def parse_numpy_array(s): 31 | s = s.replace("[", "").replace("]", "").replace(";", " ") 32 | return np.fromstring(s, sep=" ").reshape(3,3) 33 | 34 | fields = {} 35 | 36 | input_path = Path(sys.argv[1]) 37 | 38 | with open(input_path) as f: 39 | for l in f: 40 | kv = l.split('=') 41 | k, v = kv 42 | fields[k] = v.strip() 43 | 44 | print (fields) 45 | 46 | K0 = parse_numpy_array(fields['cam0']) 47 | print (K0) 48 | 49 | K1 = parse_numpy_array(fields['cam1']) 50 | print (K1) 51 | 52 | assert np.count_nonzero(K0 != K1) <= 1 # only cx can differ 53 | 54 | calib = Calibration(int(fields['width']), int(fields['height']), K0[0,0], K0[1,1], K0[0,2], K1[0,2], K0[1,2], float(fields['baseline'])*1e-3) 55 | print (calib) 56 | 57 | output_json = input_path.parent / 'stereodemo_calibration.json' 58 | with open(output_json, 'w') as f: 59 | f.write (calib.to_json()) 60 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52.txt: -------------------------------------------------------------------------------- 1 | calib_time: 20180703 2 | corner_dist: 1.000000e-01 3 | S_101: 9.600000e+2 6.000000e+2 4 | K_101: 2.063200e+03 -5.000000e-01 9.783000e+02 0.000000e+00 2.062400e+03 5.847000e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_101: -8.770000e-02 1.257000e-01 6.159000e-04 6.038000e-04 0.000000e+00 6 | R_101: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 
0.000000e+00 1.000000e+00 7 | T_101: 0.000000e+00 0.000000e+00 0.000000e+00 8 | S_rect_101: 8.810000e+2 4.000000e+2 9 | P_rect_101: 1.003556e+3 0.000000e+0 4.556890e+2 0.000000e+0 0.000000e+0 1.003556e+3 1.976634e+2 0.000000e+0 0.000000e+0 0.000000e+0 1.000000e+0 0.000000e+0 10 | R_rect_101: 9.995925e-01 2.195515e-02 -1.824139e-02 -2.205491e-02 9.997428e-01 -5.286002e-03 1.812064e-02 5.686160e-03 9.998196e-01 11 | S_103: 9.600000e+2 6.000000e+2 12 | K_103: 2.063400e+03 -1.000000e-01 9.734000e+02 0.000000e+00 2.062600e+03 5.999000e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_103: -8.930000e-02 1.270000e-01 1.600000e-03 1.000000e-04 0.000000e+00 14 | R_103: 9.995332e-01 2.052896e-02 -2.262329e-02 -2.077736e-02 9.997258e-01 -1.079973e-02 2.239539e-02 1.126474e-02 9.996857e-01 15 | T_103: -5.446076e-01 -7.500610e-04 -2.395167e-03 16 | S_rect_103: 8.810000e+2 4.000000e+2 17 | P_rect_103: 1.003556e+3 0.000000e+0 4.556890e+2 -1.093101e+3 0.000000e+0 1.003556e+3 1.976634e+2 0.000000e+0 0.000000e+0 0.000000e+0 1.000000e+0 0.000000e+0 18 | R_rect_103: 9.999894e-01 1.377236e-03 4.397923e-03 -1.401345e-03 9.999840e-01 5.483573e-03 -4.390300e-03 -5.489678e-03 9.999753e-01 19 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/convert_calib_txt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | import sys 6 | import json 7 | import re 8 | from typing import Tuple 9 | 10 | import numpy as np 11 | 12 | @dataclass 13 | class Calibration: 14 | width: int 15 | height: int 16 | fx: float 17 | fy: float 18 | cx0: float 19 | cx1: float 20 | cy: float 21 | baseline_meters: float 22 | depth_range: Tuple[float] = (1.0, 20.0) 23 | 24 | def to_json(self): 25 | return json.dumps(self.__dict__) 26 | 27 | def from_json(json_str): 28 | d = json.loads(json_str) 29 | return Calibration(**d) 30 | 31 | # parse numpy array from a string [a b c ; d e f ; g h i] 32 | def parse_numpy_array(s): 33 | s = s.replace("[", "").replace("]", "").replace(";", " ") 34 | return np.fromstring(s, sep=" ").reshape(3,3) 35 | 36 | fields = {} 37 | 38 | input_path = Path(sys.argv[1]) 39 | 40 | with open(input_path) as f: 41 | for l in f: 42 | kv = l.split('=') 43 | k, v = kv 44 | fields[k] = v.strip() 45 | 46 | print (fields) 47 | 48 | K0 = parse_numpy_array(fields['cam0']) 49 | print (K0) 50 | 51 | K1 = parse_numpy_array(fields['cam1']) 52 | print (K1) 53 | 54 | assert np.all(K0 == K1) 55 | 56 | calib = Calibration(int(fields['width']), int(fields['height']), K0[0,0], K0[1,1], K0[0,2], K1[0,2], K0[1,2], float(fields['baseline'])*1e-3) 57 | print (calib) 58 | 59 | output_json = input_path.parent / 'stereodemo_calibration.json' 60 | with open(output_json, 'w') as f: 61 | f.write (calib.to_json()) 62 | -------------------------------------------------------------------------------- /datasets/drivingstereo/convert_kitti_calib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | import sys 6 | import json 7 | import re 8 | 9 | import numpy as np 10 | 11 | @dataclass 12 | class Calibration: 13 | width: int 14 | height: int 15 | fx: float 16 | fy: float 17 | cx0: float 18 | cx1: float 19 | cy: float 20 | baseline_meters: float 21 | 22 | def to_json(self): 23 | return json.dumps(self.__dict__) 24 | 25 | def from_json(json_str): 26 
| d = json.loads(json_str) 27 | return Calibration(**d) 28 | 29 | # parse numpy array from a string a b c d e f g h i 30 | def parse_numpy_array(s): 31 | return np.fromstring(s, sep=" ") 32 | 33 | fields = {} 34 | 35 | input_path = Path(sys.argv[1]) 36 | 37 | with open(input_path) as f: 38 | for l in f: 39 | kv = l.split(':') 40 | k, v = kv 41 | fields[k] = v.strip() 42 | 43 | print (fields) 44 | 45 | w0, h0 = parse_numpy_array(fields['S_rect_101']).reshape(2) 46 | w1, h1 = parse_numpy_array(fields['S_rect_103']).reshape(2) 47 | assert w0 == w1 and h0 == h1 48 | 49 | K0 = parse_numpy_array(fields['P_rect_101']).reshape(3, 4) 50 | print (K0) 51 | 52 | K1 = parse_numpy_array(fields['P_rect_103']).reshape(3, 4) 53 | print (K1) 54 | 55 | T = parse_numpy_array(fields['T_103']).reshape(3) 56 | 57 | assert (K0[0,0] == K1[0,0]) 58 | 59 | # https://stackoverflow.com/a/61684187/1737680 60 | # P(i)rect = [[fu 0 cx -fu*bx], 61 | # [0 fv cy -fv*by], 62 | # [0 0 1 0]] 63 | # baseline = -K1[0,3]/K1[0,0] # does not work, ~2x too large 64 | baseline = np.linalg.norm(T) 65 | calib = Calibration(int(w0), int(h0), K0[0,0], K0[1,1], K0[0,2], K1[0,2], K0[1,2], baseline_meters=baseline) 66 | print (calib) 67 | 68 | output_json = input_path.parent / 'stereodemo_calibration.json' 69 | with open(output_json, 'w') as f: 70 | f.write (calib.to_json()) 71 | -------------------------------------------------------------------------------- /tests/test_methods.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import tempfile 4 | import unittest 5 | from pathlib import Path 6 | 7 | import cv2 8 | import numpy as np 9 | import math 10 | 11 | from stereodemo import method_opencv_bm 12 | from stereodemo import method_chang_realtime_stereo 13 | from stereodemo import method_hitnet 14 | from stereodemo import method_cre_stereo 15 | from stereodemo import method_raft_stereo 16 | from stereodemo import method_sttr 17 | from stereodemo.methods import Config, InputPair, Calibration, StereoOutput, StereoMethod 18 | 19 | data_folder = Path(__file__).parent.parent / 'datasets' / 'eth3d_lowres' / 'delivery_area_1l' 20 | left_image = cv2.imread (str(data_folder / 'im0.png'), cv2.IMREAD_COLOR) 21 | right_image = cv2.imread (str(data_folder / 'im1.png'), cv2.IMREAD_COLOR) 22 | calib_file = data_folder / 'stereodemo_calibration.json' 23 | calibration = Calibration.from_json (open(calib_file, 'r').read()) 24 | input = InputPair (left_image, right_image, calibration, "Test images loaded.", None) 25 | models_path = Path(tempfile.gettempdir()) / 'models' 26 | models_path.mkdir(parents=True, exist_ok=True) 27 | config = Config(models_path) 28 | 29 | class TestStereoInference(unittest.TestCase): 30 | 31 | def check_method(self, method: StereoMethod, expected_median: float, expected_coverage: float): 32 | output = method.compute_disparity (input) 33 | valid_pixels = output.disparity_pixels[output.disparity_pixels > 0.] 
34 | coverage = valid_pixels.size / output.disparity_pixels.size 35 | median_value = np.median (valid_pixels) 36 | self.assertAlmostEqual (median_value, expected_median, delta=0.01) 37 | self.assertAlmostEqual (coverage, expected_coverage, delta=0.01) 38 | 39 | def test_bm(self): 40 | self.check_method (method_opencv_bm.StereoBM(config), 4.8125, 0.4403) 41 | 42 | def test_sgbm(self): 43 | self.check_method (method_opencv_bm.StereoSGBM(config), 5.1875, 0.8515) 44 | 45 | def test_chang_realtime(self): 46 | m = method_chang_realtime_stereo.ChangRealtimeStereo(config) 47 | m.parameters["Shape"].set_value ("320x240") 48 | self.check_method (m, 12.7776, 1.0) 49 | 50 | def test_hitnet(self): 51 | m = method_hitnet.HitnetStereo(config) 52 | m.parameters["Shape"].set_value ("320x240") 53 | self.check_method (m, 4.9103, 1.0) 54 | 55 | def test_crestereo(self): 56 | m = method_cre_stereo.CREStereo(config) 57 | m.parameters["Shape"].set_value ("320x240") 58 | self.check_method (m, 4.6287, 1.0) 59 | 60 | def test_raft_stereo(self): 61 | m = method_raft_stereo.RaftStereo(config) 62 | m.parameters["Shape"].set_value ("320x256") 63 | self.check_method (m, 4.6408, 1.0) 64 | 65 | def test_sttr(self): 66 | m = method_sttr.StereoTransformers(config) 67 | m.parameters["Shape"].set_value ("640x480 (ds3)") 68 | self.check_method (m, 7.4636, 0.9869) 69 | 70 | if __name__ == '__main__': 71 | unittest.main() 72 | -------------------------------------------------------------------------------- /stereodemo/method_dist_depth.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import tempfile 7 | import sys 8 | 9 | import torch 10 | from torchvision import transforms 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | from .methods import Calibration, Config, EnumParameter, StereoMethod, InputPair, StereoOutput 16 | from . import utils 17 | 18 | urls = { 19 | "dist-depth-256x256.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-distdepth/dist-depth-256x256.scripted.pt", 20 | } 21 | 22 | # https://github.com/facebookresearch/DistDepth 23 | # Exported via torch tracing by tweaking the original demo.py. 24 | # Changes here: https://github.com/nburrus/DistDepth/commit/fde3b427ef2ff31c34f08e99c51c8e6a2427b720 25 | class DistDepth(StereoMethod): 26 | def __init__(self, config: Config): 27 | super().__init__("[Monocular] DistDepth (CVPR 2022)", 28 | "Toward Practical Monocular Indoor Depth Estimation.", 29 | {}, 30 | config) 31 | self.reset_defaults() 32 | 33 | self.net = None 34 | self._loaded_model_path = None 35 | 36 | def reset_defaults(self): 37 | self.parameters.update ({ 38 | # "Device": EnumParameter("Device", 0, ["CPU", "CUDA"]), 39 | # For some reason it crashes with CUDA on my machine, disabling for now. 40 | "Device": EnumParameter("Device", 0, ["CPU"]), 41 | }) 42 | 43 | def compute_disparity(self, input: InputPair) -> StereoOutput: 44 | # The pre-trained model is for 256x256. Their demo script resizes 45 | # all input images to that. 
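# Note (added comment): unlike the stereo methods, this model only looks at the left
# image (it is monocular); it predicts metric depth directly, which _process_output
# below converts back to disparity so it can go through the same visualization path.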
46 | self.target_size = (256, 256) 47 | device = torch.device('cuda') if self.parameters["Device"].value == 'CUDA' else 'cpu' 48 | 49 | model_path = self.config.models_path / f'dist-depth-256x256.scripted.pt' 50 | self._load_model (model_path) 51 | 52 | # raw_img can stay in BGR 53 | raw_img = np.transpose(input.left_image, (2, 0, 1)) 54 | input_image = torch.from_numpy(raw_img).float().to(device) 55 | input_image = (input_image / 255.0).unsqueeze(0) 56 | input_image = torch.nn.functional.interpolate( 57 | input_image, (256, 256), mode="bilinear", align_corners=False 58 | ) 59 | 60 | net = self.net.to(device) 61 | 62 | start = time.time() 63 | with torch.no_grad(): 64 | outputs = net(input_image.to(device)) 65 | elapsed_time = time.time() - start 66 | 67 | disparity_map = self._process_output(outputs, input.calibration) 68 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 69 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 70 | # not need to scale, the disparity values were already for the input full resolution calibration. 71 | 72 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 73 | 74 | def _process_output(self, outputs, calib: Calibration): 75 | depth_meters = outputs[0].detach().squeeze(0).cpu().numpy() 76 | # The model directly gives a depth map in meters. Let's convert it 77 | # to disparity to fit in the stereo display. 78 | disparity_map = StereoMethod.disparity_from_depth_meters(depth_meters, calib) 79 | return disparity_map 80 | 81 | def _load_model(self, model_path: Path): 82 | if (self._loaded_model_path == model_path): 83 | return 84 | 85 | if not model_path.exists(): 86 | utils.download_model (urls[model_path.name], model_path) 87 | 88 | assert Path(model_path).exists() 89 | self._loaded_model_path = model_path 90 | self.net = torch.jit.load(model_path) 91 | self.net.cpu () 92 | self.net.eval () 93 | -------------------------------------------------------------------------------- /stereodemo/chang_realtime_stereo_onnx.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import tempfile 7 | import sys 8 | 9 | import onnxruntime 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | from .methods import EnumParameter, StereoMethod, InputPair, StereoOutput 15 | from . import utils 16 | 17 | urls = { 18 | "chang-realtime-stereo-cpu-1280x720.onnx": "", 19 | "chang-realtime-stereo-cpu-160x128.onnx": "", 20 | "chang-realtime-stereo-cpu-320x240.onnx": "", 21 | "chang-realtime-stereo-cpu-640x480.onnx": "", 22 | } 23 | 24 | # Adapted from https://github.com/ibaiGorordo/ONNX-CREStereo-Depth-Estimation 25 | # https://github.com/PINTO0309/PINTO_model_zoo/tree/main/284_CREStereo 26 | # IMPORTANT: these ONNX are not working, keeping in case things improve later on. 
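# Note (added comment): this wrapper also predates the Config refactor used by the other
# methods: it takes no Config argument and still references a `models_path` that is not
# defined in this file, so it would need the same updates as
# method_chang_realtime_stereo.py before it could be re-enabled.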
27 | class ChangRealtimeStereoOnnx(StereoMethod): 28 | def __init__(self): 29 | super().__init__("Chang Real-time Onnx", "Attention-Aware Feature Aggregation for Real-time Stereo Matching on Edge Devices (ACCV 2020).", {}) 30 | self.reset_defaults() 31 | 32 | self._loaded_session = None 33 | self._loaded_model_path = None 34 | 35 | def reset_defaults(self): 36 | self.parameters.update ({ 37 | "Shape": EnumParameter("Processed image size", 1, ["160x128", "320x240", "640x480", "1280x720"]) 38 | }) 39 | 40 | def compute_disparity(self, input: InputPair) -> StereoOutput: 41 | if not models_path.exists(): 42 | models_path.mkdir(parents=True, exist_ok=True) 43 | 44 | cols, rows = self.parameters["Shape"].value.split('x') 45 | cols, rows = int(cols), int(rows) 46 | self.target_size = (cols, rows) 47 | 48 | model_path = models_path / f'chang-realtime-stereo-cpu-{cols}x{rows}.onnx' 49 | self._load_model (model_path) 50 | 51 | left_tensor = self._preprocess_input(input.left_image) 52 | right_tensor = self._preprocess_input(input.right_image) 53 | 54 | start = time.time() 55 | model_inputs = self._loaded_session.get_inputs() 56 | model_outputs = self._loaded_session.get_outputs() 57 | input_names = [model_inputs[i].name for i in range(len(model_inputs))] 58 | output_names = [model_outputs[i].name for i in range(len(model_outputs))] 59 | outputs = self._loaded_session.run(['disparity'], {'left': left_tensor, 60 | 'right': right_tensor}) 61 | elapsed_time = time.time() - start 62 | 63 | disparity_map = self._process_output(outputs) 64 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 65 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 66 | x_scale = input.left_image.shape[1] / float(cols) 67 | disparity_map *= np.float32(x_scale) 68 | 69 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 70 | 71 | def _preprocess_input (self, img: np.ndarray): 72 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 73 | img = cv2.resize(img, self.target_size, cv2.INTER_AREA) 74 | imagenet_stats = {'mean': np.array([0.485, 0.456, 0.406]), 'std': np.array([0.229, 0.224, 0.225])} 75 | img = (img.astype(np.float32) / 255.0) - imagenet_stats['mean'] / imagenet_stats['std'] 76 | img = img.transpose(2, 0, 1) # C,H,W instead of H,W,C 77 | img = img[np.newaxis, :, :, :] # add batch dimension 78 | return img 79 | 80 | def _process_output(self, outputs): 81 | disparity_map = outputs[0].permute(1,2,0) 82 | return disparity_map 83 | 84 | def _load_model(self, model_path: Path): 85 | if (self._loaded_model_path == model_path): 86 | return 87 | 88 | if not model_path.exists(): 89 | utils.download_model (urls[model_path.name], model_path) 90 | 91 | assert Path(model_path).exists() 92 | self._loaded_model_path = model_path 93 | self._loaded_session = onnxruntime.InferenceSession(str(model_path), providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) 94 | -------------------------------------------------------------------------------- /stereodemo/methods.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from dataclasses import dataclass, field 3 | from pathlib import Path 4 | from typing import Any, Dict, List, Tuple 5 | import time 6 | import json 7 | import numpy as np 8 | 9 | 10 | @dataclass 11 | class Calibration: 12 | width: int 13 | height: int 14 | fx: float 15 | fy: float 16 | cx0: float # cx is the only one that can differ between both cameras. 
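# (added comment) The difference (cx0 - cx1) is the disparity of a point at infinity
# when the two rectified views keep their own principal points; it is subtracted from
# the raw disparity in depth_meters_from_disparity below:
#   depth = baseline_meters * fx / (disparity - (cx0 - cx1))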
17 | cx1: float 18 | cy: float 19 | baseline_meters: float 20 | depth_range: Tuple[float] = (0.3, 20.0) 21 | left_image_rect_normalized: np.ndarray = field( 22 | default_factory=lambda: np.array([0., 0., 1., 1.])) # origin, size in percent of image size 23 | comment: str = "" 24 | 25 | def to_json(self): 26 | return json.dumps(self.__dict__) 27 | 28 | @staticmethod 29 | def from_json(json_str): 30 | d = json.loads(json_str) 31 | return Calibration(**d) 32 | 33 | def downsample(self, new_width: int, new_height: int): 34 | sx = new_width / self.width 35 | sy = new_height / self.height 36 | self.width = new_width 37 | self.height = new_height 38 | self.fx *= sx 39 | self.fy *= sy 40 | self.cx0 *= sx 41 | self.cx1 *= sx 42 | self.cy *= sy 43 | 44 | 45 | @dataclass 46 | class InputPair: 47 | left_image: np.ndarray 48 | right_image: np.ndarray 49 | calibration: Calibration 50 | status: str 51 | input_disparity: np.ndarray = None 52 | 53 | def has_data(self): 54 | return self.left_image is not None 55 | 56 | 57 | @dataclass 58 | class StereoOutput: 59 | disparity_pixels: np.ndarray 60 | color_image_bgr: np.ndarray 61 | computation_time: float 62 | point_cloud: Any = None 63 | disparity_color: np.ndarray = None 64 | 65 | 66 | @dataclass 67 | class IntParameter: 68 | description: str 69 | value: int 70 | min: int 71 | max: int 72 | to_valid: Any = lambda x: x # default is to just accept anything 73 | 74 | def set_value(self, x: int): 75 | self.value = self.to_valid(x) 76 | 77 | 78 | @dataclass 79 | class EnumParameter: 80 | description: str 81 | index: int # index in the list 82 | values: List[str] 83 | 84 | def set_index(self, idx: int): 85 | self.index = idx 86 | 87 | def set_value(self, value): 88 | self.index = self.values.index(value) 89 | 90 | @property 91 | def value(self) -> str: 92 | return self.values[self.index] 93 | 94 | 95 | @dataclass 96 | class Config: 97 | models_path: Path 98 | 99 | 100 | class StereoMethod: 101 | def __init__(self, name: str, description: str, parameters: Dict, config: Config): 102 | self.name = name 103 | self.parameters = parameters 104 | self.description = description 105 | self.config = config 106 | 107 | def reset_defaults(self): 108 | pass 109 | 110 | @abstractmethod 111 | def compute_disparity(self, input: InputPair) -> StereoOutput: 112 | """Return the disparity map in pixels and the actual computation time. 113 | 114 | Both input images are assumed to be rectified. 115 | """ 116 | return StereoOutput(None, None, None, None) 117 | 118 | @staticmethod 119 | def depth_meters_from_disparity(disparity_pixels: np.ndarray, calibration: Calibration): 120 | old_seterr = np.seterr(divide='ignore') 121 | dcx = np.float32(calibration.cx0 - calibration.cx1) 122 | depth_meters = np.float32(calibration.baseline_meters * calibration.fx) / (disparity_pixels - dcx) 123 | depth_meters = np.nan_to_num(depth_meters) 124 | depth_meters[disparity_pixels < 0.] 
= -1.0 125 | np.seterr(**old_seterr) 126 | return depth_meters 127 | 128 | @staticmethod 129 | def disparity_from_depth_meters(depth_meters: np.ndarray, calibration: Calibration): 130 | old_seterr = np.seterr(divide='ignore') 131 | dcx = np.float32(calibration.cx0 - calibration.cx1) 132 | disparity_pixels = (np.float32(calibration.baseline_meters * calibration.fx) / depth_meters) + dcx 133 | disparity_pixels = np.nan_to_num(disparity_pixels) 134 | np.seterr(**old_seterr) 135 | return disparity_pixels -------------------------------------------------------------------------------- /stereodemo/method_chang_realtime_stereo.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import tempfile 7 | import sys 8 | 9 | import torch 10 | from torchvision import transforms 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | from .methods import Config, EnumParameter, StereoMethod, InputPair, StereoOutput 16 | from . import utils 17 | 18 | urls = { 19 | "chang-realtime-stereo-cpu-1280x720.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-chang-realtimestereo/chang-realtime-stereo-cpu-1280x720.scripted.pt", 20 | "chang-realtime-stereo-cpu-160x128.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-chang-realtimestereo/chang-realtime-stereo-cpu-160x128.scripted.pt", 21 | "chang-realtime-stereo-cpu-320x240.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-chang-realtimestereo/chang-realtime-stereo-cpu-320x240.scripted.pt", 22 | "chang-realtime-stereo-cpu-640x480.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-chang-realtimestereo/chang-realtime-stereo-cpu-640x480.scripted.pt", 23 | } 24 | 25 | # https://github.com/JiaRenChang/RealtimeStereo 26 | # I exported the pytorch implementation to torch script via tracing with 27 | # some minor changes to the code https://github.com/JiaRenChang/RealtimeStereo/pull/15 28 | # See chang_realtimestereo_to_torchscript_onnx.py 29 | class ChangRealtimeStereo(StereoMethod): 30 | def __init__(self, config: Config): 31 | super().__init__("Chang Real-time (ACCV 2020)", 32 | "Attention-Aware Feature Aggregation for Real-time Stereo Matching on Edge Devices. 
Pre-trained on SceneFlow + Kitti 2015.", 33 | {}, 34 | config) 35 | self.reset_defaults() 36 | 37 | self.net = None 38 | self._loaded_model_path = None 39 | 40 | imagenet_stats = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]} 41 | self.img_to_tensor_transforms = transforms.Compose([ 42 | transforms.ToTensor(), 43 | transforms.Normalize(**imagenet_stats), 44 | ]) 45 | 46 | def reset_defaults(self): 47 | self.parameters.update ({ 48 | "Shape": EnumParameter("Processed image size", 2, ["160x128", "320x240", "640x480", "1280x720"]) 49 | }) 50 | 51 | def compute_disparity(self, input: InputPair) -> StereoOutput: 52 | cols, rows = self.parameters["Shape"].value.split('x') 53 | cols, rows = int(cols), int(rows) 54 | self.target_size = (cols, rows) 55 | 56 | model_path = self.config.models_path / f'chang-realtime-stereo-cpu-{cols}x{rows}.scripted.pt' 57 | self._load_model (model_path) 58 | 59 | left_tensor = self._preprocess_input(input.left_image) 60 | right_tensor = self._preprocess_input(input.right_image) 61 | 62 | start = time.time() 63 | with torch.no_grad(): 64 | outputs = self.net(left_tensor, right_tensor) 65 | elapsed_time = time.time() - start 66 | 67 | disparity_map = self._process_output(outputs) 68 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 69 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 70 | x_scale = input.left_image.shape[1] / float(cols) 71 | disparity_map *= np.float32(x_scale) 72 | 73 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 74 | 75 | def _preprocess_input (self, img: np.ndarray): 76 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 77 | img = cv2.resize(img, self.target_size, cv2.INTER_AREA) 78 | return self.img_to_tensor_transforms (img).unsqueeze(0) 79 | 80 | def _process_output(self, outputs): 81 | disparity_map = outputs[0].detach().cpu().permute(1,2,0).numpy() 82 | return disparity_map 83 | 84 | def _load_model(self, model_path: Path): 85 | if (self._loaded_model_path == model_path): 86 | return 87 | 88 | if not model_path.exists(): 89 | utils.download_model (urls[model_path.name], model_path) 90 | 91 | assert Path(model_path).exists() 92 | self._loaded_model_path = model_path 93 | self.net = torch.jit.load(model_path) 94 | self.net.cpu () 95 | self.net.eval () 96 | -------------------------------------------------------------------------------- /stereodemo/oakd_source.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | import time 4 | 5 | from . 
import visualizer 6 | from .methods import Config, InputPair, StereoMethod, StereoOutput 7 | 8 | import cv2 9 | 10 | try: 11 | import depthai as dai 12 | except ImportError: 13 | print ("You need to 'pip install depthai' to use the OAK camera.") 14 | sys.exit (1) 15 | 16 | def getFrame(queue): 17 | # Get frame from queue 18 | frame = queue.get() 19 | # Convert frame to OpenCV format and return 20 | return frame.getCvFrame() 21 | 22 | def getMonoCamera(pipeline, isLeft): 23 | # Configure mono camera 24 | mono = pipeline.createMonoCamera() 25 | 26 | # Set Camera Resolution 27 | mono.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P) 28 | 29 | if isLeft: 30 | # Get left camera 31 | mono.setBoardSocket(dai.CameraBoardSocket.LEFT) 32 | else : 33 | # Get right camera 34 | mono.setBoardSocket(dai.CameraBoardSocket.RIGHT) 35 | return mono 36 | 37 | def getStereoPair(pipeline, monoLeft, monoRight): 38 | # Configure stereo pair for depth estimation 39 | stereo = pipeline.createStereoDepth() 40 | # Checks occluded pixels and marks them as invalid 41 | stereo.setLeftRightCheck(True) 42 | 43 | # Configure left and right cameras to work as a stereo pair 44 | monoLeft.out.link(stereo.left) 45 | monoRight.out.link(stereo.right) 46 | 47 | return stereo 48 | 49 | class OakdSource (visualizer.Source): 50 | def __init__(self, output_folder: Path = None): 51 | self.connect () 52 | self.output_folder = output_folder 53 | self.frameIndex = 0 54 | 55 | def connect (self): 56 | print ("Trying to connect to an OAK camera...") 57 | pipeline = dai.Pipeline() 58 | 59 | # Set up left and right cameras 60 | monoLeft = getMonoCamera(pipeline, isLeft = True) 61 | monoRight = getMonoCamera(pipeline, isLeft = False) 62 | 63 | # Combine left and right cameras to form a stereo pair 64 | stereo = getStereoPair(pipeline, monoLeft, monoRight) 65 | 66 | 67 | # Set XlinkOut for disparity, rectifiedLeft, and rectifiedRight 68 | xoutDisp = pipeline.createXLinkOut() 69 | xoutDisp.setStreamName("disparity") 70 | 71 | # xoutDepth = pipeline.create(dai.node.XLinkOut) 72 | # xoutDepth.setStreamName("depth") 73 | 74 | xoutRectifiedLeft = pipeline.createXLinkOut() 75 | xoutRectifiedLeft.setStreamName("rectifiedLeft") 76 | 77 | xoutRectifiedRight = pipeline.createXLinkOut() 78 | xoutRectifiedRight.setStreamName("rectifiedRight") 79 | 80 | stereo.disparity.link(xoutDisp.input) 81 | 82 | stereo.rectifiedLeft.link(xoutRectifiedLeft.input) 83 | stereo.rectifiedRight.link(xoutRectifiedRight.input) 84 | # stereo.depth.link(xoutDepth.input) 85 | 86 | self.device = dai.Device(pipeline).__enter__() 87 | 88 | oak_calib = self.device.readCalibration() 89 | w, h = monoLeft.getResolutionSize() 90 | 91 | # Intrinsics of disparity are taken from the right image. 
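# (added comment) Both rectified streams share the right camera intrinsics, which is why
# cx0 and cx1 are filled with the same value below. getBaselineDistance() reports the
# baseline in centimeters, hence the 1e-2 factor to convert it to meters.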
92 | disparityIntrinsics = oak_calib.getCameraIntrinsics(dai.CameraBoardSocket.RIGHT, dai.Size2f(w, h)) 93 | baselineMeters = 1e-2 * oak_calib.getBaselineDistance(dai.CameraBoardSocket.LEFT, dai.CameraBoardSocket.RIGHT) 94 | 95 | self.calibration = visualizer.Calibration(w, h, 96 | fx=disparityIntrinsics[0][0], 97 | fy=disparityIntrinsics[1][1], 98 | cx0=disparityIntrinsics[0][2], 99 | cx1=disparityIntrinsics[0][2], 100 | cy=disparityIntrinsics[1][2], 101 | baseline_meters=baselineMeters) 102 | 103 | # Output queues will be used to get the rgb frames and nn data from the outputs defined above 104 | self.disparityQueue = self.device.getOutputQueue(name="disparity", maxSize=1, blocking=False) 105 | self.rectifiedLeftQueue = self.device.getOutputQueue(name="rectifiedLeft", maxSize=1, blocking=False) 106 | self.rectifiedRightQueue = self.device.getOutputQueue(name="rectifiedRight", maxSize=1, blocking=False) 107 | # depthQueue = self.device.getOutputQueue(name="depth", maxSize=1, blocking=False) 108 | 109 | def is_live(self): 110 | return True 111 | 112 | def get_next_pair(self): 113 | leftFrame = getFrame(self.rectifiedLeftQueue) 114 | rightFrame = getFrame(self.rectifiedRightQueue) 115 | if self.output_folder is not None: 116 | self.output_folder.mkdir(parents=True, exist_ok=True) 117 | cv2.imwrite(str(self.output_folder / f"img_{self.frameIndex:03d}_left.png"), leftFrame) 118 | cv2.imwrite(str(self.output_folder / f"img_{self.frameIndex:03d}_right.png"), rightFrame) 119 | disparityPixels = getFrame(self.disparityQueue) 120 | leftFrame = cv2.cvtColor (leftFrame, cv2.COLOR_GRAY2RGB) 121 | rightFrame = cv2.cvtColor (rightFrame, cv2.COLOR_GRAY2RGB) 122 | self.frameIndex += 1 123 | return visualizer.InputPair(leftFrame, rightFrame, self.calibration, "OAK-D Camera", disparityPixels) 124 | 125 | class StereoFromOakInputSource(StereoMethod): 126 | def __init__(self, config: Config): 127 | super().__init__("Input Source", "Stereo computed by the input source", {}, config) 128 | 129 | def compute_disparity(self, input: InputPair) -> StereoOutput: 130 | # The disparity is aligned to the right image with OAK-D 131 | return StereoOutput(input.input_disparity, input.right_image, 0.0) 132 | -------------------------------------------------------------------------------- /stereodemo/method_opencv_bm.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | import cv2 5 | 6 | from .methods import Config, StereoMethod, IntParameter, EnumParameter, InputPair, StereoOutput 7 | 8 | def odd_only(x): 9 | return x if x % 2 == 1 else x+1 10 | 11 | def multiple_of_16(x): 12 | return max(16, x//16 * 16) 13 | 14 | # Default parameters taken from 15 | # https://github.com/opencv/opencv/blob/4.x/samples/cpp/stereo_match.cpp 16 | class StereoBM(StereoMethod): 17 | def __init__(self, config: Config): 18 | super().__init__("OpenCV BM", "OpenCV Simple Block Matching", {}, config) 19 | self.reset_defaults() 20 | 21 | def reset_defaults(self): 22 | # For more details: 23 | # https://learnopencv.com/depth-perception-using-stereo-camera-python-c/ 24 | self.parameters.update ({ 25 | "Num Disparities": IntParameter("Number of disparities (pixels)", 128, 16, 640, to_valid=multiple_of_16), 26 | "Block Size": IntParameter("Kernel size for block matching (odd)", 9, 5, 63, to_valid=odd_only), 27 | "TextureThreshold": IntParameter("Minimum SAD to consider the texture sufficient", 10, 0, 100), 28 | "Uniqueness Ratio": IntParameter("How unique the match each for 
each pixel", 15, 0, 100), 29 | "SpeckleWindowSize": IntParameter("Speckle window size in pixels (filter small objects). 0 to disable.", 100, 0, 1000), 30 | "SpeckleRange": IntParameter("Speckle range (max diff within a window)", 32, 0, 64), 31 | "Disp12MaxDiff": IntParameter("Maximum allowable difference in the right-left check", 1, 0, 64), 32 | "PreFilterCap": IntParameter("Max pre-filter output", 31, 1, 63), 33 | "PreFilterSize": IntParameter("Pre-filter size (odd)", 9, 5, 255, to_valid=odd_only), 34 | }) 35 | 36 | def compute_disparity(self, input: InputPair) -> StereoOutput: 37 | left_image, right_image = input.left_image, input.right_image 38 | block_size = self.parameters['Block Size'].value 39 | if block_size % 2 == 0: 40 | block_size += 1 41 | stereoBM = cv2.StereoBM_create(numDisparities=self.parameters['Num Disparities'].value, 42 | blockSize=block_size) 43 | stereoBM.setTextureThreshold(self.parameters['TextureThreshold'].value) 44 | stereoBM.setUniquenessRatio(self.parameters['Uniqueness Ratio'].value) 45 | stereoBM.setSpeckleWindowSize(self.parameters['SpeckleWindowSize'].value) 46 | stereoBM.setSpeckleRange(self.parameters['SpeckleRange'].value) 47 | stereoBM.setDisp12MaxDiff(self.parameters['Disp12MaxDiff'].value) 48 | stereoBM.setPreFilterCap(self.parameters['PreFilterCap'].value) 49 | stereoBM.setPreFilterSize(self.parameters['PreFilterSize'].value) 50 | 51 | gray_left = cv2.cvtColor(left_image, cv2.COLOR_BGR2GRAY) 52 | gray_right = cv2.cvtColor(right_image, cv2.COLOR_BGR2GRAY) 53 | # OpenCV returns 16x the disparity in pixels 54 | start = time.time() 55 | disparity = stereoBM.compute(gray_left, gray_right) / np.float32(16.0) 56 | return StereoOutput(disparity, input.left_image, time.time()-start) 57 | 58 | class StereoSGBM(StereoMethod): 59 | def __init__(self, config: Config): 60 | super().__init__("OpenCV SGBM", "OpenCV Semi-Global Block Matching", {}, config) 61 | self.reset_defaults () 62 | 63 | def reset_defaults(self): 64 | nchannels = 1 65 | # For more details: 66 | # https://learnopencv.com/depth-perception-using-stereo-camera-python-c/ 67 | self.parameters.update ({ 68 | "Num Disparities": IntParameter("Number of disparities (pixels)", 128, 2, 640), 69 | "Block Size": IntParameter("Kernel size for block matching (odd)", 3, 3, 63, to_valid=odd_only), 70 | 71 | "Mode": EnumParameter("Set it to StereoSGBM::MODE_HH to run the full-scale two-pass dynamic programming algorithm. It will consume O(W*H*numDisparities) bytes, which is large for 640x480 stereo and huge for HD-size pictures. By default, it is set to false .", 72 | 0, ["MODE_SGBM", "MODE_HH", "MODE_SGBM_3WAY", "MODE_HH4"]), 73 | 74 | 75 | "P1": IntParameter("Penalty Cost (default=8*NChannels*BlockSize)", 8*nchannels*3*3, 0, 2000), 76 | "P2": IntParameter("Penalty Cost. Must be > P1 (default=32*NChannels*BlockSize).", 32*nchannels*3*3, 0, 2000), 77 | 78 | "Uniqueness Ratio": IntParameter("How unique the match each for each pixel", 10, 0, 100), 79 | "SpeckleWindowSize": IntParameter("Speckle window size in pixels (filter small objects). 
0 to disable.", 100, 0, 1000), 80 | "SpeckleRange": IntParameter("Speckle range (max diff within a window)", 32, 0, 64), 81 | 82 | "Disp12MaxDiff": IntParameter("Maximum allowable difference in the right-left check", 1, 0, 64), 83 | "PreFilterCap": IntParameter("Max pre-filter output", 63, 1, 128), 84 | }) 85 | 86 | def compute_disparity(self, input: InputPair) -> StereoOutput: 87 | left_image, right_image = input.left_image, input.right_image 88 | stereoSGBM = cv2.StereoSGBM_create(numDisparities=self.parameters['Num Disparities'].value, 89 | blockSize=self.parameters['Block Size'].value) 90 | 91 | stereoSGBM.setMode(self.parameters['Mode'].index) 92 | stereoSGBM.setP1(self.parameters['P1'].value) 93 | stereoSGBM.setP2(self.parameters['P2'].value) 94 | stereoSGBM.setPreFilterCap(self.parameters['PreFilterCap'].value) 95 | stereoSGBM.setUniquenessRatio(self.parameters['Uniqueness Ratio'].value) 96 | stereoSGBM.setSpeckleWindowSize(self.parameters['SpeckleWindowSize'].value) 97 | stereoSGBM.setSpeckleRange(self.parameters['SpeckleRange'].value) 98 | stereoSGBM.setDisp12MaxDiff(self.parameters['Disp12MaxDiff'].value) 99 | 100 | gray_left = cv2.cvtColor(left_image, cv2.COLOR_BGR2GRAY, left_image) 101 | gray_right = cv2.cvtColor(right_image, cv2.COLOR_BGR2GRAY, right_image) 102 | # OpenCV returns 16x the disparity in pixels 103 | start = time.time() 104 | disparity = stereoSGBM.compute(gray_left, gray_right) / np.float32(16.0) 105 | return StereoOutput(disparity, input.left_image, time.time()-start) 106 | -------------------------------------------------------------------------------- /tools/capture_oakd_frames.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import depthai as dai 3 | import numpy as np 4 | import json 5 | 6 | # Adapted from LearnOpenCV 7 | # https://github.com/spmallick/learnopencv/tree/master/oak-getting-started 8 | 9 | def getFrame(queue): 10 | # Get frame from queue 11 | frame = queue.get() 12 | # Convert frame to OpenCV format and return 13 | return frame.getCvFrame() 14 | 15 | def getMonoCamera(pipeline, isLeft): 16 | # Configure mono camera 17 | mono = pipeline.createMonoCamera() 18 | 19 | # Set Camera Resolution 20 | mono.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P) 21 | 22 | if isLeft: 23 | # Get left camera 24 | mono.setBoardSocket(dai.CameraBoardSocket.LEFT) 25 | else : 26 | # Get right camera 27 | mono.setBoardSocket(dai.CameraBoardSocket.RIGHT) 28 | return mono 29 | 30 | 31 | def getStereoPair(pipeline, monoLeft, monoRight): 32 | # Configure stereo pair for depth estimation 33 | stereo = pipeline.createStereoDepth() 34 | # Checks occluded pixels and marks them as invalid 35 | stereo.setLeftRightCheck(True) 36 | 37 | # Configure left and right cameras to work as a stereo pair 38 | monoLeft.out.link(stereo.left) 39 | monoRight.out.link(stereo.right) 40 | 41 | return stereo 42 | 43 | if __name__ == '__main__': 44 | 45 | # Start defining a pipeline 46 | pipeline = dai.Pipeline() 47 | 48 | # Set up left and right cameras 49 | monoLeft = getMonoCamera(pipeline, isLeft = True) 50 | monoRight = getMonoCamera(pipeline, isLeft = False) 51 | 52 | # Combine left and right cameras to form a stereo pair 53 | stereo = getStereoPair(pipeline, monoLeft, monoRight) 54 | 55 | # Set XlinkOut for disparity, rectifiedLeft, and rectifiedRight 56 | xoutDisp = pipeline.createXLinkOut() 57 | xoutDisp.setStreamName("disparity") 58 | 59 | xoutDepth = pipeline.create(dai.node.XLinkOut) 60 | 
xoutDepth.setStreamName("depth") 61 | 62 | xoutRectifiedLeft = pipeline.createXLinkOut() 63 | xoutRectifiedLeft.setStreamName("rectifiedLeft") 64 | 65 | xoutRectifiedRight = pipeline.createXLinkOut() 66 | xoutRectifiedRight.setStreamName("rectifiedRight") 67 | 68 | stereo.disparity.link(xoutDisp.input) 69 | 70 | stereo.rectifiedLeft.link(xoutRectifiedLeft.input) 71 | stereo.rectifiedRight.link(xoutRectifiedRight.input) 72 | stereo.depth.link(xoutDepth.input) 73 | 74 | # Pipeline is defined, now we can connect to the device 75 | 76 | with dai.Device(pipeline) as device: 77 | 78 | calib = device.readCalibration() 79 | w, h = monoLeft.getResolutionSize() 80 | 81 | # The rectified stereo images' intrinsics both correspond to the right camera intrinsics. 82 | disparityIntrinsics = calib.getCameraIntrinsics(dai.CameraBoardSocket.RIGHT, dai.Size2f(w, h)) 83 | baselineMeters = 1e-2 * calib.getBaselineDistance(dai.CameraBoardSocket.LEFT, dai.CameraBoardSocket.RIGHT) 84 | with open('stereodemo-calibration.json', 'w') as f: 85 | d = dict(baseline_meters=baselineMeters, 86 | fx=disparityIntrinsics[0][0], 87 | fy=disparityIntrinsics[1][1], 88 | cx0=disparityIntrinsics[0][2], 89 | cx1=disparityIntrinsics[0][2], 90 | cy=disparityIntrinsics[1][2]) 91 | f.write(json.dumps(d)) 92 | 93 | # Output queues will be used to get the rgb frames and nn data from the outputs defined above 94 | disparityQueue = device.getOutputQueue(name="disparity", maxSize=1, blocking=False) 95 | rectifiedLeftQueue = device.getOutputQueue(name="rectifiedLeft", maxSize=1, blocking=False) 96 | rectifiedRightQueue = device.getOutputQueue(name="rectifiedRight", maxSize=1, blocking=False) 97 | depthQueue = device.getOutputQueue(name="depth", maxSize=1, blocking=False) 98 | 99 | # Calculate a multiplier for colormapping the disparity map 100 | disparityMultiplier = 255 / stereo.getMaxDisparity() 101 | 102 | cv2.namedWindow("Stereo Pair") 103 | 104 | # Variable used to toggle between side by side view and one frame view. 
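# (added comment) When it is False the two rectified frames are averaged instead of shown
# side by side; with a good rectification the overlaid views should only differ by a
# horizontal shift, so the blended image is a quick visual sanity check.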
105 | sideBySide = False 106 | 107 | print ("Press 's' to save an image") 108 | 109 | save_frame_id = 0 110 | while True: 111 | 112 | # Get disparity map 113 | disparityPixels = getFrame(disparityQueue) 114 | 115 | # Colormap disparity for display 116 | disparity = (disparityPixels * disparityMultiplier).astype(np.uint8) 117 | disparity = cv2.applyColorMap(disparity, cv2.COLORMAP_JET) 118 | 119 | # Depth 120 | depthMm = getFrame(depthQueue) 121 | centralDepthMm = depthMm[h//2, w//2] 122 | centralDisp = disparityPixels[h//2, w//2] 123 | depthFromDispMeters = (baselineMeters * disparityIntrinsics[0][0]) / centralDisp 124 | # print (f"Central pixel depth = {centralDepthMm} disparity_raw = {centralDisp} depthFromDispMeters={depthFromDispMeters}") 125 | 126 | # Get left and right rectified frame 127 | leftFrame = getFrame(rectifiedLeftQueue) 128 | rightFrame = getFrame(rectifiedRightQueue) 129 | 130 | if sideBySide: 131 | # Show side by side view 132 | imOut = np.hstack((leftFrame, rightFrame)) 133 | else : 134 | # Show overlapping frames 135 | imOut = np.uint8(leftFrame/2 + rightFrame/2) 136 | 137 | imOut = cv2.cvtColor(imOut,cv2.COLOR_GRAY2RGB) 138 | cv2.imshow("Stereo Pair", imOut) 139 | cv2.imshow("Disparity", disparity) 140 | 141 | # Check for keyboard input 142 | key = cv2.waitKey(1) 143 | if key == ord('q'): 144 | # Quit when q is pressed 145 | break 146 | elif key == ord('t'): 147 | # Toggle display when t is pressed 148 | sideBySide = not sideBySide 149 | elif key == ord('s'): 150 | # Save the current frames 151 | cv2.imwrite(f"img_{save_frame_id:03d}_left.png", leftFrame) 152 | cv2.imwrite(f"img_{save_frame_id:03d}_right.png", rightFrame) 153 | print (f"Wrote img_{save_frame_id:03d}_left/right.png") 154 | save_frame_id += 1 155 | 156 | -------------------------------------------------------------------------------- /stereodemo/method_hitnet.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import tempfile 7 | import sys 8 | 9 | import onnxruntime 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | from .methods import Config, EnumParameter, StereoMethod, InputPair, StereoOutput 15 | from . 
import utils 16 | 17 | urls = { 18 | "hitnet_eth3d_120x160.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_eth3d_120x160.onnx", 19 | "hitnet_eth3d_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_eth3d_240x320.onnx", 20 | "hitnet_eth3d_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_eth3d_480x640.onnx", 21 | "hitnet_eth3d_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_eth3d_720x1280.onnx", 22 | "hitnet_middlebury_120x160.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_middlebury_120x160.onnx", 23 | "hitnet_middlebury_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_middlebury_240x320.onnx", 24 | "hitnet_middlebury_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_middlebury_480x640.onnx", 25 | "hitnet_middlebury_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_middlebury_720x1280.onnx", 26 | "hitnet_sceneflow_120x160.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_sceneflow_120x160.onnx", 27 | "hitnet_sceneflow_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_sceneflow_240x320.onnx", 28 | "hitnet_sceneflow_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_sceneflow_480x640.onnx", 29 | "hitnet_sceneflow_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_sceneflow_720x1280.onnx", 30 | } 31 | 32 | # Adapted from https://github.com/ibaiGorordo/ONNX-HITNET-Stereo-Depth-estimation 33 | # Onnx models from https://github.com/PINTO0309/PINTO_model_zoo/tree/main/142_HITNET 34 | # Official implementation https://github.com/google-research/google-research/tree/master/hitnet 35 | class HitnetStereo(StereoMethod): 36 | def __init__(self, config: Config): 37 | super().__init__("Hitnet (CVPR 2021)", 38 | "HITNet: Hierarchical Iterative Tile Refinement Network for Real-time Stereo Matching", 39 | {}, 40 | config) 41 | self.reset_defaults() 42 | 43 | self._loaded_session = None 44 | self._loaded_model_path = None 45 | 46 | def reset_defaults(self): 47 | self.parameters.update ({ 48 | "Shape": EnumParameter("Processed image size", 2, ["160x120", "320x240", "640x480", "1280x720"]), 49 | "Training Set": EnumParameter("Dataset used during training", 1, ["sceneflow", "middlebury", "eth3d"]) 50 | }) 51 | 52 | def compute_disparity(self, input: InputPair) -> StereoOutput: 53 | cols, rows = self.parameters["Shape"].value.split('x') 54 | cols, rows = int(cols), int(rows) 55 | training_set = self.parameters["Training Set"].value 56 | 57 | model_path = self.config.models_path / f'hitnet_{training_set}_{rows}x{cols}.onnx' 58 | self._load_model (model_path) 59 | 60 | model_inputs = self._loaded_session.get_inputs() 61 | model_outputs = self._loaded_session.get_outputs() 62 | model_rows, model_cols = model_inputs[0].shape[2:] # B,C,H,W 63 | self.target_size = (model_cols, model_rows) 64 | 65 | grayscale = True if training_set == 'eth3d' else False 66 | combined_tensor = self._preprocess_input(input.left_image, input.right_image, grayscale) 67 | 68 | start = time.time() 69 | input_names = [model_inputs[i].name for i in range(len(model_inputs))] 70 | output_names = [model_outputs[i].name for i in range(len(model_outputs))] 71 | outputs = 
self._loaded_session.run(['reference_output_disparity'], { 'input': combined_tensor }) 72 | elapsed_time = time.time() - start 73 | 74 | disparity_map = self._process_output(outputs) 75 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 76 | model_output_cols = disparity_map.shape[1] 77 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 78 | x_scale = input.left_image.shape[1] / float(model_output_cols) 79 | disparity_map *= np.float32(x_scale) 80 | 81 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 82 | 83 | def _preprocess_input (self, left: np.ndarray, right: np.ndarray, grayscale: bool): 84 | if grayscale: 85 | # H,W 86 | left = cv2.cvtColor(left, cv2.COLOR_BGR2GRAY) 87 | right = cv2.cvtColor(right, cv2.COLOR_BGR2GRAY) 88 | else: 89 | # H,W,C=3 90 | left = cv2.cvtColor(left, cv2.COLOR_BGR2RGB) 91 | right = cv2.cvtColor(right, cv2.COLOR_BGR2RGB) 92 | 93 | left = cv2.resize(left, self.target_size, cv2.INTER_AREA) 94 | right = cv2.resize(right, self.target_size, cv2.INTER_AREA) 95 | 96 | # Grayscale needs expansion to reach H,W,C. 97 | # Need to do that now because resize would change the shape. 98 | if left.ndim == 2: 99 | left = left[..., np.newaxis] 100 | right = right[..., np.newaxis] 101 | 102 | # -> H,W,C=2 or 6 , normalized to [0,1] 103 | combined_img = np.concatenate((left, right), axis=-1) / 255.0 104 | # -> C,H,W 105 | combined_img = combined_img.transpose(2, 0, 1) 106 | # -> B=1,C,H,W 107 | combined_img = np.expand_dims(combined_img, 0).astype(np.float32) 108 | return combined_img 109 | 110 | def _process_output(self, outputs): 111 | disparity_map = outputs[0][0].squeeze(-1) 112 | return disparity_map 113 | 114 | def _load_model(self, model_path: Path): 115 | if (self._loaded_model_path == model_path): 116 | return 117 | 118 | if not model_path.exists(): 119 | utils.download_model (urls[model_path.name], model_path) 120 | 121 | assert Path(model_path).exists() 122 | self._loaded_model_path = model_path 123 | self._loaded_session = onnxruntime.InferenceSession(str(model_path), providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Unit Tests](https://github.com/nburrus/stereodemo/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/nburrus/stereodemo/actions/workflows/unit_tests.yml) 2 | 3 | Twitter Badge 4 | 5 | 6 | # stereodemo 7 | 8 | Small Python utility to **compare and visualize** the output of various **stereo depth estimation** algorithms: 9 | 10 | - Make it easy to get a qualitative evaluation of several state-of-the-art models in the wild 11 | - Feed it left/right images or capture live from an [OAK-D camera](https://store.opencv.ai/products/oak-d) 12 | - Interactive colored point-cloud view since nice-looking disparity images can be misleading 13 | - Try different parameters on the same image 14 | 15 | **Included methods** (implementation/pre-trained models taken from their respective authors): 16 | 17 | - [OpenCV](https://opencv.org) stereo block matching and Semi-global block matching baselines, with all their parameters 18 | - [CREStereo](https://github.com/megvii-research/CREStereo): "Practical Stereo Matching via Cascaded Recurrent Network with Adaptive Correlation" (CVPR 2022) 19 | - [RAFT-Stereo](https://github.com/princeton-vl/RAFT-Stereo): "Multilevel Recurrent 
Field Transforms for Stereo Matching" (3DV 2021) 20 | - [Hitnet](https://github.com/google-research/google-research/tree/master/hitnet): "Hierarchical Iterative Tile Refinement Network for Real-time Stereo Matching" (CVPR 2021) 21 | - [STereo TRansformers](https://github.com/mli0603/stereo-transformer): "Revisiting Stereo Depth Estimation From a Sequence-to-Sequence Perspective with Transformers" (ICCV 2021) 22 | - [Chang et al. RealtimeStereo](https://github.com/JiaRenChang/RealtimeStereo): "Attention-Aware Feature Aggregation for Real-time Stereo Matching on Edge Devices" (ACCV 2020) 23 | 24 | - [DistDepth](https://github.com/facebookresearch/DistDepth): "Toward Practical Monocular Indoor Depth Estimation" (CVPR 2022). This one is actually a **monocular** method, only using the left image. 25 | 26 | See below for more details / credits to get each of these working, and check this [blog post for more results, including performance numbers](https://nicolas.burrus.name/stereo-comparison/). 27 | 28 | https://user-images.githubusercontent.com/541507/169557430-48e62510-60c2-4a2b-8747-f9606e405f74.mp4 29 | 30 | # Getting started 31 | 32 | ## Installation 33 | 34 | ``` 35 | python3 -m pip install stereodemo 36 | ``` 37 | 38 | ## Running it 39 | 40 | ### With an OAK-D camera 41 | 42 | To capture data directly from an OAK-D camera, use: 43 | 44 | ``` 45 | stereodemo --oak 46 | ``` 47 | 48 | Then click on `Next Image` to capture a new one. 49 | 50 | ### With image files 51 | 52 | If you installed stereodemo from pip, then just launch `stereodemo` and it will 53 | show some embedded sample images captured with an OAK-D camera. 54 | 55 | A tiny subset of some popular datasets is also included in this repository. Just 56 | provide a folder to `stereodemo` and it'll look for left/right pairs (either 57 | im0/im1 or left/right in the names): 58 | 59 | ``` 60 | # To evaluate on the oak-d images 61 | stereodemo datasets/oak-d 62 | 63 | # To cycle through all images 64 | stereodemo datasets 65 | ``` 66 | 67 | Then click on `Next Image` to cycle through the images. 68 | 69 | Sample images included in this repository: 70 | - [drivingstereo](datasets/drivingstereo/README.md): outdoor driving. 71 | - [middlebury_2014](datasets/middlebury_2014/README.md): high-res objects. 72 | - [eth3d](datasets/eth3d_lowres/README.md): outdoor and indoor scenes. 73 | - [sceneflow](datasets/sceneflow/README.md): synthetic rendering of objects. 74 | - [oak-d](datasets/oak-d/README.md): indoor images I captured with my OAK-D lite camera. 75 | - [kitti2015](datasets/kitti2015/README.md): outdoor driving (only one image). 76 | 77 | # Dependencies 78 | 79 | `pip` will install the dependencies automatically. Here is the list: 80 | 81 | - [Open3D](https://open3d.org). For the point cloud visualization and the GUI. 82 | - [OpenCV](https://opencv.org). For image loading and the traditional block matching baselines. 83 | - [onnxruntime](https://onnxruntime.ai/). To run pretrained models in the ONNX format. 84 | - [pytorch](https://pytorch.org/). To run pretrained models exported as torch script. 85 | - [depthai](https://docs.luxonis.com/en/latest/). Optional, to grab images from a Luxonis OAK camera. 86 | 87 | # Credits for each method 88 | 89 | I did not implement any of these myself, but just collected pre-trained models or converted them to torch script / ONNX. 
90 | 91 | - CREStereo 92 | - Official implementation and pre-trained models: https://github.com/megvii-research/CREStereo 93 | - Model Zoo for the ONNX models: https://github.com/PINTO0309/PINTO_model_zoo/tree/main/284_CREStereo 94 | - Port to ONNX + sample loading code: https://github.com/ibaiGorordo/ONNX-CREStereo-Depth-Estimation 95 | 96 | - RAFT-Stereo 97 | - Official implementation and pre-trained models: https://github.com/princeton-vl/RAFT-Stereo 98 | - I exported the pytorch implementation to torch script via tracing, [with minor modifications of the source code](https://github.com/nburrus/RAFT-Stereo/commit/ebbb5a807227927ab4551274039e9bdd16a1b010). 99 | - Their fastest implementation was not imported. 100 | 101 | - Hitnet 102 | - Official implementation and pre-trained models: https://github.com/google-research/google-research/tree/master/hitnet 103 | - Model Zoo for the ONNX models: https://github.com/PINTO0309/PINTO_model_zoo/tree/main/142_HITNET 104 | - Port to ONNX + sample loading code: https://github.com/ibaiGorordo/ONNX-HITNET-Stereo-Depth-estimation 105 | 106 | - Stereo Transformers 107 | - Official implementation and pre-trained models: https://github.com/mli0603/stereo-transformer 108 | - Made [some small changes](https://github.com/nburrus/stereo-transformer/commit/0006a022c19f0c7c4d7683408531180a863603a5) to allow torch script export via tracing. 109 | - The exported model currently fails with GPU inference, so only CPU inference is enabled. 110 | 111 | - Chang et al. RealtimeStereo 112 | - Official implementation and pre-trained models: https://github.com/JiaRenChang/RealtimeStereo 113 | - I exported the pytorch implementation to torch script via tracing with some minor changes to the code https://github.com/JiaRenChang/RealtimeStereo/pull/15 . See [chang_realtimestereo_to_torchscript_onnx.py](tools/chang_realtimestereo_to_torchscript_onnx.py). 114 | 115 | - DistDepth 116 | - Official implementation and pre-trained models https://github.com/facebookresearch/DistDepth 117 | - I exported the pytorch implementaton to torch script via tracing, see [the changes](https://github.com/facebookresearch/DistDepth/commit/fde3b427ef2ff31c34f08e99c51c8e6a2427b720). 118 | 119 | # License 120 | 121 | The code of stereodemo is MIT licensed, but the pre-trained models are subject to the license of their respective implementation. 122 | 123 | The sample images have the license of their respective source, except for datasets/oak-d which is licenced under [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/). 124 | 125 | -------------------------------------------------------------------------------- /stereodemo/method_sttr.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import gc 7 | import tempfile 8 | import re 9 | import sys 10 | 11 | import torch 12 | from torchvision import transforms 13 | 14 | import cv2 15 | import numpy as np 16 | 17 | from .methods import Config, EnumParameter, StereoMethod, InputPair, StereoOutput 18 | from . 
import utils 19 | 20 | urls = { 21 | "sttr-kitti-cpu-240x320-ds1.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cpu-240x320-ds1.scripted.pt", 22 | "sttr-kitti-cpu-480x640-ds2.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cpu-480x640-ds2.scripted.pt", 23 | "sttr-kitti-cpu-480x640-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cpu-480x640-ds3.scripted.pt", 24 | "sttr-kitti-cpu-720x1280-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cpu-720x1280-ds3.scripted.pt", 25 | "sttr-kitti-cuda-240x320-ds1.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cuda-240x320-ds1.scripted.pt", 26 | "sttr-kitti-cuda-480x640-ds2.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cuda-480x640-ds2.scripted.pt", 27 | "sttr-kitti-cuda-480x640-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cuda-480x640-ds3.scripted.pt", 28 | "sttr-kitti-cuda-720x1280-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cuda-720x1280-ds3.scripted.pt", 29 | 30 | "sttr-sceneflow-cpu-240x320-ds1.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cpu-240x320-ds1.scripted.pt", 31 | "sttr-sceneflow-cpu-480x640-ds2.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cpu-480x640-ds2.scripted.pt", 32 | "sttr-sceneflow-cpu-480x640-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cpu-480x640-ds3.scripted.pt", 33 | "sttr-sceneflow-cpu-720x1280-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cpu-720x1280-ds3.scripted.pt", 34 | "sttr-sceneflow-cuda-240x320-ds1.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cuda-240x320-ds1.scripted.pt", 35 | "sttr-sceneflow-cuda-480x640-ds2.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cuda-480x640-ds2.scripted.pt", 36 | "sttr-sceneflow-cuda-480x640-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cuda-480x640-ds3.scripted.pt", 37 | "sttr-sceneflow-cuda-720x1280-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cuda-720x1280-ds3.scripted.pt", 38 | } 39 | 40 | def clear_gpu_memory(): 41 | gc.collect() 42 | torch.cuda.empty_cache() 43 | 44 | # https://github.com/mli0603/stereo-transformer 45 | # Made some changes to allow torchscript tracing: 46 | # https://github.com/nburrus/stereo-transformer/commit/0006a022c19f0c7c4d7683408531180a863603a5 47 | class StereoTransformers(StereoMethod): 48 | def __init__(self, config: Config): 49 | super().__init__("STereo TRansformer (ICCV 2021)", 50 | "Revisiting Stereo Depth Estimation From a Sequence-to-Sequence Perspective with Transformers.", 51 | {}, 52 | config) 53 | self.reset_defaults() 54 | 55 | self.net = None 56 | self._loaded_model_path = None 57 | 58 | imagenet_stats = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]} 59 | self.img_to_tensor_transforms = transforms.Compose([ 60 | transforms.ToTensor(), 61 | transforms.Normalize(**imagenet_stats), 62 | ]) 63 | 64 | def reset_defaults(self): 65 | self.parameters.update 
({ 66 | "Shape": EnumParameter("Processed image size", 2, ["320x240 (ds1)", "640x480 (ds2)", "640x480 (ds3)", "1280x720 (ds3)"]), 67 | # "Model": EnumParameter("Pre-trained Model", 0, ["kitti-cpu", "sceneflow-cpu", "kitti-cuda", "sceneflow-cuda"]) 68 | # The CUDA ones segfault with my Python 3.8 venv, but someone worked with Python 3.7. 69 | # Maybe related to the installed packages instead, need to investigate more. 70 | # Keeping only the CPU ones for now since it's enough to evaluate. 71 | "Model": EnumParameter("Pre-trained Model", 0, ["kitti-cpu", "sceneflow-cpu"]), 72 | "Detect occlusions": EnumParameter("Detect Occlusions", 0, ["Yes", "No"]) 73 | }) 74 | 75 | def compute_disparity(self, input: InputPair) -> StereoOutput: 76 | stereo_output = self._compute_disparity (input) 77 | clear_gpu_memory () 78 | return stereo_output 79 | 80 | def _compute_disparity(self, input: InputPair) -> StereoOutput: 81 | m = re.match("(\d+)x(\d+) \(ds(\d)\)", self.parameters["Shape"].value) 82 | cols, rows, downsample = map(lambda v: int(v), m.groups()) 83 | self.target_size = (cols, rows) 84 | 85 | variant = self.parameters["Model"].value 86 | detect_occlusions = self.parameters["Detect occlusions"].value == "Yes" 87 | 88 | model_path = self.config.models_path / f'sttr-{variant}-{rows}x{cols}-ds{downsample}.scripted.pt' 89 | self._load_model (model_path) 90 | 91 | left_tensor = self._preprocess_input(input.left_image) 92 | right_tensor = self._preprocess_input(input.right_image) 93 | 94 | col_offset = int(downsample / 2) 95 | row_offset = int(downsample / 2) 96 | sampled_cols = torch.arange(col_offset, cols, downsample)[None,] 97 | sampled_rows = torch.arange(row_offset, rows, downsample)[None,] 98 | 99 | device = torch.device('cuda') if 'cuda' in variant else 'cpu' 100 | net = self.net.to(device) 101 | left_tensor = left_tensor.to(device) 102 | right_tensor = right_tensor.to(device) 103 | sampled_cols = sampled_cols.to(device) 104 | sampled_rows = sampled_rows.to(device) 105 | 106 | start = time.time() 107 | with torch.no_grad(): 108 | outputs = net(left_tensor, right_tensor, sampled_cols, sampled_rows) 109 | elapsed_time = time.time() - start 110 | 111 | disparity_map = self._process_output(outputs, use_occlusion=detect_occlusions) 112 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 113 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 114 | x_scale = input.left_image.shape[1] / float(cols) 115 | disparity_map *= np.float32(x_scale) 116 | 117 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 118 | 119 | def _preprocess_input (self, img: np.ndarray): 120 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 121 | img = cv2.resize(img, self.target_size, cv2.INTER_AREA) 122 | return self.img_to_tensor_transforms (img).unsqueeze(0) 123 | 124 | def _process_output(self, outputs, use_occlusion: bool): 125 | disparity_map = outputs[0][0].detach().cpu().numpy() 126 | if use_occlusion: 127 | occ_pred = outputs[1][0].data.cpu().numpy() > 0.5 128 | disparity_map[occ_pred] = -1.0 129 | return disparity_map 130 | 131 | def _load_model(self, model_path: Path): 132 | # FIXME: always reload the model, for some reason 133 | # feeding multiple images to the same model freezes 134 | # with CUDA. Maybe due to multi-threading? 
135 | # if (self._loaded_model_path == model_path): 136 | # return 137 | 138 | if not model_path.exists(): 139 | utils.download_model (urls[model_path.name], model_path) 140 | 141 | assert Path(model_path).exists() 142 | self._loaded_model_path = model_path 143 | self.net = torch.jit.load(model_path) 144 | self.net.eval () 145 | -------------------------------------------------------------------------------- /stereodemo/main.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | import json 3 | from pathlib import Path 4 | import sys 5 | import tempfile 6 | import time 7 | from types import SimpleNamespace 8 | from typing import List 9 | 10 | import numpy as np 11 | 12 | import cv2 13 | 14 | 15 | from . import visualizer 16 | from . import methods 17 | 18 | from .method_opencv_bm import StereoBM, StereoSGBM 19 | from .method_raft_stereo import RaftStereo 20 | from .method_cre_stereo import CREStereo 21 | from .method_chang_realtime_stereo import ChangRealtimeStereo 22 | from .method_hitnet import HitnetStereo 23 | from .method_sttr import StereoTransformers 24 | from stereodemo.method_dist_depth import DistDepth 25 | 26 | def parse_args(): 27 | import argparse 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--oak', action='store_true', help='Use an oak-D camera to grab images.') 30 | parser.add_argument('--oak-output-folder', type=Path, default=None, help='Output folder to save the images grabbed by the OAK camera') 31 | parser.add_argument('images', 32 | help='rectified_left1 rectified_right1 ... [rectified_leftN rectified_rightN]. Load image pairs from disk. You can also specify folders.', 33 | type=Path, 34 | default=None, 35 | nargs='*') 36 | parser.add_argument('--calibration', type=Path, help='Calibration json. 
If unspecified, it will try to load a stereodemo_calibration.json file in the left image parent folder.', default=None) 37 | default_models_path = Path.home() / ".cache" / "stereodemo" / "models" 38 | parser.add_argument('--models-path', type=Path, help='Path to store the downloaded models.', default=default_models_path) 39 | return parser.parse_args() 40 | 41 | def find_stereo_images_in_dir(dir: Path): 42 | left_files = [] 43 | right_files = [] 44 | 45 | def validated_lists(): 46 | for f in right_files: 47 | assert f.exists() 48 | return left_files, right_files 49 | 50 | for ext in ['jpg', 'png']: 51 | left = sorted(list(dir.glob(f'**/*left*.{ext}'))) 52 | if len(left) != 0: 53 | right = [f.parent / f.name.replace('left', 'right') for f in left] 54 | left_files += left 55 | right_files += right 56 | 57 | for ext in ['jpg', 'png']: 58 | left = sorted(list(dir.glob(f'**/im0.{ext}'))) 59 | if len(left) != 0: 60 | right = [f.parent / f.name.replace('im0', 'im1') for f in left] 61 | left_files += left 62 | right_files += right 63 | 64 | return validated_lists() 65 | 66 | class FileListSource (visualizer.Source): 67 | def __init__(self, file_or_dir_list, calibration=None): 68 | self.left_images_path = [] 69 | self.right_images_path = [] 70 | 71 | while file_or_dir_list: 72 | f = file_or_dir_list.pop(0) 73 | if f.is_dir(): 74 | left, right = find_stereo_images_in_dir (f) 75 | self.left_images_path += left 76 | self.right_images_path += right 77 | else: 78 | if f.suffix.lower() not in ['.png', '.jpg', '.jpeg']: 79 | print (f"Warning: ignoring {f}, not an image extension.") 80 | continue 81 | try: 82 | right_f = file_or_dir_list.pop(0) 83 | except: 84 | print (f"Missing right image for {f}, skipping") 85 | continue 86 | self.left_images_path.append(f) 87 | self.right_images_path.append(right_f) 88 | 89 | self.index = 0 90 | self.user_provided_calibration_path = calibration 91 | self.num_pairs = len(self.left_images_path) 92 | if self.num_pairs == 0: 93 | raise Exception("No image pairs.") 94 | 95 | def is_live(self): 96 | return False 97 | 98 | def selected_index (self) -> int: 99 | return self.index 100 | 101 | def get_pair_at_index(self, idx: int) -> methods.InputPair: 102 | self.index = idx 103 | 104 | def load_image(path): 105 | im = cv2.imread(str(path), cv2.IMREAD_COLOR) 106 | assert im is not None 107 | return im 108 | 109 | left_image_path = self.left_images_path[self.index] 110 | left_image = load_image(left_image_path) 111 | if self.user_provided_calibration_path is None: 112 | calibration_path = left_image_path.parent / 'stereodemo_calibration.json' 113 | if not calibration_path.exists(): 114 | print (f"Warning: no calibration file found {calibration_path}. Using default calibration, the point cloud won't be accurate.") 115 | calibration_path = None 116 | else: 117 | calibration_path = self.user_provided_calibration_path 118 | if calibration_path: 119 | calib = visualizer.Calibration.from_json (open(calibration_path, 'r').read()) 120 | else: 121 | # Fake reasonable calibration. 
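            # Rough placeholder values: focal length guessed as 0.8x the image height,
            # principal point at the image center for both cameras, and a 7.5 cm baseline
            # (an OAK-D-like rig), so the point cloud gets a plausible but not metrically
            # accurate scale.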
122 | calib = visualizer.Calibration(left_image.shape[1], 123 | left_image.shape[0], 124 | left_image.shape[0]*0.8, 125 | left_image.shape[0]*0.8, 126 | left_image.shape[1]/2.0, # cx0 127 | left_image.shape[1]/2.0, # cx1 128 | left_image.shape[0]/2.0, 129 | 0.075) 130 | 131 | right_image_path = self.right_images_path[self.index] 132 | status = f"{left_image_path} / {right_image_path}" 133 | return visualizer.InputPair (left_image, load_image(right_image_path), calib, status) 134 | 135 | def get_pair_list(self) -> List[str]: 136 | return [str(f) for f in self.left_images_path] 137 | 138 | def get_next_pair(self): 139 | self.index = (self.index + 1) % self.num_pairs 140 | return self.get_pair_at_index(self.index) 141 | 142 | def main(): 143 | args = parse_args() 144 | 145 | try: 146 | args.models_path.mkdir(parents=True, exist_ok=True) 147 | except Exception as e: 148 | sys.stderr.write (f"Warning: cannot use the default models path {args.models_path}: {e}\n") 149 | sys.stderr.write ("A valid path is necessary to store the downloaded models.\n") 150 | args.models_path = Path(tempfile.gettempdir()) / 'stereodemo_models' 151 | sys.stderr.write (f"Going to use the temporary directory {args.models_path} instead, specify --model-paths to specify a custom persistent path instead.\n") 152 | try: 153 | args.models_path.mkdir(parents=True, exist_ok=True) 154 | except: 155 | sys.stderr.write ("Could not create a temporary directory to store the downloaded models.\n") 156 | sys.stderr.write ("Aborting, you need to specify --models-path with a valid writable path.\n") 157 | sys.exit (1) 158 | print (f"INFO: will store downloaded models in {args.models_path}") 159 | 160 | config = methods.Config(args.models_path) 161 | method_list = [ 162 | StereoBM(config), 163 | StereoSGBM(config), 164 | CREStereo(config), 165 | RaftStereo(config), 166 | HitnetStereo(config), 167 | StereoTransformers(config), 168 | ChangRealtimeStereo(config), 169 | DistDepth(config) 170 | ] 171 | 172 | if args.images: 173 | source = FileListSource(args.images, args.calibration) 174 | elif args.oak: 175 | from .oakd_source import OakdSource, StereoFromOakInputSource 176 | source = OakdSource(args.oak_output_folder) 177 | method_list = [StereoFromOakInputSource(config)] + method_list 178 | else: 179 | datasets_path = Path(__file__).parent / 'datasets' 180 | if not datasets_path.exists(): 181 | print (f"Tried but failed to find files in {datasets_path}") 182 | print ("You need to specify --oak or provide images") 183 | sys.exit (1) 184 | source = FileListSource([datasets_path], args.calibration) 185 | 186 | method_dict = { method.name:method for method in method_list } 187 | 188 | viz = visualizer.Visualizer(method_dict, source) 189 | 190 | while True: 191 | start_time = time.time() 192 | if not viz.update_once (): 193 | break 194 | cv2.waitKey (1) 195 | elapsed = time.time() - start_time 196 | time_to_sleep = 1/30.0 - elapsed 197 | if time_to_sleep > 0: 198 | time.sleep (time_to_sleep) 199 | 200 | 201 | -------------------------------------------------------------------------------- /stereodemo/method_raft_stereo.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import gc 7 | import tempfile 8 | import sys 9 | 10 | import torch 11 | from torchvision import transforms 12 | 13 | import cv2 14 | import numpy as np 15 | 16 | from .methods import Config, EnumParameter, 
StereoMethod, InputPair, StereoOutput 17 | from . import utils 18 | 19 | urls = { 20 | "raft-stereo-eth3d-cpu-128x160.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cpu-128x160.scripted.pt", 21 | "raft-stereo-eth3d-cpu-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cpu-256x320.scripted.pt", 22 | "raft-stereo-eth3d-cpu-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cpu-480x640.scripted.pt", 23 | "raft-stereo-eth3d-cpu-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cpu-736x1280.scripted.pt", 24 | "raft-stereo-eth3d-cuda-128x160.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cuda-128x160.scripted.pt", 25 | "raft-stereo-eth3d-cuda-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cuda-256x320.scripted.pt", 26 | "raft-stereo-eth3d-cuda-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cuda-480x640.scripted.pt", 27 | "raft-stereo-eth3d-cuda-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cuda-736x1280.scripted.pt", 28 | "raft-stereo-fast-cpu-128x160.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cpu-128x160.scripted.pt", 29 | "raft-stereo-fast-cpu-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cpu-256x320.scripted.pt", 30 | "raft-stereo-fast-cpu-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cpu-480x640.scripted.pt", 31 | "raft-stereo-fast-cpu-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cpu-736x1280.scripted.pt", 32 | "raft-stereo-fast-cuda-128x160.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cuda-128x160.scripted.pt", 33 | "raft-stereo-fast-cuda-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cuda-256x320.scripted.pt", 34 | "raft-stereo-fast-cuda-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cuda-480x640.scripted.pt", 35 | "raft-stereo-fast-cuda-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cuda-736x1280.scripted.pt", 36 | "raft-stereo-middlebury-cpu-128x160.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cpu-128x160.scripted.pt", 37 | "raft-stereo-middlebury-cpu-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cpu-256x320.scripted.pt", 38 | "raft-stereo-middlebury-cpu-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cpu-480x640.scripted.pt", 39 | "raft-stereo-middlebury-cpu-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cpu-736x1280.scripted.pt", 40 | "raft-stereo-middlebury-cuda-128x160.scripted.pt": 
"https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cuda-128x160.scripted.pt", 41 | "raft-stereo-middlebury-cuda-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cuda-256x320.scripted.pt", 42 | "raft-stereo-middlebury-cuda-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cuda-480x640.scripted.pt", 43 | "raft-stereo-middlebury-cuda-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cuda-736x1280.scripted.pt", 44 | } 45 | 46 | def clear_gpu_memory(): 47 | gc.collect() 48 | torch.cuda.empty_cache() 49 | 50 | # https://github.com/princeton-vl/RAFT-Stereo 51 | # I exported the pytorch implementation to torch script via tracing, with minor modifications of the source code. 52 | # https://github.com/nburrus/RAFT-Stereo/commit/ebbb5a807227927ab4551274039e9bdd16a1b010 53 | # Their fastest implementation was not imported. 54 | class RaftStereo(StereoMethod): 55 | def __init__(self, config: Config): 56 | super().__init__("RAFT-Stereo (3DV 2021)", 57 | "RAFT-Stereo: Multilevel Recurrent Field Transforms for Stereo Matching.", 58 | {}, 59 | config) 60 | self.reset_defaults() 61 | 62 | self.net = None 63 | self._loaded_model_path = None 64 | 65 | def reset_defaults(self): 66 | self.parameters.update ({ 67 | "Shape": EnumParameter("Processed image size", 2, ["160x128", "320x256", "640x480", "1280x736"]), 68 | # "Model": EnumParameter("Pre-trained Model", 1, ["eth3d-cuda", "eth3d-cpu", "fast-cuda", "fast-cpu", "middlebury-cuda"]) 69 | # The eth3d and fast cuda models required --corr_implementation alt to work once loaded via torchscript. 70 | # The supposedly faster "reg" is not working with a torch/cuda segfault, not sure why. 
71 | "Model": EnumParameter("Pre-trained Model", 0, ["fast-cpu", "middlebury-cpu", "eth3d-cpu", "fast-cuda", "middlebury-cuda", "eth3d-cuda"]) 72 | }) 73 | 74 | def compute_disparity(self, input: InputPair) -> StereoOutput: 75 | stereo_output = self._compute_disparity (input) 76 | clear_gpu_memory () 77 | return stereo_output 78 | 79 | def _compute_disparity(self, input: InputPair) -> StereoOutput: 80 | cols, rows = self.parameters["Shape"].value.split('x') 81 | cols, rows = int(cols), int(rows) 82 | self.target_size = (cols, rows) 83 | 84 | variant = self.parameters["Model"].value 85 | 86 | model_path = self.config.models_path / f'raft-stereo-{variant}-{rows}x{cols}.scripted.pt' 87 | self._load_model (model_path) 88 | 89 | left_tensor = self._preprocess_input(input.left_image) 90 | right_tensor = self._preprocess_input(input.right_image) 91 | 92 | device = torch.device('cuda') if 'cuda' in variant else 'cpu' 93 | net = self.net.to(device) 94 | left_tensor = left_tensor.to(device) 95 | right_tensor = right_tensor.to(device) 96 | 97 | start = time.time() 98 | with torch.no_grad(): 99 | outputs = self.net(left_tensor, right_tensor) 100 | elapsed_time = time.time() - start 101 | 102 | disparity_map = self._process_output(outputs) 103 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 104 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 105 | x_scale = input.left_image.shape[1] / float(cols) 106 | disparity_map *= np.float32(x_scale) 107 | 108 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 109 | 110 | def _preprocess_input (self, img: np.ndarray): 111 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 112 | img = cv2.resize(img, self.target_size, cv2.INTER_AREA) 113 | # -> C,H,W 114 | # Normalization done in the model itself. 115 | return torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0).float() 116 | 117 | def _process_output(self, outputs): 118 | disparity_map = outputs[1][0].detach().cpu().squeeze(0).squeeze(0).numpy() * -1.0 119 | return disparity_map 120 | 121 | def _load_model(self, model_path: Path): 122 | # FIXME: always reload the model, for some reason 123 | # feeding multiple images to the same model freezes 124 | # with CUDA. Maybe due to multi-threading? 
125 | # if (self._loaded_model_path == model_path): 126 | # return 127 | 128 | if not model_path.exists(): 129 | utils.download_model (urls[model_path.name], model_path) 130 | 131 | assert Path(model_path).exists() 132 | self._loaded_model_path = model_path 133 | self.net = torch.jit.load(model_path) 134 | self.net.eval () 135 | -------------------------------------------------------------------------------- /tools/chang_realtimestereo_to_torchscript_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import torch 4 | from torch import Tensor 5 | import torch.onnx 6 | 7 | import torch.nn.functional as F 8 | 9 | import numpy as np 10 | 11 | from pathlib import Path 12 | import sys 13 | 14 | import cv2 15 | 16 | from torchvision import transforms 17 | 18 | from .visualizer import imshow 19 | 20 | rtstereo_dir = sys.argv[1] 21 | rtstereo_model = sys.argv[2] 22 | 23 | sys.path.insert(0, rtstereo_dir) 24 | from models import RTStereoNet 25 | 26 | def b2mb(x): return (x/2**20) 27 | 28 | class StereodemoPerformanceMonitor: 29 | def __init__(self, name, load_model, do_inference, is_gpu: bool): 30 | self.load_model = load_model 31 | self.do_inference = do_inference 32 | self.is_gpu = is_gpu 33 | self.name = name 34 | 35 | def run (self): 36 | import time 37 | model = self.load_model () 38 | timings = [] 39 | for i in range (0, 5): 40 | tstart = time.time () 41 | self.do_inference (model) 42 | tend = time.time () 43 | dt = tend - tstart 44 | timings.append (dt) 45 | print (f'{dt=}') 46 | print (f'{self.name}: timings {timings}') 47 | 48 | if self.is_gpu: 49 | import gc 50 | peak_memory_inference_mb = [] 51 | for i in range (0, 5): 52 | gc.collect () 53 | torch.cuda.empty_cache() 54 | torch.cuda.reset_max_memory_allocated() # reset the peak gauge to zero 55 | model = self.load_model () 56 | peak_after_load = torch.cuda.max_memory_allocated() 57 | self.do_inference (model) 58 | peak_after_inference = torch.cuda.max_memory_allocated() 59 | print (f'{peak_after_load=}', peak_after_load) 60 | print (f'{peak_after_inference=}', peak_after_inference) 61 | peak_memory_inference_mb.append (b2mb(peak_after_inference)) 62 | print (f'{self.name}: peak memory (MB) {peak_memory_inference_mb}') 63 | 64 | def save_torchscript(net, output_file, device): 65 | scripted_module = torch.jit.script(net) 66 | # net = net.to(device) 67 | # sample_input = (torch.zeros(1,3,256,256).to(device), torch.zeros(1,3,256,256).to(device)) 68 | # scripted_module = torch.jit.trace(net, sample_input) 69 | torch.jit.save(scripted_module, output_file) 70 | return scripted_module 71 | 72 | def save_onnx(net, output_file): 73 | torch.onnx.export(net, # model being run 74 | sample_input, # model input (or a tuple for multiple inputs) 75 | output_file, # where to save the model (can be a file or file-like object) 76 | export_params=True, # store the trained parameter weights inside the model file 77 | opset_version=11, # the ONNX version to export the model to 78 | do_constant_folding=True, # whether to execute constant folding for optimization 79 | input_names = ['left', 'right'], # the model's input names 80 | output_names = ['disparity'], # the model's output names 81 | dynamic_axes={'left' : {0 : 'batch_size', 2 : 'width', 3 : 'height' }, # variable length axes, except channels 82 | 'right' : {0 : 'batch_size', 2 : 'width', 3 : 'height' }, 83 | 'output' : {0 : 'batch_size', 2 : 'width', 3 : 'height'}}) 84 | 85 | def show_color_disparity (name: str, disparity_map: 
np.ndarray): 86 | min_disp = 0 87 | max_disp = 64 88 | norm_disparity_map = 255*((disparity_map-min_disp) / (max_disp-min_disp)) 89 | disparity_color = cv2.applyColorMap(cv2.convertScaleAbs(norm_disparity_map, 1), cv2.COLORMAP_MAGMA) 90 | imshow (name, disparity_color) 91 | 92 | def export_models (): 93 | checkpoint_file = rtstereo_model 94 | net = RTStereoNet(maxdisp=192, device='cpu') 95 | checkpoint = torch.load(checkpoint_file) 96 | net.load_state_dict(checkpoint['state_dict']) 97 | net.eval() 98 | 99 | # Hacky way to check the original model and make sure the export 100 | # is not screwing up the results. 101 | if False: 102 | # left = cv2.imread("datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-50-10-570_left.jpg", cv2.IMREAD_COLOR) 103 | # right = cv2.imread("datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-50-10-570_right.jpg", cv2.IMREAD_COLOR) 104 | 105 | # left = cv2.imread("datasets/eth3d_lowres/forest_2s/im0.png", cv2.IMREAD_COLOR) 106 | # right = cv2.imread("datasets/eth3d_lowres/forest_2s/im1.png", cv2.IMREAD_COLOR) 107 | 108 | # left = cv2.imread("datasets/eth3d_lowres/playground_3l/im0.png", cv2.IMREAD_COLOR) 109 | # right = cv2.imread("datasets/eth3d_lowres/playground_3l/im1.png", cv2.IMREAD_COLOR) 110 | 111 | left = cv2.imread("datasets/sceneflow/driving_left.png", cv2.IMREAD_COLOR) 112 | right = cv2.imread("datasets/sceneflow/driving_right.png", cv2.IMREAD_COLOR) 113 | 114 | # left = cv2.resize (left, (1280,720), cv2.INTER_AREA) 115 | # right = cv2.resize (right, (1280,720), cv2.INTER_AREA) 116 | 117 | imagenet_stats = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]} 118 | img_to_tensor_transforms = transforms.Compose([ 119 | transforms.ToTensor(), 120 | transforms.Normalize(**imagenet_stats), 121 | ]) 122 | 123 | left = img_to_tensor_transforms(left) 124 | right = img_to_tensor_transforms(right) 125 | 126 | # pad to width and hight to 16 times 127 | if left.shape[1] % 16 != 0: 128 | times = left.shape[1]//16 129 | top_pad = (times+1)*16 -left.shape[1] 130 | else: 131 | top_pad = 0 132 | 133 | if left.shape[2] % 16 != 0: 134 | times = left.shape[2]//16 135 | right_pad = (times+1)*16-left.shape[2] 136 | else: 137 | right_pad = 0 138 | 139 | left = F.pad(left,(0,right_pad, top_pad,0)).unsqueeze(0) 140 | right = F.pad(right,(0,right_pad, top_pad,0)).unsqueeze(0) 141 | 142 | output = net (left, right) 143 | output = output[0].detach().numpy().transpose(1,2,0) 144 | show_color_disparity ("disparity", output) 145 | cv2.waitKey(0) 146 | 147 | # save_torchscript(net, "chang-realtime-stereo.scripted.pt", torch.device('cpu')) 148 | # save_torchscript(net, "chang-realtime-stereo-gpu.scripted.pt", torch.device('cuda')) 149 | 150 | # Only tracing worked without substantial changes to the codebase. 151 | device = torch.device('cpu') 152 | for w,h in [(1280, 720), (640,480), (320,240), (160,128)]: 153 | sample_input = (torch.zeros(1,3,h,w).to(device), torch.zeros(1,3,h,w).to(device)) 154 | with torch.no_grad(): 155 | scripted_module = torch.jit.trace(net, sample_input) 156 | torch.jit.save(scripted_module, f"chang-realtime-stereo-cpu-{w}x{h}.scripted.pt") 157 | 158 | # Need opset16 for grid sampling, currently needs pytorch nightly to do it (1.11 won't). 
159 | # However the exported onnx fails to run: 160 | # [ONNXRuntimeError] : 1 : FAIL : Load model from chang-realtime-stereo-cpu-320x240.onnx 161 | # failed:Type Error: Type parameter (T) of Optype (Mul) bound to different types 162 | # (tensor(float) and tensor(int64) in node (Mul_1675). 163 | if False: 164 | torch.onnx.export(scripted_module, # model being run 165 | sample_input, # model input (or a tuple for multiple inputs) 166 | f"chang-realtime-stereo-cpu-{w}x{h}.onnx", # where to save the model (can be a file or file-like object) 167 | export_params=True, # store the trained parameter weights inside the model file 168 | opset_version=16, # the ONNX version to export the model to 169 | do_constant_folding=True, # whether to execute constant folding for optimization 170 | input_names = ['left', 'right'], # the model's input names 171 | output_names = ['disparity']) # the model's output names 172 | # No dynamic axes with tracing :-( 173 | # dynamic_axes={'left' : {0 : 'batch_size', 2 : 'width', 3 : 'height' }, # variable length axes, except channels 174 | # 'right' : {0 : 'batch_size', 2 : 'width', 3 : 'height' }, 175 | # 'output' : {0 : 'batch_size', 2 : 'width', 3 : 'height'}}) 176 | # save_onnx(scripted_module, "chang-realtime-stereo-cpu.onnx") 177 | 178 | def benchmark_model(name, size, device): 179 | w, h = size 180 | is_gpu = (device == 'cuda') 181 | 182 | def load_model (): 183 | checkpoint_file = rtstereo_model 184 | net = RTStereoNet(maxdisp=192, device=device) 185 | checkpoint = torch.load(checkpoint_file) 186 | net.load_state_dict(checkpoint['state_dict']) 187 | net.eval() 188 | net = net.to (device) 189 | return net 190 | 191 | def do_inference (model): 192 | with torch.no_grad(): 193 | sample_input = (torch.zeros(1, 3, h, w).to(device), torch.zeros(1, 3, h, w).to(device)) 194 | outputs = model (*sample_input) 195 | print (type(outputs)) 196 | 197 | monitor = StereodemoPerformanceMonitor(f'{name}_{device}_{w}x{h}', load_model, do_inference, is_gpu) 198 | monitor.run () 199 | 200 | 201 | if __name__ == "__main__": 202 | export_models () 203 | 204 | # torch.set_num_threads(1) 205 | # for s in [(320,240), (640,480), (1280,720)]: 206 | # benchmark_model ('chang', s, 'cpu') 207 | 208 | -------------------------------------------------------------------------------- /stereodemo/method_cre_stereo.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import time 3 | from dataclasses import dataclass 4 | 5 | import cv2 6 | import numpy as np 7 | import onnxruntime 8 | 9 | from .methods import Config, EnumParameter, StereoMethod, InputPair, StereoOutput 10 | from . 
import utils 11 | 12 | urls = { 13 | "crestereo_combined_iter10_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter10_240x320.onnx", 14 | "crestereo_combined_iter10_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter10_480x640.onnx", 15 | "crestereo_combined_iter10_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter10_720x1280.onnx", 16 | "crestereo_combined_iter20_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter20_240x320.onnx", 17 | "crestereo_combined_iter20_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter20_480x640.onnx", 18 | "crestereo_combined_iter20_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter20_720x1280.onnx", 19 | "crestereo_combined_iter2_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter2_240x320.onnx", 20 | "crestereo_combined_iter2_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter2_480x640.onnx", 21 | "crestereo_combined_iter2_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter2_720x1280.onnx", 22 | "crestereo_combined_iter5_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter5_240x320.onnx", 23 | "crestereo_combined_iter5_380x480.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter5_380x480.onnx", 24 | "crestereo_combined_iter5_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter5_480x640.onnx", 25 | "crestereo_combined_iter5_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter5_720x1280.onnx", 26 | "crestereo_init_iter10_180x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter10_180x320.onnx", 27 | "crestereo_init_iter10_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter10_240x320.onnx", 28 | "crestereo_init_iter10_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter10_480x640.onnx", 29 | "crestereo_init_iter10_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter10_720x1280.onnx", 30 | "crestereo_init_iter20_180x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter20_180x320.onnx", 31 | "crestereo_init_iter20_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter20_240x320.onnx", 32 | "crestereo_init_iter20_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter20_480x640.onnx", 33 | "crestereo_init_iter20_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter20_720x1280.onnx", 34 | "crestereo_init_iter2_180x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter2_180x320.onnx", 35 | "crestereo_init_iter2_240x320.onnx": 
"https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter2_240x320.onnx", 36 | "crestereo_init_iter2_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter2_480x640.onnx", 37 | "crestereo_init_iter2_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter2_720x1280.onnx", 38 | "crestereo_init_iter5_180x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter5_180x320.onnx", 39 | "crestereo_init_iter5_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter5_240x320.onnx", 40 | "crestereo_init_iter5_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter5_480x640.onnx", 41 | "crestereo_init_iter5_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter5_720x1280.onnx" 42 | } 43 | 44 | # Adapted from https://github.com/ibaiGorordo/ONNX-CREStereo-Depth-Estimation 45 | # https://github.com/PINTO0309/PINTO_model_zoo/tree/main/284_CREStereo 46 | class CREStereo(StereoMethod): 47 | def __init__(self, config: Config): 48 | super().__init__("CRE Stereo (CVPR 2022)", 49 | "Practical Stereo Matching via Cascaded Recurrent Network with Adaptive Correlation. Pre-trained on a large range of datasets.", 50 | {}, 51 | config) 52 | self.reset_defaults() 53 | 54 | self._loaded_session = None 55 | self._loaded_model_path = None 56 | 57 | def reset_defaults(self): 58 | self.parameters.update ({ 59 | "Iterations": EnumParameter("Number of iterations", 1, ["2", "5", "10", "20"]), 60 | "Mode": EnumParameter("Number of passes. The combined version does 2 passes, one to get an initial estimation and a second one to refine it.", 61 | 1, ["init", "combined"]), 62 | "Shape": EnumParameter("Processed image size", 1, ["320x240", "640x480", "1280x720"]) 63 | }) 64 | 65 | def compute_disparity(self, input: InputPair) -> StereoOutput: 66 | left_image, right_image = input.left_image, input.right_image 67 | cols, rows = self.parameters["Shape"].value.split('x') 68 | version = self.parameters["Mode"].value 69 | iters = self.parameters["Iterations"].value 70 | model_path = self.config.models_path / f'crestereo_{version}_iter{iters}_{rows}x{cols}.onnx' 71 | self._load_model (model_path) 72 | 73 | left_tensor = self._prepare_input(left_image) 74 | right_tensor = self._prepare_input(right_image) 75 | 76 | # Get the half resolution to calculate flow_init 77 | if self._has_flow: 78 | left_tensor_half = self._prepare_input(left_image, half=True) 79 | right_tensor_half = self._prepare_input(right_image, half=True) 80 | start = time.time() 81 | outputs = self._inference_with_flow(left_tensor_half, 82 | right_tensor_half, 83 | left_tensor, 84 | right_tensor) 85 | else: 86 | # Estimate the disparity map 87 | start = time.time() 88 | outputs = self._inference_without_flow(left_tensor, right_tensor) 89 | 90 | elapsed_time = time.time() - start 91 | 92 | disparity_map = self.process_output(outputs) 93 | 94 | if disparity_map.shape[:2] != left_image.shape[:2]: 95 | disparity_map = cv2.resize (disparity_map, (left_image.shape[1], left_image.shape[0]), cv2.INTER_NEAREST) 96 | x_scale = left_image.shape[1] / float(cols) 97 | disparity_map *= np.float32(x_scale) 98 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 99 | 100 | def _download_model (self, model_path: Path): 101 | utils.download_model 
(urls[model_path.name], model_path) 102 | 103 | def _load_model(self, model_path: Path): 104 | if (self._loaded_model_path == model_path): 105 | return 106 | 107 | if not model_path.exists(): 108 | self._download_model (model_path) 109 | 110 | # To try with just one CPU core. 111 | # opts = onnxruntime.SessionOptions() 112 | # opts.intra_op_num_threads = 1 113 | # opts.inter_op_num_threads = 1 114 | # opts.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL 115 | assert Path(model_path).exists() 116 | self._loaded_model_path = model_path 117 | self._loaded_session = onnxruntime.InferenceSession(str(model_path), providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) 118 | 119 | # Get model info 120 | self.load_input_details() 121 | self.load_output_details() 122 | 123 | # Check if the model has init flow 124 | self._has_flow = len(self.input_names) > 2 125 | 126 | def _prepare_input(self, img, half=False): 127 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 128 | if half: 129 | img_input = cv2.resize( 130 | img, (self.input_width//2, self.input_height//2), cv2.INTER_AREA) 131 | else: 132 | img_input = cv2.resize( 133 | img, (self.input_width, self.input_height), cv2.INTER_AREA) 134 | img_input = img_input.transpose(2, 0, 1) 135 | img_input = img_input[np.newaxis, :, :, :] 136 | return img_input.astype(np.float32) 137 | 138 | def _inference_without_flow(self, left_tensor, right_tensor): 139 | 140 | return self._loaded_session.run(self.output_names, {self.input_names[0]: left_tensor, 141 | self.input_names[1]: right_tensor})[0] 142 | 143 | def _inference_with_flow(self, left_tensor_half, right_tensor_half, left_tensor, right_tensor): 144 | 145 | return self._loaded_session.run(self.output_names, {self.input_names[0]: left_tensor_half, 146 | self.input_names[1]: right_tensor_half, 147 | self.input_names[2]: left_tensor, 148 | self.input_names[3]: right_tensor})[0] 149 | 150 | def process_output(self, output): 151 | return np.squeeze(output[:, 0, :, :]) 152 | 153 | def load_input_details(self): 154 | model_inputs = self._loaded_session.get_inputs() 155 | self.input_names = [ 156 | model_inputs[i].name for i in range(len(model_inputs))] 157 | 158 | self.input_shape = model_inputs[-1].shape 159 | self.input_height = self.input_shape[2] 160 | self.input_width = self.input_shape[3] 161 | 162 | def load_output_details(self): 163 | model_outputs = self._loaded_session.get_outputs() 164 | self.output_names = [ 165 | model_outputs[i].name for i in range(len(model_outputs))] 166 | 167 | self.output_shape = model_outputs[0].shape 168 | -------------------------------------------------------------------------------- /stereodemo/visualizer.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import time 3 | from typing import Dict, List, Optional, Tuple 4 | from dataclasses import dataclass 5 | from abc import abstractmethod 6 | from concurrent.futures import ThreadPoolExecutor 7 | 8 | import numpy as np 9 | import cv2 10 | 11 | import open3d as o3d 12 | import open3d.visualization.gui as gui 13 | import open3d.visualization.rendering as rendering 14 | 15 | from .methods import IntParameter, EnumParameter, StereoOutput, StereoMethod, Calibration, InputPair 16 | 17 | disparity_window = None 18 | 19 | class ImageWindow: 20 | def __init__(self, name: str, size: Tuple[int, int]): 21 | self.name = name 22 | self.window = gui.Application.instance.create_window(name, size[0], size[1]) 23 | self.image_widget = gui.ImageWidget() 24 | 
self.window.add_child(self.image_widget) 25 | 26 | def update_image(self, image: np.ndarray): 27 | image_geom = o3d.geometry.Image(image) 28 | self.image_widget.update_image(image_geom) 29 | self.window.post_redraw() 30 | 31 | class ImageWindowsManager: 32 | def __init__(self): 33 | self.windows_by_name = {} 34 | 35 | def imshow(self, name: str, image: np.ndarray, window_title: Optional[str], max_size: int): 36 | if name not in self.windows_by_name: 37 | rows, cols, _ = image.shape 38 | if cols > rows: 39 | initial_size = max_size, int(max_size * rows / cols) 40 | else: 41 | initial_size = int(max_size * cols / rows), max_size 42 | self.windows_by_name[name] = ImageWindow(name, initial_size) 43 | self.windows_by_name[name].update_image(image) 44 | if window_title is not None: 45 | self.windows_by_name[name].title = window_title 46 | 47 | image_windows_manager = ImageWindowsManager() 48 | 49 | def imshow (name: str, image: np.ndarray, window_title=None, max_size=640): 50 | global image_windows_manager 51 | if image_windows_manager is None: 52 | image_windows_manager = ImageWindowsManager() 53 | image_windows_manager.imshow(name, image, window_title, max_size) 54 | 55 | def color_disparity (disparity_map: np.ndarray, calibration: Calibration): 56 | min_disp = (calibration.fx * calibration.baseline_meters) / calibration.depth_range[1] 57 | # disparity_pixels = (calibration.fx * calibration.baseline_meters) / depth_meters 58 | max_disp = (calibration.fx * calibration.baseline_meters) / calibration.depth_range[0] 59 | norm_disparity_map = 255*((disparity_map-min_disp) / (max_disp-min_disp)) 60 | disparity_color = cv2.applyColorMap(cv2.convertScaleAbs(norm_disparity_map, 1), cv2.COLORMAP_VIRIDIS) 61 | return disparity_color 62 | 63 | def show_color_disparity (name: str, color_disparity: np.ndarray): 64 | imshow ("StereoDemo - Disparity", color_disparity, name) 65 | 66 | class Settings: 67 | def __init__(self): 68 | self.show_axes = False 69 | 70 | class Source: 71 | def __init__(self): 72 | pass 73 | 74 | @abstractmethod 75 | def is_live(self) -> bool: 76 | """Whether the source is capture live images or not""" 77 | return False 78 | 79 | def selected_index (self) -> int: 80 | return 0 81 | 82 | @abstractmethod 83 | def get_next_pair(self) -> InputPair: 84 | return InputPair(None, None, None, None) 85 | 86 | def get_pair_at_index(self, idx: int) -> InputPair: 87 | return InputPair(None, None, None, None) 88 | 89 | def get_pair_list(self) -> List[str]: 90 | return [] 91 | 92 | class Visualizer: 93 | def __init__(self, stereo_methods: Dict[str, StereoMethod], source: Source): 94 | gui.Application.instance.initialize() 95 | 96 | self.vis = gui.Application.instance 97 | self.source = source 98 | 99 | self.executor = ThreadPoolExecutor(max_workers=1) 100 | self.executor_future = None 101 | self._progress_dialog = None 102 | self._last_progress_update_time = None 103 | 104 | self.stereo_methods = stereo_methods 105 | self.stereo_methods_output = {} 106 | self.input = InputPair (None, None, None, None) 107 | self._downsample_factor = 0 108 | 109 | self.window = gui.Application.instance.create_window("StereoDemo", 1280, 1024) 110 | w = self.window # to make the code more concise 111 | 112 | self.settings = Settings() 113 | 114 | # 3D widget 115 | self._scene = gui.SceneWidget() 116 | self._scene.scene = rendering.Open3DScene(w.renderer) 117 | # self._scene.scene.show_ground_plane(True, rendering.Scene.GroundPlane.XZ) 118 | self._scene.set_view_controls(gui.SceneWidget.Controls.ROTATE_CAMERA) 119 | 
self._scene.set_on_key(self._on_key_pressed) 120 | 121 | self._clear_outputs () 122 | 123 | for name, o in self.stereo_methods_output.items(): 124 | if o.point_cloud is not None: 125 | self._scene.scene.add_geometry(name, o.point_cloud, rendering.MaterialRecord()) 126 | 127 | self._reset_camera() 128 | 129 | em = w.theme.font_size 130 | self.separation_height = int(round(0.5 * em)) 131 | self._settings_panel = gui.Vert(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 132 | 133 | self._next_image_button = gui.Button("Next Image") 134 | self._next_image_button.set_on_clicked(self._next_image_clicked) 135 | self._settings_panel.add_child(self._next_image_button) 136 | if not self.source.is_live(): 137 | # self._next_image_button = gui.Button("Next") 138 | # self._next_image_button.set_on_clicked(self._next_image_clicked) 139 | # horiz.add_child(self._next_image_button) 140 | 141 | # self.images_combo = gui.ListView() 142 | # input_pairs = self.source.get_pair_list() 143 | # self.images_combo.set_items(input_pairs) 144 | # self.images_combo.selected_index = 0 145 | # self.images_combo.set_max_visible_items(3) 146 | # self.images_combo.set_on_selection_changed(self._image_selected) 147 | # self.images_combo.tooltip = self.images_combo.selected_value 148 | # self._settings_panel.add_child(self.images_combo) 149 | # self._settings_panel.add_fixed(self.separation_height) 150 | # horiz.add_child(self.images_combo) 151 | 152 | self._settings_panel.add_fixed(self.separation_height) 153 | self.images_combo = gui.Combobox() 154 | input_pairs = self.source.get_pair_list() 155 | for pair_name in input_pairs: 156 | self.images_combo.add_item(pair_name) 157 | self.images_combo.selected_index = 0 158 | self.images_combo.set_on_selection_changed(self._image_selected) 159 | self._settings_panel.add_child(self.images_combo) 160 | self._settings_panel.add_fixed(self.separation_height) 161 | else: 162 | self.images_combo = None 163 | 164 | horiz = gui.Horiz(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 165 | label = gui.Label("Input downsampling") 166 | label.tooltip = "Number of /2 downsampling steps to apply on the input" 167 | horiz.add_child(label) 168 | downsampling_slider = gui.Slider(gui.Slider.INT) 169 | downsampling_slider.set_limits(0, 4) 170 | downsampling_slider.int_value = self._downsample_factor 171 | downsampling_slider.set_on_value_changed(self._downsampling_changed) 172 | horiz.add_child(downsampling_slider) 173 | self._settings_panel.add_child(horiz) 174 | 175 | self._settings_panel.add_fixed(self.separation_height) 176 | 177 | self.algo_list = gui.ListView() 178 | self.algo_list.set_items(list(stereo_methods.keys())) 179 | self.algo_list.selected_index = 0 180 | self.algo_list.set_max_visible_items(8) 181 | self.algo_list.set_on_selection_changed(self._on_algo_list_selected) 182 | self._settings_panel.add_child(self.algo_list) 183 | 184 | self.method_params_proxy = gui.WidgetProxy() 185 | self._settings_panel.add_child (self.method_params_proxy) 186 | 187 | self.last_runtime = gui.Label("") 188 | self._settings_panel.add_child (self.last_runtime) 189 | 190 | self.input_status = gui.Label("No input.") 191 | self._settings_panel.add_child (self.input_status) 192 | 193 | view_ctrls = gui.CollapsableVert("View controls", 0.25 * em, gui.Margins(em, 0, 0, 0)) 194 | reset_cam_button = gui.Button("Reset Camera") 195 | reset_cam_button.set_on_clicked(self._reset_camera) 196 | view_ctrls.add_child(reset_cam_button) 197 | # self._show_axes = gui.Checkbox("Show axes") 198 | # 
self._show_axes.set_on_checked(self._on_show_axes) 199 | # view_ctrls.add_child(self._show_axes) 200 | 201 | horiz = gui.Horiz(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 202 | label = gui.Label("Max depth (m)") 203 | label.tooltip = "Max depth to render in meters" 204 | horiz.add_child(label) 205 | self.depth_range_slider = gui.Slider(gui.Slider.DOUBLE) 206 | self.depth_range_slider.set_limits(0.5, 1000) 207 | self.depth_range_slider.double_value = 100 208 | self.depth_range_slider.set_on_value_changed(self._depth_range_slider_changed) 209 | horiz.add_child(self.depth_range_slider) 210 | view_ctrls.add_child(horiz) 211 | 212 | self._depth_range_manually_changed = False 213 | 214 | self._settings_panel.add_fixed(self.separation_height) 215 | self._settings_panel.add_child(view_ctrls) 216 | 217 | w.set_on_layout(self._on_layout) 218 | w.add_child(self._scene) 219 | w.add_child(self._settings_panel) 220 | 221 | self._on_algo_list_selected(self.algo_list.selected_value, False) 222 | self._apply_settings() 223 | 224 | if self.source.is_live(): 225 | self.read_next_pair () 226 | else: 227 | self._image_selected (None, None) 228 | 229 | def _on_key_pressed (self, keyEvent): 230 | if keyEvent.key == gui.KeyName.Q: 231 | self.vis.quit() 232 | return gui.SceneWidget.EventCallbackResult.HANDLED 233 | return gui.SceneWidget.EventCallbackResult.IGNORED 234 | 235 | def _downsampling_changed(self, v): 236 | self._downsample_factor = int(v) 237 | self._process_input (self.full_res_input) 238 | 239 | def _downsample_input (self, input: InputPair): 240 | for i in range(0, self._downsample_factor): 241 | if np.max(input.left_image.shape[:2]) < 250: 242 | break 243 | input.left_image = cv2.pyrDown(input.left_image) 244 | input.right_image = cv2.pyrDown(input.right_image) 245 | if input.input_disparity is not None: 246 | input.input_disparity = cv2.pyrDown(input.input_disparity) 247 | input.calibration.downsample(input.left_image.shape[1], input.left_image.shape[0]) 248 | 249 | def read_next_pair (self): 250 | input = self.source.get_next_pair () 251 | self._update_pair_index () 252 | self._process_input (input) 253 | 254 | def _process_input (self, input): 255 | if self._downsample_factor > 0: 256 | self.full_res_input = input 257 | input = copy.deepcopy(input) 258 | self._downsample_input (input) 259 | else: 260 | self.full_res_input = input 261 | 262 | if not self._depth_range_manually_changed: 263 | self.depth_range_slider.double_value = input.calibration.depth_range[1] 264 | 265 | imshow ("StereoDemo - Input image", np.hstack([input.left_image, input.right_image])) 266 | 267 | self.input = input 268 | self.input_status.text = f"Input: {input.left_image.shape[1]}x{input.left_image.shape[0]} " + input.status 269 | 270 | if self.input.has_data(): 271 | assert self.input.left_image.shape[1] == self.input.calibration.width and self.input.left_image.shape[0] == self.input.calibration.height 272 | self.o3dCameraIntrinsic = o3d.camera.PinholeCameraIntrinsic(width=self.input.left_image.shape[1], 273 | height=self.input.left_image.shape[0], 274 | fx=self.input.calibration.fx, 275 | fy=self.input.calibration.fy, 276 | cx=self.input.calibration.cx0, 277 | cy=self.input.calibration.cy) 278 | 279 | self._clear_outputs () 280 | self._run_current_method () 281 | 282 | def update_once (self): 283 | if self.executor_future is not None: 284 | self._check_run_complete() 285 | return gui.Application.instance.run_one_tick() 286 | 287 | def _clear_outputs (self): 288 | for name in self.stereo_methods.keys(): 
289 | self.stereo_methods_output[name] = StereoOutput( 290 | disparity_pixels=None, 291 | color_image_bgr=None, 292 | computation_time=np.nan) 293 | if self._scene.scene.has_geometry(name): 294 | self._scene.scene.remove_geometry(name) 295 | 296 | def _reset_camera (self): 297 | # bbox = o3d.geometry.AxisAlignedBoundingBox(np.array([-10, 0,-10]), np.array([0,3,0])) 298 | bbox = self._scene.scene.bounding_box 299 | min_bound, max_bound = bbox.min_bound.copy(), bbox.max_bound.copy() 300 | min_bound[0] = min(min_bound[0], -5) 301 | min_bound[2] = min(min_bound[2], -5) 302 | max_bound[0] = max(max_bound[0], 5) 303 | max_bound[1] = max(max_bound[1], 2) 304 | max_bound[2] = 0 305 | bbox.min_bound, bbox.max_bound = min_bound, max_bound 306 | 307 | self._scene.setup_camera(60.0, bbox, np.array([0,0,0])) 308 | eye = np.array([0, 0.5, 1.0]) 309 | lookat = np.array([0, 0, -1.0]) 310 | up = np.array([0, 1.0, 0]) 311 | self._scene.look_at(lookat, eye, up) 312 | 313 | if self.input.has_data(): 314 | self._depth_range_manually_changed = False 315 | self.depth_range_slider.double_value = self.input.calibration.depth_range[1] 316 | self._update_rendering () 317 | 318 | def _build_stereo_method_widgets(self, name): 319 | em = self.window.theme.font_size 320 | method = self.stereo_methods[name] 321 | container = gui.Vert(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 322 | label = gui.Label(method.description) 323 | label.text_color = gui.Color(1.0, 0.5, 0.0) 324 | container.add_child(label) 325 | self._reload_settings_functions = [] 326 | for name, param in method.parameters.items(): 327 | if isinstance(param, IntParameter): 328 | horiz = gui.Horiz(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 329 | label = gui.Label(name) 330 | label.tooltip = param.description 331 | horiz.add_child(label) 332 | slider = gui.Slider(gui.Slider.INT) 333 | slider.set_limits(param.min, param.max) 334 | slider.int_value = param.value 335 | def set_value_from_method(slider=slider, method=method, name=name): 336 | slider.int_value = method.parameters[name].value 337 | self._reload_settings_functions.append(set_value_from_method) 338 | # workaround late binding 339 | # https://docs.python-guide.org/writing/gotchas/#:~:text=Python's%20closures%20are%20late%20binding,surrounding%20scope%20at%20call%20time. 
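                    # Illustrative note: without these default-argument bindings every callback
                    # created in this loop would close over the same loop variables and end up
                    # acting on the last parameter only, e.g.
                    #     fns = [lambda: print(n) for n in ("a", "b")]  # both print "b"
                    # Passing method=method, name=name, slider=slider freezes the per-iteration
                    # values at definition time.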
340 | def callback(value, method=method, name=name, slider=slider): 341 | p = method.parameters[name] 342 | p.set_value(int(value)) 343 | slider.int_value = p.value 344 | slider.set_on_value_changed(callback) 345 | horiz.add_child(slider) 346 | container.add_child(horiz) 347 | elif isinstance(param, EnumParameter): 348 | horiz = gui.Horiz(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 349 | label = gui.Label(name) 350 | label.tooltip = param.description 351 | horiz.add_child(label) 352 | combo = gui.Combobox() 353 | for value in param.values: 354 | combo.add_item(value) 355 | combo.selected_index = param.index 356 | def callback(combo_idx, combo_val, method=method, name=name, combo=combo): 357 | method.parameters[name].set_index(combo.selected_index) 358 | combo.set_on_selection_changed(callback) 359 | def set_value_from_method(combo=combo, method=method, name=name): 360 | combo.selected_index = method.parameters[name].index 361 | self._reload_settings_functions.append(set_value_from_method) 362 | horiz.add_child(combo) 363 | container.add_child(horiz) 364 | 365 | horiz = gui.Horiz(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 366 | apply_button = gui.Button("Apply") 367 | apply_button.horizontal_padding_em = 3 368 | apply_button.set_on_clicked(self._run_current_method) 369 | horiz.add_child(apply_button) 370 | horiz.add_fixed(self.separation_height) 371 | reset_default = gui.Button("Reset defaults") 372 | reset_default.set_on_clicked(self._reset_method_defaults) 373 | horiz.add_child(reset_default) 374 | container.add_child(horiz) 375 | return container 376 | 377 | def _on_algo_list_selected(self, name: str, is_dbl_click: bool): 378 | self.method_params_proxy.set_widget(self._build_stereo_method_widgets(name)) 379 | self._update_runtime () 380 | for other_name in self.stereo_methods_output.keys(): 381 | self._scene.scene.show_geometry(other_name, False) 382 | self._scene.scene.show_geometry(name, True) 383 | self._apply_settings() 384 | if self.stereo_methods_output[name].disparity_pixels is None: 385 | self._run_current_method () 386 | if self.stereo_methods_output[name].disparity_color is not None: 387 | show_color_disparity (name, self.stereo_methods_output[name].disparity_color) 388 | 389 | def _on_show_axes(self, show): 390 | self.settings.show_axes = show 391 | self._apply_settings() 392 | 393 | def _next_image_clicked(self): 394 | self.read_next_pair () 395 | 396 | def _image_selected(self, combo_idx, combo_val): 397 | idx = self.images_combo.selected_index 398 | input = self.source.get_pair_at_index (idx) 399 | self._process_input (input) 400 | 401 | def _update_pair_index (self): 402 | if self.images_combo is not None: 403 | self.images_combo.selected_index = self.source.selected_index() 404 | 405 | def _apply_settings(self): 406 | self._scene.scene.show_axes(self.settings.show_axes) 407 | 408 | def _reset_method_defaults(self): 409 | name = self.algo_list.selected_value 410 | method = self.stereo_methods[name] 411 | method.reset_defaults() 412 | for m in self._reload_settings_functions: 413 | m() 414 | 415 | def _check_run_complete(self): 416 | if not self.executor_future.done(): 417 | if self._progress_dialog is None: 418 | self._progress_dialog = self._show_progress_dialog("Running the current method", f"Computing {self.algo_list.selected_value}...") 419 | now = time.time() 420 | if (now - self._last_progress_update_time > 0.1): 421 | self._last_progress_update_time = now 422 | self._run_progress.value += (1.0 - self._run_progress.value) / 16.0 423 | 
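                # Each update closes 1/16 of the remaining gap, so the bar creeps
                # asymptotically toward 100% without ever finishing while the worker
                # thread is still busy.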
return 424 | 425 | if self._progress_dialog: 426 | self.window.close_dialog () 427 | self._progress_dialog = None 428 | 429 | stereo_output = self.executor_future.result() 430 | self.executor_future = None 431 | 432 | x0,y0,x1,y1 = self.input.calibration.left_image_rect_normalized 433 | x0 = int(x0*stereo_output.disparity_pixels.shape[1] + 0.5) 434 | x1 = int(x1*stereo_output.disparity_pixels.shape[1] + 0.5) 435 | y0 = int(y0*stereo_output.disparity_pixels.shape[0] + 0.5) 436 | y1 = int(y1*stereo_output.disparity_pixels.shape[0] + 0.5) 437 | valid_mask = np.zeros(stereo_output.disparity_pixels.shape, dtype=np.uint8) 438 | valid_mask[y0:y1, x0:x1] = 1 439 | stereo_output.disparity_pixels[valid_mask == 0] = -1.0 440 | 441 | name = self.algo_list.selected_value 442 | stereo_output.disparity_color = color_disparity (stereo_output.disparity_pixels, self.input.calibration) 443 | show_color_disparity (name, stereo_output.disparity_color) 444 | 445 | self.stereo_methods_output[name] = stereo_output 446 | self._update_rendering ([name]) 447 | self._update_runtime () 448 | 449 | def _depth_range_slider_changed(self, v: float): 450 | self._depth_range_manually_changed = True 451 | self._update_rendering() 452 | 453 | def _update_rendering (self, names_to_update=None): 454 | if names_to_update is None: 455 | names_to_update = list(self.stereo_methods_output.keys()) 456 | 457 | selected_name = self.algo_list.selected_value 458 | 459 | for name in names_to_update: 460 | stereo_output = self.stereo_methods_output[name] 461 | if stereo_output.disparity_pixels is None: 462 | continue 463 | 464 | depth_meters = StereoMethod.depth_meters_from_disparity(stereo_output.disparity_pixels, self.input.calibration) 465 | 466 | if self._scene.scene.has_geometry(name): 467 | self._scene.scene.remove_geometry(name) 468 | 469 | 470 | o3d_color = o3d.geometry.Image(cv2.cvtColor(stereo_output.color_image_bgr, cv2.COLOR_BGR2RGB)) 471 | o3d_depth = o3d.geometry.Image(depth_meters) 472 | rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(o3d_color, 473 | o3d_depth, 474 | 1, 475 | depth_trunc=self.depth_range_slider.int_value, 476 | convert_rgb_to_intensity=False) 477 | stereo_output.point_cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, self.o3dCameraIntrinsic) 478 | stereo_output.point_cloud.transform([[1,0,0,0],[0,-1,0,0],[0,0,-1,0],[0,0,0,1]]) 479 | self._scene.scene.add_geometry(name, stereo_output.point_cloud, rendering.MaterialRecord()) 480 | self._scene.scene.show_geometry(name, name == selected_name) 481 | 482 | def _run_current_method(self): 483 | if self.executor_future is not None: 484 | return self._check_run_complete () 485 | 486 | if not self.input.has_data(): 487 | return 488 | 489 | name = self.algo_list.selected_value 490 | 491 | def do_beefy_work(): 492 | stereo_output = self.stereo_methods[name].compute_disparity (self.input) 493 | return stereo_output 494 | 495 | self._last_progress_update_time = time.time() 496 | self.executor_future = self.executor.submit (do_beefy_work) 497 | 498 | def _show_progress_dialog(self, title, message): 499 | # A Dialog is just a widget, so you make its child a layout just like 500 | # a Window. 501 | dlg = gui.Dialog(title) 502 | 503 | # Add the message text 504 | em = self.window.theme.font_size 505 | dlg_layout = gui.Vert(em, gui.Margins(em, em, em, em)) 506 | dlg_layout.add_child(gui.Label(message)) 507 | 508 | # Add the Ok button. We need to define a callback function to handle 509 | # the click. 
510 | self._run_progress = gui.ProgressBar() 511 | self._run_progress.value = 0.1 # 10% complete 512 | prog_layout = gui.Horiz(em) 513 | prog_layout.add_child(self._run_progress) 514 | dlg_layout.add_child(prog_layout) 515 | 516 | dlg.add_child(dlg_layout) 517 | self.window.show_dialog(dlg) 518 | return dlg 519 | 520 | def _update_runtime (self): 521 | name = self.algo_list.selected_value 522 | output = self.stereo_methods_output[name] 523 | if np.isnan(output.computation_time): 524 | self.last_runtime.text = "No output yet." 525 | else: 526 | self.last_runtime.text = f"Computation time: {output.computation_time*1e3:.1f} ms" 527 | 528 | def _on_layout(self, layout_context): 529 | # The on_layout callback should set the frame (position + size) of every 530 | # child correctly. After the callback is done the window will layout 531 | # the grandchildren. 532 | settings_width = 17 * layout_context.theme.font_size 533 | r = self.window.content_rect 534 | self._scene.frame = gui.Rect(0, r.y, r.get_right() - settings_width, r.height) 535 | # height = min( 536 | # r.height, 537 | # self._settings_panel.calc_preferred_size( 538 | # layout_context, gui.Widget.Constraints()).height) 539 | height = r.height 540 | self._settings_panel.frame = gui.Rect(r.get_right() - settings_width, r.y, settings_width, height) 541 | --------------------------------------------------------------------------------
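The pinhole-stereo relation that color_disparity and StereoMethod.depth_meters_from_disparity rely on is depth = fx * baseline / disparity for a rectified pair. A minimal, self-contained sketch of that conversion (illustrative only, not part of the repository; the helper name and the example numbers are assumptions):

import numpy as np

def depth_from_disparity(disparity_px: np.ndarray, fx: float, baseline_m: float) -> np.ndarray:
    """Illustrative helper: depth_m = fx * baseline_m / disparity_px for a rectified pair.

    Pixels flagged as invalid (disparity <= 0, as the methods above use -1) map to 0 depth.
    """
    depth = np.zeros_like(disparity_px, dtype=np.float32)
    valid = disparity_px > 0
    depth[valid] = (fx * baseline_m) / disparity_px[valid]
    return depth

# With the placeholder calibration FileListSource builds for a 640x480 pair:
# fx = 0.8 * 480 = 384 px and baseline = 0.075 m, so a 20 px disparity is about 1.44 m away.
print(depth_from_disparity(np.array([20.0, -1.0]), fx=384.0, baseline_m=0.075))  # -> [1.44 0.]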