├── stereodemo
│   ├── __main__.py
│   ├── __init__.py
│   ├── utils.py
│   ├── method_dist_depth.py
│   ├── chang_realtime_stereo_onnx.py
│   ├── methods.py
│   ├── method_chang_realtime_stereo.py
│   ├── oakd_source.py
│   ├── method_opencv_bm.py
│   ├── method_hitnet.py
│   ├── method_sttr.py
│   ├── main.py
│   ├── method_raft_stereo.py
│   ├── method_cre_stereo.py
│   └── visualizer.py
├── datasets
│   ├── opencv-sample
│   │   ├── README.md
│   │   ├── aloe_left.jpg
│   │   ├── aloe_right.jpg
│   │   └── stereodemo_calibration.json
│   ├── drivingstereo
│   │   ├── README.md
│   │   ├── 2018-07-11-14-48-52
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-14-50-10-570_left.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-14-57-53-937_left.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-02-03-700_left.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-02-29-915_left.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-26-56-946_left.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-14-50-10-570_right.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-14-57-53-937_right.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-02-03-700_right.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-02-29-915_right.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-26-56-946_right.jpg
│   │   │   ├── 2018-07-11-14-48-52_2018-07-11-15-34-51-679_right.jpg
│   │   │   ├── stereodemo_calibration.json
│   │   │   └── 2018-07-11-14-48-52.txt
│   │   └── convert_kitti_calib.py
│   ├── kitti2015
│   │   ├── README.md
│   │   ├── kitti_000046_left.png
│   │   ├── kitti_000046_right.png
│   │   └── stereo_calibration.json
│   ├── sceneflow
│   │   ├── README.md
│   │   ├── driving_left.png
│   │   ├── monkaa_left.png
│   │   ├── monkaa_right.png
│   │   ├── driving_right.png
│   │   ├── flyingthings_left.png
│   │   ├── flyingthings_right.png
│   │   └── stereodemo_calibration.json
│   ├── oak-d
│   │   ├── cycles_left.png
│   │   ├── desk_left.png
│   │   ├── desk_right.png
│   │   ├── selfie_left.png
│   │   ├── stairs_left.png
│   │   ├── toy_left.png
│   │   ├── toy_right.png
│   │   ├── corridor_left.png
│   │   ├── corridor_right.png
│   │   ├── cycles_right.png
│   │   ├── donkey_toy_left.png
│   │   ├── donkey_toy_right.png
│   │   ├── kid_bedroom_left.png
│   │   ├── living_room_left.png
│   │   ├── pov_hands_left.png
│   │   ├── pov_hands_right.png
│   │   ├── selfie_right.png
│   │   ├── stairs_right.png
│   │   ├── bedroom_chair_left.png
│   │   ├── bedroom_chair_right.png
│   │   ├── kid_bedroom_right.png
│   │   ├── living_room_right.png
│   │   ├── pov_controllers_left.png
│   │   ├── pov_controllers_right.png
│   │   ├── README.md
│   │   └── stereodemo_calibration.json
│   ├── eth3d_lowres
│   │   ├── electro_2l
│   │   │   ├── im0.png
│   │   │   ├── im1.png
│   │   │   ├── calib.txt
│   │   │   ├── cameras.txt
│   │   │   ├── stereodemo_calibration.json
│   │   │   └── images.txt
│   │   ├── forest_2s
│   │   │   ├── im0.png
│   │   │   ├── im1.png
│   │   │   ├── calib.txt
│   │   │   ├── stereodemo_calibration.json
│   │   │   ├── cameras.txt
│   │   │   └── images.txt
│   │   ├── playground_1l
│   │   │   ├── im0.png
│   │   │   ├── im1.png
│   │   │   ├── calib.txt
│   │   │   ├── cameras.txt
│   │   │   ├── stereodemo_calibration.json
│   │   │   └── images.txt
│   │   ├── delivery_area_1l
│   │   │   ├── im0.png
│   │   │   ├── im1.png
│   │   │   ├── cameras.txt
│   │   │   ├── stereodemo_calibration.json
│   │   │   ├── calib.txt
│   │   │   └── images.txt
│   │   ├── delivery_area_2l
│   │   │   ├── im0.png
│   │   │   ├── im1.png
│   │   │   ├── cameras.txt
│   │   │   ├── stereodemo_calibration.json
│   │   │   ├── calib.txt
│   │   │   └── images.txt
│   │   ├── README.md
│   │   └── convert_calib_txt.py
│   └── middlebury_2014
│       ├── Piano-imperfect
│       │   ├── im0.png
│       │   ├── im1.png
│       │   ├── stereodemo_calibration.json
│       │   └── calib.txt
│       ├── Playtable-imperfect
│       │   ├── im0.png
│       │   ├── im1.png
│       │   ├── stereodemo_calibration.json
│       │   └── calib.txt
│       ├── README.md
│       └── convert_calib_txt.py
├── .gitignore
├── .gitattributes
├── pyproject.toml
├── setup.py
├── CONTRIBUTING.md
├── MANIFEST.in
├── .github
│   └── workflows
│       └── unit_tests.yml
├── LICENSE
├── .vscode
│   └── launch.json
├── setup.cfg
├── tests
│   └── test_methods.py
├── tools
│   ├── capture_oakd_frames.py
│   └── chang_realtimestereo_to_torchscript_onnx.py
└── README.md

/stereodemo/__main__.py:
--------------------------------------------------------------------------------
1 | from . import main
2 | main()
3 | 
--------------------------------------------------------------------------------
/datasets/opencv-sample/README.md:
--------------------------------------------------------------------------------
1 | From OpenCV samples/data.
2 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.egg-info/
3 | imgui.ini
4 | build/
5 | 
--------------------------------------------------------------------------------
/stereodemo/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.6.2"
2 | 
3 | from .main import main
4 | 
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.png filter=lfs diff=lfs merge=lfs -text
2 | *.jpg filter=lfs diff=lfs merge=lfs -text
--------------------------------------------------------------------------------
/datasets/drivingstereo/README.md:
--------------------------------------------------------------------------------
1 | Tiny subset of the data of https://drivingstereo-dataset.github.io/
2 | 
--------------------------------------------------------------------------------
/datasets/kitti2015/README.md:
--------------------------------------------------------------------------------
1 | Sample image taken from http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=stereo
--------------------------------------------------------------------------------
/datasets/sceneflow/README.md:
--------------------------------------------------------------------------------
1 | Sample subset of the data of https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html.
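
Each dataset folder above ships a `stereodemo_calibration.json` describing the rectified pinhole model (`fx`, `fy`, `cx0`, `cx1`, `cy`, `baseline_meters`, plus a display `depth_range`). Inside stereodemo these files are parsed by `Calibration.from_json` in `stereodemo/methods.py`, which is not reproduced in this listing; the sketch below is only an illustration of the file format and of the standard pinhole depth-from-disparity relation, and its helper names are hypothetical.

```python
# Illustrative only: read one of the per-dataset stereodemo_calibration.json files
# and apply the standard pinhole relation depth = fx * baseline / disparity.
# The helper names here are hypothetical; stereodemo's own loader lives in
# stereodemo/methods.py, which is not part of this listing.
import json
from pathlib import Path

import numpy as np

def load_calibration(path: Path) -> dict:
    """Read a stereodemo_calibration.json file into a plain dict."""
    return json.loads(path.read_text())

def depth_from_disparity(disparity_px: np.ndarray, calib: dict) -> np.ndarray:
    """Convert a disparity map (pixels) to depth (meters) with the pinhole model."""
    with np.errstate(divide="ignore"):
        depth = calib["fx"] * calib["baseline_meters"] / disparity_px
    depth[disparity_px <= 0.0] = 0.0  # invalid disparities carry no depth
    return depth

if __name__ == "__main__":
    calib = load_calibration(Path("datasets/sceneflow/stereodemo_calibration.json"))
    fake_disparity = np.full((calib["height"], calib["width"]), 32.0, dtype=np.float32)
    # With fx = 1050 and baseline = 0.065 m, a 32 px disparity maps to ~2.13 m.
    print(depth_from_disparity(fake_disparity, calib).max())
```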
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | -------------------------------------------------------------------------------- /datasets/oak-d/cycles_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6ad108cf43caee84384ffb3c769c0b74ad18d7d6d44d907df3847b0ed9174606 3 | size 166889 4 | -------------------------------------------------------------------------------- /datasets/oak-d/desk_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:28f75bba24ea9ea9ef8be7908506cd2725258d59c88413eff39dca1587c82e55 3 | size 133928 4 | -------------------------------------------------------------------------------- /datasets/oak-d/desk_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:99ae5151ba34e57672cde8e752954ed26129ddddbbda0128e551c60e8ba50cf3 3 | size 144049 4 | -------------------------------------------------------------------------------- /datasets/oak-d/selfie_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d8da968d0e5e7311f9f414b807d8d9109aa5adf04ae500a8f4daa540a9281709 3 | size 152113 4 | -------------------------------------------------------------------------------- /datasets/oak-d/stairs_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3c9c4ff9b10b14ce86c6469ec3ffc2fd738b9eb93ecec6c9d53cd61c97b7d494 3 | size 160253 4 | -------------------------------------------------------------------------------- /datasets/oak-d/toy_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eb5c44e65417565bb47f633cac4e2c0868e302fdd79de511988de04bef942a3f 3 | size 178829 4 | -------------------------------------------------------------------------------- /datasets/oak-d/toy_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:31d0ad52198e61c8724d2ebefcab10d8a0f3682ae36136655eee90ddadea7ec1 3 | size 184079 4 | -------------------------------------------------------------------------------- /datasets/oak-d/corridor_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d51b47b938221309204e7c4c06a361f9f8e28796a396d345ae289583e5d10c38 3 | size 154357 4 | -------------------------------------------------------------------------------- /datasets/oak-d/corridor_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:45627c4f244e12b4b9f184559ab8aa97ec6ec931696dd8ca8bb460c4fab1d126 3 | size 166458 4 | -------------------------------------------------------------------------------- /datasets/oak-d/cycles_right.png: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:814ae281dabc268e64455543dd0d893ab50ea9d406efbeccf199aa0c12310059 3 | size 174756 4 | -------------------------------------------------------------------------------- /datasets/oak-d/donkey_toy_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:06929b3c9d5bd2291e30d967a90604286f7eb4f34daa4348870701c9761c809a 3 | size 178114 4 | -------------------------------------------------------------------------------- /datasets/oak-d/donkey_toy_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0ab2889b84473af6154b2f21d9d65e21b396a73befff61571da91911dd5e8982 3 | size 189676 4 | -------------------------------------------------------------------------------- /datasets/oak-d/kid_bedroom_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fed5cf79e1ee101e6e023b69645b067561f58d61b95f2deeebe735fa28d6bdf9 3 | size 140337 4 | -------------------------------------------------------------------------------- /datasets/oak-d/living_room_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1ced83ec3c4d871237ff56eed9633dbcb080f42b0e934a8a2eea1caafac7efc4 3 | size 168843 4 | -------------------------------------------------------------------------------- /datasets/oak-d/pov_hands_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4a3de50ed7931b55f6fa753af4c100d8bd2ee4d2a09ff10069c8d0ad042d0c3f 3 | size 152547 4 | -------------------------------------------------------------------------------- /datasets/oak-d/pov_hands_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:65cf300ba84b9e9b55120070527f4371a468bed98cf2af59222eef49d90efd2d 3 | size 160353 4 | -------------------------------------------------------------------------------- /datasets/oak-d/selfie_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f04614b90fa69c677d92ede5ebcae27c74971e25b5c3070972466421ce3dc0e3 3 | size 157385 4 | -------------------------------------------------------------------------------- /datasets/oak-d/stairs_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:57ac719b1263b545822f153e27a986f5acdd59cdf321006dfe14defe6a1c03c6 3 | size 169966 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/driving_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2ee3cce23b9653d1462bbc9a36830b9260c53657b95260c87a107a4b6b5937d1 3 | size 828498 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/monkaa_left.png: -------------------------------------------------------------------------------- 1 | version 
https://git-lfs.github.com/spec/v1 2 | oid sha256:1c82639c8f625ccfa7dc828d711251172d10c4f095311ad3881b7d2136b730a1 3 | size 640939 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/monkaa_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:78fec711927efede0fd7600e9c3db9a7433bcb270767f3d4336c6243ed89471d 3 | size 640633 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1638d1f8480896f565ee16e241155a4bb59ca7ca4ddbf82be4bb25040a9eb0e8 3 | size 279050 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8173d90519f89e0c781a2d0e5fcce010477b52497931fdf28a4d1d1f484a2c33 3 | size 274822 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0413f4ae94162fec588a60579c9169aa12a679cb378861ca74b47de60ffa9e1d 3 | size 309666 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9e28a2ee1f6b98967fdb78f5cf528ee7bc1860e4068ef2283070b4b8b26fac47 3 | size 298187 4 | -------------------------------------------------------------------------------- /datasets/kitti2015/kitti_000046_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1bd9d1630c7e4960f573abc3657dd9431e1f29b5cd971041ee154cbdf2bad639 3 | size 849417 4 | -------------------------------------------------------------------------------- /datasets/oak-d/bedroom_chair_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bbec94c081186e5c0ed3f90c05e3e69cbece694f6679dd4aa453751d47fad8f7 3 | size 146806 4 | -------------------------------------------------------------------------------- /datasets/oak-d/bedroom_chair_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6d6d34efdc1decbb45aa3e4adfe9a15a7aba976370f8b1de040c9e256c9765c9 3 | size 157006 4 | -------------------------------------------------------------------------------- /datasets/oak-d/kid_bedroom_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e95e3d9db6466d5ff9d30d702c879b52d8ecdfcc7cddcaab6053cf30015772ee 3 | size 150044 4 | -------------------------------------------------------------------------------- /datasets/oak-d/living_room_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:a6d6523bb2bb604c2c7e6964a38bf776d98948582ecca8ad893c0dea0071781d 3 | size 176791 4 | -------------------------------------------------------------------------------- /datasets/oak-d/pov_controllers_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:27a038def97f08b1aee0b4ac7521cc2c1e4c1962760dd6ad4ae3ba7c72e8337e 3 | size 151662 4 | -------------------------------------------------------------------------------- /datasets/oak-d/pov_controllers_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1cd1fb2ff3c1596b702b14c190e03d5c6260aee7bc6dde80f3847dda3e04cb4c 3 | size 162293 4 | -------------------------------------------------------------------------------- /datasets/opencv-sample/aloe_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:cce5736808efe80d9f04b118dbb978c344d4345672b332718c3e039a3eeb8eee 3 | size 315069 4 | -------------------------------------------------------------------------------- /datasets/opencv-sample/aloe_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9b23100df31a846bc6e6a6545563b2b4120b948c9835c7d36cde00af77f4503e 3 | size 315113 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/driving_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1dbb9097ae55fc27c4075b5cd5c4218b9537bb2546e06699275257fd259c8bb1 3 | size 841296 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/flyingthings_left.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fd14fa89b443fffb712db3b47ab7e3196ae930b41a81bcd2f0daa99d615599de 3 | size 708478 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c958f6978ce162c28fbfe1ff49b0af07f9e89ec45149d73ef315894f9f9dd685 3 | size 333941 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b62431c3cfed78b3289f35c7e1dd715bc8f6f6118c94966a244c6b6c5ceb32bd 3 | size 308788 4 | -------------------------------------------------------------------------------- /datasets/kitti2015/kitti_000046_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:165a81149d82f5ec22b95262f05d7242a083fb581ec8ec56f78b7f89e2c40af5 3 | size 808120 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/flyingthings_right.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:952b0752dc649da9733d24f8c82dc6467616f84d91a44fdd781f06610b737649 3 | size 707543 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:634ff100be6d81ff4b08d7d67949069572ad9f2da51b7bdf651ecb44a41c7879 3 | size 299256 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f543cd3612fd6cd327fc47d77502964660b7234af6330e77d726745e7632c92e 3 | size 296079 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f9c9785b75f968d3fd158814d06754e2efe717df2dc390f4b81993885a793107 3 | size 307909 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:898b4c703593b7bf567786267401abdec5dc44b600302bfb83db3bebebcad0e6 3 | size 304937 4 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Piano-imperfect/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:53eff04d91f3beeb959aac4c3967f041dcdf8ab1c519767bec81036e1c7f4514 3 | size 1557561 4 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Piano-imperfect/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f8e65d06b9fac56aa2568576f66640c95a94b27fadf02dbf60f5903ad5188002 3 | size 1568960 4 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Playtable-imperfect/im0.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e4bc32a5a23ed1683a8c9adccdd8c17b9031c56927fe5d2e3d6a301047651e02 3 | size 1772235 4 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Playtable-imperfect/im1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:31eb7ac8ccf6b90ca4b2f497083b78abcc1bc32103f63b2a18f152259ba613a4 3 | size 1764462 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[712.53 0 369.856; 0 712.53 239.634; 0 0 1] 2 | cam1=[712.53 0 369.856; 0 712.53 239.634; 0 0 1] 3 | doffs=0 4 | baseline=59.61 5 | width=715 6 | height=440 7 | ndisp=715 8 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/stereodemo_calibration.json: 
-------------------------------------------------------------------------------- 1 | {"width": 715, "height": 440, "fx": 712.53, "fy": 712.53, "cx0": 369.856, "cx1": 369.856, "cy": 239.634, "baseline_meters": 0.05961, "depth_range": [1.0, 20.0]} -------------------------------------------------------------------------------- /datasets/eth3d_lowres/README.md: -------------------------------------------------------------------------------- 1 | Subset of the low-res dataset from ETH3d. 2 | 3 | https://www.eth3d.net/datasets#low-res-two-view 4 | 5 | The calibration files were converted to json with the `convert_calib_txt.py` script. 6 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[543.757 0 520.961; 0 543.757 293.208; 0 0 1] 2 | cam1=[543.757 0 520.961; 0 543.757 293.208; 0 0 1] 3 | doffs=0 4 | baseline=59.8202 5 | width=927 6 | height=489 7 | ndisp=927 8 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/cameras.txt: -------------------------------------------------------------------------------- 1 | # Camera list with one line of data per camera: 2 | # CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[] 3 | # Number of cameras: 1 4 | 0 PINHOLE 927 489 543.757 543.757 520.961 293.208 5 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/cameras.txt: -------------------------------------------------------------------------------- 1 | # Camera list with one line of data per camera: 2 | # CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[] 3 | # Number of cameras: 1 4 | 0 PINHOLE 715 440 712.53 712.53 369.856 239.634 5 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/cameras.txt: -------------------------------------------------------------------------------- 1 | # Camera list with one line of data per camera: 2 | # CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[] 3 | # Number of cameras: 1 4 | 0 PINHOLE 942 489 541.764 541.764 553.869 232.396 5 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | {"width": 942, "height": 489, "fx": 541.764, "fy": 541.764, "cx0": 553.869, "cx1": 553.869, "cy": 232.396, "baseline_meters": 0.0599101, "depth_range": [1.0, 20.0]} -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/cameras.txt: -------------------------------------------------------------------------------- 1 | # Camera list with one line of data per camera: 2 | # CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[] 3 | # Number of cameras: 1 4 | 0 PINHOLE 942 489 541.764 541.764 553.682 232.397 5 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | {"width": 942, "height": 489, "fx": 541.764, "fy": 541.764, "cx0": 553.682, "cx1": 553.682, "cy": 232.397, "baseline_meters": 0.0598896, "depth_range": [1.0, 20.0]} -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/calib.txt: 
-------------------------------------------------------------------------------- 1 | cam0=[542.019 0 541.836; 0 542.019 255.198; 0 0 1] 2 | cam1=[542.019 0 541.836; 0 542.019 255.198; 0 0 1] 3 | doffs=0 4 | baseline=59.5549 5 | width=941 6 | height=490 7 | ndisp=941 8 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/cameras.txt: -------------------------------------------------------------------------------- 1 | # Camera list with one line of data per camera: 2 | # CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[] 3 | # Number of cameras: 1 4 | 0 PINHOLE 941 490 542.019 542.019 541.836 255.198 5 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-50-10-570_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:27e327ae2f5994558c7e704ecb40ce9ea4b8035a01a85122d120e2bcb3299886 3 | size 610121 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-57-53-937_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1c0951ff514b2e2c906c32c4a84633883e28a56856a506f5af82d26948c14605 3 | size 627374 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-02-03-700_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:df68948ba90d0ab36dc18c89bc575d02c5f8ff26007cc47400486bbf8620ba62 3 | size 700217 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-02-29-915_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8f76491357534b8ae434088103520ed6559ae0e9cafa30118e58054ce1db777a 3 | size 741667 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-26-56-946_left.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ed3ea0fbdedd93f04f8d3eca4e9407f80970612bedb1b43170f397f33e15f3fa 3 | size 465940 4 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[541.764 0 553.869; 0 541.764 232.396; 0 0 1] 2 | cam1=[541.764 0 553.869; 0 541.764 232.396; 0 0 1] 3 | doffs=0 4 | baseline=59.9101 5 | width=942 6 | height=489 7 | ndisp=942 8 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[541.764 0 553.682; 0 541.764 232.397; 0 0 1] 2 | cam1=[541.764 0 553.682; 0 541.764 232.397; 0 0 1] 3 | doffs=0 4 | baseline=59.8896 5 | width=942 6 | height=489 7 | ndisp=942 8 | 
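
The `stereodemo_calibration.json` files next to these ETH3D `calib.txt` files are generated by `datasets/eth3d_lowres/convert_calib_txt.py` (reproduced further down). For `delivery_area_2l` the mapping amounts to copying the shared intrinsics and converting the baseline from millimeters to meters; a minimal sketch, using only the numbers visible in its `calib.txt` above and its JSON counterpart:

```python
# Illustrative sketch of what convert_calib_txt.py emits for delivery_area_2l.
# All numbers come from the calib.txt above; the ETH3D baselines are given in mm.
cam0 = [[541.764, 0.0, 553.682],
        [0.0, 541.764, 232.397],
        [0.0, 0.0, 1.0]]
baseline_mm = 59.8896

calibration = {
    "width": 942,
    "height": 489,
    "fx": cam0[0][0],
    "fy": cam0[1][1],
    "cx0": cam0[0][2],
    "cx1": cam0[0][2],  # cam0 and cam1 share intrinsics in these rectified pairs
    "cy": cam0[1][2],
    "baseline_meters": baseline_mm * 1e-3,  # -> 0.0598896
    "depth_range": [1.0, 20.0],             # default used by the conversion script
}
```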
-------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | {"width": 927, "height": 489, "fx": 543.757, "fy": 543.757, "cx0": 520.961, "cx1": 520.961, "cy": 293.208, "baseline_meters": 0.059820200000000004, "depth_range": [1.0, 20.0]} -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | {"width": 941, "height": 490, "fx": 542.019, "fy": 542.019, "cx0": 541.836, "cx1": 541.836, "cy": 255.198, "baseline_meters": 0.05955490000000001, "depth_range": [1.0, 20.0]} -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import site 2 | import sys 3 | site.ENABLE_USER_SITE = "--user" in sys.argv[1:] 4 | 5 | # Everything is defined in setup.cfg, added this file only 6 | # to support editable mode. 7 | import setuptools 8 | setuptools.setup() 9 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Making a new release 2 | 3 | - Bump the version numbers in `setup.cfg` and `stereodemo/__init__.py` 4 | 5 | ``` 6 | ./build_release.sh 7 | twine upload dist/* 8 | ``` 9 | 10 | Username is always `__token__` 11 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-50-10-570_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:96cbbc87e2607a771195010629cd1c8812d721d6291cda1e64c4f8e8143c4b75 3 | size 608992 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-57-53-937_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:07848e6e6019dcf3c7a5eae2b519045f731147c5ea65441433a37bd4989f3d48 3 | size 629669 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-02-03-700_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d4dca91f9a5fcbc758552baf58e6619701c7baa9ad9d5d56d1c9525711b082f8 3 | size 702221 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-02-29-915_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fe508c208583fd1fb9696763a3ed401f48cfbc54a63af774cf9b2ad67dda5979 3 | size 724551 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-26-56-946_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 
2 | oid sha256:70805b98791a546a9881de2571cd647766b5f84bee4fd918ecba1957fb92cd78 3 | size 456132 4 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-15-34-51-679_right.jpg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8f9eb47de20fa4d383ffd693781483ac53cb30c4fad1890b8abeba32c4bdae44 3 | size 666677 4 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/README.md: -------------------------------------------------------------------------------- 1 | Small subset of the data of https://vision.middlebury.edu/stereo/data/scenes2014/ 2 | 3 | The images were converted to jpg since it's only meant to be used for visual evaluation, not accurate metrics. 4 | -------------------------------------------------------------------------------- /datasets/sceneflow/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 960, 3 | "height": 540, 4 | "fx": 1050.0, 5 | "fy": 1050.0, 6 | "cx0": 479.5, 7 | "cx1": 479.5, 8 | "cy": 269.5, 9 | "baseline_meters": 0.065, 10 | "depth_range": [0.1, 5.0] 11 | } 12 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Piano-imperfect/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 1280, 3 | "height": 871, 4 | "fx": 1291.206, 5 | "fy": 1291.206, 6 | "cx0": 644.564, 7 | "cx1": 701.304, 8 | "cy": 431.367, 9 | "baseline_meters": 0.178089, 10 | "depth_range": [1,3] 11 | } 12 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Playtable-imperfect/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 1280, 3 | "height": 872, 4 | "fx": 1102.096, 5 | "fy": 1102.096, 6 | "cx0": 537.248, 7 | "cx1": 587.958, 8 | "cy": 440.703, 9 | "baseline_meters": 0.193006, 10 | "depth_range": [1,3] 11 | } 12 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Piano-imperfect/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[2852.758 0 1424.085; 0 2852.758 953.053; 0 0 1] 2 | cam1=[2852.758 0 1549.445; 0 2852.758 953.053; 0 0 1] 3 | doffs=125.36 4 | baseline=178.089 5 | width=2828 6 | height=1924 7 | ndisp=260 8 | isint=0 9 | vmin=36 10 | vmax=218 11 | dyavg=0.408 12 | dymax=1.923 13 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/Playtable-imperfect/calib.txt: -------------------------------------------------------------------------------- 1 | cam0=[2341.955 0 1141.652; 0 2341.955 936.494; 0 0 1] 2 | cam1=[2341.955 0 1249.412; 0 2341.955 936.494; 0 0 1] 3 | doffs=107.76 4 | baseline=193.006 5 | width=2720 6 | height=1852 7 | ndisp=290 8 | isint=0 9 | vmin=27 10 | vmax=271 11 | dyavg=0.962 12 | dymax=2.665 13 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 881, 3 | "height": 400, 4 | "fx": 1003.556, 5 | "fy": 1003.556, 6 | "cx0": 455.689, 
7 | "cx1": 455.689, 8 | "cy": 197.6634, 9 | "baseline_meters": 0.5446133834145297, 10 | "depth_range": [10.0, 100.0] 11 | } 12 | -------------------------------------------------------------------------------- /datasets/oak-d/README.md: -------------------------------------------------------------------------------- 1 | These images were captured with my OAK-D Lite camera with the medium resolution setting (640x480). 2 | 3 | Rectification was done on device with the factory values. 4 | 5 | Note that there is a slightly annoying border in the left images that comes from the rectification. The disparity in that area should be ignored. 6 | -------------------------------------------------------------------------------- /datasets/kitti2015/stereo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment": "Not the actual calibration, just made it up for a nice display.", 3 | "width": 1242, 4 | "height": 375, 5 | "fx": 994, 6 | "fy": 994, 7 | "cx0": 621, 8 | "cx1": 621, 9 | "cy": 187.5, 10 | "baseline_meters": 0.54, 11 | "depth_range": [1.0, 100.0] 12 | } 13 | -------------------------------------------------------------------------------- /datasets/opencv-sample/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "comment": "Calibration made up manually for the results to look good.", 3 | "width": 1282, 4 | "height": 1110, 5 | "fx": 1025.6, 6 | "fy": 1025.6, 7 | "cx0": 641, 8 | "cx1": 641, 9 | "cy": 555, 10 | "baseline_meters": 0.0599101, 11 | "depth_range": [0.3, 2.0] 12 | } 13 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/electro_2l/images.txt: -------------------------------------------------------------------------------- 1 | # Image list with two lines of data per image: 2 | # IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME 3 | # POINTS2D[] as (X, Y, POINT3D_ID) 4 | # Number of images: 2 5 | 0 0.310023 0.347029 0.635813 -0.615791 1.20075 -3.54645 0.615411 0 im0.png 6 | 7 | 1 0.310023 0.347029 0.635813 -0.615791 1.14093 -3.54645 0.615411 0 im1.png 8 | 9 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/playground_1l/images.txt: -------------------------------------------------------------------------------- 1 | # Image list with two lines of data per image: 2 | # IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME 3 | # POINTS2D[] as (X, Y, POINT3D_ID) 4 | # Number of images: 2 5 | 0 0.549136 0.52558 0.458054 -0.460872 0.191419 1.06806 1.32414 0 im0.png 6 | 7 | 1 0.549136 0.52558 0.458054 -0.460872 0.131864 1.06806 1.32414 0 im1.png 8 | 9 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_1l/images.txt: -------------------------------------------------------------------------------- 1 | # Image list with two lines of data per image: 2 | # IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME 3 | # POINTS2D[] as (X, Y, POINT3D_ID) 4 | # Number of images: 2 5 | 0 0.64809 0.711282 -0.193094 0.191761 2.74638 -2.59869 -8.02903 0 im0.png 6 | 7 | 1 0.64809 0.711282 -0.193094 0.191761 2.68647 -2.59869 -8.02903 0 im1.png 8 | 9 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/delivery_area_2l/images.txt: -------------------------------------------------------------------------------- 1 | # Image list with two lines of data per 
image: 2 | # IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME 3 | # POINTS2D[] as (X, Y, POINT3D_ID) 4 | # Number of images: 2 5 | 0 0.658038 0.694861 0.20901 -0.20117 -3.19666 -2.90788 -7.30784 0 im0.png 6 | 7 | 1 0.658038 0.694861 0.20901 -0.20117 -3.25655 -2.90788 -7.30784 0 im1.png 8 | 9 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/forest_2s/images.txt: -------------------------------------------------------------------------------- 1 | # Image list with two lines of data per image: 2 | # IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME 3 | # POINTS2D[] as (X, Y, POINT3D_ID) 4 | # Number of images: 2 5 | 0 -0.21362 -0.394959 0.711046 -0.541098 -0.874382 1.30323 -2.45671 0 im0.png 6 | 7 | 1 -0.21362 -0.394959 0.711046 -0.541098 -0.933992 1.30323 -2.45671 0 im1.png 8 | 9 | -------------------------------------------------------------------------------- /datasets/oak-d/stereodemo_calibration.json: -------------------------------------------------------------------------------- 1 | { 2 | "width": 640, 3 | "height": 480, 4 | "baseline_meters": 0.075, 5 | "fx": 451.0344543457031, 6 | "fy": 451.0344543457031, 7 | "cx0": 299.03839111328125, 8 | "cx1": 299.03839111328125, 9 | "cy": 255.16502380371094, 10 | "depth_range": [1.0, 5.0], 11 | "left_image_rect_normalized": [0, 0.0417, 0.9547, 1] 12 | } 13 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | recursive-exclude * .vscode 4 | recursive-exclude * .github 5 | recursive-exclude * __pycache__ 6 | recursive-exclude datasets/drivingstereo * 7 | recursive-exclude datasets/eth3d_lowres * 8 | recursive-exclude datasets/kitti2015 * 9 | recursive-exclude datasets/middlebury_2014 * 10 | recursive-exclude datasets/opencv-sample * 11 | recursive-exclude datasets/sceneflow * 12 | -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | on: [push, pull_request, workflow_dispatch] 3 | jobs: 4 | build: 5 | runs-on: ${{ matrix.os }} 6 | strategy: 7 | matrix: 8 | os: ['ubuntu-latest', 'windows-latest', 'macos-latest'] 9 | steps: 10 | - uses: actions/checkout@v4 11 | with: 12 | lfs: true 13 | - uses: actions/setup-python@v5 14 | with: 15 | python-version: '3.11' 16 | - name: Install the library 17 | run: | 18 | pip install . 19 | - name: Run the unit tests 20 | run: | 21 | python3 tests/test_methods.py 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Nicolas Burrus 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | 8 | // { 9 | // "name": "Python: chang_realtimestereo_to_torchscript_onnx.py", 10 | // "type": "python", 11 | // "request": "launch", 12 | // "program": "${workspaceFolder}/tools/chang_realtimestereo_to_torchscript_onnx.py", 13 | // "cwd": "${workspaceFolder}", 14 | // "console": "integratedTerminal", 15 | // "justMyCode": false, 16 | // "args": [ 17 | // "../RealtimeStereo/", 18 | // "models/pretrained_Kitti2015_realtime.tar" 19 | // ], 20 | // }, 21 | 22 | { 23 | "name": "Python: Module", 24 | "type": "python", 25 | "request": "launch", 26 | "module": "stereodemo", 27 | "justMyCode": false, 28 | "args": [ 29 | "datasets", 30 | "--models-path", "models" 31 | ] 32 | }, 33 | ] 34 | } -------------------------------------------------------------------------------- /stereodemo/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | 5 | import tempfile 6 | import urllib.request 7 | import shutil 8 | import sys 9 | 10 | def download_model (url: str, model_path: Path): 11 | filename = model_path.name 12 | with tempfile.TemporaryDirectory() as d: 13 | tmp_file_path = Path(d) / filename 14 | print (f"Downloading {filename} from {url} to {model_path}...") 15 | urllib.request.urlretrieve(url, tmp_file_path) 16 | shutil.move (tmp_file_path, model_path) 17 | 18 | def pad_width (size: int, multiple: int): 19 | return 0 if size % multiple == 0 else multiple - (size%multiple) 20 | 21 | class ImagePadder: 22 | def __init__(self, multiple, mode): 23 | self.multiple = multiple 24 | self.mode = mode 25 | 26 | def pad (self, im: np.ndarray): 27 | # H,W,C 28 | rows = im.shape[0] 29 | cols = im.shape[1] 30 | self.rows_to_pad = pad_width(rows, self.multiple) 31 | self.cols_to_pad = pad_width(cols, self.multiple) 32 | if self.rows_to_pad == 0 and self.cols_to_pad == 0: 33 | return im 34 | return np.pad (im, ((0, self.rows_to_pad), (0, self.cols_to_pad), (0, 0)), mode=self.mode) 35 | 36 | def unpad (self, im: np.ndarray): 37 | w = im.shape[1] - self.cols_to_pad 38 | h = im.shape[0] - self.rows_to_pad 39 | return im[:h, :w, :] 40 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = stereodemo 3 | version = 0.6.2 4 | author = Nicolas Burrus 5 | author_email = nicolas@burrus.name 6 | description = Compare various stereo depth estimation algorithms on image files or with an OAK-D camera. 
7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/nburrus/stereodemo 10 | project_urls = 11 | Bug Tracker = https://github.com/nburrus/stereodemo/issues 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | License :: OSI Approved :: MIT License 15 | Operating System :: OS Independent 16 | 17 | [options] 18 | packages = find: 19 | python_requires = >=3.8 20 | # This might be conflicting with options.package_data and the MANIFEST.in 21 | # https://stackoverflow.com/questions/7522250/how-to-include-package-data-with-setuptools-distutils 22 | include_package_data = False 23 | setup_requires = setuptools_git 24 | install_requires = 25 | numpy 26 | onnxruntime >= 1.10.0; sys_platform == "darwin" 27 | onnxruntime-gpu >= 1.10.0; sys_platform != "darwin" 28 | opencv-python 29 | open3d >= 0.15.1 30 | torch >= 1.11.0 # previous version untested, might work? 31 | torchvision 32 | 33 | [options.entry_points] 34 | console_scripts = 35 | stereodemo = stereodemo:main 36 | 37 | [options.package_data] 38 | # This relies on a symlink to datasets existing under stereodemo/ 39 | # This is done by build_release.sh 40 | stereodemo = datasets/oak-d/*.png, datasets/oak-d/*.json 41 | -------------------------------------------------------------------------------- /datasets/middlebury_2014/convert_calib_txt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | import sys 6 | import json 7 | import re 8 | 9 | import numpy as np 10 | 11 | @dataclass 12 | class Calibration: 13 | width: int 14 | height: int 15 | fx: float 16 | fy: float 17 | cx0: float 18 | cx1: float 19 | cy: float 20 | baseline_meters: float 21 | 22 | def to_json(self): 23 | return json.dumps(self.__dict__) 24 | 25 | def from_json(json_str): 26 | d = json.loads(json_str) 27 | return Calibration(**d) 28 | 29 | # parse numpy array from a string [a b c ; d e f ; g h i] 30 | def parse_numpy_array(s): 31 | s = s.replace("[", "").replace("]", "").replace(";", " ") 32 | return np.fromstring(s, sep=" ").reshape(3,3) 33 | 34 | fields = {} 35 | 36 | input_path = Path(sys.argv[1]) 37 | 38 | with open(input_path) as f: 39 | for l in f: 40 | kv = l.split('=') 41 | k, v = kv 42 | fields[k] = v.strip() 43 | 44 | print (fields) 45 | 46 | K0 = parse_numpy_array(fields['cam0']) 47 | print (K0) 48 | 49 | K1 = parse_numpy_array(fields['cam1']) 50 | print (K1) 51 | 52 | assert np.count_nonzero(K0 != K1) <= 1 # only cx can differ 53 | 54 | calib = Calibration(int(fields['width']), int(fields['height']), K0[0,0], K0[1,1], K0[0,2], K1[0,2], K0[1,2], float(fields['baseline'])*1e-3) 55 | print (calib) 56 | 57 | output_json = input_path.parent / 'stereodemo_calibration.json' 58 | with open(output_json, 'w') as f: 59 | f.write (calib.to_json()) 60 | -------------------------------------------------------------------------------- /datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52.txt: -------------------------------------------------------------------------------- 1 | calib_time: 20180703 2 | corner_dist: 1.000000e-01 3 | S_101: 9.600000e+2 6.000000e+2 4 | K_101: 2.063200e+03 -5.000000e-01 9.783000e+02 0.000000e+00 2.062400e+03 5.847000e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_101: -8.770000e-02 1.257000e-01 6.159000e-04 6.038000e-04 0.000000e+00 6 | R_101: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 
0.000000e+00 1.000000e+00 7 | T_101: 0.000000e+00 0.000000e+00 0.000000e+00 8 | S_rect_101: 8.810000e+2 4.000000e+2 9 | P_rect_101: 1.003556e+3 0.000000e+0 4.556890e+2 0.000000e+0 0.000000e+0 1.003556e+3 1.976634e+2 0.000000e+0 0.000000e+0 0.000000e+0 1.000000e+0 0.000000e+0 10 | R_rect_101: 9.995925e-01 2.195515e-02 -1.824139e-02 -2.205491e-02 9.997428e-01 -5.286002e-03 1.812064e-02 5.686160e-03 9.998196e-01 11 | S_103: 9.600000e+2 6.000000e+2 12 | K_103: 2.063400e+03 -1.000000e-01 9.734000e+02 0.000000e+00 2.062600e+03 5.999000e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_103: -8.930000e-02 1.270000e-01 1.600000e-03 1.000000e-04 0.000000e+00 14 | R_103: 9.995332e-01 2.052896e-02 -2.262329e-02 -2.077736e-02 9.997258e-01 -1.079973e-02 2.239539e-02 1.126474e-02 9.996857e-01 15 | T_103: -5.446076e-01 -7.500610e-04 -2.395167e-03 16 | S_rect_103: 8.810000e+2 4.000000e+2 17 | P_rect_103: 1.003556e+3 0.000000e+0 4.556890e+2 -1.093101e+3 0.000000e+0 1.003556e+3 1.976634e+2 0.000000e+0 0.000000e+0 0.000000e+0 1.000000e+0 0.000000e+0 18 | R_rect_103: 9.999894e-01 1.377236e-03 4.397923e-03 -1.401345e-03 9.999840e-01 5.483573e-03 -4.390300e-03 -5.489678e-03 9.999753e-01 19 | -------------------------------------------------------------------------------- /datasets/eth3d_lowres/convert_calib_txt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | import sys 6 | import json 7 | import re 8 | from typing import Tuple 9 | 10 | import numpy as np 11 | 12 | @dataclass 13 | class Calibration: 14 | width: int 15 | height: int 16 | fx: float 17 | fy: float 18 | cx0: float 19 | cx1: float 20 | cy: float 21 | baseline_meters: float 22 | depth_range: Tuple[float] = (1.0, 20.0) 23 | 24 | def to_json(self): 25 | return json.dumps(self.__dict__) 26 | 27 | def from_json(json_str): 28 | d = json.loads(json_str) 29 | return Calibration(**d) 30 | 31 | # parse numpy array from a string [a b c ; d e f ; g h i] 32 | def parse_numpy_array(s): 33 | s = s.replace("[", "").replace("]", "").replace(";", " ") 34 | return np.fromstring(s, sep=" ").reshape(3,3) 35 | 36 | fields = {} 37 | 38 | input_path = Path(sys.argv[1]) 39 | 40 | with open(input_path) as f: 41 | for l in f: 42 | kv = l.split('=') 43 | k, v = kv 44 | fields[k] = v.strip() 45 | 46 | print (fields) 47 | 48 | K0 = parse_numpy_array(fields['cam0']) 49 | print (K0) 50 | 51 | K1 = parse_numpy_array(fields['cam1']) 52 | print (K1) 53 | 54 | assert np.all(K0 == K1) 55 | 56 | calib = Calibration(int(fields['width']), int(fields['height']), K0[0,0], K0[1,1], K0[0,2], K1[0,2], K0[1,2], float(fields['baseline'])*1e-3) 57 | print (calib) 58 | 59 | output_json = input_path.parent / 'stereodemo_calibration.json' 60 | with open(output_json, 'w') as f: 61 | f.write (calib.to_json()) 62 | -------------------------------------------------------------------------------- /datasets/drivingstereo/convert_kitti_calib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | import sys 6 | import json 7 | import re 8 | 9 | import numpy as np 10 | 11 | @dataclass 12 | class Calibration: 13 | width: int 14 | height: int 15 | fx: float 16 | fy: float 17 | cx0: float 18 | cx1: float 19 | cy: float 20 | baseline_meters: float 21 | 22 | def to_json(self): 23 | return json.dumps(self.__dict__) 24 | 25 | def from_json(json_str): 26 
| d = json.loads(json_str) 27 | return Calibration(**d) 28 | 29 | # parse numpy array from a string a b c d e f g h i 30 | def parse_numpy_array(s): 31 | return np.fromstring(s, sep=" ") 32 | 33 | fields = {} 34 | 35 | input_path = Path(sys.argv[1]) 36 | 37 | with open(input_path) as f: 38 | for l in f: 39 | kv = l.split(':') 40 | k, v = kv 41 | fields[k] = v.strip() 42 | 43 | print (fields) 44 | 45 | w0, h0 = parse_numpy_array(fields['S_rect_101']).reshape(2) 46 | w1, h1 = parse_numpy_array(fields['S_rect_103']).reshape(2) 47 | assert w0 == w1 and h0 == h1 48 | 49 | K0 = parse_numpy_array(fields['P_rect_101']).reshape(3, 4) 50 | print (K0) 51 | 52 | K1 = parse_numpy_array(fields['P_rect_103']).reshape(3, 4) 53 | print (K1) 54 | 55 | T = parse_numpy_array(fields['T_103']).reshape(3) 56 | 57 | assert (K0[0,0] == K1[0,0]) 58 | 59 | # https://stackoverflow.com/a/61684187/1737680 60 | # P(i)rect = [[fu 0 cx -fu*bx], 61 | # [0 fv cy -fv*by], 62 | # [0 0 1 0]] 63 | # baseline = -K1[0,3]/K1[0,0] # does not work, ~2x too large 64 | baseline = np.linalg.norm(T) 65 | calib = Calibration(int(w0), int(h0), K0[0,0], K0[1,1], K0[0,2], K1[0,2], K0[1,2], baseline_meters=baseline) 66 | print (calib) 67 | 68 | output_json = input_path.parent / 'stereodemo_calibration.json' 69 | with open(output_json, 'w') as f: 70 | f.write (calib.to_json()) 71 | -------------------------------------------------------------------------------- /tests/test_methods.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import tempfile 4 | import unittest 5 | from pathlib import Path 6 | 7 | import cv2 8 | import numpy as np 9 | import math 10 | 11 | from stereodemo import method_opencv_bm 12 | from stereodemo import method_chang_realtime_stereo 13 | from stereodemo import method_hitnet 14 | from stereodemo import method_cre_stereo 15 | from stereodemo import method_raft_stereo 16 | from stereodemo import method_sttr 17 | from stereodemo.methods import Config, InputPair, Calibration, StereoOutput, StereoMethod 18 | 19 | data_folder = Path(__file__).parent.parent / 'datasets' / 'eth3d_lowres' / 'delivery_area_1l' 20 | left_image = cv2.imread (str(data_folder / 'im0.png'), cv2.IMREAD_COLOR) 21 | right_image = cv2.imread (str(data_folder / 'im1.png'), cv2.IMREAD_COLOR) 22 | calib_file = data_folder / 'stereodemo_calibration.json' 23 | calibration = Calibration.from_json (open(calib_file, 'r').read()) 24 | input = InputPair (left_image, right_image, calibration, "Test images loaded.", None) 25 | models_path = Path(tempfile.gettempdir()) / 'models' 26 | models_path.mkdir(parents=True, exist_ok=True) 27 | config = Config(models_path) 28 | 29 | class TestStereoInference(unittest.TestCase): 30 | 31 | def check_method(self, method: StereoMethod, expected_median: float, expected_coverage: float): 32 | output = method.compute_disparity (input) 33 | valid_pixels = output.disparity_pixels[output.disparity_pixels > 0.] 
34 | coverage = valid_pixels.size / output.disparity_pixels.size 35 | median_value = np.median (valid_pixels) 36 | self.assertAlmostEqual (median_value, expected_median, delta=0.01) 37 | self.assertAlmostEqual (coverage, expected_coverage, delta=0.01) 38 | 39 | def test_bm(self): 40 | self.check_method (method_opencv_bm.StereoBM(config), 4.8125, 0.4403) 41 | 42 | def test_sgbm(self): 43 | self.check_method (method_opencv_bm.StereoSGBM(config), 5.1875, 0.8515) 44 | 45 | def test_chang_realtime(self): 46 | m = method_chang_realtime_stereo.ChangRealtimeStereo(config) 47 | m.parameters["Shape"].set_value ("320x240") 48 | self.check_method (m, 12.7776, 1.0) 49 | 50 | def test_hitnet(self): 51 | m = method_hitnet.HitnetStereo(config) 52 | m.parameters["Shape"].set_value ("320x240") 53 | self.check_method (m, 4.9103, 1.0) 54 | 55 | def test_crestereo(self): 56 | m = method_cre_stereo.CREStereo(config) 57 | m.parameters["Shape"].set_value ("320x240") 58 | self.check_method (m, 4.6287, 1.0) 59 | 60 | def test_raft_stereo(self): 61 | m = method_raft_stereo.RaftStereo(config) 62 | m.parameters["Shape"].set_value ("320x256") 63 | self.check_method (m, 4.6408, 1.0) 64 | 65 | def test_sttr(self): 66 | m = method_sttr.StereoTransformers(config) 67 | m.parameters["Shape"].set_value ("640x480 (ds3)") 68 | self.check_method (m, 7.4636, 0.9869) 69 | 70 | if __name__ == '__main__': 71 | unittest.main() 72 | -------------------------------------------------------------------------------- /stereodemo/method_dist_depth.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import tempfile 7 | import sys 8 | 9 | import torch 10 | from torchvision import transforms 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | from .methods import Calibration, Config, EnumParameter, StereoMethod, InputPair, StereoOutput 16 | from . import utils 17 | 18 | urls = { 19 | "dist-depth-256x256.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-distdepth/dist-depth-256x256.scripted.pt", 20 | } 21 | 22 | # https://github.com/facebookresearch/DistDepth 23 | # Exported via torch tracing by tweaking the original demo.py. 24 | # Changes here: https://github.com/nburrus/DistDepth/commit/fde3b427ef2ff31c34f08e99c51c8e6a2427b720 25 | class DistDepth(StereoMethod): 26 | def __init__(self, config: Config): 27 | super().__init__("[Monocular] DistDepth (CVPR 2022)", 28 | "Toward Practical Monocular Indoor Depth Estimation.", 29 | {}, 30 | config) 31 | self.reset_defaults() 32 | 33 | self.net = None 34 | self._loaded_model_path = None 35 | 36 | def reset_defaults(self): 37 | self.parameters.update ({ 38 | # "Device": EnumParameter("Device", 0, ["CPU", "CUDA"]), 39 | # For some reason it crashes with CUDA on my machine, disabling for now. 40 | "Device": EnumParameter("Device", 0, ["CPU"]), 41 | }) 42 | 43 | def compute_disparity(self, input: InputPair) -> StereoOutput: 44 | # The pre-trained model is for 256x256. Their demo script resizes 45 | # all input images to that. 
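# Note (added comment): unlike the stereo methods, this model only looks at the left
# image (it is monocular); it predicts metric depth directly, which _process_output
# below converts back to disparity so it can go through the same visualization path.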
46 | self.target_size = (256, 256) 47 | device = torch.device('cuda') if self.parameters["Device"].value == 'CUDA' else 'cpu' 48 | 49 | model_path = self.config.models_path / f'dist-depth-256x256.scripted.pt' 50 | self._load_model (model_path) 51 | 52 | # raw_img can stay in BGR 53 | raw_img = np.transpose(input.left_image, (2, 0, 1)) 54 | input_image = torch.from_numpy(raw_img).float().to(device) 55 | input_image = (input_image / 255.0).unsqueeze(0) 56 | input_image = torch.nn.functional.interpolate( 57 | input_image, (256, 256), mode="bilinear", align_corners=False 58 | ) 59 | 60 | net = self.net.to(device) 61 | 62 | start = time.time() 63 | with torch.no_grad(): 64 | outputs = net(input_image.to(device)) 65 | elapsed_time = time.time() - start 66 | 67 | disparity_map = self._process_output(outputs, input.calibration) 68 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 69 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 70 | # not need to scale, the disparity values were already for the input full resolution calibration. 71 | 72 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 73 | 74 | def _process_output(self, outputs, calib: Calibration): 75 | depth_meters = outputs[0].detach().squeeze(0).cpu().numpy() 76 | # The model directly gives a depth map in meters. Let's convert it 77 | # to disparity to fit in the stereo display. 78 | disparity_map = StereoMethod.disparity_from_depth_meters(depth_meters, calib) 79 | return disparity_map 80 | 81 | def _load_model(self, model_path: Path): 82 | if (self._loaded_model_path == model_path): 83 | return 84 | 85 | if not model_path.exists(): 86 | utils.download_model (urls[model_path.name], model_path) 87 | 88 | assert Path(model_path).exists() 89 | self._loaded_model_path = model_path 90 | self.net = torch.jit.load(model_path) 91 | self.net.cpu () 92 | self.net.eval () 93 | -------------------------------------------------------------------------------- /stereodemo/chang_realtime_stereo_onnx.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import tempfile 7 | import sys 8 | 9 | import onnxruntime 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | from .methods import EnumParameter, StereoMethod, InputPair, StereoOutput 15 | from . import utils 16 | 17 | urls = { 18 | "chang-realtime-stereo-cpu-1280x720.onnx": "", 19 | "chang-realtime-stereo-cpu-160x128.onnx": "", 20 | "chang-realtime-stereo-cpu-320x240.onnx": "", 21 | "chang-realtime-stereo-cpu-640x480.onnx": "", 22 | } 23 | 24 | # Adapted from https://github.com/ibaiGorordo/ONNX-CREStereo-Depth-Estimation 25 | # https://github.com/PINTO0309/PINTO_model_zoo/tree/main/284_CREStereo 26 | # IMPORTANT: these ONNX are not working, keeping in case things improve later on. 
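# Note (added comment): this wrapper also predates the Config refactor used by the other
# methods: it takes no Config argument and still references a `models_path` that is not
# defined in this file, so it would need the same updates as
# method_chang_realtime_stereo.py before it could be re-enabled.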
27 | class ChangRealtimeStereoOnnx(StereoMethod): 28 | def __init__(self): 29 | super().__init__("Chang Real-time Onnx", "Attention-Aware Feature Aggregation for Real-time Stereo Matching on Edge Devices (ACCV 2020).", {}) 30 | self.reset_defaults() 31 | 32 | self._loaded_session = None 33 | self._loaded_model_path = None 34 | 35 | def reset_defaults(self): 36 | self.parameters.update ({ 37 | "Shape": EnumParameter("Processed image size", 1, ["160x128", "320x240", "640x480", "1280x720"]) 38 | }) 39 | 40 | def compute_disparity(self, input: InputPair) -> StereoOutput: 41 | if not models_path.exists(): 42 | models_path.mkdir(parents=True, exist_ok=True) 43 | 44 | cols, rows = self.parameters["Shape"].value.split('x') 45 | cols, rows = int(cols), int(rows) 46 | self.target_size = (cols, rows) 47 | 48 | model_path = models_path / f'chang-realtime-stereo-cpu-{cols}x{rows}.onnx' 49 | self._load_model (model_path) 50 | 51 | left_tensor = self._preprocess_input(input.left_image) 52 | right_tensor = self._preprocess_input(input.right_image) 53 | 54 | start = time.time() 55 | model_inputs = self._loaded_session.get_inputs() 56 | model_outputs = self._loaded_session.get_outputs() 57 | input_names = [model_inputs[i].name for i in range(len(model_inputs))] 58 | output_names = [model_outputs[i].name for i in range(len(model_outputs))] 59 | outputs = self._loaded_session.run(['disparity'], {'left': left_tensor, 60 | 'right': right_tensor}) 61 | elapsed_time = time.time() - start 62 | 63 | disparity_map = self._process_output(outputs) 64 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 65 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 66 | x_scale = input.left_image.shape[1] / float(cols) 67 | disparity_map *= np.float32(x_scale) 68 | 69 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 70 | 71 | def _preprocess_input (self, img: np.ndarray): 72 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 73 | img = cv2.resize(img, self.target_size, cv2.INTER_AREA) 74 | imagenet_stats = {'mean': np.array([0.485, 0.456, 0.406]), 'std': np.array([0.229, 0.224, 0.225])} 75 | img = (img.astype(np.float32) / 255.0) - imagenet_stats['mean'] / imagenet_stats['std'] 76 | img = img.transpose(2, 0, 1) # C,H,W instead of H,W,C 77 | img = img[np.newaxis, :, :, :] # add batch dimension 78 | return img 79 | 80 | def _process_output(self, outputs): 81 | disparity_map = outputs[0].permute(1,2,0) 82 | return disparity_map 83 | 84 | def _load_model(self, model_path: Path): 85 | if (self._loaded_model_path == model_path): 86 | return 87 | 88 | if not model_path.exists(): 89 | utils.download_model (urls[model_path.name], model_path) 90 | 91 | assert Path(model_path).exists() 92 | self._loaded_model_path = model_path 93 | self._loaded_session = onnxruntime.InferenceSession(str(model_path), providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) 94 | -------------------------------------------------------------------------------- /stereodemo/methods.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from dataclasses import dataclass, field 3 | from pathlib import Path 4 | from typing import Any, Dict, List, Tuple 5 | import time 6 | import json 7 | import numpy as np 8 | 9 | 10 | @dataclass 11 | class Calibration: 12 | width: int 13 | height: int 14 | fx: float 15 | fy: float 16 | cx0: float # cx is the only one that can differ between both cameras. 
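# (added comment) The difference (cx0 - cx1) is the disparity of a point at infinity
# when the two rectified views keep their own principal points; it is subtracted from
# the raw disparity in depth_meters_from_disparity below:
#   depth = baseline_meters * fx / (disparity - (cx0 - cx1))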
17 | cx1: float 18 | cy: float 19 | baseline_meters: float 20 | depth_range: Tuple[float] = (0.3, 20.0) 21 | left_image_rect_normalized: np.ndarray = field( 22 | default_factory=lambda: np.array([0., 0., 1., 1.])) # origin, size in percent of image size 23 | comment: str = "" 24 | 25 | def to_json(self): 26 | return json.dumps(self.__dict__) 27 | 28 | @staticmethod 29 | def from_json(json_str): 30 | d = json.loads(json_str) 31 | return Calibration(**d) 32 | 33 | def downsample(self, new_width: int, new_height: int): 34 | sx = new_width / self.width 35 | sy = new_height / self.height 36 | self.width = new_width 37 | self.height = new_height 38 | self.fx *= sx 39 | self.fy *= sy 40 | self.cx0 *= sx 41 | self.cx1 *= sx 42 | self.cy *= sy 43 | 44 | 45 | @dataclass 46 | class InputPair: 47 | left_image: np.ndarray 48 | right_image: np.ndarray 49 | calibration: Calibration 50 | status: str 51 | input_disparity: np.ndarray = None 52 | 53 | def has_data(self): 54 | return self.left_image is not None 55 | 56 | 57 | @dataclass 58 | class StereoOutput: 59 | disparity_pixels: np.ndarray 60 | color_image_bgr: np.ndarray 61 | computation_time: float 62 | point_cloud: Any = None 63 | disparity_color: np.ndarray = None 64 | 65 | 66 | @dataclass 67 | class IntParameter: 68 | description: str 69 | value: int 70 | min: int 71 | max: int 72 | to_valid: Any = lambda x: x # default is to just accept anything 73 | 74 | def set_value(self, x: int): 75 | self.value = self.to_valid(x) 76 | 77 | 78 | @dataclass 79 | class EnumParameter: 80 | description: str 81 | index: int # index in the list 82 | values: List[str] 83 | 84 | def set_index(self, idx: int): 85 | self.index = idx 86 | 87 | def set_value(self, value): 88 | self.index = self.values.index(value) 89 | 90 | @property 91 | def value(self) -> str: 92 | return self.values[self.index] 93 | 94 | 95 | @dataclass 96 | class Config: 97 | models_path: Path 98 | 99 | 100 | class StereoMethod: 101 | def __init__(self, name: str, description: str, parameters: Dict, config: Config): 102 | self.name = name 103 | self.parameters = parameters 104 | self.description = description 105 | self.config = config 106 | 107 | def reset_defaults(self): 108 | pass 109 | 110 | @abstractmethod 111 | def compute_disparity(self, input: InputPair) -> StereoOutput: 112 | """Return the disparity map in pixels and the actual computation time. 113 | 114 | Both input images are assumed to be rectified. 115 | """ 116 | return StereoOutput(None, None, None, None) 117 | 118 | @staticmethod 119 | def depth_meters_from_disparity(disparity_pixels: np.ndarray, calibration: Calibration): 120 | old_seterr = np.seterr(divide='ignore') 121 | dcx = np.float32(calibration.cx0 - calibration.cx1) 122 | depth_meters = np.float32(calibration.baseline_meters * calibration.fx) / (disparity_pixels - dcx) 123 | depth_meters = np.nan_to_num(depth_meters) 124 | depth_meters[disparity_pixels < 0.] 
= -1.0 125 | np.seterr(**old_seterr) 126 | return depth_meters 127 | 128 | @staticmethod 129 | def disparity_from_depth_meters(depth_meters: np.ndarray, calibration: Calibration): 130 | old_seterr = np.seterr(divide='ignore') 131 | dcx = np.float32(calibration.cx0 - calibration.cx1) 132 | disparity_pixels = (np.float32(calibration.baseline_meters * calibration.fx) / depth_meters) + dcx 133 | disparity_pixels = np.nan_to_num(disparity_pixels) 134 | np.seterr(**old_seterr) 135 | return disparity_pixels -------------------------------------------------------------------------------- /stereodemo/method_chang_realtime_stereo.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import tempfile 7 | import sys 8 | 9 | import torch 10 | from torchvision import transforms 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | from .methods import Config, EnumParameter, StereoMethod, InputPair, StereoOutput 16 | from . import utils 17 | 18 | urls = { 19 | "chang-realtime-stereo-cpu-1280x720.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-chang-realtimestereo/chang-realtime-stereo-cpu-1280x720.scripted.pt", 20 | "chang-realtime-stereo-cpu-160x128.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-chang-realtimestereo/chang-realtime-stereo-cpu-160x128.scripted.pt", 21 | "chang-realtime-stereo-cpu-320x240.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-chang-realtimestereo/chang-realtime-stereo-cpu-320x240.scripted.pt", 22 | "chang-realtime-stereo-cpu-640x480.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-chang-realtimestereo/chang-realtime-stereo-cpu-640x480.scripted.pt", 23 | } 24 | 25 | # https://github.com/JiaRenChang/RealtimeStereo 26 | # I exported the pytorch implementation to torch script via tracing with 27 | # some minor changes to the code https://github.com/JiaRenChang/RealtimeStereo/pull/15 28 | # See chang_realtimestereo_to_torchscript_onnx.py 29 | class ChangRealtimeStereo(StereoMethod): 30 | def __init__(self, config: Config): 31 | super().__init__("Chang Real-time (ACCV 2020)", 32 | "Attention-Aware Feature Aggregation for Real-time Stereo Matching on Edge Devices. 
Pre-trained on SceneFlow + Kitti 2015.", 33 | {}, 34 | config) 35 | self.reset_defaults() 36 | 37 | self.net = None 38 | self._loaded_model_path = None 39 | 40 | imagenet_stats = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]} 41 | self.img_to_tensor_transforms = transforms.Compose([ 42 | transforms.ToTensor(), 43 | transforms.Normalize(**imagenet_stats), 44 | ]) 45 | 46 | def reset_defaults(self): 47 | self.parameters.update ({ 48 | "Shape": EnumParameter("Processed image size", 2, ["160x128", "320x240", "640x480", "1280x720"]) 49 | }) 50 | 51 | def compute_disparity(self, input: InputPair) -> StereoOutput: 52 | cols, rows = self.parameters["Shape"].value.split('x') 53 | cols, rows = int(cols), int(rows) 54 | self.target_size = (cols, rows) 55 | 56 | model_path = self.config.models_path / f'chang-realtime-stereo-cpu-{cols}x{rows}.scripted.pt' 57 | self._load_model (model_path) 58 | 59 | left_tensor = self._preprocess_input(input.left_image) 60 | right_tensor = self._preprocess_input(input.right_image) 61 | 62 | start = time.time() 63 | with torch.no_grad(): 64 | outputs = self.net(left_tensor, right_tensor) 65 | elapsed_time = time.time() - start 66 | 67 | disparity_map = self._process_output(outputs) 68 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 69 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 70 | x_scale = input.left_image.shape[1] / float(cols) 71 | disparity_map *= np.float32(x_scale) 72 | 73 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 74 | 75 | def _preprocess_input (self, img: np.ndarray): 76 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 77 | img = cv2.resize(img, self.target_size, cv2.INTER_AREA) 78 | return self.img_to_tensor_transforms (img).unsqueeze(0) 79 | 80 | def _process_output(self, outputs): 81 | disparity_map = outputs[0].detach().cpu().permute(1,2,0).numpy() 82 | return disparity_map 83 | 84 | def _load_model(self, model_path: Path): 85 | if (self._loaded_model_path == model_path): 86 | return 87 | 88 | if not model_path.exists(): 89 | utils.download_model (urls[model_path.name], model_path) 90 | 91 | assert Path(model_path).exists() 92 | self._loaded_model_path = model_path 93 | self.net = torch.jit.load(model_path) 94 | self.net.cpu () 95 | self.net.eval () 96 | -------------------------------------------------------------------------------- /stereodemo/oakd_source.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | import time 4 | 5 | from . 
import visualizer 6 | from .methods import Config, InputPair, StereoMethod, StereoOutput 7 | 8 | import cv2 9 | 10 | try: 11 | import depthai as dai 12 | except ImportError: 13 | print ("You need to 'pip install depthai' to use the OAK camera.") 14 | sys.exit (1) 15 | 16 | def getFrame(queue): 17 | # Get frame from queue 18 | frame = queue.get() 19 | # Convert frame to OpenCV format and return 20 | return frame.getCvFrame() 21 | 22 | def getMonoCamera(pipeline, isLeft): 23 | # Configure mono camera 24 | mono = pipeline.createMonoCamera() 25 | 26 | # Set Camera Resolution 27 | mono.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P) 28 | 29 | if isLeft: 30 | # Get left camera 31 | mono.setBoardSocket(dai.CameraBoardSocket.LEFT) 32 | else : 33 | # Get right camera 34 | mono.setBoardSocket(dai.CameraBoardSocket.RIGHT) 35 | return mono 36 | 37 | def getStereoPair(pipeline, monoLeft, monoRight): 38 | # Configure stereo pair for depth estimation 39 | stereo = pipeline.createStereoDepth() 40 | # Checks occluded pixels and marks them as invalid 41 | stereo.setLeftRightCheck(True) 42 | 43 | # Configure left and right cameras to work as a stereo pair 44 | monoLeft.out.link(stereo.left) 45 | monoRight.out.link(stereo.right) 46 | 47 | return stereo 48 | 49 | class OakdSource (visualizer.Source): 50 | def __init__(self, output_folder: Path = None): 51 | self.connect () 52 | self.output_folder = output_folder 53 | self.frameIndex = 0 54 | 55 | def connect (self): 56 | print ("Trying to connect to an OAK camera...") 57 | pipeline = dai.Pipeline() 58 | 59 | # Set up left and right cameras 60 | monoLeft = getMonoCamera(pipeline, isLeft = True) 61 | monoRight = getMonoCamera(pipeline, isLeft = False) 62 | 63 | # Combine left and right cameras to form a stereo pair 64 | stereo = getStereoPair(pipeline, monoLeft, monoRight) 65 | 66 | 67 | # Set XlinkOut for disparity, rectifiedLeft, and rectifiedRight 68 | xoutDisp = pipeline.createXLinkOut() 69 | xoutDisp.setStreamName("disparity") 70 | 71 | # xoutDepth = pipeline.create(dai.node.XLinkOut) 72 | # xoutDepth.setStreamName("depth") 73 | 74 | xoutRectifiedLeft = pipeline.createXLinkOut() 75 | xoutRectifiedLeft.setStreamName("rectifiedLeft") 76 | 77 | xoutRectifiedRight = pipeline.createXLinkOut() 78 | xoutRectifiedRight.setStreamName("rectifiedRight") 79 | 80 | stereo.disparity.link(xoutDisp.input) 81 | 82 | stereo.rectifiedLeft.link(xoutRectifiedLeft.input) 83 | stereo.rectifiedRight.link(xoutRectifiedRight.input) 84 | # stereo.depth.link(xoutDepth.input) 85 | 86 | self.device = dai.Device(pipeline).__enter__() 87 | 88 | oak_calib = self.device.readCalibration() 89 | w, h = monoLeft.getResolutionSize() 90 | 91 | # Intrinsics of disparity are taken from the right image. 
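# (added comment) Both rectified streams share the right camera intrinsics, which is why
# cx0 and cx1 are filled with the same value below. getBaselineDistance() reports the
# baseline in centimeters, hence the 1e-2 factor to convert it to meters.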
92 | disparityIntrinsics = oak_calib.getCameraIntrinsics(dai.CameraBoardSocket.RIGHT, dai.Size2f(w, h)) 93 | baselineMeters = 1e-2 * oak_calib.getBaselineDistance(dai.CameraBoardSocket.LEFT, dai.CameraBoardSocket.RIGHT) 94 | 95 | self.calibration = visualizer.Calibration(w, h, 96 | fx=disparityIntrinsics[0][0], 97 | fy=disparityIntrinsics[1][1], 98 | cx0=disparityIntrinsics[0][2], 99 | cx1=disparityIntrinsics[0][2], 100 | cy=disparityIntrinsics[1][2], 101 | baseline_meters=baselineMeters) 102 | 103 | # Output queues will be used to get the rgb frames and nn data from the outputs defined above 104 | self.disparityQueue = self.device.getOutputQueue(name="disparity", maxSize=1, blocking=False) 105 | self.rectifiedLeftQueue = self.device.getOutputQueue(name="rectifiedLeft", maxSize=1, blocking=False) 106 | self.rectifiedRightQueue = self.device.getOutputQueue(name="rectifiedRight", maxSize=1, blocking=False) 107 | # depthQueue = self.device.getOutputQueue(name="depth", maxSize=1, blocking=False) 108 | 109 | def is_live(self): 110 | return True 111 | 112 | def get_next_pair(self): 113 | leftFrame = getFrame(self.rectifiedLeftQueue) 114 | rightFrame = getFrame(self.rectifiedRightQueue) 115 | if self.output_folder is not None: 116 | self.output_folder.mkdir(parents=True, exist_ok=True) 117 | cv2.imwrite(str(self.output_folder / f"img_{self.frameIndex:03d}_left.png"), leftFrame) 118 | cv2.imwrite(str(self.output_folder / f"img_{self.frameIndex:03d}_right.png"), rightFrame) 119 | disparityPixels = getFrame(self.disparityQueue) 120 | leftFrame = cv2.cvtColor (leftFrame, cv2.COLOR_GRAY2RGB) 121 | rightFrame = cv2.cvtColor (rightFrame, cv2.COLOR_GRAY2RGB) 122 | self.frameIndex += 1 123 | return visualizer.InputPair(leftFrame, rightFrame, self.calibration, "OAK-D Camera", disparityPixels) 124 | 125 | class StereoFromOakInputSource(StereoMethod): 126 | def __init__(self, config: Config): 127 | super().__init__("Input Source", "Stereo computed by the input source", {}, config) 128 | 129 | def compute_disparity(self, input: InputPair) -> StereoOutput: 130 | # The disparity is aligned to the right image with OAK-D 131 | return StereoOutput(input.input_disparity, input.right_image, 0.0) 132 | -------------------------------------------------------------------------------- /stereodemo/method_opencv_bm.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | import cv2 5 | 6 | from .methods import Config, StereoMethod, IntParameter, EnumParameter, InputPair, StereoOutput 7 | 8 | def odd_only(x): 9 | return x if x % 2 == 1 else x+1 10 | 11 | def multiple_of_16(x): 12 | return max(16, x//16 * 16) 13 | 14 | # Default parameters taken from 15 | # https://github.com/opencv/opencv/blob/4.x/samples/cpp/stereo_match.cpp 16 | class StereoBM(StereoMethod): 17 | def __init__(self, config: Config): 18 | super().__init__("OpenCV BM", "OpenCV Simple Block Matching", {}, config) 19 | self.reset_defaults() 20 | 21 | def reset_defaults(self): 22 | # For more details: 23 | # https://learnopencv.com/depth-perception-using-stereo-camera-python-c/ 24 | self.parameters.update ({ 25 | "Num Disparities": IntParameter("Number of disparities (pixels)", 128, 16, 640, to_valid=multiple_of_16), 26 | "Block Size": IntParameter("Kernel size for block matching (odd)", 9, 5, 63, to_valid=odd_only), 27 | "TextureThreshold": IntParameter("Minimum SAD to consider the texture sufficient", 10, 0, 100), 28 | "Uniqueness Ratio": IntParameter("How unique the match each for 
each pixel", 15, 0, 100), 29 | "SpeckleWindowSize": IntParameter("Speckle window size in pixels (filter small objects). 0 to disable.", 100, 0, 1000), 30 | "SpeckleRange": IntParameter("Speckle range (max diff within a window)", 32, 0, 64), 31 | "Disp12MaxDiff": IntParameter("Maximum allowable difference in the right-left check", 1, 0, 64), 32 | "PreFilterCap": IntParameter("Max pre-filter output", 31, 1, 63), 33 | "PreFilterSize": IntParameter("Pre-filter size (odd)", 9, 5, 255, to_valid=odd_only), 34 | }) 35 | 36 | def compute_disparity(self, input: InputPair) -> StereoOutput: 37 | left_image, right_image = input.left_image, input.right_image 38 | block_size = self.parameters['Block Size'].value 39 | if block_size % 2 == 0: 40 | block_size += 1 41 | stereoBM = cv2.StereoBM_create(numDisparities=self.parameters['Num Disparities'].value, 42 | blockSize=block_size) 43 | stereoBM.setTextureThreshold(self.parameters['TextureThreshold'].value) 44 | stereoBM.setUniquenessRatio(self.parameters['Uniqueness Ratio'].value) 45 | stereoBM.setSpeckleWindowSize(self.parameters['SpeckleWindowSize'].value) 46 | stereoBM.setSpeckleRange(self.parameters['SpeckleRange'].value) 47 | stereoBM.setDisp12MaxDiff(self.parameters['Disp12MaxDiff'].value) 48 | stereoBM.setPreFilterCap(self.parameters['PreFilterCap'].value) 49 | stereoBM.setPreFilterSize(self.parameters['PreFilterSize'].value) 50 | 51 | gray_left = cv2.cvtColor(left_image, cv2.COLOR_BGR2GRAY) 52 | gray_right = cv2.cvtColor(right_image, cv2.COLOR_BGR2GRAY) 53 | # OpenCV returns 16x the disparity in pixels 54 | start = time.time() 55 | disparity = stereoBM.compute(gray_left, gray_right) / np.float32(16.0) 56 | return StereoOutput(disparity, input.left_image, time.time()-start) 57 | 58 | class StereoSGBM(StereoMethod): 59 | def __init__(self, config: Config): 60 | super().__init__("OpenCV SGBM", "OpenCV Semi-Global Block Matching", {}, config) 61 | self.reset_defaults () 62 | 63 | def reset_defaults(self): 64 | nchannels = 1 65 | # For more details: 66 | # https://learnopencv.com/depth-perception-using-stereo-camera-python-c/ 67 | self.parameters.update ({ 68 | "Num Disparities": IntParameter("Number of disparities (pixels)", 128, 2, 640), 69 | "Block Size": IntParameter("Kernel size for block matching (odd)", 3, 3, 63, to_valid=odd_only), 70 | 71 | "Mode": EnumParameter("Set it to StereoSGBM::MODE_HH to run the full-scale two-pass dynamic programming algorithm. It will consume O(W*H*numDisparities) bytes, which is large for 640x480 stereo and huge for HD-size pictures. By default, it is set to false .", 72 | 0, ["MODE_SGBM", "MODE_HH", "MODE_SGBM_3WAY", "MODE_HH4"]), 73 | 74 | 75 | "P1": IntParameter("Penalty Cost (default=8*NChannels*BlockSize)", 8*nchannels*3*3, 0, 2000), 76 | "P2": IntParameter("Penalty Cost. Must be > P1 (default=32*NChannels*BlockSize).", 32*nchannels*3*3, 0, 2000), 77 | 78 | "Uniqueness Ratio": IntParameter("How unique the match each for each pixel", 10, 0, 100), 79 | "SpeckleWindowSize": IntParameter("Speckle window size in pixels (filter small objects). 
0 to disable.", 100, 0, 1000), 80 | "SpeckleRange": IntParameter("Speckle range (max diff within a window)", 32, 0, 64), 81 | 82 | "Disp12MaxDiff": IntParameter("Maximum allowable difference in the right-left check", 1, 0, 64), 83 | "PreFilterCap": IntParameter("Max pre-filter output", 63, 1, 128), 84 | }) 85 | 86 | def compute_disparity(self, input: InputPair) -> StereoOutput: 87 | left_image, right_image = input.left_image, input.right_image 88 | stereoSGBM = cv2.StereoSGBM_create(numDisparities=self.parameters['Num Disparities'].value, 89 | blockSize=self.parameters['Block Size'].value) 90 | 91 | stereoSGBM.setMode(self.parameters['Mode'].index) 92 | stereoSGBM.setP1(self.parameters['P1'].value) 93 | stereoSGBM.setP2(self.parameters['P2'].value) 94 | stereoSGBM.setPreFilterCap(self.parameters['PreFilterCap'].value) 95 | stereoSGBM.setUniquenessRatio(self.parameters['Uniqueness Ratio'].value) 96 | stereoSGBM.setSpeckleWindowSize(self.parameters['SpeckleWindowSize'].value) 97 | stereoSGBM.setSpeckleRange(self.parameters['SpeckleRange'].value) 98 | stereoSGBM.setDisp12MaxDiff(self.parameters['Disp12MaxDiff'].value) 99 | 100 | gray_left = cv2.cvtColor(left_image, cv2.COLOR_BGR2GRAY, left_image) 101 | gray_right = cv2.cvtColor(right_image, cv2.COLOR_BGR2GRAY, right_image) 102 | # OpenCV returns 16x the disparity in pixels 103 | start = time.time() 104 | disparity = stereoSGBM.compute(gray_left, gray_right) / np.float32(16.0) 105 | return StereoOutput(disparity, input.left_image, time.time()-start) 106 | -------------------------------------------------------------------------------- /tools/capture_oakd_frames.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import depthai as dai 3 | import numpy as np 4 | import json 5 | 6 | # Adapted from LearnOpenCV 7 | # https://github.com/spmallick/learnopencv/tree/master/oak-getting-started 8 | 9 | def getFrame(queue): 10 | # Get frame from queue 11 | frame = queue.get() 12 | # Convert frame to OpenCV format and return 13 | return frame.getCvFrame() 14 | 15 | def getMonoCamera(pipeline, isLeft): 16 | # Configure mono camera 17 | mono = pipeline.createMonoCamera() 18 | 19 | # Set Camera Resolution 20 | mono.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P) 21 | 22 | if isLeft: 23 | # Get left camera 24 | mono.setBoardSocket(dai.CameraBoardSocket.LEFT) 25 | else : 26 | # Get right camera 27 | mono.setBoardSocket(dai.CameraBoardSocket.RIGHT) 28 | return mono 29 | 30 | 31 | def getStereoPair(pipeline, monoLeft, monoRight): 32 | # Configure stereo pair for depth estimation 33 | stereo = pipeline.createStereoDepth() 34 | # Checks occluded pixels and marks them as invalid 35 | stereo.setLeftRightCheck(True) 36 | 37 | # Configure left and right cameras to work as a stereo pair 38 | monoLeft.out.link(stereo.left) 39 | monoRight.out.link(stereo.right) 40 | 41 | return stereo 42 | 43 | if __name__ == '__main__': 44 | 45 | # Start defining a pipeline 46 | pipeline = dai.Pipeline() 47 | 48 | # Set up left and right cameras 49 | monoLeft = getMonoCamera(pipeline, isLeft = True) 50 | monoRight = getMonoCamera(pipeline, isLeft = False) 51 | 52 | # Combine left and right cameras to form a stereo pair 53 | stereo = getStereoPair(pipeline, monoLeft, monoRight) 54 | 55 | # Set XlinkOut for disparity, rectifiedLeft, and rectifiedRight 56 | xoutDisp = pipeline.createXLinkOut() 57 | xoutDisp.setStreamName("disparity") 58 | 59 | xoutDepth = pipeline.create(dai.node.XLinkOut) 60 | 
xoutDepth.setStreamName("depth") 61 | 62 | xoutRectifiedLeft = pipeline.createXLinkOut() 63 | xoutRectifiedLeft.setStreamName("rectifiedLeft") 64 | 65 | xoutRectifiedRight = pipeline.createXLinkOut() 66 | xoutRectifiedRight.setStreamName("rectifiedRight") 67 | 68 | stereo.disparity.link(xoutDisp.input) 69 | 70 | stereo.rectifiedLeft.link(xoutRectifiedLeft.input) 71 | stereo.rectifiedRight.link(xoutRectifiedRight.input) 72 | stereo.depth.link(xoutDepth.input) 73 | 74 | # Pipeline is defined, now we can connect to the device 75 | 76 | with dai.Device(pipeline) as device: 77 | 78 | calib = device.readCalibration() 79 | w, h = monoLeft.getResolutionSize() 80 | 81 | # The rectified stereo images' intrinsics both correspond to the right camera intrinsics. 82 | disparityIntrinsics = calib.getCameraIntrinsics(dai.CameraBoardSocket.RIGHT, dai.Size2f(w, h)) 83 | baselineMeters = 1e-2 * calib.getBaselineDistance(dai.CameraBoardSocket.LEFT, dai.CameraBoardSocket.RIGHT) 84 | with open('stereodemo-calibration.json', 'w') as f: 85 | d = dict(baseline_meters=baselineMeters, 86 | fx=disparityIntrinsics[0][0], 87 | fy=disparityIntrinsics[1][1], 88 | cx0=disparityIntrinsics[0][2], 89 | cx1=disparityIntrinsics[0][2], 90 | cy=disparityIntrinsics[1][2]) 91 | f.write(json.dumps(d)) 92 | 93 | # Output queues will be used to get the rgb frames and nn data from the outputs defined above 94 | disparityQueue = device.getOutputQueue(name="disparity", maxSize=1, blocking=False) 95 | rectifiedLeftQueue = device.getOutputQueue(name="rectifiedLeft", maxSize=1, blocking=False) 96 | rectifiedRightQueue = device.getOutputQueue(name="rectifiedRight", maxSize=1, blocking=False) 97 | depthQueue = device.getOutputQueue(name="depth", maxSize=1, blocking=False) 98 | 99 | # Calculate a multiplier for colormapping the disparity map 100 | disparityMultiplier = 255 / stereo.getMaxDisparity() 101 | 102 | cv2.namedWindow("Stereo Pair") 103 | 104 | # Variable used to toggle between side by side view and one frame view. 
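# (added comment) When it is False the two rectified frames are averaged instead of shown
# side by side; with a good rectification the overlaid views should only differ by a
# horizontal shift, so the blended image is a quick visual sanity check.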
105 | sideBySide = False 106 | 107 | print ("Press 's' to save an image") 108 | 109 | save_frame_id = 0 110 | while True: 111 | 112 | # Get disparity map 113 | disparityPixels = getFrame(disparityQueue) 114 | 115 | # Colormap disparity for display 116 | disparity = (disparityPixels * disparityMultiplier).astype(np.uint8) 117 | disparity = cv2.applyColorMap(disparity, cv2.COLORMAP_JET) 118 | 119 | # Depth 120 | depthMm = getFrame(depthQueue) 121 | centralDepthMm = depthMm[h//2, w//2] 122 | centralDisp = disparityPixels[h//2, w//2] 123 | depthFromDispMeters = (baselineMeters * disparityIntrinsics[0][0]) / centralDisp 124 | # print (f"Central pixel depth = {centralDepthMm} disparity_raw = {centralDisp} depthFromDispMeters={depthFromDispMeters}") 125 | 126 | # Get left and right rectified frame 127 | leftFrame = getFrame(rectifiedLeftQueue) 128 | rightFrame = getFrame(rectifiedRightQueue) 129 | 130 | if sideBySide: 131 | # Show side by side view 132 | imOut = np.hstack((leftFrame, rightFrame)) 133 | else : 134 | # Show overlapping frames 135 | imOut = np.uint8(leftFrame/2 + rightFrame/2) 136 | 137 | imOut = cv2.cvtColor(imOut,cv2.COLOR_GRAY2RGB) 138 | cv2.imshow("Stereo Pair", imOut) 139 | cv2.imshow("Disparity", disparity) 140 | 141 | # Check for keyboard input 142 | key = cv2.waitKey(1) 143 | if key == ord('q'): 144 | # Quit when q is pressed 145 | break 146 | elif key == ord('t'): 147 | # Toggle display when t is pressed 148 | sideBySide = not sideBySide 149 | elif key == ord('s'): 150 | # Save the current frames 151 | cv2.imwrite(f"img_{save_frame_id:03d}_left.png", leftFrame) 152 | cv2.imwrite(f"img_{save_frame_id:03d}_right.png", rightFrame) 153 | print (f"Wrote img_{save_frame_id:03d}_left/right.png") 154 | save_frame_id += 1 155 | 156 | -------------------------------------------------------------------------------- /stereodemo/method_hitnet.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import tempfile 7 | import sys 8 | 9 | import onnxruntime 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | from .methods import Config, EnumParameter, StereoMethod, InputPair, StereoOutput 15 | from . 
import utils 16 | 17 | urls = { 18 | "hitnet_eth3d_120x160.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_eth3d_120x160.onnx", 19 | "hitnet_eth3d_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_eth3d_240x320.onnx", 20 | "hitnet_eth3d_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_eth3d_480x640.onnx", 21 | "hitnet_eth3d_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_eth3d_720x1280.onnx", 22 | "hitnet_middlebury_120x160.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_middlebury_120x160.onnx", 23 | "hitnet_middlebury_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_middlebury_240x320.onnx", 24 | "hitnet_middlebury_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_middlebury_480x640.onnx", 25 | "hitnet_middlebury_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_middlebury_720x1280.onnx", 26 | "hitnet_sceneflow_120x160.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_sceneflow_120x160.onnx", 27 | "hitnet_sceneflow_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_sceneflow_240x320.onnx", 28 | "hitnet_sceneflow_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_sceneflow_480x640.onnx", 29 | "hitnet_sceneflow_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-hitnet/hitnet_sceneflow_720x1280.onnx", 30 | } 31 | 32 | # Adapted from https://github.com/ibaiGorordo/ONNX-HITNET-Stereo-Depth-estimation 33 | # Onnx models from https://github.com/PINTO0309/PINTO_model_zoo/tree/main/142_HITNET 34 | # Official implementation https://github.com/google-research/google-research/tree/master/hitnet 35 | class HitnetStereo(StereoMethod): 36 | def __init__(self, config: Config): 37 | super().__init__("Hitnet (CVPR 2021)", 38 | "HITNet: Hierarchical Iterative Tile Refinement Network for Real-time Stereo Matching", 39 | {}, 40 | config) 41 | self.reset_defaults() 42 | 43 | self._loaded_session = None 44 | self._loaded_model_path = None 45 | 46 | def reset_defaults(self): 47 | self.parameters.update ({ 48 | "Shape": EnumParameter("Processed image size", 2, ["160x120", "320x240", "640x480", "1280x720"]), 49 | "Training Set": EnumParameter("Dataset used during training", 1, ["sceneflow", "middlebury", "eth3d"]) 50 | }) 51 | 52 | def compute_disparity(self, input: InputPair) -> StereoOutput: 53 | cols, rows = self.parameters["Shape"].value.split('x') 54 | cols, rows = int(cols), int(rows) 55 | training_set = self.parameters["Training Set"].value 56 | 57 | model_path = self.config.models_path / f'hitnet_{training_set}_{rows}x{cols}.onnx' 58 | self._load_model (model_path) 59 | 60 | model_inputs = self._loaded_session.get_inputs() 61 | model_outputs = self._loaded_session.get_outputs() 62 | model_rows, model_cols = model_inputs[0].shape[2:] # B,C,H,W 63 | self.target_size = (model_cols, model_rows) 64 | 65 | grayscale = True if training_set == 'eth3d' else False 66 | combined_tensor = self._preprocess_input(input.left_image, input.right_image, grayscale) 67 | 68 | start = time.time() 69 | input_names = [model_inputs[i].name for i in range(len(model_inputs))] 70 | output_names = [model_outputs[i].name for i in range(len(model_outputs))] 71 | outputs = 
self._loaded_session.run(['reference_output_disparity'], { 'input': combined_tensor }) 72 | elapsed_time = time.time() - start 73 | 74 | disparity_map = self._process_output(outputs) 75 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 76 | model_output_cols = disparity_map.shape[1] 77 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 78 | x_scale = input.left_image.shape[1] / float(model_output_cols) 79 | disparity_map *= np.float32(x_scale) 80 | 81 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 82 | 83 | def _preprocess_input (self, left: np.ndarray, right: np.ndarray, grayscale: bool): 84 | if grayscale: 85 | # H,W 86 | left = cv2.cvtColor(left, cv2.COLOR_BGR2GRAY) 87 | right = cv2.cvtColor(right, cv2.COLOR_BGR2GRAY) 88 | else: 89 | # H,W,C=3 90 | left = cv2.cvtColor(left, cv2.COLOR_BGR2RGB) 91 | right = cv2.cvtColor(right, cv2.COLOR_BGR2RGB) 92 | 93 | left = cv2.resize(left, self.target_size, cv2.INTER_AREA) 94 | right = cv2.resize(right, self.target_size, cv2.INTER_AREA) 95 | 96 | # Grayscale needs expansion to reach H,W,C. 97 | # Need to do that now because resize would change the shape. 98 | if left.ndim == 2: 99 | left = left[..., np.newaxis] 100 | right = right[..., np.newaxis] 101 | 102 | # -> H,W,C=2 or 6 , normalized to [0,1] 103 | combined_img = np.concatenate((left, right), axis=-1) / 255.0 104 | # -> C,H,W 105 | combined_img = combined_img.transpose(2, 0, 1) 106 | # -> B=1,C,H,W 107 | combined_img = np.expand_dims(combined_img, 0).astype(np.float32) 108 | return combined_img 109 | 110 | def _process_output(self, outputs): 111 | disparity_map = outputs[0][0].squeeze(-1) 112 | return disparity_map 113 | 114 | def _load_model(self, model_path: Path): 115 | if (self._loaded_model_path == model_path): 116 | return 117 | 118 | if not model_path.exists(): 119 | utils.download_model (urls[model_path.name], model_path) 120 | 121 | assert Path(model_path).exists() 122 | self._loaded_model_path = model_path 123 | self._loaded_session = onnxruntime.InferenceSession(str(model_path), providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Unit Tests](https://github.com/nburrus/stereodemo/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/nburrus/stereodemo/actions/workflows/unit_tests.yml) 2 | 3 | Twitter Badge 4 | 5 | 6 | # stereodemo 7 | 8 | Small Python utility to **compare and visualize** the output of various **stereo depth estimation** algorithms: 9 | 10 | - Make it easy to get a qualitative evaluation of several state-of-the-art models in the wild 11 | - Feed it left/right images or capture live from an [OAK-D camera](https://store.opencv.ai/products/oak-d) 12 | - Interactive colored point-cloud view since nice-looking disparity images can be misleading 13 | - Try different parameters on the same image 14 | 15 | **Included methods** (implementation/pre-trained models taken from their respective authors): 16 | 17 | - [OpenCV](https://opencv.org) stereo block matching and Semi-global block matching baselines, with all their parameters 18 | - [CREStereo](https://github.com/megvii-research/CREStereo): "Practical Stereo Matching via Cascaded Recurrent Network with Adaptive Correlation" (CVPR 2022) 19 | - [RAFT-Stereo](https://github.com/princeton-vl/RAFT-Stereo): "Multilevel Recurrent 
Field Transforms for Stereo Matching" (3DV 2021) 20 | - [Hitnet](https://github.com/google-research/google-research/tree/master/hitnet): "Hierarchical Iterative Tile Refinement Network for Real-time Stereo Matching" (CVPR 2021) 21 | - [STereo TRansformers](https://github.com/mli0603/stereo-transformer): "Revisiting Stereo Depth Estimation From a Sequence-to-Sequence Perspective with Transformers" (ICCV 2021) 22 | - [Chang et al. RealtimeStereo](https://github.com/JiaRenChang/RealtimeStereo): "Attention-Aware Feature Aggregation for Real-time Stereo Matching on Edge Devices" (ACCV 2020) 23 | 24 | - [DistDepth](https://github.com/facebookresearch/DistDepth): "Toward Practical Monocular Indoor Depth Estimation" (CVPR 2022). This one is actually a **monocular** method, only using the left image. 25 | 26 | See below for more details / credits to get each of these working, and check this [blog post for more results, including performance numbers](https://nicolas.burrus.name/stereo-comparison/). 27 | 28 | https://user-images.githubusercontent.com/541507/169557430-48e62510-60c2-4a2b-8747-f9606e405f74.mp4 29 | 30 | # Getting started 31 | 32 | ## Installation 33 | 34 | ``` 35 | python3 -m pip install stereodemo 36 | ``` 37 | 38 | ## Running it 39 | 40 | ### With an OAK-D camera 41 | 42 | To capture data directly from an OAK-D camera, use: 43 | 44 | ``` 45 | stereodemo --oak 46 | ``` 47 | 48 | Then click on `Next Image` to capture a new one. 49 | 50 | ### With image files 51 | 52 | If you installed stereodemo from pip, then just launch `stereodemo` and it will 53 | show some embedded sample images captured with an OAK-D camera. 54 | 55 | A tiny subset of some popular datasets is also included in this repository. Just 56 | provide a folder to `stereodemo` and it'll look for left/right pairs (either 57 | im0/im1 or left/right in the names): 58 | 59 | ``` 60 | # To evaluate on the oak-d images 61 | stereodemo datasets/oak-d 62 | 63 | # To cycle through all images 64 | stereodemo datasets 65 | ``` 66 | 67 | Then click on `Next Image` to cycle through the images. 68 | 69 | Sample images included in this repository: 70 | - [drivingstereo](datasets/drivingstereo/README.md): outdoor driving. 71 | - [middlebury_2014](datasets/middlebury_2014/README.md): high-res objects. 72 | - [eth3d](datasets/eth3d_lowres/README.md): outdoor and indoor scenes. 73 | - [sceneflow](datasets/sceneflow/README.md): synthetic rendering of objects. 74 | - [oak-d](datasets/oak-d/README.md): indoor images I captured with my OAK-D lite camera. 75 | - [kitti2015](datasets/kitti2015/README.md): outdoor driving (only one image). 76 | 77 | # Dependencies 78 | 79 | `pip` will install the dependencies automatically. Here is the list: 80 | 81 | - [Open3D](https://open3d.org). For the point cloud visualization and the GUI. 82 | - [OpenCV](https://opencv.org). For image loading and the traditional block matching baselines. 83 | - [onnxruntime](https://onnxruntime.ai/). To run pretrained models in the ONNX format. 84 | - [pytorch](https://pytorch.org/). To run pretrained models exported as torch script. 85 | - [depthai](https://docs.luxonis.com/en/latest/). Optional, to grab images from a Luxonis OAK camera. 86 | 87 | # Credits for each method 88 | 89 | I did not implement any of these myself, but just collected pre-trained models or converted them to torch script / ONNX. 
90 | 91 | - CREStereo 92 | - Official implementation and pre-trained models: https://github.com/megvii-research/CREStereo 93 | - Model Zoo for the ONNX models: https://github.com/PINTO0309/PINTO_model_zoo/tree/main/284_CREStereo 94 | - Port to ONNX + sample loading code: https://github.com/ibaiGorordo/ONNX-CREStereo-Depth-Estimation 95 | 96 | - RAFT-Stereo 97 | - Official implementation and pre-trained models: https://github.com/princeton-vl/RAFT-Stereo 98 | - I exported the pytorch implementation to torch script via tracing, [with minor modifications of the source code](https://github.com/nburrus/RAFT-Stereo/commit/ebbb5a807227927ab4551274039e9bdd16a1b010). 99 | - Their fastest implementation was not imported. 100 | 101 | - Hitnet 102 | - Official implementation and pre-trained models: https://github.com/google-research/google-research/tree/master/hitnet 103 | - Model Zoo for the ONNX models: https://github.com/PINTO0309/PINTO_model_zoo/tree/main/142_HITNET 104 | - Port to ONNX + sample loading code: https://github.com/ibaiGorordo/ONNX-HITNET-Stereo-Depth-estimation 105 | 106 | - Stereo Transformers 107 | - Official implementation and pre-trained models: https://github.com/mli0603/stereo-transformer 108 | - Made [some small changes](https://github.com/nburrus/stereo-transformer/commit/0006a022c19f0c7c4d7683408531180a863603a5) to allow torch script export via tracing. 109 | - The exported model currently fails with GPU inference, so only CPU inference is enabled. 110 | 111 | - Chang et al. RealtimeStereo 112 | - Official implementation and pre-trained models: https://github.com/JiaRenChang/RealtimeStereo 113 | - I exported the pytorch implementation to torch script via tracing with some minor changes to the code https://github.com/JiaRenChang/RealtimeStereo/pull/15 . See [chang_realtimestereo_to_torchscript_onnx.py](tools/chang_realtimestereo_to_torchscript_onnx.py). 114 | 115 | - DistDepth 116 | - Official implementation and pre-trained models https://github.com/facebookresearch/DistDepth 117 | - I exported the pytorch implementaton to torch script via tracing, see [the changes](https://github.com/facebookresearch/DistDepth/commit/fde3b427ef2ff31c34f08e99c51c8e6a2427b720). 118 | 119 | # License 120 | 121 | The code of stereodemo is MIT licensed, but the pre-trained models are subject to the license of their respective implementation. 122 | 123 | The sample images have the license of their respective source, except for datasets/oak-d which is licenced under [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/). 124 | 125 | -------------------------------------------------------------------------------- /stereodemo/method_sttr.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import gc 7 | import tempfile 8 | import re 9 | import sys 10 | 11 | import torch 12 | from torchvision import transforms 13 | 14 | import cv2 15 | import numpy as np 16 | 17 | from .methods import Config, EnumParameter, StereoMethod, InputPair, StereoOutput 18 | from . 
import utils 19 | 20 | urls = { 21 | "sttr-kitti-cpu-240x320-ds1.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cpu-240x320-ds1.scripted.pt", 22 | "sttr-kitti-cpu-480x640-ds2.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cpu-480x640-ds2.scripted.pt", 23 | "sttr-kitti-cpu-480x640-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cpu-480x640-ds3.scripted.pt", 24 | "sttr-kitti-cpu-720x1280-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cpu-720x1280-ds3.scripted.pt", 25 | "sttr-kitti-cuda-240x320-ds1.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cuda-240x320-ds1.scripted.pt", 26 | "sttr-kitti-cuda-480x640-ds2.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cuda-480x640-ds2.scripted.pt", 27 | "sttr-kitti-cuda-480x640-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cuda-480x640-ds3.scripted.pt", 28 | "sttr-kitti-cuda-720x1280-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-kitti-cuda-720x1280-ds3.scripted.pt", 29 | 30 | "sttr-sceneflow-cpu-240x320-ds1.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cpu-240x320-ds1.scripted.pt", 31 | "sttr-sceneflow-cpu-480x640-ds2.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cpu-480x640-ds2.scripted.pt", 32 | "sttr-sceneflow-cpu-480x640-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cpu-480x640-ds3.scripted.pt", 33 | "sttr-sceneflow-cpu-720x1280-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cpu-720x1280-ds3.scripted.pt", 34 | "sttr-sceneflow-cuda-240x320-ds1.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cuda-240x320-ds1.scripted.pt", 35 | "sttr-sceneflow-cuda-480x640-ds2.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cuda-480x640-ds2.scripted.pt", 36 | "sttr-sceneflow-cuda-480x640-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cuda-480x640-ds3.scripted.pt", 37 | "sttr-sceneflow-cuda-720x1280-ds3.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-sttr/sttr-sceneflow-cuda-720x1280-ds3.scripted.pt", 38 | } 39 | 40 | def clear_gpu_memory(): 41 | gc.collect() 42 | torch.cuda.empty_cache() 43 | 44 | # https://github.com/mli0603/stereo-transformer 45 | # Made some changes to allow torchscript tracing: 46 | # https://github.com/nburrus/stereo-transformer/commit/0006a022c19f0c7c4d7683408531180a863603a5 47 | class StereoTransformers(StereoMethod): 48 | def __init__(self, config: Config): 49 | super().__init__("STereo TRansformer (ICCV 2021)", 50 | "Revisiting Stereo Depth Estimation From a Sequence-to-Sequence Perspective with Transformers.", 51 | {}, 52 | config) 53 | self.reset_defaults() 54 | 55 | self.net = None 56 | self._loaded_model_path = None 57 | 58 | imagenet_stats = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]} 59 | self.img_to_tensor_transforms = transforms.Compose([ 60 | transforms.ToTensor(), 61 | transforms.Normalize(**imagenet_stats), 62 | ]) 63 | 64 | def reset_defaults(self): 65 | self.parameters.update 
({ 66 | "Shape": EnumParameter("Processed image size", 2, ["320x240 (ds1)", "640x480 (ds2)", "640x480 (ds3)", "1280x720 (ds3)"]), 67 | # "Model": EnumParameter("Pre-trained Model", 0, ["kitti-cpu", "sceneflow-cpu", "kitti-cuda", "sceneflow-cuda"]) 68 | # The CUDA ones segfault with my Python 3.8 venv, but someone worked with Python 3.7. 69 | # Maybe related to the installed packages instead, need to investigate more. 70 | # Keeping only the CPU ones for now since it's enough to evaluate. 71 | "Model": EnumParameter("Pre-trained Model", 0, ["kitti-cpu", "sceneflow-cpu"]), 72 | "Detect occlusions": EnumParameter("Detect Occlusions", 0, ["Yes", "No"]) 73 | }) 74 | 75 | def compute_disparity(self, input: InputPair) -> StereoOutput: 76 | stereo_output = self._compute_disparity (input) 77 | clear_gpu_memory () 78 | return stereo_output 79 | 80 | def _compute_disparity(self, input: InputPair) -> StereoOutput: 81 | m = re.match("(\d+)x(\d+) \(ds(\d)\)", self.parameters["Shape"].value) 82 | cols, rows, downsample = map(lambda v: int(v), m.groups()) 83 | self.target_size = (cols, rows) 84 | 85 | variant = self.parameters["Model"].value 86 | detect_occlusions = self.parameters["Detect occlusions"].value == "Yes" 87 | 88 | model_path = self.config.models_path / f'sttr-{variant}-{rows}x{cols}-ds{downsample}.scripted.pt' 89 | self._load_model (model_path) 90 | 91 | left_tensor = self._preprocess_input(input.left_image) 92 | right_tensor = self._preprocess_input(input.right_image) 93 | 94 | col_offset = int(downsample / 2) 95 | row_offset = int(downsample / 2) 96 | sampled_cols = torch.arange(col_offset, cols, downsample)[None,] 97 | sampled_rows = torch.arange(row_offset, rows, downsample)[None,] 98 | 99 | device = torch.device('cuda') if 'cuda' in variant else 'cpu' 100 | net = self.net.to(device) 101 | left_tensor = left_tensor.to(device) 102 | right_tensor = right_tensor.to(device) 103 | sampled_cols = sampled_cols.to(device) 104 | sampled_rows = sampled_rows.to(device) 105 | 106 | start = time.time() 107 | with torch.no_grad(): 108 | outputs = net(left_tensor, right_tensor, sampled_cols, sampled_rows) 109 | elapsed_time = time.time() - start 110 | 111 | disparity_map = self._process_output(outputs, use_occlusion=detect_occlusions) 112 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 113 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 114 | x_scale = input.left_image.shape[1] / float(cols) 115 | disparity_map *= np.float32(x_scale) 116 | 117 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 118 | 119 | def _preprocess_input (self, img: np.ndarray): 120 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 121 | img = cv2.resize(img, self.target_size, cv2.INTER_AREA) 122 | return self.img_to_tensor_transforms (img).unsqueeze(0) 123 | 124 | def _process_output(self, outputs, use_occlusion: bool): 125 | disparity_map = outputs[0][0].detach().cpu().numpy() 126 | if use_occlusion: 127 | occ_pred = outputs[1][0].data.cpu().numpy() > 0.5 128 | disparity_map[occ_pred] = -1.0 129 | return disparity_map 130 | 131 | def _load_model(self, model_path: Path): 132 | # FIXME: always reload the model, for some reason 133 | # feeding multiple images to the same model freezes 134 | # with CUDA. Maybe due to multi-threading? 
135 | # if (self._loaded_model_path == model_path): 136 | # return 137 | 138 | if not model_path.exists(): 139 | utils.download_model (urls[model_path.name], model_path) 140 | 141 | assert Path(model_path).exists() 142 | self._loaded_model_path = model_path 143 | self.net = torch.jit.load(model_path) 144 | self.net.eval () 145 | -------------------------------------------------------------------------------- /stereodemo/main.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | import json 3 | from pathlib import Path 4 | import sys 5 | import tempfile 6 | import time 7 | from types import SimpleNamespace 8 | from typing import List 9 | 10 | import numpy as np 11 | 12 | import cv2 13 | 14 | 15 | from . import visualizer 16 | from . import methods 17 | 18 | from .method_opencv_bm import StereoBM, StereoSGBM 19 | from .method_raft_stereo import RaftStereo 20 | from .method_cre_stereo import CREStereo 21 | from .method_chang_realtime_stereo import ChangRealtimeStereo 22 | from .method_hitnet import HitnetStereo 23 | from .method_sttr import StereoTransformers 24 | from stereodemo.method_dist_depth import DistDepth 25 | 26 | def parse_args(): 27 | import argparse 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--oak', action='store_true', help='Use an oak-D camera to grab images.') 30 | parser.add_argument('--oak-output-folder', type=Path, default=None, help='Output folder to save the images grabbed by the OAK camera') 31 | parser.add_argument('images', 32 | help='rectified_left1 rectified_right1 ... [rectified_leftN rectified_rightN]. Load image pairs from disk. You can also specify folders.', 33 | type=Path, 34 | default=None, 35 | nargs='*') 36 | parser.add_argument('--calibration', type=Path, help='Calibration json. 
If unspecified, it will try to load a stereodemo_calibration.json file in the left image parent folder.', default=None) 37 | default_models_path = Path.home() / ".cache" / "stereodemo" / "models" 38 | parser.add_argument('--models-path', type=Path, help='Path to store the downloaded models.', default=default_models_path) 39 | return parser.parse_args() 40 | 41 | def find_stereo_images_in_dir(dir: Path): 42 | left_files = [] 43 | right_files = [] 44 | 45 | def validated_lists(): 46 | for f in right_files: 47 | assert f.exists() 48 | return left_files, right_files 49 | 50 | for ext in ['jpg', 'png']: 51 | left = sorted(list(dir.glob(f'**/*left*.{ext}'))) 52 | if len(left) != 0: 53 | right = [f.parent / f.name.replace('left', 'right') for f in left] 54 | left_files += left 55 | right_files += right 56 | 57 | for ext in ['jpg', 'png']: 58 | left = sorted(list(dir.glob(f'**/im0.{ext}'))) 59 | if len(left) != 0: 60 | right = [f.parent / f.name.replace('im0', 'im1') for f in left] 61 | left_files += left 62 | right_files += right 63 | 64 | return validated_lists() 65 | 66 | class FileListSource (visualizer.Source): 67 | def __init__(self, file_or_dir_list, calibration=None): 68 | self.left_images_path = [] 69 | self.right_images_path = [] 70 | 71 | while file_or_dir_list: 72 | f = file_or_dir_list.pop(0) 73 | if f.is_dir(): 74 | left, right = find_stereo_images_in_dir (f) 75 | self.left_images_path += left 76 | self.right_images_path += right 77 | else: 78 | if f.suffix.lower() not in ['.png', '.jpg', '.jpeg']: 79 | print (f"Warning: ignoring {f}, not an image extension.") 80 | continue 81 | try: 82 | right_f = file_or_dir_list.pop(0) 83 | except: 84 | print (f"Missing right image for {f}, skipping") 85 | continue 86 | self.left_images_path.append(f) 87 | self.right_images_path.append(right_f) 88 | 89 | self.index = 0 90 | self.user_provided_calibration_path = calibration 91 | self.num_pairs = len(self.left_images_path) 92 | if self.num_pairs == 0: 93 | raise Exception("No image pairs.") 94 | 95 | def is_live(self): 96 | return False 97 | 98 | def selected_index (self) -> int: 99 | return self.index 100 | 101 | def get_pair_at_index(self, idx: int) -> methods.InputPair: 102 | self.index = idx 103 | 104 | def load_image(path): 105 | im = cv2.imread(str(path), cv2.IMREAD_COLOR) 106 | assert im is not None 107 | return im 108 | 109 | left_image_path = self.left_images_path[self.index] 110 | left_image = load_image(left_image_path) 111 | if self.user_provided_calibration_path is None: 112 | calibration_path = left_image_path.parent / 'stereodemo_calibration.json' 113 | if not calibration_path.exists(): 114 | print (f"Warning: no calibration file found {calibration_path}. Using default calibration, the point cloud won't be accurate.") 115 | calibration_path = None 116 | else: 117 | calibration_path = self.user_provided_calibration_path 118 | if calibration_path: 119 | calib = visualizer.Calibration.from_json (open(calibration_path, 'r').read()) 120 | else: 121 | # Fake reasonable calibration. 
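            # Rough placeholder values: focal length guessed as 0.8x the image height,
            # principal point at the image center for both cameras, and a 7.5 cm baseline
            # (an OAK-D-like rig), so the point cloud gets a plausible but not metrically
            # accurate scale.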
122 | calib = visualizer.Calibration(left_image.shape[1], 123 | left_image.shape[0], 124 | left_image.shape[0]*0.8, 125 | left_image.shape[0]*0.8, 126 | left_image.shape[1]/2.0, # cx0 127 | left_image.shape[1]/2.0, # cx1 128 | left_image.shape[0]/2.0, 129 | 0.075) 130 | 131 | right_image_path = self.right_images_path[self.index] 132 | status = f"{left_image_path} / {right_image_path}" 133 | return visualizer.InputPair (left_image, load_image(right_image_path), calib, status) 134 | 135 | def get_pair_list(self) -> List[str]: 136 | return [str(f) for f in self.left_images_path] 137 | 138 | def get_next_pair(self): 139 | self.index = (self.index + 1) % self.num_pairs 140 | return self.get_pair_at_index(self.index) 141 | 142 | def main(): 143 | args = parse_args() 144 | 145 | try: 146 | args.models_path.mkdir(parents=True, exist_ok=True) 147 | except Exception as e: 148 | sys.stderr.write (f"Warning: cannot use the default models path {args.models_path}: {e}\n") 149 | sys.stderr.write ("A valid path is necessary to store the downloaded models.\n") 150 | args.models_path = Path(tempfile.gettempdir()) / 'stereodemo_models' 151 | sys.stderr.write (f"Going to use the temporary directory {args.models_path} instead, specify --model-paths to specify a custom persistent path instead.\n") 152 | try: 153 | args.models_path.mkdir(parents=True, exist_ok=True) 154 | except: 155 | sys.stderr.write ("Could not create a temporary directory to store the downloaded models.\n") 156 | sys.stderr.write ("Aborting, you need to specify --models-path with a valid writable path.\n") 157 | sys.exit (1) 158 | print (f"INFO: will store downloaded models in {args.models_path}") 159 | 160 | config = methods.Config(args.models_path) 161 | method_list = [ 162 | StereoBM(config), 163 | StereoSGBM(config), 164 | CREStereo(config), 165 | RaftStereo(config), 166 | HitnetStereo(config), 167 | StereoTransformers(config), 168 | ChangRealtimeStereo(config), 169 | DistDepth(config) 170 | ] 171 | 172 | if args.images: 173 | source = FileListSource(args.images, args.calibration) 174 | elif args.oak: 175 | from .oakd_source import OakdSource, StereoFromOakInputSource 176 | source = OakdSource(args.oak_output_folder) 177 | method_list = [StereoFromOakInputSource(config)] + method_list 178 | else: 179 | datasets_path = Path(__file__).parent / 'datasets' 180 | if not datasets_path.exists(): 181 | print (f"Tried but failed to find files in {datasets_path}") 182 | print ("You need to specify --oak or provide images") 183 | sys.exit (1) 184 | source = FileListSource([datasets_path], args.calibration) 185 | 186 | method_dict = { method.name:method for method in method_list } 187 | 188 | viz = visualizer.Visualizer(method_dict, source) 189 | 190 | while True: 191 | start_time = time.time() 192 | if not viz.update_once (): 193 | break 194 | cv2.waitKey (1) 195 | elapsed = time.time() - start_time 196 | time_to_sleep = 1/30.0 - elapsed 197 | if time_to_sleep > 0: 198 | time.sleep (time_to_sleep) 199 | 200 | 201 | -------------------------------------------------------------------------------- /stereodemo/method_raft_stereo.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import time 4 | from dataclasses import dataclass 5 | import urllib.request 6 | import gc 7 | import tempfile 8 | import sys 9 | 10 | import torch 11 | from torchvision import transforms 12 | 13 | import cv2 14 | import numpy as np 15 | 16 | from .methods import Config, EnumParameter, 
StereoMethod, InputPair, StereoOutput 17 | from . import utils 18 | 19 | urls = { 20 | "raft-stereo-eth3d-cpu-128x160.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cpu-128x160.scripted.pt", 21 | "raft-stereo-eth3d-cpu-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cpu-256x320.scripted.pt", 22 | "raft-stereo-eth3d-cpu-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cpu-480x640.scripted.pt", 23 | "raft-stereo-eth3d-cpu-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cpu-736x1280.scripted.pt", 24 | "raft-stereo-eth3d-cuda-128x160.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cuda-128x160.scripted.pt", 25 | "raft-stereo-eth3d-cuda-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cuda-256x320.scripted.pt", 26 | "raft-stereo-eth3d-cuda-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cuda-480x640.scripted.pt", 27 | "raft-stereo-eth3d-cuda-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-eth3d-cuda-736x1280.scripted.pt", 28 | "raft-stereo-fast-cpu-128x160.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cpu-128x160.scripted.pt", 29 | "raft-stereo-fast-cpu-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cpu-256x320.scripted.pt", 30 | "raft-stereo-fast-cpu-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cpu-480x640.scripted.pt", 31 | "raft-stereo-fast-cpu-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cpu-736x1280.scripted.pt", 32 | "raft-stereo-fast-cuda-128x160.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cuda-128x160.scripted.pt", 33 | "raft-stereo-fast-cuda-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cuda-256x320.scripted.pt", 34 | "raft-stereo-fast-cuda-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cuda-480x640.scripted.pt", 35 | "raft-stereo-fast-cuda-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-fast-cuda-736x1280.scripted.pt", 36 | "raft-stereo-middlebury-cpu-128x160.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cpu-128x160.scripted.pt", 37 | "raft-stereo-middlebury-cpu-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cpu-256x320.scripted.pt", 38 | "raft-stereo-middlebury-cpu-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cpu-480x640.scripted.pt", 39 | "raft-stereo-middlebury-cpu-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cpu-736x1280.scripted.pt", 40 | "raft-stereo-middlebury-cuda-128x160.scripted.pt": 
"https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cuda-128x160.scripted.pt", 41 | "raft-stereo-middlebury-cuda-256x320.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cuda-256x320.scripted.pt", 42 | "raft-stereo-middlebury-cuda-480x640.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cuda-480x640.scripted.pt", 43 | "raft-stereo-middlebury-cuda-736x1280.scripted.pt": "https://github.com/nburrus/stereodemo/releases/download/v0.1-raft-stereo/raft-stereo-middlebury-cuda-736x1280.scripted.pt", 44 | } 45 | 46 | def clear_gpu_memory(): 47 | gc.collect() 48 | torch.cuda.empty_cache() 49 | 50 | # https://github.com/princeton-vl/RAFT-Stereo 51 | # I exported the pytorch implementation to torch script via tracing, with minor modifications of the source code. 52 | # https://github.com/nburrus/RAFT-Stereo/commit/ebbb5a807227927ab4551274039e9bdd16a1b010 53 | # Their fastest implementation was not imported. 54 | class RaftStereo(StereoMethod): 55 | def __init__(self, config: Config): 56 | super().__init__("RAFT-Stereo (3DV 2021)", 57 | "RAFT-Stereo: Multilevel Recurrent Field Transforms for Stereo Matching.", 58 | {}, 59 | config) 60 | self.reset_defaults() 61 | 62 | self.net = None 63 | self._loaded_model_path = None 64 | 65 | def reset_defaults(self): 66 | self.parameters.update ({ 67 | "Shape": EnumParameter("Processed image size", 2, ["160x128", "320x256", "640x480", "1280x736"]), 68 | # "Model": EnumParameter("Pre-trained Model", 1, ["eth3d-cuda", "eth3d-cpu", "fast-cuda", "fast-cpu", "middlebury-cuda"]) 69 | # The eth3d and fast cuda models required --corr_implementation alt to work once loaded via torchscript. 70 | # The supposedly faster "reg" is not working with a torch/cuda segfault, not sure why. 
71 | "Model": EnumParameter("Pre-trained Model", 0, ["fast-cpu", "middlebury-cpu", "eth3d-cpu", "fast-cuda", "middlebury-cuda", "eth3d-cuda"]) 72 | }) 73 | 74 | def compute_disparity(self, input: InputPair) -> StereoOutput: 75 | stereo_output = self._compute_disparity (input) 76 | clear_gpu_memory () 77 | return stereo_output 78 | 79 | def _compute_disparity(self, input: InputPair) -> StereoOutput: 80 | cols, rows = self.parameters["Shape"].value.split('x') 81 | cols, rows = int(cols), int(rows) 82 | self.target_size = (cols, rows) 83 | 84 | variant = self.parameters["Model"].value 85 | 86 | model_path = self.config.models_path / f'raft-stereo-{variant}-{rows}x{cols}.scripted.pt' 87 | self._load_model (model_path) 88 | 89 | left_tensor = self._preprocess_input(input.left_image) 90 | right_tensor = self._preprocess_input(input.right_image) 91 | 92 | device = torch.device('cuda') if 'cuda' in variant else 'cpu' 93 | net = self.net.to(device) 94 | left_tensor = left_tensor.to(device) 95 | right_tensor = right_tensor.to(device) 96 | 97 | start = time.time() 98 | with torch.no_grad(): 99 | outputs = self.net(left_tensor, right_tensor) 100 | elapsed_time = time.time() - start 101 | 102 | disparity_map = self._process_output(outputs) 103 | if disparity_map.shape[:2] != input.left_image.shape[:2]: 104 | disparity_map = cv2.resize (disparity_map, (input.left_image.shape[1], input.left_image.shape[0]), cv2.INTER_NEAREST) 105 | x_scale = input.left_image.shape[1] / float(cols) 106 | disparity_map *= np.float32(x_scale) 107 | 108 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 109 | 110 | def _preprocess_input (self, img: np.ndarray): 111 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 112 | img = cv2.resize(img, self.target_size, cv2.INTER_AREA) 113 | # -> C,H,W 114 | # Normalization done in the model itself. 115 | return torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0).float() 116 | 117 | def _process_output(self, outputs): 118 | disparity_map = outputs[1][0].detach().cpu().squeeze(0).squeeze(0).numpy() * -1.0 119 | return disparity_map 120 | 121 | def _load_model(self, model_path: Path): 122 | # FIXME: always reload the model, for some reason 123 | # feeding multiple images to the same model freezes 124 | # with CUDA. Maybe due to multi-threading? 
125 | # if (self._loaded_model_path == model_path): 126 | # return 127 | 128 | if not model_path.exists(): 129 | utils.download_model (urls[model_path.name], model_path) 130 | 131 | assert Path(model_path).exists() 132 | self._loaded_model_path = model_path 133 | self.net = torch.jit.load(model_path) 134 | self.net.eval () 135 | -------------------------------------------------------------------------------- /tools/chang_realtimestereo_to_torchscript_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import torch 4 | from torch import Tensor 5 | import torch.onnx 6 | 7 | import torch.nn.functional as F 8 | 9 | import numpy as np 10 | 11 | from pathlib import Path 12 | import sys 13 | 14 | import cv2 15 | 16 | from torchvision import transforms 17 | 18 | from .visualizer import imshow 19 | 20 | rtstereo_dir = sys.argv[1] 21 | rtstereo_model = sys.argv[2] 22 | 23 | sys.path.insert(0, rtstereo_dir) 24 | from models import RTStereoNet 25 | 26 | def b2mb(x): return (x/2**20) 27 | 28 | class StereodemoPerformanceMonitor: 29 | def __init__(self, name, load_model, do_inference, is_gpu: bool): 30 | self.load_model = load_model 31 | self.do_inference = do_inference 32 | self.is_gpu = is_gpu 33 | self.name = name 34 | 35 | def run (self): 36 | import time 37 | model = self.load_model () 38 | timings = [] 39 | for i in range (0, 5): 40 | tstart = time.time () 41 | self.do_inference (model) 42 | tend = time.time () 43 | dt = tend - tstart 44 | timings.append (dt) 45 | print (f'{dt=}') 46 | print (f'{self.name}: timings {timings}') 47 | 48 | if self.is_gpu: 49 | import gc 50 | peak_memory_inference_mb = [] 51 | for i in range (0, 5): 52 | gc.collect () 53 | torch.cuda.empty_cache() 54 | torch.cuda.reset_max_memory_allocated() # reset the peak gauge to zero 55 | model = self.load_model () 56 | peak_after_load = torch.cuda.max_memory_allocated() 57 | self.do_inference (model) 58 | peak_after_inference = torch.cuda.max_memory_allocated() 59 | print (f'{peak_after_load=}', peak_after_load) 60 | print (f'{peak_after_inference=}', peak_after_inference) 61 | peak_memory_inference_mb.append (b2mb(peak_after_inference)) 62 | print (f'{self.name}: peak memory (MB) {peak_memory_inference_mb}') 63 | 64 | def save_torchscript(net, output_file, device): 65 | scripted_module = torch.jit.script(net) 66 | # net = net.to(device) 67 | # sample_input = (torch.zeros(1,3,256,256).to(device), torch.zeros(1,3,256,256).to(device)) 68 | # scripted_module = torch.jit.trace(net, sample_input) 69 | torch.jit.save(scripted_module, output_file) 70 | return scripted_module 71 | 72 | def save_onnx(net, output_file): 73 | torch.onnx.export(net, # model being run 74 | sample_input, # model input (or a tuple for multiple inputs) 75 | output_file, # where to save the model (can be a file or file-like object) 76 | export_params=True, # store the trained parameter weights inside the model file 77 | opset_version=11, # the ONNX version to export the model to 78 | do_constant_folding=True, # whether to execute constant folding for optimization 79 | input_names = ['left', 'right'], # the model's input names 80 | output_names = ['disparity'], # the model's output names 81 | dynamic_axes={'left' : {0 : 'batch_size', 2 : 'width', 3 : 'height' }, # variable length axes, except channels 82 | 'right' : {0 : 'batch_size', 2 : 'width', 3 : 'height' }, 83 | 'output' : {0 : 'batch_size', 2 : 'width', 3 : 'height'}}) 84 | 85 | def show_color_disparity (name: str, disparity_map: 
np.ndarray): 86 | min_disp = 0 87 | max_disp = 64 88 | norm_disparity_map = 255*((disparity_map-min_disp) / (max_disp-min_disp)) 89 | disparity_color = cv2.applyColorMap(cv2.convertScaleAbs(norm_disparity_map, 1), cv2.COLORMAP_MAGMA) 90 | imshow (name, disparity_color) 91 | 92 | def export_models (): 93 | checkpoint_file = rtstereo_model 94 | net = RTStereoNet(maxdisp=192, device='cpu') 95 | checkpoint = torch.load(checkpoint_file) 96 | net.load_state_dict(checkpoint['state_dict']) 97 | net.eval() 98 | 99 | # Hacky way to check the original model and make sure the export 100 | # is not screwing up the results. 101 | if False: 102 | # left = cv2.imread("datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-50-10-570_left.jpg", cv2.IMREAD_COLOR) 103 | # right = cv2.imread("datasets/drivingstereo/2018-07-11-14-48-52/2018-07-11-14-48-52_2018-07-11-14-50-10-570_right.jpg", cv2.IMREAD_COLOR) 104 | 105 | # left = cv2.imread("datasets/eth3d_lowres/forest_2s/im0.png", cv2.IMREAD_COLOR) 106 | # right = cv2.imread("datasets/eth3d_lowres/forest_2s/im1.png", cv2.IMREAD_COLOR) 107 | 108 | # left = cv2.imread("datasets/eth3d_lowres/playground_3l/im0.png", cv2.IMREAD_COLOR) 109 | # right = cv2.imread("datasets/eth3d_lowres/playground_3l/im1.png", cv2.IMREAD_COLOR) 110 | 111 | left = cv2.imread("datasets/sceneflow/driving_left.png", cv2.IMREAD_COLOR) 112 | right = cv2.imread("datasets/sceneflow/driving_right.png", cv2.IMREAD_COLOR) 113 | 114 | # left = cv2.resize (left, (1280,720), cv2.INTER_AREA) 115 | # right = cv2.resize (right, (1280,720), cv2.INTER_AREA) 116 | 117 | imagenet_stats = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]} 118 | img_to_tensor_transforms = transforms.Compose([ 119 | transforms.ToTensor(), 120 | transforms.Normalize(**imagenet_stats), 121 | ]) 122 | 123 | left = img_to_tensor_transforms(left) 124 | right = img_to_tensor_transforms(right) 125 | 126 | # pad to width and hight to 16 times 127 | if left.shape[1] % 16 != 0: 128 | times = left.shape[1]//16 129 | top_pad = (times+1)*16 -left.shape[1] 130 | else: 131 | top_pad = 0 132 | 133 | if left.shape[2] % 16 != 0: 134 | times = left.shape[2]//16 135 | right_pad = (times+1)*16-left.shape[2] 136 | else: 137 | right_pad = 0 138 | 139 | left = F.pad(left,(0,right_pad, top_pad,0)).unsqueeze(0) 140 | right = F.pad(right,(0,right_pad, top_pad,0)).unsqueeze(0) 141 | 142 | output = net (left, right) 143 | output = output[0].detach().numpy().transpose(1,2,0) 144 | show_color_disparity ("disparity", output) 145 | cv2.waitKey(0) 146 | 147 | # save_torchscript(net, "chang-realtime-stereo.scripted.pt", torch.device('cpu')) 148 | # save_torchscript(net, "chang-realtime-stereo-gpu.scripted.pt", torch.device('cuda')) 149 | 150 | # Only tracing worked without substantial changes to the codebase. 151 | device = torch.device('cpu') 152 | for w,h in [(1280, 720), (640,480), (320,240), (160,128)]: 153 | sample_input = (torch.zeros(1,3,h,w).to(device), torch.zeros(1,3,h,w).to(device)) 154 | with torch.no_grad(): 155 | scripted_module = torch.jit.trace(net, sample_input) 156 | torch.jit.save(scripted_module, f"chang-realtime-stereo-cpu-{w}x{h}.scripted.pt") 157 | 158 | # Need opset16 for grid sampling, currently needs pytorch nightly to do it (1.11 won't). 
159 | # However the exported onnx fails to run: 160 | # [ONNXRuntimeError] : 1 : FAIL : Load model from chang-realtime-stereo-cpu-320x240.onnx 161 | # failed:Type Error: Type parameter (T) of Optype (Mul) bound to different types 162 | # (tensor(float) and tensor(int64) in node (Mul_1675). 163 | if False: 164 | torch.onnx.export(scripted_module, # model being run 165 | sample_input, # model input (or a tuple for multiple inputs) 166 | f"chang-realtime-stereo-cpu-{w}x{h}.onnx", # where to save the model (can be a file or file-like object) 167 | export_params=True, # store the trained parameter weights inside the model file 168 | opset_version=16, # the ONNX version to export the model to 169 | do_constant_folding=True, # whether to execute constant folding for optimization 170 | input_names = ['left', 'right'], # the model's input names 171 | output_names = ['disparity']) # the model's output names 172 | # No dynamic axes with tracing :-( 173 | # dynamic_axes={'left' : {0 : 'batch_size', 2 : 'width', 3 : 'height' }, # variable length axes, except channels 174 | # 'right' : {0 : 'batch_size', 2 : 'width', 3 : 'height' }, 175 | # 'output' : {0 : 'batch_size', 2 : 'width', 3 : 'height'}}) 176 | # save_onnx(scripted_module, "chang-realtime-stereo-cpu.onnx") 177 | 178 | def benchmark_model(name, size, device): 179 | w, h = size 180 | is_gpu = (device == 'cuda') 181 | 182 | def load_model (): 183 | checkpoint_file = rtstereo_model 184 | net = RTStereoNet(maxdisp=192, device=device) 185 | checkpoint = torch.load(checkpoint_file) 186 | net.load_state_dict(checkpoint['state_dict']) 187 | net.eval() 188 | net = net.to (device) 189 | return net 190 | 191 | def do_inference (model): 192 | with torch.no_grad(): 193 | sample_input = (torch.zeros(1, 3, h, w).to(device), torch.zeros(1, 3, h, w).to(device)) 194 | outputs = model (*sample_input) 195 | print (type(outputs)) 196 | 197 | monitor = StereodemoPerformanceMonitor(f'{name}_{device}_{w}x{h}', load_model, do_inference, is_gpu) 198 | monitor.run () 199 | 200 | 201 | if __name__ == "__main__": 202 | export_models () 203 | 204 | # torch.set_num_threads(1) 205 | # for s in [(320,240), (640,480), (1280,720)]: 206 | # benchmark_model ('chang', s, 'cpu') 207 | 208 | -------------------------------------------------------------------------------- /stereodemo/method_cre_stereo.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import time 3 | from dataclasses import dataclass 4 | 5 | import cv2 6 | import numpy as np 7 | import onnxruntime 8 | 9 | from .methods import Config, EnumParameter, StereoMethod, InputPair, StereoOutput 10 | from . 
import utils 11 | 12 | urls = { 13 | "crestereo_combined_iter10_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter10_240x320.onnx", 14 | "crestereo_combined_iter10_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter10_480x640.onnx", 15 | "crestereo_combined_iter10_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter10_720x1280.onnx", 16 | "crestereo_combined_iter20_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter20_240x320.onnx", 17 | "crestereo_combined_iter20_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter20_480x640.onnx", 18 | "crestereo_combined_iter20_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter20_720x1280.onnx", 19 | "crestereo_combined_iter2_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter2_240x320.onnx", 20 | "crestereo_combined_iter2_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter2_480x640.onnx", 21 | "crestereo_combined_iter2_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter2_720x1280.onnx", 22 | "crestereo_combined_iter5_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter5_240x320.onnx", 23 | "crestereo_combined_iter5_380x480.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter5_380x480.onnx", 24 | "crestereo_combined_iter5_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter5_480x640.onnx", 25 | "crestereo_combined_iter5_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_combined_iter5_720x1280.onnx", 26 | "crestereo_init_iter10_180x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter10_180x320.onnx", 27 | "crestereo_init_iter10_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter10_240x320.onnx", 28 | "crestereo_init_iter10_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter10_480x640.onnx", 29 | "crestereo_init_iter10_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter10_720x1280.onnx", 30 | "crestereo_init_iter20_180x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter20_180x320.onnx", 31 | "crestereo_init_iter20_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter20_240x320.onnx", 32 | "crestereo_init_iter20_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter20_480x640.onnx", 33 | "crestereo_init_iter20_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter20_720x1280.onnx", 34 | "crestereo_init_iter2_180x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter2_180x320.onnx", 35 | "crestereo_init_iter2_240x320.onnx": 
"https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter2_240x320.onnx", 36 | "crestereo_init_iter2_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter2_480x640.onnx", 37 | "crestereo_init_iter2_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter2_720x1280.onnx", 38 | "crestereo_init_iter5_180x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter5_180x320.onnx", 39 | "crestereo_init_iter5_240x320.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter5_240x320.onnx", 40 | "crestereo_init_iter5_480x640.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter5_480x640.onnx", 41 | "crestereo_init_iter5_720x1280.onnx": "https://github.com/nburrus/stereodemo/releases/download/v0.1-crestereo/crestereo_init_iter5_720x1280.onnx" 42 | } 43 | 44 | # Adapted from https://github.com/ibaiGorordo/ONNX-CREStereo-Depth-Estimation 45 | # https://github.com/PINTO0309/PINTO_model_zoo/tree/main/284_CREStereo 46 | class CREStereo(StereoMethod): 47 | def __init__(self, config: Config): 48 | super().__init__("CRE Stereo (CVPR 2022)", 49 | "Practical Stereo Matching via Cascaded Recurrent Network with Adaptive Correlation. Pre-trained on a large range of datasets.", 50 | {}, 51 | config) 52 | self.reset_defaults() 53 | 54 | self._loaded_session = None 55 | self._loaded_model_path = None 56 | 57 | def reset_defaults(self): 58 | self.parameters.update ({ 59 | "Iterations": EnumParameter("Number of iterations", 1, ["2", "5", "10", "20"]), 60 | "Mode": EnumParameter("Number of passes. The combined version does 2 passes, one to get an initial estimation and a second one to refine it.", 61 | 1, ["init", "combined"]), 62 | "Shape": EnumParameter("Processed image size", 1, ["320x240", "640x480", "1280x720"]) 63 | }) 64 | 65 | def compute_disparity(self, input: InputPair) -> StereoOutput: 66 | left_image, right_image = input.left_image, input.right_image 67 | cols, rows = self.parameters["Shape"].value.split('x') 68 | version = self.parameters["Mode"].value 69 | iters = self.parameters["Iterations"].value 70 | model_path = self.config.models_path / f'crestereo_{version}_iter{iters}_{rows}x{cols}.onnx' 71 | self._load_model (model_path) 72 | 73 | left_tensor = self._prepare_input(left_image) 74 | right_tensor = self._prepare_input(right_image) 75 | 76 | # Get the half resolution to calculate flow_init 77 | if self._has_flow: 78 | left_tensor_half = self._prepare_input(left_image, half=True) 79 | right_tensor_half = self._prepare_input(right_image, half=True) 80 | start = time.time() 81 | outputs = self._inference_with_flow(left_tensor_half, 82 | right_tensor_half, 83 | left_tensor, 84 | right_tensor) 85 | else: 86 | # Estimate the disparity map 87 | start = time.time() 88 | outputs = self._inference_without_flow(left_tensor, right_tensor) 89 | 90 | elapsed_time = time.time() - start 91 | 92 | disparity_map = self.process_output(outputs) 93 | 94 | if disparity_map.shape[:2] != left_image.shape[:2]: 95 | disparity_map = cv2.resize (disparity_map, (left_image.shape[1], left_image.shape[0]), cv2.INTER_NEAREST) 96 | x_scale = left_image.shape[1] / float(cols) 97 | disparity_map *= np.float32(x_scale) 98 | return StereoOutput(disparity_map, input.left_image, elapsed_time) 99 | 100 | def _download_model (self, model_path: Path): 101 | utils.download_model 
(urls[model_path.name], model_path) 102 | 103 | def _load_model(self, model_path: Path): 104 | if (self._loaded_model_path == model_path): 105 | return 106 | 107 | if not model_path.exists(): 108 | self._download_model (model_path) 109 | 110 | # To try with just one CPU core. 111 | # opts = onnxruntime.SessionOptions() 112 | # opts.intra_op_num_threads = 1 113 | # opts.inter_op_num_threads = 1 114 | # opts.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL 115 | assert Path(model_path).exists() 116 | self._loaded_model_path = model_path 117 | self._loaded_session = onnxruntime.InferenceSession(str(model_path), providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) 118 | 119 | # Get model info 120 | self.load_input_details() 121 | self.load_output_details() 122 | 123 | # Check if the model has init flow 124 | self._has_flow = len(self.input_names) > 2 125 | 126 | def _prepare_input(self, img, half=False): 127 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 128 | if half: 129 | img_input = cv2.resize( 130 | img, (self.input_width//2, self.input_height//2), cv2.INTER_AREA) 131 | else: 132 | img_input = cv2.resize( 133 | img, (self.input_width, self.input_height), cv2.INTER_AREA) 134 | img_input = img_input.transpose(2, 0, 1) 135 | img_input = img_input[np.newaxis, :, :, :] 136 | return img_input.astype(np.float32) 137 | 138 | def _inference_without_flow(self, left_tensor, right_tensor): 139 | 140 | return self._loaded_session.run(self.output_names, {self.input_names[0]: left_tensor, 141 | self.input_names[1]: right_tensor})[0] 142 | 143 | def _inference_with_flow(self, left_tensor_half, right_tensor_half, left_tensor, right_tensor): 144 | 145 | return self._loaded_session.run(self.output_names, {self.input_names[0]: left_tensor_half, 146 | self.input_names[1]: right_tensor_half, 147 | self.input_names[2]: left_tensor, 148 | self.input_names[3]: right_tensor})[0] 149 | 150 | def process_output(self, output): 151 | return np.squeeze(output[:, 0, :, :]) 152 | 153 | def load_input_details(self): 154 | model_inputs = self._loaded_session.get_inputs() 155 | self.input_names = [ 156 | model_inputs[i].name for i in range(len(model_inputs))] 157 | 158 | self.input_shape = model_inputs[-1].shape 159 | self.input_height = self.input_shape[2] 160 | self.input_width = self.input_shape[3] 161 | 162 | def load_output_details(self): 163 | model_outputs = self._loaded_session.get_outputs() 164 | self.output_names = [ 165 | model_outputs[i].name for i in range(len(model_outputs))] 166 | 167 | self.output_shape = model_outputs[0].shape 168 | -------------------------------------------------------------------------------- /stereodemo/visualizer.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import time 3 | from typing import Dict, List, Optional, Tuple 4 | from dataclasses import dataclass 5 | from abc import abstractmethod 6 | from concurrent.futures import ThreadPoolExecutor 7 | 8 | import numpy as np 9 | import cv2 10 | 11 | import open3d as o3d 12 | import open3d.visualization.gui as gui 13 | import open3d.visualization.rendering as rendering 14 | 15 | from .methods import IntParameter, EnumParameter, StereoOutput, StereoMethod, Calibration, InputPair 16 | 17 | disparity_window = None 18 | 19 | class ImageWindow: 20 | def __init__(self, name: str, size: Tuple[int, int]): 21 | self.name = name 22 | self.window = gui.Application.instance.create_window(name, size[0], size[1]) 23 | self.image_widget = gui.ImageWidget() 24 | 
self.window.add_child(self.image_widget) 25 | 26 | def update_image(self, image: np.ndarray): 27 | image_geom = o3d.geometry.Image(image) 28 | self.image_widget.update_image(image_geom) 29 | self.window.post_redraw() 30 | 31 | class ImageWindowsManager: 32 | def __init__(self): 33 | self.windows_by_name = {} 34 | 35 | def imshow(self, name: str, image: np.ndarray, window_title: Optional[str], max_size: int): 36 | if name not in self.windows_by_name: 37 | rows, cols, _ = image.shape 38 | if cols > rows: 39 | initial_size = max_size, int(max_size * rows / cols) 40 | else: 41 | initial_size = int(max_size * cols / rows), max_size 42 | self.windows_by_name[name] = ImageWindow(name, initial_size) 43 | self.windows_by_name[name].update_image(image) 44 | if window_title is not None: 45 | self.windows_by_name[name].title = window_title 46 | 47 | image_windows_manager = ImageWindowsManager() 48 | 49 | def imshow (name: str, image: np.ndarray, window_title=None, max_size=640): 50 | global image_windows_manager 51 | if image_windows_manager is None: 52 | image_windows_manager = ImageWindowsManager() 53 | image_windows_manager.imshow(name, image, window_title, max_size) 54 | 55 | def color_disparity (disparity_map: np.ndarray, calibration: Calibration): 56 | min_disp = (calibration.fx * calibration.baseline_meters) / calibration.depth_range[1] 57 | # disparity_pixels = (calibration.fx * calibration.baseline_meters) / depth_meters 58 | max_disp = (calibration.fx * calibration.baseline_meters) / calibration.depth_range[0] 59 | norm_disparity_map = 255*((disparity_map-min_disp) / (max_disp-min_disp)) 60 | disparity_color = cv2.applyColorMap(cv2.convertScaleAbs(norm_disparity_map, 1), cv2.COLORMAP_VIRIDIS) 61 | return disparity_color 62 | 63 | def show_color_disparity (name: str, color_disparity: np.ndarray): 64 | imshow ("StereoDemo - Disparity", color_disparity, name) 65 | 66 | class Settings: 67 | def __init__(self): 68 | self.show_axes = False 69 | 70 | class Source: 71 | def __init__(self): 72 | pass 73 | 74 | @abstractmethod 75 | def is_live(self) -> bool: 76 | """Whether the source is capture live images or not""" 77 | return False 78 | 79 | def selected_index (self) -> int: 80 | return 0 81 | 82 | @abstractmethod 83 | def get_next_pair(self) -> InputPair: 84 | return InputPair(None, None, None, None) 85 | 86 | def get_pair_at_index(self, idx: int) -> InputPair: 87 | return InputPair(None, None, None, None) 88 | 89 | def get_pair_list(self) -> List[str]: 90 | return [] 91 | 92 | class Visualizer: 93 | def __init__(self, stereo_methods: Dict[str, StereoMethod], source: Source): 94 | gui.Application.instance.initialize() 95 | 96 | self.vis = gui.Application.instance 97 | self.source = source 98 | 99 | self.executor = ThreadPoolExecutor(max_workers=1) 100 | self.executor_future = None 101 | self._progress_dialog = None 102 | self._last_progress_update_time = None 103 | 104 | self.stereo_methods = stereo_methods 105 | self.stereo_methods_output = {} 106 | self.input = InputPair (None, None, None, None) 107 | self._downsample_factor = 0 108 | 109 | self.window = gui.Application.instance.create_window("StereoDemo", 1280, 1024) 110 | w = self.window # to make the code more concise 111 | 112 | self.settings = Settings() 113 | 114 | # 3D widget 115 | self._scene = gui.SceneWidget() 116 | self._scene.scene = rendering.Open3DScene(w.renderer) 117 | # self._scene.scene.show_ground_plane(True, rendering.Scene.GroundPlane.XZ) 118 | self._scene.set_view_controls(gui.SceneWidget.Controls.ROTATE_CAMERA) 119 | 
self._scene.set_on_key(self._on_key_pressed) 120 | 121 | self._clear_outputs () 122 | 123 | for name, o in self.stereo_methods_output.items(): 124 | if o.point_cloud is not None: 125 | self._scene.scene.add_geometry(name, o.point_cloud, rendering.MaterialRecord()) 126 | 127 | self._reset_camera() 128 | 129 | em = w.theme.font_size 130 | self.separation_height = int(round(0.5 * em)) 131 | self._settings_panel = gui.Vert(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 132 | 133 | self._next_image_button = gui.Button("Next Image") 134 | self._next_image_button.set_on_clicked(self._next_image_clicked) 135 | self._settings_panel.add_child(self._next_image_button) 136 | if not self.source.is_live(): 137 | # self._next_image_button = gui.Button("Next") 138 | # self._next_image_button.set_on_clicked(self._next_image_clicked) 139 | # horiz.add_child(self._next_image_button) 140 | 141 | # self.images_combo = gui.ListView() 142 | # input_pairs = self.source.get_pair_list() 143 | # self.images_combo.set_items(input_pairs) 144 | # self.images_combo.selected_index = 0 145 | # self.images_combo.set_max_visible_items(3) 146 | # self.images_combo.set_on_selection_changed(self._image_selected) 147 | # self.images_combo.tooltip = self.images_combo.selected_value 148 | # self._settings_panel.add_child(self.images_combo) 149 | # self._settings_panel.add_fixed(self.separation_height) 150 | # horiz.add_child(self.images_combo) 151 | 152 | self._settings_panel.add_fixed(self.separation_height) 153 | self.images_combo = gui.Combobox() 154 | input_pairs = self.source.get_pair_list() 155 | for pair_name in input_pairs: 156 | self.images_combo.add_item(pair_name) 157 | self.images_combo.selected_index = 0 158 | self.images_combo.set_on_selection_changed(self._image_selected) 159 | self._settings_panel.add_child(self.images_combo) 160 | self._settings_panel.add_fixed(self.separation_height) 161 | else: 162 | self.images_combo = None 163 | 164 | horiz = gui.Horiz(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 165 | label = gui.Label("Input downsampling") 166 | label.tooltip = "Number of /2 downsampling steps to apply on the input" 167 | horiz.add_child(label) 168 | downsampling_slider = gui.Slider(gui.Slider.INT) 169 | downsampling_slider.set_limits(0, 4) 170 | downsampling_slider.int_value = self._downsample_factor 171 | downsampling_slider.set_on_value_changed(self._downsampling_changed) 172 | horiz.add_child(downsampling_slider) 173 | self._settings_panel.add_child(horiz) 174 | 175 | self._settings_panel.add_fixed(self.separation_height) 176 | 177 | self.algo_list = gui.ListView() 178 | self.algo_list.set_items(list(stereo_methods.keys())) 179 | self.algo_list.selected_index = 0 180 | self.algo_list.set_max_visible_items(8) 181 | self.algo_list.set_on_selection_changed(self._on_algo_list_selected) 182 | self._settings_panel.add_child(self.algo_list) 183 | 184 | self.method_params_proxy = gui.WidgetProxy() 185 | self._settings_panel.add_child (self.method_params_proxy) 186 | 187 | self.last_runtime = gui.Label("") 188 | self._settings_panel.add_child (self.last_runtime) 189 | 190 | self.input_status = gui.Label("No input.") 191 | self._settings_panel.add_child (self.input_status) 192 | 193 | view_ctrls = gui.CollapsableVert("View controls", 0.25 * em, gui.Margins(em, 0, 0, 0)) 194 | reset_cam_button = gui.Button("Reset Camera") 195 | reset_cam_button.set_on_clicked(self._reset_camera) 196 | view_ctrls.add_child(reset_cam_button) 197 | # self._show_axes = gui.Checkbox("Show axes") 198 | # 
self._show_axes.set_on_checked(self._on_show_axes) 199 | # view_ctrls.add_child(self._show_axes) 200 | 201 | horiz = gui.Horiz(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 202 | label = gui.Label("Max depth (m)") 203 | label.tooltip = "Max depth to render in meters" 204 | horiz.add_child(label) 205 | self.depth_range_slider = gui.Slider(gui.Slider.DOUBLE) 206 | self.depth_range_slider.set_limits(0.5, 1000) 207 | self.depth_range_slider.double_value = 100 208 | self.depth_range_slider.set_on_value_changed(self._depth_range_slider_changed) 209 | horiz.add_child(self.depth_range_slider) 210 | view_ctrls.add_child(horiz) 211 | 212 | self._depth_range_manually_changed = False 213 | 214 | self._settings_panel.add_fixed(self.separation_height) 215 | self._settings_panel.add_child(view_ctrls) 216 | 217 | w.set_on_layout(self._on_layout) 218 | w.add_child(self._scene) 219 | w.add_child(self._settings_panel) 220 | 221 | self._on_algo_list_selected(self.algo_list.selected_value, False) 222 | self._apply_settings() 223 | 224 | if self.source.is_live(): 225 | self.read_next_pair () 226 | else: 227 | self._image_selected (None, None) 228 | 229 | def _on_key_pressed (self, keyEvent): 230 | if keyEvent.key == gui.KeyName.Q: 231 | self.vis.quit() 232 | return gui.SceneWidget.EventCallbackResult.HANDLED 233 | return gui.SceneWidget.EventCallbackResult.IGNORED 234 | 235 | def _downsampling_changed(self, v): 236 | self._downsample_factor = int(v) 237 | self._process_input (self.full_res_input) 238 | 239 | def _downsample_input (self, input: InputPair): 240 | for i in range(0, self._downsample_factor): 241 | if np.max(input.left_image.shape[:2]) < 250: 242 | break 243 | input.left_image = cv2.pyrDown(input.left_image) 244 | input.right_image = cv2.pyrDown(input.right_image) 245 | if input.input_disparity is not None: 246 | input.input_disparity = cv2.pyrDown(input.input_disparity) 247 | input.calibration.downsample(input.left_image.shape[1], input.left_image.shape[0]) 248 | 249 | def read_next_pair (self): 250 | input = self.source.get_next_pair () 251 | self._update_pair_index () 252 | self._process_input (input) 253 | 254 | def _process_input (self, input): 255 | if self._downsample_factor > 0: 256 | self.full_res_input = input 257 | input = copy.deepcopy(input) 258 | self._downsample_input (input) 259 | else: 260 | self.full_res_input = input 261 | 262 | if not self._depth_range_manually_changed: 263 | self.depth_range_slider.double_value = input.calibration.depth_range[1] 264 | 265 | imshow ("StereoDemo - Input image", np.hstack([input.left_image, input.right_image])) 266 | 267 | self.input = input 268 | self.input_status.text = f"Input: {input.left_image.shape[1]}x{input.left_image.shape[0]} " + input.status 269 | 270 | if self.input.has_data(): 271 | assert self.input.left_image.shape[1] == self.input.calibration.width and self.input.left_image.shape[0] == self.input.calibration.height 272 | self.o3dCameraIntrinsic = o3d.camera.PinholeCameraIntrinsic(width=self.input.left_image.shape[1], 273 | height=self.input.left_image.shape[0], 274 | fx=self.input.calibration.fx, 275 | fy=self.input.calibration.fy, 276 | cx=self.input.calibration.cx0, 277 | cy=self.input.calibration.cy) 278 | 279 | self._clear_outputs () 280 | self._run_current_method () 281 | 282 | def update_once (self): 283 | if self.executor_future is not None: 284 | self._check_run_complete() 285 | return gui.Application.instance.run_one_tick() 286 | 287 | def _clear_outputs (self): 288 | for name in self.stereo_methods.keys(): 
289 | self.stereo_methods_output[name] = StereoOutput( 290 | disparity_pixels=None, 291 | color_image_bgr=None, 292 | computation_time=np.nan) 293 | if self._scene.scene.has_geometry(name): 294 | self._scene.scene.remove_geometry(name) 295 | 296 | def _reset_camera (self): 297 | # bbox = o3d.geometry.AxisAlignedBoundingBox(np.array([-10, 0,-10]), np.array([0,3,0])) 298 | bbox = self._scene.scene.bounding_box 299 | min_bound, max_bound = bbox.min_bound.copy(), bbox.max_bound.copy() 300 | min_bound[0] = min(min_bound[0], -5) 301 | min_bound[2] = min(min_bound[2], -5) 302 | max_bound[0] = max(max_bound[0], 5) 303 | max_bound[1] = max(max_bound[1], 2) 304 | max_bound[2] = 0 305 | bbox.min_bound, bbox.max_bound = min_bound, max_bound 306 | 307 | self._scene.setup_camera(60.0, bbox, np.array([0,0,0])) 308 | eye = np.array([0, 0.5, 1.0]) 309 | lookat = np.array([0, 0, -1.0]) 310 | up = np.array([0, 1.0, 0]) 311 | self._scene.look_at(lookat, eye, up) 312 | 313 | if self.input.has_data(): 314 | self._depth_range_manually_changed = False 315 | self.depth_range_slider.double_value = self.input.calibration.depth_range[1] 316 | self._update_rendering () 317 | 318 | def _build_stereo_method_widgets(self, name): 319 | em = self.window.theme.font_size 320 | method = self.stereo_methods[name] 321 | container = gui.Vert(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 322 | label = gui.Label(method.description) 323 | label.text_color = gui.Color(1.0, 0.5, 0.0) 324 | container.add_child(label) 325 | self._reload_settings_functions = [] 326 | for name, param in method.parameters.items(): 327 | if isinstance(param, IntParameter): 328 | horiz = gui.Horiz(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 329 | label = gui.Label(name) 330 | label.tooltip = param.description 331 | horiz.add_child(label) 332 | slider = gui.Slider(gui.Slider.INT) 333 | slider.set_limits(param.min, param.max) 334 | slider.int_value = param.value 335 | def set_value_from_method(slider=slider, method=method, name=name): 336 | slider.int_value = method.parameters[name].value 337 | self._reload_settings_functions.append(set_value_from_method) 338 | # workaround late binding 339 | # https://docs.python-guide.org/writing/gotchas/#:~:text=Python's%20closures%20are%20late%20binding,surrounding%20scope%20at%20call%20time. 
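                    # Illustrative note: without these default-argument bindings every callback
                    # created in this loop would close over the same loop variables and end up
                    # acting on the last parameter only, e.g.
                    #     fns = [lambda: print(n) for n in ("a", "b")]  # both print "b"
                    # Passing method=method, name=name, slider=slider freezes the per-iteration
                    # values at definition time.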
340 | def callback(value, method=method, name=name, slider=slider): 341 | p = method.parameters[name] 342 | p.set_value(int(value)) 343 | slider.int_value = p.value 344 | slider.set_on_value_changed(callback) 345 | horiz.add_child(slider) 346 | container.add_child(horiz) 347 | elif isinstance(param, EnumParameter): 348 | horiz = gui.Horiz(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 349 | label = gui.Label(name) 350 | label.tooltip = param.description 351 | horiz.add_child(label) 352 | combo = gui.Combobox() 353 | for value in param.values: 354 | combo.add_item(value) 355 | combo.selected_index = param.index 356 | def callback(combo_idx, combo_val, method=method, name=name, combo=combo): 357 | method.parameters[name].set_index(combo.selected_index) 358 | combo.set_on_selection_changed(callback) 359 | def set_value_from_method(combo=combo, method=method, name=name): 360 | combo.selected_index = method.parameters[name].index 361 | self._reload_settings_functions.append(set_value_from_method) 362 | horiz.add_child(combo) 363 | container.add_child(horiz) 364 | 365 | horiz = gui.Horiz(0, gui.Margins(0.25 * em, 0.25 * em, 0.25 * em, 0.25 * em)) 366 | apply_button = gui.Button("Apply") 367 | apply_button.horizontal_padding_em = 3 368 | apply_button.set_on_clicked(self._run_current_method) 369 | horiz.add_child(apply_button) 370 | horiz.add_fixed(self.separation_height) 371 | reset_default = gui.Button("Reset defaults") 372 | reset_default.set_on_clicked(self._reset_method_defaults) 373 | horiz.add_child(reset_default) 374 | container.add_child(horiz) 375 | return container 376 | 377 | def _on_algo_list_selected(self, name: str, is_dbl_click: bool): 378 | self.method_params_proxy.set_widget(self._build_stereo_method_widgets(name)) 379 | self._update_runtime () 380 | for other_name in self.stereo_methods_output.keys(): 381 | self._scene.scene.show_geometry(other_name, False) 382 | self._scene.scene.show_geometry(name, True) 383 | self._apply_settings() 384 | if self.stereo_methods_output[name].disparity_pixels is None: 385 | self._run_current_method () 386 | if self.stereo_methods_output[name].disparity_color is not None: 387 | show_color_disparity (name, self.stereo_methods_output[name].disparity_color) 388 | 389 | def _on_show_axes(self, show): 390 | self.settings.show_axes = show 391 | self._apply_settings() 392 | 393 | def _next_image_clicked(self): 394 | self.read_next_pair () 395 | 396 | def _image_selected(self, combo_idx, combo_val): 397 | idx = self.images_combo.selected_index 398 | input = self.source.get_pair_at_index (idx) 399 | self._process_input (input) 400 | 401 | def _update_pair_index (self): 402 | if self.images_combo is not None: 403 | self.images_combo.selected_index = self.source.selected_index() 404 | 405 | def _apply_settings(self): 406 | self._scene.scene.show_axes(self.settings.show_axes) 407 | 408 | def _reset_method_defaults(self): 409 | name = self.algo_list.selected_value 410 | method = self.stereo_methods[name] 411 | method.reset_defaults() 412 | for m in self._reload_settings_functions: 413 | m() 414 | 415 | def _check_run_complete(self): 416 | if not self.executor_future.done(): 417 | if self._progress_dialog is None: 418 | self._progress_dialog = self._show_progress_dialog("Running the current method", f"Computing {self.algo_list.selected_value}...") 419 | now = time.time() 420 | if (now - self._last_progress_update_time > 0.1): 421 | self._last_progress_update_time = now 422 | self._run_progress.value += (1.0 - self._run_progress.value) / 16.0 423 | 
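                # Each update closes 1/16 of the remaining gap, so the bar creeps
                # asymptotically toward 100% without ever finishing while the worker
                # thread is still busy.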
return 424 | 425 | if self._progress_dialog: 426 | self.window.close_dialog () 427 | self._progress_dialog = None 428 | 429 | stereo_output = self.executor_future.result() 430 | self.executor_future = None 431 | 432 | x0,y0,x1,y1 = self.input.calibration.left_image_rect_normalized 433 | x0 = int(x0*stereo_output.disparity_pixels.shape[1] + 0.5) 434 | x1 = int(x1*stereo_output.disparity_pixels.shape[1] + 0.5) 435 | y0 = int(y0*stereo_output.disparity_pixels.shape[0] + 0.5) 436 | y1 = int(y1*stereo_output.disparity_pixels.shape[0] + 0.5) 437 | valid_mask = np.zeros(stereo_output.disparity_pixels.shape, dtype=np.uint8) 438 | valid_mask[y0:y1, x0:x1] = 1 439 | stereo_output.disparity_pixels[valid_mask == 0] = -1.0 440 | 441 | name = self.algo_list.selected_value 442 | stereo_output.disparity_color = color_disparity (stereo_output.disparity_pixels, self.input.calibration) 443 | show_color_disparity (name, stereo_output.disparity_color) 444 | 445 | self.stereo_methods_output[name] = stereo_output 446 | self._update_rendering ([name]) 447 | self._update_runtime () 448 | 449 | def _depth_range_slider_changed(self, v: float): 450 | self._depth_range_manually_changed = True 451 | self._update_rendering() 452 | 453 | def _update_rendering (self, names_to_update=None): 454 | if names_to_update is None: 455 | names_to_update = list(self.stereo_methods_output.keys()) 456 | 457 | selected_name = self.algo_list.selected_value 458 | 459 | for name in names_to_update: 460 | stereo_output = self.stereo_methods_output[name] 461 | if stereo_output.disparity_pixels is None: 462 | continue 463 | 464 | depth_meters = StereoMethod.depth_meters_from_disparity(stereo_output.disparity_pixels, self.input.calibration) 465 | 466 | if self._scene.scene.has_geometry(name): 467 | self._scene.scene.remove_geometry(name) 468 | 469 | 470 | o3d_color = o3d.geometry.Image(cv2.cvtColor(stereo_output.color_image_bgr, cv2.COLOR_BGR2RGB)) 471 | o3d_depth = o3d.geometry.Image(depth_meters) 472 | rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(o3d_color, 473 | o3d_depth, 474 | 1, 475 | depth_trunc=self.depth_range_slider.int_value, 476 | convert_rgb_to_intensity=False) 477 | stereo_output.point_cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, self.o3dCameraIntrinsic) 478 | stereo_output.point_cloud.transform([[1,0,0,0],[0,-1,0,0],[0,0,-1,0],[0,0,0,1]]) 479 | self._scene.scene.add_geometry(name, stereo_output.point_cloud, rendering.MaterialRecord()) 480 | self._scene.scene.show_geometry(name, name == selected_name) 481 | 482 | def _run_current_method(self): 483 | if self.executor_future is not None: 484 | return self._check_run_complete () 485 | 486 | if not self.input.has_data(): 487 | return 488 | 489 | name = self.algo_list.selected_value 490 | 491 | def do_beefy_work(): 492 | stereo_output = self.stereo_methods[name].compute_disparity (self.input) 493 | return stereo_output 494 | 495 | self._last_progress_update_time = time.time() 496 | self.executor_future = self.executor.submit (do_beefy_work) 497 | 498 | def _show_progress_dialog(self, title, message): 499 | # A Dialog is just a widget, so you make its child a layout just like 500 | # a Window. 501 | dlg = gui.Dialog(title) 502 | 503 | # Add the message text 504 | em = self.window.theme.font_size 505 | dlg_layout = gui.Vert(em, gui.Margins(em, em, em, em)) 506 | dlg_layout.add_child(gui.Label(message)) 507 | 508 | # Add the Ok button. We need to define a callback function to handle 509 | # the click. 
510 | self._run_progress = gui.ProgressBar() 511 | self._run_progress.value = 0.1 # 10% complete 512 | prog_layout = gui.Horiz(em) 513 | prog_layout.add_child(self._run_progress) 514 | dlg_layout.add_child(prog_layout) 515 | 516 | dlg.add_child(dlg_layout) 517 | self.window.show_dialog(dlg) 518 | return dlg 519 | 520 | def _update_runtime (self): 521 | name = self.algo_list.selected_value 522 | output = self.stereo_methods_output[name] 523 | if np.isnan(output.computation_time): 524 | self.last_runtime.text = "No output yet." 525 | else: 526 | self.last_runtime.text = f"Computation time: {output.computation_time*1e3:.1f} ms" 527 | 528 | def _on_layout(self, layout_context): 529 | # The on_layout callback should set the frame (position + size) of every 530 | # child correctly. After the callback is done the window will layout 531 | # the grandchildren. 532 | settings_width = 17 * layout_context.theme.font_size 533 | r = self.window.content_rect 534 | self._scene.frame = gui.Rect(0, r.y, r.get_right() - settings_width, r.height) 535 | # height = min( 536 | # r.height, 537 | # self._settings_panel.calc_preferred_size( 538 | # layout_context, gui.Widget.Constraints()).height) 539 | height = r.height 540 | self._settings_panel.frame = gui.Rect(r.get_right() - settings_width, r.y, settings_width, height) 541 | --------------------------------------------------------------------------------
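The pinhole-stereo relation that color_disparity and StereoMethod.depth_meters_from_disparity rely on is depth = fx * baseline / disparity for a rectified pair. A minimal, self-contained sketch of that conversion (illustrative only, not part of the repository; the helper name and the example numbers are assumptions):

import numpy as np

def depth_from_disparity(disparity_px: np.ndarray, fx: float, baseline_m: float) -> np.ndarray:
    """Illustrative helper: depth_m = fx * baseline_m / disparity_px for a rectified pair.

    Pixels flagged as invalid (disparity <= 0, as the methods above use -1) map to 0 depth.
    """
    depth = np.zeros_like(disparity_px, dtype=np.float32)
    valid = disparity_px > 0
    depth[valid] = (fx * baseline_m) / disparity_px[valid]
    return depth

# With the placeholder calibration FileListSource builds for a 640x480 pair:
# fx = 0.8 * 480 = 384 px and baseline = 0.075 m, so a 20 px disparity is about 1.44 m away.
print(depth_from_disparity(np.array([20.0, -1.0]), fx=384.0, baseline_m=0.075))  # -> [1.44 0.]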