├── .gitignore
├── LICENSE
├── README.md
├── StereoVision.py
├── another_main.py
├── main.py
├── test_l.png
└── test_r.png


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Hou Yuzhou
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # StereoVision-tensorflow
使用 TensorFlow 实现双目视觉中的 BM、SGBM 算法（目前代码中仅包含 BM 的实现）。
3 | 


--------------------------------------------------------------------------------
/StereoVision.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | 
  4 | 
  5 | class BM:
  6 |     def __init__(self,  image_height, image_width, WindowSize=15, minDisparity=0, numberOfDisparities=32,
  7 |                  textureThreshold=10, uniquenessRatio=15, speckleWindowSize=100,
  8 |                  speckleRange=32, preFilterCap = 31, disp12MaxDiff=1, diffMethod='SAD'):
  9 |         self.image_height = image_height
 10 |         self.image_width = image_width
 11 |         self.WindowSize = WindowSize
 12 |         self.minDisparity = minDisparity
 13 |         self.numberOfDisparities = numberOfDisparities
 14 |         self.textureThreshold = textureThreshold
 15 |         self.uniquenessRatio = uniquenessRatio
 16 |         self.speckleWindowsSize = speckleWindowSize
 17 |         self.speckleRange = speckleRange
 18 |         self.preFilterCap = preFilterCap
 19 |         self.disp12MaxDiff = disp12MaxDiff
 20 |         self.left_image_raw = tf.placeholder(tf.int32, [self.image_height, self.image_width, 1])
 21 |         self.right_image_raw = tf.placeholder(tf.int32, [self.image_height, self.image_width, 1])
 22 |         self.diffMethod = diffMethod
 23 |         self.left_image, self.right_image = self.prefilterXSobel()
 24 | 
 25 |     def prefilterXSobel(self):
 26 |         weights = tf.Variable(tf.constant([[-1., 0., 1.],
 27 |                                            [-2., 0., 2.],
 28 |                                            [-1., 0., 1.]]))
 29 |         weights = tf.reshape(weights, [3, 3, 1, 1])
 30 |         left_image = tf.reshape(tf.cast(self.left_image_raw, tf.float32),
 31 |                                      [1, self.image_height, self.image_width, 1])
 32 |         right_image = tf.reshape(tf.cast(self.right_image_raw, tf.float32),
 33 |                                         [1, self.image_height, self.image_width, 1])
 34 |         left_image = tf.nn.conv2d(left_image, weights, [1, 1, 1, 1], padding='SAME')
 35 |         right_image = tf.nn.conv2d(right_image, weights, [1, 1, 1, 1], padding='SAME')
 36 |         left_image = tf.where(tf.greater(left_image, -self.preFilterCap),
 37 |                                    tf.where(tf.greater(left_image, self.preFilterCap),
 38 |                                             2. * self.preFilterCap * tf.ones_like(left_image),
 39 |                                             left_image + self.preFilterCap), 0. * left_image)
 40 |         right_image = tf.where(tf.greater(right_image, -self.preFilterCap),
 41 |                                    tf.where(tf.greater(right_image, self.preFilterCap),
 42 |                                             2. * self.preFilterCap * tf.ones_like(right_image),
 43 |                                             right_image + self.preFilterCap), 0. * right_image)
 44 |         left_image = tf.squeeze(tf.cast(left_image, tf.int32), 0)
 45 |         right_image = tf.squeeze(tf.cast(right_image, tf.int32), 0)
 46 |         return left_image, right_image
 47 | 
 48 |     def  cvFindCorrespondenceBM(self):
 49 |         win = self.WindowSize // 2
 50 |         shape = self.left_image.get_shape()
 51 |         disp = tf.get_variable('disp', [shape[0], shape[1], 1], tf.int32, tf.zeros_initializer)
 52 |         for i in range(0, shape[0]  - self.WindowSize + 1):
 53 |             for j in range(0, shape[1]  - self.WindowSize + 1 - self.numberOfDisparities):
 54 |                 bestMatchSoFar = self.coMatch(i, j)
 55 |                 indices = tf.constant([[(i + win) * self.image_height + (j + win)]])
 56 |                 updates = tf.reshape(bestMatchSoFar, [1])
 57 |                 disp_shape = tf.constant([self.image_height * self.image_width])
 58 |                 scatter = tf.reshape(tf.scatter_nd(indices, updates, disp_shape), [shape[0], shape[1], shape[2]])
 59 |                 disp = tf.add(disp, scatter)
 60 |         return disp
 61 | 
 62 |     def coMatch(self, i, j):
 63 |         prevdiff_1 = tf.Variable(tf.constant(2147483647))  # 32767
 64 |         prevdiff_2 = tf.Variable(tf.constant(2147483647))
 65 |         bestMatchSoFar = tf.Variable(tf.constant(self.minDisparity))
 66 |         bestMatchSoFar_1 = tf.Variable(tf.constant(self.minDisparity))
 67 |         bestMatchSoFar_2 = tf.Variable(tf.constant(self.minDisparity))
 68 |         for dispRange in range(self.minDisparity, self.numberOfDisparities):
 69 |             block_left = tf.image.crop_to_bounding_box(self.left_image, i, j,
 70 |                                                        self.WindowSize, self.WindowSize)
 71 |             block_right = tf.image.crop_to_bounding_box(self.right_image, i, j + dispRange,
 72 |                                                         self.WindowSize, self.WindowSize)
 73 |             if self.diffMethod == 'SSD':
 74 |                 diff = tf.reduce_sum(tf.square(tf.subtract(block_left, block_right)))
 75 |             elif self.diffMethod == 'NCC':
 76 |                 diff = tf.cast((tf.reduce_sum(block_left * block_right)) / \
 77 |                        (tf.reduce_sum(tf.square(block_left) * tf.square(block_right))), tf.float32)
 78 |                 prevdiff_1 = tf.cast(prevdiff_1, tf.float32)
 79 |             else:
 80 |                 diff = tf.reduce_sum(tf.abs(tf.subtract(block_left, block_right)))
 81 |             bestMatchSoFar_1 = tf.where(tf.greater(prevdiff_1, diff), dispRange,
 82 |                                         bestMatchSoFar_1)
 83 |             prevdiff_1  = tf.where(tf.greater(prevdiff_1, diff), diff,
 84 |                                    prevdiff_1)
 85 | 
 86 |         for dispRange in range(self.minDisparity, self.numberOfDisparities):
 87 |             co_block_right = tf.image.crop_to_bounding_box(self.right_image, i, j + bestMatchSoFar_1,
 88 |                                                            self.WindowSize, self.WindowSize)
 89 |             co_block_left = tf.image.crop_to_bounding_box(self.left_image, i, j + dispRange,
 90 |                                                           self.WindowSize, self.WindowSize)
 91 |             if self.diffMethod == 'SSD':
 92 |                 diff = tf.reduce_sum(tf.square(tf.subtract(co_block_left, co_block_right)))
 93 |             elif self.diffMethod == 'NCC':
 94 |                 diff = tf.cast((tf.reduce_sum(co_block_left * co_block_right)) / \
 95 |                        (tf.reduce_sum(tf.square(co_block_left) * tf.square(co_block_right))), tf.float32)
 96 |                 prevdiff_2 = tf.cast(prevdiff_2, tf.float32)
 97 |             else:
 98 |                 diff = tf.reduce_sum(tf.abs(tf.subtract(co_block_left, co_block_right)))
 99 |             bestMatchSoFar_2 = tf.where(tf.greater(prevdiff_2, diff), bestMatchSoFar_1 - dispRange,
100 |                                         bestMatchSoFar_2)
101 |             prevdiff_2 = tf.where(tf.greater(prevdiff_2, diff), diff,
102 |                                   prevdiff_2)
103 |             bestMatchSoFar = tf.where(tf.greater(tf.abs(bestMatchSoFar_1 - bestMatchSoFar_2), self.disp12MaxDiff),
104 |                                       bestMatchSoFar, bestMatchSoFar_1)
105 |         return bestMatchSoFar
106 | 


--------------------------------------------------------------------------------
/another_main.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from StereoVision import BM
 3 | import numpy as np
 4 | import cv2
 5 | 
 6 | 
 7 | def run():
 8 |     left_image_path = 'test_l.png'
 9 |     right_image_path = 'test_r.png'
10 |     left = cv2.imread(left_image_path, cv2.IMREAD_GRAYSCALE)
11 |     right = cv2.imread(right_image_path, cv2.IMREAD_GRAYSCALE)
12 |     left = cv2.resize(left, (32, 24))
13 |     right = cv2.resize(right, (32, 24))
14 |     shape = left.shape
15 | 
16 |     bm = BM(shape[0], shape[1], WindowSize=5, numberOfDisparities=4, diffMethod='SAD')
17 |     disp = bm.cvFindCorrespondenceBM()
18 |     sess = tf.Session()
19 |     sess.run(tf.global_variables_initializer())
20 |     d = sess.run(disp, feed_dict={bm.left_image_raw:np.reshape(left, (bm.image_height,
21 |                                                                       bm.image_width, 1)),
22 |                                   bm.right_image_raw:np.reshape(right, (bm.image_height,
23 |                                                                        bm.image_width, 1))})
24 |     d = np.reshape(d, (24, 32))
25 |     cv2.imshow('disparity', d)
26 |     cv2.imshow('left_image', left)
27 |     cv2.imshow('right_image', right)
28 |     print(d)
29 |     cv2.waitKey(0)
30 | 
31 | 
def main():
    """Script entry point: run the image-pair demo."""
    run()


if __name__ == "__main__":
    main()
38 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from StereoVision import BM
 3 | import numpy as np
 4 | 
 5 | 
 6 | def run():
 7 |     left = tf.Variable(tf.constant([[0, 0, 0, 0, 0, 0, 0],
 8 |                                     [0, 0, 0, 0, 0, 0, 0],
 9 |                                     [0, 0, 0, 0, 0, 0, 0],
10 |                                     [0, 0, 1, 2, 3, 0, 0],
11 |                                     [0, 0, 2, 4, 6, 0, 0],
12 |                                     [0, 0, 3, 6, 9, 0, 0],
13 |                                     [0, 0, 0, 0, 0, 0, 0]]), dtype=tf.int32)
14 | 
15 |     right = tf.Variable(tf.constant([[0, 0, 0, 0, 0, 0, 0],
16 |                                     [0, 0, 0, 0, 0, 0, 0],
17 |                                     [0, 0, 0, 0, 0, 0, 0],
18 |                                     [0, 0, 0, 0, 1, 2, 3],
19 |                                     [0, 0, 0, 0, 2, 4, 6],
20 |                                     [0, 0, 0, 0, 3, 6, 9],
21 |                                     [0, 0, 0, 0, 0, 0, 0]]), dtype=tf.int32)
22 |     left = tf.reshape(left, [7, 7, 1])
23 |     right = tf.reshape(right, [7, 7, 1])
24 |     bm = BM(7, 7, 3, numberOfDisparities=3)
25 |     disp = bm.cvFindCorrespondenceBM()
26 |     # dist, sad, block_left, block_right, co_block_left, co_block_right = bm.coMatch(3, 2)
27 |     sess = tf.Session()
28 |     sess.run(tf.global_variables_initializer())
29 |     left, right = sess.run([left, right])
30 |     """
31 |     d, di, s, d_l, d_r, c_d_l, c_d_r = sess.run([disp, dist, sad, block_left, block_right,
32 |                                                  co_block_left, co_block_right],
33 |                                                 feed_dict={bm.left_image:left, bm.right_image:right})
34 |                                                 """
35 |     d = sess.run(disp, feed_dict={bm.left_image:left, bm.right_image:right})
36 |     # print("d_l :", np.reshape(d_l, (3, 3)), "d_r :", np.reshape(d_r, (3, 3)),
37 |           # "c_d_l :", np.reshape(c_d_l, (3, 3)), "c_d_r :", np.reshape(c_d_r, (3, 3)))
38 |     print(np.reshape(d, (bm.image_height, bm.image_width)))
39 | 
40 | 
def main():
    """Script entry point: run the synthetic 7x7 demo."""
    run()


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/test_l.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HEIDIES/StereoVision-tensorflow/d45064d3a5acdc0ca74f27a7851c9293e814ec7f/test_l.png


--------------------------------------------------------------------------------
/test_r.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HEIDIES/StereoVision-tensorflow/d45064d3a5acdc0ca74f27a7851c9293e814ec7f/test_r.png


--------------------------------------------------------------------------------