├── .github
    └── workflows
    │   └── pep8-check.yml
├── .gitignore
├── DoG.py
├── LICENSE
├── README.md
├── calibrate_camera.py
├── camera_stream.py
├── canny.py
├── chromaticity_lightness.py
├── cnn_ssd_detection.py
├── contour_edges.py
├── cycleimages.py
├── download-models.sh
├── eigenfaces.py
├── faster-rcnn.py
├── fcn_segmentation.py
├── gaussian.py
├── generic_interface.py
├── gradient_orientation.py
├── haar_cascade_detection.py
├── harris.py
├── hog.py
├── houghlines.py
├── kalman_tracking_live.py
├── lbp_cascade_detection.py
├── mask-rcnn.py
├── mog-background-subtraction.py
├── openpose.py
├── opticflow.py
├── pyramid.py
├── selective_search.py
├── sift_detection.py
├── sobel.py
├── squeezenet.py
├── stereo_sgbm.py
├── test_all.sh
└── yolo.py


/.github/workflows/pep8-check.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Python - PEP8
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ master ]
 9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v2
19 |     - name: Set up Python 3.8
20 |       uses: actions/setup-python@v2
21 |       with:
22 |         python-version: 3.8
23 |     - name: Install dependencies
24 |       run: |
25 |         python -m pip install --upgrade pip
26 |         pip install flake8 pytest
27 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28 |     - name: Lint with flake8
29 |       run: |
30 |         # stop the build if there are Python PEP8 style, syntax errors, undefined names, unused imports ...
31 |         flake8 . 
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | 
 27 | # PyInstaller
 28 | #  Usually these files are written by a python script from a template
 29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 30 | *.manifest
 31 | *.spec
 32 | 
 33 | # Installer logs
 34 | pip-log.txt
 35 | pip-delete-this-directory.txt
 36 | 
 37 | # Unit test / coverage reports
 38 | htmlcov/
 39 | .tox/
 40 | .coverage
 41 | .coverage.*
 42 | .cache
 43 | nosetests.xml
 44 | coverage.xml
 45 | *,cover
 46 | .hypothesis/
 47 | 
 48 | # Translations
 49 | *.mo
 50 | *.pot
 51 | 
 52 | # Django stuff:
 53 | *.log
 54 | local_settings.py
 55 | 
 56 | # Flask stuff:
 57 | instance/
 58 | .webassets-cache
 59 | 
 60 | # Scrapy stuff:
 61 | .scrapy
 62 | 
 63 | # Sphinx documentation
 64 | docs/_build/
 65 | 
 66 | # PyBuilder
 67 | target/
 68 | 
 69 | # IPython Notebook
 70 | .ipynb_checkpoints
 71 | 
 72 | # pyenv
 73 | .python-version
 74 | 
 75 | # celery beat schedule file
 76 | celerybeat-schedule
 77 | 
 78 | # dotenv
 79 | .env
 80 | 
 81 | # virtualenv
 82 | venv/
 83 | ENV/
 84 | 
 85 | # Spyder project settings
 86 | .spyderproject
 87 | 
 88 | # Rope project settings
 89 | .ropeproject
 90 | 
 91 | # specific to this repo
 92 | 
 93 | *.pb
 94 | *.xml
 95 | *.pbtxt
 96 | *.txt
 97 | *.caffemodel
 98 | *.prototxt
 99 | *.cfg
100 | *.weights
101 | *.names
102 | *.avi
103 | *.jpg
104 | calibration
105 | faster_rcnn_inception_v2_coco_2018_01_28
106 | mask_rcnn_inception_v2_coco_2018_01_28
107 | 


--------------------------------------------------------------------------------
/DoG.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example :  Difference of Gaussian (DoG) of a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2017-2019 Dept. Engineering & Dept. Computer Science,
 10 | #                         Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | #####################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | 
 19 | #####################################################################
 20 | 
 21 | keep_processing = True
 22 | 
 23 | # parse command line arguments for camera ID or video file
 24 | 
 25 | parser = argparse.ArgumentParser(
 26 |     description='Perform ' +
 27 |     sys.argv[0] +
 28 |     ' example operation on incoming camera/video image')
 29 | parser.add_argument(
 30 |     "-c",
 31 |     "--camera_to_use",
 32 |     type=int,
 33 |     help="specify camera to use",
 34 |     default=0)
 35 | parser.add_argument(
 36 |     "-r",
 37 |     "--rescale",
 38 |     type=float,
 39 |     help="rescale image by this factor",
 40 |     default=1.0)
 41 | parser.add_argument(
 42 |     "-s",
 43 |     "--set_resolution",
 44 |     type=int,
 45 |     nargs=2,
 46 |     help='override default camera resolution as H W')
 47 | parser.add_argument("-i", "--is_image", action='store_true',
 48 |                     help="specify file is an image, not a video")
 49 | parser.add_argument(
 50 |     'video_file',
 51 |     metavar='file',
 52 |     type=str,
 53 |     nargs='?',
 54 |     help='specify optional video file')
 55 | args = parser.parse_args()
 56 | 
 57 | #####################################################################
 58 | 
 59 | # this function is called as a call-back everytime the trackbar is moved
 60 | # (here we just do nothing)
 61 | 
 62 | 
 63 | def nothing(x):
 64 |     pass
 65 | 
 66 | #####################################################################
 67 | 
 68 | # define video capture object
 69 | 
 70 | 
 71 | try:
 72 |     # to use a non-buffered camera stream (via a separate thread)
 73 | 
 74 |     if not (args.video_file):
 75 |         import camera_stream
 76 |         cap = camera_stream.CameraVideoStream(use_tapi=True)
 77 |     else:
 78 |         cap = cv2.VideoCapture()  # not needed for video files
 79 | 
 80 | except BaseException:
 81 |     # if not then just use OpenCV default
 82 | 
 83 |     print("INFO: camera_stream class not found - camera input may be buffered")
 84 |     cap = cv2.VideoCapture()
 85 | 
 86 | # define display window name
 87 | 
 88 | window_name = "Live Camera Input"  # window name
 89 | window_nameU = "Gaussian  Upper"  # window name
 90 | window_nameL = "Gaussian  Lower"  # window name
 91 | window_nameDoG = "DoG"  # window name
 92 | 
 93 | # if command line arguments are provided try to read video_name
 94 | # otherwise default to capture from attached H/W camera
 95 | 
 96 | if (((args.video_file) and (cap.open(str(args.video_file))))
 97 |         or (cap.open(args.camera_to_use))):
 98 | 
 99 |     # create window by name (as resizable)
100 | 
101 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
102 |     cv2.namedWindow(window_nameL, cv2.WINDOW_NORMAL)
103 |     cv2.namedWindow(window_nameU, cv2.WINDOW_NORMAL)
104 |     cv2.namedWindow(window_nameDoG, cv2.WINDOW_NORMAL)
105 | 
106 |     # add some track bar controllers for settings
107 | 
108 |     sigmaU = 2  # greater than 7 seems to crash
109 |     cv2.createTrackbar("sigma U", window_nameU, sigmaU, 15, nothing)
110 |     sigmaL = 1  # greater than 7 seems to crash
111 |     cv2.createTrackbar("sigma L", window_nameL, sigmaL, 15, nothing)
112 | 
113 |     # override default camera resolution
114 | 
115 |     if (args.set_resolution is not None):
116 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
117 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
118 | 
119 |     print("INFO: input resolution : (",
120 |           int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
121 |           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
122 | 
123 |     while (keep_processing):
124 | 
125 |         # if video file successfully open then read frame from video
126 | 
127 |         if (cap.isOpened):
128 |             ret, frame = cap.read()
129 | 
130 |             # when we reach the end of the video (file) exit cleanly
131 | 
132 |             if (ret == 0):
133 |                 keep_processing = False
134 |                 continue
135 | 
136 |             # rescale if specified
137 | 
138 |             if (args.rescale != 1.0):
139 |                 frame = cv2.resize(
140 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
141 | 
142 |         # if it is a still image, load that instead
143 | 
144 |         if (args.is_image):
145 |             frame = cv2.imread(args.video_file, cv2.IMREAD_COLOR)
146 | 
147 |         # get parameters from track bars
148 | 
149 |         sigmaU = cv2.getTrackbarPos("sigma U", window_nameU)
150 |         sigmaL = cv2.getTrackbarPos("sigma L", window_nameL)
151 | 
152 |         # check sigma's are greater than 1
153 | 
154 |         sigmaU = max(1, sigmaU)
155 |         sigmaL = max(1, sigmaL)
156 | 
157 |         # check sigma are correct
158 | 
159 |         if (sigmaL >= sigmaU) and (sigmaU > 1):
160 |             sigmaL = sigmaU - 1
161 |             print("auto-correcting sigmas such that U > L")
162 | 
163 |         # convert to grayscale
164 | 
165 |         gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
166 | 
167 |         # performing smoothing on the image using a smoothing mask
168 |         # specify 0x0 mask size then size is auto-computed from the sigma
169 |         # values
170 | 
171 |         smoothedU = cv2.GaussianBlur(gray_frame, (0, 0), sigmaU)
172 |         smoothedL = cv2.GaussianBlur(gray_frame, (0, 0), sigmaL)
173 | 
174 |         # perform abs_diff() to get DoG
175 | 
176 |         DoG = cv2.absdiff(smoothedU, smoothedL)
177 | 
178 |         # auto-scale to full 0 -> 255 range for display
179 | 
180 |         cv2.normalize(DoG, DoG, 0, 255, cv2.NORM_MINMAX)
181 | 
182 |         # display image
183 | 
184 |         cv2.imshow(window_name, frame)
185 |         cv2.imshow(window_nameU, smoothedU)
186 |         cv2.imshow(window_nameL, smoothedL)
187 |         cv2.imshow(window_nameDoG, DoG)
188 | 
189 |         # start the event loop - essential
190 | 
191 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
192 |         # ms). It waits for specified milliseconds for any keyboard event.
193 |         # If you press any key in that time, the program continues.
194 |         # If 0 is passed, it waits indefinitely for a key stroke.
195 |         # (bitwise and with 0xFF to extract least significant byte of
196 |         # multi-byte response)
197 | 
198 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
199 |         key = cv2.waitKey(40) & 0xFF
200 | 
201 |         # It can also be set to detect specific key strokes by recording which
202 |         # key is pressed
203 | 
204 |         # e.g. if user presses "x" then exit
205 | 
206 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
207 |         # display
208 | 
209 |         if (key == ord('x')):
210 |             keep_processing = False
211 |         elif (key == ord('f')):
212 |             cv2.setWindowProperty(
213 |                 window_nameDoG,
214 |                 cv2.WND_PROP_FULLSCREEN,
215 |                 cv2.WINDOW_FULLSCREEN)
216 | 
217 |     # close all windows
218 | 
219 |     cv2.destroyAllWindows()
220 | 
221 | else:
222 |     print("No video file specified or camera connected.")
223 | 
224 | #####################################################################
225 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                    GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 | 
  9 |   This version of the GNU Lesser General Public License incorporates
 10 | the terms and conditions of version 3 of the GNU General Public
 11 | License, supplemented by the additional permissions listed below.
 12 | 
 13 |   0. Additional Definitions.
 14 | 
 15 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 17 | General Public License.
 18 | 
 19 |   "The Library" refers to a covered work governed by this License,
 20 | other than an Application or a Combined Work as defined below.
 21 | 
 22 |   An "Application" is any work that makes use of an interface provided
 23 | by the Library, but which is not otherwise based on the Library.
 24 | Defining a subclass of a class defined by the Library is deemed a mode
 25 | of using an interface provided by the Library.
 26 | 
 27 |   A "Combined Work" is a work produced by combining or linking an
 28 | Application with the Library.  The particular version of the Library
 29 | with which the Combined Work was made is also called the "Linked
 30 | Version".
 31 | 
 32 |   The "Minimal Corresponding Source" for a Combined Work means the
 33 | Corresponding Source for the Combined Work, excluding any source code
 34 | for portions of the Combined Work that, considered in isolation, are
 35 | based on the Application, and not on the Linked Version.
 36 | 
 37 |   The "Corresponding Application Code" for a Combined Work means the
 38 | object code and/or source code for the Application, including any data
 39 | and utility programs needed for reproducing the Combined Work from the
 40 | Application, but excluding the System Libraries of the Combined Work.
 41 | 
 42 |   1. Exception to Section 3 of the GNU GPL.
 43 | 
 44 |   You may convey a covered work under sections 3 and 4 of this License
 45 | without being bound by section 3 of the GNU GPL.
 46 | 
 47 |   2. Conveying Modified Versions.
 48 | 
 49 |   If you modify a copy of the Library, and, in your modifications, a
 50 | facility refers to a function or data to be supplied by an Application
 51 | that uses the facility (other than as an argument passed when the
 52 | facility is invoked), then you may convey a copy of the modified
 53 | version:
 54 | 
 55 |    a) under this License, provided that you make a good faith effort to
 56 |    ensure that, in the event an Application does not supply the
 57 |    function or data, the facility still operates, and performs
 58 |    whatever part of its purpose remains meaningful, or
 59 | 
 60 |    b) under the GNU GPL, with none of the additional permissions of
 61 |    this License applicable to that copy.
 62 | 
 63 |   3. Object Code Incorporating Material from Library Header Files.
 64 | 
 65 |   The object code form of an Application may incorporate material from
 66 | a header file that is part of the Library.  You may convey such object
 67 | code under terms of your choice, provided that, if the incorporated
 68 | material is not limited to numerical parameters, data structure
 69 | layouts and accessors, or small macros, inline functions and templates
 70 | (ten or fewer lines in length), you do both of the following:
 71 | 
 72 |    a) Give prominent notice with each copy of the object code that the
 73 |    Library is used in it and that the Library and its use are
 74 |    covered by this License.
 75 | 
 76 |    b) Accompany the object code with a copy of the GNU GPL and this license
 77 |    document.
 78 | 
 79 |   4. Combined Works.
 80 | 
 81 |   You may convey a Combined Work under terms of your choice that,
 82 | taken together, effectively do not restrict modification of the
 83 | portions of the Library contained in the Combined Work and reverse
 84 | engineering for debugging such modifications, if you also do each of
 85 | the following:
 86 | 
 87 |    a) Give prominent notice with each copy of the Combined Work that
 88 |    the Library is used in it and that the Library and its use are
 89 |    covered by this License.
 90 | 
 91 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
 92 |    document.
 93 | 
 94 |    c) For a Combined Work that displays copyright notices during
 95 |    execution, include the copyright notice for the Library among
 96 |    these notices, as well as a reference directing the user to the
 97 |    copies of the GNU GPL and this license document.
 98 | 
 99 |    d) Do one of the following:
100 | 
101 |        0) Convey the Minimal Corresponding Source under the terms of this
102 |        License, and the Corresponding Application Code in a form
103 |        suitable for, and under terms that permit, the user to
104 |        recombine or relink the Application with a modified version of
105 |        the Linked Version to produce a modified Combined Work, in the
106 |        manner specified by section 6 of the GNU GPL for conveying
107 |        Corresponding Source.
108 | 
109 |        1) Use a suitable shared library mechanism for linking with the
110 |        Library.  A suitable mechanism is one that (a) uses at run time
111 |        a copy of the Library already present on the user's computer
112 |        system, and (b) will operate properly with a modified version
113 |        of the Library that is interface-compatible with the Linked
114 |        Version.
115 | 
116 |    e) Provide Installation Information, but only if you would otherwise
117 |    be required to provide such information under section 6 of the
118 |    GNU GPL, and only to the extent that such information is
119 |    necessary to install and execute a modified version of the
120 |    Combined Work produced by recombining or relinking the
121 |    Application with a modified version of the Linked Version. (If
122 |    you use option 4d0, the Installation Information must accompany
123 |    the Minimal Corresponding Source and Corresponding Application
124 |    Code. If you use option 4d1, you must provide the Installation
125 |    Information in the manner specified by section 6 of the GNU GPL
126 |    for conveying Corresponding Source.)
127 | 
128 |   5. Combined Libraries.
129 | 
130 |   You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 | 
136 |    a) Accompany the combined library with a copy of the same work based
137 |    on the Library, uncombined with any other library facilities,
138 |    conveyed under the terms of this License.
139 | 
140 |    b) Give prominent notice with the combined library that part of it
141 |    is a work based on the Library, and explaining where to find the
142 |    accompanying uncombined form of the same work.
143 | 
144 |   6. Revised Versions of the GNU Lesser General Public License.
145 | 
146 |   The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Python Computer Vision OpenCV Teaching Examples
  2 | 
  3 | OpenCV Python computer vision examples used for teaching within the undergraduate Computer Science programme
  4 | at [Durham University](http://www.durham.ac.uk) (UK) by [Prof. Toby Breckon](https://breckon.org/toby/).
  5 | 
  6 | ![Python - PEP8](https://github.com/tobybreckon/python-examples-cv/workflows/Python%20-%20PEP8/badge.svg)
  7 | 
  8 | All tested with [OpenCV](http://www.opencv.org) 3.x / 4.x and Python 3.x.
  9 | 
 10 | ```
 11 | # Example : <................................> processing from a video file
 12 | # specified on the command line (e.g. python FILE.py video_file) or from an
 13 | # attached web camera
 14 | ```
 15 | ---
 16 | 
 17 | ### Background:
 18 | 
 19 | Directly adapted (and in some cases extended/added to) from the [C++](https://github.com/tobybreckon/cpp-examples-ipcv.git) and earlier [C](https://github.com/tobybreckon/c-examples-ipcv.git) language teaching examples used to generate the video examples within the ebook version of:
 20 | 
 21 | [Dictionary of Computer Vision and Image Processing](http://dx.doi.org/10.1002/9781119286462) (R.B. Fisher, T.P. Breckon, K. Dawson-Howe, A. Fitzgibbon, C. Robertson, E. Trucco, C.K.I. Williams), Wiley, 2014.
 22 | [[Google Books](http://books.google.co.uk/books?id=TaEQAgAAQBAJ&lpg=PP1&dq=isbn%3A1118706811&pg=PP1v=onepage&q&f=false)] [[doi](http://dx.doi.org/10.1002/9781119286462)]
 23 | 
 24 | Notably, the [C++](https://github.com/tobybreckon/cpp-examples-ipcv.git) examples may contain further speed optimizations in some cases.
 25 | 
 26 | A related supporting set of [Python Image Processing OpenCV Teaching Examples](https://github.com/tobybreckon/python-examples-ip.git) are also available covering basic image processing operations.
 27 | 
 28 | ---
 29 | 
 30 | ### How to download and run:
 31 | 
 32 | Download each file as needed or to download the entire repository and run each try:
 33 | 
 34 | ```
 35 | git clone https://github.com/tobybreckon/python-examples-cv.git
 36 | cd python-examples-cv
 37 | python3 ./<insert file name of one of the examples>.py [optional video file]
 38 | ```
 39 | 
 40 | Demo source code is provided _"as is"_ to aid learning and understanding of topics on the course and beyond.
 41 | 
 42 | Most run with a webcam connected or from a command line supplied video file of a format OpenCV supports on your system (otherwise edit the script to provide your own image source). For examples each individual ```.py``` example file can be used as follows:
 43 | 
 44 | ```
 45 | $ python3 ./generic_interface.py -h
 46 | usage: generic_interface.py [-h] [-c CAMERA_TO_USE] [-r RESCALE] [-fs]
 47 |                             [video_file]
 48 | 
 49 | Perform ./generic_interface.py example operation on incoming camera/video
 50 | image
 51 | 
 52 | positional arguments:
 53 |   video_file            specify optional video file
 54 | 
 55 | optional arguments:
 56 |   -h, --help            show this help message and exit
 57 |   -c CAMERA_TO_USE, --camera_to_use CAMERA_TO_USE
 58 |                         specify camera to use
 59 |   -r RESCALE, --rescale RESCALE
 60 |                         rescale image by this factor
 61 | 
 62 | ```
 63 | 
 64 | For several of the demos that largely rely on effective demonstration using just a single output window - press the _"f"_ key to run fullscreen. In all examples press _"x"_ to exit.
 65 | 
 66 | Use script ```sh download-models.sh``` to download CNN model files associated with some examples.
 67 | 
 68 | ---
 69 | 
 70 | ### Re-usable Exemplar Components (Python Classes):
 71 | 
 72 | This codebase contains the following re-usable exemplar elements:
 73 | 
 74 | - ```camera_stream.py``` - a re-usable threaded camera class, that is call compatible with the existing OpenCV VideoCapture class, designed to always deliver the latest frame from a single camera without buffering delays (used by all examples if available).
 75 | 
 76 | - ```h_concatenate()``` - a re-usable function for horiozontal image concatenation for display in single window handling variations in size/channels (see ```chromaticity_lightness.py```).
 77 | 
 78 | ---
 79 | 
 80 | ### References:
 81 | 
 82 | If referencing these examples in your own work (e.g _"... based on the implementation of REF..."_), please reference the related research work from which these sample OpenCV reference implementations were derived (in terms of parameters choice etc., presented in bibtex format).
 83 | 
 84 | For the SGBM stereo vision and camera calibration examples, reference:
 85 | ```
 86 | @Article{mroz12stereo,
 87 |   author = 	 {Mroz, F. and Breckon, T.P.},
 88 |   title = 	 {An Empirical Comparison of Real-time Dense Stereo Approaches for use in the Automotive Environment},
 89 |   journal =  {EURASIP Journal on Image and Video Processing},
 90 |   year =     {2012},
 91 |   volume = 	 {2012},
 92 |   number = 	 {13},
 93 |   pages = 	 {1-19},
 94 |   publisher = {Springer},
 95 |   url = 	 {https://breckon.org/toby/publications/papers/mroz12stereo.pdf},
 96 |   doi = 	 {10.1186/1687-5281-2012-13}
 97 | }
 98 | ```
 99 | 
100 | For the Mixture of Gaussian (MOG) background subtraction and Kalman filtering example, reference:
101 | ```
102 | @InProceedings{kundegorski14photogrammetric,
103 |   author = 	 {Kundegorski, M.E. and Breckon, T.P.},
104 |   title = 	 {A Photogrammetric Approach for Real-time 3D Localization and Tracking of Pedestrians in Monocular Infrared Imagery},
105 |   booktitle = {Proc. SPIE Optics and Photonics for Counterterrorism, Crime Fighting and Defence},
106 |   year = 	    {2014},
107 |   month =     {September},
108 |   volume =    {9253},
109 |   number =    {01},
110 |   publisher = {SPIE},
111 |   pages =     {1-16},
112 |   url = 	 {https://breckon.org/toby/publications/papers/kundegorski14photogrammetric.pdf},
113 |   doi = 	 {10.1117/12.2065673}
114 | }
115 | ```
116 | 
117 | 
118 | For the DoG, Canny, contour, Harris  and Sobel examples, please reference:
119 | ```
120 | @Book{solomonbreckon10fundamentals,
121 |   author = 	 {Solomon, C.J. and Breckon, T.P.},
122 |   title = 	 {Fundamentals of Digital Image Processing: A Practical Approach with Examples in Matlab},
123 |   publisher = 	 {Wiley-Blackwell},
124 |   year = 	 {2010},
125 |   isbn = {0470844736},
126 |   doi = 	 {10.1002/9780470689776},
127 |   note = {ISBN-13: 978-0470844731},
128 | }
129 | ```
130 | 
131 | For all other examples reference the original paper as outlined in the OpenCV manual or the header comment of the ```.py``` example file.
132 | 
133 | ---
134 | 
135 | If you find any bugs raise an issue (or much better still submit a git pull request with a fix) - toby.breckon@durham.ac.uk
136 | 
137 | _"may the source be with you"_ - anon.
138 | 


--------------------------------------------------------------------------------
/calibrate_camera.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example : perform intrinsic calibration of a  connected camera
  4 | 
  5 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  6 | 
  7 | # Copyright (c) 2018-2021 Department of Computer Science,
  8 | #                         Durham University, UK
  9 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 10 | 
 11 | # Acknowledgements:
 12 | 
 13 | # http://opencv-python-tutroals.readthedocs.org/en/latest/ \
 14 | # py_tutorials/py_calib3d/py_table_of_contents_calib3d/py_table_of_contents_calib3d.html
 15 | 
 16 | # http://docs.ros.org/electric/api/cob_camera_calibration/html/calibrator_8py_source.html
 17 | 
 18 | #####################################################################
 19 | 
 20 | import cv2
 21 | import argparse
 22 | import sys
 23 | import numpy as np
 24 | 
 25 | #####################################################################
 26 | 
 27 | keep_processing = True
 28 | 
 29 | # parse command line arguments for camera ID or video file
 30 | 
 31 | parser = argparse.ArgumentParser(
 32 |     description='Perform ' +
 33 |     sys.argv[0] +
 34 |     ' example operation on incoming camera/video image')
 35 | parser.add_argument(
 36 |     "-c",
 37 |     "--camera_to_use",
 38 |     type=int,
 39 |     help="specify camera to use",
 40 |     default=0)
 41 | parser.add_argument(
 42 |     "-r",
 43 |     "--rescale",
 44 |     type=float,
 45 |     help="rescale image by this factor",
 46 |     default=1.0)
 47 | parser.add_argument(
 48 |     "-s",
 49 |     "--set_resolution",
 50 |     type=int,
 51 |     nargs=2,
 52 |     help='override default camera resolution as H W')
 53 | parser.add_argument(
 54 |     "-cbx",
 55 |     "--chessboardx",
 56 |     type=int,
 57 |     help="specify number of internal chessboard squares \
 58 |             (corners) in x-direction",
 59 |     default=6)
 60 | parser.add_argument(
 61 |     "-cby",
 62 |     "--chessboardy",
 63 |     type=int,
 64 |     help="specify number of internal chessboard squares \
 65 |             (corners) in y-direction",
 66 |     default=8)
 67 | parser.add_argument(
 68 |     "-cbw",
 69 |     "--chessboardw",
 70 |     type=float,
 71 |     help="specify width/height of chessboard squares in mm",
 72 |     default=40.0)
 73 | parser.add_argument(
 74 |     "-i",
 75 |     "--iterations",
 76 |     type=int,
 77 |     help="specify number of iterations for each stage of optimisation",
 78 |     default=100)
 79 | parser.add_argument(
 80 |     "-e",
 81 |     "--minimum_error",
 82 |     type=float,
 83 |     help="specify lower error threshold upon which to stop \
 84 |         optimisation stages",
 85 |     default=0.001)
 86 | args = parser.parse_args()
 87 | 
 88 | #####################################################################
 89 | 
 90 | #  define video capture object
 91 | 
 92 | try:
 93 |     # to use a non-buffered camera stream (via a separate thread)
 94 | 
 95 |     import camera_stream
 96 |     cap = camera_stream.CameraVideoStream()
 97 | 
 98 | except BaseException:
 99 |     # if not then just use OpenCV default
100 | 
101 |     print("INFO: camera_stream class not found - camera input may be buffered")
102 |     cap = cv2.VideoCapture()
103 | 
104 | # define display window names
105 | 
106 | window_name = "Camera Input"  # window name
107 | window_nameU = "Undistored (calibrated) Camera"  # window name
108 | 
109 | #####################################################################
110 | 
111 | # perform intrinsic calibration (removal of image distortion in image)
112 | 
113 | do_calibration = False
114 | termination_criteria_subpix = (
115 |     cv2.TERM_CRITERIA_EPS +
116 |     cv2.TERM_CRITERIA_MAX_ITER,
117 |     args.iterations,
118 |     args.minimum_error)
119 | 
120 | # set up a set of real-world "object points" for the chessboard pattern
121 | 
122 | patternX = args.chessboardx
123 | patternY = args.chessboardy
124 | square_size_in_mm = args.chessboardw
125 | 
126 | # prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
127 | 
128 | objp = np.zeros((patternX * patternY, 3), np.float32)
129 | objp[:, :2] = np.mgrid[0:patternX, 0:patternY].T.reshape(-1, 2)
130 | objp = objp * square_size_in_mm
131 | 
132 | # create arrays to store object points and image points from all the images.
133 | objpoints = []  # 3d point in real world space
134 | imgpoints = []  # 2d points in image plane.
135 | 
136 | #####################################################################
137 | 
138 | # count number of chessboard detections
139 | chessboard_pattern_detections = 0
140 | 
141 | print()
142 | print("--> hold up chessboard (grabbing images at 2 fps)")
143 | print("press c : to continue to calibration")
144 | 
145 | #####################################################################
146 | 
147 | # open connected camera
148 | 
149 | if cap.open(args.camera_to_use):
150 | 
151 |     # override default camera resolution
152 | 
153 |     if (args.set_resolution is not None):
154 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
155 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
156 | 
157 |     print("INFO: input resolution : (",
158 |           int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
159 |           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
160 | 
161 |     while (not (do_calibration)):
162 | 
163 |         # grab frames from camera
164 | 
165 |         ret, frame = cap.read()
166 | 
167 |         # rescale if specified
168 | 
169 |         if (args.rescale != 1.0):
170 |             frame = cv2.resize(frame, (0, 0), fx=args.rescale, fy=args.rescale)
171 | 
172 |         # convert to grayscale
173 | 
174 |         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
175 | 
176 |         # Find the chess board corners in the image
177 |         # (change flags to perhaps improve detection ?)
178 | 
179 |         ret, corners = cv2.findChessboardCorners(
180 |             gray, (patternX, patternY), None, cv2.CALIB_CB_ADAPTIVE_THRESH |
181 |             cv2.CALIB_CB_FAST_CHECK | cv2.CALIB_CB_NORMALIZE_IMAGE)
182 | 
183 |         # If found, add object points, image points (after refining them)
184 | 
185 |         if (ret):
186 | 
187 |             chessboard_pattern_detections += 1
188 | 
189 |             # add object points to global list
190 | 
191 |             objpoints.append(objp)
192 | 
193 |             # refine corner locations to sub-pixel accuracy and then
194 | 
195 |             corners_sp = cv2.cornerSubPix(
196 |                 gray, corners, (11, 11), (-1, -1), termination_criteria_subpix)
197 |             imgpoints.append(corners_sp)
198 | 
199 |             # Draw and display the corners
200 | 
201 |             drawboard = cv2.drawChessboardCorners(
202 |                 frame, (patternX, patternY), corners_sp, ret)
203 | 
204 |             text = 'detected: ' + str(chessboard_pattern_detections)
205 |             cv2.putText(drawboard, text, (10, 25),
206 |                         cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, 8)
207 | 
208 |             cv2.imshow(window_name, drawboard)
209 |         else:
210 |             text = 'detected: ' + str(chessboard_pattern_detections)
211 |             cv2.putText(frame, text, (10, 25),
212 |                         cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, 8)
213 | 
214 |             cv2.imshow(window_name, frame)
215 | 
216 |         # start the event loop
217 | 
218 |         key = cv2.waitKey(500) & 0xFF  # wait 500 ms. between frames
219 |         if (key == ord('c')):
220 |             do_calibration = True
221 | 
222 | else:
223 |     print("Cannot open connected camera.")
224 |     exit()
225 | 
226 | #####################################################################
227 | 
228 | # check we detected some patterns within the first loop
229 | 
230 | if (chessboard_pattern_detections == 0):
231 |     print("No calibration patterns detected - exiting.")
232 |     exit()
233 | 
234 | #####################################################################
235 | 
236 | # perform calibration - uses [Zhang, 2000]
237 | 
238 | print("START - intrinsic calibration ...")
239 | 
240 | ret, K, D, rvecs, tvecs = cv2.calibrateCamera(
241 |     objpoints, imgpoints, gray.shape[::-1], None, None)
242 | 
243 | print("FINISHED - intrinsic calibration")
244 | 
245 | # print output in readable format
246 | 
247 | print()
248 | print("Intrinsic Camera Calibration Matrix, K - from intrinsic calibration:")
249 | print("(format as follows: fx, fy - focal lengths / cx, cy - optical centers)")
250 | print("[fx, 0, cx]\n[0, fy, cy]\n[0,  0,  1]")
251 | np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)})
252 | print(K)
253 | print()
254 | print("Intrinsic Distortion Co-effients, D - from intrinsic calibration:")
255 | print("(k1, k2, k3 - radial p1, p2 - tangential - distortion coefficients)")
256 | print("[k1, k2, p1, p2, k3]")
257 | np.set_printoptions(formatter={'float': lambda x: "{0:0.5f}".format(x)})
258 | print(D)
259 | print()
260 | print("Image resolution used (width, height): ", np.flip(frame.shape[:2]))
261 | 
262 | #####################################################################
263 | 
264 | # perform undistortion (i.e. calibration) of the images
265 | 
266 | keep_processing = True
267 | 
268 | print()
269 | print("-> performing undistortion")
270 | print("press x : to exit")
271 | 
272 | while (keep_processing):
273 | 
274 |     # grab frames from camera
275 | 
276 |     ret, frame = cap.read()
277 | 
278 |     # undistort image using camera matrix K and distortion coefficients D
279 | 
280 |     undistorted = cv2.undistort(frame, K, D, None, None)
281 | 
282 |     # display both images
283 | 
284 |     cv2.imshow(window_name, frame)
285 |     cv2.imshow(window_nameU, undistorted)
286 | 
287 |     # start the event loop - essential
288 | 
289 |     key = cv2.waitKey(40) & 0xFF  # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
290 | 
291 |     if (key == ord('x')):
292 |         keep_processing = False
293 | 
294 | #####################################################################
295 | 
296 | # close all windows and cams.
297 | 
298 | cv2.destroyAllWindows()
299 | 
300 | #####################################################################
301 | 


--------------------------------------------------------------------------------
/canny.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example :  canny edge detection for a a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2016 School of Engineering & Computing Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | #####################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | 
 19 | #####################################################################
 20 | 
 21 | keep_processing = True
 22 | 
 23 | # parse command line arguments for camera ID or video file
 24 | 
 25 | parser = argparse.ArgumentParser(
 26 |     description='Perform ' +
 27 |     sys.argv[0] +
 28 |     ' example operation on incoming camera/video image')
 29 | parser.add_argument(
 30 |     "-c",
 31 |     "--camera_to_use",
 32 |     type=int,
 33 |     help="specify camera to use",
 34 |     default=0)
 35 | parser.add_argument(
 36 |     "-r",
 37 |     "--rescale",
 38 |     type=float,
 39 |     help="rescale image by this factor",
 40 |     default=1.0)
 41 | parser.add_argument(
 42 |     "-s",
 43 |     "--set_resolution",
 44 |     type=int,
 45 |     nargs=2,
 46 |     help='override default camera resolution as H W')
 47 | parser.add_argument(
 48 |     "-fs",
 49 |     "--fullscreen",
 50 |     action='store_true',
 51 |     help="run in full screen mode")
 52 | parser.add_argument(
 53 |     "-nc",
 54 |     "--nocontrols",
 55 |     action='store_true',
 56 |     help="no onscreen controls")
 57 | parser.add_argument(
 58 |     'video_file',
 59 |     metavar='video_file',
 60 |     type=str,
 61 |     nargs='?',
 62 |     help='specify optional video file')
 63 | args = parser.parse_args()
 64 | 
 65 | #####################################################################
 66 | 
 67 | # this function is called as a call-back everytime the trackbar is moved
 68 | # (here we just do nothing)
 69 | 
 70 | 
 71 | def nothing(x):
 72 |     pass
 73 | 
 74 | 
 75 | #####################################################################
 76 | 
 77 | # define video capture object
 78 | 
 79 | try:
 80 |     # to use a non-buffered camera stream (via a separate thread)
 81 | 
 82 |     if not (args.video_file):
 83 |         import camera_stream
 84 |         cap = camera_stream.CameraVideoStream(use_tapi=True)
 85 |     else:
 86 |         cap = cv2.VideoCapture()  # not needed for video files
 87 | 
 88 | except BaseException:
 89 |     # if not then just use OpenCV default
 90 | 
 91 |     print("INFO: camera_stream class not found - camera input may be buffered")
 92 |     cap = cv2.VideoCapture()
 93 | 
 94 | # define display window name
 95 | 
 96 | window_name = "Live Camera Input"  # window name
 97 | window_name2 = "Canny Edges"  # window name
 98 | 
 99 | # if command line arguments are provided try to read video_name
100 | # otherwise default to capture from attached H/W camera
101 | 
102 | if (((args.video_file) and (cap.open(str(args.video_file))))
103 |         or (cap.open(args.camera_to_use))):
104 | 
105 |     # create window by name (as resizable)
106 | 
107 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
108 |     cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
109 |     cv2.setWindowProperty(window_name2, cv2.WND_PROP_FULLSCREEN,
110 |                           cv2.WINDOW_FULLSCREEN & args.fullscreen)
111 | 
112 |     # add some track bar controllers for settings
113 | 
114 |     lower_threshold = 25
115 |     upper_threshold = 120
116 |     smoothing_neighbourhood = 3
117 |     sobel_size = 3  # greater than 7 seems to crash
118 | 
119 |     if (not (args.nocontrols)):
120 |         cv2.createTrackbar("lower", window_name2, lower_threshold,
121 |                            255, nothing)
122 |         cv2.createTrackbar("upper", window_name2, upper_threshold,
123 |                            255, nothing)
124 |         cv2.createTrackbar("smoothing", window_name2, smoothing_neighbourhood,
125 |                            15, nothing)
126 |         cv2.createTrackbar("sobel size", window_name2, sobel_size,
127 |                            7, nothing)
128 | 
129 |     # override default camera resolution
130 | 
131 |     if (args.set_resolution is not None):
132 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
133 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
134 | 
135 |     print("INFO: input resolution : (",
136 |           int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
137 |           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
138 | 
139 |     while (keep_processing):
140 | 
141 |         # if video file successfully open then read frame from video
142 | 
143 |         if (cap.isOpened):
144 |             ret, frame = cap.read()  # rescale if specified
145 | 
146 |             # when we reach the end of the video (file) exit cleanly
147 | 
148 |             if (ret == 0):
149 |                 keep_processing = False
150 |                 continue
151 | 
152 |             # rescale if specified
153 | 
154 |             if (args.rescale != 1.0):
155 |                 frame = cv2.resize(
156 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
157 | 
158 |         # get parameters from track bars
159 | 
160 |         if (not (args.nocontrols)):
161 |             lower_threshold = cv2.getTrackbarPos("lower", window_name2)
162 |             upper_threshold = cv2.getTrackbarPos("upper", window_name2)
163 |             smoothing_neighbourhood = cv2.getTrackbarPos("smoothing",
164 |                                                          window_name2)
165 |             sobel_size = cv2.getTrackbarPos("sobel size", window_name2)
166 | 
167 |         # check neighbourhood is greater than 3 and odd
168 | 
169 |         smoothing_neighbourhood = max(3, smoothing_neighbourhood)
170 |         if not (smoothing_neighbourhood % 2):
171 |             smoothing_neighbourhood = smoothing_neighbourhood + 1
172 | 
173 |         sobel_size = max(3, sobel_size)
174 |         if not (sobel_size % 2):
175 |             sobel_size = sobel_size + 1
176 | 
177 |         # convert to grayscale
178 | 
179 |         gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
180 | 
181 |         # performing smoothing on the image using a 5x5 smoothing mark (see
182 |         # manual entry for GaussianBlur())
183 | 
184 |         smoothed = cv2.GaussianBlur(
185 |             gray_frame, (smoothing_neighbourhood, smoothing_neighbourhood), 0)
186 | 
187 |         # perform canny edge detection
188 | 
189 |         canny = cv2.Canny(
190 |             smoothed,
191 |             lower_threshold,
192 |             upper_threshold,
193 |             apertureSize=sobel_size)
194 | 
195 |         # display image
196 | 
197 |         cv2.imshow(window_name, frame)
198 |         cv2.imshow(window_name2, canny)
199 | 
200 |         # start the event loop - essential
201 | 
202 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
203 |         # milliseconds). It waits for specified milliseconds for any keyboard
204 |         # event. If you press any key in that time, the program continues.
205 |         # If 0 is passed, it waits indefinitely for a key stroke.
206 |         # (bitwise and with 0xFF to extract least significant byte of
207 |         # multi-byte response)
208 | 
209 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
210 |         key = cv2.waitKey(40) & 0xFF
211 | 
212 |         # It can also be set to detect specific key strokes by recording which
213 |         # key is pressed
214 | 
215 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
216 |         # display
217 | 
218 |         if (key == ord('x')):
219 |             keep_processing = False
220 |         elif (key == ord('f')):
221 |             cv2.setWindowProperty(
222 |                 window_name2,
223 |                 cv2.WND_PROP_FULLSCREEN,
224 |                 cv2.WINDOW_FULLSCREEN)
225 | 
226 |     # close all windows
227 | 
228 |     cv2.destroyAllWindows()
229 | 
230 | else:
231 |     print("No video file specified or camera connected.")
232 | 
233 | #####################################################################
234 | 


--------------------------------------------------------------------------------
/chromaticity_lightness.py:
--------------------------------------------------------------------------------
  1 | ##########################################################################
  2 | 
  3 | # Example : perform live chromaticity/lightness display from a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2018 Toby Breckon, Engineering & Computer Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | ##########################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | import math
 19 | import numpy as np
 20 | 
 21 | ##########################################################################
 22 | 
 23 | keep_processing = True
 24 | 
 25 | # parse command line arguments for camera ID or video file
 26 | 
 27 | parser = argparse.ArgumentParser(
 28 |     description='Perform ' +
 29 |     sys.argv[0] +
 30 |     ' example operation on incoming camera/video image')
 31 | parser.add_argument(
 32 |     "-c",
 33 |     "--camera_to_use",
 34 |     type=int,
 35 |     help="specify camera to use",
 36 |     default=0)
 37 | parser.add_argument(
 38 |     "-r",
 39 |     "--rescale",
 40 |     type=float,
 41 |     help="rescale image by this factor",
 42 |     default=1.0)
 43 | parser.add_argument(
 44 |     "-s",
 45 |     "--set_resolution",
 46 |     type=int,
 47 |     nargs=2,
 48 |     help='override default camera resolution as H W')
 49 | parser.add_argument(
 50 |     "-fs",
 51 |     "--fullscreen",
 52 |     action='store_true',
 53 |     help="run in full screen mode")
 54 | parser.add_argument(
 55 |     'video_file',
 56 |     metavar='video_file',
 57 |     type=str,
 58 |     nargs='?',
 59 |     help='specify optional video file')
 60 | args = parser.parse_args()
 61 | 
 62 | ##########################################################################
 63 | 
 64 | # concatenate two RGB/grayscale images horizontally (left to right)
 65 | # handling differing channel numbers or image heights in the input
 66 | 
 67 | 
 68 | def h_concatenate(img1, img2):
 69 | 
 70 |     # get size and channels for both images
 71 | 
 72 |     height1 = img1.shape[0]
 73 | 
 74 |     if (len(img1.shape) == 2):
 75 |         channels1 = 1
 76 |     else:
 77 |         channels1 = img1.shape[2]
 78 | 
 79 |     height2 = img2.shape[0]
 80 |     width2 = img2.shape[1]
 81 |     if (len(img2.shape) == 2):
 82 |         channels2 = 1
 83 |     else:
 84 |         channels2 = img2.shape[2]
 85 | 
 86 |     # make all images 3 channel, or assume all same channel
 87 | 
 88 |     if ((channels1 > channels2) and (channels1 == 3)):
 89 |         out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
 90 |         out1 = img1
 91 |     elif ((channels2 > channels1) and (channels2 == 3)):
 92 |         out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
 93 |         out2 = img2
 94 |     else:  # both must be equal
 95 |         out1 = img1
 96 |         out2 = img2
 97 | 
 98 |     # height of first image is master height, width can remain unchanged
 99 | 
100 |     if (height1 != height2):
101 |         out2 = cv2.resize(out2, (width2, height1))
102 | 
103 |     return np.hstack((out1, out2))
104 | 
105 | ##########################################################################
106 | 
107 | # define video capture object
108 | 
109 | 
110 | try:
111 |     # to use a non-buffered camera stream (via a separate thread)
112 | 
113 |     if not (args.video_file):
114 |         import camera_stream
115 |         cap = camera_stream.CameraVideoStream()
116 |     else:
117 |         cap = cv2.VideoCapture()  # not needed for video files
118 | 
119 | except BaseException:
120 |     # if not then just use OpenCV default
121 | 
122 |     print("INFO: camera_stream class not found - camera input may be buffered")
123 |     cap = cv2.VideoCapture()
124 | 
125 | # define display window name
126 | 
127 | window_name = "Live - [Original RGB | Chromaticity {r,g,b} | Lightness (l)]"
128 | 
129 | # if command line arguments are provided try to read video_name
130 | # otherwise default to capture from attached camera
131 | 
132 | if (((args.video_file) and (cap.open(str(args.video_file))))
133 |         or (cap.open(args.camera_to_use))):
134 | 
135 |     # create window by name (as resizable)
136 | 
137 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
138 | 
139 |     # override default camera resolution
140 | 
141 |     if (args.set_resolution is not None):
142 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
143 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
144 | 
145 |     while (keep_processing):
146 | 
147 |         # start a timer (to see how long processing and display takes)
148 | 
149 |         start_t = cv2.getTickCount()
150 | 
151 |         # if camera /video file successfully open then read frame
152 | 
153 |         if (cap.isOpened):
154 |             ret, frame = cap.read()
155 | 
156 |             # when we reach the end of the video (file) exit cleanly
157 | 
158 |             if (ret == 0):
159 |                 keep_processing = False
160 |                 continue
161 | 
162 |             # rescale if specified
163 | 
164 |             if (args.rescale != 1.0):
165 |                 frame = cv2.resize(
166 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
167 | 
168 |         # compute chromaticity as  c = c / SUM(RGB) for c = {R, G, B} with
169 |         # safety for divide by zero errors
170 |         # chromaticity {r,g,b} range is floating point 0 -> 1
171 | 
172 |         # N.B. if extracting chromaticity {r,g} from this remember to
173 |         # take channels r = 2 and g = 1 due to OpenCV BGR channel ordering
174 | 
175 |         chromaticity = np.zeros(frame.shape).astype(np.float32)
176 |         sum_channel = (frame[:, :, 0].astype(np.float32)
177 |                        + frame[:, :, 1].astype(np.float32)
178 |                        + frame[:, :, 2].astype(np.float32)
179 |                        + 1)
180 |         chromaticity[:, :, 0] = (frame[:, :, 0] / sum_channel)
181 |         chromaticity[:, :, 1] = (frame[:, :, 1] / sum_channel)
182 |         chromaticity[:, :, 2] = (frame[:, :, 2] / sum_channel)
183 | 
184 |         # compute lightness as an integer = RGB / 3 (range is 0 -> 255)
185 | 
186 |         lightness = np.floor(sum_channel / 3)
187 | 
188 |         # display image as a concatenated triple of [ RGB | Chromaticity |
189 |         # Lightness ] adjusting back to 8-bit and scaling appropriately
190 | 
191 |         cv2.imshow(
192 |             window_name,
193 |             h_concatenate(
194 |                 h_concatenate(
195 |                     frame,
196 |                     (chromaticity *
197 |                      255).astype(
198 |                         np.uint8)),
199 |                 lightness.astype(
200 |                     np.uint8)))
201 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
202 |                               cv2.WINDOW_FULLSCREEN & args.fullscreen)
203 | 
204 |         # stop the timer and convert to ms. (to see how long processing and
205 |         # display takes)
206 | 
207 |         stop_t = ((cv2.getTickCount() - start_t) /
208 |                   cv2.getTickFrequency()) * 1000
209 | 
210 |         # start the event loop - essential
211 | 
212 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
213 |         # ms). It waits for specified milliseconds for any keyboard event.
214 |         # If you press any key in that time, the program continues.
215 |         # If 0 is passed, it waits indefinitely for a key stroke.
216 |         # (bitwise and with 0xFF to extract least significant byte of
217 |         # multi-byte response)
218 | 
219 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
220 |         # 25 fps = 40 ms)
221 | 
222 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
223 | 
224 |         # It can also be set to detect specific key strokes by recording which
225 |         # key is pressed
226 | 
227 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
228 |         # display
229 | 
230 |         if (key == ord('x')):
231 |             keep_processing = False
232 |         elif (key == ord('f')):
233 |             args.fullscreen = not (args.fullscreen)
234 | 
235 |     # close all windows
236 | 
237 |     cv2.destroyAllWindows()
238 | 
239 | else:
240 |     print("No video file specified or camera connected.")
241 | 
242 | ##########################################################################
243 | 


--------------------------------------------------------------------------------
/cnn_ssd_detection.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example : perform live object detectoon using a pre-trained SSD CNN model
  4 | # and display from a video file specified on the command line
  5 | # (e.g. python FILE.py video_file) or from an attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2017 Department of Computer Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | # based on provided examples at:
 14 | # https://github.com/opencv/opencv/tree/master/samples/dnn
 15 | # see here for how to load Caffe/TensorFlow/... models etc.
 16 | 
 17 | # implements a version of:
 18 | 
 19 | # MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Apps.
 20 | # Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang,
 21 | # Tobias Weyand, Marco Andreetto, Hartwig Adam
 22 | # research paper: https://arxiv.org/abs/1704.04861
 23 | 
 24 | # requires Caffe network model files (.prototxt / .caffemodel) downloaded from:
 25 | # https://github.com/chuanqi305/MobileNet-SSD/
 26 | 
 27 | #####################################################################
 28 | 
 29 | import cv2
 30 | import argparse
 31 | import sys
 32 | import math
 33 | 
 34 | #####################################################################
 35 | 
 36 | keep_processing = True
 37 | 
 38 | # parse command line arguments for camera ID or video file
 39 | 
 40 | parser = argparse.ArgumentParser(
 41 |     description='Perform ' +
 42 |     sys.argv[0] +
 43 |     ' example operation on incoming camera/video image')
 44 | parser.add_argument(
 45 |     "-c",
 46 |     "--camera_to_use",
 47 |     type=int,
 48 |     help="specify camera to use",
 49 |     default=0)
 50 | parser.add_argument(
 51 |     "-r",
 52 |     "--rescale",
 53 |     type=float,
 54 |     help="rescale image by this factor",
 55 |     default=1.0)
 56 | parser.add_argument(
 57 |     'video_file',
 58 |     metavar='video_file',
 59 |     type=str,
 60 |     nargs='?',
 61 |     help='specify optional video file')
 62 | args = parser.parse_args()
 63 | 
 64 | cnn_model_to_load = "MobileNetSSD_deploy"
 65 | 
 66 | #####################################################################
 67 | 
 68 | 
 69 | def trackbar_callback(pos):
 70 |     global confidence_threshold
 71 |     confidence_threshold = pos / 100.0
 72 | 
 73 | #####################################################################
 74 | 
 75 | # define video capture object
 76 | 
 77 | 
 78 | try:
 79 |     # to use a non-buffered camera stream (via a separate thread)
 80 | 
 81 |     if not (args.video_file):
 82 |         import camera_stream
 83 |         cap = camera_stream.CameraVideoStream()
 84 |     else:
 85 |         cap = cv2.VideoCapture()  # not needed for video files
 86 | 
 87 | except BaseException:
 88 |     # if not then just use OpenCV default
 89 | 
 90 |     print("INFO: camera_stream class not found - camera input may be buffered")
 91 |     cap = cv2.VideoCapture()
 92 | 
 93 | # define display window name
 94 | 
 95 | window_name = "Live Object Detection - CNN: " + cnn_model_to_load
 96 | 
 97 | # if command line arguments are provided try to read video_name
 98 | # otherwise default to capture from attached camera
 99 | 
100 | if (((args.video_file) and (cap.open(str(args.video_file))))
101 |         or (cap.open(args.camera_to_use))):
102 | 
103 |     # create window by name (as resizable)
104 | 
105 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
106 | 
107 |     # add track bar to window for confidence threshold
108 | 
109 |     confidence_threshold = 0.7
110 |     cv2.createTrackbar('Confidence threshold, %', window_name, int(
111 |         confidence_threshold * 100), 99, trackbar_callback)
112 | 
113 |     # init CNN model - here from Caffe, although OpenCV can import from
114 |     # mosyt deep learning templates
115 | 
116 |     net = cv2.dnn.readNetFromCaffe(
117 |         cnn_model_to_load + ".prototxt",
118 |         cnn_model_to_load + ".caffemodel")
119 | 
120 | # provide mappings from class numbers to string labels - these are the
121 | # PASCAL VOC classees
122 | 
123 |     classNames = {0: 'background',
124 |                   1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat',
125 |                   5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat', 9: 'chair',
126 |                   10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse',
127 |                   14: 'motorbike', 15: 'person', 16: 'pottedplant',
128 |                   17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor'}
129 | 
130 |     while (keep_processing):
131 | 
132 |         # start a timer (to see how long processing and display takes)
133 | 
134 |         start_t = cv2.getTickCount()
135 | 
136 |         # if video file successfully open then read frame from video
137 | 
138 |         if (cap.isOpened):
139 |             ret, frame = cap.read()
140 | 
141 |             # when we reach the end of the video (file) exit cleanly
142 | 
143 |             if (ret == 0):
144 |                 keep_processing = False
145 |                 continue
146 | 
147 |             # rescale if specified
148 | 
149 |             if (args.rescale != 1.0):
150 |                 frame = cv2.resize(
151 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
152 | 
153 |         # get size of input
154 | 
155 |         cols = frame.shape[1]
156 |         rows = frame.shape[0]
157 | 
158 |         # transform the image into a network input "blob" (i.e. tensor)
159 |         # by scaling the image to the input size of the network, in this case
160 |         # not swapping the R and G channels (i.e. used when network trained on
161 |         # RGB and not the BGR of OpenCV) and re-scaling the inputs from 0->255
162 |         # to 0->1 by specifing the mean value for each channel
163 | 
164 |         swapRBchannels = False             # do not swap channels
165 |         crop = False                       # crop image or not
166 |         meanChannelVal = 255.0 / 2.0       # mean channel value
167 | 
168 |         inWidth = 300                      # network input width
169 |         inHeight = 300                     # network input height
170 |         inScaleFactor = 0.007843           # input scale factor
171 | 
172 |         blob = cv2.dnn.blobFromImage(
173 |             frame,
174 |             inScaleFactor,
175 |             (inWidth,
176 |              inHeight),
177 |             (meanChannelVal,
178 |              meanChannelVal,
179 |              meanChannelVal),
180 |             swapRBchannels,
181 |             crop)
182 | 
183 |         # set this transformed image -> tensor blob as the network input
184 | 
185 |         net.setInput(blob)
186 | 
187 |         # perform forward inference on the network
188 | 
189 |         detections = net.forward()
190 | 
191 |         # process the detections from the CNN to give bounding boxes
192 |         # i.e. for each detection returned from the network
193 | 
194 |         for i in range(detections.shape[2]):
195 | 
196 |             # extract the confidence of the detection
197 | 
198 |             confidence = detections[0, 0, i, 2]
199 | 
200 |             # provided that is above a threshold
201 | 
202 |             if confidence > confidence_threshold:
203 | 
204 |                 # get the class number id and the bounding box
205 | 
206 |                 class_id = int(detections[0, 0, i, 1])
207 | 
208 |                 xLeftBottom = int(detections[0, 0, i, 3] * cols)
209 |                 yLeftBottom = int(detections[0, 0, i, 4] * rows)
210 |                 xRightTop = int(detections[0, 0, i, 5] * cols)
211 |                 yRightTop = int(detections[0, 0, i, 6] * rows)
212 | 
213 |                 # draw the bounding box on the frame
214 | 
215 |                 cv2.rectangle(frame, (xLeftBottom, yLeftBottom),
216 |                               (xRightTop, yRightTop), (0, 255, 0))
217 | 
218 |                 # look up the class name based on the class id and draw it on
219 |                 # the frame also
220 | 
221 |                 if class_id in classNames:
222 |                     label = classNames[class_id] + (": %.2f" % confidence)
223 |                     labelSize, baseLine = cv2.getTextSize(
224 |                         label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
225 | 
226 |                     yLeftBottom = max(yLeftBottom, labelSize[1])
227 |                     cv2.rectangle(
228 |                                     frame, (
229 |                                                 xLeftBottom, yLeftBottom -
230 |                                                 labelSize[1]
231 |                                             ), (
232 |                                                 xLeftBottom + labelSize[0],
233 |                                                 yLeftBottom + baseLine
234 |                                             ), (255, 255, 255),
235 |                                     cv2.FILLED
236 |                     )
237 |                     cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
238 |                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
239 | 
240 |         # Display efficiency information - the function getPerfProfile returns
241 |         # the overall time for inference from the network
242 | 
243 |         t, _ = net.getPerfProfile()
244 |         inference_t = (t * 1000.0 / cv2.getTickFrequency())
245 |         label = ('Inference time: %.2f ms' % inference_t) + \
246 |             (' (Framerate: %.2f fps' % (1000 / inference_t)) + ')'
247 |         cv2.putText(frame, label, (0, 15),
248 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
249 | 
250 |         # display image
251 | 
252 |         cv2.imshow(window_name, frame)
253 | 
254 |         # stop the timer and convert to ms. (to see how long processing and
255 |         # display takes)
256 | 
257 |         stop_t = ((cv2.getTickCount() - start_t) /
258 |                   cv2.getTickFrequency()) * 1000
259 | 
260 |         # start the event loop - essential
261 | 
262 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
263 |         # ms). It waits for specified milliseconds for any keyboard event.
264 |         # If you press any key in that time, the program continues.
265 |         # If 0 is passed, it waits indefinitely for a key stroke.
266 |         # (bitwise and with 0xFF to extract least significant byte of
267 |         # multi-byte response)
268 | 
269 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
270 |         # 25 fps = 40 ms)
271 | 
272 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
273 | 
274 |         # It can also be set to detect specific key strokes by recording which
275 |         # key is pressed
276 | 
277 |         # e.g. if user presses "x" then exit / press "f" for fullscreen
278 | 
279 |         if (key == ord('x')):
280 |             keep_processing = False
281 |         elif (key == ord('f')):
282 |             cv2.setWindowProperty(
283 |                 window_name,
284 |                 cv2.WND_PROP_FULLSCREEN,
285 |                 cv2.WINDOW_FULLSCREEN)
286 | 
287 |     # close all windows
288 | 
289 |     cv2.destroyAllWindows()
290 | 
291 | else:
292 |     print("No video file specified or camera connected.")
293 | 
294 | #####################################################################
295 | 


--------------------------------------------------------------------------------
/contour_edges.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example :  contour edges for a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2016 School of Engineering & Computing Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | #####################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | 
 19 | #####################################################################
 20 | 
 21 | keep_processing = True
 22 | 
 23 | # parse command line arguments for camera ID or video file
 24 | 
 25 | parser = argparse.ArgumentParser(
 26 |     description='Perform ' +
 27 |     sys.argv[0] +
 28 |     ' example operation on incoming camera/video image')
 29 | parser.add_argument(
 30 |     "-c",
 31 |     "--camera_to_use",
 32 |     type=int,
 33 |     help="specify camera to use",
 34 |     default=0)
 35 | parser.add_argument(
 36 |     "-r",
 37 |     "--rescale",
 38 |     type=float,
 39 |     help="rescale image by this factor",
 40 |     default=1.0)
 41 | parser.add_argument(
 42 |     "-s",
 43 |     "--set_resolution",
 44 |     type=int,
 45 |     nargs=2,
 46 |     help='override default camera resolution as H W')
 47 | parser.add_argument(
 48 |     'video_file',
 49 |     metavar='video_file',
 50 |     type=str,
 51 |     nargs='?',
 52 |     help='specify optional video file')
 53 | args = parser.parse_args()
 54 | 
 55 | #####################################################################
 56 | 
 57 | # this function is called as a call-back everytime the trackbar is moved
 58 | # (here we just do nothing)
 59 | 
 60 | 
 61 | def nothing(x):
 62 |     pass
 63 | 
 64 | #####################################################################
 65 | 
 66 | # define video capture object
 67 | 
 68 | 
 69 | try:
 70 |     # to use a non-buffered camera stream (via a separate thread)
 71 | 
 72 |     if not (args.video_file):
 73 |         import camera_stream
 74 |         cap = camera_stream.CameraVideoStream(use_tapi=True)
 75 |     else:
 76 |         cap = cv2.VideoCapture()  # not needed for video files
 77 | 
 78 | except BaseException:
 79 |     # if not then just use OpenCV default
 80 | 
 81 |     print("INFO: camera_stream class not found - camera input may be buffered")
 82 |     cap = cv2.VideoCapture()
 83 | 
 84 | # define display window name
 85 | 
 86 | window_name = "Largest Area Contour"  # window name
 87 | window_name2 = "All Contours"  # window name
 88 | 
 89 | # if command line arguments are provided try to read video_name
 90 | # otherwise default to capture from attached H/W camera
 91 | 
 92 | if (((args.video_file) and (cap.open(str(args.video_file))))
 93 |         or (cap.open(args.camera_to_use))):
 94 | 
 95 |     # create window by name (as resizable)
 96 | 
 97 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
 98 |     cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
 99 | 
100 |     # add some track bar controllers for settings
101 | 
102 |     lower_threshold = 112
103 |     cv2.createTrackbar("lower", window_name2, lower_threshold, 255, nothing)
104 |     upper_threshold = 170
105 |     cv2.createTrackbar("upper", window_name2, upper_threshold, 255, nothing)
106 |     smoothing_neighbourhood = 3
107 |     cv2.createTrackbar(
108 |         "smoothing",
109 |         window_name2,
110 |         smoothing_neighbourhood,
111 |         15,
112 |         nothing)
113 |     sobel_size = 3  # greater than 7 seems to crash
114 |     cv2.createTrackbar("sobel size", window_name2, sobel_size, 7, nothing)
115 | 
116 |     # override default camera resolution
117 | 
118 |     if (args.set_resolution is not None):
119 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
120 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
121 | 
122 |     print("INFO: input resolution : (",
123 |           int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
124 |           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
125 | 
126 |     while (keep_processing):
127 | 
128 |         # if video file successfully open then read frame from video
129 | 
130 |         if (cap.isOpened):
131 |             ret, frame = cap.read()
132 | 
133 |             # when we reach the end of the video (file) exit cleanly
134 | 
135 |             if (ret == 0):
136 |                 keep_processing = False
137 |                 continue
138 | 
139 |             # rescale if specified
140 | 
141 |             if (args.rescale != 1.0):
142 |                 frame = cv2.resize(
143 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
144 | 
145 |         # get parameters from track bars
146 | 
147 |         lower_threshold = cv2.getTrackbarPos("lower", window_name2)
148 |         upper_threshold = cv2.getTrackbarPos("upper", window_name2)
149 |         smoothing_neighbourhood = cv2.getTrackbarPos("smoothing", window_name2)
150 |         sobel_size = cv2.getTrackbarPos("sobel size", window_name2)
151 | 
152 |         # check neighbourhood is greater than 3 and odd
153 | 
154 |         smoothing_neighbourhood = max(3, smoothing_neighbourhood)
155 |         if not (smoothing_neighbourhood % 2):
156 |             smoothing_neighbourhood = smoothing_neighbourhood + 1
157 | 
158 |         sobel_size = max(3, sobel_size)
159 |         if not (sobel_size % 2):
160 |             sobel_size = sobel_size + 1
161 | 
162 |         # convert to grayscale
163 | 
164 |         gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
165 | 
166 |         # performing smoothing on the image using a 5x5 smoothing mark (see
167 |         # manual entry for GaussianBlur())
168 | 
169 |         smoothed = cv2.GaussianBlur(
170 |             gray_frame, (smoothing_neighbourhood, smoothing_neighbourhood), 0)
171 | 
172 |         # perform canny edge detection
173 | 
174 |         canny = cv2.Canny(
175 |             smoothed,
176 |             lower_threshold,
177 |             upper_threshold,
178 |             apertureSize=sobel_size)
179 | 
180 |         # convert the canny edges into contours (check OpenCV version >= 4.x)
181 | 
182 |         if (int(cv2.__version__.split(".")[0]) >= 4):
183 |             contours, hierarchy = cv2.findContours(
184 |                 canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
185 |         else:
186 |             _, contours, hierarchy = cv2.findContours(
187 |                 canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
188 | 
189 |         # find largest contour by area
190 | 
191 |         max_contour_area = -1
192 |         for cnt in contours:
193 |             area = cv2.contourArea(cnt)
194 |             if (area > max_contour_area):
195 |                 max_contour_area = area
196 |                 largest_contour = cnt
197 | 
198 |         # draw contours (one display for all of them, one for the largest only)
199 | 
200 |         # make 3 channel to draw on it in colour
201 |         gray_frame = cv2.cvtColor(gray_frame, cv2.COLOR_GRAY2BGR)
202 |         cv2.drawContours(gray_frame, contours, -1, (0, 255, 0), 3)   # in green
203 |         cv2.drawContours(frame, [largest_contour], 0, (0, 0, 255), 3)  # in red
204 | 
205 |         # display image
206 | 
207 |         cv2.imshow(window_name, frame)
208 |         cv2.imshow(window_name2, gray_frame)
209 | 
210 |         # start the event loop - essential
211 | 
212 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
213 |         # ms). It waits for specified milliseconds for any keyboard event.
214 |         # If you press any key in that time, the program continues.
215 |         # If 0 is passed, it waits indefinitely for a key stroke.
216 |         # (bitwise and with 0xFF to extract least significant byte of
217 |         # multi-byte response)
218 | 
219 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
220 |         key = cv2.waitKey(40) & 0xFF
221 | 
222 |         # It can also be set to detect specific key strokes by recording which
223 |         # key is pressed
224 | 
225 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
226 |         # display
227 | 
228 |         if (key == ord('x')):
229 |             keep_processing = False
230 |         elif (key == ord('f')):
231 |             cv2.setWindowProperty(
232 |                 window_name,
233 |                 cv2.WND_PROP_FULLSCREEN,
234 |                 cv2.WINDOW_FULLSCREEN)
235 | 
236 |     # close all windows
237 | 
238 |     cv2.destroyAllWindows()
239 | 
240 | else:
241 |     print("No video file specified or camera connected.")
242 | 
243 | #####################################################################
244 | 


--------------------------------------------------------------------------------
/cycleimages.py:
--------------------------------------------------------------------------------
 1 | #####################################################################
 2 | 
 3 | # Example : load and display a set of images from a directory
 4 | # basic illustrative python script
 5 | 
 6 | # For use with provided test / training datasets
 7 | 
 8 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
 9 | 
10 | # Copyright (c) 2015 / 2016 School of Engineering & Computing Science,
11 | #                    Durham University, UK
12 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
13 | 
14 | #####################################################################
15 | 
16 | import cv2
17 | import os
18 | 
19 | directory_to_cycle = "path-to-directory-to-cycle"  # edit this
20 | 
21 | #####################################################################
22 | 
23 | # display all images in directory (sorted by filename)
24 | 
25 | for filename in sorted(os.listdir(directory_to_cycle)):
26 | 
27 |     # if it is a PNG file
28 | 
29 |     if '.png' in filename:
30 |         print(os.path.join(directory_to_cycle, filename))
31 | 
32 |         # read it and display in a window
33 | 
34 |         img = cv2.imread(
35 |             os.path.join(
36 |                 directory_to_cycle,
37 |                 filename),
38 |             cv2.IMREAD_COLOR)
39 |         cv2.imshow('the image', img)
40 |         key = cv2.waitKey(200)  # wait 200ms
41 |         if (key == ord('x')):
42 |             break
43 | 
44 | 
45 | # close all windows
46 | 
47 | cv2.destroyAllWindows()
48 | 
49 | #####################################################################
50 | 


--------------------------------------------------------------------------------
/download-models.sh:
--------------------------------------------------------------------------------
  1 | ################################################################################
  2 | 
  3 | # multi model file downloader - (c) 2021 Toby Breckon, Durham University, UK
  4 | 
  5 | ################################################################################
  6 | 
  7 | # models and associated files for automated download
  8 | 
  9 | MODELS=(  https://data.pjreddie.com/files/yolov3.weights
 10 |           https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg
 11 |           https://raw.githubusercontent.com/AlexeyAB/darknet/master/data/coco.names
 12 |           https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt
 13 |           https://github.com/forresti/SqueezeNet/raw/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel
 14 |           https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/squeezenet_v1.1.prototxt
 15 |           https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_coco.txt
 16 |           https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt
 17 |           http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz
 18 |           https://github.com/foss-for-synopsys-dwc-arc-processors/synopsys-caffe-models/raw/master/caffe_models/openpose/caffe_model/pose_iter_440000.caffemodel
 19 |           https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/openpose/master/models/pose/coco/pose_deploy_linevec.prototxt
 20 |           https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml
 21 |           https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_eye.xml
 22 |           https://raw.githubusercontent.com/opencv/opencv/master/data/lbpcascades/lbpcascade_frontalface_improved.xml
 23 |           http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
 24 |           https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt
 25 |           http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel
 26 |           https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/fcn8s-heavy-pascal.prototxt
 27 |           https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_pascal_voc.txt
 28 |           https://raw.githubusercontent.com/PINTO0309/MobileNet-SSD-RealSense/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.caffemodel
 29 |           https://raw.githubusercontent.com/chuanqi305/MobileNet-SSD/master/voc/MobileNetSSD_deploy.prototxt
 30 |         )
 31 | 
 32 | # associated MD5 checksums (output of md5sum filename)
 33 | 
 34 | MD5SUMS=( "4fdfb6d202e9d8e65da14c78b604af95  classification_classes_ILSVRC2012.txt"
 35 |           "8fc50561361f8bcf96b0177086e7616c  coco.names"
 36 |           "81d7d9cb3438456214afcdb5c83e7bfb  object_detection_classes_coco.txt"
 37 |           "c9e6e28e5b84b7b49c436f929b58db91  pose_deploy_linevec.prototxt"
 38 |           "5156d31f670511fce9b4e28b403f2939  pose_iter_440000.caffemodel"
 39 |           "0357e4e11d173c72a01615888826bc8e  squeezenet_v1.1.caffemodel"
 40 |           "dfe9c8d69b154f0ebbba87bc32371e2d  squeezenet_v1.1.prototxt"
 41 |           "5d442b0e550e6c640068e7e15e498599  yolov3.cfg"
 42 |           "c84e5b99d0e52cd466ae710cadf6d84c  yolov3.weights"
 43 |           "1f1902262c16c2d9acb9bc4f8a8c266f  faster_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb"
 44 |           "2d6fac0caaec1f9558872755ff34818d  haarcascade_eye.xml"
 45 |           "a03f92a797e309e76e6a034ab9e02616  haarcascade_frontalface_default.xml"
 46 |           "acee557d79a3684cac72ebd811a4eee0  lbpcascade_frontalface_improved.xml"
 47 |           "5708e4e579d8e4eabeec6c555d4234b2  mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"
 48 |           "b47e443b313a709e4c39c1caeaa3ecb3  mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb"
 49 |           "c03b2953ebd846c270da1a8e8f200c09  fcn8s-heavy-pascal.caffemodel"
 50 |           "532698b83c2e8fa5a010bd996d19d30a  fcn8s-heavy-pascal.prototxt"
 51 |           "5ae5d62183cfb6f6d3ac109359d06a1b  object_detection_classes_pascal_voc.txt"
 52 |           "8bed6fa43361685f4c78f1c084be7775  MobileNetSSD_deploy.caffemodel"
 53 |           "aa2a13fe1fba2c3b7e067067a6749e7e  MobileNetSSD_deploy.prototxt"
 54 | 
 55 |         )
 56 | 
 57 | ################################################################################
 58 | 
 59 | DIR_LOCAL_TARGET=/tmp/python-examples-cv-models
 60 | PWD_SCRIPT=`pwd`
 61 | 
 62 | ################################################################################
 63 | 
 64 | # Preset this script to fail on error
 65 | 
 66 | set -e
 67 | 
 68 | # check for required commands to download and md5 check
 69 | 
 70 | (command -v curl | grep curl > /dev/null) ||
 71 |   (echo "Error: curl command not found, cannot download.")
 72 | 
 73 |   (command -v md5sum | grep md5sum > /dev/null) ||
 74 |     (echo "Error: md5sum command not found, cannot verify files.")
 75 | 
 76 | 
 77 | ################################################################################
 78 | 
 79 | # Download - perform download of each model
 80 | 
 81 | mkdir -p $DIR_LOCAL_TARGET
 82 | cd $DIR_LOCAL_TARGET
 83 | 
 84 | for URL in ${MODELS[@]}; do
 85 |   echo
 86 |   echo "Downloading ... " $URL " -> " $DIR_LOCAL_TARGET/
 87 |   curl -L -k -O --remote-name $URL
 88 | done
 89 | 
 90 | # un-tar/gz any models that need this
 91 | 
 92 | for GZT in `ls *tar.gz`; do
 93 |   tar -xzf $GZT
 94 |   rm $GZT
 95 | done
 96 | 
 97 | cd $PWD_SCRIPT
 98 | 
 99 | ################################################################################
100 | 
101 | # Post Download - check md5sum
102 | 
103 | cd $DIR_LOCAL_TARGET
104 | echo
105 | echo "Performing MD5 file verification checks ..."
106 | printf '%s\n' "${MD5SUMS[@]}" > md5sums.txt
107 | md5sum -c md5sums.txt
108 | rm -f md5sums.txt
109 | 
110 | # Post Download - link all files to current directory
111 | 
112 | cd $PWD_SCRIPT
113 | echo
114 | echo "Linking files to current directory ..."
115 | ln -sv $DIR_LOCAL_TARGET/* .
116 | 
117 | ################################################################################
118 | 


--------------------------------------------------------------------------------
/fcn_segmentation.py:
--------------------------------------------------------------------------------
  1 | ##########################################################################
  2 | 
  3 | # Example : perform FCN semantic image segmentation from a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera (FCN segmentation: Long et al, CVPR 2015)
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # This code: significant portions based on the example available at:
 10 | # https://github.com/opencv/opencv/blob/master/samples/dnn/segmentation.py
 11 | 
 12 | 
 13 | # Copyright (c) 2021 Toby Breckon, Dept. Computer Science,
 14 | #                    Durham University, UK
 15 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 16 | 
 17 | ##########################################################################
 18 | 
 19 | # To use download the following files:
 20 | 
 21 | # http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel
 22 | # https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/fcn8s-heavy-pascal.prototxt
 23 | # https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_pascal_voc.txt
 24 | 
 25 | ##########################################################################
 26 | 
 27 | import cv2
 28 | import argparse
 29 | import sys
 30 | import math
 31 | import numpy as np
 32 | 
 33 | ##########################################################################
 34 | 
 35 | keep_processing = True
 36 | colors = None
 37 | 
 38 | ##########################################################################
 39 | 
 40 | # generate and display colour legend for segmentation classes
 41 | 
 42 | 
 43 | def generate_legend(classes, height):
 44 |     blockHeight = math.floor(height/len(classes))
 45 | 
 46 |     legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8)
 47 |     for i in range(len(classes)):
 48 |         block = legend[i * blockHeight:(i + 1) * blockHeight]
 49 |         block[:, :] = colors[i]
 50 |         cv2.putText(block, classes[i],
 51 |                     (0, blockHeight//2),
 52 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
 53 | 
 54 |     return legend
 55 | 
 56 | ##########################################################################
 57 | 
 58 | # concatenate two RGB/grayscale images horizontally (left to right)
 59 | # handling differing channel numbers or image heights in the input
 60 | 
 61 | 
 62 | def h_concatenate(img1, img2):
 63 | 
 64 |     # get size and channels for both images
 65 | 
 66 |     height1 = img1.shape[0]
 67 | 
 68 |     if (len(img1.shape) == 2):
 69 |         channels1 = 1
 70 |     else:
 71 |         channels1 = img1.shape[2]
 72 | 
 73 |     height2 = img2.shape[0]
 74 |     width2 = img2.shape[1]
 75 |     if (len(img2.shape) == 2):
 76 |         channels2 = 1
 77 |     else:
 78 |         channels2 = img2.shape[2]
 79 | 
 80 |     # make all images 3 channel, or assume all same channel
 81 | 
 82 |     if ((channels1 > channels2) and (channels1 == 3)):
 83 |         out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
 84 |         out1 = img1
 85 |     elif ((channels2 > channels1) and (channels2 == 3)):
 86 |         out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
 87 |         out2 = img2
 88 |     else:  # both must be equal
 89 |         out1 = img1
 90 |         out2 = img2
 91 | 
 92 |     # height of first image is master height, width can remain unchanged
 93 | 
 94 |     if (height1 != height2):
 95 |         out2 = cv2.resize(out2, (width2, height1))
 96 | 
 97 |     return np.hstack((out1, out2))
 98 | 
 99 | 
100 | ##########################################################################
101 | 
102 | # parse command line arguments for camera ID or video file
103 | 
104 | parser = argparse.ArgumentParser(
105 |     description='Perform ' +
106 |     sys.argv[0] +
107 |     ' example operation on incoming camera/video image')
108 | parser.add_argument(
109 |     "-c",
110 |     "--camera_to_use",
111 |     type=int,
112 |     help="specify camera to use",
113 |     default=0)
114 | parser.add_argument(
115 |     "-r",
116 |     "--rescale",
117 |     type=float,
118 |     help="rescale image by this factor",
119 |     default=1.0)
120 | parser.add_argument(
121 |     "-fs",
122 |     "--fullscreen",
123 |     action='store_true',
124 |     help="run in full screen mode")
125 | parser.add_argument(
126 |     "-use",
127 |     "--target",
128 |     type=str,
129 |     choices=['cpu', 'gpu', 'opencl'],
130 |     help="select computational backend",
131 |     default='gpu')
132 | parser.add_argument(
133 |     'video_file',
134 |     metavar='video_file',
135 |     type=str,
136 |     nargs='?',
137 |     help='specify optional video file')
138 | args = parser.parse_args()
139 | 
140 | ##########################################################################
141 | 
142 | # define video capture object
143 | 
144 | try:
145 |     # to use a non-buffered camera stream (via a separate thread)
146 | 
147 |     if not (args.video_file):
148 |         import camera_stream
149 |         cap = camera_stream.CameraVideoStream()
150 |     else:
151 |         cap = cv2.VideoCapture()  # not needed for video files
152 | 
153 | except BaseException:
154 |     # if not then just use OpenCV default
155 | 
156 |     print("INFO: camera_stream class not found - camera input may be buffered")
157 |     cap = cv2.VideoCapture()
158 | 
159 | # define display window name
160 | 
161 | window_name = "FCN Semantic Image Segmentation"  # window name
162 | 
163 | ##########################################################################
164 | 
165 | # Load names of class labels (background = class 0, for PASCAL VOC)
166 | 
167 | classes = None
168 | with open("object_detection_classes_pascal_voc.txt", 'rt') as f:
169 |     classes = f.read().rstrip('\n').split('\n')
170 | classes.insert(0, "background")  # insery a background class as 0
171 | 
172 | ##########################################################################
173 | 
174 | # Load CNN model
175 | 
176 | net = cv2.dnn.readNet(
177 |     "fcn8s-heavy-pascal.caffemodel",
178 |     "fcn8s-heavy-pascal.prototxt",
179 |     'caffe')
180 | 
181 | # set up compute target as one of [GPU, OpenCL, CPU]
182 | 
183 | if (args.target == 'gpu'):
184 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
185 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
186 | elif (args.target == 'opencl'):
187 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
188 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
189 | else:
190 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
191 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
192 | 
193 | ##########################################################################
194 | 
195 | # if command line arguments are provided try to read video_name
196 | # otherwise default to capture from attached camera
197 | 
198 | if (((args.video_file) and (cap.open(str(args.video_file))))
199 |         or (cap.open(args.camera_to_use))):
200 | 
201 |     # create window by name (as resizable)
202 | 
203 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
204 | 
205 |     while (keep_processing):
206 | 
207 |         # start a timer (to see how long processing and display takes)
208 | 
209 |         start_t = cv2.getTickCount()
210 | 
211 |         # if camera /video file successfully open then read frame
212 | 
213 |         if (cap.isOpened):
214 |             ret, frame = cap.read()
215 | 
216 |             # when we reach the end of the video (file) exit cleanly
217 | 
218 |             if (ret == 0):
219 |                 keep_processing = False
220 |                 continue
221 | 
222 |             # rescale if specified
223 | 
224 |             if (args.rescale != 1.0):
225 |                 frame = cv2.resize(
226 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
227 | 
228 |         frameHeight = frame.shape[0]
229 |         frameWidth = frame.shape[1]
230 | 
231 |         #######################################################################
232 |         # FCN Segmentation:
233 |         # model: "fcn8s-heavy-pascal.caffemodel"
234 |         # config: "fcn8s-heavy-pascal.prototxt"
235 |         # mean: [0, 0, 0]
236 |         # scale: 1.0
237 |         # width: 500
238 |         # height: 500
239 |         # rgb: false
240 |         #
241 |         # classes: object_detection_classes_pascal_voc.txt
242 |         #######################################################################
243 | 
244 |         # create a 4D tensor "blob" from a frame.
245 | 
246 |         blob = cv2.dnn.blobFromImage(
247 |                                      frame, scalefactor=1.0,
248 |                                      size=(500, 500), mean=[0, 0, 0],
249 |                                      swapRB=False, crop=False
250 |                                     )
251 | 
252 |         # Run forward inference on the model
253 | 
254 |         net.setInput(blob)
255 |         result = net.forward()
256 | 
257 |         numClasses = result.shape[1]
258 |         height = result.shape[2]
259 |         width = result.shape[3]
260 | 
261 |         # define colours
262 | 
263 |         if not colors:
264 |             np.random.seed(888)
265 |             colors = [np.array([0, 0, 0], np.uint8)]
266 |             for i in range(1, numClasses + 1):
267 |                 colors.append((colors[i - 1] +
268 |                               np.random.randint(0, 256, [3],
269 |                               np.uint8)) / 2
270 |                               )
271 |             del colors[0]
272 | 
273 |             # generate legend
274 |             legend = generate_legend(classes, frameHeight)
275 | 
276 |         # display segmentation
277 | 
278 |         classIds = np.argmax(result[0], axis=0)
279 |         segm = np.stack([colors[idx] for idx in classIds.flatten()])
280 |         segm = segm.reshape(height, width, 3)
281 | 
282 |         segm = cv2.resize(segm, (frameWidth, frameHeight),
283 |                           interpolation=cv2.INTER_NEAREST)
284 | 
285 |         # stop the timer and convert to ms. (to see how long processing and
286 |         # display takes)
287 | 
288 |         stop_t = ((cv2.getTickCount() - start_t) /
289 |                   cv2.getTickFrequency()) * 1000
290 | 
291 |         # Display efficiency information
292 | 
293 |         label = ('Inference time: %.2f ms' % stop_t) + \
294 |             (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
295 |         cv2.putText(frame, label, (0, 15),
296 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
297 | 
298 |         # display image(s) as concatenated single image
299 | 
300 |         cv2.imshow(window_name,
301 |                    h_concatenate(h_concatenate(frame, segm.astype(np.uint8)),
302 |                                  legend))
303 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
304 |                               cv2.WINDOW_FULLSCREEN & args.fullscreen)
305 | 
306 |         # start the event loop - essential
307 | 
308 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
309 |         # 25 fps = 40 ms)
310 | 
311 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
312 | 
313 |         # if user presses "x" then exit  / press "f" for fullscreen display
314 | 
315 |         if (key == ord('x')):
316 |             keep_processing = False
317 |         elif (key == ord('f')):
318 |             args.fullscreen = not (args.fullscreen)
319 | 
320 |     # close all windows
321 | 
322 |     cv2.destroyAllWindows()
323 | 
324 | else:
325 |     print("No video file specified or camera connected.")
326 | 
327 | ##########################################################################
328 | 


--------------------------------------------------------------------------------
/gaussian.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example :  gaussian smoothing for a a video file specified on the
  4 | # command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera with selectable opencl acceleration
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2021 Dept Computer Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | #####################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | import math
 19 | 
 20 | #####################################################################
 21 | 
 22 | keep_processing = True
 23 | 
 24 | # parse command line arguments for camera ID or video file
 25 | 
 26 | parser = argparse.ArgumentParser(
 27 |     description='Perform ' +
 28 |     sys.argv[0] +
 29 |     ' example operation on incoming camera/video image')
 30 | parser.add_argument(
 31 |     "-c",
 32 |     "--camera_to_use",
 33 |     type=int,
 34 |     help="specify camera to use",
 35 |     default=0)
 36 | parser.add_argument(
 37 |     "-r",
 38 |     "--rescale",
 39 |     type=float,
 40 |     help="rescale image by this factor",
 41 |     default=1.0)
 42 | parser.add_argument(
 43 |     "-s",
 44 |     "--set_resolution",
 45 |     type=int,
 46 |     nargs=2,
 47 |     help='override default camera resolution as H W')
 48 | parser.add_argument(
 49 |     "-ocl",
 50 |     "--opencl",
 51 |     action='store_true',
 52 |     help="enable opencl hardware acceleration")
 53 | parser.add_argument(
 54 |     'video_file',
 55 |     metavar='video_file',
 56 |     type=str,
 57 |     nargs='?',
 58 |     help='specify optional video file')
 59 | 
 60 | args = parser.parse_args()
 61 | 
 62 | #####################################################################
 63 | 
 64 | # this function is called as a call-back everytime the trackbar is moved
 65 | # (here we just do nothing)
 66 | 
 67 | 
 68 | def nothing(x):
 69 |     pass
 70 | 
 71 | 
 72 | #####################################################################
 73 | 
 74 | # define video capture object
 75 | 
 76 | try:
 77 |     # to use a non-buffered camera stream (via a separate thread)
 78 | 
 79 |     if not (args.video_file):
 80 |         import camera_stream
 81 |         cap = camera_stream.CameraVideoStream(use_tapi=args.opencl)
 82 |     else:
 83 |         cap = cv2.VideoCapture()  # not needed for video files
 84 | 
 85 | except BaseException:
 86 |     # if not then just use OpenCV default
 87 | 
 88 |     print("INFO: camera_stream class not found - camera input may be buffered")
 89 |     cap = cv2.VideoCapture()
 90 | 
 91 | # define display window name
 92 | 
 93 | window_name = "Live Camera Input"  # window name
 94 | window_name2 = "Gaussian Smoothing"  # window name
 95 | 
 96 | # setup OpenCL if specified on command line only
 97 | 
 98 | cv2.ocl.setUseOpenCL(args.opencl)
 99 | 
100 | # if command line arguments are provided try to read video_name
101 | # otherwise default to capture from attached H/W camera
102 | 
103 | if (((args.video_file) and (cap.open(str(args.video_file))))
104 |         or (cap.open(args.camera_to_use))):
105 | 
106 |     # create window by name (as resizable)
107 | 
108 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
109 |     cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
110 | 
111 |     # add some track bar controllers for settings
112 | 
113 |     smoothing_neighbourhood = 3
114 |     cv2.createTrackbar(
115 |         "kernel size",
116 |         window_name2,
117 |         smoothing_neighbourhood,
118 |         250,
119 |         nothing)
120 | 
121 |     # override default camera resolution
122 | 
123 |     if (args.set_resolution is not None):
124 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
125 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
126 | 
127 |     print("INFO: input resolution : (",
128 |           int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
129 |           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
130 | 
131 |     while (keep_processing):
132 | 
133 |         # start a timer (to see how long processing and display takes)
134 | 
135 |         start_t = cv2.getTickCount()
136 | 
137 |         # if video file successfully open then read frame from video
138 | 
139 |         if (cap.isOpened):
140 |             ret, frame = cap.read()  # rescale if specified
141 | 
142 |             # when we reach the end of the video (file) exit cleanly
143 | 
144 |             if (ret == 0):
145 |                 keep_processing = False
146 |                 continue
147 | 
148 |             # rescale if specified
149 | 
150 |             if (args.rescale != 1.0):
151 |                 frame = cv2.resize(
152 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
153 | 
154 |         # get parameters from track bars
155 | 
156 |         smoothing_neighbourhood = cv2.getTrackbarPos("kernel size",
157 |                                                      window_name2)
158 | 
159 |         # check neighbourhood is greater than 3 and odd
160 | 
161 |         smoothing_neighbourhood = max(3, smoothing_neighbourhood)
162 |         if not (smoothing_neighbourhood % 2):
163 |             smoothing_neighbourhood = smoothing_neighbourhood + 1
164 | 
165 |         # performing smoothing on the image using a 5x5 smoothing mark (see
166 |         # manual entry for GaussianBlur())
167 | 
168 |         smoothed = cv2.GaussianBlur(frame, (smoothing_neighbourhood,
169 |                                     smoothing_neighbourhood), 0)
170 | 
171 |         # stop the timer and convert to ms. (to see how long processing and
172 |         # display takes)
173 | 
174 |         stop_t = ((cv2.getTickCount() - start_t) /
175 |                   cv2.getTickFrequency()) * 1000
176 | 
177 |         label = ('Processing time: %.2f ms' % stop_t) + \
178 |             (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
179 |         cv2.putText(smoothed, label, (0, 15),
180 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
181 | 
182 |         # display image
183 | 
184 |         cv2.imshow(window_name, frame)
185 |         cv2.imshow(window_name2, smoothed)
186 | 
187 |         # start the event loop - essential
188 | 
189 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
190 |         # milliseconds). It waits for specified milliseconds for any keyboard
191 |         # event. If you press any key in that time, the program continues.
192 |         # If 0 is passed, it waits indefinitely for a key stroke.
193 |         # (bitwise and with 0xFF to extract least significant byte of
194 |         # multi-byte response)
195 | 
196 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
197 |         # 25 fps = 40 ms)
198 | 
199 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
200 | 
201 |         # It can also be set to detect specific key strokes by recording which
202 |         # key is pressed
203 | 
204 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
205 |         # display
206 | 
207 |         if (key == ord('x')):
208 |             keep_processing = False
209 |         elif (key == ord('f')):
210 |             cv2.setWindowProperty(
211 |                 window_name2,
212 |                 cv2.WND_PROP_FULLSCREEN,
213 |                 cv2.WINDOW_FULLSCREEN)
214 | 
215 |     # close all windows
216 | 
217 |     cv2.destroyAllWindows()
218 | 
219 | else:
220 |     print("No video file specified or camera connected.")
221 | 
222 | #####################################################################
223 | 


--------------------------------------------------------------------------------
/generic_interface.py:
--------------------------------------------------------------------------------
  1 | ##########################################################################
  2 | 
  3 | # Example : perform generic live display from a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2015 - 2018 Toby Breckon, Engineering & Computing Science,
 10 | #                           Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | ##########################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | import math
 19 | 
 20 | ##########################################################################
 21 | 
 22 | keep_processing = True
 23 | 
 24 | # parse command line arguments for camera ID or video file
 25 | 
 26 | parser = argparse.ArgumentParser(
 27 |     description='Perform ' +
 28 |     sys.argv[0] +
 29 |     ' example operation on incoming camera/video image')
 30 | parser.add_argument(
 31 |     "-c",
 32 |     "--camera_to_use",
 33 |     type=int,
 34 |     help="specify camera to use",
 35 |     default=0)
 36 | parser.add_argument(
 37 |     "-r",
 38 |     "--rescale",
 39 |     type=float,
 40 |     help="rescale image by this factor",
 41 |     default=1.0)
 42 | parser.add_argument(
 43 |     "-fs",
 44 |     "--fullscreen",
 45 |     action='store_true',
 46 |     help="run in full screen mode")
 47 | parser.add_argument(
 48 |     'video_file',
 49 |     metavar='video_file',
 50 |     type=str,
 51 |     nargs='?',
 52 |     help='specify optional video file')
 53 | args = parser.parse_args()
 54 | 
 55 | ##########################################################################
 56 | 
 57 | # define video capture object
 58 | 
 59 | try:
 60 |     # to use a non-buffered camera stream (via a separate thread)
 61 | 
 62 |     if not (args.video_file):
 63 |         import camera_stream
 64 |         cap = camera_stream.CameraVideoStream()
 65 |     else:
 66 |         cap = cv2.VideoCapture()  # not needed for video files
 67 | 
 68 | except BaseException:
 69 |     # if not then just use OpenCV default
 70 | 
 71 |     print("INFO: camera_stream class not found - camera input may be buffered")
 72 |     cap = cv2.VideoCapture()
 73 | 
 74 | # define display window name
 75 | 
 76 | window_name = "Live Camera Input"  # window name
 77 | 
 78 | # if command line arguments are provided try to read video_name
 79 | # otherwise default to capture from attached camera
 80 | 
 81 | if (((args.video_file) and (cap.open(str(args.video_file))))
 82 |         or (cap.open(args.camera_to_use))):
 83 | 
 84 |     # create window by name (as resizable)
 85 | 
 86 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
 87 |     cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
 88 |                           cv2.WINDOW_FULLSCREEN & args.fullscreen)
 89 | 
 90 |     while (keep_processing):
 91 | 
 92 |         # start a timer (to see how long processing and display takes)
 93 | 
 94 |         start_t = cv2.getTickCount()
 95 | 
 96 |         # if camera /video file successfully open then read frame
 97 | 
 98 |         if (cap.isOpened):
 99 |             ret, frame = cap.read()
100 | 
101 |             # when we reach the end of the video (file) exit cleanly
102 | 
103 |             if (ret == 0):
104 |                 keep_processing = False
105 |                 continue
106 | 
107 |             # rescale if specified
108 | 
109 |             if (args.rescale != 1.0):
110 |                 frame = cv2.resize(
111 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
112 | 
113 |         # ***
114 |         # *** do any processing here ****
115 |         # ***
116 | 
117 |         # display image
118 | 
119 |         cv2.imshow(window_name, frame)
120 | 
121 |         # stop the timer and convert to ms. (to see how long processing and
122 |         # display takes)
123 | 
124 |         stop_t = ((cv2.getTickCount() - start_t) /
125 |                   cv2.getTickFrequency()) * 1000
126 | 
127 |         # start the event loop - essential
128 | 
129 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
130 |         # milliseconds). It waits for specified milliseconds for any keyboard
131 |         # event. If you press any key in that time, the program continues.
132 |         # If 0 is passed, it waits indefinitely for a key stroke.
133 |         # (bitwise and with 0xFF to extract least significant byte of
134 |         # multi-byte response)
135 | 
136 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
137 |         # 25 fps = 40 ms)
138 | 
139 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
140 | 
141 |         # It can also be set to detect specific key strokes by recording which
142 |         # key is pressed
143 | 
144 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
145 |         # display
146 | 
147 |         if (key == ord('x')):
148 |             keep_processing = False
149 |         elif (key == ord('f')):
150 |             args.fullscreen = not (args.fullscreen)
151 |             cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
152 |                                   cv2.WINDOW_FULLSCREEN & args.fullscreen)
153 | 
154 |     # close all windows
155 | 
156 |     cv2.destroyAllWindows()
157 | 
158 | else:
159 |     print("No video file specified or camera connected.")
160 | 
161 | ##########################################################################
162 | 


--------------------------------------------------------------------------------
/gradient_orientation.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example : perform generic live display of gradient orientations
  4 | # (which form the essensce of the Histogram of Oriented Gradient (HOG) feature)
  5 | # from a video file specified on the command line
  6 | # (e.g. python FILE.py video_file) or from an attached web camera
  7 | 
  8 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  9 | 
 10 | # https://www.learnopencv.com/histogram-of-oriented-gradients/
 11 | 
 12 | # Copyright (c) 2018 Dept. Computer Science,
 13 | #                    Durham University, UK
 14 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 15 | 
 16 | #####################################################################
 17 | 
 18 | import cv2
 19 | import argparse
 20 | import sys
 21 | import math
 22 | import numpy as np
 23 | 
 24 | #####################################################################
 25 | 
 26 | keep_processing = True
 27 | 
 28 | # parse command line arguments for camera ID or video file
 29 | 
 30 | parser = argparse.ArgumentParser(
 31 |     description='Perform ' +
 32 |     sys.argv[0] +
 33 |     ' example operation on incoming camera/video image')
 34 | parser.add_argument(
 35 |     "-c",
 36 |     "--camera_to_use",
 37 |     type=int,
 38 |     help="specify camera to use",
 39 |     default=0)
 40 | parser.add_argument(
 41 |     "-r",
 42 |     "--rescale",
 43 |     type=float,
 44 |     help="rescale image by this factor",
 45 |     default=1.0)
 46 | parser.add_argument(
 47 |     'video_file',
 48 |     metavar='video_file',
 49 |     type=str,
 50 |     nargs='?',
 51 |     help='specify optional video file')
 52 | args = parser.parse_args()
 53 | 
 54 | #####################################################################
 55 | 
 56 | # this function is called as a call-back everytime the trackbar is moved
 57 | # (here we just do nothing)
 58 | 
 59 | 
 60 | def nothing(x):
 61 |     pass
 62 | 
 63 | 
 64 | #####################################################################
 65 | 
 66 | # define video capture object
 67 | 
 68 | try:
 69 |     # to use a non-buffered camera stream (via a separate thread)
 70 | 
 71 |     if not (args.video_file):
 72 |         import camera_stream
 73 |         cap = camera_stream.CameraVideoStream()
 74 |     else:
 75 |         cap = cv2.VideoCapture()  # not needed for video files
 76 | 
 77 | except BaseException:
 78 |     # if not then just use OpenCV default
 79 | 
 80 |     print("INFO: camera_stream class not found - camera input may be buffered")
 81 |     cap = cv2.VideoCapture()
 82 | 
 83 | # define display window names
 84 | 
 85 | window_nameGx = "Gradient - Gx"  # window name
 86 | window_nameGy = "Gradient - Gy"  # window name
 87 | window_nameAngle = "Gradient Angle"  # window name
 88 | 
 89 | # if command line arguments are provided try to read video_name
 90 | # otherwise default to capture from attached camera
 91 | 
 92 | if (((args.video_file) and (cap.open(str(args.video_file))))
 93 |         or (cap.open(args.camera_to_use))):
 94 | 
 95 |     # create window by name (as resizable)
 96 | 
 97 |     cv2.namedWindow(window_nameGx, cv2.WINDOW_NORMAL)
 98 |     cv2.namedWindow(window_nameGy, cv2.WINDOW_NORMAL)
 99 |     cv2.namedWindow(window_nameAngle, cv2.WINDOW_NORMAL)
100 | 
101 |     # add some track bar controllers for settings
102 | 
103 |     lower_threshold = 0
104 |     cv2.createTrackbar(
105 |         "lower",
106 |         window_nameAngle,
107 |         lower_threshold,
108 |         180,
109 |         nothing)
110 | 
111 |     upper_threshold = 180
112 |     cv2.createTrackbar(
113 |         "upper",
114 |         window_nameAngle,
115 |         upper_threshold,
116 |         180,
117 |         nothing)
118 | 
119 |     neighbourhood = 3
120 |     cv2.createTrackbar(
121 |         "neighbourhood, N",
122 |         window_nameGy,
123 |         neighbourhood,
124 |         40,
125 |         nothing)
126 | 
127 |     sigma = 1
128 |     cv2.createTrackbar(
129 |         "sigma",
130 |         window_nameGy,
131 |         sigma,
132 |         10,
133 |         nothing)
134 | 
135 |     while (keep_processing):
136 | 
137 |         # start a timer (to see how long processing and display takes)
138 | 
139 |         start_t = cv2.getTickCount()
140 | 
141 |         # if video file successfully open then read frame from video
142 | 
143 |         if (cap.isOpened):
144 |             ret, frame = cap.read()
145 | 
146 |             # when we reach the end of the video (file) exit cleanly
147 | 
148 |             if (ret == 0):
149 |                 keep_processing = False
150 |                 continue
151 | 
152 |             # rescale if specified
153 | 
154 |             if (args.rescale != 1.0):
155 |                 frame = cv2.resize(
156 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
157 | 
158 |         # get parameter from track bars - Gaussian pre-smoothing
159 | 
160 |         neighbourhood = cv2.getTrackbarPos("neighbourhood, N", window_nameGy)
161 |         sigma = cv2.getTrackbarPos("sigma", window_nameGy)
162 | 
163 |         # check neighbourhood is greater than 3 and odd
164 | 
165 |         neighbourhood = max(3, neighbourhood)
166 |         if not (neighbourhood % 2):
167 |             neighbourhood = neighbourhood + 1
168 | 
169 |         # perform Gaussian smoothing using NxN neighbourhood
170 | 
171 |         smoothed_img = cv2.GaussianBlur(
172 |             frame,
173 |             (neighbourhood,
174 |              neighbourhood),
175 |             sigma,
176 |             sigma,
177 |             borderType=cv2.BORDER_REPLICATE)
178 | 
179 |         # compute the gradients in the x and y directions separately
180 |         # N.B from here onward these images are 32-bit float
181 | 
182 |         gx = cv2.Sobel(smoothed_img, cv2.CV_32F, 1, 0)
183 |         gy = cv2.Sobel(smoothed_img, cv2.CV_32F, 0, 1)
184 | 
185 |         # calculate gradient magnitude and direction (in degrees)
186 | 
187 |         mag, angle = cv2.cartToPolar(gx, gy, angleInDegrees=True)
188 | 
189 |         # normalize
190 | 
191 |         gx = np.abs(gx)
192 |         gy = np.abs(gy)
193 |         angle = np.abs(angle)
194 | 
195 |         # normalize other values 0 -> 180
196 | 
197 |         gx = cv2.normalize(gx, None, 0, 255, cv2.NORM_MINMAX)
198 |         gy = cv2.normalize(gy, None, 0, 255, cv2.NORM_MINMAX)
199 |         angle = cv2.normalize(angle, None, 0, 180, cv2.NORM_MINMAX)
200 | 
201 |         # for the angle take the max across all three channels
202 | 
203 |         (aB, aG, aR) = cv2.split(angle)
204 |         angle = np.maximum(np.maximum(aR, aG), aB)
205 | 
206 |         # get threshold from trackbars and threshold to keep inner range
207 | 
208 |         lower_threshold = cv2.getTrackbarPos("lower", window_nameAngle)
209 |         upper_threshold = cv2.getTrackbarPos("upper", window_nameAngle)
210 | 
211 |         mask = cv2.inRange(angle, lower_threshold, upper_threshold)
212 |         angle = cv2.bitwise_and(angle.astype(np.uint8), mask)
213 | 
214 |         # display images (as 8-bit)
215 | 
216 |         cv2.imshow(window_nameGx, gx.astype(np.uint8))
217 |         cv2.imshow(window_nameGy, gy.astype(np.uint8))
218 |         cv2.imshow(window_nameAngle, angle.astype(np.uint8))
219 | 
220 |         # stop the timer and convert to ms. (to see how long processing and
221 |         # display takes)
222 | 
223 |         stop_t = ((cv2.getTickCount() - start_t) /
224 |                   cv2.getTickFrequency()) * 1000
225 | 
226 |         # start the event loop - essential
227 | 
228 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
229 |         # milliseconds). It waits for specified milliseconds for any keyboard
230 |         # event. If you press any key in that time, the program continues.
231 |         # If 0 is passed, it waits indefinitely for a key stroke.
232 |         # (bitwise and with 0xFF to extract least significant byte of
233 |         # multi-byte response)
234 | 
235 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
236 |         # 25 fps = 40 ms)
237 | 
238 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
239 | 
240 |         # It can also be set to detect specific key strokes by recording which
241 |         # key is pressed
242 | 
243 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
244 |         # display
245 | 
246 |         if (key == ord('x')):
247 |             keep_processing = False
248 |         elif (key == ord('f')):
249 |             cv2.setWindowProperty(
250 |                 window_nameAngle,
251 |                 cv2.WND_PROP_FULLSCREEN,
252 |                 cv2.WINDOW_FULLSCREEN)
253 | 
254 |     # close all windows
255 | 
256 |     cv2.destroyAllWindows()
257 | 
258 | else:
259 |     print("No video file specified or camera connected.")
260 | 
261 | #####################################################################
262 | 


--------------------------------------------------------------------------------
/haar_cascade_detection.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example : perform haar cascade detection on live display from a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2015 / 2016 School of Engineering & Computing Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | # based on example at:
 14 | # http://docs.opencv.org/3.1.0/d7/d8b/tutorial_py_face_detection.html#gsc.tab=0
 15 | 
 16 | # get trained cascade files from:
 17 | # https://github.com/opencv/opencv/tree/master/data/haarcascades
 18 | 
 19 | #####################################################################
 20 | 
 21 | import cv2
 22 | import argparse
 23 | import sys
 24 | import os
 25 | import math
 26 | 
 27 | #####################################################################
 28 | 
 29 | keep_processing = True
 30 | faces_recorded = 0
 31 | 
 32 | # parse command line arguments for camera ID or video file
 33 | 
 34 | parser = argparse.ArgumentParser(
 35 |     description='Perform ' +
 36 |     sys.argv[0] +
 37 |     ' example operation on incoming camera/video image')
 38 | parser.add_argument(
 39 |     "-c",
 40 |     "--camera_to_use",
 41 |     type=int,
 42 |     help="specify camera to use",
 43 |     default=0)
 44 | parser.add_argument(
 45 |     "-r",
 46 |     "--rescale",
 47 |     type=float,
 48 |     help="rescale image by this factor",
 49 |     default=1.0)
 50 | parser.add_argument(
 51 |     "-ha",
 52 |     "--harvest",
 53 |     type=str,
 54 |     help="path to save detected faces to",
 55 |     default='')
 56 | parser.add_argument(
 57 |     'video_file',
 58 |     metavar='video_file',
 59 |     type=str,
 60 |     nargs='?',
 61 |     help='specify optional video file')
 62 | args = parser.parse_args()
 63 | 
 64 | #####################################################################
 65 | # set up directory to save faces to if specified
 66 | 
 67 | if (len(args.harvest) > 0):
 68 |     try:
 69 |         os.mkdir(args.harvest)
 70 |     except OSError:
 71 |         print("Harvesting to existing directory: " + args.harvest)
 72 | 
 73 | #####################################################################
 74 | 
 75 | # define video capture object
 76 | 
 77 | try:
 78 |     # to use a non-buffered camera stream (via a separate thread)
 79 | 
 80 |     if not (args.video_file):
 81 |         import camera_stream
 82 |         cap = camera_stream.CameraVideoStream()
 83 |     else:
 84 |         cap = cv2.VideoCapture()  # not needed for video files
 85 | 
 86 | except BaseException:
 87 |     # if not then just use OpenCV default
 88 | 
 89 |     print("INFO: camera_stream class not found - camera input may be buffered")
 90 |     cap = cv2.VideoCapture()
 91 | 
 92 | # define display window name
 93 | 
 94 | window_name = "Face Detection using Haar Cascades"  # window name
 95 | 
 96 | # define haar cascade objects
 97 | 
 98 | # required cascade classifier files (and many others) available from:
 99 | # https://github.com/opencv/opencv/tree/master/data/haarcascades
100 | 
101 | face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
102 | eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')
103 | 
104 | if (face_cascade.empty() or eye_cascade.empty()):
105 |     print("Failed to load cascade from file.")
106 | 
107 | # if command line arguments are provided try to read video_name
108 | # otherwise default to capture from attached H/W camera
109 | 
110 | if (((args.video_file) and (cap.open(str(args.video_file))))
111 |         or (cap.open(args.camera_to_use))):
112 | 
113 |     # create window by name (as resizable)
114 | 
115 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
116 | 
117 |     while (keep_processing):
118 | 
119 |         # if video file successfully open then read frame from video
120 | 
121 |         if (cap.isOpened):
122 |             ret, frame = cap.read()
123 | 
124 |             # rescale if specified
125 | 
126 |             if (args.rescale != 1.0):
127 |                 frame = cv2.resize(
128 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
129 | 
130 |         # start a timer (to see how long processing and display takes)
131 | 
132 |         start_t = cv2.getTickCount()
133 | 
134 |         # convert to grayscale
135 | 
136 |         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
137 | 
138 |         # detect faces using haar cascade trained on faces
139 | 
140 |         faces = face_cascade.detectMultiScale(
141 |             gray, scaleFactor=1.2, minNeighbors=4, minSize=(
142 |                 30, 30), flags=cv2.CASCADE_DO_CANNY_PRUNING)
143 | 
144 |         # for each detected face, try to detect eyes inside the top
145 |         # half of the face region face region
146 | 
147 |         for (x, y, w, h) in faces:
148 | 
149 |             # extract regions of interest (roi) and draw each face bounding box
150 |             # and
151 | 
152 |             # top 50% to detect eyes
153 |             roi_gray = gray[y:y + math.floor(h * 0.5), x:x + w]
154 |             # copy to save if required
155 |             roi_color = frame[y:y + h, x:x + w].copy()
156 | 
157 |             cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
158 | 
159 |             # detect eyes using haar cascade trained on eyes
160 | 
161 |             eyes = eye_cascade.detectMultiScale(roi_gray)
162 | 
163 |             # for each detected eye, draw bounding box
164 | 
165 |             for (ex, ey, ew, eh) in eyes:
166 |                 cv2.rectangle(frame, (x + ex, y + ey),
167 |                               (x + ex + ew, y + ey + eh), (0, 255, 0), 2)
168 | 
169 |             # if specified, record all the faces we see to a specified
170 |             # directory
171 | 
172 |             if (len(args.harvest) > 0):
173 |                 filename = os.path.join(
174 |                     args.harvest, "face_" +
175 |                     str(format(faces_recorded, '04')) + ".png")
176 |                 cv2.imwrite(filename, roi_color)
177 |                 faces_recorded += 1
178 | 
179 |         # display image
180 | 
181 |         cv2.imshow(window_name, frame)
182 | 
183 |         # stop the timer and convert to ms. (to see how long processing and
184 |         # display takes)
185 | 
186 |         stop_t = ((cv2.getTickCount() - start_t) /
187 |                   cv2.getTickFrequency()) * 1000
188 | 
189 |         # start the event loop - essential
190 | 
191 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
192 |         # ms.) It waits for specified milliseconds for any keyboard event.
193 |         # If you press any key in that time, the program continues.
194 |         # If 0 is passed, it waits indefinitely for a key stroke.
195 |         # (bitwise and with 0xFF to extract least significant byte of
196 |         # multi-byte response) here we use a wait time in ms. that takes
197 |         # account of processing time already used in the loop
198 | 
199 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
200 |         # 25 fps = 40 ms)
201 | 
202 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
203 | 
204 |         # It can also be set to detect specific key strokes by recording which
205 |         # key is pressed
206 | 
207 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
208 |         # display
209 | 
210 |         if (key == ord('x')):
211 |             keep_processing = False
212 |         elif (key == ord('f')):
213 |             cv2.setWindowProperty(
214 |                 window_name,
215 |                 cv2.WND_PROP_FULLSCREEN,
216 |                 cv2.WINDOW_FULLSCREEN)
217 | 
218 |     # close all windows
219 | 
220 |     cv2.destroyAllWindows()
221 | 
222 | else:
223 |     print("No video file specified or camera connected.")
224 | 


--------------------------------------------------------------------------------
/harris.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example : harris feature points from a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2015-2024 Engineering & Computing Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | #####################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | import numpy as np
 19 | 
 20 | #####################################################################
 21 | 
 22 | keep_processing = True
 23 | 
 24 | # parse command line arguments for camera ID or video file
 25 | 
 26 | parser = argparse.ArgumentParser(
 27 |     description='Perform ' +
 28 |     sys.argv[0] +
 29 |     ' example operation on incoming camera/video image')
 30 | parser.add_argument(
 31 |     "-c",
 32 |     "--camera_to_use",
 33 |     type=int,
 34 |     help="specify camera to use",
 35 |     default=0)
 36 | parser.add_argument(
 37 |     "-r",
 38 |     "--rescale",
 39 |     type=float,
 40 |     help="rescale image by this factor",
 41 |     default=1.0)
 42 | parser.add_argument(
 43 |     "-s",
 44 |     "--set_resolution",
 45 |     type=int,
 46 |     nargs=2,
 47 |     help='override default camera resolution as H W')
 48 | parser.add_argument(
 49 |     'video_file',
 50 |     metavar='video_file',
 51 |     type=str,
 52 |     nargs='?',
 53 |     help='specify optional video file')
 54 | args = parser.parse_args()
 55 | 
 56 | #####################################################################
 57 | 
 58 | # this function is called as a call-back everytime the trackbar is moved
 59 | # (here we just do nothing)
 60 | 
 61 | 
 62 | def nothing(x):
 63 |     pass
 64 | 
 65 | #####################################################################
 66 | 
 67 | # define video capture object
 68 | 
 69 | 
 70 | try:
 71 |     # to use a non-buffered camera stream (via a separate thread)
 72 | 
 73 |     if not (args.video_file):
 74 |         import camera_stream
 75 |         cap = camera_stream.CameraVideoStream()  # T-API breaks code
 76 |     else:
 77 |         cap = cv2.VideoCapture()  # not needed for video files
 78 | 
 79 | except BaseException:
 80 |     # if not then just use OpenCV default
 81 | 
 82 |     print("INFO: camera_stream class not found - camera input may be buffered")
 83 |     cap = cv2.VideoCapture()
 84 | 
 85 | # define display window name
 86 | 
 87 | window_name = "Live Camera Input"  # window name
 88 | 
 89 | # if command line arguments are provided try to read video_name
 90 | # otherwise default to capture from attached H/W camera
 91 | 
 92 | if (((args.video_file) and (cap.open(str(args.video_file))))
 93 |         or (cap.open(args.camera_to_use))):
 94 | 
 95 |     # create window by name (as resizable)
 96 | 
 97 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
 98 | 
 99 |     # add some track bar controllers for settings
100 | 
101 |     neighbourhood = 3
102 |     cv2.createTrackbar(
103 |         "neighbourhood, N",
104 |         window_name,
105 |         neighbourhood,
106 |         15,
107 |         nothing)
108 | 
109 |     # override default camera resolution
110 | 
111 |     if (args.set_resolution is not None):
112 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
113 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
114 | 
115 |     print("INFO: input resolution : (",
116 |           int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
117 |           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
118 | 
119 |     while (keep_processing):
120 | 
121 |         # if video file successfully open then read frame from video
122 | 
123 |         if (cap.isOpened):
124 |             ret, frame = cap.read()
125 | 
126 |             # when we reach the end of the video (file) exit cleanly
127 | 
128 |             if (ret == 0):
129 |                 keep_processing = False
130 |                 continue
131 | 
132 |             # rescale if specified
133 | 
134 |             if (args.rescale != 1.0):
135 |                 frame = cv2.resize(
136 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
137 | 
138 |         # convert to single channel grayscale image
139 |         # with 32-bit float representation per pixel
140 | 
141 |         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
142 |         gray = np.float32(gray)
143 | 
144 |         # get parameters from track bars
145 | 
146 |         neighbourhood = cv2.getTrackbarPos("neighbourhood, N", window_name)
147 | 
148 |         # check neighbourhood is greater than 3 and odd
149 | 
150 |         neighbourhood = max(3, neighbourhood)
151 |         if not (neighbourhood % 2):
152 |             neighbourhood = neighbourhood + 1
153 | 
154 |         # find harris corners (via the good features to track function)
155 | 
156 |         corners = cv2.goodFeaturesToTrack(
157 |             gray,
158 |             maxCorners=500,
159 |             qualityLevel=0.01,
160 |             minDistance=10,
161 |             blockSize=neighbourhood,
162 |             useHarrisDetector=True,
163 |             k=0.01)
164 |         corners = np.intp(corners)
165 | 
166 |         for i in corners:
167 |             x, y = i.ravel()
168 |             cv2.circle(frame, (x, y), 3, (0, 255, 0), -1)
169 | 
170 |         # alternatively get the raw harris eigenvalue response
171 | 
172 |         # dst = cv2.cornerHarris(gray,neighbourhood,neighbourhood, 0.01)
173 | 
174 |         # Threshold for an optimal value, it may vary depending on the image.
175 | 
176 |         # frame[dst>0.005*dst.max()]=[0,255,0]
177 | 
178 |         # display image
179 | 
180 |         cv2.imshow(window_name, frame)
181 | 
182 |         # start the event loop - essential
183 | 
184 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
185 |         # milliseconds). It waits for specified milliseconds for any keyboard
186 |         # event. If you press any key in that time, the program continues.
187 |         # If 0 is passed, it waits indefinitely for a key stroke.
188 |         # (bitwise and with 0xFF to extract least significant byte of
189 |         # multi-byte response)
190 | 
191 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
192 | 
193 |         key = cv2.waitKey(40) & 0xFF
194 | 
195 |         # It can also be set to detect specific key strokes by recording which
196 |         # key is pressed
197 | 
198 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
199 |         # display
200 | 
201 |         if (key == ord('x')):
202 |             keep_processing = False
203 |         elif (key == ord('f')):
204 |             cv2.setWindowProperty(
205 |                 window_name,
206 |                 cv2.WND_PROP_FULLSCREEN,
207 |                 cv2.WINDOW_FULLSCREEN)
208 | 
209 |     # close all windows
210 | 
211 |     cv2.destroyAllWindows()
212 | 
213 | else:
214 |     print("No video file specified or camera connected.")
215 | 
216 | #####################################################################
217 | 


--------------------------------------------------------------------------------
/hog.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example : HOG pedestrain detection from a video file
  4 | # specified on the command line (e.g. FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2015 / 2016 School of Engineering & Computing Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | #####################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | import math
 19 | import numpy as np
 20 | 
 21 | #####################################################################
 22 | 
 23 | keep_processing = True
 24 | 
 25 | # parse command line arguments for camera ID or video file
 26 | 
 27 | parser = argparse.ArgumentParser(
 28 |     description='Perform ' +
 29 |     sys.argv[0] +
 30 |     ' example operation on incoming camera/video image')
 31 | parser.add_argument(
 32 |     "-c",
 33 |     "--camera_to_use",
 34 |     type=int,
 35 |     help="specify camera to use",
 36 |     default=0)
 37 | parser.add_argument(
 38 |     "-r",
 39 |     "--rescale",
 40 |     type=float,
 41 |     help="rescale image by this factor",
 42 |     default=1.0)
 43 | parser.add_argument(
 44 |     'video_file',
 45 |     metavar='video_file',
 46 |     type=str,
 47 |     nargs='?',
 48 |     help='specify optional video file')
 49 | args = parser.parse_args()
 50 | 
 51 | #####################################################################
 52 | 
 53 | # if we have OpenCL H/W acceleration availale, use it - we'll need it
 54 | 
 55 | cv2.ocl.setUseOpenCL(True)
 56 | print(
 57 |     "INFO: OpenCL - available: ",
 58 |     cv2.ocl.haveOpenCL(),
 59 |     " using: ",
 60 |     cv2.ocl.useOpenCL())
 61 | 
 62 | #####################################################################
 63 | 
 64 | 
 65 | def inside(r, q):
 66 |     rx, ry, rw, rh = r
 67 |     qx, qy, qw, qh = q
 68 |     return rx > qx and ry > qy and rx + rw < qx + qw and ry + rh < qy + qh
 69 | 
 70 | 
 71 | def draw_detections(img, rects, thickness=1):
 72 |     for x, y, w, h in rects:
 73 |         # the HOG detector returns slightly larger rectangles than the
 74 |         # real objects so we slightly shrink the rectangles to
 75 |         # get a nicer output.
 76 |         pad_w, pad_h = int(0.15 * w), int(0.05 * h)
 77 |         cv2.rectangle(img, (x + pad_w, y + pad_h),
 78 |                       (x + w - pad_w, y + h - pad_h), (0, 255, 0), thickness)
 79 | 
 80 | #####################################################################
 81 | 
 82 | # power law transform
 83 | # image - colour image
 84 | # gamma - "gradient" co-efficient of gamma function
 85 | 
 86 | 
 87 | def powerlaw_transform(image, gamma):
 88 | 
 89 |     # compute power-law transform
 90 |     # remembering not defined for pixel = 0 (!)
 91 | 
 92 |     # handle any overflow in a quick and dirty way using 0-255 clipping
 93 | 
 94 |     image = np.clip(np.power(image, gamma), 0, 255).astype('uint8')
 95 | 
 96 |     return image
 97 | 
 98 | 
 99 | #####################################################################
100 | 
101 | # this function is called as a call-back everytime the trackbar is moved
102 | # (here we just do nothing)
103 | 
104 | def nothing(x):
105 |     pass
106 | 
107 | 
108 | #####################################################################
109 | 
110 | # define video capture object
111 | 
112 | 
113 | try:
114 |     # to use a non-buffered camera stream (via a separate thread)
115 | 
116 |     if not (args.video_file):
117 |         import camera_stream
118 |         cap = camera_stream.CameraVideoStream()  # T-API done later
119 |     else:
120 |         cap = cv2.VideoCapture()  # not needed for video files
121 | 
122 | except BaseException:
123 |     # if not then just use OpenCV default
124 | 
125 |     print("INFO: camera_stream class not found - camera input may be buffered")
126 |     cap = cv2.VideoCapture()
127 | 
128 | #####################################################################
129 | 
130 | # define display window name
131 | 
132 | window_name = "HOG pedestrain detection"  # window name
133 | 
134 | # if command line arguments are provided try to read video_name
135 | # otherwise default to capture from attached H/W camera
136 | 
137 | if (((args.video_file) and (cap.open(str(args.video_file))))
138 |         or (cap.open(args.camera_to_use))):
139 | 
140 |     # create window by name (as resizable)
141 | 
142 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
143 | 
144 |     # set up HoG detector
145 | 
146 |     hog = cv2.HOGDescriptor()
147 |     hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
148 | 
149 |     # add some track bar controllers for settings
150 | 
151 |     neighbourhood = 3
152 |     cv2.createTrackbar("Smoothing : neighbourhood, N", window_name,
153 |                        neighbourhood, 40, nothing)
154 | 
155 |     sigma = 1
156 |     cv2.createTrackbar("Smoothing : sigma", window_name, sigma, 10, nothing)
157 | 
158 |     gamma = 100  # default gamma = 100 * 0.01 = 1 -> no change
159 |     cv2.createTrackbar("gamma, (* 0.01)", window_name, gamma, 150, nothing)
160 | 
161 |     svm_threshold = 0  # by default the SVM's own threshold at the hyperplane
162 |     cv2.createTrackbar("SVM threshold, (distance from hyper-plane, * 0.1)",
163 |                        window_name, svm_threshold, 10, nothing)
164 | 
165 |     while (keep_processing):
166 | 
167 |         # if video file successfully open then read frame from video
168 | 
169 |         if (cap.isOpened):
170 |             ret, frame = cap.read()
171 | 
172 |             # when we reach the end of the video (file) exit cleanly
173 | 
174 |             if (ret == 0):
175 |                 keep_processing = False
176 |                 continue
177 | 
178 |             # rescale if specified
179 | 
180 |             if (args.rescale != 1.0):
181 |                 frame = cv2.resize(
182 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
183 | 
184 |         # start a timer (to see how long processing and display takes)
185 | 
186 |         start_t = cv2.getTickCount()
187 | 
188 |         # get parameters from track bars
189 | 
190 |         neighbourhood = cv2.getTrackbarPos(
191 |             "Smoothing : neighbourhood, N", window_name)
192 |         sigma = cv2.getTrackbarPos("Smoothing : sigma", window_name)
193 |         gamma = cv2.getTrackbarPos("gamma, (* 0.01)", window_name) * 0.01
194 |         svm_threshold = cv2.getTrackbarPos(
195 |             "SVM threshold, (distance from hyper-plane, * 0.1)",
196 |             window_name) * 0.1
197 | 
198 |         # check neighbourhood is greater than 3 and odd
199 | 
200 |         neighbourhood = max(3, neighbourhood)
201 |         if not (neighbourhood % 2):
202 |             neighbourhood = neighbourhood + 1
203 | 
204 |         # use power-law function to perform gamma correction
205 |         # and convert np array to T-API universal array for H/W acceleration
206 | 
207 |         frame = cv2.UMat(powerlaw_transform(frame, gamma))
208 | 
209 |         # perform Gaussian smoothing using NxN neighbourhood
210 | 
211 |         frame = cv2.GaussianBlur(
212 |             frame,
213 |             (neighbourhood,
214 |              neighbourhood),
215 |             sigma,
216 |             sigma,
217 |             borderType=cv2.BORDER_REPLICATE)
218 | 
219 |         # perform HOG based pedestrain detection
220 | 
221 |         found, w = hog.detectMultiScale(
222 |             frame, winStride=(
223 |                 8, 8), padding=(
224 |                 32, 32), scale=1.05, hitThreshold=svm_threshold)
225 |         found_filtered = []
226 | 
227 |         for ri, r in enumerate(found):
228 |             for qi, q in enumerate(found):
229 |                 if ri != qi and inside(r, q):
230 |                     break
231 |                 else:
232 |                     found_filtered.append(r)
233 | 
234 |         draw_detections(frame, found_filtered, 3)
235 | 
236 |         # display image
237 | 
238 |         cv2.imshow(window_name, frame)
239 | 
240 |         # stop the timer and convert to ms. (to see how long processing and
241 |         # display takes)
242 | 
243 |         stop_t = ((cv2.getTickCount() - start_t) /
244 |                   cv2.getTickFrequency()) * 1000
245 | 
246 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
247 |         # 25 fps = 40 ms)
248 | 
249 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
250 | 
251 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
252 |         # display
253 | 
254 |         if (key == ord('x')):
255 |             keep_processing = False
256 |         elif (key == ord('f')):
257 |             cv2.setWindowProperty(
258 |                 window_name,
259 |                 cv2.WND_PROP_FULLSCREEN,
260 |                 cv2.WINDOW_FULLSCREEN)
261 | 
262 |     # close all windows
263 | 
264 |     cv2.destroyAllWindows()
265 | 
266 | else:
267 |     print("No video file specified or camera connected.")
268 | 
269 | #####################################################################
270 | 


--------------------------------------------------------------------------------
/houghlines.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example :  hough line detection based on canny edge detection
  4 | # for a a video file specified on the command line (e.g. python FILE.py
  5 | # video_file) or from an attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2021 Dept. Computer Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | #####################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | import numpy as np
 19 | 
 20 | #####################################################################
 21 | 
 22 | keep_processing = True
 23 | use_probablistic_hough = False
 24 | 
 25 | # parse command line arguments for camera ID or video file
 26 | 
 27 | parser = argparse.ArgumentParser(
 28 |     description='Perform ' +
 29 |     sys.argv[0] +
 30 |     ' example operation on incoming camera/video image')
 31 | parser.add_argument(
 32 |     "-c",
 33 |     "--camera_to_use",
 34 |     type=int,
 35 |     help="specify camera to use",
 36 |     default=0)
 37 | parser.add_argument(
 38 |     "-r",
 39 |     "--rescale",
 40 |     type=float,
 41 |     help="rescale image by this factor",
 42 |     default=1.0)
 43 | parser.add_argument(
 44 |     'video_file',
 45 |     metavar='video_file',
 46 |     type=str,
 47 |     nargs='?',
 48 |     help='specify optional video file')
 49 | args = parser.parse_args()
 50 | 
 51 | #####################################################################
 52 | 
 53 | # this function is called as a call-back everytime the trackbar is moved
 54 | # (here we just do nothing)
 55 | 
 56 | 
 57 | def nothing(x):
 58 |     pass
 59 | 
 60 | 
 61 | #####################################################################
 62 | 
 63 | # define video capture object
 64 | 
 65 | try:
 66 |     # to use a non-buffered camera stream (via a separate thread)
 67 | 
 68 |     if not (args.video_file):
 69 |         import camera_stream
 70 |         cap = camera_stream.CameraVideoStream()
 71 |     else:
 72 |         cap = cv2.VideoCapture()  # not needed for video files
 73 | 
 74 | except BaseException:
 75 |     # if not then just use OpenCV default
 76 | 
 77 |     print("INFO: camera_stream class not found - camera input may be buffered")
 78 |     cap = cv2.VideoCapture()
 79 | 
 80 | # define display window name
 81 | 
 82 | window_name = "Live Camera Input"  # window name
 83 | window_name2 = "Hough Lines"  # window name
 84 | 
 85 | # if command line arguments are provided try to read video_name
 86 | # otherwise default to capture from attached H/W camera
 87 | 
 88 | if (((args.video_file) and (cap.open(str(args.video_file))))
 89 |         or (cap.open(args.camera_to_use))):
 90 | 
 91 |     # create window by name (as resizable)
 92 | 
 93 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
 94 |     cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
 95 | 
 96 |     # add some track bar controllers for settings
 97 | 
 98 |     lower_threshold = 25
 99 |     cv2.createTrackbar("lower", window_name2, lower_threshold, 255, nothing)
100 |     upper_threshold = 120
101 |     cv2.createTrackbar("upper", window_name2, upper_threshold, 255, nothing)
102 |     smoothing_neighbourhood = 3
103 |     cv2.createTrackbar(
104 |         "smoothing",
105 |         window_name2,
106 |         smoothing_neighbourhood,
107 |         15,
108 |         nothing)
109 |     sobel_size = 3  # greater than 7 seems to crash
110 |     cv2.createTrackbar("sobel size", window_name2, sobel_size, 7, nothing)
111 | 
112 |     while (keep_processing):
113 | 
114 |         # if video file successfully open then read frame from video
115 | 
116 |         if (cap.isOpened):
117 |             ret, frame = cap.read()  # rescale if specified
118 | 
119 |             # when we reach the end of the video (file) exit cleanly
120 | 
121 |             if (ret == 0):
122 |                 keep_processing = False
123 |                 continue
124 | 
125 |             # rescale if specified
126 | 
127 |             if (args.rescale != 1.0):
128 |                 frame = cv2.resize(
129 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
130 | 
131 |         # get parameters from track bars
132 | 
133 |         lower_threshold = cv2.getTrackbarPos("lower", window_name2)
134 |         upper_threshold = cv2.getTrackbarPos("upper", window_name2)
135 |         smoothing_neighbourhood = cv2.getTrackbarPos("smoothing", window_name2)
136 |         sobel_size = cv2.getTrackbarPos("sobel size", window_name2)
137 | 
138 |         # check neighbourhood is greater than 3 and odd
139 | 
140 |         smoothing_neighbourhood = max(3, smoothing_neighbourhood)
141 |         if not (smoothing_neighbourhood % 2):
142 |             smoothing_neighbourhood = smoothing_neighbourhood + 1
143 | 
144 |         sobel_size = max(3, sobel_size)
145 |         if not (sobel_size % 2):
146 |             sobel_size = sobel_size + 1
147 | 
148 |         # convert to grayscale
149 | 
150 |         gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
151 | 
152 |         # performing smoothing on the image using a 5x5 smoothing mark (see
153 |         # manual entry for GaussianBlur())
154 | 
155 |         smoothed = cv2.GaussianBlur(
156 |             gray_frame, (smoothing_neighbourhood, smoothing_neighbourhood), 0)
157 | 
158 |         # perform canny edge detection
159 | 
160 |         canny = cv2.Canny(
161 |             smoothed,
162 |             lower_threshold,
163 |             upper_threshold,
164 |             apertureSize=sobel_size)
165 | 
166 |         # perform hough line detection
167 |         # based on tutorial at:
168 |         # https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_houghlines/py_houghlines.html
169 | 
170 |         if not (use_probablistic_hough):
171 |             lines = cv2.HoughLines(canny, 1, np.pi/180, 40)
172 |             if lines is not None:
173 |                 for rho, theta in lines[0]:
174 |                     a = np.cos(theta)
175 |                     b = np.sin(theta)
176 |                     x0 = a*rho
177 |                     y0 = b*rho
178 |                     x1 = int(x0 + 1000*(-b))
179 |                     y1 = int(y0 + 1000*(a))
180 |                     x2 = int(x0 - 1000*(-b))
181 |                     y2 = int(y0 - 1000*(a))
182 | 
183 |                     cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
184 | 
185 |         else:
186 | 
187 |             # use use probablistic hough transform
188 | 
189 |             min_line_length = 100   # requires tuning
190 |             max_line_gap = 10       # requires tuning
191 | 
192 |             lines = cv2.HoughLinesP(canny, 1, np.pi/180, 10,
193 |                                     min_line_length, max_line_gap)
194 |             if lines is not None:
195 |                 for x1, y1, x2, y2 in lines[0]:
196 |                     cv2.line(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
197 | 
198 |         # display image
199 | 
200 |         cv2.imshow(window_name, frame)
201 |         cv2.imshow(window_name2, canny)
202 | 
203 |         # start the event loop - essential
204 | 
205 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
206 |         # milliseconds). It waits for specified milliseconds for any keyboard
207 |         # event. If you press any key in that time, the program continues.
208 |         # If 0 is passed, it waits indefinitely for a key stroke.
209 |         # (bitwise and with 0xFF to extract least significant byte of
210 |         # multi-byte response)
211 | 
212 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
213 |         key = cv2.waitKey(40) & 0xFF
214 | 
215 |         # It can also be set to detect specific key strokes by recording which
216 |         # key is pressed
217 | 
218 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
219 |         # display
220 | 
221 |         if (key == ord('x')):
222 |             keep_processing = False
223 |         elif (key == ord('f')):
224 |             cv2.setWindowProperty(
225 |                 window_name2,
226 |                 cv2.WND_PROP_FULLSCREEN,
227 |                 cv2.WINDOW_FULLSCREEN)
228 |         elif (key == ord('p')):
229 |             use_probablistic_hough = not (use_probablistic_hough)
230 | 
231 |     # close all windows
232 | 
233 |     cv2.destroyAllWindows()
234 | 
235 | else:
236 |     print("No video file specified or camera connected.")
237 | 
238 | #####################################################################
239 | 


--------------------------------------------------------------------------------
/lbp_cascade_detection.py:
--------------------------------------------------------------------------------
  1 | # Example : perform LBP cascade detection on live display from a video file
  2 | # specified on the command line (e.g. python FILE.py video_file) or from an
  3 | # attached web camera
  4 | 
  5 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  6 | 
  7 | # Copyright (c) 2016 School of Engineering & Computing Science,
  8 | #                    Durham University, UK
  9 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 10 | 
 11 | # based on haar example at:
 12 | # http://docs.opencv.org/3.1.0/d7/d8b/tutorial_py_face_detection.html#gsc.tab=0
 13 | 
 14 | # get trained cascade files from:
 15 | # https://github.com/opencv/opencv/tree/master/data/
 16 | 
 17 | #####################################################################
 18 | 
 19 | import cv2
 20 | import argparse
 21 | import sys
 22 | import math
 23 | 
 24 | #####################################################################
 25 | 
 26 | keep_processing = True
 27 | 
 28 | # parse command line arguments for camera ID or video file
 29 | 
 30 | parser = argparse.ArgumentParser(
 31 |     description='Perform ' +
 32 |     sys.argv[0] +
 33 |     ' example operation on incoming camera/video image')
 34 | parser.add_argument(
 35 |     "-c",
 36 |     "--camera_to_use",
 37 |     type=int,
 38 |     help="specify camera to use",
 39 |     default=0)
 40 | parser.add_argument(
 41 |     "-r",
 42 |     "--rescale",
 43 |     type=float,
 44 |     help="rescale image by this factor",
 45 |     default=1.0)
 46 | parser.add_argument(
 47 |     'video_file',
 48 |     metavar='video_file',
 49 |     type=str,
 50 |     nargs='?',
 51 |     help='specify optional video file')
 52 | args = parser.parse_args()
 53 | 
 54 | #####################################################################
 55 | 
 56 | # define video capture object
 57 | 
 58 | try:
 59 |     # to use a non-buffered camera stream (via a separate thread)
 60 | 
 61 |     if not (args.video_file):
 62 |         import camera_stream
 63 |         cap = camera_stream.CameraVideoStream()
 64 |     else:
 65 |         cap = cv2.VideoCapture()  # not needed for video files
 66 | 
 67 | except BaseException:
 68 |     # if not then just use OpenCV default
 69 | 
 70 |     print("INFO: camera_stream class not found - camera input may be buffered")
 71 |     cap = cv2.VideoCapture()
 72 | 
 73 | # define display window name
 74 | 
 75 | window_name = "Face Detection using LBP Cascades"  # window name
 76 | 
 77 | # define lbpcascades cascade objects
 78 | 
 79 | # required cascade classifier files (and many others) available from:
 80 | # https://github.com/opencv/opencv/tree/master/data/lbpcascades
 81 | 
 82 | face_cascade = cv2.CascadeClassifier('lbpcascade_frontalface_improved.xml')
 83 | 
 84 | if (face_cascade.empty()):
 85 |     print("Failed to load cascade from file.")
 86 | 
 87 | 
 88 | # if command line arguments are provided try to read video_name
 89 | # otherwise default to capture from attached H/W camera
 90 | 
 91 | if (((args.video_file) and (cap.open(str(args.video_file))))
 92 |         or (cap.open(args.camera_to_use))):
 93 | 
 94 |     # create window by name (as resizable)
 95 | 
 96 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
 97 | 
 98 |     while (keep_processing):
 99 | 
100 |         # if video file successfully open then read frame from video
101 | 
102 |         if (cap.isOpened):
103 |             ret, frame = cap.read()
104 | 
105 |             # when we reach the end of the video (file) exit cleanly
106 | 
107 |             if (ret == 0):
108 |                 keep_processing = False
109 |                 continue
110 | 
111 |             # rescale if specified
112 | 
113 |             if (args.rescale != 1.0):
114 |                 frame = cv2.resize(
115 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
116 | 
117 |         # start a timer (to see how long processing and display takes)
118 | 
119 |         start_t = cv2.getTickCount()
120 | 
121 |         # convert to grayscale
122 | 
123 |         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
124 | 
125 |         # detect faces using LBP cascade trained on faces
126 | 
127 |         faces = face_cascade.detectMultiScale(
128 |             gray, scaleFactor=1.3, minNeighbors=3, minSize=(30, 30))
129 | 
130 |         # for each detected face, try to detect eyes inside the top
131 |         # half of the face region face region
132 | 
133 |         for (x, y, w, h) in faces:
134 | 
135 |             # draw each face bounding box and extract regions of interest (roi)
136 | 
137 |             cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
138 |             roi_gray = gray[y:y + math.floor(h * 0.5), x:x + w]
139 |             roi_color = frame[y:y + math.floor(h * 0.5), x:x + w]
140 | 
141 |         # display image
142 | 
143 |         cv2.imshow(window_name, frame)
144 | 
145 |         # stop the timer and convert to ms. (to see how long processing and
146 |         # display takes)
147 | 
148 |         stop_t = ((cv2.getTickCount() - start_t) /
149 |                   cv2.getTickFrequency()) * 1000
150 | 
151 |         # start the event loop - essential
152 | 
153 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
154 |         # ms.) It waits for specified milliseconds for any keyboard event.
155 |         # If you press any key in that time, the program continues.
156 |         # If 0 is passed, it waits indefinitely for a key stroke.
157 |         # (bitwise and with 0xFF to extract least significant byte of
158 |         # multi-byte response) here we use a wait time in ms. that takes
159 |         # account of processing time already used in the loop
160 | 
161 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
162 |         # 25 fps = 40 ms)
163 | 
164 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
165 | 
166 |         # It can also be set to detect specific key strokes by recording which
167 |         # key is pressed
168 | 
169 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
170 |         # display
171 | 
172 |         if (key == ord('x')):
173 |             keep_processing = False
174 |         elif (key == ord('f')):
175 |             cv2.setWindowProperty(
176 |                 window_name,
177 |                 cv2.WND_PROP_FULLSCREEN,
178 |                 cv2.WINDOW_FULLSCREEN)
179 | 
180 |     # close all windows
181 | 
182 |     cv2.destroyAllWindows()
183 | 
184 | else:
185 |     print("No video file specified or camera connected.")
186 | 


--------------------------------------------------------------------------------
/mask-rcnn.py:
--------------------------------------------------------------------------------
  1 | ##########################################################################
  2 | 
  3 | # Example : performs Mask R-CNN object instance segmentation from a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2021 Toby Breckon, Durham University, UK
 10 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 11 | 
 12 | # Implements the Mask R-CNN instance segmentation architecture decribed in:
 13 | # Mask R-CNN - Kaiming He, Georgia Gkioxari, Piotr Dollár, Ross Girshick
 14 | # https://arxiv.org/abs/1703.06870
 15 | 
 16 | # This code: significant portions based on the example available at:
 17 | # https://github.com/opencv/opencv/blob/master/samples/dnn/mask_rcnn.py
 18 | 
 19 | # To use first download and unpack the following files:
 20 | # https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_coco.txt
 21 | # http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
 22 | # https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt
 23 | # then unpack and rename as follows:
 24 | # tar -xzf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
 25 | 
 26 | ##########################################################################
 27 | 
 28 | import cv2
 29 | import argparse
 30 | import sys
 31 | import math
 32 | import numpy as np
 33 | 
 34 | ##########################################################################
 35 | 
 36 | keep_processing = True
 37 | colors = None
 38 | 
 39 | # parse command line arguments for camera ID or video file, and Mask
 40 | # R-CNN files
 41 | parser = argparse.ArgumentParser(
 42 |     description='Perform ' +
 43 |     sys.argv[0] +
 44 |     ' example operation on incoming camera/video image')
 45 | parser.add_argument(
 46 |     "-c",
 47 |     "--camera_to_use",
 48 |     type=int,
 49 |     help="specify camera to use",
 50 |     default=0)
 51 | parser.add_argument(
 52 |     "-r",
 53 |     "--rescale",
 54 |     type=float,
 55 |     help="rescale image by this factor",
 56 |     default=1.0)
 57 | parser.add_argument(
 58 |     "-fs",
 59 |     "--fullscreen",
 60 |     action='store_true',
 61 |     help="run in full screen mode")
 62 | parser.add_argument(
 63 |     "-use",
 64 |     "--target",
 65 |     type=str,
 66 |     choices=['cpu', 'gpu', 'opencl'],
 67 |     help="select computational backend",
 68 |     default='gpu')
 69 | parser.add_argument(
 70 |     'video_file',
 71 |     metavar='video_file',
 72 |     type=str,
 73 |     nargs='?',
 74 |     help='specify optional video file')
 75 | parser.add_argument(
 76 |     "-cl",
 77 |     "--class_file",
 78 |     type=str,
 79 |     help="list of classes",
 80 |     default='object_detection_classes_coco.txt')
 81 | parser.add_argument(
 82 |     "-cf",
 83 |     "--config_file",
 84 |     type=str,
 85 |     help="network config",
 86 |     default='mask_rcnn_inception_v2_coco_2018_01_28.pbtxt')
 87 | parser.add_argument(
 88 |     "-w",
 89 |     "--weights_file",
 90 |     type=str,
 91 |     help="network weights",
 92 |     default="mask_rcnn_inception_v2_coco_2018_01_28/"
 93 |             + "/frozen_inference_graph.pb")
 94 | 
 95 | args = parser.parse_args()
 96 | 
 97 | ##########################################################################
 98 | # dummy on trackbar callback function
 99 | 
100 | 
101 | def on_trackbar(val):
102 |     return
103 | 
104 | #####################################################################
105 | # Draw the predicted bounding box on the specified image
106 | # image: image detection performed on
107 | # class_name: string name of detected object_detection
108 | # left, top, right, bottom: rectangle parameters for detection
109 | # colour: to draw detection rectangle in
110 | 
111 | 
112 | def drawPred(image, class_name, confidence, left, top, right, bottom, colour):
113 |     # Draw a bounding box.
114 |     cv2.rectangle(image, (left, top), (right, bottom), colour, 3)
115 | 
116 |     # construct label
117 |     label = '%s:%.2f' % (class_name, confidence)
118 | 
119 |     # Display the label at the top of the bounding box
120 |     labelSize, baseLine = cv2.getTextSize(
121 |         label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
122 |     top = max(top, labelSize[1])
123 |     cv2.rectangle(
124 |         image,
125 |         (left,
126 |          top -
127 |          round(
128 |              1.5 *
129 |              labelSize[1])),
130 |         (left +
131 |          round(
132 |              1.5 *
133 |              labelSize[0]),
134 |             top +
135 |             baseLine),
136 |         (255,
137 |          255,
138 |          255),
139 |         cv2.FILLED)
140 |     cv2.putText(image, label, (left, top),
141 |                 cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 1)
142 | 
143 | ##########################################################################
144 | 
145 | # define video capture object
146 | 
147 | 
148 | try:
149 |     # to use a non-buffered camera stream (via a separate thread)
150 | 
151 |     if not (args.video_file):
152 |         import camera_stream
153 |         cap = camera_stream.CameraVideoStream()
154 |     else:
155 |         cap = cv2.VideoCapture()  # not needed for video files
156 | 
157 | except BaseException:
158 |     # if not then just use OpenCV default
159 | 
160 |     print("INFO: camera_stream class not found - camera input may be buffered")
161 |     cap = cv2.VideoCapture()
162 | 
163 | ##########################################################################
164 | 
165 | # init Mask R-CNN object detection model
166 | 
167 | inpWidth = 800       # Width of network's input image
168 | inpHeight = 800      # Height of network's input image
169 | 
170 | # Load names of classes from file
171 | 
172 | classesFile = args.class_file
173 | classes = None
174 | with open(classesFile, 'rt') as f:
175 |     classes = f.read().rstrip('\n').split('\n')
176 | 
177 | # load configuration and weight files for the model and load the network
178 | # using them
179 | 
180 | net = cv2.dnn.readNet(args.config_file, args.weights_file)
181 | 
182 | # set up compute target as one of [GPU, OpenCL, CPU]
183 | 
184 | if (args.target == 'gpu'):
185 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
186 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
187 | elif (args.target == 'opencl'):
188 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
189 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
190 | else:
191 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
192 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
193 | 
194 | ##########################################################################
195 | 
196 | # define display window name + trackbar
197 | 
198 | window_name = 'Mask R-CNN instance segmentation: ' + args.weights_file
199 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
200 | trackbarName = 'reporting confidence > (x 0.01)'
201 | cv2.createTrackbar(trackbarName, window_name, 70, 100, on_trackbar)
202 | 
203 | ##########################################################################
204 | 
205 | # if command line arguments are provided try to read video_name
206 | # otherwise default to capture from attached camera
207 | 
208 | if (((args.video_file) and (cap.open(str(args.video_file))))
209 |         or (cap.open(args.camera_to_use))):
210 | 
211 |     # create window by name (as resizable)
212 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
213 | 
214 |     while (keep_processing):
215 | 
216 |         # start a timer (to see how long processing and display takes)
217 |         start_t = cv2.getTickCount()
218 | 
219 |         # if camera /video file successfully open then read frame
220 |         if (cap.isOpened):
221 |             ret, frame = cap.read()
222 | 
223 |             # when we reach the end of the video (file) exit cleanly
224 |             if (ret == 0):
225 |                 keep_processing = False
226 |                 continue
227 | 
228 |             # rescale if specified
229 |             if (args.rescale != 1.0):
230 |                 frame = cv2.resize(
231 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
232 | 
233 |         # get frame dimensions
234 |         frameH = frame.shape[0]
235 |         frameW = frame.shape[1]
236 | 
237 |         # create a 4D tensor (OpenCV 'blob') from image frame (pixels not
238 |         # scaled, image resized)
239 |         tensor = cv2.dnn.blobFromImage(
240 |             frame, 1.0, (inpWidth, inpHeight), [0, 0, 0],
241 |             swapRB=True, crop=False)
242 | 
243 |         # set the input to the CNN network
244 |         net.setInput(tensor)
245 | 
246 |         # runs forward inference to get output of the final output layers
247 |         boxes, masks = net.forward(['detection_out_final', 'detection_masks'])
248 | 
249 |         # get confidence threshold from trackbar
250 |         confThreshold = cv2.getTrackbarPos(trackbarName, window_name) / 100
251 | 
252 |         # get number of classes detected and number of detections
253 |         numClasses = masks.shape[1]
254 |         numDetections = boxes.shape[2]
255 | 
256 |         # draw segmentation - first generate colours if needed
257 | 
258 |         if not colors:
259 |             np.random.seed(324)
260 |             colors = [np.array([0, 0, 0], np.uint8)]
261 |             for i in range(1, numClasses + 1):
262 |                 colors.append((colors[i - 1] +
263 |                               np.random.randint(0, 256, [3],
264 |                               np.uint8)) / 2
265 |                               )
266 |             del colors[0]
267 | 
268 |         # draw segmentation - draw instance segments
269 | 
270 |         boxesToDraw = []
271 |         for i in range(numDetections):
272 |             box = boxes[0, 0, i]
273 |             mask = masks[i]
274 |             confidence = box[2]
275 |             if confidence > confThreshold:
276 | 
277 |                 # **** draw bounding box (as per Faster R-CNN)
278 | 
279 |                 classId = int(box[1])
280 |                 left = int(frameW * box[3])
281 |                 top = int(frameH * box[4])
282 |                 right = int(frameW * box[5])
283 |                 bottom = int(frameH * box[6])
284 | 
285 |                 left = max(0, min(left, frameW - 1))
286 |                 top = max(0, min(top, frameH - 1))
287 |                 right = max(0, min(right, frameW - 1))
288 |                 bottom = max(0, min(bottom, frameH - 1))
289 | 
290 |                 drawPred(frame, classes[classId], confidence,
291 |                          left, top, right, bottom, (0, 255, 0))
292 | 
293 |                 # **** draw object instance mask
294 |                 # get mask, re-size from 28x28 to size of bounding box
295 |                 # then theshold at 0.5
296 | 
297 |                 classMask = mask[classId]
298 |                 classMask = cv2.resize(classMask,
299 |                                        (right - left + 1, bottom - top + 1),
300 |                                        cv2.INTER_CUBIC)
301 |                 mask = (classMask > 0.5)
302 | 
303 |                 roi = frame[top:bottom+1, left:right+1][mask]
304 |                 frame[top:bottom+1, left:right+1][mask] = (
305 |                     0.8 * colors[classId] + 0.2 * roi).astype(np.uint8)
306 | 
307 |         # stop the timer and convert to ms. (to see how long processing takes)
308 | 
309 |         stop_t = ((cv2.getTickCount() - start_t) /
310 |                   cv2.getTickFrequency()) * 1000
311 | 
312 |         # Display efficiency information
313 | 
314 |         label = ('Inference time: %.2f ms' % stop_t) + \
315 |             (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
316 |         cv2.putText(frame, label, (0, 15),
317 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
318 | 
319 |         # display image
320 |         cv2.imshow(window_name, frame)
321 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
322 |                               cv2.WINDOW_FULLSCREEN & args.fullscreen)
323 | 
324 |         # start the event loop + detect specific key strokes
325 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
326 |         # 25 fps = 40 ms)
327 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
328 | 
329 |         # if user presses "x" then exit  / press "f" for fullscreen display
330 |         if (key == ord('x')):
331 |             keep_processing = False
332 |         elif (key == ord('f')):
333 |             args.fullscreen = not (args.fullscreen)
334 | 
335 |     # close all windows
336 |     cv2.destroyAllWindows()
337 | 
338 | else:
339 |     print("No video file specified or camera connected.")
340 | 
341 | ##########################################################################
342 | 


--------------------------------------------------------------------------------
/mog-background-subtraction.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example : perform MoG based foreground/background subtraction from a video
  4 | # file specified on the command line (e.g. python FILE.py video_file) or from
  5 | # an attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2015-25 Toby Breckon, Engineering & Computer Science,
 10 | #                       Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | #####################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | import numpy as np
 19 | 
 20 | #####################################################################
 21 | 
 22 | # concatenate two RGB/grayscale images horizontally (left to right)
 23 | # handling differing channel numbers or image heights in the input
 24 | 
 25 | 
 26 | def h_concat(img1, img2):
 27 | 
 28 |     # get size and channels for both images
 29 | 
 30 |     height1 = img1.shape[0]
 31 |     # width1 = img1.shape[1]
 32 |     if (len(img1.shape) == 2):
 33 |         channels1 = 1
 34 |     else:
 35 |         channels1 = img1.shape[2]
 36 | 
 37 |     height2 = img2.shape[0]
 38 |     width2 = img2.shape[1]
 39 |     if (len(img2.shape) == 2):
 40 |         channels2 = 1
 41 |     else:
 42 |         channels2 = img2.shape[2]
 43 | 
 44 |     # make all images 3 channel, or assume all same channel
 45 | 
 46 |     if ((channels1 > channels2) and (channels1 == 3)):
 47 |         out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
 48 |         out1 = img1
 49 |     elif ((channels2 > channels1) and (channels2 == 3)):
 50 |         out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
 51 |         out2 = img2
 52 |     else:  # both must be equal
 53 |         out1 = img1
 54 |         out2 = img2
 55 | 
 56 |     # height of first image is master height, width remains unchanged
 57 | 
 58 |     if (height1 != height2):
 59 |         out2 = cv2.resize(out2, (height1, width2))
 60 | 
 61 |     return np.hstack((out1, out2))
 62 | 
 63 | #####################################################################
 64 | 
 65 | # concatenate two RGB/grayscale images vertically (top to bottom)
 66 | # handling differing channel numbers or image heights in the input
 67 | 
 68 | 
 69 | def v_concat(img1, img2):
 70 | 
 71 |     # get size and channels for both images
 72 | 
 73 |     # height1 = img1.shape[0]
 74 |     width1 = img1.shape[1]
 75 |     if (len(img1.shape) == 2):
 76 |         channels1 = 1
 77 |     else:
 78 |         channels1 = img1.shape[2]
 79 | 
 80 |     height2 = img2.shape[0]
 81 |     width2 = img2.shape[1]
 82 |     if (len(img2.shape) == 2):
 83 |         channels2 = 1
 84 |     else:
 85 |         channels2 = img2.shape[2]
 86 | 
 87 |     # make all images 3 channel, or assume all same channel
 88 | 
 89 |     if ((channels1 > channels2) and (channels1 == 3)):
 90 |         out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
 91 |         out1 = img1
 92 |     elif ((channels2 > channels1) and (channels2 == 3)):
 93 |         out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
 94 |         out2 = img2
 95 |     else:  # both must be equal
 96 |         out1 = img1
 97 |         out2 = img2
 98 | 
 99 |     # width of first image is master height, height remains unchanged
100 | 
101 |     if (width1 != width2):
102 |         out2 = cv2.resize(out2, (height2, width1))
103 | 
104 |     return np.vstack((out1, out2))
105 | 
106 | #####################################################################
107 | 
108 | 
109 | keep_processing = True
110 | 
111 | # parse command line arguments for camera ID or video file
112 | 
113 | parser = argparse.ArgumentParser(
114 |     description='Perform ' +
115 |     sys.argv[0] +
116 |     ' example operation on incoming camera/video image')
117 | parser.add_argument(
118 |     "-c",
119 |     "--camera_to_use",
120 |     type=int,
121 |     help="specify camera to use",
122 |     default=0)
123 | parser.add_argument(
124 |     "-r",
125 |     "--rescale",
126 |     type=float,
127 |     help="rescale image by this factor",
128 |     default=1.0)
129 | parser.add_argument(
130 |     "-s",
131 |     "--set_resolution",
132 |     type=int,
133 |     nargs=2,
134 |     help='override default camera resolution as H W')
135 | parser.add_argument(
136 |     "-fs",
137 |     "--fullscreen",
138 |     action='store_true',
139 |     help="run in full screen mode")
140 | parser.add_argument(
141 |     'video_file',
142 |     metavar='video_file',
143 |     type=str,
144 |     nargs='?',
145 |     help='specify optional video file')
146 | args = parser.parse_args()
147 | 
148 | #####################################################################
149 | 
150 | # define video capture object
151 | 
152 | try:
153 |     # to use a non-buffered camera stream (via a separate thread)
154 | 
155 |     if not (args.video_file):
156 |         import camera_stream
157 |         cap = camera_stream.CameraVideoStream()
158 |     else:
159 |         cap = cv2.VideoCapture()  # not needed for video files
160 | 
161 | except BaseException:
162 |     # if not then just use OpenCV default
163 | 
164 |     print("INFO: camera_stream class not found - camera input may be buffered")
165 |     cap = cv2.VideoCapture()
166 | 
167 | # check versions to work around this bug in OpenCV 3.1
168 | # https://github.com/opencv/opencv/issues/6055
169 | 
170 | (major, minor, _) = cv2.__version__.split(".")
171 | if ((major == '3') and (minor == '1')):
172 |     cv2.ocl.setUseOpenCL(False)
173 | 
174 | # define display window name
175 | 
176 | window_name = "Live Camera Input"  # window name
177 | window_nameBG = "Background Model"  # window name
178 | window_nameFG = "Foreground Objects"  # window name
179 | window_nameFGP = "Foreground Probabiity"  # window name
180 | 
181 | # if command line arguments are provided try to read video_name
182 | # otherwise default to capture from attached H/W camera
183 | 
184 | if (((args.video_file) and (cap.open(str(args.video_file))))
185 |         or (cap.open(args.camera_to_use))):
186 | 
187 |     # create window by name (as resizable)
188 | 
189 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
190 |     cv2.namedWindow(window_nameBG, cv2.WINDOW_NORMAL)
191 |     cv2.namedWindow(window_nameFG, cv2.WINDOW_NORMAL)
192 |     cv2.namedWindow(window_nameFGP, cv2.WINDOW_NORMAL)
193 | 
194 |     # override default camera resolution
195 | 
196 |     if (args.set_resolution is not None):
197 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
198 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
199 | 
200 |     # create GMM background subtraction object
201 |     # (using default parameters which are suitable for quick lecture demos
202 |     # - see manual for suitable choice of values to use in anger)
203 | 
204 |     mog = cv2.createBackgroundSubtractorMOG2(
205 |         history=2000, varThreshold=16, detectShadows=True)
206 | 
207 |     print("\nPress <space> to reset MoG model ...\n")
208 | 
209 |     while (keep_processing):
210 | 
211 |         # if video file successfully open then read frame from video
212 | 
213 |         if (cap.isOpened):
214 |             ret, frame = cap.read()
215 | 
216 |             # when we reach the end of the video (file) exit cleanly
217 | 
218 |             if (ret == 0):
219 |                 keep_processing = False
220 |                 continue
221 | 
222 |             # rescale if specified
223 | 
224 |             if (args.rescale != 1.0):
225 |                 frame = cv2.resize(
226 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
227 | 
228 |         # add current frame to background model and retrieve current foreground
229 |         # objects (use learningRate parameter for tuning, see manual )
230 | 
231 |         fgmask = mog.apply(frame)
232 | 
233 |         # threshold and clean it up using erosion/dilation w/ elliptic mask
234 | 
235 |         fgthres = cv2.threshold(fgmask.copy(), 200, 255, cv2.THRESH_BINARY)[1]
236 |         fgeroded = cv2.erode(
237 |             fgthres, kernel=cv2.getStructuringElement(
238 |                 cv2.MORPH_ELLIPSE, (3, 3)), iterations=3)
239 |         fgdilated = cv2.dilate(
240 |             fgeroded, kernel=cv2.getStructuringElement(
241 |                 cv2.MORPH_ELLIPSE, (3, 3)), iterations=3)
242 | 
243 |         # get current background image (representative of current GMM model)
244 | 
245 |         bgmodel = mog.getBackgroundImage()
246 | 
247 |         # display images - input, background and original
248 | 
249 |         if (args.fullscreen):
250 | 
251 |             window_name = "[ Live | BG | Pr(FG) | FG ]"
252 |             cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
253 |             cv2.imshow(window_name, v_concat(
254 |                                              h_concat(frame, bgmodel),
255 |                                              h_concat(fgmask, fgeroded)
256 |                                             ))
257 |             cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
258 |                                   cv2.WINDOW_FULLSCREEN & args.fullscreen)
259 | 
260 |         else:
261 | 
262 |             cv2.imshow(window_name, frame)
263 |             cv2.imshow(window_nameFG, fgeroded)
264 |             cv2.imshow(window_nameFGP, fgmask)
265 |             cv2.imshow(window_nameBG, bgmodel)
266 | 
267 |         # start the event loop - essential
268 | 
269 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
270 |         # ms.) It waits for specified milliseconds for any keyboard event.
271 |         # If you press any key in that time, the program continues.
272 |         # If 0 is passed, it waits indefinitely for a key stroke.
273 |         # (bitwise and with 0xFF to extract least significant byte of
274 |         # multi-byte response) here we use a wait time in ms. that takes
275 |         # account of processing time already used in the loop
276 | 
277 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
278 |         key = cv2.waitKey(40) & 0xFF
279 | 
280 |         # It can also be set to detect specific key strokes by recording which
281 |         # key is pressed
282 | 
283 |         # e.g. if user presses "x" then exit, "f" for fullscreen
284 |         # or reset MoG model when space is pressed
285 | 
286 |         if (key == ord('x')):
287 |             keep_processing = False
288 |         elif (key == ord(' ')):
289 |             print("\nResetting MoG background model ...\n")
290 |             mog = cv2.createBackgroundSubtractorMOG2(
291 |                 history=2000, varThreshold=16, detectShadows=True)
292 |         elif (key == ord('f')):
293 |             args.fullscreen = not (args.fullscreen)
294 | 
295 |     # close all windows
296 | 
297 |     cv2.destroyAllWindows()
298 | 
299 | else:
300 |     print("No video file specified or camera connected.")
301 | 
302 | #####################################################################
303 | 


--------------------------------------------------------------------------------
/openpose.py:
--------------------------------------------------------------------------------
  1 | ##########################################################################
  2 | 
  3 | # Example : perform live display of openpose body pose regression from a video
  4 | # file specified on the command line (e.g. python FILE.py video_file) or from
  5 | # an attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2019 Toby Breckon, Engineering & Computing Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | # Based heavily on the example provided at:
 14 | # https://github.com/opencv/opencv/blob/master/samples/dnn/openpose.py
 15 | 
 16 | ##########################################################################
 17 | 
 18 | # To use download COCO model pose files from:
 19 | # https://github.com/CMU-Perceptual-Computing-Lab/openpose/
 20 | # using
 21 | # https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/models/getModels.sh
 22 | 
 23 | ##########################################################################
 24 | 
 25 | import cv2
 26 | import argparse
 27 | import sys
 28 | import math
 29 | 
 30 | ##########################################################################
 31 | 
 32 | keep_processing = True
 33 | 
 34 | # parse command line arguments for camera ID or video file
 35 | 
 36 | parser = argparse.ArgumentParser(
 37 |     description='Perform ' +
 38 |     sys.argv[0] +
 39 |     ' example operation on incoming camera/video image')
 40 | parser.add_argument(
 41 |     "-c",
 42 |     "--camera_to_use",
 43 |     type=int,
 44 |     help="specify camera to use",
 45 |     default=0)
 46 | parser.add_argument(
 47 |     "-r",
 48 |     "--rescale",
 49 |     type=float,
 50 |     help="rescale image by this factor",
 51 |     default=1.0)
 52 | parser.add_argument(
 53 |     "-fs",
 54 |     "--fullscreen",
 55 |     action='store_true',
 56 |     help="run in full screen mode")
 57 | parser.add_argument(
 58 |     "-use",
 59 |     "--target",
 60 |     type=str,
 61 |     choices=['cpu', 'gpu', 'opencl'],
 62 |     help="select computational backend",
 63 |     default='gpu')
 64 | parser.add_argument(
 65 |     'video_file',
 66 |     metavar='video_file',
 67 |     type=str,
 68 |     nargs='?',
 69 |     help='specify optional video file')
 70 | args = parser.parse_args()
 71 | 
 72 | ##########################################################################
 73 | 
 74 | # define video capture object
 75 | 
 76 | try:
 77 |     # to use a non-buffered camera stream (via a separate thread)
 78 | 
 79 |     if not (args.video_file):
 80 |         import camera_stream
 81 |         cap = camera_stream.CameraVideoStream()
 82 |     else:
 83 |         cap = cv2.VideoCapture()  # not needed for video files
 84 | 
 85 | except BaseException:
 86 |     # if not then just use OpenCV default
 87 | 
 88 |     print("INFO: camera_stream class not found - camera input may be buffered")
 89 |     cap = cv2.VideoCapture()
 90 | 
 91 | ##########################################################################
 92 | 
 93 | # define display window name
 94 | 
 95 | window_name = "OpenPose Body Pose Regression - Live"  # window name
 96 | 
 97 | # create window by name (as resizable)
 98 | 
 99 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
100 | 
101 | ##########################################################################
102 | 
103 | # set pose labels - based on COCO dataset training
104 | 
105 | BODY_PARTS = {"Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
106 |               "LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
107 |               "RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "REye": 14,
108 |               "LEye": 15, "REar": 16, "LEar": 17, "Background": 18}
109 | 
110 | POSE_PAIRS = [
111 |                 ["Neck", "RShoulder"], ["Neck", "LShoulder"],
112 |                 ["RShoulder", "RElbow"], ["RElbow", "RWrist"],
113 |                 ["LShoulder", "LElbow"], ["LElbow", "LWrist"],
114 |                 ["Neck", "RHip"], ["RHip", "RKnee"],
115 |                 ["RKnee", "RAnkle"], ["Neck", "LHip"],
116 |                 ["LHip", "LKnee"], ["LKnee", "LAnkle"],
117 |                 ["Neck", "Nose"], ["Nose", "REye"],
118 |                 ["REye", "REar"], ["Nose", "LEye"],
119 |                 ["LEye", "LEar"]
120 |             ]
121 | 
122 | ##########################################################################
123 | 
124 | # Load CNN model
125 | net = cv2.dnn.readNet(
126 |     "pose_iter_440000.caffemodel",
127 |     "pose_deploy_linevec.prototxt",
128 |     'caffe')
129 | 
130 | # set up compute target as one of [GPU, OpenCL, CPU]
131 | 
132 | if (args.target == 'gpu'):
133 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
134 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
135 | elif (args.target == 'opencl'):
136 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
137 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
138 | else:
139 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
140 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
141 | 
142 | ##########################################################################
143 | 
144 | # if command line arguments are provided try to read video_name
145 | # otherwise default to capture from attached camera
146 | 
147 | if (((args.video_file) and (cap.open(str(args.video_file))))
148 |         or (cap.open(args.camera_to_use))):
149 | 
150 |     while (keep_processing):
151 | 
152 |         # start a timer (to see how long processing and display takes)
153 | 
154 |         start_t = cv2.getTickCount()
155 | 
156 |         # if camera /video file successfully open then read frame
157 | 
158 |         if (cap.isOpened):
159 |             ret, frame = cap.read()
160 | 
161 |             # when we reach the end of the video (file) exit cleanly
162 | 
163 |             if (ret == 0):
164 |                 keep_processing = False
165 |                 continue
166 | 
167 |             # rescale if specified
168 | 
169 |             if (args.rescale != 1.0):
170 |                 frame = cv2.resize(
171 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
172 | 
173 |         # create a 4D tensor "blob" from a frame - defaults from OpenCV
174 |         # OpenPose example
175 | 
176 |         blob = cv2.dnn.blobFromImage(
177 |             frame, scalefactor=0.003922, size=(
178 |                 368, 368), mean=[
179 |                 0, 0, 0], swapRB=False, crop=False)
180 | 
181 |         # Run forward inference on the model
182 | 
183 |         net.setInput(blob)
184 |         out = net.forward()
185 | 
186 |         # draw body parts
187 | 
188 |         if (len(BODY_PARTS) <= out.shape[1]):
189 | 
190 |             frameWidth = frame.shape[1]
191 |             frameHeight = frame.shape[0]
192 | 
193 |             points = []
194 |             for i in range(len(BODY_PARTS)):
195 |                 # Slice heatmap of corresponding body's part.
196 |                 heatMap = out[0, i, :, :]
197 | 
198 |                 # Originally, we try to find all the local maximums.
199 |                 # To simplify a sample we just find a global one.
200 |                 # However only a single pose at the same time
201 |                 # could be detected this way.
202 |                 _, conf, _, point = cv2.minMaxLoc(heatMap)
203 |                 x = (frameWidth * point[0]) / out.shape[3]
204 |                 y = (frameHeight * point[1]) / out.shape[2]
205 | 
206 |                 # Add a point if it's confidence is higher than threshold.
207 |                 points.append((int(x), int(y)) if conf > 0.1 else None)
208 | 
209 |             for pair in POSE_PAIRS:
210 |                 partFrom = pair[0]
211 |                 partTo = pair[1]
212 |                 assert (partFrom in BODY_PARTS)
213 |                 assert (partTo in BODY_PARTS)
214 | 
215 |                 idFrom = BODY_PARTS[partFrom]
216 |                 idTo = BODY_PARTS[partTo]
217 | 
218 |                 if points[idFrom] and points[idTo]:
219 |                     cv2.line(
220 |                         frame, points[idFrom], points[idTo], (0, 255, 0), 3)
221 |                     cv2.ellipse(
222 |                         frame, points[idFrom], (3, 3), 0, 0, 360,
223 |                         (0, 0, 255), cv2.FILLED)
224 |                     cv2.ellipse(
225 |                         frame, points[idTo], (3, 3), 0, 0, 360,
226 |                         (0, 0, 255), cv2.FILLED)
227 | 
228 |         # stop the timer and convert to ms.
229 | 
230 |         stop_t = ((cv2.getTickCount() - start_t) /
231 |                   cv2.getTickFrequency()) * 1000
232 | 
233 |         # add efficiency information
234 | 
235 |         label = ('Inference time: %.2f ms' % stop_t) + \
236 |             (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
237 |         cv2.putText(frame, label, (0, 15),
238 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))
239 | 
240 |         # display image
241 | 
242 |         cv2.imshow(window_name, frame)
243 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
244 |                               cv2.WINDOW_FULLSCREEN & args.fullscreen)
245 | 
246 |         # start the event loop - essentials
247 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
248 |         # 25 fps = 40 ms)
249 | 
250 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
251 | 
252 |         # It can also be set to detect specific key strokes by recording which
253 |         # key is pressed
254 | 
255 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
256 |         # display
257 | 
258 |         if (key == ord('x')):
259 |             keep_processing = False
260 |         elif (key == ord('f')):
261 |             args.fullscreen = not (args.fullscreen)
262 | 
263 |     # close all windows
264 | 
265 |     cv2.destroyAllWindows()
266 | 
267 | else:
268 |     print("No video file specified or camera connected.")
269 | 
270 | ##########################################################################
271 | 


--------------------------------------------------------------------------------
/opticflow.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example : perform live visualization of optic flow from a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from
  5 | # an attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2017 School of Engineering & Computing Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | #####################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | import numpy as np
 19 | 
 20 | #####################################################################
 21 | 
 22 | keep_processing = True
 23 | 
 24 | # parse command line arguments for camera ID or video file
 25 | 
 26 | parser = argparse.ArgumentParser(
 27 |     description='Perform ' +
 28 |     sys.argv[0] +
 29 |     ' example operation on incoming camera/video image')
 30 | parser.add_argument(
 31 |     "-c",
 32 |     "--camera_to_use",
 33 |     type=int,
 34 |     help="specify camera to use",
 35 |     default=0)
 36 | parser.add_argument(
 37 |     "-r",
 38 |     "--rescale",
 39 |     type=float,
 40 |     help="rescale image by this factor",
 41 |     default=1.0)
 42 | parser.add_argument(
 43 |     'video_file',
 44 |     metavar='video_file',
 45 |     type=str,
 46 |     nargs='?',
 47 |     help='specify optional video file')
 48 | args = parser.parse_args()
 49 | 
 50 | #####################################################################
 51 | 
 52 | # draw optic flow visualization on image using a given step size for
 53 | # the line glyphs that show the flow vectors on the image
 54 | 
 55 | 
 56 | def draw_flow(img, flow, step=8):
 57 |     h, w = img.shape[:2]
 58 |     y, x = np.mgrid[step / 2:h:step, step /
 59 |                     2:w:step].reshape(2, -1).astype(int)
 60 |     fx, fy = flow[y, x].T
 61 |     lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
 62 |     lines = np.int32(lines + 0.5)
 63 |     vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
 64 |     cv2.polylines(vis, lines, 0, (0, 255, 0))
 65 |     for (x1, y1), (x2, y2) in lines:
 66 |         cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
 67 |     return vis
 68 | 
 69 | #####################################################################
 70 | 
 71 | # define video capture object
 72 | 
 73 | 
 74 | try:
 75 |     # to use a non-buffered camera stream (via a separate thread)
 76 | 
 77 |     if not (args.video_file):
 78 |         import camera_stream
 79 |         cap = camera_stream.CameraVideoStream()
 80 |     else:
 81 |         cap = cv2.VideoCapture()  # not needed for video files
 82 | 
 83 | except BaseException:
 84 |     # if not then just use OpenCV default
 85 | 
 86 |     print("INFO: camera_stream class not found - camera input may be buffered")
 87 |     cap = cv2.VideoCapture()
 88 | 
 89 | # define display window name
 90 | 
 91 | window_name = "Dense Optic Flow"  # window name
 92 | 
 93 | # if command line arguments are provided try to read video_name
 94 | # otherwise default to capture from attached H/W camera
 95 | 
 96 | if (((args.video_file) and (cap.open(str(args.video_file))))
 97 |         or (cap.open(args.camera_to_use))):
 98 | 
 99 |     # create window by name (as resizable)
100 | 
101 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
102 | 
103 |     # if video file successfully open then read an initial frame from video
104 | 
105 |     if (cap.isOpened):
106 |         ret, frame = cap.read()
107 | 
108 |         # rescale if specified
109 | 
110 |         if (args.rescale != 1.0):
111 |             frame = cv2.resize(frame, (0, 0), fx=args.rescale, fy=args.rescale)
112 | 
113 |     # convert image to grayscale to be previous frame
114 | 
115 |     prevgray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
116 | 
117 |     while (keep_processing):
118 | 
119 |         # if video file successfully open then read frame from video
120 | 
121 |         if (cap.isOpened):
122 |             ret, frame = cap.read()
123 | 
124 |             # when we reach the end of the video (file) exit cleanly
125 | 
126 |             if (ret == 0):
127 |                 keep_processing = False
128 |                 continue
129 | 
130 |             # rescale if specified
131 | 
132 |             if (args.rescale != 1.0):
133 |                 frame = cv2.resize(
134 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
135 | 
136 |         # convert image to grayscale
137 | 
138 |         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
139 | 
140 |         # compute dense optic flow using technique of Farneback 2003
141 |         # parameters from example (OpenCV 3.2):
142 |         # https://github.com/opencv/opencv/blob/master/samples/python/opt_flow.py
143 | 
144 |         flow = cv2.calcOpticalFlowFarneback(
145 |             prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
146 |         prevgray = gray
147 | 
148 |         # display image with optic flow overlay
149 | 
150 |         cv2.imshow(window_name, draw_flow(gray, flow))
151 | 
152 |         # start the event loop - essential
153 | 
154 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
155 |         key = cv2.waitKey(40) & 0xFF
156 | 
157 |         # It can also be set to detect specific key strokes by recording which
158 |         # key is pressed
159 | 
160 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
161 |         # display
162 | 
163 |         if (key == ord('x')):
164 |             keep_processing = False
165 |         elif (key == ord('f')):
166 |             cv2.setWindowProperty(
167 |                 window_name,
168 |                 cv2.WND_PROP_FULLSCREEN,
169 |                 cv2.WINDOW_FULLSCREEN)
170 | 
171 |     # close all windows
172 | 
173 |     cv2.destroyAllWindows()
174 | 
175 | else:
176 |     print("No video file specified or camera connected.")
177 | 
178 | #####################################################################
179 | 


--------------------------------------------------------------------------------
/pyramid.py:
--------------------------------------------------------------------------------
  1 | ##########################################################################
  2 | 
  3 | # Example : perform Gaussian/Laplacian pyramid live display from a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2021 Toby Breckon, Engineering & Computing Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | # Acknowledgements: based in part from tutorial at:
 14 | # https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_pyramids/py_pyramids.html
 15 | 
 16 | ##########################################################################
 17 | 
 18 | import cv2
 19 | import argparse
 20 | import sys
 21 | import math
 22 | import numpy as np
 23 | 
 24 | ##########################################################################
 25 | 
 26 | keep_processing = True
 27 | 
 28 | # parse command line arguments for camera ID or video file
 29 | 
 30 | parser = argparse.ArgumentParser(
 31 |     description='Perform ' +
 32 |     sys.argv[0] +
 33 |     ' example operation on incoming camera/video image')
 34 | parser.add_argument(
 35 |     "-c",
 36 |     "--camera_to_use",
 37 |     type=int,
 38 |     help="specify camera to use",
 39 |     default=0)
 40 | parser.add_argument(
 41 |     "-r",
 42 |     "--rescale",
 43 |     type=float,
 44 |     help="rescale image by this factor",
 45 |     default=1.0)
 46 | parser.add_argument(
 47 |     'video_file',
 48 |     metavar='video_file',
 49 |     type=str,
 50 |     nargs='?',
 51 |     help='specify optional video file')
 52 | args = parser.parse_args()
 53 | 
 54 | #####################################################################
 55 | 
 56 | # define display window name
 57 | 
 58 | window_name = "Live Camera Input"  # window name
 59 | 
 60 | ##########################################################################
 61 | 
 62 | # define video capture object
 63 | 
 64 | try:
 65 |     # to use a non-buffered camera stream (via a separate thread)
 66 | 
 67 |     if not (args.video_file):
 68 |         import camera_stream
 69 |         cap = camera_stream.CameraVideoStream()
 70 |     else:
 71 |         cap = cv2.VideoCapture()  # not needed for video files
 72 | 
 73 | except BaseException:
 74 |     # if not then just use OpenCV default
 75 | 
 76 |     print("INFO: camera_stream class not found - camera input may be buffered")
 77 |     cap = cv2.VideoCapture()
 78 | 
 79 | # if command line arguments are provided try to read video_name
 80 | # otherwise default to capture from attached camera
 81 | 
 82 | if (((args.video_file) and (cap.open(str(args.video_file))))
 83 |         or (cap.open(args.camera_to_use))):
 84 | 
 85 |     # create window by name (as resizable)
 86 | 
 87 |     cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
 88 | 
 89 |     # set initial number of pyramid levels
 90 | 
 91 |     nlevels = 5
 92 | 
 93 |     # print user key commands
 94 | 
 95 |     print()
 96 |     print("'-' - reduce pyramid levels")
 97 |     print("'+' - increase pyramid levels (max 6 levels)")
 98 |     print()
 99 | 
100 |     while (keep_processing):
101 | 
102 |         # start a timer (to see how long processing and display takes)
103 | 
104 |         start_t = cv2.getTickCount()
105 | 
106 |         # if camera /video file successfully open then read frame
107 | 
108 |         if (cap.isOpened):
109 |             ret, frame = cap.read()
110 | 
111 |             # when we reach the end of the video (file) exit cleanly
112 | 
113 |             if (ret == 0):
114 |                 keep_processing = False
115 |                 continue
116 | 
117 |             # rescale if specified
118 | 
119 |             if (args.rescale != 1.0):
120 |                 frame = cv2.resize(
121 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
122 | 
123 |         # generate Gaussian pyramid for image frame
124 | 
125 |         g_level = frame.copy()
126 |         g_pyramid = [g_level]
127 |         for layer in range(nlevels):
128 |             g_level = cv2.pyrDown(g_level)
129 |             cv2.namedWindow("Gaussian Level: " + str(layer),
130 |                             cv2.WINDOW_AUTOSIZE)
131 |             cv2.imshow("Gaussian Level: " + str(layer), g_level)
132 |             g_pyramid.append(g_level.copy())
133 | 
134 |         # generate Laplacian pyramid image frame
135 | 
136 |         lp_pyramid = [g_pyramid[nlevels - 1]]
137 |         for layer in range(nlevels, 0, -1):
138 |             g_level_enlarged = cv2.pyrUp(g_pyramid[layer])
139 | 
140 |             # catch this rounding error occurence in image sizes
141 |             if (g_pyramid[layer-1].shape != g_level_enlarged.shape):
142 |                 g_level_enlarged = cv2.resize(
143 |                             g_level_enlarged,
144 |                             tuple(reversed(g_pyramid[layer-1].shape[:2])),
145 |                             interpolation=cv2.INTER_LINEAR)
146 | 
147 |             l_level = cv2.subtract(g_pyramid[layer-1], g_level_enlarged)
148 |             cv2.normalize(l_level, l_level, 0, 255, cv2.NORM_MINMAX)
149 |             cv2.namedWindow("Laplacian Level: " + str(layer),
150 |                             cv2.WINDOW_AUTOSIZE)
151 |             cv2.imshow("Laplacian Level: " + str(layer), l_level)
152 |             lp_pyramid.append(l_level.copy())
153 | 
154 |         # display image
155 | 
156 |         cv2.imshow(window_name, frame)
157 | 
158 |         # stop the timer and convert to ms. (to see how long processing and
159 |         # display takes)
160 | 
161 |         stop_t = ((cv2.getTickCount() - start_t) /
162 |                   cv2.getTickFrequency()) * 1000
163 | 
164 |         # start the event loop - essential
165 | 
166 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
167 |         # 25 fps = 40 ms)
168 | 
169 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
170 | 
171 |         if (key == ord('x')):
172 |             keep_processing = False
173 |         elif (key == ord('+')):
174 |             cv2.destroyAllWindows()
175 |             nlevels = np.min([6, nlevels + 1])
176 |         elif (key == ord('-')):
177 |             cv2.destroyAllWindows()
178 |             nlevels = np.max([0, nlevels - 1])
179 | 
180 |     # close all windows
181 | 
182 |     cv2.destroyAllWindows()
183 | 
184 | else:
185 |     print("No video file specified or camera connected.")
186 | 
187 | ##########################################################################
188 | 


--------------------------------------------------------------------------------
/selective_search.py:
--------------------------------------------------------------------------------
  1 | ##########################################################################
  2 | 
  3 | # Example : detect live selective search bounding boxes from a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2021 Dept. Computer Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | ##########################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | import math
 19 | 
 20 | #####################################################################
 21 | 
 22 | # press all the go-faster buttons - i.e. speed-up using multithreads
 23 | 
 24 | cv2.setUseOptimized(True)
 25 | cv2.setNumThreads(4)
 26 | 
 27 | # if we have OpenCL H/W acceleration availale, use it - we'll need it
 28 | 
 29 | cv2.ocl.setUseOpenCL(True)
 30 | print(
 31 |     "INFO: OpenCL - available: ",
 32 |     cv2.ocl.haveOpenCL(),
 33 |     " using: ",
 34 |     cv2.ocl.useOpenCL())
 35 | 
 36 | ##########################################################################
 37 | 
 38 | keep_processing = True
 39 | 
 40 | # parse command line arguments for camera ID or video file
 41 | 
 42 | parser = argparse.ArgumentParser(
 43 |     description='Perform ' +
 44 |     sys.argv[0] +
 45 |     ' example operation on incoming camera/video image')
 46 | parser.add_argument(
 47 |     "-c",
 48 |     "--camera_to_use",
 49 |     type=int,
 50 |     help="specify camera to use",
 51 |     default=0)
 52 | parser.add_argument(
 53 |     "-r",
 54 |     "--rescale",
 55 |     type=float,
 56 |     help="rescale image by this factor",
 57 |     default=1.0)
 58 | parser.add_argument(
 59 |     "-fs",
 60 |     "--fullscreen",
 61 |     action='store_true',
 62 |     help="run in full screen mode")
 63 | parser.add_argument(
 64 |     'video_file',
 65 |     metavar='video_file',
 66 |     type=str,
 67 |     nargs='?',
 68 |     help='specify optional video file')
 69 | args = parser.parse_args()
 70 | 
 71 | ##########################################################################
 72 | 
 73 | # define video capture object
 74 | 
 75 | try:
 76 |     # to use a non-buffered camera stream (via a separate thread)
 77 | 
 78 |     if not (args.video_file):
 79 |         import camera_stream
 80 |         cap = camera_stream.CameraVideoStream()
 81 |     else:
 82 |         cap = cv2.VideoCapture()  # not needed for video files
 83 | 
 84 | except BaseException:
 85 |     # if not then just use OpenCV default
 86 | 
 87 |     print("INFO: camera_stream class not found - camera input may be buffered")
 88 |     cap = cv2.VideoCapture()
 89 | 
 90 | # define display window name
 91 | 
 92 | window_name = "Selective Search - Bounding Boxes"  # window name
 93 | 
 94 | # if command line arguments are provided try to read video_name
 95 | # otherwise default to capture from attached camera
 96 | 
 97 | if (((args.video_file) and (cap.open(str(args.video_file))))
 98 |         or (cap.open(args.camera_to_use))):
 99 | 
100 |     # create window by name (as resizable)
101 | 
102 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
103 | 
104 |     #####################################################################
105 | 
106 |     # create Selective Search Segmentation Object using default parameters
107 | 
108 |     ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
109 | 
110 |     while (keep_processing):
111 | 
112 |         # start a timer (to see how long processing and display takes)
113 | 
114 |         start_t = cv2.getTickCount()
115 | 
116 |         # if camera /video file successfully open then read frame
117 | 
118 |         if (cap.isOpened):
119 |             ret, frame = cap.read()
120 | 
121 |             # when we reach the end of the video (file) exit cleanly
122 | 
123 |             if (ret == 0):
124 |                 keep_processing = False
125 |                 continue
126 | 
127 |             # rescale if specified
128 | 
129 |             if (args.rescale != 1.0):
130 |                 frame = cv2.resize(
131 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
132 | 
133 |             # set input image on which we will run segmentation
134 | 
135 |             ss.setBaseImage(frame)
136 | 
137 |             # Switch to fast but low recall Selective Search method
138 |             ss.switchToSelectiveSearchFast()
139 | 
140 |             # Switch to high recall but slow Selective Search method (slower)
141 |             # ss.switchToSelectiveSearchQuality()
142 | 
143 |             # run selective search segmentation on input image
144 |             rects = ss.process()
145 |             print('Total Number of Region Proposals: {}'.format(len(rects)))
146 | 
147 |             # number of region proposals to show
148 |             numShowRects = 100
149 | 
150 |             # iterate over all the region proposals
151 |             for i, rect in enumerate(rects):
152 |                 # draw rectangle for region proposal till numShowRects
153 |                 if (i < numShowRects):
154 |                     x, y, w, h = rect
155 |                     cv2.rectangle(frame, (x, y), (x+w, y+h),
156 |                                   (0, 255, 0), 1, cv2.LINE_AA)
157 |                 else:
158 |                     break
159 | 
160 |         # stop the timer and convert to ms. (to see how long processing and
161 |         # display takes)
162 | 
163 |         stop_t = ((cv2.getTickCount() - start_t) /
164 |                   cv2.getTickFrequency()) * 1000
165 | 
166 |         label = ('Processing time: %.2f ms' % stop_t) + \
167 |             (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
168 |         cv2.putText(frame, label, (0, 15),
169 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
170 | 
171 |         # display image
172 | 
173 |         cv2.imshow(window_name, frame)
174 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
175 |                               cv2.WINDOW_FULLSCREEN & args.fullscreen)
176 | 
177 |         # start the event loop - essential
178 | 
179 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
180 |         # milliseconds). It waits for specified milliseconds for any keyboard
181 |         # event. If you press any key in that time, the program continues.
182 |         # If 0 is passed, it waits indefinitely for a key stroke.
183 |         # (bitwise and with 0xFF to extract least significant byte of
184 |         # multi-byte response)
185 | 
186 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
187 |         # 25 fps = 40 ms)
188 | 
189 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
190 | 
191 |         # It can also be set to detect specific key strokes by recording which
192 |         # key is pressed
193 | 
194 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
195 |         # display
196 | 
197 |         if (key == ord('x')):
198 |             keep_processing = False
199 |         elif (key == ord('f')):
200 |             args.fullscreen = not (args.fullscreen)
201 | 
202 |     # close all windows
203 | 
204 |     cv2.destroyAllWindows()
205 | 
206 | else:
207 |     print("No video file specified or camera connected.")
208 | 
209 | ##########################################################################
210 | 


--------------------------------------------------------------------------------
/sobel.py:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | 
  3 | # Example : Sobel edge filtering for a a video file
  4 | # specified on the command line (e.g. python FILE.py video_file) or from an
  5 | # attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2016 School of Engineering & Computing Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | #####################################################################
 14 | 
 15 | import cv2
 16 | import argparse
 17 | import sys
 18 | 
 19 | #####################################################################
 20 | 
 21 | keep_processing = True
 22 | 
 23 | # parse command line arguments for camera ID or video file
 24 | 
 25 | parser = argparse.ArgumentParser(
 26 |     description='Perform ' +
 27 |     sys.argv[0] +
 28 |     ' example operation on incoming camera/video image')
 29 | parser.add_argument(
 30 |     "-c",
 31 |     "--camera_to_use",
 32 |     type=int,
 33 |     help="specify camera to use",
 34 |     default=0)
 35 | parser.add_argument(
 36 |     "-r",
 37 |     "--rescale",
 38 |     type=float,
 39 |     help="rescale image by this factor",
 40 |     default=1.0)
 41 | parser.add_argument(
 42 |     "-s",
 43 |     "--set_resolution",
 44 |     type=int,
 45 |     nargs=2,
 46 |     help='override default camera resolution as H W')
 47 | parser.add_argument(
 48 |     'video_file',
 49 |     metavar='video_file',
 50 |     type=str,
 51 |     nargs='?',
 52 |     help='specify optional video file')
 53 | args = parser.parse_args()
 54 | 
 55 | #####################################################################
 56 | 
 57 | # this function is called as a call-back everytime the trackbar is moved
 58 | # (here we just do nothing)
 59 | 
 60 | 
 61 | def nothing(x):
 62 |     pass
 63 | 
 64 | #####################################################################
 65 | 
 66 | # define video capture object
 67 | 
 68 | 
 69 | try:
 70 |     # to use a non-buffered camera stream (via a separate thread)
 71 |     # enabling subsequent hardware acceleration where available
 72 | 
 73 |     if not (args.video_file):
 74 |         import camera_stream
 75 |         cap = camera_stream.CameraVideoStream(use_tapi=True)
 76 |     else:
 77 |         cap = cv2.VideoCapture()  # not needed for video files
 78 | 
 79 | except BaseException:
 80 |     # if not then just use OpenCV default
 81 | 
 82 |     print("INFO: camera_stream class not found - camera input may be buffered")
 83 |     cap = cv2.VideoCapture()
 84 | 
 85 | # define display window name
 86 | 
 87 | window_name = "Live Camera Input"  # window name
 88 | window_name2 = "Sobel Gradient Edge Response"  # window name
 89 | 
 90 | # if command line arguments are provided try to read video_name
 91 | # otherwise default to capture from attached H/W camera
 92 | 
 93 | if (((args.video_file) and (cap.open(str(args.video_file))))
 94 |         or (cap.open(args.camera_to_use))):
 95 | 
 96 |     # create window by name (as resizable)
 97 | 
 98 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
 99 |     cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
100 | 
101 |     # add some track bar controllers for settings
102 | 
103 |     neighbourhood = 3
104 |     cv2.createTrackbar(
105 |         "neighbourhood, N",
106 |         window_name2,
107 |         neighbourhood,
108 |         15,
109 |         nothing)
110 | 
111 |     # override default camera resolution
112 | 
113 |     if (args.set_resolution is not None):
114 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
115 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
116 | 
117 |     print("INFO: input resolution : (",
118 |           int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
119 |           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
120 | 
121 |     while (keep_processing):
122 | 
123 |         # if video file successfully open then read frame from video
124 | 
125 |         if (cap.isOpened):
126 |             ret, frame = cap.read()
127 | 
128 |             # when we reach the end of the video (file) exit cleanly
129 | 
130 |             if (ret == 0):
131 |                 keep_processing = False
132 |                 continue
133 | 
134 |             # rescale if specified
135 | 
136 |             if (args.rescale != 1.0):
137 |                 frame = cv2.resize(
138 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
139 | 
140 |         # get parameters from track bars
141 | 
142 |         neighbourhood = cv2.getTrackbarPos("neighbourhood, N", window_name2)
143 | 
144 |         # check neighbourhood is greater than 3 and odd
145 | 
146 |         neighbourhood = max(3, neighbourhood)
147 |         if not (neighbourhood % 2):
148 |             neighbourhood = neighbourhood + 1
149 | 
150 |         # perform sobel across all three colour channels of the image
151 |         # in both the x and y directions
152 | 
153 |         sobel = cv2.Sobel(frame, cv2.CV_8U, 1, 1, ksize=neighbourhood)
154 | 
155 |         # display images
156 | 
157 |         cv2.imshow(window_name, frame)
158 |         cv2.imshow(window_name2, sobel)
159 | 
160 |         # start the event loop - essential
161 | 
162 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
163 |         # milliseconds). It waits for specified milliseconds for any keyboard
164 |         # event. If you press any key in that time, the program continues.
165 |         # If 0 is passed, it waits indefinitely for a key stroke.
166 |         # (bitwise and with 0xFF to extract least significant byte of
167 |         # multi-byte response)
168 | 
169 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
170 |         key = cv2.waitKey(40) & 0xFF
171 | 
172 |         # It can also be set to detect specific key strokes by recording which
173 |         # key is pressed
174 | 
175 |         # e.g. if user presses "x" then exit  / press "f" to toggle fullscreen
176 | 
177 |         if (key == ord('x')):
178 |             keep_processing = False
179 |         elif (key == ord('f')):
180 |             cv2.setWindowProperty(
181 |                 window_name2,
182 |                 cv2.WND_PROP_FULLSCREEN,
183 |                 cv2.WINDOW_FULLSCREEN &
184 |                 (cv2.getWindowProperty(window_name2,
185 |                                        cv2.WND_PROP_FULLSCREEN) == 0))
186 | 
187 |     # close all windows
188 | 
189 |     cv2.destroyAllWindows()
190 | 
191 | else:
192 |     print("No video file specified or camera connected.")
193 | 
194 | #####################################################################
195 | 


--------------------------------------------------------------------------------
/squeezenet.py:
--------------------------------------------------------------------------------
  1 | ##########################################################################
  2 | 
  3 | # Example : perform live display of squeezenet CNN classification from a video
  4 | # file specified on the command line (e.g. python FILE.py video_file) or from
  5 | # an attached web camera
  6 | 
  7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
  8 | 
  9 | # Copyright (c) 2019 Toby Breckon, Engineering & Computing Science,
 10 | #                    Durham University, UK
 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
 12 | 
 13 | # Based heavily on the example provided at:
 14 | # https://github.com/opencv/opencv/blob/master/samples/dnn/classification.py
 15 | 
 16 | ##########################################################################
 17 | 
 18 | # To use download the following files:
 19 | 
 20 | # https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt
 21 | # -> classification_classes_ILSVRC2012.txt
 22 | # https://github.com/forresti/SqueezeNet/raw/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel
 23 | # -> squeezenet_v1.1.caffemodel
 24 | # https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/squeezenet_v1.1.prototxt
 25 | # -> squeezenet_v1.1.prototxt
 26 | 
 27 | ##########################################################################
 28 | 
 29 | import cv2
 30 | import argparse
 31 | import sys
 32 | import math
 33 | import numpy as np
 34 | 
 35 | ##########################################################################
 36 | # dummy on trackbar callback function
 37 | 
 38 | 
 39 | def on_trackbar(val):
 40 |     return
 41 | 
 42 | ##########################################################################
 43 | 
 44 | 
 45 | keep_processing = True
 46 | 
 47 | # parse command line arguments for camera ID or video file
 48 | 
 49 | parser = argparse.ArgumentParser(
 50 |     description='Perform ' +
 51 |     sys.argv[0] +
 52 |     ' example operation on incoming camera/video image')
 53 | parser.add_argument(
 54 |     "-c",
 55 |     "--camera_to_use",
 56 |     type=int,
 57 |     help="specify camera to use",
 58 |     default=0)
 59 | parser.add_argument(
 60 |     "-r",
 61 |     "--rescale",
 62 |     type=float,
 63 |     help="rescale image by this factor",
 64 |     default=1.0)
 65 | parser.add_argument(
 66 |     "-fs",
 67 |     "--fullscreen",
 68 |     action='store_true',
 69 |     help="run in full screen mode")
 70 | parser.add_argument(
 71 |     "-use",
 72 |     "--target",
 73 |     type=str,
 74 |     choices=['cpu', 'gpu', 'opencl'],
 75 |     help="select computational backend",
 76 |     default='gpu')
 77 | parser.add_argument(
 78 |     'video_file',
 79 |     metavar='video_file',
 80 |     type=str,
 81 |     nargs='?',
 82 |     help='specify optional video file')
 83 | args = parser.parse_args()
 84 | 
 85 | ##########################################################################
 86 | 
 87 | # define video capture object
 88 | 
 89 | try:
 90 |     # to use a non-buffered camera stream (via a separate thread)
 91 | 
 92 |     if not (args.video_file):
 93 |         import camera_stream
 94 |         cap = camera_stream.CameraVideoStream()
 95 |     else:
 96 |         cap = cv2.VideoCapture()  # not needed for video files
 97 | 
 98 | except BaseException:
 99 |     # if not then just use OpenCV default
100 | 
101 |     print("INFO: camera_stream class not found - camera input may be buffered")
102 |     cap = cv2.VideoCapture()
103 | 
104 | ##########################################################################
105 | 
106 | # define display window name
107 | 
108 | window_name = "SqueezeNet Image Classification - Live"  # window name
109 | 
110 | # create window by name (as resizable)
111 | 
112 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
113 | trackbarName = 'reporting confidence > (x 0.01)'
114 | cv2.createTrackbar(trackbarName, window_name, 50, 100, on_trackbar)
115 | 
116 | ##########################################################################
117 | 
118 | # Load names of class labels
119 | 
120 | classes = None
121 | with open("classification_classes_ILSVRC2012.txt", 'rt') as f:
122 |     classes = f.read().rstrip('\n').split('\n')
123 | 
124 | ##########################################################################
125 | 
126 | # Load CNN model
127 | 
128 | net = cv2.dnn.readNet(
129 |     "squeezenet_v1.1.caffemodel",
130 |     "squeezenet_v1.1.prototxt",
131 |     'caffe')
132 | 
133 | # set up compute target as one of [GPU, OpenCL, CPU]
134 | 
135 | if (args.target == 'gpu'):
136 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
137 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
138 | elif (args.target == 'opencl'):
139 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
140 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
141 | else:
142 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
143 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
144 | 
145 | ##########################################################################
146 | 
147 | # if command line arguments are provided try to read video_name
148 | # otherwise default to capture from attached camera
149 | 
150 | if (((args.video_file) and (cap.open(str(args.video_file))))
151 |         or (cap.open(args.camera_to_use))):
152 | 
153 |     while (keep_processing):
154 | 
155 |         # start a timer (to see how long processing and display takes)
156 | 
157 |         start_t = cv2.getTickCount()
158 | 
159 |         # if camera /video file successfully open then read frame
160 | 
161 |         if (cap.isOpened):
162 |             ret, frame = cap.read()
163 | 
164 |             # when we reach the end of the video (file) exit cleanly
165 | 
166 |             if (ret == 0):
167 |                 keep_processing = False
168 |                 continue
169 | 
170 |             # rescale if specified
171 | 
172 |             if (args.rescale != 1.0):
173 |                 frame = cv2.resize(
174 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
175 | 
176 |         #######################################################################
177 |         # squeezenet:
178 |         #   model: "squeezenet_v1.1.caffemodel"
179 |         #   config: "squeezenet_v1.1.prototxt"
180 |         #   mean: [0, 0, 0]
181 |         #   scale: 1.0
182 |         #   width: 227
183 |         #   height: 227
184 |         #   rgb: false
185 |         #   classes: "classification_classes_ILSVRC2012.txt
186 |         #######################################################################
187 | 
188 |         # create a 4D tensor "blob" from a frame.
189 | 
190 |         blob = cv2.dnn.blobFromImage(
191 |             frame, scalefactor=1.0, size=(
192 |                 227, 227), mean=[
193 |                 0, 0, 0], swapRB=False, crop=False)
194 | 
195 |         # Run forward inference on the model
196 | 
197 |         net.setInput(blob)
198 |         out = net.forward()
199 | 
200 |         # get class label with a highest score from final softmax() layer
201 | 
202 |         out = out.flatten()
203 |         classId = np.argmax(out)
204 |         confidence = out[classId]
205 | 
206 |         # stop the timer and convert to ms. (to see how long processing takes
207 | 
208 |         stop_t = ((cv2.getTickCount() - start_t) /
209 |                   cv2.getTickFrequency()) * 1000
210 | 
211 |         # Display efficiency information
212 | 
213 |         label = ('Inference time: %.2f ms' % stop_t) + \
214 |             (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
215 |         cv2.putText(frame, label, (0, 15),
216 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
217 | 
218 |         # get confidence threshold from track bar
219 |         confThreshold = cv2.getTrackbarPos(trackbarName, window_name) / 100
220 | 
221 |         # if we are quite confidene about classification then dispplay
222 |         if (confidence > confThreshold):
223 |             # add predicted class.
224 |             label = '%s: %.4f' % (
225 |                 classes[classId]
226 |                 if classes else 'Class #%d' % classId, confidence)
227 |             cv2.putText(frame, label, (0, 40),
228 |                         cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
229 | 
230 |         # display image
231 | 
232 |         cv2.imshow(window_name, frame)
233 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
234 |                               cv2.WINDOW_FULLSCREEN & args.fullscreen)
235 | 
236 |         # start the event loop - essential
237 | 
238 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
239 |         # 25 fps = 40 ms)
240 | 
241 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
242 | 
243 |         # It can also be set to detect specific key strokes by recording which
244 |         # key is pressed
245 | 
246 |         # e.g. if user presses "x" then exit  / press "f" for fullscreen
247 |         # display
248 | 
249 |         if (key == ord('x')):
250 |             keep_processing = False
251 |         elif (key == ord('f')):
252 |             args.fullscreen = not (args.fullscreen)
253 | 
254 |     # close all windows
255 | 
256 |     cv2.destroyAllWindows()
257 | 
258 | else:
259 |     print("No video file specified or camera connected.")
260 | 
261 | ##########################################################################
262 | 


--------------------------------------------------------------------------------
/test_all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | ################################################################################
 4 | 
 5 | # run a batch test over all the examples from the bash shell (linux)
 6 | 
 7 | # Copyright (c) 2019 Dept Computer Science,
 8 | #                    Durham University, UK
 9 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
10 | 
11 | ################################################################################
12 | 
13 | PYTHON_INTERPRETATOR=python3
14 | CAM_TO_TEST=0
15 | VIDEO_TO_TEST=video.avi
16 | 
17 | echo
18 | echo Using $PYTHON_INTERPRETATOR with camera $CAM_TO_TEST and video $VIDEO_TO_TEST
19 | echo "Running test suite - press 'x' in OpenCV window to exist each example."
20 | echo
21 | 
22 | # get testing resouces if they do not exist
23 | 
24 | [ -f example.jpg ] || { wget https://upload.wikimedia.org/wikipedia/commons/b/b4/JPEG_example_JPG_RIP_100.jpg; mv JPEG_example_JPG_RIP_100.jpg example.jpg; }
25 | [ -f video.avi ] || { wget http://clips.vorwaerts-gmbh.de/big_buck_bunny.mp4; mv big_buck_bunny.mp4 video.avi; }
26 | 
27 | ################################################################################
28 | 
29 | # run defaults
30 | 
31 | echo "Running default tests ..."
32 | echo
33 | 
34 | for example in *.py
35 | do
36 |  echo "Testing example: " $example
37 |  $PYTHON_INTERPRETATOR $example
38 |  echo
39 | done
40 | 
41 | ################################################################################
42 | 
43 | # run cam test
44 | 
45 | echo "Running camera based tests ..."
46 | echo
47 | 
48 | for example in *.py
49 | do
50 |  echo "Testing example: " $example -c $CAM_TO_TEST
51 |  $PYTHON_INTERPRETATOR $example -c $CAM_TO_TEST
52 |  echo
53 | done
54 | 
55 | ################################################################################
56 | 
57 | # run cam test and resize
58 | 
59 | echo "Running camera based tests with resizing ..."
60 | echo
61 | 
62 | for example in *.py
63 | do
64 |  echo "Testing example: " $example -c $CAM_TO_TEST -r 0.25
65 |  $PYTHON_INTERPRETATOR $example -c $CAM_TO_TEST -r 0.25
66 |  echo
67 | done
68 | 
69 | 
70 | ################################################################################
71 | 
72 | # run video file test
73 | 
74 | echo "Running video file based tests ..."
75 | echo
76 | 
77 | for example in *.py
78 | do
79 |  echo "Testing example: " $example $VIDEO_TO_TEST
80 |  $PYTHON_INTERPRETATOR $example $VIDEO_TO_TEST
81 |  echo
82 | done
83 | 
84 | ################################################################################
85 | 


--------------------------------------------------------------------------------