├── .github
│   └── workflows
│       └── pep8-check.yml
├── .gitignore
├── DoG.py
├── LICENSE
├── README.md
├── calibrate_camera.py
├── camera_stream.py
├── canny.py
├── chromaticity_lightness.py
├── cnn_ssd_detection.py
├── contour_edges.py
├── cycleimages.py
├── download-models.sh
├── eigenfaces.py
├── faster-rcnn.py
├── fcn_segmentation.py
├── gaussian.py
├── generic_interface.py
├── gradient_orientation.py
├── haar_cascade_detection.py
├── harris.py
├── hog.py
├── houghlines.py
├── kalman_tracking_live.py
├── lbp_cascade_detection.py
├── mask-rcnn.py
├── mog-background-subtraction.py
├── openpose.py
├── opticflow.py
├── pyramid.py
├── selective_search.py
├── sift_detection.py
├── sobel.py
├── squeezenet.py
├── stereo_sgbm.py
├── test_all.sh
└── yolo.py
/.github/workflows/pep8-check.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Python - PEP8
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - uses: actions/checkout@v2
19 | - name: Set up Python 3.8
20 | uses: actions/setup-python@v2
21 | with:
22 | python-version: 3.8
23 | - name: Install dependencies
24 | run: |
25 | python -m pip install --upgrade pip
26 | pip install flake8 pytest
27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28 | - name: Lint with flake8
29 | run: |
30 | # stop the build if there are Python PEP8 style violations, syntax errors, undefined names, unused imports ...
31 | flake8 .
32 |
--------------------------------------------------------------------------------
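The lint step above can be reproduced locally before pushing. A minimal sketch of the equivalent shell commands (assuming Python 3 and pip are already installed):

```
python -m pip install --upgrade pip
pip install flake8
flake8 .
```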
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
91 | # specific to this repo
92 |
93 | *.pb
94 | *.xml
95 | *.pbtxt
96 | *.txt
97 | *.caffemodel
98 | *.prototxt
99 | *.cfg
100 | *.weights
101 | *.names
102 | *.avi
103 | *.jpg
104 | calibration
105 | faster_rcnn_inception_v2_coco_2018_01_28
106 | mask_rcnn_inception_v2_coco_2018_01_28
107 |
--------------------------------------------------------------------------------
/DoG.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : Difference of Gaussian (DoG) of a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2017-2019 Dept. Engineering & Dept. Computer Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 |
19 | #####################################################################
20 |
21 | keep_processing = True
22 |
23 | # parse command line arguments for camera ID or video file
24 |
25 | parser = argparse.ArgumentParser(
26 | description='Perform ' +
27 | sys.argv[0] +
28 | ' example operation on incoming camera/video image')
29 | parser.add_argument(
30 | "-c",
31 | "--camera_to_use",
32 | type=int,
33 | help="specify camera to use",
34 | default=0)
35 | parser.add_argument(
36 | "-r",
37 | "--rescale",
38 | type=float,
39 | help="rescale image by this factor",
40 | default=1.0)
41 | parser.add_argument(
42 | "-s",
43 | "--set_resolution",
44 | type=int,
45 | nargs=2,
46 | help='override default camera resolution as W H')
47 | parser.add_argument("-i", "--is_image", action='store_true',
48 | help="specify file is an image, not a video")
49 | parser.add_argument(
50 | 'video_file',
51 | metavar='file',
52 | type=str,
53 | nargs='?',
54 | help='specify optional video file')
55 | args = parser.parse_args()
56 |
57 | #####################################################################
58 |
59 | # this function is called as a call-back every time the trackbar is moved
60 | # (here we just do nothing)
61 |
62 |
63 | def nothing(x):
64 | pass
65 |
66 | #####################################################################
67 |
68 | # define video capture object
69 |
70 |
71 | try:
72 | # to use a non-buffered camera stream (via a separate thread)
73 |
74 | if not (args.video_file):
75 | import camera_stream
76 | cap = camera_stream.CameraVideoStream(use_tapi=True)
77 | else:
78 | cap = cv2.VideoCapture() # not needed for video files
79 |
80 | except BaseException:
81 | # if not then just use OpenCV default
82 |
83 | print("INFO: camera_stream class not found - camera input may be buffered")
84 | cap = cv2.VideoCapture()
85 |
86 | # define display window name
87 |
88 | window_name = "Live Camera Input" # window name
89 | window_nameU = "Gaussian Upper" # window name
90 | window_nameL = "Gaussian Lower" # window name
91 | window_nameDoG = "DoG" # window name
92 |
93 | # if command line arguments are provided try to read video_name
94 | # otherwise default to capture from attached H/W camera
95 |
96 | if (((args.video_file) and (cap.open(str(args.video_file))))
97 | or (cap.open(args.camera_to_use))):
98 |
99 | # create window by name (as resizable)
100 |
101 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
102 | cv2.namedWindow(window_nameL, cv2.WINDOW_NORMAL)
103 | cv2.namedWindow(window_nameU, cv2.WINDOW_NORMAL)
104 | cv2.namedWindow(window_nameDoG, cv2.WINDOW_NORMAL)
105 |
106 | # add some track bar controllers for settings
107 |
108 | sigmaU = 2 # greater than 7 seems to crash
109 | cv2.createTrackbar("sigma U", window_nameU, sigmaU, 15, nothing)
110 | sigmaL = 1 # greater than 7 seems to crash
111 | cv2.createTrackbar("sigma L", window_nameL, sigmaL, 15, nothing)
112 |
113 | # override default camera resolution
114 |
115 | if (args.set_resolution is not None):
116 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
117 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
118 |
119 | print("INFO: input resolution : (",
120 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
121 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
122 |
123 | while (keep_processing):
124 |
125 | # if video file successfully open then read frame from video
126 |
127 | if (cap.isOpened()):
128 | ret, frame = cap.read()
129 |
130 | # when we reach the end of the video (file) exit cleanly
131 |
132 | if (ret == 0):
133 | keep_processing = False
134 | continue
135 |
136 | # rescale if specified
137 |
138 | if (args.rescale != 1.0):
139 | frame = cv2.resize(
140 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
141 |
142 | # if it is a still image, load that instead
143 |
144 | if (args.is_image):
145 | frame = cv2.imread(args.video_file, cv2.IMREAD_COLOR)
146 |
147 | # get parameters from track bars
148 |
149 | sigmaU = cv2.getTrackbarPos("sigma U", window_nameU)
150 | sigmaL = cv2.getTrackbarPos("sigma L", window_nameL)
151 |
152 | # check sigma values are at least 1
153 |
154 | sigmaU = max(1, sigmaU)
155 | sigmaL = max(1, sigmaL)
156 |
157 | # check sigmas are ordered correctly (such that sigmaU > sigmaL)
158 |
159 | if (sigmaL >= sigmaU) and (sigmaU > 1):
160 | sigmaL = sigmaU - 1
161 | print("auto-correcting sigmas such that U > L")
162 |
163 | # convert to grayscale
164 |
165 | gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
166 |
167 | # perform smoothing on the image using a Gaussian smoothing mask -
168 | # specifying a 0x0 mask size means the size is auto-computed from
169 | # the sigma values
170 |
171 | smoothedU = cv2.GaussianBlur(gray_frame, (0, 0), sigmaU)
172 | smoothedL = cv2.GaussianBlur(gray_frame, (0, 0), sigmaL)
173 |
174 | # perform abs_diff() to get DoG
175 |
176 | DoG = cv2.absdiff(smoothedU, smoothedL)
177 |
178 | # auto-scale to full 0 -> 255 range for display
179 |
180 | cv2.normalize(DoG, DoG, 0, 255, cv2.NORM_MINMAX)
181 |
182 | # display image
183 |
184 | cv2.imshow(window_name, frame)
185 | cv2.imshow(window_nameU, smoothedU)
186 | cv2.imshow(window_nameL, smoothedL)
187 | cv2.imshow(window_nameDoG, DoG)
188 |
189 | # start the event loop - essential
190 |
191 | # cv2.waitKey() is a keyboard binding function (argument is the time in
192 | # ms). It waits for specified milliseconds for any keyboard event.
193 | # If you press any key in that time, the program continues.
194 | # If 0 is passed, it waits indefinitely for a key stroke.
195 | # (bitwise and with 0xFF to extract least significant byte of
196 | # multi-byte response)
197 |
198 | # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
199 | key = cv2.waitKey(40) & 0xFF
200 |
201 | # It can also be set to detect specific key strokes by recording which
202 | # key is pressed
203 |
206 | # e.g. if user presses "x" then exit / press "f" for fullscreen
207 | # display
208 |
209 | if (key == ord('x')):
210 | keep_processing = False
211 | elif (key == ord('f')):
212 | cv2.setWindowProperty(
213 | window_nameDoG,
214 | cv2.WND_PROP_FULLSCREEN,
215 | cv2.WINDOW_FULLSCREEN)
216 |
217 | # close all windows
218 |
219 | cv2.destroyAllWindows()
220 |
221 | else:
222 | print("No video file specified or camera connected.")
223 |
224 | #####################################################################
225 |
--------------------------------------------------------------------------------
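The band-pass behaviour of DoG.py above reduces to three OpenCV calls: two Gaussian blurs at different sigmas, an absolute difference, and a normalisation for display. A minimal single-image sketch of the same pipeline (`image.jpg` is a hypothetical input file, not part of this repository):

```
# minimal Difference of Gaussian (DoG) sketch on a single image
import cv2

gray = cv2.imread("image.jpg", cv2.IMREAD_GRAYSCALE)

sigma_l, sigma_u = 1, 2  # lower / upper Gaussian standard deviations

# (0, 0) kernel size -> mask size auto-computed from each sigma
smoothed_l = cv2.GaussianBlur(gray, (0, 0), sigma_l)
smoothed_u = cv2.GaussianBlur(gray, (0, 0), sigma_u)

# DoG = absolute difference of the two smoothed images
dog = cv2.absdiff(smoothed_u, smoothed_l)

# stretch to the full 0 -> 255 range for display
dog = cv2.normalize(dog, None, 0, 255, cv2.NORM_MINMAX)

cv2.imshow("DoG", dog)
cv2.waitKey(0)
cv2.destroyAllWindows()
```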
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python Computer Vision OpenCV Teaching Examples
2 |
3 | OpenCV Python computer vision examples used for teaching within the undergraduate Computer Science programme
4 | at [Durham University](http://www.durham.ac.uk) (UK) by [Prof. Toby Breckon](https://breckon.org/toby/).
5 |
6 | 
7 |
8 | All tested with [OpenCV](http://www.opencv.org) 3.x / 4.x and Python 3.x.
9 |
10 | ```
11 | # Example : <................................> processing from a video file
12 | # specified on the command line (e.g. python FILE.py video_file) or from an
13 | # attached web camera
14 | ```
15 | ---
16 |
17 | ### Background:
18 |
19 | Directly adapted (and in some cases extended/added to) from the [C++](https://github.com/tobybreckon/cpp-examples-ipcv.git) and earlier [C](https://github.com/tobybreckon/c-examples-ipcv.git) language teaching examples used to generate the video examples within the ebook version of:
20 |
21 | [Dictionary of Computer Vision and Image Processing](http://dx.doi.org/10.1002/9781119286462) (R.B. Fisher, T.P. Breckon, K. Dawson-Howe, A. Fitzgibbon, C. Robertson, E. Trucco, C.K.I. Williams), Wiley, 2014.
22 | [[Google Books](http://books.google.co.uk/books?id=TaEQAgAAQBAJ&lpg=PP1&dq=isbn%3A1118706811&pg=PP1v=onepage&q&f=false)] [[doi](http://dx.doi.org/10.1002/9781119286462)]
23 |
24 | Notably, the [C++](https://github.com/tobybreckon/cpp-examples-ipcv.git) examples may contain further speed optimizations in some cases.
25 |
26 | A related supporting set of [Python Image Processing OpenCV Teaching Examples](https://github.com/tobybreckon/python-examples-ip.git) are also available covering basic image processing operations.
27 |
28 | ---
29 |
30 | ### How to download and run:
31 |
32 | Download each file as needed, or download the entire repository and run each example as follows:
33 |
34 | ```
35 | git clone https://github.com/tobybreckon/python-examples-cv.git
36 | cd python-examples-cv
37 | python3 ./<example name>.py [optional video file]
38 | ```
39 |
40 | Demo source code is provided _"as is"_ to aid learning and understanding of topics on the course and beyond.
41 |
42 | Most run with a webcam connected or from a command line supplied video file of a format OpenCV supports on your system (otherwise edit the script to provide your own image source). For example, each individual ```.py``` example file can be used as follows:
43 |
44 | ```
45 | $ python3 ./generic_interface.py -h
46 | usage: generic_interface.py [-h] [-c CAMERA_TO_USE] [-r RESCALE] [-fs]
47 | [video_file]
48 |
49 | Perform ./generic_interface.py example operation on incoming camera/video
50 | image
51 |
52 | positional arguments:
53 | video_file specify optional video file
54 |
55 | optional arguments:
56 | -h, --help show this help message and exit
57 | -c CAMERA_TO_USE, --camera_to_use CAMERA_TO_USE
58 | specify camera to use
59 | -r RESCALE, --rescale RESCALE
60 | rescale image by this factor
61 |
62 | ```
63 |
64 | For several of the demos that display just a single output window - press the _"f"_ key to run fullscreen. In all examples press _"x"_ to exit.
65 |
66 | Use the script ```sh download-models.sh``` to download the CNN model files associated with some examples.
67 |
68 | ---
69 |
70 | ### Re-usable Exemplar Components (Python Classes):
71 |
72 | This codebase contains the following re-usable exemplar elements:
73 |
74 | - ```camera_stream.py``` - a re-usable threaded camera class, that is call compatible with the existing OpenCV VideoCapture class, designed to always deliver the latest frame from a single camera without buffering delays (used by all examples if available).
75 |
76 | - ```h_concatenate()``` - a re-usable function for horizontal image concatenation for display in a single window, handling variations in size/channels (see ```chromaticity_lightness.py```).
77 |
78 | ---
79 |
80 | ### References:
81 |
82 | If referencing these examples in your own work (e.g. _"... based on the implementation of REF..."_), please reference the related research work from which these sample OpenCV reference implementations were derived (in terms of parameter choices etc.), presented here in BibTeX format.
83 |
84 | For the SGBM stereo vision and camera calibration examples, reference:
85 | ```
86 | @Article{mroz12stereo,
87 | author = {Mroz, F. and Breckon, T.P.},
88 | title = {An Empirical Comparison of Real-time Dense Stereo Approaches for use in the Automotive Environment},
89 | journal = {EURASIP Journal on Image and Video Processing},
90 | year = {2012},
91 | volume = {2012},
92 | number = {13},
93 | pages = {1-19},
94 | publisher = {Springer},
95 | url = {https://breckon.org/toby/publications/papers/mroz12stereo.pdf},
96 | doi = {10.1186/1687-5281-2012-13}
97 | }
98 | ```
99 |
100 | For the Mixture of Gaussian (MOG) background subtraction and Kalman filtering example, reference:
101 | ```
102 | @InProceedings{kundegorski14photogrammetric,
103 | author = {Kundegorski, M.E. and Breckon, T.P.},
104 | title = {A Photogrammetric Approach for Real-time 3D Localization and Tracking of Pedestrians in Monocular Infrared Imagery},
105 | booktitle = {Proc. SPIE Optics and Photonics for Counterterrorism, Crime Fighting and Defence},
106 | year = {2014},
107 | month = {September},
108 | volume = {9253},
109 | number = {01},
110 | publisher = {SPIE},
111 | pages = {1-16},
112 | url = {https://breckon.org/toby/publications/papers/kundegorski14photogrammetric.pdf},
113 | doi = {10.1117/12.2065673}
114 | }
115 | ```
116 |
117 |
118 | For the DoG, Canny, contour, Harris and Sobel examples, please reference:
119 | ```
120 | @Book{solomonbreckon10fundamentals,
121 | author = {Solomon, C.J. and Breckon, T.P.},
122 | title = {Fundamentals of Digital Image Processing: A Practical Approach with Examples in Matlab},
123 | publisher = {Wiley-Blackwell},
124 | year = {2010},
125 | isbn = {0470844736},
126 | doi = {10.1002/9780470689776},
127 | note = {ISBN-13: 978-0470844731},
128 | }
129 | ```
130 |
131 | For all other examples reference the original paper as outlined in the OpenCV manual or the header comment of the ```.py``` example file.
132 |
133 | ---
134 |
135 | If you find any bugs raise an issue (or much better still submit a git pull request with a fix) - toby.breckon@durham.ac.uk
136 |
137 | _"may the source be with you"_ - anon.
138 |
--------------------------------------------------------------------------------
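As a usage sketch for the ```camera_stream.py``` class described in the README above (per its description, call-compatible with the OpenCV VideoCapture class), with the same try/except fallback pattern used throughout these examples:

```
# minimal usage sketch of the threaded camera_stream class
import cv2

try:
    import camera_stream
    cap = camera_stream.CameraVideoStream()  # threaded, latest-frame
except BaseException:
    print("INFO: camera_stream class not found - camera input may be buffered")
    cap = cv2.VideoCapture()  # OpenCV default (may buffer frames)

if cap.open(0):  # camera id 0
    while True:
        ret, frame = cap.read()  # latest available frame
        if not ret:
            break
        cv2.imshow("live", frame)
        if (cv2.waitKey(40) & 0xFF) == ord('x'):  # press "x" to exit
            break

cv2.destroyAllWindows()
```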
/calibrate_camera.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : perform intrinsic calibration of a connected camera
4 |
5 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
6 |
7 | # Copyright (c) 2018-2021 Department of Computer Science,
8 | # Durham University, UK
9 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
10 |
11 | # Acknowledgements:
12 |
13 | # http://opencv-python-tutroals.readthedocs.org/en/latest/ \
14 | # py_tutorials/py_calib3d/py_table_of_contents_calib3d/py_table_of_contents_calib3d.html
15 |
16 | # http://docs.ros.org/electric/api/cob_camera_calibration/html/calibrator_8py_source.html
17 |
18 | #####################################################################
19 |
20 | import cv2
21 | import argparse
22 | import sys
23 | import numpy as np
24 |
25 | #####################################################################
26 |
27 | keep_processing = True
28 |
29 | # parse command line arguments for camera ID or video file
30 |
31 | parser = argparse.ArgumentParser(
32 | description='Perform ' +
33 | sys.argv[0] +
34 | ' example operation on incoming camera/video image')
35 | parser.add_argument(
36 | "-c",
37 | "--camera_to_use",
38 | type=int,
39 | help="specify camera to use",
40 | default=0)
41 | parser.add_argument(
42 | "-r",
43 | "--rescale",
44 | type=float,
45 | help="rescale image by this factor",
46 | default=1.0)
47 | parser.add_argument(
48 | "-s",
49 | "--set_resolution",
50 | type=int,
51 | nargs=2,
52 | help='override default camera resolution as W H')
53 | parser.add_argument(
54 | "-cbx",
55 | "--chessboardx",
56 | type=int,
57 | help="specify number of internal chessboard squares \
58 | (corners) in x-direction",
59 | default=6)
60 | parser.add_argument(
61 | "-cby",
62 | "--chessboardy",
63 | type=int,
64 | help="specify number of internal chessboard squares \
65 | (corners) in y-direction",
66 | default=8)
67 | parser.add_argument(
68 | "-cbw",
69 | "--chessboardw",
70 | type=float,
71 | help="specify width/height of chessboard squares in mm",
72 | default=40.0)
73 | parser.add_argument(
74 | "-i",
75 | "--iterations",
76 | type=int,
77 | help="specify number of iterations for each stage of optimisation",
78 | default=100)
79 | parser.add_argument(
80 | "-e",
81 | "--minimum_error",
82 | type=float,
83 | help="specify lower error threshold upon which to stop \
84 | optimisation stages",
85 | default=0.001)
86 | args = parser.parse_args()
87 |
88 | #####################################################################
89 |
90 | # define video capture object
91 |
92 | try:
93 | # to use a non-buffered camera stream (via a separate thread)
94 |
95 | import camera_stream
96 | cap = camera_stream.CameraVideoStream()
97 |
98 | except BaseException:
99 | # if not then just use OpenCV default
100 |
101 | print("INFO: camera_stream class not found - camera input may be buffered")
102 | cap = cv2.VideoCapture()
103 |
104 | # define display window names
105 |
106 | window_name = "Camera Input" # window name
107 | window_nameU = "Undistored (calibrated) Camera" # window name
108 |
109 | #####################################################################
110 |
111 | # perform intrinsic calibration (removal of image distortion in image)
112 |
113 | do_calibration = False
114 | termination_criteria_subpix = (
115 | cv2.TERM_CRITERIA_EPS +
116 | cv2.TERM_CRITERIA_MAX_ITER,
117 | args.iterations,
118 | args.minimum_error)
119 |
120 | # set up a set of real-world "object points" for the chessboard pattern
121 |
122 | patternX = args.chessboardx
123 | patternY = args.chessboardy
124 | square_size_in_mm = args.chessboardw
125 |
126 | # prepare object points, like (0,0,0), (1,0,0), (2,0,0) ..., ((patternX-1),(patternY-1),0)
127 |
128 | objp = np.zeros((patternX * patternY, 3), np.float32)
129 | objp[:, :2] = np.mgrid[0:patternX, 0:patternY].T.reshape(-1, 2)
130 | objp = objp * square_size_in_mm
131 |
132 | # create arrays to store object points and image points from all the images.
133 | objpoints = [] # 3d point in real world space
134 | imgpoints = [] # 2d points in image plane.
135 |
136 | #####################################################################
137 |
138 | # count number of chessboard detections
139 | chessboard_pattern_detections = 0
140 |
141 | print()
142 | print("--> hold up chessboard (grabbing images at 2 fps)")
143 | print("press c : to continue to calibration")
144 |
145 | #####################################################################
146 |
147 | # open connected camera
148 |
149 | if cap.open(args.camera_to_use):
150 |
151 | # override default camera resolution
152 |
153 | if (args.set_resolution is not None):
154 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
155 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
156 |
157 | print("INFO: input resolution : (",
158 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
159 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
160 |
161 | while (not (do_calibration)):
162 |
163 | # grab frames from camera
164 |
165 | ret, frame = cap.read()
166 |
167 | # rescale if specified
168 |
169 | if (args.rescale != 1.0):
170 | frame = cv2.resize(frame, (0, 0), fx=args.rescale, fy=args.rescale)
171 |
172 | # convert to grayscale
173 |
174 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
175 |
176 | # Find the chess board corners in the image
177 | # (change flags to perhaps improve detection ?)
178 |
179 | ret, corners = cv2.findChessboardCorners(
180 | gray, (patternX, patternY), None, cv2.CALIB_CB_ADAPTIVE_THRESH |
181 | cv2.CALIB_CB_FAST_CHECK | cv2.CALIB_CB_NORMALIZE_IMAGE)
182 |
183 | # If found, add object points, image points (after refining them)
184 |
185 | if (ret):
186 |
187 | chessboard_pattern_detections += 1
188 |
189 | # add object points to global list
190 |
191 | objpoints.append(objp)
192 |
193 | # refine corner locations to sub-pixel accuracy and add to list
194 |
195 | corners_sp = cv2.cornerSubPix(
196 | gray, corners, (11, 11), (-1, -1), termination_criteria_subpix)
197 | imgpoints.append(corners_sp)
198 |
199 | # Draw and display the corners
200 |
201 | drawboard = cv2.drawChessboardCorners(
202 | frame, (patternX, patternY), corners_sp, ret)
203 |
204 | text = 'detected: ' + str(chessboard_pattern_detections)
205 | cv2.putText(drawboard, text, (10, 25),
206 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, 8)
207 |
208 | cv2.imshow(window_name, drawboard)
209 | else:
210 | text = 'detected: ' + str(chessboard_pattern_detections)
211 | cv2.putText(frame, text, (10, 25),
212 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, 8)
213 |
214 | cv2.imshow(window_name, frame)
215 |
216 | # start the event loop
217 |
218 | key = cv2.waitKey(500) & 0xFF # wait 500 ms. between frames
219 | if (key == ord('c')):
220 | do_calibration = True
221 |
222 | else:
223 | print("Cannot open connected camera.")
224 | exit()
225 |
226 | #####################################################################
227 |
228 | # check we detected some patterns within the first loop
229 |
230 | if (chessboard_pattern_detections == 0):
231 | print("No calibration patterns detected - exiting.")
232 | exit()
233 |
234 | #####################################################################
235 |
236 | # perform calibration - uses [Zhang, 2000]
237 |
238 | print("START - intrinsic calibration ...")
239 |
240 | ret, K, D, rvecs, tvecs = cv2.calibrateCamera(
241 | objpoints, imgpoints, gray.shape[::-1], None, None)
242 |
243 | print("FINISHED - intrinsic calibration")
244 |
245 | # print output in readable format
246 |
247 | print()
248 | print("Intrinsic Camera Calibration Matrix, K - from intrinsic calibration:")
249 | print("(format as follows: fx, fy - focal lengths / cx, cy - optical centers)")
250 | print("[fx, 0, cx]\n[0, fy, cy]\n[0, 0, 1]")
251 | np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)})
252 | print(K)
253 | print()
254 | print("Intrinsic Distortion Co-effients, D - from intrinsic calibration:")
255 | print("(k1, k2, k3 - radial p1, p2 - tangential - distortion coefficients)")
256 | print("[k1, k2, p1, p2, k3]")
257 | np.set_printoptions(formatter={'float': lambda x: "{0:0.5f}".format(x)})
258 | print(D)
259 | print()
260 | print("Image resolution used (width, height): ", np.flip(frame.shape[:2]))
261 |
262 | #####################################################################
263 |
264 | # perform undistortion (i.e. calibration) of the images
265 |
266 | keep_processing = True
267 |
268 | print()
269 | print("-> performing undistortion")
270 | print("press x : to exit")
271 |
272 | while (keep_processing):
273 |
274 | # grab frames from camera
275 |
276 | ret, frame = cap.read()
277 |
278 | # undistort image using camera matrix K and distortion coefficients D
279 |
280 | undistorted = cv2.undistort(frame, K, D, None, None)
281 |
282 | # display both images
283 |
284 | cv2.imshow(window_name, frame)
285 | cv2.imshow(window_nameU, undistorted)
286 |
287 | # start the event loop - essential
288 |
289 | key = cv2.waitKey(40) & 0xFF # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
290 |
291 | if (key == ord('x')):
292 | keep_processing = False
293 |
294 | #####################################################################
295 |
296 | # close all windows and cams.
297 |
298 | cv2.destroyAllWindows()
299 |
300 | #####################################################################
301 |
--------------------------------------------------------------------------------
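A follow-up check that calibrate_camera.py does not itself perform is the mean reprojection error over all detected patterns - a standard measure of calibration quality. A minimal sketch reusing the variables as named in the script above (objpoints, imgpoints, rvecs, tvecs, K, D); this is an illustrative addition, not part of the original file:

```
# mean reprojection error across all detected chessboard patterns
import cv2

total_error = 0
for i in range(len(objpoints)):
    # re-project the 3D object points into the image using the
    # recovered pose (rvecs/tvecs) and intrinsics (K, D)
    projected, _ = cv2.projectPoints(
        objpoints[i], rvecs[i], tvecs[i], K, D)
    total_error += cv2.norm(
        imgpoints[i], projected, cv2.NORM_L2) / len(projected)

print("mean reprojection error:", total_error / len(objpoints))
```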
/canny.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : Canny edge detection for a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2016 School of Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 |
19 | #####################################################################
20 |
21 | keep_processing = True
22 |
23 | # parse command line arguments for camera ID or video file
24 |
25 | parser = argparse.ArgumentParser(
26 | description='Perform ' +
27 | sys.argv[0] +
28 | ' example operation on incoming camera/video image')
29 | parser.add_argument(
30 | "-c",
31 | "--camera_to_use",
32 | type=int,
33 | help="specify camera to use",
34 | default=0)
35 | parser.add_argument(
36 | "-r",
37 | "--rescale",
38 | type=float,
39 | help="rescale image by this factor",
40 | default=1.0)
41 | parser.add_argument(
42 | "-s",
43 | "--set_resolution",
44 | type=int,
45 | nargs=2,
46 | help='override default camera resolution as W H')
47 | parser.add_argument(
48 | "-fs",
49 | "--fullscreen",
50 | action='store_true',
51 | help="run in full screen mode")
52 | parser.add_argument(
53 | "-nc",
54 | "--nocontrols",
55 | action='store_true',
56 | help="no onscreen controls")
57 | parser.add_argument(
58 | 'video_file',
59 | metavar='video_file',
60 | type=str,
61 | nargs='?',
62 | help='specify optional video file')
63 | args = parser.parse_args()
64 |
65 | #####################################################################
66 |
67 | # this function is called as a call-back every time the trackbar is moved
68 | # (here we just do nothing)
69 |
70 |
71 | def nothing(x):
72 | pass
73 |
74 |
75 | #####################################################################
76 |
77 | # define video capture object
78 |
79 | try:
80 | # to use a non-buffered camera stream (via a separate thread)
81 |
82 | if not (args.video_file):
83 | import camera_stream
84 | cap = camera_stream.CameraVideoStream(use_tapi=True)
85 | else:
86 | cap = cv2.VideoCapture() # not needed for video files
87 |
88 | except BaseException:
89 | # if not then just use OpenCV default
90 |
91 | print("INFO: camera_stream class not found - camera input may be buffered")
92 | cap = cv2.VideoCapture()
93 |
94 | # define display window name
95 |
96 | window_name = "Live Camera Input" # window name
97 | window_name2 = "Canny Edges" # window name
98 |
99 | # if command line arguments are provided try to read video_name
100 | # otherwise default to capture from attached H/W camera
101 |
102 | if (((args.video_file) and (cap.open(str(args.video_file))))
103 | or (cap.open(args.camera_to_use))):
104 |
105 | # create window by name (as resizable)
106 |
107 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
108 | cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
109 | cv2.setWindowProperty(window_name2, cv2.WND_PROP_FULLSCREEN,
110 | cv2.WINDOW_FULLSCREEN & args.fullscreen)
111 |
112 | # add some track bar controllers for settings
113 |
114 | lower_threshold = 25
115 | upper_threshold = 120
116 | smoothing_neighbourhood = 3
117 | sobel_size = 3 # greater than 7 seems to crash
118 |
119 | if (not (args.nocontrols)):
120 | cv2.createTrackbar("lower", window_name2, lower_threshold,
121 | 255, nothing)
122 | cv2.createTrackbar("upper", window_name2, upper_threshold,
123 | 255, nothing)
124 | cv2.createTrackbar("smoothing", window_name2, smoothing_neighbourhood,
125 | 15, nothing)
126 | cv2.createTrackbar("sobel size", window_name2, sobel_size,
127 | 7, nothing)
128 |
129 | # override default camera resolution
130 |
131 | if (args.set_resolution is not None):
132 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
133 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
134 |
135 | print("INFO: input resolution : (",
136 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
137 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
138 |
139 | while (keep_processing):
140 |
141 | # if video file successfully open then read frame from video
142 |
143 | if (cap.isOpened()):
144 | ret, frame = cap.read()
145 |
146 | # when we reach the end of the video (file) exit cleanly
147 |
148 | if (ret == 0):
149 | keep_processing = False
150 | continue
151 |
152 | # rescale if specified
153 |
154 | if (args.rescale != 1.0):
155 | frame = cv2.resize(
156 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
157 |
158 | # get parameters from track bars
159 |
160 | if (not (args.nocontrols)):
161 | lower_threshold = cv2.getTrackbarPos("lower", window_name2)
162 | upper_threshold = cv2.getTrackbarPos("upper", window_name2)
163 | smoothing_neighbourhood = cv2.getTrackbarPos("smoothing",
164 | window_name2)
165 | sobel_size = cv2.getTrackbarPos("sobel size", window_name2)
166 |
167 | # check neighbourhood is at least 3 and odd
168 |
169 | smoothing_neighbourhood = max(3, smoothing_neighbourhood)
170 | if not (smoothing_neighbourhood % 2):
171 | smoothing_neighbourhood = smoothing_neighbourhood + 1
172 |
173 | sobel_size = max(3, sobel_size)
174 | if not (sobel_size % 2):
175 | sobel_size = sobel_size + 1
176 |
177 | # convert to grayscale
178 |
179 | gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
180 |
181 | # performing smoothing on the image using a Gaussian smoothing mask
182 | # of the selected neighbourhood size (see manual entry for GaussianBlur())
183 |
184 | smoothed = cv2.GaussianBlur(
185 | gray_frame, (smoothing_neighbourhood, smoothing_neighbourhood), 0)
186 |
187 | # perform canny edge detection
188 |
189 | canny = cv2.Canny(
190 | smoothed,
191 | lower_threshold,
192 | upper_threshold,
193 | apertureSize=sobel_size)
194 |
195 | # display image
196 |
197 | cv2.imshow(window_name, frame)
198 | cv2.imshow(window_name2, canny)
199 |
200 | # start the event loop - essential
201 |
202 | # cv2.waitKey() is a keyboard binding function (argument is the time in
203 | # milliseconds). It waits for specified milliseconds for any keyboard
204 | # event. If you press any key in that time, the program continues.
205 | # If 0 is passed, it waits indefinitely for a key stroke.
206 | # (bitwise and with 0xFF to extract least significant byte of
207 | # multi-byte response)
208 |
209 | # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
210 | key = cv2.waitKey(40) & 0xFF
211 |
212 | # It can also be set to detect specific key strokes by recording which
213 | # key is pressed
214 |
215 | # e.g. if user presses "x" then exit / press "f" for fullscreen
216 | # display
217 |
218 | if (key == ord('x')):
219 | keep_processing = False
220 | elif (key == ord('f')):
221 | cv2.setWindowProperty(
222 | window_name2,
223 | cv2.WND_PROP_FULLSCREEN,
224 | cv2.WINDOW_FULLSCREEN)
225 |
226 | # close all windows
227 |
228 | cv2.destroyAllWindows()
229 |
230 | else:
231 | print("No video file specified or camera connected.")
232 |
233 | #####################################################################
234 |
--------------------------------------------------------------------------------
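Stripped of the camera handling and trackbars, the edge detection in canny.py comes down to a blur followed by hysteresis thresholding. A minimal single-image sketch using the script's default parameters (`image.jpg` is a hypothetical input file):

```
# minimal Canny edge detection sketch on a single image
import cv2

gray = cv2.imread("image.jpg", cv2.IMREAD_GRAYSCALE)

# smooth first to suppress noise-induced edges
smoothed = cv2.GaussianBlur(gray, (3, 3), 0)

# hysteresis thresholds as in canny.py: gradients above 120 are strong
# edges, below 25 are discarded, in-between pixels are kept only when
# connected to a strong edge
edges = cv2.Canny(smoothed, 25, 120, apertureSize=3)

cv2.imshow("Canny Edges", edges)
cv2.waitKey(0)
cv2.destroyAllWindows()
```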
/chromaticity_lightness.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : perform live chromaticity/lightness display from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2018 Toby Breckon, Engineering & Computer Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | ##########################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import math
19 | import numpy as np
20 |
21 | ##########################################################################
22 |
23 | keep_processing = True
24 |
25 | # parse command line arguments for camera ID or video file
26 |
27 | parser = argparse.ArgumentParser(
28 | description='Perform ' +
29 | sys.argv[0] +
30 | ' example operation on incoming camera/video image')
31 | parser.add_argument(
32 | "-c",
33 | "--camera_to_use",
34 | type=int,
35 | help="specify camera to use",
36 | default=0)
37 | parser.add_argument(
38 | "-r",
39 | "--rescale",
40 | type=float,
41 | help="rescale image by this factor",
42 | default=1.0)
43 | parser.add_argument(
44 | "-s",
45 | "--set_resolution",
46 | type=int,
47 | nargs=2,
48 | help='override default camera resolution as W H')
49 | parser.add_argument(
50 | "-fs",
51 | "--fullscreen",
52 | action='store_true',
53 | help="run in full screen mode")
54 | parser.add_argument(
55 | 'video_file',
56 | metavar='video_file',
57 | type=str,
58 | nargs='?',
59 | help='specify optional video file')
60 | args = parser.parse_args()
61 |
62 | ##########################################################################
63 |
64 | # concatenate two RGB/grayscale images horizontally (left to right)
65 | # handling differing channel numbers or image heights in the input
66 |
67 |
68 | def h_concatenate(img1, img2):
69 |
70 | # get size and channels for both images
71 |
72 | height1 = img1.shape[0]
73 |
74 | if (len(img1.shape) == 2):
75 | channels1 = 1
76 | else:
77 | channels1 = img1.shape[2]
78 |
79 | height2 = img2.shape[0]
80 | width2 = img2.shape[1]
81 | if (len(img2.shape) == 2):
82 | channels2 = 1
83 | else:
84 | channels2 = img2.shape[2]
85 |
86 | # make all images 3 channel, or assume all same channel
87 |
88 | if ((channels1 > channels2) and (channels1 == 3)):
89 | out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
90 | out1 = img1
91 | elif ((channels2 > channels1) and (channels2 == 3)):
92 | out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
93 | out2 = img2
94 | else: # both must be equal
95 | out1 = img1
96 | out2 = img2
97 |
98 | # height of first image is master height, width can remain unchanged
99 |
100 | if (height1 != height2):
101 | out2 = cv2.resize(out2, (width2, height1))
102 |
103 | return np.hstack((out1, out2))
104 |
105 | ##########################################################################
106 |
107 | # define video capture object
108 |
109 |
110 | try:
111 | # to use a non-buffered camera stream (via a separate thread)
112 |
113 | if not (args.video_file):
114 | import camera_stream
115 | cap = camera_stream.CameraVideoStream()
116 | else:
117 | cap = cv2.VideoCapture() # not needed for video files
118 |
119 | except BaseException:
120 | # if not then just use OpenCV default
121 |
122 | print("INFO: camera_stream class not found - camera input may be buffered")
123 | cap = cv2.VideoCapture()
124 |
125 | # define display window name
126 |
127 | window_name = "Live - [Original RGB | Chromaticity {r,g,b} | Lightness (l)]"
128 |
129 | # if command line arguments are provided try to read video_name
130 | # otherwise default to capture from attached camera
131 |
132 | if (((args.video_file) and (cap.open(str(args.video_file))))
133 | or (cap.open(args.camera_to_use))):
134 |
135 | # create window by name (as resizable)
136 |
137 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
138 |
139 | # override default camera resolution
140 |
141 | if (args.set_resolution is not None):
142 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
143 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
144 |
145 | while (keep_processing):
146 |
147 | # start a timer (to see how long processing and display takes)
148 |
149 | start_t = cv2.getTickCount()
150 |
151 | # if camera /video file successfully open then read frame
152 |
153 | if (cap.isOpened()):
154 | ret, frame = cap.read()
155 |
156 | # when we reach the end of the video (file) exit cleanly
157 |
158 | if (ret == 0):
159 | keep_processing = False
160 | continue
161 |
162 | # rescale if specified
163 |
164 | if (args.rescale != 1.0):
165 | frame = cv2.resize(
166 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
167 |
168 | # compute chromaticity as c = c / SUM(RGB) for c = {R, G, B} with
169 | # safety for divide by zero errors
170 | # chromaticity {r,g,b} range is floating point 0 -> 1
171 |
172 | # N.B. if extracting chromaticity {r,g} from this remember to
173 | # take channels r = 2 and g = 1 due to OpenCV BGR channel ordering
174 |
175 | chromaticity = np.zeros(frame.shape).astype(np.float32)
176 | sum_channel = (frame[:, :, 0].astype(np.float32)
177 | + frame[:, :, 1].astype(np.float32)
178 | + frame[:, :, 2].astype(np.float32)
179 | + 1)
180 | chromaticity[:, :, 0] = (frame[:, :, 0] / sum_channel)
181 | chromaticity[:, :, 1] = (frame[:, :, 1] / sum_channel)
182 | chromaticity[:, :, 2] = (frame[:, :, 2] / sum_channel)
183 |
184 | # compute lightness as an integer = (R + G + B) / 3 (range is 0 -> 255)
185 |
186 | lightness = np.floor(sum_channel / 3)
187 |
188 | # display image as a concatenated triple of [ RGB | Chromaticity |
189 | # Lightness ] adjusting back to 8-bit and scaling appropriately
190 |
191 | cv2.imshow(
192 | window_name,
193 | h_concatenate(
194 | h_concatenate(
195 | frame,
196 | (chromaticity *
197 | 255).astype(
198 | np.uint8)),
199 | lightness.astype(
200 | np.uint8)))
201 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
202 | cv2.WINDOW_FULLSCREEN & args.fullscreen)
203 |
204 | # stop the timer and convert to ms. (to see how long processing and
205 | # display takes)
206 |
207 | stop_t = ((cv2.getTickCount() - start_t) /
208 | cv2.getTickFrequency()) * 1000
209 |
210 | # start the event loop - essential
211 |
212 | # cv2.waitKey() is a keyboard binding function (argument is the time in
213 | # ms). It waits for specified milliseconds for any keyboard event.
214 | # If you press any key in that time, the program continues.
215 | # If 0 is passed, it waits indefinitely for a key stroke.
216 | # (bitwise and with 0xFF to extract least significant byte of
217 | # multi-byte response)
218 |
219 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
220 | # 25 fps = 40 ms)
221 |
222 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
223 |
224 | # It can also be set to detect specific key strokes by recording which
225 | # key is pressed
226 |
227 | # e.g. if user presses "x" then exit / press "f" for fullscreen
228 | # display
229 |
230 | if (key == ord('x')):
231 | keep_processing = False
232 | elif (key == ord('f')):
233 | args.fullscreen = not (args.fullscreen)
234 |
235 | # close all windows
236 |
237 | cv2.destroyAllWindows()
238 |
239 | else:
240 | print("No video file specified or camera connected.")
241 |
242 | ##########################################################################
243 |
--------------------------------------------------------------------------------
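The per-channel loop in chromaticity_lightness.py can equivalently be written with numpy broadcasting. A minimal single-image sketch of the same maths (`image.jpg` is a hypothetical input file):

```
# chromaticity / lightness via numpy broadcasting on a single image
import cv2
import numpy as np

frame = cv2.imread("image.jpg", cv2.IMREAD_COLOR)

# per-pixel channel sum (+1 guards against divide-by-zero)
sum_channel = frame.astype(np.float32).sum(axis=2) + 1

# c = c / (R + G + B) for each channel c - range 0 -> 1
chromaticity = frame.astype(np.float32) / sum_channel[:, :, np.newaxis]

# lightness as an integer (R + G + B) / 3 - range 0 -> 255
lightness = np.floor(sum_channel / 3).astype(np.uint8)

cv2.imshow("chromaticity", (chromaticity * 255).astype(np.uint8))
cv2.imshow("lightness", lightness)
cv2.waitKey(0)
cv2.destroyAllWindows()
```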
/cnn_ssd_detection.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : perform live object detection using a pre-trained SSD CNN model
4 | # and display from a video file specified on the command line
5 | # (e.g. python FILE.py video_file) or from an attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2017 Department of Computer Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | # based on provided examples at:
14 | # https://github.com/opencv/opencv/tree/master/samples/dnn
15 | # see here for how to load Caffe/TensorFlow/... models etc.
16 |
17 | # implements a version of:
18 |
19 | # MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Apps.
20 | # Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang,
21 | # Tobias Weyand, Marco Andreetto, Hartwig Adam
22 | # research paper: https://arxiv.org/abs/1704.04861
23 |
24 | # requires Caffe network model files (.prototxt / .caffemodel) downloaded from:
25 | # https://github.com/chuanqi305/MobileNet-SSD/
26 |
27 | #####################################################################
28 |
29 | import cv2
30 | import argparse
31 | import sys
32 | import math
33 |
34 | #####################################################################
35 |
36 | keep_processing = True
37 |
38 | # parse command line arguments for camera ID or video file
39 |
40 | parser = argparse.ArgumentParser(
41 | description='Perform ' +
42 | sys.argv[0] +
43 | ' example operation on incoming camera/video image')
44 | parser.add_argument(
45 | "-c",
46 | "--camera_to_use",
47 | type=int,
48 | help="specify camera to use",
49 | default=0)
50 | parser.add_argument(
51 | "-r",
52 | "--rescale",
53 | type=float,
54 | help="rescale image by this factor",
55 | default=1.0)
56 | parser.add_argument(
57 | 'video_file',
58 | metavar='video_file',
59 | type=str,
60 | nargs='?',
61 | help='specify optional video file')
62 | args = parser.parse_args()
63 |
64 | cnn_model_to_load = "MobileNetSSD_deploy"
65 |
66 | #####################################################################
67 |
68 |
69 | def trackbar_callback(pos):
70 | global confidence_threshold
71 | confidence_threshold = pos / 100.0
72 |
73 | #####################################################################
74 |
75 | # define video capture object
76 |
77 |
78 | try:
79 | # to use a non-buffered camera stream (via a separate thread)
80 |
81 | if not (args.video_file):
82 | import camera_stream
83 | cap = camera_stream.CameraVideoStream()
84 | else:
85 | cap = cv2.VideoCapture() # not needed for video files
86 |
87 | except BaseException:
88 | # if not then just use OpenCV default
89 |
90 | print("INFO: camera_stream class not found - camera input may be buffered")
91 | cap = cv2.VideoCapture()
92 |
93 | # define display window name
94 |
95 | window_name = "Live Object Detection - CNN: " + cnn_model_to_load
96 |
97 | # if command line arguments are provided try to read video_name
98 | # otherwise default to capture from attached camera
99 |
100 | if (((args.video_file) and (cap.open(str(args.video_file))))
101 | or (cap.open(args.camera_to_use))):
102 |
103 | # create window by name (as resizable)
104 |
105 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
106 |
107 | # add track bar to window for confidence threshold
108 |
109 | confidence_threshold = 0.7
110 | cv2.createTrackbar('Confidence threshold, %', window_name, int(
111 | confidence_threshold * 100), 99, trackbar_callback)
112 |
113 | # init CNN model - here from Caffe, although OpenCV can import from
114 | # most common deep learning frameworks
115 |
116 | net = cv2.dnn.readNetFromCaffe(
117 | cnn_model_to_load + ".prototxt",
118 | cnn_model_to_load + ".caffemodel")
119 |
120 | # provide mappings from class numbers to string labels - these are the
121 | # PASCAL VOC classes
122 |
123 | classNames = {0: 'background',
124 | 1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat',
125 | 5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat', 9: 'chair',
126 | 10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse',
127 | 14: 'motorbike', 15: 'person', 16: 'pottedplant',
128 | 17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor'}
129 |
130 | while (keep_processing):
131 |
132 | # start a timer (to see how long processing and display takes)
133 |
134 | start_t = cv2.getTickCount()
135 |
136 | # if video file successfully open then read frame from video
137 |
138 | if (cap.isOpened()):
139 | ret, frame = cap.read()
140 |
141 | # when we reach the end of the video (file) exit cleanly
142 |
143 | if (ret == 0):
144 | keep_processing = False
145 | continue
146 |
147 | # rescale if specified
148 |
149 | if (args.rescale != 1.0):
150 | frame = cv2.resize(
151 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
152 |
153 | # get size of input
154 |
155 | cols = frame.shape[1]
156 | rows = frame.shape[0]
157 |
158 | # transform the image into a network input "blob" (i.e. tensor) by
159 | # scaling the image to the input size of the network, in this case
160 | # not swapping the R and B channels (swapping is needed when a network
161 | # is trained on RGB rather than OpenCV's BGR) and re-scaling the
162 | # inputs from 0->255 to -1->1 via the per-channel mean and scale factor
163 |
164 | swapRBchannels = False # do not swap channels
165 | crop = False # crop image or not
166 | meanChannelVal = 255.0 / 2.0 # mean channel value
167 |
168 | inWidth = 300 # network input width
169 | inHeight = 300 # network input height
170 | inScaleFactor = 0.007843 # input scale factor
171 |
172 | blob = cv2.dnn.blobFromImage(
173 | frame,
174 | inScaleFactor,
175 | (inWidth,
176 | inHeight),
177 | (meanChannelVal,
178 | meanChannelVal,
179 | meanChannelVal),
180 | swapRBchannels,
181 | crop)
182 |
183 | # set this transformed image -> tensor blob as the network input
184 |
185 | net.setInput(blob)
186 |
187 | # perform forward inference on the network
188 |
189 | detections = net.forward()
190 |
191 | # process the detections from the CNN to give bounding boxes
192 | # i.e. for each detection returned from the network
193 |
194 | for i in range(detections.shape[2]):
195 |
196 | # extract the confidence of the detection
197 |
198 | confidence = detections[0, 0, i, 2]
199 |
200 | # provided that is above a threshold
201 |
202 | if confidence > confidence_threshold:
203 |
204 | # get the class number id and the bounding box
205 |
206 | class_id = int(detections[0, 0, i, 1])
207 |
208 | xLeftBottom = int(detections[0, 0, i, 3] * cols)
209 | yLeftBottom = int(detections[0, 0, i, 4] * rows)
210 | xRightTop = int(detections[0, 0, i, 5] * cols)
211 | yRightTop = int(detections[0, 0, i, 6] * rows)
212 |
213 | # draw the bounding box on the frame
214 |
215 | cv2.rectangle(frame, (xLeftBottom, yLeftBottom),
216 | (xRightTop, yRightTop), (0, 255, 0))
217 |
218 | # look up the class name based on the class id and draw it on
219 | # the frame also
220 |
221 | if class_id in classNames:
222 | label = classNames[class_id] + (": %.2f" % confidence)
223 | labelSize, baseLine = cv2.getTextSize(
224 | label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
225 |
226 | yLeftBottom = max(yLeftBottom, labelSize[1])
227 | cv2.rectangle(
228 | frame, (
229 | xLeftBottom, yLeftBottom -
230 | labelSize[1]
231 | ), (
232 | xLeftBottom + labelSize[0],
233 | yLeftBottom + baseLine
234 | ), (255, 255, 255),
235 | cv2.FILLED
236 | )
237 | cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
238 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
239 |
240 | # Display efficiency information - the function getPerfProfile returns
241 | # the overall time for inference from the network
242 |
243 | t, _ = net.getPerfProfile()
244 | inference_t = (t * 1000.0 / cv2.getTickFrequency())
245 | label = ('Inference time: %.2f ms' % inference_t) + \
246 | (' (Framerate: %.2f fps' % (1000 / inference_t)) + ')'
247 | cv2.putText(frame, label, (0, 15),
248 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
249 |
250 | # display image
251 |
252 | cv2.imshow(window_name, frame)
253 |
254 | # stop the timer and convert to ms. (to see how long processing and
255 | # display takes)
256 |
257 | stop_t = ((cv2.getTickCount() - start_t) /
258 | cv2.getTickFrequency()) * 1000
259 |
260 | # start the event loop - essential
261 |
262 | # cv2.waitKey() is a keyboard binding function (argument is the time in
263 | # ms). It waits for specified milliseconds for any keyboard event.
264 | # If you press any key in that time, the program continues.
265 | # If 0 is passed, it waits indefinitely for a key stroke.
266 | # (bitwise and with 0xFF to extract least significant byte of
267 | # multi-byte response)
268 |
269 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
270 | # 25 fps = 40 ms)
271 |
272 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
273 |
274 | # It can also be set to detect specific key strokes by recording which
275 | # key is pressed
276 |
277 | # e.g. if user presses "x" then exit / press "f" for fullscreen
278 |
279 | if (key == ord('x')):
280 | keep_processing = False
281 | elif (key == ord('f')):
282 | cv2.setWindowProperty(
283 | window_name,
284 | cv2.WND_PROP_FULLSCREEN,
285 | cv2.WINDOW_FULLSCREEN)
286 |
287 | # close all windows
288 |
289 | cv2.destroyAllWindows()
290 |
291 | else:
292 | print("No video file specified or camera connected.")
293 |
294 | #####################################################################
295 |
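296 | # A minimal sketch (for reference only - defined but never called) of
297 | # the same SSD detection pipeline reduced to a single still image; the
298 | # image path argument is hypothetical and the model file names are
299 | # assumed to match those downloaded for use above
300 |
301 | def detect_ssd_single(image_path, model="MobileNetSSD_deploy",
302 |                       threshold=0.7):
303 |     net = cv2.dnn.readNetFromCaffe(model + ".prototxt",
304 |                                    model + ".caffemodel")
305 |     img = cv2.imread(image_path)
306 |     net.setInput(cv2.dnn.blobFromImage(img, 0.007843, (300, 300),
307 |                                        (127.5, 127.5, 127.5), False, False))
308 |     detections = net.forward()
309 |     h, w = img.shape[:2]
310 |     found = []
311 |     for i in range(detections.shape[2]):
312 |         confidence = float(detections[0, 0, i, 2])
313 |         if confidence > threshold:
314 |             class_id = int(detections[0, 0, i, 1])
315 |             box = (detections[0, 0, i, 3:7] * [w, h, w, h]).astype(int)
316 |             found.append((class_id, confidence, box))
317 |     return found
318 |
319 | #####################################################################
320 |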
--------------------------------------------------------------------------------
/contour_edges.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : contour edges for a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2016 School of Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 |
19 | #####################################################################
20 |
21 | keep_processing = True
22 |
23 | # parse command line arguments for camera ID or video file
24 |
25 | parser = argparse.ArgumentParser(
26 | description='Perform ' +
27 | sys.argv[0] +
28 | ' example operation on incoming camera/video image')
29 | parser.add_argument(
30 | "-c",
31 | "--camera_to_use",
32 | type=int,
33 | help="specify camera to use",
34 | default=0)
35 | parser.add_argument(
36 | "-r",
37 | "--rescale",
38 | type=float,
39 | help="rescale image by this factor",
40 | default=1.0)
41 | parser.add_argument(
42 | "-s",
43 | "--set_resolution",
44 | type=int,
45 | nargs=2,
46 | help='override default camera resolution as H W')
47 | parser.add_argument(
48 | 'video_file',
49 | metavar='video_file',
50 | type=str,
51 | nargs='?',
52 | help='specify optional video file')
53 | args = parser.parse_args()
54 |
55 | #####################################################################
56 |
57 | # this function is called as a call-back everytime the trackbar is moved
58 | # (here we just do nothing)
59 |
60 |
61 | def nothing(x):
62 | pass
63 |
64 | #####################################################################
65 |
66 | # define video capture object
67 |
68 |
69 | try:
70 | # to use a non-buffered camera stream (via a separate thread)
71 |
72 | if not (args.video_file):
73 | import camera_stream
74 | cap = camera_stream.CameraVideoStream(use_tapi=True)
75 | else:
76 | cap = cv2.VideoCapture() # not needed for video files
77 |
78 | except BaseException:
79 | # if not then just use OpenCV default
80 |
81 | print("INFO: camera_stream class not found - camera input may be buffered")
82 | cap = cv2.VideoCapture()
83 |
84 | # define display window name
85 |
86 | window_name = "Largest Area Contour" # window name
87 | window_name2 = "All Contours" # window name
88 |
89 | # if command line arguments are provided try to read video_name
90 | # otherwise default to capture from attached H/W camera
91 |
92 | if (((args.video_file) and (cap.open(str(args.video_file))))
93 | or (cap.open(args.camera_to_use))):
94 |
95 | # create window by name (as resizable)
96 |
97 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
98 | cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
99 |
100 | # add some track bar controllers for settings
101 |
102 | lower_threshold = 112
103 | cv2.createTrackbar("lower", window_name2, lower_threshold, 255, nothing)
104 | upper_threshold = 170
105 | cv2.createTrackbar("upper", window_name2, upper_threshold, 255, nothing)
106 | smoothing_neighbourhood = 3
107 | cv2.createTrackbar(
108 | "smoothing",
109 | window_name2,
110 | smoothing_neighbourhood,
111 | 15,
112 | nothing)
113 | sobel_size = 3 # cv2.Canny() supports apertureSize of 3, 5 or 7 only
114 | cv2.createTrackbar("sobel size", window_name2, sobel_size, 7, nothing)
115 |
116 | # override default camera resolution
117 |
118 | if (args.set_resolution is not None):
119 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
120 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
121 |
122 | print("INFO: input resolution : (",
123 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
124 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
125 |
126 | while (keep_processing):
127 |
128 | # if video file successfully open then read frame from video
129 |
130 | if (cap.isOpened()):
131 | ret, frame = cap.read()
132 |
133 | # when we reach the end of the video (file) exit cleanly
134 |
135 | if (ret == 0):
136 | keep_processing = False
137 | continue
138 |
139 | # rescale if specified
140 |
141 | if (args.rescale != 1.0):
142 | frame = cv2.resize(
143 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
144 |
145 | # get parameters from track bars
146 |
147 | lower_threshold = cv2.getTrackbarPos("lower", window_name2)
148 | upper_threshold = cv2.getTrackbarPos("upper", window_name2)
149 | smoothing_neighbourhood = cv2.getTrackbarPos("smoothing", window_name2)
150 | sobel_size = cv2.getTrackbarPos("sobel size", window_name2)
151 |
152 | # ensure the neighbourhood and sobel sizes are at least 3 and odd
153 |
154 | smoothing_neighbourhood = max(3, smoothing_neighbourhood)
155 | if not (smoothing_neighbourhood % 2):
156 | smoothing_neighbourhood = smoothing_neighbourhood + 1
157 |
158 | sobel_size = max(3, sobel_size)
159 | if not (sobel_size % 2):
160 | sobel_size = sobel_size + 1
161 |
162 | # convert to grayscale
163 |
164 | gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
165 |
166 | # perform smoothing on the image using an NxN Gaussian kernel (see
167 | # manual entry for GaussianBlur())
168 |
169 | smoothed = cv2.GaussianBlur(
170 | gray_frame, (smoothing_neighbourhood, smoothing_neighbourhood), 0)
171 |
172 | # perform canny edge detection
173 |
174 | canny = cv2.Canny(
175 | smoothed,
176 | lower_threshold,
177 | upper_threshold,
178 | apertureSize=sobel_size)
179 |
180 | # convert the canny edges into contours (check OpenCV version >= 4.x)
181 |
182 | if (int(cv2.__version__.split(".")[0]) >= 4):
183 | contours, hierarchy = cv2.findContours(
184 | canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
185 | else:
186 | _, contours, hierarchy = cv2.findContours(
187 | canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
188 |
189 | # find largest contour by area
190 |
191 | max_contour_area = -1
192 | largest_contour = None
193 | for cnt in contours:
194 | area = cv2.contourArea(cnt)
195 | if (area > max_contour_area):
196 | max_contour_area, largest_contour = area, cnt
197 |
198 | # draw contours (one display for all of them, one for the largest only)
199 |
200 | # make 3 channel to draw on it in colour
201 | gray_frame = cv2.cvtColor(gray_frame, cv2.COLOR_GRAY2BGR)
202 | cv2.drawContours(gray_frame, contours, -1, (0, 255, 0), 3) # in green
203 | if largest_contour is not None: # guard: frame may have no contours
204 | cv2.drawContours(frame, [largest_contour], 0, (0, 0, 255), 3) # in red
205 | # display image
206 |
207 | cv2.imshow(window_name, frame)
208 | cv2.imshow(window_name2, gray_frame)
209 |
210 | # start the event loop - essential
211 |
212 | # cv2.waitKey() is a keyboard binding function (argument is the time in
213 | # ms). It waits for specified milliseconds for any keyboard event.
214 | # If you press any key in that time, the program continues.
215 | # If 0 is passed, it waits indefinitely for a key stroke.
216 | # (bitwise and with 0xFF to extract least significant byte of
217 | # multi-byte response)
218 |
219 | # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
220 | key = cv2.waitKey(40) & 0xFF
221 |
222 | # It can also be set to detect specific key strokes by recording which
223 | # key is pressed
224 |
225 | # e.g. if user presses "x" then exit / press "f" for fullscreen
226 | # display
227 |
228 | if (key == ord('x')):
229 | keep_processing = False
230 | elif (key == ord('f')):
231 | cv2.setWindowProperty(
232 | window_name,
233 | cv2.WND_PROP_FULLSCREEN,
234 | cv2.WINDOW_FULLSCREEN)
235 |
236 | # close all windows
237 |
238 | cv2.destroyAllWindows()
239 |
240 | else:
241 | print("No video file specified or camera connected.")
242 |
243 | #####################################################################
244 |
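245 | # A minimal sketch of a version-agnostic wrapper for the OpenCV 3 / 4
246 | # findContours() return-value difference handled inline above - for
247 | # reference only, not used by the code above
248 |
249 | def find_contours_compat(binary_img):
250 |     result = cv2.findContours(binary_img, cv2.RETR_TREE,
251 |                               cv2.CHAIN_APPROX_SIMPLE)
252 |     # OpenCV 3.x returns (image, contours, hierarchy); 4.x drops image
253 |     contours, hierarchy = result if len(result) == 2 else result[1:]
254 |     return contours, hierarchy
255 |
256 | #####################################################################
257 |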
--------------------------------------------------------------------------------
/cycleimages.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : load and display a set of images from a directory
4 | # basic illustrative python script
5 |
6 | # For use with provided test / training datasets
7 |
8 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
9 |
10 | # Copyright (c) 2015 / 2016 School of Engineering & Computing Science,
11 | # Durham University, UK
12 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
13 |
14 | #####################################################################
15 |
16 | import cv2
17 | import os
18 |
19 | directory_to_cycle = "path-to-directory-to-cycle" # edit this
20 |
21 | #####################################################################
22 |
23 | # display all images in directory (sorted by filename)
24 |
25 | for filename in sorted(os.listdir(directory_to_cycle)):
26 |
27 | # if it is a PNG file
28 |
29 | if filename.lower().endswith('.png'):
30 | print(os.path.join(directory_to_cycle, filename))
31 |
32 | # read it and display in a window
33 |
34 | img = cv2.imread(
35 | os.path.join(
36 | directory_to_cycle,
37 | filename),
38 | cv2.IMREAD_COLOR)
39 | cv2.imshow('the image', img)
40 | key = cv2.waitKey(200) # wait 200ms
41 | if (key == ord('x')):
42 | break
43 |
44 |
45 | # close all windows
46 |
47 | cv2.destroyAllWindows()
48 |
49 | #####################################################################
50 |
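51 | # A minimal equivalent sketch using glob to enumerate the PNG files -
52 | # for reference only, not called above
53 |
54 | def list_png_files(directory):
55 |     import glob
56 |     return sorted(glob.glob(os.path.join(directory, "*.png")))
57 |
58 | #####################################################################
59 |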
--------------------------------------------------------------------------------
/download-models.sh:
--------------------------------------------------------------------------------
1 | ################################################################################
2 |
3 | # multi model file downloader - (c) 2021 Toby Breckon, Durham University, UK
4 |
5 | ################################################################################
6 |
7 | # models and associated files for automated download
8 |
9 | MODELS=( https://data.pjreddie.com/files/yolov3.weights
10 | https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg
11 | https://raw.githubusercontent.com/AlexeyAB/darknet/master/data/coco.names
12 | https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt
13 | https://github.com/forresti/SqueezeNet/raw/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel
14 | https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/squeezenet_v1.1.prototxt
15 | https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_coco.txt
16 | https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt
17 | http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz
18 | https://github.com/foss-for-synopsys-dwc-arc-processors/synopsys-caffe-models/raw/master/caffe_models/openpose/caffe_model/pose_iter_440000.caffemodel
19 | https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/openpose/master/models/pose/coco/pose_deploy_linevec.prototxt
20 | https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml
21 | https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_eye.xml
22 | https://raw.githubusercontent.com/opencv/opencv/master/data/lbpcascades/lbpcascade_frontalface_improved.xml
23 | http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
24 | https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt
25 | http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel
26 | https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/fcn8s-heavy-pascal.prototxt
27 | https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_pascal_voc.txt
28 | https://raw.githubusercontent.com/PINTO0309/MobileNet-SSD-RealSense/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.caffemodel
29 | https://raw.githubusercontent.com/chuanqi305/MobileNet-SSD/master/voc/MobileNetSSD_deploy.prototxt
30 | )
31 |
32 | # associated MD5 checksums (output of md5sum filename)
33 |
34 | MD5SUMS=( "4fdfb6d202e9d8e65da14c78b604af95 classification_classes_ILSVRC2012.txt"
35 | "8fc50561361f8bcf96b0177086e7616c coco.names"
36 | "81d7d9cb3438456214afcdb5c83e7bfb object_detection_classes_coco.txt"
37 | "c9e6e28e5b84b7b49c436f929b58db91 pose_deploy_linevec.prototxt"
38 | "5156d31f670511fce9b4e28b403f2939 pose_iter_440000.caffemodel"
39 | "0357e4e11d173c72a01615888826bc8e squeezenet_v1.1.caffemodel"
40 | "dfe9c8d69b154f0ebbba87bc32371e2d squeezenet_v1.1.prototxt"
41 | "5d442b0e550e6c640068e7e15e498599 yolov3.cfg"
42 | "c84e5b99d0e52cd466ae710cadf6d84c yolov3.weights"
43 | "1f1902262c16c2d9acb9bc4f8a8c266f faster_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb"
44 | "2d6fac0caaec1f9558872755ff34818d haarcascade_eye.xml"
45 | "a03f92a797e309e76e6a034ab9e02616 haarcascade_frontalface_default.xml"
46 | "acee557d79a3684cac72ebd811a4eee0 lbpcascade_frontalface_improved.xml"
47 | "5708e4e579d8e4eabeec6c555d4234b2 mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"
48 | "b47e443b313a709e4c39c1caeaa3ecb3 mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb"
49 | "c03b2953ebd846c270da1a8e8f200c09 fcn8s-heavy-pascal.caffemodel"
50 | "532698b83c2e8fa5a010bd996d19d30a fcn8s-heavy-pascal.prototxt"
51 | "5ae5d62183cfb6f6d3ac109359d06a1b object_detection_classes_pascal_voc.txt"
52 | "8bed6fa43361685f4c78f1c084be7775 MobileNetSSD_deploy.caffemodel"
53 | "aa2a13fe1fba2c3b7e067067a6749e7e MobileNetSSD_deploy.prototxt"
54 |
55 | )
56 |
57 | ################################################################################
58 |
59 | DIR_LOCAL_TARGET=/tmp/python-examples-cv-models
60 | PWD_SCRIPT=`pwd`
61 |
62 | ################################################################################
63 |
64 | # Preset this script to fail on error
65 |
66 | set -e
67 |
68 | # check for required commands to download and md5 check
69 |
70 | (command -v curl | grep curl > /dev/null) ||
71 | (echo "Error: curl command not found, cannot download."; exit 1)
72 |
73 | (command -v md5sum | grep md5sum > /dev/null) ||
74 | (echo "Error: md5sum command not found, cannot verify files."; exit 1)
75 |
76 |
77 | ################################################################################
78 |
79 | # Download - perform download of each model
80 |
81 | mkdir -p $DIR_LOCAL_TARGET
82 | cd $DIR_LOCAL_TARGET
83 |
84 | for URL in ${MODELS[@]}; do
85 | echo
86 | echo "Downloading ... " $URL " -> " $DIR_LOCAL_TARGET/
87 | curl -L -k -O $URL # -O saves using the remote file name
88 | done
89 |
90 | # un-tar/gz any models that need this
91 |
92 | for GZT in `ls *tar.gz`; do
93 | tar -xzf $GZT
94 | rm $GZT
95 | done
96 |
97 | cd $PWD_SCRIPT
98 |
99 | ################################################################################
100 |
101 | # Post Download - check md5sum
102 |
103 | cd $DIR_LOCAL_TARGET
104 | echo
105 | echo "Performing MD5 file verification checks ..."
106 | printf '%s\n' "${MD5SUMS[@]}" > md5sums.txt
107 | md5sum -c md5sums.txt
108 | rm -f md5sums.txt
109 |
110 | # Post Download - link all files to current directory
111 |
112 | cd $PWD_SCRIPT
113 | echo
114 | echo "Linking files to current directory ..."
115 | ln -sv $DIR_LOCAL_TARGET/* .
116 |
117 | ################################################################################
118 |
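119 | # a minimal sketch of verifying one file against a known MD5 sum
120 | # (example arguments taken from the MD5SUMS list above) - for reference
121 | # only, the bulk md5sum check above already covers all files
122 |
123 | verify_md5() { printf '%s  %s\n' "$1" "$2" | md5sum -c - ; }
124 |
125 | # usage: verify_md5 "c84e5b99d0e52cd466ae710cadf6d84c" "yolov3.weights"
126 |
127 | ################################################################################
128 |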
--------------------------------------------------------------------------------
/fcn_segmentation.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : perform FCN semantic image segmentation from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera (FCN segmentation: Long et al, CVPR 2015)
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # This code: significant portions based on the example available at:
10 | # https://github.com/opencv/opencv/blob/master/samples/dnn/segmentation.py
11 |
12 |
13 | # Copyright (c) 2021 Toby Breckon, Dept. Computer Science,
14 | # Durham University, UK
15 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
16 |
17 | ##########################################################################
18 |
19 | # To use download the following files:
20 |
21 | # http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel
22 | # https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/fcn8s-heavy-pascal.prototxt
23 | # https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_pascal_voc.txt
24 |
25 | ##########################################################################
26 |
27 | import cv2
28 | import argparse
29 | import sys
30 | import math
31 | import numpy as np
32 |
33 | ##########################################################################
34 |
35 | keep_processing = True
36 | colors = None
37 |
38 | ##########################################################################
39 |
40 | # generate and display colour legend for segmentation classes
41 |
42 |
43 | def generate_legend(classes, height):
44 | blockHeight = math.floor(height/len(classes))
45 |
46 | legend = np.zeros((blockHeight * len(classes), 200, 3), np.uint8)
47 | for i in range(len(classes)):
48 | block = legend[i * blockHeight:(i + 1) * blockHeight]
49 | block[:, :] = colors[i]
50 | cv2.putText(block, classes[i],
51 | (0, blockHeight//2),
52 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
53 |
54 | return legend
55 |
56 | ##########################################################################
57 |
58 | # concatenate two RGB/grayscale images horizontally (left to right)
59 | # handling differing channel numbers or image heights in the input
60 |
61 |
62 | def h_concatenate(img1, img2):
63 |
64 | # get size and channels for both images
65 |
66 | height1 = img1.shape[0]
67 |
68 | if (len(img1.shape) == 2):
69 | channels1 = 1
70 | else:
71 | channels1 = img1.shape[2]
72 |
73 | height2 = img2.shape[0]
74 | width2 = img2.shape[1]
75 | if (len(img2.shape) == 2):
76 | channels2 = 1
77 | else:
78 | channels2 = img2.shape[2]
79 |
80 | # make all images 3 channel, or assume all same channel
81 |
82 | if ((channels1 > channels2) and (channels1 == 3)):
83 | out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
84 | out1 = img1
85 | elif ((channels2 > channels1) and (channels2 == 3)):
86 | out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
87 | out2 = img2
88 | else: # both must be equal
89 | out1 = img1
90 | out2 = img2
91 |
92 | # height of first image is master height, width can remain unchanged
93 |
94 | if (height1 != height2):
95 | out2 = cv2.resize(out2, (width2, height1))
96 |
97 | return np.hstack((out1, out2))
98 |
99 |
100 | ##########################################################################
101 |
102 | # parse command line arguments for camera ID or video file
103 |
104 | parser = argparse.ArgumentParser(
105 | description='Perform ' +
106 | sys.argv[0] +
107 | ' example operation on incoming camera/video image')
108 | parser.add_argument(
109 | "-c",
110 | "--camera_to_use",
111 | type=int,
112 | help="specify camera to use",
113 | default=0)
114 | parser.add_argument(
115 | "-r",
116 | "--rescale",
117 | type=float,
118 | help="rescale image by this factor",
119 | default=1.0)
120 | parser.add_argument(
121 | "-fs",
122 | "--fullscreen",
123 | action='store_true',
124 | help="run in full screen mode")
125 | parser.add_argument(
126 | "-use",
127 | "--target",
128 | type=str,
129 | choices=['cpu', 'gpu', 'opencl'],
130 | help="select computational backend",
131 | default='gpu')
132 | parser.add_argument(
133 | 'video_file',
134 | metavar='video_file',
135 | type=str,
136 | nargs='?',
137 | help='specify optional video file')
138 | args = parser.parse_args()
139 |
140 | ##########################################################################
141 |
142 | # define video capture object
143 |
144 | try:
145 | # to use a non-buffered camera stream (via a separate thread)
146 |
147 | if not (args.video_file):
148 | import camera_stream
149 | cap = camera_stream.CameraVideoStream()
150 | else:
151 | cap = cv2.VideoCapture() # not needed for video files
152 |
153 | except BaseException:
154 | # if not then just use OpenCV default
155 |
156 | print("INFO: camera_stream class not found - camera input may be buffered")
157 | cap = cv2.VideoCapture()
158 |
159 | # define display window name
160 |
161 | window_name = "FCN Semantic Image Segmentation" # window name
162 |
163 | ##########################################################################
164 |
165 | # Load names of class labels (background = class 0, for PASCAL VOC)
166 |
167 | classes = None
168 | with open("object_detection_classes_pascal_voc.txt", 'rt') as f:
169 | classes = f.read().rstrip('\n').split('\n')
170 | classes.insert(0, "background") # insert a background class as index 0
171 |
172 | ##########################################################################
173 |
174 | # Load CNN model
175 |
176 | net = cv2.dnn.readNet(
177 | "fcn8s-heavy-pascal.caffemodel",
178 | "fcn8s-heavy-pascal.prototxt",
179 | 'caffe')
180 |
181 | # set up compute target as one of [GPU, OpenCL, CPU]
182 |
183 | if (args.target == 'gpu'):
184 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
185 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
186 | elif (args.target == 'opencl'):
187 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
188 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
189 | else:
190 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
191 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
192 |
193 | ##########################################################################
194 |
195 | # if command line arguments are provided try to read video_name
196 | # otherwise default to capture from attached camera
197 |
198 | if (((args.video_file) and (cap.open(str(args.video_file))))
199 | or (cap.open(args.camera_to_use))):
200 |
201 | # create window by name (as resizable)
202 |
203 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
204 |
205 | while (keep_processing):
206 |
207 | # start a timer (to see how long processing and display takes)
208 |
209 | start_t = cv2.getTickCount()
210 |
211 | # if camera / video file successfully open then read frame
212 |
213 | if (cap.isOpened()):
214 | ret, frame = cap.read()
215 |
216 | # when we reach the end of the video (file) exit cleanly
217 |
218 | if (ret == 0):
219 | keep_processing = False
220 | continue
221 |
222 | # rescale if specified
223 |
224 | if (args.rescale != 1.0):
225 | frame = cv2.resize(
226 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
227 |
228 | frameHeight = frame.shape[0]
229 | frameWidth = frame.shape[1]
230 |
231 | #######################################################################
232 | # FCN Segmentation:
233 | # model: "fcn8s-heavy-pascal.caffemodel"
234 | # config: "fcn8s-heavy-pascal.prototxt"
235 | # mean: [0, 0, 0]
236 | # scale: 1.0
237 | # width: 500
238 | # height: 500
239 | # rgb: false
240 | #
241 | # classes: object_detection_classes_pascal_voc.txt
242 | #######################################################################
243 |
244 | # create a 4D tensor "blob" from a frame.
245 |
246 | blob = cv2.dnn.blobFromImage(
247 | frame, scalefactor=1.0,
248 | size=(500, 500), mean=[0, 0, 0],
249 | swapRB=False, crop=False
250 | )
251 |
252 | # Run forward inference on the model
253 |
254 | net.setInput(blob)
255 | result = net.forward()
256 |
257 | numClasses = result.shape[1]
258 | height = result.shape[2]
259 | width = result.shape[3]
260 |
261 | # define colours
262 |
263 | if not colors:
264 | np.random.seed(888)
265 | colors = [np.array([0, 0, 0], np.uint8)]
266 | for i in range(1, numClasses + 1):
267 | colors.append((colors[i - 1] +
268 | np.random.randint(0, 256, [3],
269 | np.uint8)) / 2
270 | )
271 | del colors[0]
272 |
273 | # generate legend
274 | legend = generate_legend(classes, frameHeight)
275 |
276 | # display segmentation
277 |
278 | classIds = np.argmax(result[0], axis=0)
279 | segm = np.stack([colors[idx] for idx in classIds.flatten()])
280 | segm = segm.reshape(height, width, 3)
281 |
282 | segm = cv2.resize(segm, (frameWidth, frameHeight),
283 | interpolation=cv2.INTER_NEAREST)
284 |
285 | # stop the timer and convert to ms. (to see how long processing and
286 | # display takes)
287 |
288 | stop_t = ((cv2.getTickCount() - start_t) /
289 | cv2.getTickFrequency()) * 1000
290 |
291 | # Display efficiency information
292 |
293 | label = ('Inference time: %.2f ms' % stop_t) + \
294 | (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
295 | cv2.putText(frame, label, (0, 15),
296 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
297 |
298 | # display image(s) as concatenated single image
299 |
300 | cv2.imshow(window_name,
301 | h_concatenate(h_concatenate(frame, segm.astype(np.uint8)),
302 | legend))
303 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
304 | cv2.WINDOW_FULLSCREEN & args.fullscreen)
305 |
306 | # start the event loop - essential
307 |
308 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
309 | # 25 fps = 40 ms)
310 |
311 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
312 |
313 | # if user presses "x" then exit / press "f" for fullscreen display
314 |
315 | if (key == ord('x')):
316 | keep_processing = False
317 | elif (key == ord('f')):
318 | args.fullscreen = not (args.fullscreen)
319 |
320 | # close all windows
321 |
322 | cv2.destroyAllWindows()
323 |
324 | else:
325 | print("No video file specified or camera connected.")
326 |
327 | ##########################################################################
328 |
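329 | # A minimal sketch of recovering per-class pixel coverage from the raw
330 | # network output - assumes the same (1, C, H, W) result layout as used
331 | # above; defined for reference only, not called by the script
332 |
333 | def class_coverage(result, classes):
334 |     class_ids = np.argmax(result[0], axis=0)
335 |     counts = np.bincount(class_ids.flatten(), minlength=len(classes))
336 |     return {classes[i]: counts[i] / class_ids.size
337 |             for i in range(len(classes)) if counts[i] > 0}
338 |
339 | ##########################################################################
340 |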
--------------------------------------------------------------------------------
/gaussian.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : gaussian smoothing for a video file specified on the
4 | # command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera with selectable opencl acceleration
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2021 Dept Computer Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import math
19 |
20 | #####################################################################
21 |
22 | keep_processing = True
23 |
24 | # parse command line arguments for camera ID or video file
25 |
26 | parser = argparse.ArgumentParser(
27 | description='Perform ' +
28 | sys.argv[0] +
29 | ' example operation on incoming camera/video image')
30 | parser.add_argument(
31 | "-c",
32 | "--camera_to_use",
33 | type=int,
34 | help="specify camera to use",
35 | default=0)
36 | parser.add_argument(
37 | "-r",
38 | "--rescale",
39 | type=float,
40 | help="rescale image by this factor",
41 | default=1.0)
42 | parser.add_argument(
43 | "-s",
44 | "--set_resolution",
45 | type=int,
46 | nargs=2,
47 | help='override default camera resolution as H W')
48 | parser.add_argument(
49 | "-ocl",
50 | "--opencl",
51 | action='store_true',
52 | help="enable opencl hardware acceleration")
53 | parser.add_argument(
54 | 'video_file',
55 | metavar='video_file',
56 | type=str,
57 | nargs='?',
58 | help='specify optional video file')
59 |
60 | args = parser.parse_args()
61 |
62 | #####################################################################
63 |
64 | # this function is called as a call-back everytime the trackbar is moved
65 | # (here we just do nothing)
66 |
67 |
68 | def nothing(x):
69 | pass
70 |
71 |
72 | #####################################################################
73 |
74 | # define video capture object
75 |
76 | try:
77 | # to use a non-buffered camera stream (via a separate thread)
78 |
79 | if not (args.video_file):
80 | import camera_stream
81 | cap = camera_stream.CameraVideoStream(use_tapi=args.opencl)
82 | else:
83 | cap = cv2.VideoCapture() # not needed for video files
84 |
85 | except BaseException:
86 | # if not then just use OpenCV default
87 |
88 | print("INFO: camera_stream class not found - camera input may be buffered")
89 | cap = cv2.VideoCapture()
90 |
91 | # define display window name
92 |
93 | window_name = "Live Camera Input" # window name
94 | window_name2 = "Gaussian Smoothing" # window name
95 |
96 | # setup OpenCL if specified on command line only
97 |
98 | cv2.ocl.setUseOpenCL(args.opencl)
99 |
100 | # if command line arguments are provided try to read video_name
101 | # otherwise default to capture from attached H/W camera
102 |
103 | if (((args.video_file) and (cap.open(str(args.video_file))))
104 | or (cap.open(args.camera_to_use))):
105 |
106 | # create window by name (as resizable)
107 |
108 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
109 | cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
110 |
111 | # add some track bar controllers for settings
112 |
113 | smoothing_neighbourhood = 3
114 | cv2.createTrackbar(
115 | "kernel size",
116 | window_name2,
117 | smoothing_neighbourhood,
118 | 250,
119 | nothing)
120 |
121 | # override default camera resolution
122 |
123 | if (args.set_resolution is not None):
124 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
125 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
126 |
127 | print("INFO: input resolution : (",
128 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
129 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
130 |
131 | while (keep_processing):
132 |
133 | # start a timer (to see how long processing and display takes)
134 |
135 | start_t = cv2.getTickCount()
136 |
137 | # if video file successfully open then read frame from video
138 |
139 | if (cap.isOpened()):
140 | ret, frame = cap.read()
141 |
142 | # when we reach the end of the video (file) exit cleanly
143 |
144 | if (ret == 0):
145 | keep_processing = False
146 | continue
147 |
148 | # rescale if specified
149 |
150 | if (args.rescale != 1.0):
151 | frame = cv2.resize(
152 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
153 |
154 | # get parameters from track bars
155 |
156 | smoothing_neighbourhood = cv2.getTrackbarPos("kernel size",
157 | window_name2)
158 |
159 | # ensure neighbourhood is at least 3 and odd
160 |
161 | smoothing_neighbourhood = max(3, smoothing_neighbourhood)
162 | if not (smoothing_neighbourhood % 2):
163 | smoothing_neighbourhood = smoothing_neighbourhood + 1
164 |
165 | # perform smoothing on the image using an NxN Gaussian kernel (see
166 | # manual entry for GaussianBlur())
167 |
168 | smoothed = cv2.GaussianBlur(frame, (smoothing_neighbourhood,
169 | smoothing_neighbourhood), 0)
170 |
171 | # stop the timer and convert to ms. (to see how long processing and
172 | # display takes)
173 |
174 | stop_t = ((cv2.getTickCount() - start_t) /
175 | cv2.getTickFrequency()) * 1000
176 |
177 | label = ('Processing time: %.2f ms' % stop_t) + \
178 | (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
179 | cv2.putText(smoothed, label, (0, 15),
180 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
181 |
182 | # display image
183 |
184 | cv2.imshow(window_name, frame)
185 | cv2.imshow(window_name2, smoothed)
186 |
187 | # start the event loop - essential
188 |
189 | # cv2.waitKey() is a keyboard binding function (argument is the time in
190 | # milliseconds). It waits for specified milliseconds for any keyboard
191 | # event. If you press any key in that time, the program continues.
192 | # If 0 is passed, it waits indefinitely for a key stroke.
193 | # (bitwise and with 0xFF to extract least significant byte of
194 | # multi-byte response)
195 |
196 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
197 | # 25 fps = 40 ms)
198 |
199 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
200 |
201 | # It can also be set to detect specific key strokes by recording which
202 | # key is pressed
203 |
204 | # e.g. if user presses "x" then exit / press "f" for fullscreen
205 | # display
206 |
207 | if (key == ord('x')):
208 | keep_processing = False
209 | elif (key == ord('f')):
210 | cv2.setWindowProperty(
211 | window_name2,
212 | cv2.WND_PROP_FULLSCREEN,
213 | cv2.WINDOW_FULLSCREEN)
214 |
215 | # close all windows
216 |
217 | cv2.destroyAllWindows()
218 |
219 | else:
220 | print("No video file specified or camera connected.")
221 |
222 | #####################################################################
223 |
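224 | # A minimal sketch showing that the NxN Gaussian smoothing above is
225 | # separable into two 1D passes (which is what GaussianBlur() does
226 | # internally) - n must be odd; for reference only, not called above
227 |
228 | def gaussian_separable(img, n):
229 |     kernel = cv2.getGaussianKernel(n, -1)  # sigma derived from n
230 |     return cv2.sepFilter2D(img, -1, kernel, kernel)
231 |
232 | #####################################################################
233 |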
--------------------------------------------------------------------------------
/generic_interface.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : perform generic live display from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2015 - 2018 Toby Breckon, Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | ##########################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import math
19 |
20 | ##########################################################################
21 |
22 | keep_processing = True
23 |
24 | # parse command line arguments for camera ID or video file
25 |
26 | parser = argparse.ArgumentParser(
27 | description='Perform ' +
28 | sys.argv[0] +
29 | ' example operation on incoming camera/video image')
30 | parser.add_argument(
31 | "-c",
32 | "--camera_to_use",
33 | type=int,
34 | help="specify camera to use",
35 | default=0)
36 | parser.add_argument(
37 | "-r",
38 | "--rescale",
39 | type=float,
40 | help="rescale image by this factor",
41 | default=1.0)
42 | parser.add_argument(
43 | "-fs",
44 | "--fullscreen",
45 | action='store_true',
46 | help="run in full screen mode")
47 | parser.add_argument(
48 | 'video_file',
49 | metavar='video_file',
50 | type=str,
51 | nargs='?',
52 | help='specify optional video file')
53 | args = parser.parse_args()
54 |
55 | ##########################################################################
56 |
57 | # define video capture object
58 |
59 | try:
60 | # to use a non-buffered camera stream (via a separate thread)
61 |
62 | if not (args.video_file):
63 | import camera_stream
64 | cap = camera_stream.CameraVideoStream()
65 | else:
66 | cap = cv2.VideoCapture() # not needed for video files
67 |
68 | except BaseException:
69 | # if not then just use OpenCV default
70 |
71 | print("INFO: camera_stream class not found - camera input may be buffered")
72 | cap = cv2.VideoCapture()
73 |
74 | # define display window name
75 |
76 | window_name = "Live Camera Input" # window name
77 |
78 | # if command line arguments are provided try to read video_name
79 | # otherwise default to capture from attached camera
80 |
81 | if (((args.video_file) and (cap.open(str(args.video_file))))
82 | or (cap.open(args.camera_to_use))):
83 |
84 | # create window by name (as resizable)
85 |
86 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
87 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
88 | cv2.WINDOW_FULLSCREEN & args.fullscreen)
89 |
90 | while (keep_processing):
91 |
92 | # start a timer (to see how long processing and display takes)
93 |
94 | start_t = cv2.getTickCount()
95 |
96 | # if camera / video file successfully open then read frame
97 |
98 | if (cap.isOpened()):
99 | ret, frame = cap.read()
100 |
101 | # when we reach the end of the video (file) exit cleanly
102 |
103 | if (ret == 0):
104 | keep_processing = False
105 | continue
106 |
107 | # rescale if specified
108 |
109 | if (args.rescale != 1.0):
110 | frame = cv2.resize(
111 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
112 |
113 | # ***
114 | # *** do any processing here ****
115 | # ***
116 |
117 | # display image
118 |
119 | cv2.imshow(window_name, frame)
120 |
121 | # stop the timer and convert to ms. (to see how long processing and
122 | # display takes)
123 |
124 | stop_t = ((cv2.getTickCount() - start_t) /
125 | cv2.getTickFrequency()) * 1000
126 |
127 | # start the event loop - essential
128 |
129 | # cv2.waitKey() is a keyboard binding function (argument is the time in
130 | # milliseconds). It waits for specified milliseconds for any keyboard
131 | # event. If you press any key in that time, the program continues.
132 | # If 0 is passed, it waits indefinitely for a key stroke.
133 | # (bitwise and with 0xFF to extract least significant byte of
134 | # multi-byte response)
135 |
136 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
137 | # 25 fps = 40 ms)
138 |
139 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
140 |
141 | # It can also be set to detect specific key strokes by recording which
142 | # key is pressed
143 |
144 | # e.g. if user presses "x" then exit / press "f" for fullscreen
145 | # display
146 |
147 | if (key == ord('x')):
148 | keep_processing = False
149 | elif (key == ord('f')):
150 | args.fullscreen = not (args.fullscreen)
151 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
152 | cv2.WINDOW_FULLSCREEN & args.fullscreen)
153 |
154 | # close all windows
155 |
156 | cv2.destroyAllWindows()
157 |
158 | else:
159 | print("No video file specified or camera connected.")
160 |
161 | ##########################################################################
162 |
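163 | # A minimal sketch of the wait-time calculation used in the event loop
164 | # above: subtract the measured per-frame processing time from the frame
165 | # budget for a target rate, with a small floor to keep the UI
166 | # responsive - for reference only, not called by the script
167 |
168 | def wait_delay_ms(processing_time_ms, target_fps=25):
169 |     frame_budget_ms = 1000 / target_fps  # e.g. 40 ms at 25 fps
170 |     return max(2, int(frame_budget_ms - math.ceil(processing_time_ms)))
171 |
172 | ##########################################################################
173 |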
--------------------------------------------------------------------------------
/gradient_orientation.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : perform generic live display of gradient orientations
4 | # (the essence of the Histogram of Oriented Gradients (HOG) feature)
5 | # from a video file specified on the command line
6 | # (e.g. python FILE.py video_file) or from an attached web camera
7 |
8 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
9 |
10 | # https://www.learnopencv.com/histogram-of-oriented-gradients/
11 |
12 | # Copyright (c) 2018 Dept. Computer Science,
13 | # Durham University, UK
14 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
15 |
16 | #####################################################################
17 |
18 | import cv2
19 | import argparse
20 | import sys
21 | import math
22 | import numpy as np
23 |
24 | #####################################################################
25 |
26 | keep_processing = True
27 |
28 | # parse command line arguments for camera ID or video file
29 |
30 | parser = argparse.ArgumentParser(
31 | description='Perform ' +
32 | sys.argv[0] +
33 | ' example operation on incoming camera/video image')
34 | parser.add_argument(
35 | "-c",
36 | "--camera_to_use",
37 | type=int,
38 | help="specify camera to use",
39 | default=0)
40 | parser.add_argument(
41 | "-r",
42 | "--rescale",
43 | type=float,
44 | help="rescale image by this factor",
45 | default=1.0)
46 | parser.add_argument(
47 | 'video_file',
48 | metavar='video_file',
49 | type=str,
50 | nargs='?',
51 | help='specify optional video file')
52 | args = parser.parse_args()
53 |
54 | #####################################################################
55 |
56 | # this function is called as a call-back everytime the trackbar is moved
57 | # (here we just do nothing)
58 |
59 |
60 | def nothing(x):
61 | pass
62 |
63 |
64 | #####################################################################
65 |
66 | # define video capture object
67 |
68 | try:
69 | # to use a non-buffered camera stream (via a separate thread)
70 |
71 | if not (args.video_file):
72 | import camera_stream
73 | cap = camera_stream.CameraVideoStream()
74 | else:
75 | cap = cv2.VideoCapture() # not needed for video files
76 |
77 | except BaseException:
78 | # if not then just use OpenCV default
79 |
80 | print("INFO: camera_stream class not found - camera input may be buffered")
81 | cap = cv2.VideoCapture()
82 |
83 | # define display window names
84 |
85 | window_nameGx = "Gradient - Gx" # window name
86 | window_nameGy = "Gradient - Gy" # window name
87 | window_nameAngle = "Gradient Angle" # window name
88 |
89 | # if command line arguments are provided try to read video_name
90 | # otherwise default to capture from attached camera
91 |
92 | if (((args.video_file) and (cap.open(str(args.video_file))))
93 | or (cap.open(args.camera_to_use))):
94 |
95 | # create window by name (as resizable)
96 |
97 | cv2.namedWindow(window_nameGx, cv2.WINDOW_NORMAL)
98 | cv2.namedWindow(window_nameGy, cv2.WINDOW_NORMAL)
99 | cv2.namedWindow(window_nameAngle, cv2.WINDOW_NORMAL)
100 |
101 | # add some track bar controllers for settings
102 |
103 | lower_threshold = 0
104 | cv2.createTrackbar(
105 | "lower",
106 | window_nameAngle,
107 | lower_threshold,
108 | 180,
109 | nothing)
110 |
111 | upper_threshold = 180
112 | cv2.createTrackbar(
113 | "upper",
114 | window_nameAngle,
115 | upper_threshold,
116 | 180,
117 | nothing)
118 |
119 | neighbourhood = 3
120 | cv2.createTrackbar(
121 | "neighbourhood, N",
122 | window_nameGy,
123 | neighbourhood,
124 | 40,
125 | nothing)
126 |
127 | sigma = 1
128 | cv2.createTrackbar(
129 | "sigma",
130 | window_nameGy,
131 | sigma,
132 | 10,
133 | nothing)
134 |
135 | while (keep_processing):
136 |
137 | # start a timer (to see how long processing and display takes)
138 |
139 | start_t = cv2.getTickCount()
140 |
141 | # if video file successfully open then read frame from video
142 |
143 | if (cap.isOpened()):
144 | ret, frame = cap.read()
145 |
146 | # when we reach the end of the video (file) exit cleanly
147 |
148 | if (ret == 0):
149 | keep_processing = False
150 | continue
151 |
152 | # rescale if specified
153 |
154 | if (args.rescale != 1.0):
155 | frame = cv2.resize(
156 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
157 |
158 | # get parameter from track bars - Gaussian pre-smoothing
159 |
160 | neighbourhood = cv2.getTrackbarPos("neighbourhood, N", window_nameGy)
161 | sigma = cv2.getTrackbarPos("sigma", window_nameGy)
162 |
163 | # ensure neighbourhood is at least 3 and odd
164 |
165 | neighbourhood = max(3, neighbourhood)
166 | if not (neighbourhood % 2):
167 | neighbourhood = neighbourhood + 1
168 |
169 | # perform Gaussian smoothing using NxN neighbourhood
170 |
171 | smoothed_img = cv2.GaussianBlur(
172 | frame,
173 | (neighbourhood,
174 | neighbourhood),
175 | sigma,
176 | sigma,
177 | borderType=cv2.BORDER_REPLICATE)
178 |
179 | # compute the gradients in the x and y directions separately
180 | # N.B from here onward these images are 32-bit float
181 |
182 | gx = cv2.Sobel(smoothed_img, cv2.CV_32F, 1, 0)
183 | gy = cv2.Sobel(smoothed_img, cv2.CV_32F, 0, 1)
184 |
185 | # calculate gradient magnitude and direction (in degrees)
186 |
187 | mag, angle = cv2.cartToPolar(gx, gy, angleInDegrees=True)
188 |
189 | # take absolute values
190 |
191 | gx = np.abs(gx)
192 | gy = np.abs(gy)
193 | angle = np.abs(angle)
194 |
195 | # normalize gradients to 0 -> 255 and the angle to 0 -> 180 for display
196 |
197 | gx = cv2.normalize(gx, None, 0, 255, cv2.NORM_MINMAX)
198 | gy = cv2.normalize(gy, None, 0, 255, cv2.NORM_MINMAX)
199 | angle = cv2.normalize(angle, None, 0, 180, cv2.NORM_MINMAX)
200 |
201 | # for the angle take the max across all three channels
202 |
203 | (aB, aG, aR) = cv2.split(angle)
204 | angle = np.maximum(np.maximum(aR, aG), aB)
205 |
206 | # get threshold from trackbars and threshold to keep inner range
207 |
208 | lower_threshold = cv2.getTrackbarPos("lower", window_nameAngle)
209 | upper_threshold = cv2.getTrackbarPos("upper", window_nameAngle)
210 |
211 | mask = cv2.inRange(angle, lower_threshold, upper_threshold)
212 | angle = cv2.bitwise_and(angle.astype(np.uint8), mask)
213 |
214 | # display images (as 8-bit)
215 |
216 | cv2.imshow(window_nameGx, gx.astype(np.uint8))
217 | cv2.imshow(window_nameGy, gy.astype(np.uint8))
218 | cv2.imshow(window_nameAngle, angle.astype(np.uint8))
219 |
220 | # stop the timer and convert to ms. (to see how long processing and
221 | # display takes)
222 |
223 | stop_t = ((cv2.getTickCount() - start_t) /
224 | cv2.getTickFrequency()) * 1000
225 |
226 | # start the event loop - essential
227 |
228 | # cv2.waitKey() is a keyboard binding function (argument is the time in
229 | # milliseconds). It waits for specified milliseconds for any keyboard
230 | # event. If you press any key in that time, the program continues.
231 | # If 0 is passed, it waits indefinitely for a key stroke.
232 | # (bitwise and with 0xFF to extract least significant byte of
233 | # multi-byte response)
234 |
235 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
236 | # 25 fps = 40 ms)
237 |
238 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
239 |
240 | # It can also be set to detect specific key strokes by recording which
241 | # key is pressed
242 |
243 | # e.g. if user presses "x" then exit / press "f" for fullscreen
244 | # display
245 |
246 | if (key == ord('x')):
247 | keep_processing = False
248 | elif (key == ord('f')):
249 | cv2.setWindowProperty(
250 | window_nameAngle,
251 | cv2.WND_PROP_FULLSCREEN,
252 | cv2.WINDOW_FULLSCREEN)
253 |
254 | # close all windows
255 |
256 | cv2.destroyAllWindows()
257 |
258 | else:
259 | print("No video file specified or camera connected.")
260 |
261 | #####################################################################
262 |
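263 | # A minimal sketch of the next step towards the HOG feature: a
264 | # magnitude-weighted orientation histogram over matching mag / angle
265 | # arrays (9 unsigned bins over 0-180 degrees is the usual HOG choice)
266 | # - for reference only, not called by the script
267 |
268 | def orientation_histogram(mag, angle, bins=9):
269 |     hist, _ = np.histogram(angle.flatten() % 180.0, bins=bins,
270 |                            range=(0, 180), weights=mag.flatten())
271 |     return hist / max(hist.sum(), 1e-6)
272 |
273 | #####################################################################
274 |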
--------------------------------------------------------------------------------
/haar_cascade_detection.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : perform haar cascade detection on live display from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2015 / 2016 School of Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | # based on example at:
14 | # http://docs.opencv.org/3.1.0/d7/d8b/tutorial_py_face_detection.html#gsc.tab=0
15 |
16 | # get trained cascade files from:
17 | # https://github.com/opencv/opencv/tree/master/data/haarcascades
18 |
19 | #####################################################################
20 |
21 | import cv2
22 | import argparse
23 | import sys
24 | import os
25 | import math
26 |
27 | #####################################################################
28 |
29 | keep_processing = True
30 | faces_recorded = 0
31 |
32 | # parse command line arguments for camera ID or video file
33 |
34 | parser = argparse.ArgumentParser(
35 | description='Perform ' +
36 | sys.argv[0] +
37 | ' example operation on incoming camera/video image')
38 | parser.add_argument(
39 | "-c",
40 | "--camera_to_use",
41 | type=int,
42 | help="specify camera to use",
43 | default=0)
44 | parser.add_argument(
45 | "-r",
46 | "--rescale",
47 | type=float,
48 | help="rescale image by this factor",
49 | default=1.0)
50 | parser.add_argument(
51 | "-ha",
52 | "--harvest",
53 | type=str,
54 | help="path to save detected faces to",
55 | default='')
56 | parser.add_argument(
57 | 'video_file',
58 | metavar='video_file',
59 | type=str,
60 | nargs='?',
61 | help='specify optional video file')
62 | args = parser.parse_args()
63 |
64 | #####################################################################
65 | # set up directory to save faces to if specified
66 |
67 | if (len(args.harvest) > 0):
68 | try:
69 | os.mkdir(args.harvest)
70 | except OSError:
71 | print("Harvesting to existing directory: " + args.harvest)
72 |
73 | #####################################################################
74 |
75 | # define video capture object
76 |
77 | try:
78 | # to use a non-buffered camera stream (via a separate thread)
79 |
80 | if not (args.video_file):
81 | import camera_stream
82 | cap = camera_stream.CameraVideoStream()
83 | else:
84 | cap = cv2.VideoCapture() # not needed for video files
85 |
86 | except BaseException:
87 | # if not then just use OpenCV default
88 |
89 | print("INFO: camera_stream class not found - camera input may be buffered")
90 | cap = cv2.VideoCapture()
91 |
92 | # define display window name
93 |
94 | window_name = "Face Detection using Haar Cascades" # window name
95 |
96 | # define haar cascade objects
97 |
98 | # required cascade classifier files (and many others) available from:
99 | # https://github.com/opencv/opencv/tree/master/data/haarcascades
100 |
101 | face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
102 | eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')
103 |
104 | if (face_cascade.empty() or eye_cascade.empty()):
105 | print("Failed to load cascade from file.")
106 |
107 | # if command line arguments are provided try to read video_name
108 | # otherwise default to capture from attached H/W camera
109 |
110 | if (((args.video_file) and (cap.open(str(args.video_file))))
111 | or (cap.open(args.camera_to_use))):
112 |
113 | # create window by name (as resizable)
114 |
115 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
116 |
117 | while (keep_processing):
118 |
119 | # if video file successfully open then read frame from video
120 |
121 | if (cap.isOpened()):
122 | ret, frame = cap.read()
123 | if (ret == 0): break # exit cleanly at the end of a video file
124 | # rescale if specified
125 |
126 | if (args.rescale != 1.0):
127 | frame = cv2.resize(
128 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
129 |
130 | # start a timer (to see how long processing and display takes)
131 |
132 | start_t = cv2.getTickCount()
133 |
134 | # convert to grayscale
135 |
136 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
137 |
138 | # detect faces using haar cascade trained on faces
139 |
140 | faces = face_cascade.detectMultiScale(
141 | gray, scaleFactor=1.2, minNeighbors=4, minSize=(
142 | 30, 30), flags=cv2.CASCADE_DO_CANNY_PRUNING)
143 |
144 | # for each detected face, try to detect eyes inside the top
145 | # half of the face region
146 |
147 | for (x, y, w, h) in faces:
148 |
149 | # extract regions of interest (roi) and draw each face
150 | # bounding box
151 |
152 | # top 50% to detect eyes
153 | roi_gray = gray[y:y + math.floor(h * 0.5), x:x + w]
154 | # copy to save if required
155 | roi_color = frame[y:y + h, x:x + w].copy()
156 |
157 | cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
158 |
159 | # detect eyes using haar cascade trained on eyes
160 |
161 | eyes = eye_cascade.detectMultiScale(roi_gray)
162 |
163 | # for each detected eye, draw bounding box
164 |
165 | for (ex, ey, ew, eh) in eyes:
166 | cv2.rectangle(frame, (x + ex, y + ey),
167 | (x + ex + ew, y + ey + eh), (0, 255, 0), 2)
168 |
169 | # if specified, record all the faces we see to a specified
170 | # directory
171 |
172 | if (len(args.harvest) > 0):
173 | filename = os.path.join(
174 | args.harvest, "face_" +
175 | str(format(faces_recorded, '04')) + ".png")
176 | cv2.imwrite(filename, roi_color)
177 | faces_recorded += 1
178 |
179 | # display image
180 |
181 | cv2.imshow(window_name, frame)
182 |
183 | # stop the timer and convert to ms. (to see how long processing and
184 | # display takes)
185 |
186 | stop_t = ((cv2.getTickCount() - start_t) /
187 | cv2.getTickFrequency()) * 1000
188 |
189 | # start the event loop - essential
190 |
191 | # cv2.waitKey() is a keyboard binding function (argument is the time in
192 | # ms.) It waits for specified milliseconds for any keyboard event.
193 | # If you press any key in that time, the program continues.
194 | # If 0 is passed, it waits indefinitely for a key stroke.
195 | # (bitwise and with 0xFF to extract least significant byte of
196 | # multi-byte response) here we use a wait time in ms. that takes
197 | # account of processing time already used in the loop
198 |
199 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
200 | # 25 fps = 40 ms)
201 |
202 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
203 |
204 | # It can also be set to detect specific key strokes by recording which
205 | # key is pressed
206 |
207 | # e.g. if user presses "x" then exit / press "f" for fullscreen
208 | # display
209 |
210 | if (key == ord('x')):
211 | keep_processing = False
212 | elif (key == ord('f')):
213 | cv2.setWindowProperty(
214 | window_name,
215 | cv2.WND_PROP_FULLSCREEN,
216 | cv2.WINDOW_FULLSCREEN)
217 |
218 | # close all windows
219 |
220 | cv2.destroyAllWindows()
221 |
222 | else:
223 | print("No video file specified or camera connected.")
224 |
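225 | #####################################################################
226 |
227 | # A minimal sketch of the same cascade detection applied to a single
228 | # still image (the image path argument is hypothetical) - for
229 | # reference only, not called by the script above
230 |
231 | def detect_faces_single(image_path,
232 |                         cascade='haarcascade_frontalface_default.xml'):
233 |     face_detector = cv2.CascadeClassifier(cascade)
234 |     gray = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2GRAY)
235 |     return face_detector.detectMultiScale(
236 |         gray, scaleFactor=1.2, minNeighbors=4, minSize=(30, 30))
237 |
238 | #####################################################################
239 |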
--------------------------------------------------------------------------------
/harris.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : harris feature points from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2015-2024 Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import numpy as np
19 |
20 | #####################################################################
21 |
22 | keep_processing = True
23 |
24 | # parse command line arguments for camera ID or video file
25 |
26 | parser = argparse.ArgumentParser(
27 | description='Perform ' +
28 | sys.argv[0] +
29 | ' example operation on incoming camera/video image')
30 | parser.add_argument(
31 | "-c",
32 | "--camera_to_use",
33 | type=int,
34 | help="specify camera to use",
35 | default=0)
36 | parser.add_argument(
37 | "-r",
38 | "--rescale",
39 | type=float,
40 | help="rescale image by this factor",
41 | default=1.0)
42 | parser.add_argument(
43 | "-s",
44 | "--set_resolution",
45 | type=int,
46 | nargs=2,
47 | help='override default camera resolution as W H')
48 | parser.add_argument(
49 | 'video_file',
50 | metavar='video_file',
51 | type=str,
52 | nargs='?',
53 | help='specify optional video file')
54 | args = parser.parse_args()
55 |
56 | #####################################################################
57 |
58 | # this function is called as a call-back every time the trackbar is moved
59 | # (here we just do nothing)
60 |
61 |
62 | def nothing(x):
63 | pass
64 |
65 | #####################################################################
66 |
67 | # define video capture object
68 |
69 |
70 | try:
71 | # to use a non-buffered camera stream (via a separate thread)
72 |
73 | if not (args.video_file):
74 | import camera_stream
75 | cap = camera_stream.CameraVideoStream() # T-API breaks code
76 | else:
77 | cap = cv2.VideoCapture() # not needed for video files
78 |
79 | except BaseException:
80 | # if not then just use OpenCV default
81 |
82 | print("INFO: camera_stream class not found - camera input may be buffered")
83 | cap = cv2.VideoCapture()
84 |
85 | # define display window name
86 |
87 | window_name = "Live Camera Input" # window name
88 |
89 | # if command line arguments are provided try to read video_name
90 | # otherwise default to capture from attached H/W camera
91 |
92 | if (((args.video_file) and (cap.open(str(args.video_file))))
93 | or (cap.open(args.camera_to_use))):
94 |
95 | # create window by name (as resizable)
96 |
97 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
98 |
99 | # add some track bar controllers for settings
100 |
101 | neighbourhood = 3
102 | cv2.createTrackbar(
103 | "neighbourhood, N",
104 | window_name,
105 | neighbourhood,
106 | 15,
107 | nothing)
108 |
109 | # override default camera resolution
110 |
111 | if (args.set_resolution is not None):
112 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
113 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
114 |
115 | print("INFO: input resolution : (",
116 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
117 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
118 |
119 | while (keep_processing):
120 |
121 | # if video file successfully open then read frame from video
122 |
123 | if (cap.isOpened()):
124 | ret, frame = cap.read()
125 |
126 | # when we reach the end of the video (file) exit cleanly
127 |
128 | if (ret == 0):
129 | keep_processing = False
130 | continue
131 |
132 | # rescale if specified
133 |
134 | if (args.rescale != 1.0):
135 | frame = cv2.resize(
136 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
137 |
138 | # convert to single channel grayscale image
139 | # with 32-bit float representation per pixel
140 |
141 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
142 | gray = np.float32(gray)
143 |
144 | # get parameters from track bars
145 |
146 | neighbourhood = cv2.getTrackbarPos("neighbourhood, N", window_name)
147 |
148 | # check neighbourhood is at least 3 and odd
149 |
150 | neighbourhood = max(3, neighbourhood)
151 | if not (neighbourhood % 2):
152 | neighbourhood = neighbourhood + 1
153 |
154 | # find harris corners (via the good features to track function)
155 |
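# (qualityLevel is specified relative to the strongest corner
# response found in the image, minDistance is the minimum spacing in
# pixels enforced between returned corners, and k is the Harris
# detector free parameter)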
156 | corners = cv2.goodFeaturesToTrack(
157 | gray,
158 | maxCorners=500,
159 | qualityLevel=0.01,
160 | minDistance=10,
161 | blockSize=neighbourhood,
162 | useHarrisDetector=True,
163 | k=0.01)
164 | corners = np.intp(corners)
165 |
166 | for i in corners:
167 | x, y = i.ravel()
168 | cv2.circle(frame, (x, y), 3, (0, 255, 0), -1)
169 |
170 | # alternatively get the raw harris eigenvalue response
171 |
172 | # dst = cv2.cornerHarris(gray,neighbourhood,neighbourhood, 0.01)
173 |
174 | # Threshold for an optimal value, it may vary depending on the image.
175 |
176 | # frame[dst>0.005*dst.max()]=[0,255,0]
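# a slightly fuller (still commented out) sketch of this alternative,
# assuming the same gray / frame variables as above:

# dst = cv2.cornerHarris(gray, blockSize=neighbourhood, ksize=3, k=0.01)
# dst = cv2.dilate(dst, None)  # dilate the response so maxima are visible
# frame[dst > 0.005 * dst.max()] = [0, 255, 0]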
177 |
178 | # display image
179 |
180 | cv2.imshow(window_name, frame)
181 |
182 | # start the event loop - essential
183 |
184 | # cv2.waitKey() is a keyboard binding function (argument is the time in
185 | # milliseconds). It waits for specified milliseconds for any keyboard
186 | # event. If you press any key in that time, the program continues.
187 | # If 0 is passed, it waits indefinitely for a key stroke.
188 | # (bitwise and with 0xFF to extract least significant byte of
189 | # multi-byte response)
190 |
191 | # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
192 |
193 | key = cv2.waitKey(40) & 0xFF
194 |
195 | # It can also be set to detect specific key strokes by recording which
196 | # key is pressed
197 |
198 | # e.g. if user presses "x" then exit / press "f" for fullscreen
199 | # display
200 |
201 | if (key == ord('x')):
202 | keep_processing = False
203 | elif (key == ord('f')):
204 | cv2.setWindowProperty(
205 | window_name,
206 | cv2.WND_PROP_FULLSCREEN,
207 | cv2.WINDOW_FULLSCREEN)
208 |
209 | # close all windows
210 |
211 | cv2.destroyAllWindows()
212 |
213 | else:
214 | print("No video file specified or camera connected.")
215 |
216 | #####################################################################
217 |
--------------------------------------------------------------------------------
/hog.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : HOG pedestrian detection from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2015 / 2016 School of Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import math
19 | import numpy as np
20 |
21 | #####################################################################
22 |
23 | keep_processing = True
24 |
25 | # parse command line arguments for camera ID or video file
26 |
27 | parser = argparse.ArgumentParser(
28 | description='Perform ' +
29 | sys.argv[0] +
30 | ' example operation on incoming camera/video image')
31 | parser.add_argument(
32 | "-c",
33 | "--camera_to_use",
34 | type=int,
35 | help="specify camera to use",
36 | default=0)
37 | parser.add_argument(
38 | "-r",
39 | "--rescale",
40 | type=float,
41 | help="rescale image by this factor",
42 | default=1.0)
43 | parser.add_argument(
44 | 'video_file',
45 | metavar='video_file',
46 | type=str,
47 | nargs='?',
48 | help='specify optional video file')
49 | args = parser.parse_args()
50 |
51 | #####################################################################
52 |
53 | # if we have OpenCL H/W acceleration available, use it - we'll need it
54 |
55 | cv2.ocl.setUseOpenCL(True)
56 | print(
57 | "INFO: OpenCL - available: ",
58 | cv2.ocl.haveOpenCL(),
59 | " using: ",
60 | cv2.ocl.useOpenCL())
61 |
62 | #####################################################################
63 |
64 |
65 | def inside(r, q):
66 | rx, ry, rw, rh = r
67 | qx, qy, qw, qh = q
68 | return rx > qx and ry > qy and rx + rw < qx + qw and ry + rh < qy + qh
69 |
70 |
71 | def draw_detections(img, rects, thickness=1):
72 | for x, y, w, h in rects:
73 | # the HOG detector returns slightly larger rectangles than the
74 | # real objects so we slightly shrink the rectangles to
75 | # get a nicer output.
76 | pad_w, pad_h = int(0.15 * w), int(0.05 * h)
77 | cv2.rectangle(img, (x + pad_w, y + pad_h),
78 | (x + w - pad_w, y + h - pad_h), (0, 255, 0), thickness)
79 |
80 | #####################################################################
81 |
82 | # power law transform
83 | # image - colour image
84 | # gamma - "gradient" coefficient of the gamma function
85 |
86 |
87 | def powerlaw_transform(image, gamma):
88 |
89 | # compute power-law transform, applied directly to pixel values
90 | # in the 0-255 range (i.e. without normalising to [0,1] first)
91 |
92 | # handle any overflow in a quick and dirty way using 0-255 clipping
93 |
94 | image = np.clip(np.power(image, gamma), 0, 255).astype('uint8')
95 |
96 | return image
97 |
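# note: a more conventional gamma correction first normalises pixel
# values to the [0,1] range - a minimal sketch of such a variant
# (hypothetical, and not used here so that the gamma trackbar
# behaviour below is unchanged):

# def powerlaw_transform_normalised(image, gamma):
#     return np.uint8(np.power(image / 255.0, gamma) * 255)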
98 |
99 | #####################################################################
100 |
101 | # this function is called as a call-back every time the trackbar is moved
102 | # (here we just do nothing)
103 |
104 | def nothing(x):
105 | pass
106 |
107 |
108 | #####################################################################
109 |
110 | # define video capture object
111 |
112 |
113 | try:
114 | # to use a non-buffered camera stream (via a separate thread)
115 |
116 | if not (args.video_file):
117 | import camera_stream
118 | cap = camera_stream.CameraVideoStream() # T-API done later
119 | else:
120 | cap = cv2.VideoCapture() # not needed for video files
121 |
122 | except BaseException:
123 | # if not then just use OpenCV default
124 |
125 | print("INFO: camera_stream class not found - camera input may be buffered")
126 | cap = cv2.VideoCapture()
127 |
128 | #####################################################################
129 |
130 | # define display window name
131 |
132 | window_name = "HOG pedestrian detection" # window name
133 |
134 | # if command line arguments are provided try to read video_name
135 | # otherwise default to capture from attached H/W camera
136 |
137 | if (((args.video_file) and (cap.open(str(args.video_file))))
138 | or (cap.open(args.camera_to_use))):
139 |
140 | # create window by name (as resizable)
141 |
142 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
143 |
144 | # set up HoG detector
145 |
146 | hog = cv2.HOGDescriptor()
147 | hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
148 |
149 | # add some track bar controllers for settings
150 |
151 | neighbourhood = 3
152 | cv2.createTrackbar("Smoothing : neighbourhood, N", window_name,
153 | neighbourhood, 40, nothing)
154 |
155 | sigma = 1
156 | cv2.createTrackbar("Smoothing : sigma", window_name, sigma, 10, nothing)
157 |
158 | gamma = 100 # default gamma = 100 * 0.01 = 1 -> no change
159 | cv2.createTrackbar("gamma, (* 0.01)", window_name, gamma, 150, nothing)
160 |
161 | svm_threshold = 0 # by default the SVM's own threshold at the hyperplane
162 | cv2.createTrackbar("SVM threshold, (distance from hyper-plane, * 0.1)",
163 | window_name, svm_threshold, 10, nothing)
164 |
165 | while (keep_processing):
166 |
167 | # if video file successfully open then read frame from video
168 |
169 | if (cap.isOpened()):
170 | ret, frame = cap.read()
171 |
172 | # when we reach the end of the video (file) exit cleanly
173 |
174 | if (ret == 0):
175 | keep_processing = False
176 | continue
177 |
178 | # rescale if specified
179 |
180 | if (args.rescale != 1.0):
181 | frame = cv2.resize(
182 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
183 |
184 | # start a timer (to see how long processing and display takes)
185 |
186 | start_t = cv2.getTickCount()
187 |
188 | # get parameters from track bars
189 |
190 | neighbourhood = cv2.getTrackbarPos(
191 | "Smoothing : neighbourhood, N", window_name)
192 | sigma = cv2.getTrackbarPos("Smoothing : sigma", window_name)
193 | gamma = cv2.getTrackbarPos("gamma, (* 0.01)", window_name) * 0.01
194 | svm_threshold = cv2.getTrackbarPos(
195 | "SVM threshold, (distance from hyper-plane, * 0.1)",
196 | window_name) * 0.1
197 |
198 | # check neighbourhood is at least 3 and odd
199 |
200 | neighbourhood = max(3, neighbourhood)
201 | if not (neighbourhood % 2):
202 | neighbourhood = neighbourhood + 1
203 |
204 | # use power-law function to perform gamma correction
205 | # and convert np array to T-API universal array for H/W acceleration
206 |
207 | frame = cv2.UMat(powerlaw_transform(frame, gamma))
208 |
209 | # perform Gaussian smoothing using NxN neighbourhood
210 |
211 | frame = cv2.GaussianBlur(
212 | frame,
213 | (neighbourhood,
214 | neighbourhood),
215 | sigma,
216 | sigma,
217 | borderType=cv2.BORDER_REPLICATE)
218 |
219 | # perform HOG based pedestrian detection
220 |
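# (winStride is the sliding window step in pixels, padding is the
# border added around each window before feature extraction, scale
# is the image pyramid factor between levels and hitThreshold shifts
# the SVM decision boundary so that only detections further from the
# hyperplane are reported)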
221 | found, w = hog.detectMultiScale(
222 | frame, winStride=(
223 | 8, 8), padding=(
224 | 32, 32), scale=1.05, hitThreshold=svm_threshold)
225 | found_filtered = []
226 |
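# filter out detections fully enclosed by another detection - note
# the for/else construct: the else clause runs only if the inner
# loop completes without break, i.e. only if r is not inside any
# other detection q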
227 | for ri, r in enumerate(found):
228 | for qi, q in enumerate(found):
229 | if ri != qi and inside(r, q):
230 | break
231 | else:
232 | found_filtered.append(r)
233 |
234 | draw_detections(frame, found_filtered, 3)
235 |
236 | # display image
237 |
238 | cv2.imshow(window_name, frame)
239 |
240 | # stop the timer and convert to ms. (to see how long processing and
241 | # display takes)
242 |
243 | stop_t = ((cv2.getTickCount() - start_t) /
244 | cv2.getTickFrequency()) * 1000
245 |
246 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
247 | # 25 fps = 40 ms)
248 |
249 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
250 |
251 | # e.g. if user presses "x" then exit / press "f" for fullscreen
252 | # display
253 |
254 | if (key == ord('x')):
255 | keep_processing = False
256 | elif (key == ord('f')):
257 | cv2.setWindowProperty(
258 | window_name,
259 | cv2.WND_PROP_FULLSCREEN,
260 | cv2.WINDOW_FULLSCREEN)
261 |
262 | # close all windows
263 |
264 | cv2.destroyAllWindows()
265 |
266 | else:
267 | print("No video file specified or camera connected.")
268 |
269 | #####################################################################
270 |
--------------------------------------------------------------------------------
/houghlines.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : hough line detection based on canny edge detection
4 | # for a video file specified on the command line (e.g. python FILE.py
5 | # video_file) or from an attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2021 Dept. Computer Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import numpy as np
19 |
20 | #####################################################################
21 |
22 | keep_processing = True
23 | use_probabilistic_hough = False
24 |
25 | # parse command line arguments for camera ID or video file
26 |
27 | parser = argparse.ArgumentParser(
28 | description='Perform ' +
29 | sys.argv[0] +
30 | ' example operation on incoming camera/video image')
31 | parser.add_argument(
32 | "-c",
33 | "--camera_to_use",
34 | type=int,
35 | help="specify camera to use",
36 | default=0)
37 | parser.add_argument(
38 | "-r",
39 | "--rescale",
40 | type=float,
41 | help="rescale image by this factor",
42 | default=1.0)
43 | parser.add_argument(
44 | 'video_file',
45 | metavar='video_file',
46 | type=str,
47 | nargs='?',
48 | help='specify optional video file')
49 | args = parser.parse_args()
50 |
51 | #####################################################################
52 |
53 | # this function is called as a call-back every time the trackbar is moved
54 | # (here we just do nothing)
55 |
56 |
57 | def nothing(x):
58 | pass
59 |
60 |
61 | #####################################################################
62 |
63 | # define video capture object
64 |
65 | try:
66 | # to use a non-buffered camera stream (via a separate thread)
67 |
68 | if not (args.video_file):
69 | import camera_stream
70 | cap = camera_stream.CameraVideoStream()
71 | else:
72 | cap = cv2.VideoCapture() # not needed for video files
73 |
74 | except BaseException:
75 | # if not then just use OpenCV default
76 |
77 | print("INFO: camera_stream class not found - camera input may be buffered")
78 | cap = cv2.VideoCapture()
79 |
80 | # define display window name
81 |
82 | window_name = "Live Camera Input" # window name
83 | window_name2 = "Hough Lines" # window name
84 |
85 | # if command line arguments are provided try to read video_name
86 | # otherwise default to capture from attached H/W camera
87 |
88 | if (((args.video_file) and (cap.open(str(args.video_file))))
89 | or (cap.open(args.camera_to_use))):
90 |
91 | # create window by name (as resizable)
92 |
93 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
94 | cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
95 |
96 | # add some track bar controllers for settings
97 |
98 | lower_threshold = 25
99 | cv2.createTrackbar("lower", window_name2, lower_threshold, 255, nothing)
100 | upper_threshold = 120
101 | cv2.createTrackbar("upper", window_name2, upper_threshold, 255, nothing)
102 | smoothing_neighbourhood = 3
103 | cv2.createTrackbar(
104 | "smoothing",
105 | window_name2,
106 | smoothing_neighbourhood,
107 | 15,
108 | nothing)
109 | sobel_size = 3 # Canny apertureSize must be odd and no greater than 7
110 | cv2.createTrackbar("sobel size", window_name2, sobel_size, 7, nothing)
111 |
112 | while (keep_processing):
113 |
114 | # if video file successfully open then read frame from video
115 |
116 | if (cap.isOpened()):
117 | ret, frame = cap.read()
118 |
119 | # when we reach the end of the video (file) exit cleanly
120 |
121 | if (ret == 0):
122 | keep_processing = False
123 | continue
124 |
125 | # rescale if specified
126 |
127 | if (args.rescale != 1.0):
128 | frame = cv2.resize(
129 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
130 |
131 | # get parameters from track bars
132 |
133 | lower_threshold = cv2.getTrackbarPos("lower", window_name2)
134 | upper_threshold = cv2.getTrackbarPos("upper", window_name2)
135 | smoothing_neighbourhood = cv2.getTrackbarPos("smoothing", window_name2)
136 | sobel_size = cv2.getTrackbarPos("sobel size", window_name2)
137 |
138 | # check neighbourhood and sobel size are each at least 3 and odd
139 |
140 | smoothing_neighbourhood = max(3, smoothing_neighbourhood)
141 | if not (smoothing_neighbourhood % 2):
142 | smoothing_neighbourhood = smoothing_neighbourhood + 1
143 |
144 | sobel_size = max(3, sobel_size)
145 | if not (sobel_size % 2):
146 | sobel_size = sobel_size + 1
147 |
148 | # convert to grayscale
149 |
150 | gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
151 |
152 | # perform smoothing on the image using an NxN Gaussian kernel (see
153 | # manual entry for GaussianBlur())
154 |
155 | smoothed = cv2.GaussianBlur(
156 | gray_frame, (smoothing_neighbourhood, smoothing_neighbourhood), 0)
157 |
158 | # perform canny edge detection
159 |
160 | canny = cv2.Canny(
161 | smoothed,
162 | lower_threshold,
163 | upper_threshold,
164 | apertureSize=sobel_size)
165 |
166 | # perform hough line detection
167 | # based on tutorial at:
168 | # https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_houghlines/py_houghlines.html
169 |
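# each standard hough line is returned in normal form (rho, theta),
# i.e. the line satisfying x*cos(theta) + y*sin(theta) = rho;
# (x0, y0) below is the point on the line closest to the origin and
# the drawn segment extends +/- 1000 pixels from it along the line
# direction (-sin(theta), cos(theta))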
170 | if not (use_probabilistic_hough):
171 | lines = cv2.HoughLines(canny, 1, np.pi/180, 40)
172 | if lines is not None:
173 | for rho, theta in lines[:, 0]:  # iterate over all detected lines
174 | a = np.cos(theta)
175 | b = np.sin(theta)
176 | x0 = a*rho
177 | y0 = b*rho
178 | x1 = int(x0 + 1000*(-b))
179 | y1 = int(y0 + 1000*(a))
180 | x2 = int(x0 - 1000*(-b))
181 | y2 = int(y0 - 1000*(a))
182 |
183 | cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
184 |
185 | else:
186 |
187 | # use probabilistic hough transform
188 |
189 | min_line_length = 100 # requires tuning
190 | max_line_gap = 10 # requires tuning
191 |
192 | lines = cv2.HoughLinesP(canny, 1, np.pi/180, 10,
193 | minLineLength=min_line_length, maxLineGap=max_line_gap)
194 | if lines is not None:
195 | for x1, y1, x2, y2 in lines[:, 0]:  # iterate over all detected lines
196 | cv2.line(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
197 |
198 | # display image
199 |
200 | cv2.imshow(window_name, frame)
201 | cv2.imshow(window_name2, canny)
202 |
203 | # start the event loop - essential
204 |
205 | # cv2.waitKey() is a keyboard binding function (argument is the time in
206 | # milliseconds). It waits for specified milliseconds for any keyboard
207 | # event. If you press any key in that time, the program continues.
208 | # If 0 is passed, it waits indefinitely for a key stroke.
209 | # (bitwise and with 0xFF to extract least significant byte of
210 | # multi-byte response)
211 |
212 | # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
213 | key = cv2.waitKey(40) & 0xFF
214 |
215 | # It can also be set to detect specific key strokes by recording which
216 | # key is pressed
217 |
218 | # e.g. if user presses "x" then exit / press "f" for fullscreen
219 | # display
220 |
221 | if (key == ord('x')):
222 | keep_processing = False
223 | elif (key == ord('f')):
224 | cv2.setWindowProperty(
225 | window_name2,
226 | cv2.WND_PROP_FULLSCREEN,
227 | cv2.WINDOW_FULLSCREEN)
228 | elif (key == ord('p')):
229 | use_probabilistic_hough = not (use_probabilistic_hough)
230 |
231 | # close all windows
232 |
233 | cv2.destroyAllWindows()
234 |
235 | else:
236 | print("No video file specified or camera connected.")
237 |
238 | #####################################################################
239 |
--------------------------------------------------------------------------------
/lbp_cascade_detection.py:
--------------------------------------------------------------------------------
1 | # Example : perform LBP cascade detection on live display from a video file
2 | # specified on the command line (e.g. python FILE.py video_file) or from an
3 | # attached web camera
4 |
5 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
6 |
7 | # Copyright (c) 2016 School of Engineering & Computing Science,
8 | # Durham University, UK
9 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
10 |
11 | # based on haar example at:
12 | # http://docs.opencv.org/3.1.0/d7/d8b/tutorial_py_face_detection.html#gsc.tab=0
13 |
14 | # get trained cascade files from:
15 | # https://github.com/opencv/opencv/tree/master/data/
16 |
17 | #####################################################################
18 |
19 | import cv2
20 | import argparse
21 | import sys
22 | import math
23 |
24 | #####################################################################
25 |
26 | keep_processing = True
27 |
28 | # parse command line arguments for camera ID or video file
29 |
30 | parser = argparse.ArgumentParser(
31 | description='Perform ' +
32 | sys.argv[0] +
33 | ' example operation on incoming camera/video image')
34 | parser.add_argument(
35 | "-c",
36 | "--camera_to_use",
37 | type=int,
38 | help="specify camera to use",
39 | default=0)
40 | parser.add_argument(
41 | "-r",
42 | "--rescale",
43 | type=float,
44 | help="rescale image by this factor",
45 | default=1.0)
46 | parser.add_argument(
47 | 'video_file',
48 | metavar='video_file',
49 | type=str,
50 | nargs='?',
51 | help='specify optional video file')
52 | args = parser.parse_args()
53 |
54 | #####################################################################
55 |
56 | # define video capture object
57 |
58 | try:
59 | # to use a non-buffered camera stream (via a separate thread)
60 |
61 | if not (args.video_file):
62 | import camera_stream
63 | cap = camera_stream.CameraVideoStream()
64 | else:
65 | cap = cv2.VideoCapture() # not needed for video files
66 |
67 | except BaseException:
68 | # if not then just use OpenCV default
69 |
70 | print("INFO: camera_stream class not found - camera input may be buffered")
71 | cap = cv2.VideoCapture()
72 |
73 | # define display window name
74 |
75 | window_name = "Face Detection using LBP Cascades" # window name
76 |
77 | # define lbpcascades cascade objects
78 |
79 | # required cascade classifier files (and many others) available from:
80 | # https://github.com/opencv/opencv/tree/master/data/lbpcascades
81 |
82 | face_cascade = cv2.CascadeClassifier('lbpcascade_frontalface_improved.xml')
83 |
84 | if (face_cascade.empty()):
85 | print("Failed to load cascade from file.")
86 |
87 |
88 | # if command line arguments are provided try to read video_name
89 | # otherwise default to capture from attached H/W camera
90 |
91 | if (((args.video_file) and (cap.open(str(args.video_file))))
92 | or (cap.open(args.camera_to_use))):
93 |
94 | # create window by name (as resizable)
95 |
96 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
97 |
98 | while (keep_processing):
99 |
100 | # if video file successfully open then read frame from video
101 |
102 | if (cap.isOpened()):
103 | ret, frame = cap.read()
104 |
105 | # when we reach the end of the video (file) exit cleanly
106 |
107 | if (ret == 0):
108 | keep_processing = False
109 | continue
110 |
111 | # rescale if specified
112 |
113 | if (args.rescale != 1.0):
114 | frame = cv2.resize(
115 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
116 |
117 | # start a timer (to see how long processing and display takes)
118 |
119 | start_t = cv2.getTickCount()
120 |
121 | # convert to grayscale
122 |
123 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
124 |
125 | # detect faces using LBP cascade trained on faces
126 |
127 | faces = face_cascade.detectMultiScale(
128 | gray, scaleFactor=1.3, minNeighbors=3, minSize=(30, 30))
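# (scaleFactor: image pyramid step - each level is 1.3x smaller than
# the previous; minNeighbors: minimum number of overlapping candidate
# detections required to keep a face; minSize: smallest face size
# considered, in pixels)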
129 |
130 | # for each face detected by the LBP cascade (note: unlike the
131 | # haar example, no secondary eye detection is performed here)
132 |
133 | for (x, y, w, h) in faces:
134 |
135 | # draw each face bounding box and extract regions of interest (roi)
136 |
137 | cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
138 | roi_gray = gray[y:y + math.floor(h * 0.5), x:x + w]
139 | roi_color = frame[y:y + math.floor(h * 0.5), x:x + w]
140 |
141 | # display image
142 |
143 | cv2.imshow(window_name, frame)
144 |
145 | # stop the timer and convert to ms. (to see how long processing and
146 | # display takes)
147 |
148 | stop_t = ((cv2.getTickCount() - start_t) /
149 | cv2.getTickFrequency()) * 1000
150 |
151 | # start the event loop - essential
152 |
153 | # cv2.waitKey() is a keyboard binding function (argument is the time in
154 | # ms.) It waits for specified milliseconds for any keyboard event.
155 | # If you press any key in that time, the program continues.
156 | # If 0 is passed, it waits indefinitely for a key stroke.
157 | # (bitwise and with 0xFF to extract least significant byte of
158 | # multi-byte response) here we use a wait time in ms. that takes
159 | # account of processing time already used in the loop
160 |
161 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
162 | # 25 fps = 40 ms)
163 |
164 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
165 |
166 | # It can also be set to detect specific key strokes by recording which
167 | # key is pressed
168 |
169 | # e.g. if user presses "x" then exit / press "f" for fullscreen
170 | # display
171 |
172 | if (key == ord('x')):
173 | keep_processing = False
174 | elif (key == ord('f')):
175 | cv2.setWindowProperty(
176 | window_name,
177 | cv2.WND_PROP_FULLSCREEN,
178 | cv2.WINDOW_FULLSCREEN)
179 |
180 | # close all windows
181 |
182 | cv2.destroyAllWindows()
183 |
184 | else:
185 | print("No video file specified or camera connected.")
186 |
--------------------------------------------------------------------------------
/mask-rcnn.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : performs Mask R-CNN object instance segmentation from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2021 Toby Breckon, Durham University, UK
10 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
11 |
12 | # Implements the Mask R-CNN instance segmentation architecture described in:
13 | # Mask R-CNN - Kaiming He, Georgia Gkioxari, Piotr Dollár, Ross Girshick
14 | # https://arxiv.org/abs/1703.06870
15 |
16 | # This code: significant portions based on the example available at:
17 | # https://github.com/opencv/opencv/blob/master/samples/dnn/mask_rcnn.py
18 |
19 | # To use first download and unpack the following files:
20 | # https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_coco.txt
21 | # http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
22 | # https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt
23 | # then unpack as follows:
24 | # tar -xzf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
25 |
26 | ##########################################################################
27 |
28 | import cv2
29 | import argparse
30 | import sys
31 | import math
32 | import numpy as np
33 |
34 | ##########################################################################
35 |
36 | keep_processing = True
37 | colors = None
38 |
39 | # parse command line arguments for camera ID or video file, and Mask
40 | # R-CNN files
41 | parser = argparse.ArgumentParser(
42 | description='Perform ' +
43 | sys.argv[0] +
44 | ' example operation on incoming camera/video image')
45 | parser.add_argument(
46 | "-c",
47 | "--camera_to_use",
48 | type=int,
49 | help="specify camera to use",
50 | default=0)
51 | parser.add_argument(
52 | "-r",
53 | "--rescale",
54 | type=float,
55 | help="rescale image by this factor",
56 | default=1.0)
57 | parser.add_argument(
58 | "-fs",
59 | "--fullscreen",
60 | action='store_true',
61 | help="run in full screen mode")
62 | parser.add_argument(
63 | "-use",
64 | "--target",
65 | type=str,
66 | choices=['cpu', 'gpu', 'opencl'],
67 | help="select computational backend",
68 | default='gpu')
69 | parser.add_argument(
70 | 'video_file',
71 | metavar='video_file',
72 | type=str,
73 | nargs='?',
74 | help='specify optional video file')
75 | parser.add_argument(
76 | "-cl",
77 | "--class_file",
78 | type=str,
79 | help="list of classes",
80 | default='object_detection_classes_coco.txt')
81 | parser.add_argument(
82 | "-cf",
83 | "--config_file",
84 | type=str,
85 | help="network config",
86 | default='mask_rcnn_inception_v2_coco_2018_01_28.pbtxt')
87 | parser.add_argument(
88 | "-w",
89 | "--weights_file",
90 | type=str,
91 | help="network weights",
92 | default="mask_rcnn_inception_v2_coco_2018_01_28/"
93 | + "/frozen_inference_graph.pb")
94 |
95 | args = parser.parse_args()
96 |
97 | ##########################################################################
98 | # dummy on trackbar callback function
99 |
100 |
101 | def on_trackbar(val):
102 | return
103 |
104 | #####################################################################
105 | # Draw the predicted bounding box on the specified image
106 | # image: image detection performed on
107 | # class_name, confidence: name and confidence of detected object
108 | # left, top, right, bottom: rectangle parameters for detection
109 | # colour: to draw detection rectangle in
110 |
111 |
112 | def drawPred(image, class_name, confidence, left, top, right, bottom, colour):
113 | # Draw a bounding box.
114 | cv2.rectangle(image, (left, top), (right, bottom), colour, 3)
115 |
116 | # construct label
117 | label = '%s:%.2f' % (class_name, confidence)
118 |
119 | # Display the label at the top of the bounding box
120 | labelSize, baseLine = cv2.getTextSize(
121 | label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
122 | top = max(top, labelSize[1])
123 | cv2.rectangle(
124 | image,
125 | (left,
126 | top -
127 | round(
128 | 1.5 *
129 | labelSize[1])),
130 | (left +
131 | round(
132 | 1.5 *
133 | labelSize[0]),
134 | top +
135 | baseLine),
136 | (255,
137 | 255,
138 | 255),
139 | cv2.FILLED)
140 | cv2.putText(image, label, (left, top),
141 | cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 1)
142 |
143 | ##########################################################################
144 |
145 | # define video capture object
146 |
147 |
148 | try:
149 | # to use a non-buffered camera stream (via a separate thread)
150 |
151 | if not (args.video_file):
152 | import camera_stream
153 | cap = camera_stream.CameraVideoStream()
154 | else:
155 | cap = cv2.VideoCapture() # not needed for video files
156 |
157 | except BaseException:
158 | # if not then just use OpenCV default
159 |
160 | print("INFO: camera_stream class not found - camera input may be buffered")
161 | cap = cv2.VideoCapture()
162 |
163 | ##########################################################################
164 |
165 | # init Mask R-CNN object detection model
166 |
167 | inpWidth = 800 # Width of network's input image
168 | inpHeight = 800 # Height of network's input image
169 |
170 | # Load names of classes from file
171 |
172 | classesFile = args.class_file
173 | classes = None
174 | with open(classesFile, 'rt') as f:
175 | classes = f.read().rstrip('\n').split('\n')
176 |
177 | # load configuration and weight files for the model and load the network
178 | # using them
179 |
180 | net = cv2.dnn.readNet(args.config_file, args.weights_file)
181 |
182 | # set up compute target as one of [GPU, OpenCL, CPU]
183 |
184 | if (args.target == 'gpu'):
185 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
186 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
187 | elif (args.target == 'opencl'):
188 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
189 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
190 | else:
191 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
192 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
193 |
194 | ##########################################################################
195 |
196 | # define display window name + trackbar
197 |
198 | window_name = 'Mask R-CNN instance segmentation: ' + args.weights_file
199 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
200 | trackbarName = 'reporting confidence > (x 0.01)'
201 | cv2.createTrackbar(trackbarName, window_name, 70, 100, on_trackbar)
202 |
203 | ##########################################################################
204 |
205 | # if command line arguments are provided try to read video_name
206 | # otherwise default to capture from attached camera
207 |
208 | if (((args.video_file) and (cap.open(str(args.video_file))))
209 | or (cap.open(args.camera_to_use))):
210 |
211 | # create window by name (as resizable)
212 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
213 |
214 | while (keep_processing):
215 |
216 | # start a timer (to see how long processing and display takes)
217 | start_t = cv2.getTickCount()
218 |
219 | # if camera / video file successfully open then read frame
220 | if (cap.isOpened()):
221 | ret, frame = cap.read()
222 |
223 | # when we reach the end of the video (file) exit cleanly
224 | if (ret == 0):
225 | keep_processing = False
226 | continue
227 |
228 | # rescale if specified
229 | if (args.rescale != 1.0):
230 | frame = cv2.resize(
231 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
232 |
233 | # get frame dimensions
234 | frameH = frame.shape[0]
235 | frameW = frame.shape[1]
236 |
237 | # create a 4D tensor (OpenCV 'blob') from image frame (pixels not
238 | # scaled, image resized)
239 | tensor = cv2.dnn.blobFromImage(
240 | frame, 1.0, (inpWidth, inpHeight), [0, 0, 0],
241 | swapRB=True, crop=False)
242 |
243 | # set the input to the CNN network
244 | net.setInput(tensor)
245 |
246 | # runs forward inference to get output of the final output layers
247 | boxes, masks = net.forward(['detection_out_final', 'detection_masks'])
248 |
249 | # get confidence threshold from trackbar
250 | confThreshold = cv2.getTrackbarPos(trackbarName, window_name) / 100
251 |
252 | # get number of classes detected and number of detections
253 | numClasses = masks.shape[1]
254 | numDetections = boxes.shape[2]
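# (for this model: boxes has shape [1, 1, N, 7], one detection per
# row as [batchId, classId, confidence, left, top, right, bottom]
# with coordinates normalised to [0, 1]; masks has shape
# [N, numClasses, H, W], one low-resolution mask per class per
# detection)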
255 |
256 | # draw segmentation - first generate colours if needed
257 |
258 | if not colors:
259 | np.random.seed(324)
260 | colors = [np.array([0, 0, 0], np.uint8)]
261 | for i in range(1, numClasses + 1):
262 | colors.append((colors[i - 1] +
263 | np.random.randint(0, 256, [3],
264 | np.uint8)) / 2
265 | )
266 | del colors[0]
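# (the fixed random seed above keeps the per-class colours stable
# from frame to frame and across runs)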
267 |
268 | # draw segmentation - draw instance segments
269 |
270 | boxesToDraw = []
271 | for i in range(numDetections):
272 | box = boxes[0, 0, i]
273 | mask = masks[i]
274 | confidence = box[2]
275 | if confidence > confThreshold:
276 |
277 | # **** draw bounding box (as per Faster R-CNN)
278 |
279 | classId = int(box[1])
280 | left = int(frameW * box[3])
281 | top = int(frameH * box[4])
282 | right = int(frameW * box[5])
283 | bottom = int(frameH * box[6])
284 |
285 | left = max(0, min(left, frameW - 1))
286 | top = max(0, min(top, frameH - 1))
287 | right = max(0, min(right, frameW - 1))
288 | bottom = max(0, min(bottom, frameH - 1))
289 |
290 | drawPred(frame, classes[classId], confidence,
291 | left, top, right, bottom, (0, 255, 0))
292 |
293 | # **** draw object instance mask
294 | # get mask, re-size from the network's fixed low-resolution output
295 | # to the size of the bounding box, then threshold at 0.5
296 |
297 | classMask = mask[classId]
298 | classMask = cv2.resize(classMask,
299 | (right - left + 1, bottom - top + 1),
300 | interpolation=cv2.INTER_CUBIC)
301 | mask = (classMask > 0.5)
302 |
303 | roi = frame[top:bottom+1, left:right+1][mask]
304 | frame[top:bottom+1, left:right+1][mask] = (
305 | 0.8 * colors[classId] + 0.2 * roi).astype(np.uint8)
306 |
307 | # stop the timer and convert to ms. (to see how long processing takes)
308 |
309 | stop_t = ((cv2.getTickCount() - start_t) /
310 | cv2.getTickFrequency()) * 1000
311 |
312 | # Display efficiency information
313 |
314 | label = ('Inference time: %.2f ms' % stop_t) + \
315 | (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
316 | cv2.putText(frame, label, (0, 15),
317 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
318 |
319 | # display image
320 | cv2.imshow(window_name, frame)
321 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
322 | cv2.WINDOW_FULLSCREEN & args.fullscreen)
323 |
324 | # start the event loop + detect specific key strokes
325 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
326 | # 25 fps = 40 ms)
327 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
328 |
329 | # if user presses "x" then exit / press "f" for fullscreen display
330 | if (key == ord('x')):
331 | keep_processing = False
332 | elif (key == ord('f')):
333 | args.fullscreen = not (args.fullscreen)
334 |
335 | # close all windows
336 | cv2.destroyAllWindows()
337 |
338 | else:
339 | print("No video file specified or camera connected.")
340 |
341 | ##########################################################################
342 |
--------------------------------------------------------------------------------
/mog-background-subtraction.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : perform MoG based foreground/background subtraction from a video
4 | # file specified on the command line (e.g. python FILE.py video_file) or from
5 | # an attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2015-25 Toby Breckon, Engineering & Computer Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import numpy as np
19 |
20 | #####################################################################
21 |
22 | # concatenate two RGB/grayscale images horizontally (left to right)
23 | # handling differing channel numbers or image heights in the input
24 |
25 |
26 | def h_concat(img1, img2):
27 |
28 | # get size and channels for both images
29 |
30 | height1 = img1.shape[0]
31 | # width1 = img1.shape[1]
32 | if (len(img1.shape) == 2):
33 | channels1 = 1
34 | else:
35 | channels1 = img1.shape[2]
36 |
37 | height2 = img2.shape[0]
38 | width2 = img2.shape[1]
39 | if (len(img2.shape) == 2):
40 | channels2 = 1
41 | else:
42 | channels2 = img2.shape[2]
43 |
44 | # make all images 3 channel, or assume all same channel
45 |
46 | if ((channels1 > channels2) and (channels1 == 3)):
47 | out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
48 | out1 = img1
49 | elif ((channels2 > channels1) and (channels2 == 3)):
50 | out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
51 | out2 = img2
52 | else: # both must be equal
53 | out1 = img1
54 | out2 = img2
55 |
56 | # height of first image is master height, width remains unchanged
57 |
58 | if (height1 != height2):
59 | out2 = cv2.resize(out2, (width2, height1))  # dsize is (width, height)
60 |
61 | return np.hstack((out1, out2))
62 |
63 | #####################################################################
64 |
65 | # concatenate two RGB/grayscale images vertically (top to bottom)
66 | # handling differing channel numbers or image heights in the input
67 |
68 |
69 | def v_concat(img1, img2):
70 |
71 | # get size and channels for both images
72 |
73 | # height1 = img1.shape[0]
74 | width1 = img1.shape[1]
75 | if (len(img1.shape) == 2):
76 | channels1 = 1
77 | else:
78 | channels1 = img1.shape[2]
79 |
80 | height2 = img2.shape[0]
81 | width2 = img2.shape[1]
82 | if (len(img2.shape) == 2):
83 | channels2 = 1
84 | else:
85 | channels2 = img2.shape[2]
86 |
87 | # make all images 3 channel, or assume all same channel
88 |
89 | if ((channels1 > channels2) and (channels1 == 3)):
90 | out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
91 | out1 = img1
92 | elif ((channels2 > channels1) and (channels2 == 3)):
93 | out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
94 | out2 = img2
95 | else: # both must be equal
96 | out1 = img1
97 | out2 = img2
98 |
99 | # width of first image is master width, height remains unchanged
100 |
101 | if (width1 != width2):
102 | out2 = cv2.resize(out2, (width1, height2))  # dsize is (width, height)
103 |
104 | return np.vstack((out1, out2))
105 |
106 | #####################################################################
107 |
108 |
109 | keep_processing = True
110 |
111 | # parse command line arguments for camera ID or video file
112 |
113 | parser = argparse.ArgumentParser(
114 | description='Perform ' +
115 | sys.argv[0] +
116 | ' example operation on incoming camera/video image')
117 | parser.add_argument(
118 | "-c",
119 | "--camera_to_use",
120 | type=int,
121 | help="specify camera to use",
122 | default=0)
123 | parser.add_argument(
124 | "-r",
125 | "--rescale",
126 | type=float,
127 | help="rescale image by this factor",
128 | default=1.0)
129 | parser.add_argument(
130 | "-s",
131 | "--set_resolution",
132 | type=int,
133 | nargs=2,
134 | help='override default camera resolution as W H')
135 | parser.add_argument(
136 | "-fs",
137 | "--fullscreen",
138 | action='store_true',
139 | help="run in full screen mode")
140 | parser.add_argument(
141 | 'video_file',
142 | metavar='video_file',
143 | type=str,
144 | nargs='?',
145 | help='specify optional video file')
146 | args = parser.parse_args()
147 |
148 | #####################################################################
149 |
150 | # define video capture object
151 |
152 | try:
153 | # to use a non-buffered camera stream (via a separate thread)
154 |
155 | if not (args.video_file):
156 | import camera_stream
157 | cap = camera_stream.CameraVideoStream()
158 | else:
159 | cap = cv2.VideoCapture() # not needed for video files
160 |
161 | except BaseException:
162 | # if not then just use OpenCV default
163 |
164 | print("INFO: camera_stream class not found - camera input may be buffered")
165 | cap = cv2.VideoCapture()
166 |
167 | # check versions to work around this bug in OpenCV 3.1
168 | # https://github.com/opencv/opencv/issues/6055
169 |
170 | (major, minor, _) = cv2.__version__.split(".")
171 | if ((major == '3') and (minor == '1')):
172 | cv2.ocl.setUseOpenCL(False)
173 |
174 | # define display window name
175 |
176 | window_name = "Live Camera Input" # window name
177 | window_nameBG = "Background Model" # window name
178 | window_nameFG = "Foreground Objects" # window name
179 | window_nameFGP = "Foreground Probability" # window name
180 |
181 | # if command line arguments are provided try to read video_name
182 | # otherwise default to capture from attached H/W camera
183 |
184 | if (((args.video_file) and (cap.open(str(args.video_file))))
185 | or (cap.open(args.camera_to_use))):
186 |
187 | # create window by name (as resizable)
188 |
189 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
190 | cv2.namedWindow(window_nameBG, cv2.WINDOW_NORMAL)
191 | cv2.namedWindow(window_nameFG, cv2.WINDOW_NORMAL)
192 | cv2.namedWindow(window_nameFGP, cv2.WINDOW_NORMAL)
193 |
194 | # override default camera resolution
195 |
196 | if (args.set_resolution is not None):
197 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
198 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
199 |
200 | # create GMM background subtraction object
201 | # (using default parameters which are suitable for quick lecture demos
202 | # - see manual for suitable choice of values to use in anger)
203 |
204 | mog = cv2.createBackgroundSubtractorMOG2(
205 | history=2000, varThreshold=16, detectShadows=True)
206 |
207 | print("\nPress <space> to reset MoG model ...\n")
208 |
209 | while (keep_processing):
210 |
211 | # if video file successfully open then read frame from video
212 |
213 | if (cap.isOpened()):
214 | ret, frame = cap.read()
215 |
216 | # when we reach the end of the video (file) exit cleanly
217 |
218 | if (ret == 0):
219 | keep_processing = False
220 | continue
221 |
222 | # rescale if specified
223 |
224 | if (args.rescale != 1.0):
225 | frame = cv2.resize(
226 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
227 |
228 | # add current frame to background model and retrieve current foreground
229 | # objects (use the learningRate parameter for tuning, see manual)
230 |
231 | fgmask = mog.apply(frame)
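# (e.g. mog.apply(frame, learningRate=0.005) would fix the model
# update rate explicitly; the default rate of -1 lets OpenCV choose
# it automatically)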
232 |
233 | # threshold and clean it up using erosion/dilation w/ elliptic mask
234 |
235 | fgthres = cv2.threshold(fgmask.copy(), 200, 255, cv2.THRESH_BINARY)[1]
236 | fgeroded = cv2.erode(
237 | fgthres, kernel=cv2.getStructuringElement(
238 | cv2.MORPH_ELLIPSE, (3, 3)), iterations=3)
239 | fgdilated = cv2.dilate(
240 | fgeroded, kernel=cv2.getStructuringElement(
241 | cv2.MORPH_ELLIPSE, (3, 3)), iterations=3)
242 |
243 | # get current background image (representative of current GMM model)
244 |
245 | bgmodel = mog.getBackgroundImage()
246 |
247 | # display images - input, background and original
248 |
249 | if (args.fullscreen):
250 |
251 | window_name = "[ Live | BG | Pr(FG) | FG ]"
252 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
253 | cv2.imshow(window_name, v_concat(
254 | h_concat(frame, bgmodel),
255 | h_concat(fgmask, fgeroded)
256 | ))
257 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
258 | cv2.WINDOW_FULLSCREEN & args.fullscreen)
259 |
260 | else:
261 |
262 | cv2.imshow(window_name, frame)
263 | cv2.imshow(window_nameFG, fgeroded)
264 | cv2.imshow(window_nameFGP, fgmask)
265 | cv2.imshow(window_nameBG, bgmodel)
266 |
267 | # start the event loop - essential
268 |
269 | # cv2.waitKey() is a keyboard binding function (argument is the time in
270 | # ms.) It waits for specified milliseconds for any keyboard event.
271 | # If you press any key in that time, the program continues.
272 | # If 0 is passed, it waits indefinitely for a key stroke.
273 | # (bitwise and with 0xFF to extract least significant byte of
274 | # multi-byte response) here we use a wait time in ms. that takes
275 | # account of processing time already used in the loop
276 |
277 | # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
278 | key = cv2.waitKey(40) & 0xFF
279 |
280 | # It can also be set to detect specific key strokes by recording which
281 | # key is pressed
282 |
283 | # e.g. if user presses "x" then exit, "f" for fullscreen
284 | # or reset MoG model when space is pressed
285 |
286 | if (key == ord('x')):
287 | keep_processing = False
288 | elif (key == ord(' ')):
289 | print("\nResetting MoG background model ...\n")
290 | mog = cv2.createBackgroundSubtractorMOG2(
291 | history=2000, varThreshold=16, detectShadows=True)
292 | elif (key == ord('f')):
293 | args.fullscreen = not (args.fullscreen)
294 |
295 | # close all windows
296 |
297 | cv2.destroyAllWindows()
298 |
299 | else:
300 | print("No video file specified or camera connected.")
301 |
302 | #####################################################################
303 |
--------------------------------------------------------------------------------
/openpose.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : perform live display of openpose body pose regression from a video
4 | # file specified on the command line (e.g. python FILE.py video_file) or from
5 | # an attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2019 Toby Breckon, Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | # Based heavily on the example provided at:
14 | # https://github.com/opencv/opencv/blob/master/samples/dnn/openpose.py
15 |
16 | ##########################################################################
17 |
18 | # To use download COCO model pose files from:
19 | # https://github.com/CMU-Perceptual-Computing-Lab/openpose/
20 | # using
21 | # https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/models/getModels.sh
22 |
23 | ##########################################################################
24 |
25 | import cv2
26 | import argparse
27 | import sys
28 | import math
29 |
30 | ##########################################################################
31 |
32 | keep_processing = True
33 |
34 | # parse command line arguments for camera ID or video file
35 |
36 | parser = argparse.ArgumentParser(
37 | description='Perform ' +
38 | sys.argv[0] +
39 | ' example operation on incoming camera/video image')
40 | parser.add_argument(
41 | "-c",
42 | "--camera_to_use",
43 | type=int,
44 | help="specify camera to use",
45 | default=0)
46 | parser.add_argument(
47 | "-r",
48 | "--rescale",
49 | type=float,
50 | help="rescale image by this factor",
51 | default=1.0)
52 | parser.add_argument(
53 | "-fs",
54 | "--fullscreen",
55 | action='store_true',
56 | help="run in full screen mode")
57 | parser.add_argument(
58 | "-use",
59 | "--target",
60 | type=str,
61 | choices=['cpu', 'gpu', 'opencl'],
62 | help="select computational backend",
63 | default='gpu')
64 | parser.add_argument(
65 | 'video_file',
66 | metavar='video_file',
67 | type=str,
68 | nargs='?',
69 | help='specify optional video file')
70 | args = parser.parse_args()
71 |
72 | ##########################################################################
73 |
74 | # define video capture object
75 |
76 | try:
77 | # to use a non-buffered camera stream (via a separate thread)
78 |
79 | if not (args.video_file):
80 | import camera_stream
81 | cap = camera_stream.CameraVideoStream()
82 | else:
83 | cap = cv2.VideoCapture() # not needed for video files
84 |
85 | except BaseException:
86 | # if not then just use OpenCV default
87 |
88 | print("INFO: camera_stream class not found - camera input may be buffered")
89 | cap = cv2.VideoCapture()
90 |
91 | ##########################################################################
92 |
93 | # define display window name
94 |
95 | window_name = "OpenPose Body Pose Regression - Live" # window name
96 |
97 | # create window by name (as resizable)
98 |
99 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
100 |
101 | ##########################################################################
102 |
103 | # set pose labels - based on COCO dataset training
104 |
105 | BODY_PARTS = {"Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
106 | "LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
107 | "RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "REye": 14,
108 | "LEye": 15, "REar": 16, "LEar": 17, "Background": 18}
109 |
110 | POSE_PAIRS = [
111 | ["Neck", "RShoulder"], ["Neck", "LShoulder"],
112 | ["RShoulder", "RElbow"], ["RElbow", "RWrist"],
113 | ["LShoulder", "LElbow"], ["LElbow", "LWrist"],
114 | ["Neck", "RHip"], ["RHip", "RKnee"],
115 | ["RKnee", "RAnkle"], ["Neck", "LHip"],
116 | ["LHip", "LKnee"], ["LKnee", "LAnkle"],
117 | ["Neck", "Nose"], ["Nose", "REye"],
118 | ["REye", "REar"], ["Nose", "LEye"],
119 | ["LEye", "LEar"]
120 | ]
121 |
122 | ##########################################################################
123 |
124 | # Load CNN model
125 | net = cv2.dnn.readNet(
126 | "pose_iter_440000.caffemodel",
127 | "pose_deploy_linevec.prototxt",
128 | 'caffe')
129 |
130 | # set up compute target as one of [GPU, OpenCL, CPU]
131 |
132 | if (args.target == 'gpu'):
133 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
134 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
135 | elif (args.target == 'opencl'):
136 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
137 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
138 | else:
139 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
140 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
141 |
142 | ##########################################################################
143 |
144 | # if command line arguments are provided try to read video_name
145 | # otherwise default to capture from attached camera
146 |
147 | if (((args.video_file) and (cap.open(str(args.video_file))))
148 | or (cap.open(args.camera_to_use))):
149 |
150 | while (keep_processing):
151 |
152 | # start a timer (to see how long processing and display takes)
153 |
154 | start_t = cv2.getTickCount()
155 |
156 | # if camera / video file successfully open then read frame
157 |
158 | if (cap.isOpened()):
159 | ret, frame = cap.read()
160 |
161 | # when we reach the end of the video (file) exit cleanly
162 |
163 | if (ret == 0):
164 | keep_processing = False
165 | continue
166 |
167 | # rescale if specified
168 |
169 | if (args.rescale != 1.0):
170 | frame = cv2.resize(
171 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
172 |
173 | # create a 4D tensor "blob" from a frame - defaults from OpenCV
174 | # OpenPose example
175 |
176 | blob = cv2.dnn.blobFromImage(
177 | frame, scalefactor=0.003922, size=(
178 | 368, 368), mean=[
179 | 0, 0, 0], swapRB=False, crop=False)
180 |
181 | # Run forward inference on the model
182 |
183 | net.setInput(blob)
184 | out = net.forward()
185 |
186 | # draw body parts
187 |
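# (for the COCO OpenPose model, out has shape [1, C, H', W'] where
# the first 19 channels are per-keypoint confidence heatmaps - the
# 18 body parts above plus background - and the remaining channels
# are part affinity fields, which this simple example does not use)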
188 | if (len(BODY_PARTS) <= out.shape[1]):
189 |
190 | frameWidth = frame.shape[1]
191 | frameHeight = frame.shape[0]
192 |
193 | points = []
194 | for i in range(len(BODY_PARTS)):
195 | # Slice heatmap of corresponding body's part.
196 | heatMap = out[0, i, :, :]
197 |
198 | # Ideally we would find all the local maxima in the heatmap.
199 | # To keep this sample simple we just take the global maximum,
200 | # which means only a single pose at a time can be
201 | # detected this way.
202 | _, conf, _, point = cv2.minMaxLoc(heatMap)
203 | x = (frameWidth * point[0]) / out.shape[3]
204 | y = (frameHeight * point[1]) / out.shape[2]
205 |
206 | # Add a point if its confidence is higher than the threshold.
207 | points.append((int(x), int(y)) if conf > 0.1 else None)
208 |
209 | for pair in POSE_PAIRS:
210 | partFrom = pair[0]
211 | partTo = pair[1]
212 | assert (partFrom in BODY_PARTS)
213 | assert (partTo in BODY_PARTS)
214 |
215 | idFrom = BODY_PARTS[partFrom]
216 | idTo = BODY_PARTS[partTo]
217 |
218 | if points[idFrom] and points[idTo]:
219 | cv2.line(
220 | frame, points[idFrom], points[idTo], (0, 255, 0), 3)
221 | cv2.ellipse(
222 | frame, points[idFrom], (3, 3), 0, 0, 360,
223 | (0, 0, 255), cv2.FILLED)
224 | cv2.ellipse(
225 | frame, points[idTo], (3, 3), 0, 0, 360,
226 | (0, 0, 255), cv2.FILLED)
227 |
228 | # stop the timer and convert to ms.
229 |
230 | stop_t = ((cv2.getTickCount() - start_t) /
231 | cv2.getTickFrequency()) * 1000
232 |
233 | # add efficiency information
234 |
235 | label = ('Inference time: %.2f ms' % stop_t) + \
236 | (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
237 | cv2.putText(frame, label, (0, 15),
238 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))
239 |
240 | # display image
241 |
242 | cv2.imshow(window_name, frame)
243 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
244 | cv2.WINDOW_FULLSCREEN & args.fullscreen)
245 |
246 | # start the event loop - essentials
247 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
248 | # 25 fps = 40 ms)
249 |
250 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
251 |
252 | # It can also be set to detect specific key strokes by recording which
253 | # key is pressed
254 |
255 | # e.g. if user presses "x" then exit / press "f" for fullscreen
256 | # display
257 |
258 | if (key == ord('x')):
259 | keep_processing = False
260 | elif (key == ord('f')):
261 | args.fullscreen = not (args.fullscreen)
262 |
263 | # close all windows
264 |
265 | cv2.destroyAllWindows()
266 |
267 | else:
268 | print("No video file specified or camera connected.")
269 |
270 | ##########################################################################
271 |
--------------------------------------------------------------------------------
/opticflow.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : perform live visualization of optic flow from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from
5 | # an attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2017 School of Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import numpy as np
19 |
20 | #####################################################################
21 |
22 | keep_processing = True
23 |
24 | # parse command line arguments for camera ID or video file
25 |
26 | parser = argparse.ArgumentParser(
27 | description='Perform ' +
28 | sys.argv[0] +
29 | ' example operation on incoming camera/video image')
30 | parser.add_argument(
31 | "-c",
32 | "--camera_to_use",
33 | type=int,
34 | help="specify camera to use",
35 | default=0)
36 | parser.add_argument(
37 | "-r",
38 | "--rescale",
39 | type=float,
40 | help="rescale image by this factor",
41 | default=1.0)
42 | parser.add_argument(
43 | 'video_file',
44 | metavar='video_file',
45 | type=str,
46 | nargs='?',
47 | help='specify optional video file')
48 | args = parser.parse_args()
49 |
50 | #####################################################################
51 |
52 | # draw optic flow visualization on image using a given step size for
53 | # the line glyphs that show the flow vectors on the image
54 |
55 |
56 | def draw_flow(img, flow, step=8):
57 | h, w = img.shape[:2]
58 |     y, x = np.mgrid[step / 2:h:step,
59 |                     step / 2:w:step].reshape(2, -1).astype(int)
60 | fx, fy = flow[y, x].T
61 | lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
62 | lines = np.int32(lines + 0.5)
63 | vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
64 | cv2.polylines(vis, lines, 0, (0, 255, 0))
65 | for (x1, y1), (x2, y2) in lines:
66 | cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
67 | return vis
68 |
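   | # an alternative dense flow visualization, included as an illustrative
   | # sketch (not part of the original example): map flow direction to hue
   | # and flow magnitude to brightness via the HSV colour space - swap it
   | # into the cv2.imshow() call below to use it
   | 
   | 
   | def draw_flow_hsv(flow):
   |     h, w = flow.shape[:2]
   |     fx, fy = flow[:, :, 0], flow[:, :, 1]
   |     angle = np.arctan2(fy, fx) + np.pi            # direction in [0, 2*pi)
   |     magnitude = np.sqrt(fx * fx + fy * fy)        # flow vector length
   |     hsv = np.zeros((h, w, 3), np.uint8)
   |     hsv[..., 0] = angle * (180 / np.pi / 2)       # hue encodes direction
   |     hsv[..., 1] = 255                             # full saturation
   |     hsv[..., 2] = np.minimum(magnitude * 4, 255)  # brightness ~ speed
   |     return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
   | 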
69 | #####################################################################
70 |
71 | # define video capture object
72 |
73 |
74 | try:
75 | # to use a non-buffered camera stream (via a separate thread)
76 |
77 | if not (args.video_file):
78 | import camera_stream
79 | cap = camera_stream.CameraVideoStream()
80 | else:
81 | cap = cv2.VideoCapture() # not needed for video files
82 |
83 | except BaseException:
84 | # if not then just use OpenCV default
85 |
86 | print("INFO: camera_stream class not found - camera input may be buffered")
87 | cap = cv2.VideoCapture()
88 |
89 | # define display window name
90 |
91 | window_name = "Dense Optic Flow" # window name
92 |
93 | # if command line arguments are provided try to read video_name
94 | # otherwise default to capture from attached H/W camera
95 |
96 | if (((args.video_file) and (cap.open(str(args.video_file))))
97 | or (cap.open(args.camera_to_use))):
98 |
99 | # create window by name (as resizable)
100 |
101 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
102 |
103 | # if video file successfully open then read an initial frame from video
104 |
105 |     if (cap.isOpened()):
106 | ret, frame = cap.read()
107 |
108 | # rescale if specified
109 |
110 | if (args.rescale != 1.0):
111 | frame = cv2.resize(frame, (0, 0), fx=args.rescale, fy=args.rescale)
112 |
113 |     # convert image to grayscale to use as the initial previous frame
114 |
115 | prevgray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
116 |
117 | while (keep_processing):
118 |
119 | # if video file successfully open then read frame from video
120 |
121 |         if (cap.isOpened()):
122 | ret, frame = cap.read()
123 |
124 | # when we reach the end of the video (file) exit cleanly
125 |
126 | if (ret == 0):
127 | keep_processing = False
128 | continue
129 |
130 | # rescale if specified
131 |
132 | if (args.rescale != 1.0):
133 | frame = cv2.resize(
134 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
135 |
136 | # convert image to grayscale
137 |
138 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
139 |
140 | # compute dense optic flow using technique of Farneback 2003
141 | # parameters from example (OpenCV 3.2):
142 | # https://github.com/opencv/opencv/blob/master/samples/python/opt_flow.py
143 |
144 | flow = cv2.calcOpticalFlowFarneback(
145 | prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
146 | prevgray = gray
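   | 
   |         # (positional parameters above: pyr_scale=0.5, levels=3,
   |         # winsize=15, iterations=3, poly_n=5, poly_sigma=1.2, flags=0
   |         # - i.e. a 3-level image pyramid halving in scale at each level,
   |         # a 15x15 averaging window and a 5x5 pixel neighbourhood for the
   |         # per-pixel polynomial expansion)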
147 |
148 | # display image with optic flow overlay
149 |
150 | cv2.imshow(window_name, draw_flow(gray, flow))
151 |
152 | # start the event loop - essential
153 |
154 | # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
155 | key = cv2.waitKey(40) & 0xFF
156 |
157 | # It can also be set to detect specific key strokes by recording which
158 | # key is pressed
159 |
160 | # e.g. if user presses "x" then exit / press "f" for fullscreen
161 | # display
162 |
163 | if (key == ord('x')):
164 | keep_processing = False
165 | elif (key == ord('f')):
166 | cv2.setWindowProperty(
167 | window_name,
168 | cv2.WND_PROP_FULLSCREEN,
169 | cv2.WINDOW_FULLSCREEN)
170 |
171 | # close all windows
172 |
173 | cv2.destroyAllWindows()
174 |
175 | else:
176 | print("No video file specified or camera connected.")
177 |
178 | #####################################################################
179 |
--------------------------------------------------------------------------------
/pyramid.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : perform Gaussian/Laplacian pyramid live display from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2021 Toby Breckon, Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | # Acknowledgements: based in part from tutorial at:
14 | # https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_pyramids/py_pyramids.html
15 |
16 | ##########################################################################
17 |
18 | import cv2
19 | import argparse
20 | import sys
21 | import math
22 | import numpy as np
23 |
24 | ##########################################################################
25 |
26 | keep_processing = True
27 |
28 | # parse command line arguments for camera ID or video file
29 |
30 | parser = argparse.ArgumentParser(
31 | description='Perform ' +
32 | sys.argv[0] +
33 | ' example operation on incoming camera/video image')
34 | parser.add_argument(
35 | "-c",
36 | "--camera_to_use",
37 | type=int,
38 | help="specify camera to use",
39 | default=0)
40 | parser.add_argument(
41 | "-r",
42 | "--rescale",
43 | type=float,
44 | help="rescale image by this factor",
45 | default=1.0)
46 | parser.add_argument(
47 | 'video_file',
48 | metavar='video_file',
49 | type=str,
50 | nargs='?',
51 | help='specify optional video file')
52 | args = parser.parse_args()
53 |
54 | #####################################################################
55 |
56 | # define display window name
57 |
58 | window_name = "Live Camera Input" # window name
59 |
60 | ##########################################################################
61 |
62 | # define video capture object
63 |
64 | try:
65 | # to use a non-buffered camera stream (via a separate thread)
66 |
67 | if not (args.video_file):
68 | import camera_stream
69 | cap = camera_stream.CameraVideoStream()
70 | else:
71 | cap = cv2.VideoCapture() # not needed for video files
72 |
73 | except BaseException:
74 | # if not then just use OpenCV default
75 |
76 | print("INFO: camera_stream class not found - camera input may be buffered")
77 | cap = cv2.VideoCapture()
78 |
79 | # if command line arguments are provided try to read video_name
80 | # otherwise default to capture from attached camera
81 |
82 | if (((args.video_file) and (cap.open(str(args.video_file))))
83 | or (cap.open(args.camera_to_use))):
84 |
85 | # create window by name (as resizable)
86 |
87 | cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
88 |
89 | # set initial number of pyramid levels
90 |
91 | nlevels = 5
92 |
93 | # print user key commands
94 |
95 | print()
96 | print("'-' - reduce pyramid levels")
97 | print("'+' - increase pyramid levels (max 6 levels)")
98 | print()
99 |
100 | while (keep_processing):
101 |
102 | # start a timer (to see how long processing and display takes)
103 |
104 | start_t = cv2.getTickCount()
105 |
106 |         # if camera / video file successfully opened then read frame
107 |
108 |         if (cap.isOpened()):
109 | ret, frame = cap.read()
110 |
111 | # when we reach the end of the video (file) exit cleanly
112 |
113 | if (ret == 0):
114 | keep_processing = False
115 | continue
116 |
117 | # rescale if specified
118 |
119 | if (args.rescale != 1.0):
120 | frame = cv2.resize(
121 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
122 |
123 | # generate Gaussian pyramid for image frame
124 |
125 | g_level = frame.copy()
126 | g_pyramid = [g_level]
127 | for layer in range(nlevels):
128 | g_level = cv2.pyrDown(g_level)
129 | cv2.namedWindow("Gaussian Level: " + str(layer),
130 | cv2.WINDOW_AUTOSIZE)
131 | cv2.imshow("Gaussian Level: " + str(layer), g_level)
132 | g_pyramid.append(g_level.copy())
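   | 
   |         # (each cv2.pyrDown() call Gaussian-blurs the image and then
   |         # discards every other row and column, so each successive level
   |         # is half the width and height of the one below it)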
133 |
134 | # generate Laplacian pyramid image frame
135 |
136 |         lp_pyramid = [g_pyramid[nlevels]]  # smallest level forms the top
137 | for layer in range(nlevels, 0, -1):
138 | g_level_enlarged = cv2.pyrUp(g_pyramid[layer])
139 |
140 |             # catch the rounding error that can occur in image sizes
141 | if (g_pyramid[layer-1].shape != g_level_enlarged.shape):
142 | g_level_enlarged = cv2.resize(
143 | g_level_enlarged,
144 | tuple(reversed(g_pyramid[layer-1].shape[:2])),
145 | interpolation=cv2.INTER_LINEAR)
146 |
147 | l_level = cv2.subtract(g_pyramid[layer-1], g_level_enlarged)
148 | cv2.normalize(l_level, l_level, 0, 255, cv2.NORM_MINMAX)
149 | cv2.namedWindow("Laplacian Level: " + str(layer),
150 | cv2.WINDOW_AUTOSIZE)
151 | cv2.imshow("Laplacian Level: " + str(layer), l_level)
152 | lp_pyramid.append(l_level.copy())
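   | 
   |         # n.b. the cv2.normalize() above is for display purposes only;
   |         # keeping the un-normalized difference images in a hypothetical
   |         # list "laplacian" instead (indexed 0 to nlevels - 1, finest
   |         # first), the original frame could be recovered by upsampling
   |         # and adding the levels back, e.g. (a sketch, ignoring the
   |         # size-rounding fix above):
   |         #
   |         # recon = g_pyramid[nlevels]
   |         # for layer in range(nlevels, 0, -1):
   |         #     recon = cv2.add(cv2.pyrUp(recon), laplacian[layer - 1])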
153 |
154 | # display image
155 |
156 | cv2.imshow(window_name, frame)
157 |
158 | # stop the timer and convert to ms. (to see how long processing and
159 | # display takes)
160 |
161 | stop_t = ((cv2.getTickCount() - start_t) /
162 | cv2.getTickFrequency()) * 1000
163 |
164 | # start the event loop - essential
165 |
166 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
167 | # 25 fps = 40 ms)
168 |
169 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
170 |
171 | if (key == ord('x')):
172 | keep_processing = False
173 | elif (key == ord('+')):
174 | cv2.destroyAllWindows()
175 | nlevels = np.min([6, nlevels + 1])
176 | elif (key == ord('-')):
177 | cv2.destroyAllWindows()
178 | nlevels = np.max([0, nlevels - 1])
179 |
180 | # close all windows
181 |
182 | cv2.destroyAllWindows()
183 |
184 | else:
185 | print("No video file specified or camera connected.")
186 |
187 | ##########################################################################
188 |
--------------------------------------------------------------------------------
/selective_search.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : detect live selective search bounding boxes from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2021 Dept. Computer Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | ##########################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import math
19 |
20 | #####################################################################
21 |
22 | # press all the go-faster buttons - i.e. speed-up using multiple threads
23 |
24 | cv2.setUseOptimized(True)
25 | cv2.setNumThreads(4)
26 |
27 | # if we have OpenCL H/W acceleration available, use it - we'll need it
28 |
29 | cv2.ocl.setUseOpenCL(True)
30 | print(
31 | "INFO: OpenCL - available: ",
32 | cv2.ocl.haveOpenCL(),
33 | " using: ",
34 | cv2.ocl.useOpenCL())
35 |
36 | ##########################################################################
37 |
38 | keep_processing = True
39 |
40 | # parse command line arguments for camera ID or video file
41 |
42 | parser = argparse.ArgumentParser(
43 | description='Perform ' +
44 | sys.argv[0] +
45 | ' example operation on incoming camera/video image')
46 | parser.add_argument(
47 | "-c",
48 | "--camera_to_use",
49 | type=int,
50 | help="specify camera to use",
51 | default=0)
52 | parser.add_argument(
53 | "-r",
54 | "--rescale",
55 | type=float,
56 | help="rescale image by this factor",
57 | default=1.0)
58 | parser.add_argument(
59 | "-fs",
60 | "--fullscreen",
61 | action='store_true',
62 | help="run in full screen mode")
63 | parser.add_argument(
64 | 'video_file',
65 | metavar='video_file',
66 | type=str,
67 | nargs='?',
68 | help='specify optional video file')
69 | args = parser.parse_args()
70 |
71 | ##########################################################################
72 |
73 | # define video capture object
74 |
75 | try:
76 | # to use a non-buffered camera stream (via a separate thread)
77 |
78 | if not (args.video_file):
79 | import camera_stream
80 | cap = camera_stream.CameraVideoStream()
81 | else:
82 | cap = cv2.VideoCapture() # not needed for video files
83 |
84 | except BaseException:
85 | # if not then just use OpenCV default
86 |
87 | print("INFO: camera_stream class not found - camera input may be buffered")
88 | cap = cv2.VideoCapture()
89 |
90 | # define display window name
91 |
92 | window_name = "Selective Search - Bounding Boxes" # window name
93 |
94 | # if command line arguments are provided try to read video_name
95 | # otherwise default to capture from attached camera
96 |
97 | if (((args.video_file) and (cap.open(str(args.video_file))))
98 | or (cap.open(args.camera_to_use))):
99 |
100 | # create window by name (as resizable)
101 |
102 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
103 |
104 | #####################################################################
105 |
106 | # create Selective Search Segmentation Object using default parameters
107 |
108 | ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
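   | 
   |     # (selective search - Uijlings et al., IJCV 2013 - computes an
   |     # initial graph-based over-segmentation and then hierarchically
   |     # merges regions by colour, texture, size and fill similarity;
   |     # each merged region yields a candidate object bounding box)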
109 |
110 | while (keep_processing):
111 |
112 | # start a timer (to see how long processing and display takes)
113 |
114 | start_t = cv2.getTickCount()
115 |
116 |         # if camera / video file successfully opened then read frame
117 |
118 |         if (cap.isOpened()):
119 | ret, frame = cap.read()
120 |
121 | # when we reach the end of the video (file) exit cleanly
122 |
123 | if (ret == 0):
124 | keep_processing = False
125 | continue
126 |
127 | # rescale if specified
128 |
129 | if (args.rescale != 1.0):
130 | frame = cv2.resize(
131 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
132 |
133 | # set input image on which we will run segmentation
134 |
135 | ss.setBaseImage(frame)
136 |
137 | # Switch to fast but low recall Selective Search method
138 | ss.switchToSelectiveSearchFast()
139 |
140 | # Switch to high recall but slow Selective Search method (slower)
141 | # ss.switchToSelectiveSearchQuality()
142 |
143 | # run selective search segmentation on input image
144 | rects = ss.process()
145 | print('Total Number of Region Proposals: {}'.format(len(rects)))
146 |
147 | # number of region proposals to show
148 | numShowRects = 100
149 |
150 | # iterate over all the region proposals
151 | for i, rect in enumerate(rects):
152 |             # draw rectangles for the region proposals up to numShowRects
153 | if (i < numShowRects):
154 | x, y, w, h = rect
155 | cv2.rectangle(frame, (x, y), (x+w, y+h),
156 | (0, 255, 0), 1, cv2.LINE_AA)
157 | else:
158 | break
159 |
160 | # stop the timer and convert to ms. (to see how long processing and
161 | # display takes)
162 |
163 | stop_t = ((cv2.getTickCount() - start_t) /
164 | cv2.getTickFrequency()) * 1000
165 |
166 | label = ('Processing time: %.2f ms' % stop_t) + \
167 | (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
168 | cv2.putText(frame, label, (0, 15),
169 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
170 |
171 | # display image
172 |
173 | cv2.imshow(window_name, frame)
174 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
175 | cv2.WINDOW_FULLSCREEN & args.fullscreen)
176 |
177 | # start the event loop - essential
178 |
179 | # cv2.waitKey() is a keyboard binding function (argument is the time in
180 | # milliseconds). It waits for specified milliseconds for any keyboard
181 | # event. If you press any key in that time, the program continues.
182 | # If 0 is passed, it waits indefinitely for a key stroke.
183 | # (bitwise and with 0xFF to extract least significant byte of
184 | # multi-byte response)
185 |
186 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
187 | # 25 fps = 40 ms)
188 |
189 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
190 |
191 | # It can also be set to detect specific key strokes by recording which
192 | # key is pressed
193 |
194 | # e.g. if user presses "x" then exit / press "f" for fullscreen
195 | # display
196 |
197 | if (key == ord('x')):
198 | keep_processing = False
199 | elif (key == ord('f')):
200 | args.fullscreen = not (args.fullscreen)
201 |
202 | # close all windows
203 |
204 | cv2.destroyAllWindows()
205 |
206 | else:
207 | print("No video file specified or camera connected.")
208 |
209 | ##########################################################################
210 |
--------------------------------------------------------------------------------
/sobel.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : Sobel edge filtering for a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2016 School of Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 |
19 | #####################################################################
20 |
21 | keep_processing = True
22 |
23 | # parse command line arguments for camera ID or video file
24 |
25 | parser = argparse.ArgumentParser(
26 | description='Perform ' +
27 | sys.argv[0] +
28 | ' example operation on incoming camera/video image')
29 | parser.add_argument(
30 | "-c",
31 | "--camera_to_use",
32 | type=int,
33 | help="specify camera to use",
34 | default=0)
35 | parser.add_argument(
36 | "-r",
37 | "--rescale",
38 | type=float,
39 | help="rescale image by this factor",
40 | default=1.0)
41 | parser.add_argument(
42 | "-s",
43 | "--set_resolution",
44 | type=int,
45 | nargs=2,
46 | help='override default camera resolution as H W')
47 | parser.add_argument(
48 | 'video_file',
49 | metavar='video_file',
50 | type=str,
51 | nargs='?',
52 | help='specify optional video file')
53 | args = parser.parse_args()
54 |
55 | #####################################################################
56 |
57 | # this function is called as a call-back every time the trackbar is moved
58 | # (here we just do nothing)
59 |
60 |
61 | def nothing(x):
62 | pass
63 |
64 | #####################################################################
65 |
66 | # define video capture object
67 |
68 |
69 | try:
70 | # to use a non-buffered camera stream (via a separate thread)
71 | # enabling subsequent hardware acceleration where available
72 |
73 | if not (args.video_file):
74 | import camera_stream
75 | cap = camera_stream.CameraVideoStream(use_tapi=True)
76 | else:
77 | cap = cv2.VideoCapture() # not needed for video files
78 |
79 | except BaseException:
80 | # if not then just use OpenCV default
81 |
82 | print("INFO: camera_stream class not found - camera input may be buffered")
83 | cap = cv2.VideoCapture()
84 |
85 | # define display window name
86 |
87 | window_name = "Live Camera Input" # window name
88 | window_name2 = "Sobel Gradient Edge Response" # window name
89 |
90 | # if command line arguments are provided try to read video_name
91 | # otherwise default to capture from attached H/W camera
92 |
93 | if (((args.video_file) and (cap.open(str(args.video_file))))
94 | or (cap.open(args.camera_to_use))):
95 |
96 | # create window by name (as resizable)
97 |
98 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
99 | cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
100 |
101 | # add some track bar controllers for settings
102 |
103 | neighbourhood = 3
104 | cv2.createTrackbar(
105 | "neighbourhood, N",
106 | window_name2,
107 | neighbourhood,
108 |         7,  # cv2.Sobel() only supports aperture sizes of 1, 3, 5 or 7
109 | nothing)
110 |
111 | # override default camera resolution
112 |
113 | if (args.set_resolution is not None):
114 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[0])
115 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[1])
116 |
117 | print("INFO: input resolution : (",
118 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
119 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
120 |
121 | while (keep_processing):
122 |
123 | # if video file successfully open then read frame from video
124 |
125 |         if (cap.isOpened()):
126 | ret, frame = cap.read()
127 |
128 | # when we reach the end of the video (file) exit cleanly
129 |
130 | if (ret == 0):
131 | keep_processing = False
132 | continue
133 |
134 | # rescale if specified
135 |
136 | if (args.rescale != 1.0):
137 | frame = cv2.resize(
138 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
139 |
140 | # get parameters from track bars
141 |
142 | neighbourhood = cv2.getTrackbarPos("neighbourhood, N", window_name2)
143 |
144 |         # check neighbourhood is at least 3 and odd
145 |
146 | neighbourhood = max(3, neighbourhood)
147 | if not (neighbourhood % 2):
148 | neighbourhood = neighbourhood + 1
149 |
150 |         # perform sobel filtering across all three colour channels of the
151 |         # image (dx=1, dy=1 gives the mixed x-y derivative response)
152 |
153 | sobel = cv2.Sobel(frame, cv2.CV_8U, 1, 1, ksize=neighbourhood)
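   | 
   |         # n.b. dx=1, dy=1 above computes the mixed second derivative
   |         # d2I/(dx dy); for the more common gradient magnitude display,
   |         # combine separate first derivatives in x and y instead, e.g.
   |         # (a sketch - uncomment to use):
   |         #
   |         # grad_x = cv2.Sobel(frame, cv2.CV_32F, 1, 0, ksize=neighbourhood)
   |         # grad_y = cv2.Sobel(frame, cv2.CV_32F, 0, 1, ksize=neighbourhood)
   |         # sobel = cv2.addWeighted(cv2.convertScaleAbs(grad_x), 0.5,
   |         #                         cv2.convertScaleAbs(grad_y), 0.5, 0)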
154 |
155 | # display images
156 |
157 | cv2.imshow(window_name, frame)
158 | cv2.imshow(window_name2, sobel)
159 |
160 | # start the event loop - essential
161 |
162 | # cv2.waitKey() is a keyboard binding function (argument is the time in
163 | # milliseconds). It waits for specified milliseconds for any keyboard
164 | # event. If you press any key in that time, the program continues.
165 | # If 0 is passed, it waits indefinitely for a key stroke.
166 | # (bitwise and with 0xFF to extract least significant byte of
167 | # multi-byte response)
168 |
169 | # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
170 | key = cv2.waitKey(40) & 0xFF
171 |
172 | # It can also be set to detect specific key strokes by recording which
173 | # key is pressed
174 |
175 | # e.g. if user presses "x" then exit / press "f" to toggle fullscreen
176 |
177 | if (key == ord('x')):
178 | keep_processing = False
179 | elif (key == ord('f')):
180 | cv2.setWindowProperty(
181 | window_name2,
182 | cv2.WND_PROP_FULLSCREEN,
183 | cv2.WINDOW_FULLSCREEN &
184 | (cv2.getWindowProperty(window_name2,
185 | cv2.WND_PROP_FULLSCREEN) == 0))
186 |
187 | # close all windows
188 |
189 | cv2.destroyAllWindows()
190 |
191 | else:
192 | print("No video file specified or camera connected.")
193 |
194 | #####################################################################
195 |
--------------------------------------------------------------------------------
/squeezenet.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : perform live display of squeezenet CNN classification from a video
4 | # file specified on the command line (e.g. python FILE.py video_file) or from
5 | # an attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2019 Toby Breckon, Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | # Based heavily on the example provided at:
14 | # https://github.com/opencv/opencv/blob/master/samples/dnn/classification.py
15 |
16 | ##########################################################################
17 |
18 | # To use download the following files:
19 |
20 | # https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt
21 | # -> classification_classes_ILSVRC2012.txt
22 | # https://github.com/forresti/SqueezeNet/raw/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel
23 | # -> squeezenet_v1.1.caffemodel
24 | # https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/squeezenet_v1.1.prototxt
25 | # -> squeezenet_v1.1.prototxt
26 |
27 | ##########################################################################
28 |
29 | import cv2
30 | import argparse
31 | import sys
32 | import math
33 | import numpy as np
34 |
35 | ##########################################################################
36 | # dummy on-trackbar callback function (does nothing)
37 |
38 |
39 | def on_trackbar(val):
40 | return
41 |
42 | ##########################################################################
43 |
44 |
45 | keep_processing = True
46 |
47 | # parse command line arguments for camera ID or video file
48 |
49 | parser = argparse.ArgumentParser(
50 | description='Perform ' +
51 | sys.argv[0] +
52 | ' example operation on incoming camera/video image')
53 | parser.add_argument(
54 | "-c",
55 | "--camera_to_use",
56 | type=int,
57 | help="specify camera to use",
58 | default=0)
59 | parser.add_argument(
60 | "-r",
61 | "--rescale",
62 | type=float,
63 | help="rescale image by this factor",
64 | default=1.0)
65 | parser.add_argument(
66 | "-fs",
67 | "--fullscreen",
68 | action='store_true',
69 | help="run in full screen mode")
70 | parser.add_argument(
71 | "-use",
72 | "--target",
73 | type=str,
74 | choices=['cpu', 'gpu', 'opencl'],
75 | help="select computational backend",
76 | default='gpu')
77 | parser.add_argument(
78 | 'video_file',
79 | metavar='video_file',
80 | type=str,
81 | nargs='?',
82 | help='specify optional video file')
83 | args = parser.parse_args()
84 |
85 | ##########################################################################
86 |
87 | # define video capture object
88 |
89 | try:
90 | # to use a non-buffered camera stream (via a separate thread)
91 |
92 | if not (args.video_file):
93 | import camera_stream
94 | cap = camera_stream.CameraVideoStream()
95 | else:
96 | cap = cv2.VideoCapture() # not needed for video files
97 |
98 | except BaseException:
99 | # if not then just use OpenCV default
100 |
101 | print("INFO: camera_stream class not found - camera input may be buffered")
102 | cap = cv2.VideoCapture()
103 |
104 | ##########################################################################
105 |
106 | # define display window name
107 |
108 | window_name = "SqueezeNet Image Classification - Live" # window name
109 |
110 | # create window by name (as resizable)
111 |
112 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
113 | trackbarName = 'reporting confidence > (x 0.01)'
114 | cv2.createTrackbar(trackbarName, window_name, 50, 100, on_trackbar)
115 |
116 | ##########################################################################
117 |
118 | # Load names of class labels
119 |
120 | classes = None
121 | with open("classification_classes_ILSVRC2012.txt", 'rt') as f:
122 | classes = f.read().rstrip('\n').split('\n')
123 |
124 | ##########################################################################
125 |
126 | # Load CNN model
127 |
128 | net = cv2.dnn.readNet(
129 | "squeezenet_v1.1.caffemodel",
130 | "squeezenet_v1.1.prototxt",
131 | 'caffe')
132 |
133 | # set up compute target as one of [GPU, OpenCL, CPU]
134 |
135 | if (args.target == 'gpu'):
136 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
137 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
138 | elif (args.target == 'opencl'):
139 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
140 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
141 | else:
142 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
143 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
144 |
145 | ##########################################################################
146 |
147 | # if command line arguments are provided try to read video_name
148 | # otherwise default to capture from attached camera
149 |
150 | if (((args.video_file) and (cap.open(str(args.video_file))))
151 | or (cap.open(args.camera_to_use))):
152 |
153 | while (keep_processing):
154 |
155 | # start a timer (to see how long processing and display takes)
156 |
157 | start_t = cv2.getTickCount()
158 |
159 |         # if camera / video file successfully opened then read frame
160 |
161 |         if (cap.isOpened()):
162 | ret, frame = cap.read()
163 |
164 | # when we reach the end of the video (file) exit cleanly
165 |
166 | if (ret == 0):
167 | keep_processing = False
168 | continue
169 |
170 | # rescale if specified
171 |
172 | if (args.rescale != 1.0):
173 | frame = cv2.resize(
174 | frame, (0, 0), fx=args.rescale, fy=args.rescale)
175 |
176 | #######################################################################
177 | # squeezenet:
178 | # model: "squeezenet_v1.1.caffemodel"
179 | # config: "squeezenet_v1.1.prototxt"
180 | # mean: [0, 0, 0]
181 | # scale: 1.0
182 | # width: 227
183 | # height: 227
184 | # rgb: false
185 |         # classes: "classification_classes_ILSVRC2012.txt"
186 | #######################################################################
187 |
188 | # create a 4D tensor "blob" from a frame.
189 |
190 |         blob = cv2.dnn.blobFromImage(
191 |             frame, scalefactor=1.0,
192 |             size=(227, 227), mean=[0, 0, 0],
193 |             swapRB=False, crop=False)
194 |
195 | # Run forward inference on the model
196 |
197 | net.setInput(blob)
198 | out = net.forward()
199 |
200 |         # get the class label with the highest score from final softmax() layer
201 |
202 | out = out.flatten()
203 | classId = np.argmax(out)
204 | confidence = out[classId]
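   | 
   |         # n.b. out is the flattened 1000-way softmax output over the
   |         # ILSVRC2012 classes, so argmax gives the most likely class; a
   |         # top-5 report could be produced as follows (a sketch -
   |         # uncomment to use):
   |         #
   |         # for idx in np.argsort(out)[::-1][:5]:
   |         #     print('%s: %.4f' % (classes[idx], out[idx]))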
205 |
206 |         # stop the timer and convert to ms. (to see how long processing takes)
207 |
208 | stop_t = ((cv2.getTickCount() - start_t) /
209 | cv2.getTickFrequency()) * 1000
210 |
211 | # Display efficiency information
212 |
213 | label = ('Inference time: %.2f ms' % stop_t) + \
214 | (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
215 | cv2.putText(frame, label, (0, 15),
216 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
217 |
218 | # get confidence threshold from track bar
219 | confThreshold = cv2.getTrackbarPos(trackbarName, window_name) / 100
220 |
221 |         # if we are sufficiently confident about the classification, display it
222 | if (confidence > confThreshold):
223 | # add predicted class.
224 | label = '%s: %.4f' % (
225 | classes[classId]
226 | if classes else 'Class #%d' % classId, confidence)
227 | cv2.putText(frame, label, (0, 40),
228 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
229 |
230 | # display image
231 |
232 | cv2.imshow(window_name, frame)
233 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
234 | cv2.WINDOW_FULLSCREEN & args.fullscreen)
235 |
236 | # start the event loop - essential
237 |
238 | # wait 40ms or less depending on processing time taken (i.e. 1000ms /
239 | # 25 fps = 40 ms)
240 |
241 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
242 |
243 | # It can also be set to detect specific key strokes by recording which
244 | # key is pressed
245 |
246 | # e.g. if user presses "x" then exit / press "f" for fullscreen
247 | # display
248 |
249 | if (key == ord('x')):
250 | keep_processing = False
251 | elif (key == ord('f')):
252 | args.fullscreen = not (args.fullscreen)
253 |
254 | # close all windows
255 |
256 | cv2.destroyAllWindows()
257 |
258 | else:
259 | print("No video file specified or camera connected.")
260 |
261 | ##########################################################################
262 |
--------------------------------------------------------------------------------
/test_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | ################################################################################
4 |
5 | # run a batch test over all the examples from the shell (linux)
6 |
7 | # Copyright (c) 2019 Dept Computer Science,
8 | # Durham University, UK
9 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
10 |
11 | ################################################################################
12 |
13 | PYTHON_INTERPRETER=python3
14 | CAM_TO_TEST=0
15 | VIDEO_TO_TEST=video.avi
16 |
17 | echo
18 | echo Using $PYTHON_INTERPRETER with camera $CAM_TO_TEST and video $VIDEO_TO_TEST
19 | echo "Running test suite - press 'x' in OpenCV window to exit each example."
20 | echo
21 |
22 | # get testing resources if they do not exist
23 |
24 | [ -f example.jpg ] || { wget https://upload.wikimedia.org/wikipedia/commons/b/b4/JPEG_example_JPG_RIP_100.jpg; mv JPEG_example_JPG_RIP_100.jpg example.jpg; }
25 | [ -f video.avi ] || { wget http://clips.vorwaerts-gmbh.de/big_buck_bunny.mp4; mv big_buck_bunny.mp4 video.avi; }
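   | 
   | # (the "[ -f <file> ] || { ... }" idiom runs the download block only when
   | # the file is missing, so repeated test runs reuse the cached copies)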
26 |
27 | ################################################################################
28 |
29 | # run defaults
30 |
31 | echo "Running default tests ..."
32 | echo
33 |
34 | for example in *.py
35 | do
36 | echo "Testing example: " $example
37 | 	$PYTHON_INTERPRETER $example
38 | echo
39 | done
40 |
41 | ################################################################################
42 |
43 | # run cam test
44 |
45 | echo "Running camera based tests ..."
46 | echo
47 |
48 | for example in *.py
49 | do
50 | echo "Testing example: " $example -c $CAM_TO_TEST
51 | 	$PYTHON_INTERPRETER $example -c $CAM_TO_TEST
52 | echo
53 | done
54 |
55 | ################################################################################
56 |
57 | # run cam test and resize
58 |
59 | echo "Running camera based tests with resizing ..."
60 | echo
61 |
62 | for example in *.py
63 | do
64 | echo "Testing example: " $example -c $CAM_TO_TEST -r 0.25
65 | 	$PYTHON_INTERPRETER $example -c $CAM_TO_TEST -r 0.25
66 | echo
67 | done
68 |
69 |
70 | ################################################################################
71 |
72 | # run video file test
73 |
74 | echo "Running video file based tests ..."
75 | echo
76 |
77 | for example in *.py
78 | do
79 | echo "Testing example: " $example $VIDEO_TO_TEST
80 | 	$PYTHON_INTERPRETER $example $VIDEO_TO_TEST
81 | echo
82 | done
83 |
84 | ################################################################################
85 |
--------------------------------------------------------------------------------