├── .github └── workflows │ └── pep8-check.yml ├── .gitignore ├── DoG.py ├── LICENSE ├── README.md ├── calibrate_camera.py ├── camera_stream.py ├── canny.py ├── chromaticity_lightness.py ├── cnn_ssd_detection.py ├── contour_edges.py ├── cycleimages.py ├── download-models.sh ├── eigenfaces.py ├── faster-rcnn.py ├── fcn_segmentation.py ├── gaussian.py ├── generic_interface.py ├── gradient_orientation.py ├── haar_cascade_detection.py ├── harris.py ├── hog.py ├── houghlines.py ├── kalman_tracking_live.py ├── lbp_cascade_detection.py ├── mask-rcnn.py ├── mog-background-subtraction.py ├── openpose.py ├── opticflow.py ├── pyramid.py ├── selective_search.py ├── sift_detection.py ├── sobel.py ├── squeezenet.py ├── stereo_sgbm.py ├── test_all.sh └── yolo.py /.github/workflows/pep8-check.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python - PEP8 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python 3.8 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: 3.8 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install flake8 pytest 27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 28 | - name: Lint with flake8 29 | run: | 30 | # stop the build if there are Python PEP8 style, syntax errors, undefined names, unused imports ... 31 | flake8 . 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # specific to this repo 92 | 93 | *.pb 94 | *.xml 95 | *.pbtxt 96 | *.txt 97 | *.caffemodel 98 | *.prototxt 99 | *.cfg 100 | *.weights 101 | *.names 102 | *.avi 103 | *.jpg 104 | calibration 105 | faster_rcnn_inception_v2_coco_2018_01_28 106 | mask_rcnn_inception_v2_coco_2018_01_28 107 | -------------------------------------------------------------------------------- /DoG.py: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | 3 | # Example : Difference of Gaussian (DoG) of a video file 4 | # specified on the command line (e.g. python FILE.py video_file) or from an 5 | # attached web camera 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # Copyright (c) 2017-2019 Dept. Engineering & Dept. Computer Science, 10 | # Durham University, UK 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 12 | 13 | ##################################################################### 14 | 15 | import cv2 16 | import argparse 17 | import sys 18 | 19 | ##################################################################### 20 | 21 | keep_processing = True 22 | 23 | # parse command line arguments for camera ID or video file 24 | 25 | parser = argparse.ArgumentParser( 26 | description='Perform ' + 27 | sys.argv[0] + 28 | ' example operation on incoming camera/video image') 29 | parser.add_argument( 30 | "-c", 31 | "--camera_to_use", 32 | type=int, 33 | help="specify camera to use", 34 | default=0) 35 | parser.add_argument( 36 | "-r", 37 | "--rescale", 38 | type=float, 39 | help="rescale image by this factor", 40 | default=1.0) 41 | parser.add_argument( 42 | "-s", 43 | "--set_resolution", 44 | type=int, 45 | nargs=2, 46 | help='override default camera resolution as H W') 47 | parser.add_argument("-i", "--is_image", action='store_true', 48 | help="specify file is an image, not a video") 49 | parser.add_argument( 50 | 'video_file', 51 | metavar='file', 52 | type=str, 53 | nargs='?', 54 | help='specify optional video file') 55 | args = parser.parse_args() 56 | 57 | ##################################################################### 58 | 59 | # this function is called as a callback every time the trackbar is moved 60 | # (here we just do nothing) 61 | 62 | 63 | def nothing(x): 64 | pass 65 | 66 | ##################################################################### 67 | 68 | # define video capture object 69 | 70 | 71 | try: 72 | # to use a non-buffered camera stream (via a separate thread) 73 | 74 | if not (args.video_file): 75 | import camera_stream 76 | cap =
camera_stream.CameraVideoStream(use_tapi=True) 77 | else: 78 | cap = cv2.VideoCapture() # not needed for video files 79 | 80 | except BaseException: 81 | # if not then just use OpenCV default 82 | 83 | print("INFO: camera_stream class not found - camera input may be buffered") 84 | cap = cv2.VideoCapture() 85 | 86 | # define display window name 87 | 88 | window_name = "Live Camera Input" # window name 89 | window_nameU = "Gaussian Upper" # window name 90 | window_nameL = "Gaussian Lower" # window name 91 | window_nameDoG = "DoG" # window name 92 | 93 | # if command line arguments are provided try to read video_name 94 | # otherwise default to capture from attached H/W camera 95 | 96 | if (((args.video_file) and (cap.open(str(args.video_file)))) 97 | or (cap.open(args.camera_to_use))): 98 | 99 | # create window by name (as resizable) 100 | 101 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 102 | cv2.namedWindow(window_nameL, cv2.WINDOW_NORMAL) 103 | cv2.namedWindow(window_nameU, cv2.WINDOW_NORMAL) 104 | cv2.namedWindow(window_nameDoG, cv2.WINDOW_NORMAL) 105 | 106 | # add some track bar controllers for settings 107 | 108 | sigmaU = 2 # greater than 7 seems to crash 109 | cv2.createTrackbar("sigma U", window_nameU, sigmaU, 15, nothing) 110 | sigmaL = 1 # greater than 7 seems to crash 111 | cv2.createTrackbar("sigma L", window_nameL, sigmaL, 15, nothing) 112 | 113 | # override default camera resolution 114 | 115 | if (args.set_resolution is not None): 116 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1]) 117 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0]) 118 | 119 | print("INFO: input resolution : (", 120 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x", 121 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")") 122 | 123 | while (keep_processing): 124 | 125 | # if video file successfully open then read frame from video 126 | 127 | if (cap.isOpened): 128 | ret, frame = cap.read() 129 | 130 | # when we reach the end of the video (file) exit cleanly 131 | 132 | if (ret == 0): 133 | keep_processing = False 134 | continue 135 | 136 | # rescale if specified 137 | 138 | if (args.rescale != 1.0): 139 | frame = cv2.resize( 140 | frame, (0, 0), fx=args.rescale, fy=args.rescale) 141 | 142 | # if it is a still image, load that instead 143 | 144 | if (args.is_image): 145 | frame = cv2.imread(args.video_file, cv2.IMREAD_COLOR) 146 | 147 | # get parameters from track bars 148 | 149 | sigmaU = cv2.getTrackbarPos("sigma U", window_nameU) 150 | sigmaL = cv2.getTrackbarPos("sigma L", window_nameL) 151 | 152 | # check sigmas are at least 1 153 | 154 | sigmaU = max(1, sigmaU) 155 | sigmaL = max(1, sigmaL) 156 | 157 | # check sigmas are correctly ordered 158 | 159 | if (sigmaL >= sigmaU) and (sigmaU > 1): 160 | sigmaL = sigmaU - 1 161 | print("auto-correcting sigmas such that U > L") 162 | 163 | # convert to grayscale 164 | 165 | gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 166 | 167 | # performing smoothing on the image using a smoothing mask 168 | # specify 0x0 mask size then size is auto-computed from the sigma 169 | # values 170 | 171 | smoothedU = cv2.GaussianBlur(gray_frame, (0, 0), sigmaU) 172 | smoothedL = cv2.GaussianBlur(gray_frame, (0, 0), sigmaL) 173 | 174 | # perform absdiff() to get DoG 175 | 176 | DoG = cv2.absdiff(smoothedU, smoothedL) 177 | 178 | # auto-scale to full 0 -> 255 range for display 179 | 180 | cv2.normalize(DoG, DoG, 0, 255, cv2.NORM_MINMAX) 181 | 182 | # display image 183 | 184 | cv2.imshow(window_name, frame) 185 | cv2.imshow(window_nameU, smoothedU) 186 |
cv2.imshow(window_nameL, smoothedL) 187 | cv2.imshow(window_nameDoG, DoG) 188 | 189 | # start the event loop - essential 190 | 191 | # cv2.waitKey() is a keyboard binding function (argument is the time in 192 | # ms). It waits for specified milliseconds for any keyboard event. 193 | # If you press any key in that time, the program continues. 194 | # If 0 is passed, it waits indefinitely for a key stroke. 195 | # (bitwise and with 0xFF to extract least significant byte of 196 | # multi-byte response) 197 | 198 | # wait 40ms (i.e. 1000ms / 25 fps = 40 ms) 199 | key = cv2.waitKey(40) & 0xFF 200 | 201 | # It can also be set to detect specific key strokes by recording which 202 | # key is pressed 203 | 204 | 205 | 206 | # e.g. if user presses "x" then exit / press "f" for fullscreen 207 | # display 208 | 209 | if (key == ord('x')): 210 | keep_processing = False 211 | elif (key == ord('f')): 212 | cv2.setWindowProperty( 213 | window_nameDoG, 214 | cv2.WND_PROP_FULLSCREEN, 215 | cv2.WINDOW_FULLSCREEN) 216 | 217 | # close all windows 218 | 219 | cv2.destroyAllWindows() 220 | 221 | else: 222 | print("No video file specified or camera connected.") 223 | 224 | ##################################################################### 225 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL.
43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 
115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python Computer Vision OpenCV Teaching Examples 2 | 3 | OpenCV Python computer vision examples used for teaching within the undergraduate Computer Science programme 4 | at [Durham University](http://www.durham.ac.uk) (UK) by [Prof. Toby Breckon](https://breckon.org/toby/). 5 | 6 | ![Python - PEP8](https://github.com/tobybreckon/python-examples-cv/workflows/Python%20-%20PEP8/badge.svg) 7 | 8 | All tested with [OpenCV](http://www.opencv.org) 3.x / 4.x and Python 3.x. 
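Each example runs from either an attached camera or an optional video file, and begins with a header comment of the following general form: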
9 | 10 | ``` 11 | # Example : <................................> processing from a video file 12 | # specified on the command line (e.g. python FILE.py video_file) or from an 13 | # attached web camera 14 | ``` 15 | --- 16 | 17 | ### Background: 18 | 19 | Directly adapted (and in some cases extended/added to) from the [C++](https://github.com/tobybreckon/cpp-examples-ipcv.git) and earlier [C](https://github.com/tobybreckon/c-examples-ipcv.git) language teaching examples used to generate the video examples within the ebook version of: 20 | 21 | [Dictionary of Computer Vision and Image Processing](http://dx.doi.org/10.1002/9781119286462) (R.B. Fisher, T.P. Breckon, K. Dawson-Howe, A. Fitzgibbon, C. Robertson, E. Trucco, C.K.I. Williams), Wiley, 2014. 22 | [[Google Books](http://books.google.co.uk/books?id=TaEQAgAAQBAJ&lpg=PP1&dq=isbn%3A1118706811&pg=PP1v=onepage&q&f=false)] [[doi](http://dx.doi.org/10.1002/9781119286462)] 23 | 24 | Notably, the [C++](https://github.com/tobybreckon/cpp-examples-ipcv.git) examples may contain further speed optimizations in some cases. 25 | 26 | A related supporting set of [Python Image Processing OpenCV Teaching Examples](https://github.com/tobybreckon/python-examples-ip.git) is also available covering basic image processing operations. 27 | 28 | --- 29 | 30 | ### How to download and run: 31 | 32 | Download each file as needed, or to download the entire repository and try running each example: 33 | 34 | ``` 35 | git clone https://github.com/tobybreckon/python-examples-cv.git 36 | cd python-examples-cv 37 | python3 ./<example_name>.py [optional video file] 38 | ``` 39 | 40 | Demo source code is provided _"as is"_ to aid learning and understanding of topics on the course and beyond. 41 | 42 | Most run with a webcam connected or from a command line supplied video file of a format OpenCV supports on your system (otherwise edit the script to provide your own image source). For example, each individual ```.py``` example file can be used as follows: 43 | 44 | ``` 45 | $ python3 ./generic_interface.py -h 46 | usage: generic_interface.py [-h] [-c CAMERA_TO_USE] [-r RESCALE] [-fs] 47 | [video_file] 48 | 49 | Perform ./generic_interface.py example operation on incoming camera/video 50 | image 51 | 52 | positional arguments: 53 | video_file specify optional video file 54 | 55 | optional arguments: 56 | -h, --help show this help message and exit 57 | -c CAMERA_TO_USE, --camera_to_use CAMERA_TO_USE 58 | specify camera to use 59 | -r RESCALE, --rescale RESCALE 60 | rescale image by this factor 61 | 62 | ``` 63 | 64 | For several of the demos that largely rely on effective demonstration using just a single output window - press the _"f"_ key to run fullscreen. In all examples press _"x"_ to exit. 65 | 66 | Use script ```sh download-models.sh``` to download CNN model files associated with some examples. 67 | 68 | --- 69 | 70 | ### Re-usable Exemplar Components (Python Classes): 71 | 72 | This codebase contains the following re-usable exemplar elements: 73 | 74 | - ```camera_stream.py``` - a re-usable threaded camera class that is call-compatible with the existing OpenCV VideoCapture class, designed to always deliver the latest frame from a single camera without buffering delays (used by all examples if available). 75 | 76 | - ```h_concatenate()``` - a re-usable function for horizontal image concatenation for display in a single window, handling variations in size/channels (see ```chromaticity_lightness.py```; a usage sketch follows below).
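As a usage illustration, the following minimal sketch (not part of the repository; camera device ```0``` and the side-by-side display are assumptions made for demonstration) shows the threaded camera class dropped into a standard OpenCV capture loop. The built-in ```cv2.hconcat()``` with an explicit channel conversion stands in here for the more general ```h_concatenate()```, which additionally handles differing image heights/channels automatically:

```python
# minimal usage sketch - assumes camera_stream.py is on the Python path and a
# camera is attached as device 0; cv2.hconcat() stands in for h_concatenate()

import cv2
import camera_stream

cap = camera_stream.CameraVideoStream()  # call-compatible with cv2.VideoCapture
if cap.open(0):                          # open camera 0 (as per the -c option)
    while True:
        ret, frame = cap.read()          # always returns the latest frame
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # show original and grayscale side by side in a single window
        display = cv2.hconcat([frame, cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)])
        cv2.imshow("Live [ original | grayscale ]", display)
        if (cv2.waitKey(40) & 0xFF) == ord('x'):   # press "x" to exit
            break
cv2.destroyAllWindows()
```

Because ```CameraVideoStream``` matches the ```cv2.VideoCapture``` call convention, the same loop works unchanged with either capture object.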
77 | 78 | --- 79 | 80 | ### References: 81 | 82 | If referencing these examples in your own work (e.g. _"... based on the implementation of REF..."_), please reference the related research work from which these sample OpenCV reference implementations were derived (in terms of parameter choices etc.; presented in BibTeX format below). 83 | 84 | For the SGBM stereo vision and camera calibration examples, reference: 85 | ``` 86 | @Article{mroz12stereo, 87 | author = {Mroz, F. and Breckon, T.P.}, 88 | title = {An Empirical Comparison of Real-time Dense Stereo Approaches for use in the Automotive Environment}, 89 | journal = {EURASIP Journal on Image and Video Processing}, 90 | year = {2012}, 91 | volume = {2012}, 92 | number = {13}, 93 | pages = {1-19}, 94 | publisher = {Springer}, 95 | url = {https://breckon.org/toby/publications/papers/mroz12stereo.pdf}, 96 | doi = {10.1186/1687-5281-2012-13} 97 | } 98 | ``` 99 | 100 | For the Mixture of Gaussian (MOG) background subtraction and Kalman filtering example, reference: 101 | ``` 102 | @InProceedings{kundegorski14photogrammetric, 103 | author = {Kundegorski, M.E. and Breckon, T.P.}, 104 | title = {A Photogrammetric Approach for Real-time 3D Localization and Tracking of Pedestrians in Monocular Infrared Imagery}, 105 | booktitle = {Proc. SPIE Optics and Photonics for Counterterrorism, Crime Fighting and Defence}, 106 | year = {2014}, 107 | month = {September}, 108 | volume = {9253}, 109 | number = {01}, 110 | publisher = {SPIE}, 111 | pages = {1-16}, 112 | url = {https://breckon.org/toby/publications/papers/kundegorski14photogrammetric.pdf}, 113 | doi = {10.1117/12.2065673} 114 | } 115 | ``` 116 | 117 | 118 | For the DoG, Canny, contour, Harris and Sobel examples, please reference: 119 | ``` 120 | @Book{solomonbreckon10fundamentals, 121 | author = {Solomon, C.J. and Breckon, T.P.}, 122 | title = {Fundamentals of Digital Image Processing: A Practical Approach with Examples in Matlab}, 123 | publisher = {Wiley-Blackwell}, 124 | year = {2010}, 125 | isbn = {0470844736}, 126 | doi = {10.1002/9780470689776}, 127 | note = {ISBN-13: 978-0470844731}, 128 | } 129 | ``` 130 | 131 | For all other examples, reference the original paper as outlined in the OpenCV manual or the header comment of the ```.py``` example file. 132 | 133 | --- 134 | 135 | If you find any bugs, raise an issue (or, much better still, submit a git pull request with a fix) - toby.breckon@durham.ac.uk 136 | 137 | _"may the source be with you"_ - anon.
138 | -------------------------------------------------------------------------------- /calibrate_camera.py: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | 3 | # Example : perform intrinsic calibration of a connected camera 4 | 5 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 6 | 7 | # Copyright (c) 2018-2021 Department of Computer Science, 8 | # Durham University, UK 9 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | # Acknowledgements: 12 | 13 | # http://opencv-python-tutroals.readthedocs.org/en/latest/ \ 14 | # py_tutorials/py_calib3d/py_table_of_contents_calib3d/py_table_of_contents_calib3d.html 15 | 16 | # http://docs.ros.org/electric/api/cob_camera_calibration/html/calibrator_8py_source.html 17 | 18 | ##################################################################### 19 | 20 | import cv2 21 | import argparse 22 | import sys 23 | import numpy as np 24 | 25 | ##################################################################### 26 | 27 | keep_processing = True 28 | 29 | # parse command line arguments for camera ID or video file 30 | 31 | parser = argparse.ArgumentParser( 32 | description='Perform ' + 33 | sys.argv[0] + 34 | ' example operation on incoming camera/video image') 35 | parser.add_argument( 36 | "-c", 37 | "--camera_to_use", 38 | type=int, 39 | help="specify camera to use", 40 | default=0) 41 | parser.add_argument( 42 | "-r", 43 | "--rescale", 44 | type=float, 45 | help="rescale image by this factor", 46 | default=1.0) 47 | parser.add_argument( 48 | "-s", 49 | "--set_resolution", 50 | type=int, 51 | nargs=2, 52 | help='override default camera resolution as H W') 53 | parser.add_argument( 54 | "-cbx", 55 | "--chessboardx", 56 | type=int, 57 | help="specify number of internal chessboard squares \ 58 | (corners) in x-direction", 59 | default=6) 60 | parser.add_argument( 61 | "-cby", 62 | "--chessboardy", 63 | type=int, 64 | help="specify number of internal chessboard squares \ 65 | (corners) in y-direction", 66 | default=8) 67 | parser.add_argument( 68 | "-cbw", 69 | "--chessboardw", 70 | type=float, 71 | help="specify width/height of chessboard squares in mm", 72 | default=40.0) 73 | parser.add_argument( 74 | "-i", 75 | "--iterations", 76 | type=int, 77 | help="specify number of iterations for each stage of optimisation", 78 | default=100) 79 | parser.add_argument( 80 | "-e", 81 | "--minimum_error", 82 | type=float, 83 | help="specify lower error threshold upon which to stop \ 84 | optimisation stages", 85 | default=0.001) 86 | args = parser.parse_args() 87 | 88 | ##################################################################### 89 | 90 | # define video capture object 91 | 92 | try: 93 | # to use a non-buffered camera stream (via a separate thread) 94 | 95 | import camera_stream 96 | cap = camera_stream.CameraVideoStream() 97 | 98 | except BaseException: 99 | # if not then just use OpenCV default 100 | 101 | print("INFO: camera_stream class not found - camera input may be buffered") 102 | cap = cv2.VideoCapture() 103 | 104 | # define display window names 105 | 106 | window_name = "Camera Input" # window name 107 | window_nameU = "Undistorted (calibrated) Camera" # window name 108 | 109 | ##################################################################### 110 | 111 | # perform intrinsic calibration (removal of lens distortion from the image) 112 | 113 | do_calibration = False 114 | termination_criteria_subpix = (
cv2.TERM_CRITERIA_EPS + 116 | cv2.TERM_CRITERIA_MAX_ITER, 117 | args.iterations, 118 | args.minimum_error) 119 | 120 | # set up a set of real-world "object points" for the chessboard pattern 121 | 122 | patternX = args.chessboardx 123 | patternY = args.chessboardy 124 | square_size_in_mm = args.chessboardw 125 | 126 | # prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0) 127 | 128 | objp = np.zeros((patternX * patternY, 3), np.float32) 129 | objp[:, :2] = np.mgrid[0:patternX, 0:patternY].T.reshape(-1, 2) 130 | objp = objp * square_size_in_mm 131 | 132 | # create arrays to store object points and image points from all the images. 133 | objpoints = [] # 3d point in real world space 134 | imgpoints = [] # 2d points in image plane. 135 | 136 | ##################################################################### 137 | 138 | # count number of chessboard detections 139 | chessboard_pattern_detections = 0 140 | 141 | print() 142 | print("--> hold up chessboard (grabbing images at 2 fps)") 143 | print("press c : to continue to calibration") 144 | 145 | ##################################################################### 146 | 147 | # open connected camera 148 | 149 | if cap.open(args.camera_to_use): 150 | 151 | # override default camera resolution 152 | 153 | if (args.set_resolution is not None): 154 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1]) 155 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0]) 156 | 157 | print("INFO: input resolution : (", 158 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x", 159 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")") 160 | 161 | while (not (do_calibration)): 162 | 163 | # grab frames from camera 164 | 165 | ret, frame = cap.read() 166 | 167 | # rescale if specified 168 | 169 | if (args.rescale != 1.0): 170 | frame = cv2.resize(frame, (0, 0), fx=args.rescale, fy=args.rescale) 171 | 172 | # convert to grayscale 173 | 174 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 175 | 176 | # Find the chess board corners in the image 177 | # (change flags to perhaps improve detection ?) 178 | 179 | ret, corners = cv2.findChessboardCorners( 180 | gray, (patternX, patternY), None, cv2.CALIB_CB_ADAPTIVE_THRESH | 181 | cv2.CALIB_CB_FAST_CHECK | cv2.CALIB_CB_NORMALIZE_IMAGE) 182 | 183 | # If found, add object points, image points (after refining them) 184 | 185 | if (ret): 186 | 187 | chessboard_pattern_detections += 1 188 | 189 | # add object points to global list 190 | 191 | objpoints.append(objp) 192 | 193 | # refine corner locations to sub-pixel accuracy and then 194 | 195 | corners_sp = cv2.cornerSubPix( 196 | gray, corners, (11, 11), (-1, -1), termination_criteria_subpix) 197 | imgpoints.append(corners_sp) 198 | 199 | # Draw and display the corners 200 | 201 | drawboard = cv2.drawChessboardCorners( 202 | frame, (patternX, patternY), corners_sp, ret) 203 | 204 | text = 'detected: ' + str(chessboard_pattern_detections) 205 | cv2.putText(drawboard, text, (10, 25), 206 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, 8) 207 | 208 | cv2.imshow(window_name, drawboard) 209 | else: 210 | text = 'detected: ' + str(chessboard_pattern_detections) 211 | cv2.putText(frame, text, (10, 25), 212 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, 8) 213 | 214 | cv2.imshow(window_name, frame) 215 | 216 | # start the event loop 217 | 218 | key = cv2.waitKey(500) & 0xFF # wait 500 ms. 
between frames 219 | if (key == ord('c')): 220 | do_calibration = True 221 | 222 | else: 223 | print("Cannot open connected camera.") 224 | exit() 225 | 226 | ##################################################################### 227 | 228 | # check we detected some patterns within the first loop 229 | 230 | if (chessboard_pattern_detections == 0): 231 | print("No calibration patterns detected - exiting.") 232 | exit() 233 | 234 | ##################################################################### 235 | 236 | # perform calibration - uses [Zhang, 2000] 237 | 238 | print("START - intrinsic calibration ...") 239 | 240 | ret, K, D, rvecs, tvecs = cv2.calibrateCamera( 241 | objpoints, imgpoints, gray.shape[::-1], None, None) 242 | 243 | print("FINISHED - intrinsic calibration") 244 | 245 | # print output in readable format 246 | 247 | print() 248 | print("Intrinsic Camera Calibration Matrix, K - from intrinsic calibration:") 249 | print("(format as follows: fx, fy - focal lengths / cx, cy - optical centers)") 250 | print("[fx, 0, cx]\n[0, fy, cy]\n[0, 0, 1]") 251 | np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)}) 252 | print(K) 253 | print() 254 | print("Intrinsic Distortion Co-efficients, D - from intrinsic calibration:") 255 | print("(k1, k2, k3 - radial / p1, p2 - tangential distortion coefficients)") 256 | print("[k1, k2, p1, p2, k3]") 257 | np.set_printoptions(formatter={'float': lambda x: "{0:0.5f}".format(x)}) 258 | print(D) 259 | print() 260 | print("Image resolution used (width, height): ", np.flip(frame.shape[:2])) 261 | 262 | ##################################################################### 263 | 264 | # perform undistortion (i.e. apply the calibration) on the images 265 | 266 | keep_processing = True 267 | 268 | print() 269 | print("-> performing undistortion") 270 | print("press x : to exit") 271 | 272 | while (keep_processing): 273 | 274 | # grab frames from camera 275 | 276 | ret, frame = cap.read() 277 | 278 | # undistort image using camera matrix K and distortion coefficients D 279 | 280 | undistorted = cv2.undistort(frame, K, D, None, None) 281 | 282 | # display both images 283 | 284 | cv2.imshow(window_name, frame) 285 | cv2.imshow(window_nameU, undistorted) 286 | 287 | # start the event loop - essential 288 | 289 | key = cv2.waitKey(40) & 0xFF # wait 40ms (i.e. 1000ms / 25 fps = 40 ms) 290 | 291 | if (key == ord('x')): 292 | keep_processing = False 293 | 294 | ##################################################################### 295 | 296 | # close all windows and cams. 297 | 298 | cv2.destroyAllWindows() 299 | 300 | ##################################################################### 301 | -------------------------------------------------------------------------------- /canny.py: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | 3 | # Example : canny edge detection for a video file 4 | # specified on the command line (e.g.
python FILE.py video_file) or from an 5 | # attached web camera 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # Copyright (c) 2016 School of Engineering & Computing Science, 10 | # Durham University, UK 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 12 | 13 | ##################################################################### 14 | 15 | import cv2 16 | import argparse 17 | import sys 18 | 19 | ##################################################################### 20 | 21 | keep_processing = True 22 | 23 | # parse command line arguments for camera ID or video file 24 | 25 | parser = argparse.ArgumentParser( 26 | description='Perform ' + 27 | sys.argv[0] + 28 | ' example operation on incoming camera/video image') 29 | parser.add_argument( 30 | "-c", 31 | "--camera_to_use", 32 | type=int, 33 | help="specify camera to use", 34 | default=0) 35 | parser.add_argument( 36 | "-r", 37 | "--rescale", 38 | type=float, 39 | help="rescale image by this factor", 40 | default=1.0) 41 | parser.add_argument( 42 | "-s", 43 | "--set_resolution", 44 | type=int, 45 | nargs=2, 46 | help='override default camera resolution as H W') 47 | parser.add_argument( 48 | "-fs", 49 | "--fullscreen", 50 | action='store_true', 51 | help="run in full screen mode") 52 | parser.add_argument( 53 | "-nc", 54 | "--nocontrols", 55 | action='store_true', 56 | help="no onscreen controls") 57 | parser.add_argument( 58 | 'video_file', 59 | metavar='video_file', 60 | type=str, 61 | nargs='?', 62 | help='specify optional video file') 63 | args = parser.parse_args() 64 | 65 | ##################################################################### 66 | 67 | # this function is called as a callback every time the trackbar is moved 68 | # (here we just do nothing) 69 | 70 | 71 | def nothing(x): 72 | pass 73 | 74 | 75 | ##################################################################### 76 | 77 | # define video capture object 78 | 79 | try: 80 | # to use a non-buffered camera stream (via a separate thread) 81 | 82 | if not (args.video_file): 83 | import camera_stream 84 | cap = camera_stream.CameraVideoStream(use_tapi=True) 85 | else: 86 | cap = cv2.VideoCapture() # not needed for video files 87 | 88 | except BaseException: 89 | # if not then just use OpenCV default 90 | 91 | print("INFO: camera_stream class not found - camera input may be buffered") 92 | cap = cv2.VideoCapture() 93 | 94 | # define display window name 95 | 96 | window_name = "Live Camera Input" # window name 97 | window_name2 = "Canny Edges" # window name 98 | 99 | # if command line arguments are provided try to read video_name 100 | # otherwise default to capture from attached H/W camera 101 | 102 | if (((args.video_file) and (cap.open(str(args.video_file)))) 103 | or (cap.open(args.camera_to_use))): 104 | 105 | # create window by name (as resizable) 106 | 107 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 108 | cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL) 109 | cv2.setWindowProperty(window_name2, cv2.WND_PROP_FULLSCREEN, 110 | cv2.WINDOW_FULLSCREEN & args.fullscreen) 111 | 112 | # add some track bar controllers for settings 113 | 114 | lower_threshold = 25 115 | upper_threshold = 120 116 | smoothing_neighbourhood = 3 117 | sobel_size = 3 # greater than 7 seems to crash 118 | 119 | if (not (args.nocontrols)): 120 | cv2.createTrackbar("lower", window_name2, lower_threshold, 121 | 255, nothing) 122 | cv2.createTrackbar("upper", window_name2, upper_threshold, 123 | 255, nothing) 124 | cv2.createTrackbar("smoothing", window_name2,
smoothing_neighbourhood, 125 | 15, nothing) 126 | cv2.createTrackbar("sobel size", window_name2, sobel_size, 127 | 7, nothing) 128 | 129 | # override default camera resolution 130 | 131 | if (args.set_resolution is not None): 132 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1]) 133 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0]) 134 | 135 | print("INFO: input resolution : (", 136 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x", 137 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")") 138 | 139 | while (keep_processing): 140 | 141 | # if video file successfully open then read frame from video 142 | 143 | if (cap.isOpened): 144 | ret, frame = cap.read() 145 | 146 | # when we reach the end of the video (file) exit cleanly 147 | 148 | if (ret == 0): 149 | keep_processing = False 150 | continue 151 | 152 | # rescale if specified 153 | 154 | if (args.rescale != 1.0): 155 | frame = cv2.resize( 156 | frame, (0, 0), fx=args.rescale, fy=args.rescale) 157 | 158 | # get parameters from track bars 159 | 160 | if (not (args.nocontrols)): 161 | lower_threshold = cv2.getTrackbarPos("lower", window_name2) 162 | upper_threshold = cv2.getTrackbarPos("upper", window_name2) 163 | smoothing_neighbourhood = cv2.getTrackbarPos("smoothing", 164 | window_name2) 165 | sobel_size = cv2.getTrackbarPos("sobel size", window_name2) 166 | 167 | # check neighbourhood is at least 3 and odd 168 | 169 | smoothing_neighbourhood = max(3, smoothing_neighbourhood) 170 | if not (smoothing_neighbourhood % 2): 171 | smoothing_neighbourhood = smoothing_neighbourhood + 1 172 | 173 | sobel_size = max(3, sobel_size) 174 | if not (sobel_size % 2): 175 | sobel_size = sobel_size + 1 176 | 177 | # convert to grayscale 178 | 179 | gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 180 | 181 | # performing smoothing on the image using a smoothing mask of the 182 | # selected neighbourhood size (see manual entry for GaussianBlur()) 183 | 184 | smoothed = cv2.GaussianBlur( 185 | gray_frame, (smoothing_neighbourhood, smoothing_neighbourhood), 0) 186 | 187 | # perform canny edge detection 188 | 189 | canny = cv2.Canny( 190 | smoothed, 191 | lower_threshold, 192 | upper_threshold, 193 | apertureSize=sobel_size) 194 | 195 | # display image 196 | 197 | cv2.imshow(window_name, frame) 198 | cv2.imshow(window_name2, canny) 199 | 200 | # start the event loop - essential 201 | 202 | # cv2.waitKey() is a keyboard binding function (argument is the time in 203 | # milliseconds). It waits for specified milliseconds for any keyboard 204 | # event. If you press any key in that time, the program continues. 205 | # If 0 is passed, it waits indefinitely for a key stroke. 206 | # (bitwise and with 0xFF to extract least significant byte of 207 | # multi-byte response) 208 | 209 | # wait 40ms (i.e. 1000ms / 25 fps = 40 ms) 210 | key = cv2.waitKey(40) & 0xFF 211 | 212 | # It can also be set to detect specific key strokes by recording which 213 | # key is pressed 214 | 215 | # e.g.
if user presses "x" then exit / press "f" for fullscreen 216 | # display 217 | 218 | if (key == ord('x')): 219 | keep_processing = False 220 | elif (key == ord('f')): 221 | cv2.setWindowProperty( 222 | window_name2, 223 | cv2.WND_PROP_FULLSCREEN, 224 | cv2.WINDOW_FULLSCREEN) 225 | 226 | # close all windows 227 | 228 | cv2.destroyAllWindows() 229 | 230 | else: 231 | print("No video file specified or camera connected.") 232 | 233 | ##################################################################### 234 | -------------------------------------------------------------------------------- /chromaticity_lightness.py: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | 3 | # Example : perform live chromaticity/lightness display from a video file 4 | # specified on the command line (e.g. python FILE.py video_file) or from an 5 | # attached web camera 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # Copyright (c) 2018 Toby Breckon, Engineering & Computer Science, 10 | # Durham University, UK 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 12 | 13 | ########################################################################## 14 | 15 | import cv2 16 | import argparse 17 | import sys 18 | import math 19 | import numpy as np 20 | 21 | ########################################################################## 22 | 23 | keep_processing = True 24 | 25 | # parse command line arguments for camera ID or video file 26 | 27 | parser = argparse.ArgumentParser( 28 | description='Perform ' + 29 | sys.argv[0] + 30 | ' example operation on incoming camera/video image') 31 | parser.add_argument( 32 | "-c", 33 | "--camera_to_use", 34 | type=int, 35 | help="specify camera to use", 36 | default=0) 37 | parser.add_argument( 38 | "-r", 39 | "--rescale", 40 | type=float, 41 | help="rescale image by this factor", 42 | default=1.0) 43 | parser.add_argument( 44 | "-s", 45 | "--set_resolution", 46 | type=int, 47 | nargs=2, 48 | help='override default camera resolution as H W') 49 | parser.add_argument( 50 | "-fs", 51 | "--fullscreen", 52 | action='store_true', 53 | help="run in full screen mode") 54 | parser.add_argument( 55 | 'video_file', 56 | metavar='video_file', 57 | type=str, 58 | nargs='?', 59 | help='specify optional video file') 60 | args = parser.parse_args() 61 | 62 | ########################################################################## 63 | 64 | # concatenate two RGB/grayscale images horizontally (left to right) 65 | # handling differing channel numbers or image heights in the input 66 | 67 | 68 | def h_concatenate(img1, img2): 69 | 70 | # get size and channels for both images 71 | 72 | height1 = img1.shape[0] 73 | 74 | if (len(img1.shape) == 2): 75 | channels1 = 1 76 | else: 77 | channels1 = img1.shape[2] 78 | 79 | height2 = img2.shape[0] 80 | width2 = img2.shape[1] 81 | if (len(img2.shape) == 2): 82 | channels2 = 1 83 | else: 84 | channels2 = img2.shape[2] 85 | 86 | # make all images 3 channel, or assume all same channel 87 | 88 | if ((channels1 > channels2) and (channels1 == 3)): 89 | out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR) 90 | out1 = img1 91 | elif ((channels2 > channels1) and (channels2 == 3)): 92 | out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR) 93 | out2 = img2 94 | else: # both must be equal 95 | out1 = img1 96 | out2 = img2 97 | 98 | # height of first image is master height, width can remain unchanged 99 | 100 | if (height1 != height2): 101 | out2 = 
cv2.resize(out2, (width2, height1)) 102 | 103 | return np.hstack((out1, out2)) 104 | 105 | ########################################################################## 106 | 107 | # define video capture object 108 | 109 | 110 | try: 111 | # to use a non-buffered camera stream (via a separate thread) 112 | 113 | if not (args.video_file): 114 | import camera_stream 115 | cap = camera_stream.CameraVideoStream() 116 | else: 117 | cap = cv2.VideoCapture() # not needed for video files 118 | 119 | except BaseException: 120 | # if not then just use OpenCV default 121 | 122 | print("INFO: camera_stream class not found - camera input may be buffered") 123 | cap = cv2.VideoCapture() 124 | 125 | # define display window name 126 | 127 | window_name = "Live - [Original RGB | Chromaticity {r,g,b} | Lightness (l)]" 128 | 129 | # if command line arguments are provided try to read video_name 130 | # otherwise default to capture from attached camera 131 | 132 | if (((args.video_file) and (cap.open(str(args.video_file)))) 133 | or (cap.open(args.camera_to_use))): 134 | 135 | # create window by name (as resizable) 136 | 137 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 138 | 139 | # override default camera resolution 140 | 141 | if (args.set_resolution is not None): 142 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1]) 143 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0]) 144 | 145 | while (keep_processing): 146 | 147 | # start a timer (to see how long processing and display takes) 148 | 149 | start_t = cv2.getTickCount() 150 | 151 | # if camera /video file successfully open then read frame 152 | 153 | if (cap.isOpened): 154 | ret, frame = cap.read() 155 | 156 | # when we reach the end of the video (file) exit cleanly 157 | 158 | if (ret == 0): 159 | keep_processing = False 160 | continue 161 | 162 | # rescale if specified 163 | 164 | if (args.rescale != 1.0): 165 | frame = cv2.resize( 166 | frame, (0, 0), fx=args.rescale, fy=args.rescale) 167 | 168 | # compute chromaticity as c = c / SUM(RGB) for c = {R, G, B} with 169 | # safety for divide by zero errors 170 | # chromaticity {r,g,b} range is floating point 0 -> 1 171 | 172 | # N.B. if extracting chromaticity {r,g} from this remember to 173 | # take channels r = 2 and g = 1 due to OpenCV BGR channel ordering 174 | 175 | chromaticity = np.zeros(frame.shape).astype(np.float32) 176 | sum_channel = (frame[:, :, 0].astype(np.float32) 177 | + frame[:, :, 1].astype(np.float32) 178 | + frame[:, :, 2].astype(np.float32) 179 | + 1) 180 | chromaticity[:, :, 0] = (frame[:, :, 0] / sum_channel) 181 | chromaticity[:, :, 1] = (frame[:, :, 1] / sum_channel) 182 | chromaticity[:, :, 2] = (frame[:, :, 2] / sum_channel) 183 | 184 | # compute lightness as an integer = RGB / 3 (range is 0 -> 255) 185 | 186 | lightness = np.floor(sum_channel / 3) 187 | 188 | # display image as a concatenated triple of [ RGB | Chromaticity | 189 | # Lightness ] adjusting back to 8-bit and scaling appropriately 190 | 191 | cv2.imshow( 192 | window_name, 193 | h_concatenate( 194 | h_concatenate( 195 | frame, 196 | (chromaticity * 197 | 255).astype( 198 | np.uint8)), 199 | lightness.astype( 200 | np.uint8))) 201 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, 202 | cv2.WINDOW_FULLSCREEN & args.fullscreen) 203 | 204 | # stop the timer and convert to ms. 
(to see how long processing and 205 | # display takes) 206 | 207 | stop_t = ((cv2.getTickCount() - start_t) / 208 | cv2.getTickFrequency()) * 1000 209 | 210 | # start the event loop - essential 211 | 212 | # cv2.waitKey() is a keyboard binding function (argument is the time in 213 | # ms). It waits for specified milliseconds for any keyboard event. 214 | # If you press any key in that time, the program continues. 215 | # If 0 is passed, it waits indefinitely for a key stroke. 216 | # (bitwise and with 0xFF to extract least significant byte of 217 | # multi-byte response) 218 | 219 | # wait 40ms or less depending on processing time taken (i.e. 1000ms / 220 | # 25 fps = 40 ms) 221 | 222 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF 223 | 224 | # It can also be set to detect specific key strokes by recording which 225 | # key is pressed 226 | 227 | # e.g. if user presses "x" then exit / press "f" for fullscreen 228 | # display 229 | 230 | if (key == ord('x')): 231 | keep_processing = False 232 | elif (key == ord('f')): 233 | args.fullscreen = not (args.fullscreen) 234 | 235 | # close all windows 236 | 237 | cv2.destroyAllWindows() 238 | 239 | else: 240 | print("No video file specified or camera connected.") 241 | 242 | ########################################################################## 243 | -------------------------------------------------------------------------------- /cnn_ssd_detection.py: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | 3 | # Example : perform live object detection using a pre-trained SSD CNN model 4 | # and display from a video file specified on the command line 5 | # (e.g. python FILE.py video_file) or from an attached web camera 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # Copyright (c) 2017 Department of Computer Science, 10 | # Durham University, UK 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 12 | 13 | # based on provided examples at: 14 | # https://github.com/opencv/opencv/tree/master/samples/dnn 15 | # see here for how to load Caffe/TensorFlow/... models etc. 16 | 17 | # implements a version of: 18 | 19 | # MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications 20 | # Andrew G.
Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, 21 | # Tobias Weyand, Marco Andreetto, Hartwig Adam 22 | # research paper: https://arxiv.org/abs/1704.04861 23 | 24 | # requires Caffe network model files (.prototxt / .caffemodel) downloaded from: 25 | # https://github.com/chuanqi305/MobileNet-SSD/ 26 | 27 | ##################################################################### 28 | 29 | import cv2 30 | import argparse 31 | import sys 32 | import math 33 | 34 | ##################################################################### 35 | 36 | keep_processing = True 37 | 38 | # parse command line arguments for camera ID or video file 39 | 40 | parser = argparse.ArgumentParser( 41 | description='Perform ' + 42 | sys.argv[0] + 43 | ' example operation on incoming camera/video image') 44 | parser.add_argument( 45 | "-c", 46 | "--camera_to_use", 47 | type=int, 48 | help="specify camera to use", 49 | default=0) 50 | parser.add_argument( 51 | "-r", 52 | "--rescale", 53 | type=float, 54 | help="rescale image by this factor", 55 | default=1.0) 56 | parser.add_argument( 57 | 'video_file', 58 | metavar='video_file', 59 | type=str, 60 | nargs='?', 61 | help='specify optional video file') 62 | args = parser.parse_args() 63 | 64 | cnn_model_to_load = "MobileNetSSD_deploy" 65 | 66 | ##################################################################### 67 | 68 | 69 | def trackbar_callback(pos): 70 | global confidence_threshold 71 | confidence_threshold = pos / 100.0 72 | 73 | ##################################################################### 74 | 75 | # define video capture object 76 | 77 | 78 | try: 79 | # to use a non-buffered camera stream (via a separate thread) 80 | 81 | if not (args.video_file): 82 | import camera_stream 83 | cap = camera_stream.CameraVideoStream() 84 | else: 85 | cap = cv2.VideoCapture() # not needed for video files 86 | 87 | except BaseException: 88 | # if not then just use OpenCV default 89 | 90 | print("INFO: camera_stream class not found - camera input may be buffered") 91 | cap = cv2.VideoCapture() 92 | 93 | # define display window name 94 | 95 | window_name = "Live Object Detection - CNN: " + cnn_model_to_load 96 | 97 | # if command line arguments are provided try to read video_name 98 | # otherwise default to capture from attached camera 99 | 100 | if (((args.video_file) and (cap.open(str(args.video_file)))) 101 | or (cap.open(args.camera_to_use))): 102 | 103 | # create window by name (as resizable) 104 | 105 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 106 | 107 | # add track bar to window for confidence threshold 108 | 109 | confidence_threshold = 0.7 110 | cv2.createTrackbar('Confidence threshold, %', window_name, int( 111 | confidence_threshold * 100), 99, trackbar_callback) 112 | 113 | # init CNN model - here from Caffe, although OpenCV can import from 114 | # most deep learning frameworks 115 | 116 | net = cv2.dnn.readNetFromCaffe( 117 | cnn_model_to_load + ".prototxt", 118 | cnn_model_to_load + ".caffemodel") 119 | 120 | # provide mappings from class numbers to string labels - these are the 121 | # PASCAL VOC classes 122 | 123 | classNames = {0: 'background', 124 | 1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat', 125 | 5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat', 9: 'chair', 126 | 10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse', 127 | 14: 'motorbike', 15: 'person', 16: 'pottedplant', 128 | 17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor'} 129 | 130 | while (keep_processing): 131 | 132 | # start a timer (to see how long processing and
display takes) 133 | 134 | start_t = cv2.getTickCount() 135 | 136 | # if video file successfully open then read frame from video 137 | 138 | if (cap.isOpened): 139 | ret, frame = cap.read() 140 | 141 | # when we reach the end of the video (file) exit cleanly 142 | 143 | if (ret == 0): 144 | keep_processing = False 145 | continue 146 | 147 | # rescale if specified 148 | 149 | if (args.rescale != 1.0): 150 | frame = cv2.resize( 151 | frame, (0, 0), fx=args.rescale, fy=args.rescale) 152 | 153 | # get size of input 154 | 155 | cols = frame.shape[1] 156 | rows = frame.shape[0] 157 | 158 | # transform the image into a network input "blob" (i.e. tensor) 159 | # by scaling the image to the input size of the network, in this case 160 | # not swapping the R and B channels (i.e. used when network trained on 161 | # RGB and not the BGR of OpenCV) and re-scaling the inputs from 0->255 162 | # to -1->1 by specifying the mean value for each channel 163 | 164 | swapRBchannels = False # do not swap channels 165 | crop = False # crop image or not 166 | meanChannelVal = 255.0 / 2.0 # mean channel value 167 | 168 | inWidth = 300 # network input width 169 | inHeight = 300 # network input height 170 | inScaleFactor = 0.007843 # input scale factor 171 | 172 | blob = cv2.dnn.blobFromImage( 173 | frame, 174 | inScaleFactor, 175 | (inWidth, 176 | inHeight), 177 | (meanChannelVal, 178 | meanChannelVal, 179 | meanChannelVal), 180 | swapRBchannels, 181 | crop) 182 | 183 | # set this transformed image -> tensor blob as the network input 184 | 185 | net.setInput(blob) 186 | 187 | # perform forward inference on the network 188 | 189 | detections = net.forward() 190 | 191 | # process the detections from the CNN to give bounding boxes 192 | # i.e. for each detection returned from the network 193 | 194 | for i in range(detections.shape[2]): 195 | 196 | # extract the confidence of the detection 197 | 198 | confidence = detections[0, 0, i, 2] 199 | 200 | # provided that is above a threshold 201 | 202 | if confidence > confidence_threshold: 203 | 204 | # get the class number id and the bounding box 205 | 206 | class_id = int(detections[0, 0, i, 1]) 207 | 208 | xLeftBottom = int(detections[0, 0, i, 3] * cols) 209 | yLeftBottom = int(detections[0, 0, i, 4] * rows) 210 | xRightTop = int(detections[0, 0, i, 5] * cols) 211 | yRightTop = int(detections[0, 0, i, 6] * rows) 212 | 213 | # draw the bounding box on the frame 214 | 215 | cv2.rectangle(frame, (xLeftBottom, yLeftBottom), 216 | (xRightTop, yRightTop), (0, 255, 0)) 217 | 218 | # look up the class name based on the class id and draw it on 219 | # the frame also 220 | 221 | if class_id in classNames: 222 | label = classNames[class_id] + (": %.2f" % confidence) 223 | labelSize, baseLine = cv2.getTextSize( 224 | label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) 225 | 226 | yLeftBottom = max(yLeftBottom, labelSize[1]) 227 | cv2.rectangle( 228 | frame, ( 229 | xLeftBottom, yLeftBottom - 230 | labelSize[1] 231 | ), ( 232 | xLeftBottom + labelSize[0], 233 | yLeftBottom + baseLine 234 | ), (255, 255, 255), 235 | cv2.FILLED 236 | ) 237 | cv2.putText(frame, label, (xLeftBottom, yLeftBottom), 238 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0)) 239 | 240 | # Display efficiency information - the function getPerfProfile returns 241 | # the overall time for inference from the network 242 | 243 | t, _ = net.getPerfProfile() 244 | inference_t = (t * 1000.0 / cv2.getTickFrequency()) 245 | label = ('Inference time: %.2f ms' % inference_t) + \ 246 | (' (Framerate: %.2f fps' % (1000 / inference_t)) + ')' 247 |
cv2.putText(frame, label, (0, 15), 248 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255)) 249 | 250 | # display image 251 | 252 | cv2.imshow(window_name, frame) 253 | 254 | # stop the timer and convert to ms. (to see how long processing and 255 | # display takes) 256 | 257 | stop_t = ((cv2.getTickCount() - start_t) / 258 | cv2.getTickFrequency()) * 1000 259 | 260 | # start the event loop - essential 261 | 262 | # cv2.waitKey() is a keyboard binding function (argument is the time in 263 | # ms). It waits for specified milliseconds for any keyboard event. 264 | # If you press any key in that time, the program continues. 265 | # If 0 is passed, it waits indefinitely for a key stroke. 266 | # (bitwise and with 0xFF to extract least significant byte of 267 | # multi-byte response) 268 | 269 | # wait 40ms or less depending on processing time taken (i.e. 1000ms / 270 | # 25 fps = 40 ms) 271 | 272 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF 273 | 274 | # It can also be set to detect specific key strokes by recording which 275 | # key is pressed 276 | 277 | # e.g. if user presses "x" then exit / press "f" for fullscreen 278 | 279 | if (key == ord('x')): 280 | keep_processing = False 281 | elif (key == ord('f')): 282 | cv2.setWindowProperty( 283 | window_name, 284 | cv2.WND_PROP_FULLSCREEN, 285 | cv2.WINDOW_FULLSCREEN) 286 | 287 | # close all windows 288 | 289 | cv2.destroyAllWindows() 290 | 291 | else: 292 | print("No video file specified or camera connected.") 293 | 294 | ##################################################################### 295 | -------------------------------------------------------------------------------- /contour_edges.py: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | 3 | # Example : contour edges for a video file 4 | # specified on the command line (e.g. 
python FILE.py video_file) or from an 5 | # attached web camera 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # Copyright (c) 2016 School of Engineering & Computing Science, 10 | # Durham University, UK 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 12 | 13 | ##################################################################### 14 | 15 | import cv2 16 | import argparse 17 | import sys 18 | 19 | ##################################################################### 20 | 21 | keep_processing = True 22 | 23 | # parse command line arguments for camera ID or video file 24 | 25 | parser = argparse.ArgumentParser( 26 | description='Perform ' + 27 | sys.argv[0] + 28 | ' example operation on incoming camera/video image') 29 | parser.add_argument( 30 | "-c", 31 | "--camera_to_use", 32 | type=int, 33 | help="specify camera to use", 34 | default=0) 35 | parser.add_argument( 36 | "-r", 37 | "--rescale", 38 | type=float, 39 | help="rescale image by this factor", 40 | default=1.0) 41 | parser.add_argument( 42 | "-s", 43 | "--set_resolution", 44 | type=int, 45 | nargs=2, 46 | help='override default camera resolution as H W') 47 | parser.add_argument( 48 | 'video_file', 49 | metavar='video_file', 50 | type=str, 51 | nargs='?', 52 | help='specify optional video file') 53 | args = parser.parse_args() 54 | 55 | ##################################################################### 56 | 57 | # this function is called as a call-back everytime the trackbar is moved 58 | # (here we just do nothing) 59 | 60 | 61 | def nothing(x): 62 | pass 63 | 64 | ##################################################################### 65 | 66 | # define video capture object 67 | 68 | 69 | try: 70 | # to use a non-buffered camera stream (via a separate thread) 71 | 72 | if not (args.video_file): 73 | import camera_stream 74 | cap = camera_stream.CameraVideoStream(use_tapi=True) 75 | else: 76 | cap = cv2.VideoCapture() # not needed for video files 77 | 78 | except BaseException: 79 | # if not then just use OpenCV default 80 | 81 | print("INFO: camera_stream class not found - camera input may be buffered") 82 | cap = cv2.VideoCapture() 83 | 84 | # define display window name 85 | 86 | window_name = "Largest Area Contour" # window name 87 | window_name2 = "All Contours" # window name 88 | 89 | # if command line arguments are provided try to read video_name 90 | # otherwise default to capture from attached H/W camera 91 | 92 | if (((args.video_file) and (cap.open(str(args.video_file)))) 93 | or (cap.open(args.camera_to_use))): 94 | 95 | # create window by name (as resizable) 96 | 97 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 98 | cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL) 99 | 100 | # add some track bar controllers for settings 101 | 102 | lower_threshold = 112 103 | cv2.createTrackbar("lower", window_name2, lower_threshold, 255, nothing) 104 | upper_threshold = 170 105 | cv2.createTrackbar("upper", window_name2, upper_threshold, 255, nothing) 106 | smoothing_neighbourhood = 3 107 | cv2.createTrackbar( 108 | "smoothing", 109 | window_name2, 110 | smoothing_neighbourhood, 111 | 15, 112 | nothing) 113 | sobel_size = 3 # greater than 7 seems to crash 114 | cv2.createTrackbar("sobel size", window_name2, sobel_size, 7, nothing) 115 | 116 | # override default camera resolution 117 | 118 | if (args.set_resolution is not None): 119 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1]) 120 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0]) 121 | 122 | print("INFO: input resolution : 
(",
          int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")

    while (keep_processing):

        # if video file successfully open then read frame from video

        if (cap.isOpened):
            ret, frame = cap.read()

            # when we reach the end of the video (file) exit cleanly

            if (ret == 0):
                keep_processing = False
                continue

            # rescale if specified

            if (args.rescale != 1.0):
                frame = cv2.resize(
                    frame, (0, 0), fx=args.rescale, fy=args.rescale)

        # get parameters from track bars

        lower_threshold = cv2.getTrackbarPos("lower", window_name2)
        upper_threshold = cv2.getTrackbarPos("upper", window_name2)
        smoothing_neighbourhood = cv2.getTrackbarPos("smoothing", window_name2)
        sobel_size = cv2.getTrackbarPos("sobel size", window_name2)

        # check neighbourhood is at least 3 and odd

        smoothing_neighbourhood = max(3, smoothing_neighbourhood)
        if not (smoothing_neighbourhood % 2):
            smoothing_neighbourhood = smoothing_neighbourhood + 1

        sobel_size = max(3, sobel_size)
        if not (sobel_size % 2):
            sobel_size = sobel_size + 1

        # convert to grayscale

        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # performing smoothing on the image using an NxN smoothing mask (see
        # manual entry for GaussianBlur())

        smoothed = cv2.GaussianBlur(
            gray_frame, (smoothing_neighbourhood, smoothing_neighbourhood), 0)

        # perform canny edge detection

        canny = cv2.Canny(
            smoothed,
            lower_threshold,
            upper_threshold,
            apertureSize=sobel_size)

        # convert the canny edges into contours (check OpenCV version >= 4.x)

        if (int(cv2.__version__.split(".")[0]) >= 4):
            contours, hierarchy = cv2.findContours(
                canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        else:
            _, contours, hierarchy = cv2.findContours(
                canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # find largest contour by area (largest_contour is left as None
        # when no contours are found, to avoid an undefined variable)

        largest_contour = None
        max_contour_area = -1
        for cnt in contours:
            area = cv2.contourArea(cnt)
            if (area > max_contour_area):
                max_contour_area = area
                largest_contour = cnt

        # draw contours (one display for all of them, one for the largest only)

        # make 3 channel to draw on it in colour
        gray_frame = cv2.cvtColor(gray_frame, cv2.COLOR_GRAY2BGR)
        cv2.drawContours(gray_frame, contours, -1, (0, 255, 0), 3)  # in green
        if largest_contour is not None:
            cv2.drawContours(frame, [largest_contour], 0, (0, 0, 255), 3)  # red

        # display image

        cv2.imshow(window_name, frame)
        cv2.imshow(window_name2, gray_frame)

        # start the event loop - essential

        # cv2.waitKey() is a keyboard binding function (argument is the time in
        # ms). It waits for specified milliseconds for any keyboard event.
        # If you press any key in that time, the program continues.
        # If 0 is passed, it waits indefinitely for a key stroke.
        # (bitwise and with 0xFF to extract least significant byte of
        # multi-byte response)

        # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
        key = cv2.waitKey(40) & 0xFF
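        # as an aside - other examples in this repository subtract the
        # measured processing time from the 40ms frame budget so display
        # still runs near 25 fps; a sketch of that variant (assuming a
        # stop_t timing in milliseconds, as computed in e.g. gaussian.py):
        #
        #   key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF

        # It can also be set to detect specific key strokes by recording which
        # key is pressed

        # e.g.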
if user presses "x" then exit / press "f" for fullscreen 226 | # display 227 | 228 | if (key == ord('x')): 229 | keep_processing = False 230 | elif (key == ord('f')): 231 | cv2.setWindowProperty( 232 | window_name, 233 | cv2.WND_PROP_FULLSCREEN, 234 | cv2.WINDOW_FULLSCREEN) 235 | 236 | # close all windows 237 | 238 | cv2.destroyAllWindows() 239 | 240 | else: 241 | print("No video file specified or camera connected.") 242 | 243 | ##################################################################### 244 | -------------------------------------------------------------------------------- /cycleimages.py: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | 3 | # Example : load and display a set of images from a directory 4 | # basic illustrative python script 5 | 6 | # For use with provided test / training datasets 7 | 8 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 9 | 10 | # Copyright (c) 2015 / 2016 School of Engineering & Computing Science, 11 | # Durham University, UK 12 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 13 | 14 | ##################################################################### 15 | 16 | import cv2 17 | import os 18 | 19 | directory_to_cycle = "path-to-directory-to-cycle" # edit this 20 | 21 | ##################################################################### 22 | 23 | # display all images in directory (sorted by filename) 24 | 25 | for filename in sorted(os.listdir(directory_to_cycle)): 26 | 27 | # if it is a PNG file 28 | 29 | if '.png' in filename: 30 | print(os.path.join(directory_to_cycle, filename)) 31 | 32 | # read it and display in a window 33 | 34 | img = cv2.imread( 35 | os.path.join( 36 | directory_to_cycle, 37 | filename), 38 | cv2.IMREAD_COLOR) 39 | cv2.imshow('the image', img) 40 | key = cv2.waitKey(200) # wait 200ms 41 | if (key == ord('x')): 42 | break 43 | 44 | 45 | # close all windows 46 | 47 | cv2.destroyAllWindows() 48 | 49 | ##################################################################### 50 | -------------------------------------------------------------------------------- /download-models.sh: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | 3 | # multi model file downloader - (c) 2021 Toby Breckon, Durham University, UK 4 | 5 | ################################################################################ 6 | 7 | # models and associated files for automated download 8 | 9 | MODELS=( https://data.pjreddie.com/files/yolov3.weights 10 | https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg 11 | https://raw.githubusercontent.com/AlexeyAB/darknet/master/data/coco.names 12 | https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt 13 | https://github.com/forresti/SqueezeNet/raw/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel 14 | https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/squeezenet_v1.1.prototxt 15 | https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_coco.txt 16 | https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt 17 | http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz 18 | 
https://github.com/foss-for-synopsys-dwc-arc-processors/synopsys-caffe-models/raw/master/caffe_models/openpose/caffe_model/pose_iter_440000.caffemodel 19 | https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/openpose/master/models/pose/coco/pose_deploy_linevec.prototxt 20 | https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml 21 | https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_eye.xml 22 | https://raw.githubusercontent.com/opencv/opencv/master/data/lbpcascades/lbpcascade_frontalface_improved.xml 23 | http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz 24 | https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt 25 | http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel 26 | https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/fcn8s-heavy-pascal.prototxt 27 | https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_pascal_voc.txt 28 | https://raw.githubusercontent.com/PINTO0309/MobileNet-SSD-RealSense/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.caffemodel 29 | https://raw.githubusercontent.com/chuanqi305/MobileNet-SSD/master/voc/MobileNetSSD_deploy.prototxt 30 | ) 31 | 32 | # associated MD5 checksums (output of md5sum filename) 33 | 34 | MD5SUMS=( "4fdfb6d202e9d8e65da14c78b604af95 classification_classes_ILSVRC2012.txt" 35 | "8fc50561361f8bcf96b0177086e7616c coco.names" 36 | "81d7d9cb3438456214afcdb5c83e7bfb object_detection_classes_coco.txt" 37 | "c9e6e28e5b84b7b49c436f929b58db91 pose_deploy_linevec.prototxt" 38 | "5156d31f670511fce9b4e28b403f2939 pose_iter_440000.caffemodel" 39 | "0357e4e11d173c72a01615888826bc8e squeezenet_v1.1.caffemodel" 40 | "dfe9c8d69b154f0ebbba87bc32371e2d squeezenet_v1.1.prototxt" 41 | "5d442b0e550e6c640068e7e15e498599 yolov3.cfg" 42 | "c84e5b99d0e52cd466ae710cadf6d84c yolov3.weights" 43 | "1f1902262c16c2d9acb9bc4f8a8c266f faster_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb" 44 | "2d6fac0caaec1f9558872755ff34818d haarcascade_eye.xml" 45 | "a03f92a797e309e76e6a034ab9e02616 haarcascade_frontalface_default.xml" 46 | "acee557d79a3684cac72ebd811a4eee0 lbpcascade_frontalface_improved.xml" 47 | "5708e4e579d8e4eabeec6c555d4234b2 mask_rcnn_inception_v2_coco_2018_01_28.pbtxt" 48 | "b47e443b313a709e4c39c1caeaa3ecb3 mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb" 49 | "c03b2953ebd846c270da1a8e8f200c09 fcn8s-heavy-pascal.caffemodel" 50 | "532698b83c2e8fa5a010bd996d19d30a fcn8s-heavy-pascal.prototxt" 51 | "5ae5d62183cfb6f6d3ac109359d06a1b object_detection_classes_pascal_voc.txt" 52 | "8bed6fa43361685f4c78f1c084be7775 MobileNetSSD_deploy.caffemodel" 53 | "aa2a13fe1fba2c3b7e067067a6749e7e MobileNetSSD_deploy.prototxt" 54 | 55 | ) 56 | 57 | ################################################################################ 58 | 59 | DIR_LOCAL_TARGET=/tmp/python-examples-cv-models 60 | PWD_SCRIPT=`pwd` 61 | 62 | ################################################################################ 63 | 64 | # Preset this script to fail on error 65 | 66 | set -e 67 | 68 | # check for required commands to download and md5 check 69 | 70 | (command -v curl | grep curl > /dev/null) || 71 | (echo "Error: curl command not found, cannot download.") 72 | 73 | (command -v md5sum | grep md5sum > /dev/null) || 74 | (echo "Error: md5sum command not found, cannot verify files.") 75 | 
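# as an aside: each MD5SUMS entry above follows the "md5sum filename"
# output format, which "md5sum -c" can verify directly later in this
# script, e.g. (illustrative output only):
#
#   $ md5sum coco.names
#   8fc50561361f8bcf96b0177086e7616c  coco.names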
76 | 77 | ################################################################################ 78 | 79 | # Download - perform download of each model 80 | 81 | mkdir -p $DIR_LOCAL_TARGET 82 | cd $DIR_LOCAL_TARGET 83 | 84 | for URL in ${MODELS[@]}; do 85 | echo 86 | echo "Downloading ... " $URL " -> " $DIR_LOCAL_TARGET/ 87 | curl -L -k -O --remote-name $URL 88 | done 89 | 90 | # un-tar/gz any models that need this 91 | 92 | for GZT in `ls *tar.gz`; do 93 | tar -xzf $GZT 94 | rm $GZT 95 | done 96 | 97 | cd $PWD_SCRIPT 98 | 99 | ################################################################################ 100 | 101 | # Post Download - check md5sum 102 | 103 | cd $DIR_LOCAL_TARGET 104 | echo 105 | echo "Performing MD5 file verification checks ..." 106 | printf '%s\n' "${MD5SUMS[@]}" > md5sums.txt 107 | md5sum -c md5sums.txt 108 | rm -f md5sums.txt 109 | 110 | # Post Download - link all files to current directory 111 | 112 | cd $PWD_SCRIPT 113 | echo 114 | echo "Linking files to current directory ..." 115 | ln -sv $DIR_LOCAL_TARGET/* . 116 | 117 | ################################################################################ 118 | -------------------------------------------------------------------------------- /fcn_segmentation.py: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | 3 | # Example : perform FCN semantic image segmentation from a video file 4 | # specified on the command line (e.g. python FILE.py video_file) or from an 5 | # attached web camera (FCN segmentation: Long et al, CVPR 2015) 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # This code: significant portions based on the example available at: 10 | # https://github.com/opencv/opencv/blob/master/samples/dnn/segmentation.py 11 | 12 | 13 | # Copyright (c) 2021 Toby Breckon, Dept. 
Computer Science, 14 | # Durham University, UK 15 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 16 | 17 | ########################################################################## 18 | 19 | # To use download the following files: 20 | 21 | # http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel 22 | # https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/fcn8s-heavy-pascal.prototxt 23 | # https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_pascal_voc.txt 24 | 25 | ########################################################################## 26 | 27 | import cv2 28 | import argparse 29 | import sys 30 | import math 31 | import numpy as np 32 | 33 | ########################################################################## 34 | 35 | keep_processing = True 36 | colors = None 37 | 38 | ########################################################################## 39 | 40 | # generate and display colour legend for segmentation classes 41 | 42 | 43 | def generate_legend(classes, height): 44 | blockHeight = math.floor(height/len(classes)) 45 | 46 | legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8) 47 | for i in range(len(classes)): 48 | block = legend[i * blockHeight:(i + 1) * blockHeight] 49 | block[:, :] = colors[i] 50 | cv2.putText(block, classes[i], 51 | (0, blockHeight//2), 52 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) 53 | 54 | return legend 55 | 56 | ########################################################################## 57 | 58 | # concatenate two RGB/grayscale images horizontally (left to right) 59 | # handling differing channel numbers or image heights in the input 60 | 61 | 62 | def h_concatenate(img1, img2): 63 | 64 | # get size and channels for both images 65 | 66 | height1 = img1.shape[0] 67 | 68 | if (len(img1.shape) == 2): 69 | channels1 = 1 70 | else: 71 | channels1 = img1.shape[2] 72 | 73 | height2 = img2.shape[0] 74 | width2 = img2.shape[1] 75 | if (len(img2.shape) == 2): 76 | channels2 = 1 77 | else: 78 | channels2 = img2.shape[2] 79 | 80 | # make all images 3 channel, or assume all same channel 81 | 82 | if ((channels1 > channels2) and (channels1 == 3)): 83 | out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR) 84 | out1 = img1 85 | elif ((channels2 > channels1) and (channels2 == 3)): 86 | out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR) 87 | out2 = img2 88 | else: # both must be equal 89 | out1 = img1 90 | out2 = img2 91 | 92 | # height of first image is master height, width can remain unchanged 93 | 94 | if (height1 != height2): 95 | out2 = cv2.resize(out2, (width2, height1)) 96 | 97 | return np.hstack((out1, out2)) 98 | 99 | 100 | ########################################################################## 101 | 102 | # parse command line arguments for camera ID or video file 103 | 104 | parser = argparse.ArgumentParser( 105 | description='Perform ' + 106 | sys.argv[0] + 107 | ' example operation on incoming camera/video image') 108 | parser.add_argument( 109 | "-c", 110 | "--camera_to_use", 111 | type=int, 112 | help="specify camera to use", 113 | default=0) 114 | parser.add_argument( 115 | "-r", 116 | "--rescale", 117 | type=float, 118 | help="rescale image by this factor", 119 | default=1.0) 120 | parser.add_argument( 121 | "-fs", 122 | "--fullscreen", 123 | action='store_true', 124 | help="run in full screen mode") 125 | parser.add_argument( 126 | "-use", 127 | "--target", 128 | type=str, 129 | choices=['cpu', 'gpu', 'opencl'], 130 | help="select computational backend", 131 | 
default='gpu')
parser.add_argument(
    'video_file',
    metavar='video_file',
    type=str,
    nargs='?',
    help='specify optional video file')
args = parser.parse_args()

##########################################################################

# define video capture object

try:
    # to use a non-buffered camera stream (via a separate thread)

    if not (args.video_file):
        import camera_stream
        cap = camera_stream.CameraVideoStream()
    else:
        cap = cv2.VideoCapture()  # not needed for video files

except BaseException:
    # if not then just use OpenCV default

    print("INFO: camera_stream class not found - camera input may be buffered")
    cap = cv2.VideoCapture()

# define display window name

window_name = "FCN Semantic Image Segmentation"  # window name

##########################################################################

# Load names of class labels (background = class 0, for PASCAL VOC)

classes = None
with open("object_detection_classes_pascal_voc.txt", 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')
classes.insert(0, "background")  # insert a background class as 0

##########################################################################

# Load CNN model

net = cv2.dnn.readNet(
    "fcn8s-heavy-pascal.caffemodel",
    "fcn8s-heavy-pascal.prototxt",
    'caffe')

# set up compute target as one of [GPU, OpenCL, CPU]

if (args.target == 'gpu'):
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
elif (args.target == 'opencl'):
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
else:
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

##########################################################################

# if command line arguments are provided try to read video_name
# otherwise default to capture from attached camera

if (((args.video_file) and (cap.open(str(args.video_file))))
        or (cap.open(args.camera_to_use))):

    # create window by name (as resizable)

    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

    while (keep_processing):

        # start a timer (to see how long processing and display takes)

        start_t = cv2.getTickCount()

        # if camera / video file successfully open then read frame

        if (cap.isOpened):
            ret, frame = cap.read()

            # when we reach the end of the video (file) exit cleanly

            if (ret == 0):
                keep_processing = False
                continue

            # rescale if specified

            if (args.rescale != 1.0):
                frame = cv2.resize(
                    frame, (0, 0), fx=args.rescale, fy=args.rescale)

        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]

        #######################################################################
        # FCN Segmentation:
        # model: "fcn8s-heavy-pascal.caffemodel"
        # config: "fcn8s-heavy-pascal.prototxt"
        # mean: [0, 0, 0]
        # scale: 1.0
        # width: 500
        # height: 500
        # rgb: false
        #
        # classes: object_detection_classes_pascal_voc.txt
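        #
        # as an aside: the raw network output used below has shape
        # [1, numClasses, H, W], so np.argmax over axis 0 of result[0]
        # yields an HxW map of winning class indices; e.g. for a toy
        # 2-class, 2x2 score tensor:
        #
        #   scores = np.array([[[0.9, 0.1], [0.2, 0.8]],    # class 0
        #                      [[0.1, 0.9], [0.8, 0.2]]])   # class 1
        #   np.argmax(scores, axis=0)  ->  [[0, 1], [1, 0]]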
        #######################################################################

        # create a 4D tensor "blob" from a frame.

        blob = cv2.dnn.blobFromImage(
            frame, scalefactor=1.0,
            size=(500, 500), mean=[0, 0, 0],
            swapRB=False, crop=False
        )

        # Run forward inference on the model

        net.setInput(blob)
        result = net.forward()

        numClasses = result.shape[1]
        height = result.shape[2]
        width = result.shape[3]

        # define colours

        if not colors:
            np.random.seed(888)
            colors = [np.array([0, 0, 0], np.uint8)]
            for i in range(1, numClasses + 1):
                colors.append((colors[i - 1] +
                               np.random.randint(0, 256, [3],
                                                 np.uint8)) / 2
                              )
            del colors[0]

        # generate legend
        legend = generate_legend(classes, frameHeight)

        # display segmentation

        classIds = np.argmax(result[0], axis=0)
        segm = np.stack([colors[idx] for idx in classIds.flatten()])
        segm = segm.reshape(height, width, 3)

        segm = cv2.resize(segm, (frameWidth, frameHeight),
                          interpolation=cv2.INTER_NEAREST)

        # stop the timer and convert to ms. (to see how long processing and
        # display takes)

        stop_t = ((cv2.getTickCount() - start_t) /
                  cv2.getTickFrequency()) * 1000

        # Display efficiency information

        label = ('Inference time: %.2f ms' % stop_t) + \
            (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
        cv2.putText(frame, label, (0, 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

        # display image(s) as concatenated single image

        cv2.imshow(window_name,
                   h_concatenate(h_concatenate(frame, segm.astype(np.uint8)),
                                 legend))
        cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
                              cv2.WINDOW_FULLSCREEN & args.fullscreen)

        # start the event loop - essential

        # wait 40ms or less depending on processing time taken (i.e. 1000ms /
        # 25 fps = 40 ms)

        key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF

        # if user presses "x" then exit / press "f" for fullscreen display

        if (key == ord('x')):
            keep_processing = False
        elif (key == ord('f')):
            args.fullscreen = not (args.fullscreen)

    # close all windows

    cv2.destroyAllWindows()

else:
    print("No video file specified or camera connected.")

##########################################################################
--------------------------------------------------------------------------------
/gaussian.py:
--------------------------------------------------------------------------------
#####################################################################

# Example : gaussian smoothing for a video file specified on the
# command line (e.g.
python FILE.py video_file) or from an 5 | # attached web camera with selectable opencl acceleration 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # Copyright (c) 2021 Dept Computer Science, 10 | # Durham University, UK 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 12 | 13 | ##################################################################### 14 | 15 | import cv2 16 | import argparse 17 | import sys 18 | import math 19 | 20 | ##################################################################### 21 | 22 | keep_processing = True 23 | 24 | # parse command line arguments for camera ID or video file 25 | 26 | parser = argparse.ArgumentParser( 27 | description='Perform ' + 28 | sys.argv[0] + 29 | ' example operation on incoming camera/video image') 30 | parser.add_argument( 31 | "-c", 32 | "--camera_to_use", 33 | type=int, 34 | help="specify camera to use", 35 | default=0) 36 | parser.add_argument( 37 | "-r", 38 | "--rescale", 39 | type=float, 40 | help="rescale image by this factor", 41 | default=1.0) 42 | parser.add_argument( 43 | "-s", 44 | "--set_resolution", 45 | type=int, 46 | nargs=2, 47 | help='override default camera resolution as H W') 48 | parser.add_argument( 49 | "-ocl", 50 | "--opencl", 51 | action='store_true', 52 | help="enable opencl hardware acceleration") 53 | parser.add_argument( 54 | 'video_file', 55 | metavar='video_file', 56 | type=str, 57 | nargs='?', 58 | help='specify optional video file') 59 | 60 | args = parser.parse_args() 61 | 62 | ##################################################################### 63 | 64 | # this function is called as a call-back everytime the trackbar is moved 65 | # (here we just do nothing) 66 | 67 | 68 | def nothing(x): 69 | pass 70 | 71 | 72 | ##################################################################### 73 | 74 | # define video capture object 75 | 76 | try: 77 | # to use a non-buffered camera stream (via a separate thread) 78 | 79 | if not (args.video_file): 80 | import camera_stream 81 | cap = camera_stream.CameraVideoStream(use_tapi=args.opencl) 82 | else: 83 | cap = cv2.VideoCapture() # not needed for video files 84 | 85 | except BaseException: 86 | # if not then just use OpenCV default 87 | 88 | print("INFO: camera_stream class not found - camera input may be buffered") 89 | cap = cv2.VideoCapture() 90 | 91 | # define display window name 92 | 93 | window_name = "Live Camera Input" # window name 94 | window_name2 = "Gaussian Smoothing" # window name 95 | 96 | # setup OpenCL if specified on command line only 97 | 98 | cv2.ocl.setUseOpenCL(args.opencl) 99 | 100 | # if command line arguments are provided try to read video_name 101 | # otherwise default to capture from attached H/W camera 102 | 103 | if (((args.video_file) and (cap.open(str(args.video_file)))) 104 | or (cap.open(args.camera_to_use))): 105 | 106 | # create window by name (as resizable) 107 | 108 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 109 | cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL) 110 | 111 | # add some track bar controllers for settings 112 | 113 | smoothing_neighbourhood = 3 114 | cv2.createTrackbar( 115 | "kernel size", 116 | window_name2, 117 | smoothing_neighbourhood, 118 | 250, 119 | nothing) 120 | 121 | # override default camera resolution 122 | 123 | if (args.set_resolution is not None): 124 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1]) 125 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0]) 126 | 127 | print("INFO: input resolution : (", 128 | 
int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")

    while (keep_processing):

        # start a timer (to see how long processing and display takes)

        start_t = cv2.getTickCount()

        # if video file successfully open then read frame from video

        if (cap.isOpened):
            ret, frame = cap.read()

            # when we reach the end of the video (file) exit cleanly

            if (ret == 0):
                keep_processing = False
                continue

            # rescale if specified

            if (args.rescale != 1.0):
                frame = cv2.resize(
                    frame, (0, 0), fx=args.rescale, fy=args.rescale)

        # get parameters from track bars

        smoothing_neighbourhood = cv2.getTrackbarPos("kernel size",
                                                     window_name2)

        # check neighbourhood is at least 3 and odd

        smoothing_neighbourhood = max(3, smoothing_neighbourhood)
        if not (smoothing_neighbourhood % 2):
            smoothing_neighbourhood = smoothing_neighbourhood + 1

        # performing smoothing on the image using an NxN smoothing mask (see
        # manual entry for GaussianBlur())

        smoothed = cv2.GaussianBlur(frame, (smoothing_neighbourhood,
                                            smoothing_neighbourhood), 0)

        # stop the timer and convert to ms. (to see how long processing and
        # display takes)

        stop_t = ((cv2.getTickCount() - start_t) /
                  cv2.getTickFrequency()) * 1000

        label = ('Processing time: %.2f ms' % stop_t) + \
            (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
        cv2.putText(smoothed, label, (0, 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

        # display image

        cv2.imshow(window_name, frame)
        cv2.imshow(window_name2, smoothed)

        # start the event loop - essential

        # cv2.waitKey() is a keyboard binding function (argument is the time in
        # milliseconds). It waits for specified milliseconds for any keyboard
        # event. If you press any key in that time, the program continues.
        # If 0 is passed, it waits indefinitely for a key stroke.
        # (bitwise and with 0xFF to extract least significant byte of
        # multi-byte response)

        # wait 40ms or less depending on processing time taken (i.e. 1000ms /
        # 25 fps = 40 ms)

        key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF

        # It can also be set to detect specific key strokes by recording which
        # key is pressed

        # e.g. if user presses "x" then exit / press "f" for fullscreen
        # display

        if (key == ord('x')):
            keep_processing = False
        elif (key == ord('f')):
            cv2.setWindowProperty(
                window_name2,
                cv2.WND_PROP_FULLSCREEN,
                cv2.WINDOW_FULLSCREEN)

    # close all windows

    cv2.destroyAllWindows()

else:
    print("No video file specified or camera connected.")

#####################################################################
--------------------------------------------------------------------------------
/generic_interface.py:
--------------------------------------------------------------------------------
##########################################################################

# Example : perform generic live display from a video file
# specified on the command line (e.g.
python FILE.py video_file) or from an 5 | # attached web camera 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # Copyright (c) 2015 - 2018 Toby Breckon, Engineering & Computing Science, 10 | # Durham University, UK 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 12 | 13 | ########################################################################## 14 | 15 | import cv2 16 | import argparse 17 | import sys 18 | import math 19 | 20 | ########################################################################## 21 | 22 | keep_processing = True 23 | 24 | # parse command line arguments for camera ID or video file 25 | 26 | parser = argparse.ArgumentParser( 27 | description='Perform ' + 28 | sys.argv[0] + 29 | ' example operation on incoming camera/video image') 30 | parser.add_argument( 31 | "-c", 32 | "--camera_to_use", 33 | type=int, 34 | help="specify camera to use", 35 | default=0) 36 | parser.add_argument( 37 | "-r", 38 | "--rescale", 39 | type=float, 40 | help="rescale image by this factor", 41 | default=1.0) 42 | parser.add_argument( 43 | "-fs", 44 | "--fullscreen", 45 | action='store_true', 46 | help="run in full screen mode") 47 | parser.add_argument( 48 | 'video_file', 49 | metavar='video_file', 50 | type=str, 51 | nargs='?', 52 | help='specify optional video file') 53 | args = parser.parse_args() 54 | 55 | ########################################################################## 56 | 57 | # define video capture object 58 | 59 | try: 60 | # to use a non-buffered camera stream (via a separate thread) 61 | 62 | if not (args.video_file): 63 | import camera_stream 64 | cap = camera_stream.CameraVideoStream() 65 | else: 66 | cap = cv2.VideoCapture() # not needed for video files 67 | 68 | except BaseException: 69 | # if not then just use OpenCV default 70 | 71 | print("INFO: camera_stream class not found - camera input may be buffered") 72 | cap = cv2.VideoCapture() 73 | 74 | # define display window name 75 | 76 | window_name = "Live Camera Input" # window name 77 | 78 | # if command line arguments are provided try to read video_name 79 | # otherwise default to capture from attached camera 80 | 81 | if (((args.video_file) and (cap.open(str(args.video_file)))) 82 | or (cap.open(args.camera_to_use))): 83 | 84 | # create window by name (as resizable) 85 | 86 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 87 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, 88 | cv2.WINDOW_FULLSCREEN & args.fullscreen) 89 | 90 | while (keep_processing): 91 | 92 | # start a timer (to see how long processing and display takes) 93 | 94 | start_t = cv2.getTickCount() 95 | 96 | # if camera /video file successfully open then read frame 97 | 98 | if (cap.isOpened): 99 | ret, frame = cap.read() 100 | 101 | # when we reach the end of the video (file) exit cleanly 102 | 103 | if (ret == 0): 104 | keep_processing = False 105 | continue 106 | 107 | # rescale if specified 108 | 109 | if (args.rescale != 1.0): 110 | frame = cv2.resize( 111 | frame, (0, 0), fx=args.rescale, fy=args.rescale) 112 | 113 | # *** 114 | # *** do any processing here **** 115 | # *** 116 | 117 | # display image 118 | 119 | cv2.imshow(window_name, frame) 120 | 121 | # stop the timer and convert to ms. (to see how long processing and 122 | # display takes) 123 | 124 | stop_t = ((cv2.getTickCount() - start_t) / 125 | cv2.getTickFrequency()) * 1000 126 | 127 | # start the event loop - essential 128 | 129 | # cv2.waitKey() is a keyboard binding function (argument is the time in 130 | # milliseconds). 
It waits for specified milliseconds for any keyboard
        # event. If you press any key in that time, the program continues.
        # If 0 is passed, it waits indefinitely for a key stroke.
        # (bitwise and with 0xFF to extract least significant byte of
        # multi-byte response)

        # wait 40ms or less depending on processing time taken (i.e. 1000ms /
        # 25 fps = 40 ms)

        key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF

        # It can also be set to detect specific key strokes by recording which
        # key is pressed

        # e.g. if user presses "x" then exit / press "f" for fullscreen
        # display

        if (key == ord('x')):
            keep_processing = False
        elif (key == ord('f')):
            args.fullscreen = not (args.fullscreen)
            cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
                                  cv2.WINDOW_FULLSCREEN & args.fullscreen)
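            # as an aside: the AND trick above works because
            # cv2.WINDOW_FULLSCREEN is 1 and cv2.WINDOW_NORMAL is 0, so
            # ANDing with the boolean fullscreen flag selects between the
            # two window states, e.g.:
            #
            #   cv2.WINDOW_FULLSCREEN & True   ->  1  (cv2.WINDOW_FULLSCREEN)
            #   cv2.WINDOW_FULLSCREEN & False  ->  0  (cv2.WINDOW_NORMAL)

    # close all windows

    cv2.destroyAllWindows()

else:
    print("No video file specified or camera connected.")

##########################################################################
--------------------------------------------------------------------------------
/gradient_orientation.py:
--------------------------------------------------------------------------------
#####################################################################

# Example : perform generic live display of gradient orientations
# (which form the essence of the Histogram of Oriented Gradient (HOG) feature)
# from a video file specified on the command line
# (e.g. python FILE.py video_file) or from an attached web camera

# Author : Toby Breckon, toby.breckon@durham.ac.uk

# https://www.learnopencv.com/histogram-of-oriented-gradients/

# Copyright (c) 2018 Dept.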
Computer Science, 13 | # Durham University, UK 14 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 15 | 16 | ##################################################################### 17 | 18 | import cv2 19 | import argparse 20 | import sys 21 | import math 22 | import numpy as np 23 | 24 | ##################################################################### 25 | 26 | keep_processing = True 27 | 28 | # parse command line arguments for camera ID or video file 29 | 30 | parser = argparse.ArgumentParser( 31 | description='Perform ' + 32 | sys.argv[0] + 33 | ' example operation on incoming camera/video image') 34 | parser.add_argument( 35 | "-c", 36 | "--camera_to_use", 37 | type=int, 38 | help="specify camera to use", 39 | default=0) 40 | parser.add_argument( 41 | "-r", 42 | "--rescale", 43 | type=float, 44 | help="rescale image by this factor", 45 | default=1.0) 46 | parser.add_argument( 47 | 'video_file', 48 | metavar='video_file', 49 | type=str, 50 | nargs='?', 51 | help='specify optional video file') 52 | args = parser.parse_args() 53 | 54 | ##################################################################### 55 | 56 | # this function is called as a call-back everytime the trackbar is moved 57 | # (here we just do nothing) 58 | 59 | 60 | def nothing(x): 61 | pass 62 | 63 | 64 | ##################################################################### 65 | 66 | # define video capture object 67 | 68 | try: 69 | # to use a non-buffered camera stream (via a separate thread) 70 | 71 | if not (args.video_file): 72 | import camera_stream 73 | cap = camera_stream.CameraVideoStream() 74 | else: 75 | cap = cv2.VideoCapture() # not needed for video files 76 | 77 | except BaseException: 78 | # if not then just use OpenCV default 79 | 80 | print("INFO: camera_stream class not found - camera input may be buffered") 81 | cap = cv2.VideoCapture() 82 | 83 | # define display window names 84 | 85 | window_nameGx = "Gradient - Gx" # window name 86 | window_nameGy = "Gradient - Gy" # window name 87 | window_nameAngle = "Gradient Angle" # window name 88 | 89 | # if command line arguments are provided try to read video_name 90 | # otherwise default to capture from attached camera 91 | 92 | if (((args.video_file) and (cap.open(str(args.video_file)))) 93 | or (cap.open(args.camera_to_use))): 94 | 95 | # create window by name (as resizable) 96 | 97 | cv2.namedWindow(window_nameGx, cv2.WINDOW_NORMAL) 98 | cv2.namedWindow(window_nameGy, cv2.WINDOW_NORMAL) 99 | cv2.namedWindow(window_nameAngle, cv2.WINDOW_NORMAL) 100 | 101 | # add some track bar controllers for settings 102 | 103 | lower_threshold = 0 104 | cv2.createTrackbar( 105 | "lower", 106 | window_nameAngle, 107 | lower_threshold, 108 | 180, 109 | nothing) 110 | 111 | upper_threshold = 180 112 | cv2.createTrackbar( 113 | "upper", 114 | window_nameAngle, 115 | upper_threshold, 116 | 180, 117 | nothing) 118 | 119 | neighbourhood = 3 120 | cv2.createTrackbar( 121 | "neighbourhood, N", 122 | window_nameGy, 123 | neighbourhood, 124 | 40, 125 | nothing) 126 | 127 | sigma = 1 128 | cv2.createTrackbar( 129 | "sigma", 130 | window_nameGy, 131 | sigma, 132 | 10, 133 | nothing) 134 | 135 | while (keep_processing): 136 | 137 | # start a timer (to see how long processing and display takes) 138 | 139 | start_t = cv2.getTickCount() 140 | 141 | # if video file successfully open then read frame from video 142 | 143 | if (cap.isOpened): 144 | ret, frame = cap.read() 145 | 146 | # when we reach the end of the video (file) exit cleanly 147 | 148 | if (ret == 0): 149 | 
                keep_processing = False
                continue

        # rescale if specified

        if (args.rescale != 1.0):
            frame = cv2.resize(
                frame, (0, 0), fx=args.rescale, fy=args.rescale)

        # get parameter from track bars - Gaussian pre-smoothing

        neighbourhood = cv2.getTrackbarPos("neighbourhood, N", window_nameGy)
        sigma = cv2.getTrackbarPos("sigma", window_nameGy)

        # check neighbourhood is at least 3 and odd

        neighbourhood = max(3, neighbourhood)
        if not (neighbourhood % 2):
            neighbourhood = neighbourhood + 1

        # perform Gaussian smoothing using NxN neighbourhood

        smoothed_img = cv2.GaussianBlur(
            frame,
            (neighbourhood,
             neighbourhood),
            sigma,
            sigma,
            borderType=cv2.BORDER_REPLICATE)

        # compute the gradients in the x and y directions separately
        # N.B from here onward these images are 32-bit float

        gx = cv2.Sobel(smoothed_img, cv2.CV_32F, 1, 0)
        gy = cv2.Sobel(smoothed_img, cv2.CV_32F, 0, 1)

        # calculate gradient magnitude and direction (in degrees)

        mag, angle = cv2.cartToPolar(gx, gy, angleInDegrees=True)

        # take absolute values (gradients can be negative)

        gx = np.abs(gx)
        gy = np.abs(gy)
        angle = np.abs(angle)

        # normalize gradients to the 0 -> 255 range and the angle to 0 -> 180

        gx = cv2.normalize(gx, None, 0, 255, cv2.NORM_MINMAX)
        gy = cv2.normalize(gy, None, 0, 255, cv2.NORM_MINMAX)
        angle = cv2.normalize(angle, None, 0, 180, cv2.NORM_MINMAX)

        # for the angle take the max across all three channels

        (aB, aG, aR) = cv2.split(angle)
        angle = np.maximum(np.maximum(aR, aG), aB)

        # get threshold from trackbars and threshold to keep inner range

        lower_threshold = cv2.getTrackbarPos("lower", window_nameAngle)
        upper_threshold = cv2.getTrackbarPos("upper", window_nameAngle)

        mask = cv2.inRange(angle, lower_threshold, upper_threshold)
        angle = cv2.bitwise_and(angle.astype(np.uint8), mask)

        # display images (as 8-bit)

        cv2.imshow(window_nameGx, gx.astype(np.uint8))
        cv2.imshow(window_nameGy, gy.astype(np.uint8))
        cv2.imshow(window_nameAngle, angle.astype(np.uint8))

        # stop the timer and convert to ms. (to see how long processing and
        # display takes)

        stop_t = ((cv2.getTickCount() - start_t) /
                  cv2.getTickFrequency()) * 1000

        # start the event loop - essential

        # cv2.waitKey() is a keyboard binding function (argument is the time in
        # milliseconds). It waits for specified milliseconds for any keyboard
        # event. If you press any key in that time, the program continues.
        # If 0 is passed, it waits indefinitely for a key stroke.
        # (bitwise and with 0xFF to extract least significant byte of
        # multi-byte response)

        # wait 40ms or less depending on processing time taken (i.e. 1000ms /
        # 25 fps = 40 ms)

        key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
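        # as an aside: cv2.cartToPolar() used earlier in this loop is the
        # vectorised equivalent of computing, per pixel,
        # magnitude = sqrt(gx^2 + gy^2) and angle = atan2(gy, gx); e.g. for
        # a single gradient pair (illustrative):
        #
        #   mag, ang = cv2.cartToPolar(np.float32([[3.0]]),
        #                              np.float32([[4.0]]),
        #                              angleInDegrees=True)
        #   # mag -> [[5.0]], ang -> [[53.13...]]

        # It can also be set to detect specific key strokes by recording which
        # key is pressed

        # e.g.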
if user presses "x" then exit / press "f" for fullscreen 244 | # display 245 | 246 | if (key == ord('x')): 247 | keep_processing = False 248 | elif (key == ord('f')): 249 | cv2.setWindowProperty( 250 | window_nameAngle, 251 | cv2.WND_PROP_FULLSCREEN, 252 | cv2.WINDOW_FULLSCREEN) 253 | 254 | # close all windows 255 | 256 | cv2.destroyAllWindows() 257 | 258 | else: 259 | print("No video file specified or camera connected.") 260 | 261 | ##################################################################### 262 | -------------------------------------------------------------------------------- /haar_cascade_detection.py: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | 3 | # Example : perform haar cascade detection on live display from a video file 4 | # specified on the command line (e.g. python FILE.py video_file) or from an 5 | # attached web camera 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # Copyright (c) 2015 / 2016 School of Engineering & Computing Science, 10 | # Durham University, UK 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 12 | 13 | # based on example at: 14 | # http://docs.opencv.org/3.1.0/d7/d8b/tutorial_py_face_detection.html#gsc.tab=0 15 | 16 | # get trained cascade files from: 17 | # https://github.com/opencv/opencv/tree/master/data/haarcascades 18 | 19 | ##################################################################### 20 | 21 | import cv2 22 | import argparse 23 | import sys 24 | import os 25 | import math 26 | 27 | ##################################################################### 28 | 29 | keep_processing = True 30 | faces_recorded = 0 31 | 32 | # parse command line arguments for camera ID or video file 33 | 34 | parser = argparse.ArgumentParser( 35 | description='Perform ' + 36 | sys.argv[0] + 37 | ' example operation on incoming camera/video image') 38 | parser.add_argument( 39 | "-c", 40 | "--camera_to_use", 41 | type=int, 42 | help="specify camera to use", 43 | default=0) 44 | parser.add_argument( 45 | "-r", 46 | "--rescale", 47 | type=float, 48 | help="rescale image by this factor", 49 | default=1.0) 50 | parser.add_argument( 51 | "-ha", 52 | "--harvest", 53 | type=str, 54 | help="path to save detected faces to", 55 | default='') 56 | parser.add_argument( 57 | 'video_file', 58 | metavar='video_file', 59 | type=str, 60 | nargs='?', 61 | help='specify optional video file') 62 | args = parser.parse_args() 63 | 64 | ##################################################################### 65 | # set up directory to save faces to if specified 66 | 67 | if (len(args.harvest) > 0): 68 | try: 69 | os.mkdir(args.harvest) 70 | except OSError: 71 | print("Harvesting to existing directory: " + args.harvest) 72 | 73 | ##################################################################### 74 | 75 | # define video capture object 76 | 77 | try: 78 | # to use a non-buffered camera stream (via a separate thread) 79 | 80 | if not (args.video_file): 81 | import camera_stream 82 | cap = camera_stream.CameraVideoStream() 83 | else: 84 | cap = cv2.VideoCapture() # not needed for video files 85 | 86 | except BaseException: 87 | # if not then just use OpenCV default 88 | 89 | print("INFO: camera_stream class not found - camera input may be buffered") 90 | cap = cv2.VideoCapture() 91 | 92 | # define display window name 93 | 94 | window_name = "Face Detection using Haar Cascades" # window name 95 | 96 | # define haar cascade objects 97 | 98 | # 
required cascade classifier files (and many others) available from:
# https://github.com/opencv/opencv/tree/master/data/haarcascades

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

if (face_cascade.empty() or eye_cascade.empty()):
    print("Failed to load cascade from file.")

# if command line arguments are provided try to read video_name
# otherwise default to capture from attached H/W camera

if (((args.video_file) and (cap.open(str(args.video_file))))
        or (cap.open(args.camera_to_use))):

    # create window by name (as resizable)

    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

    while (keep_processing):

        # if video file successfully open then read frame from video

        if (cap.isOpened):
            ret, frame = cap.read()

            # rescale if specified

            if (args.rescale != 1.0):
                frame = cv2.resize(
                    frame, (0, 0), fx=args.rescale, fy=args.rescale)

        # start a timer (to see how long processing and display takes)

        start_t = cv2.getTickCount()

        # convert to grayscale

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # detect faces using haar cascade trained on faces

        faces = face_cascade.detectMultiScale(
            gray, scaleFactor=1.2, minNeighbors=4, minSize=(
                30, 30), flags=cv2.CASCADE_DO_CANNY_PRUNING)

        # for each detected face, try to detect eyes inside the top
        # half of the face region

        for (x, y, w, h) in faces:

            # extract regions of interest (roi) and draw each face
            # bounding box

            # top 50% to detect eyes
            roi_gray = gray[y:y + math.floor(h * 0.5), x:x + w]
            # copy to save if required
            roi_color = frame[y:y + h, x:x + w].copy()

            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

            # detect eyes using haar cascade trained on eyes

            eyes = eye_cascade.detectMultiScale(roi_gray)

            # for each detected eye, draw bounding box

            for (ex, ey, ew, eh) in eyes:
                cv2.rectangle(frame, (x + ex, y + ey),
                              (x + ex + ew, y + ey + eh), (0, 255, 0), 2)

            # if specified, record all the faces we see to a specified
            # directory

            if (len(args.harvest) > 0):
                filename = os.path.join(
                    args.harvest, "face_" +
                    str(format(faces_recorded, '04')) + ".png")
                cv2.imwrite(filename, roi_color)
                faces_recorded += 1

        # display image

        cv2.imshow(window_name, frame)

        # stop the timer and convert to ms. (to see how long processing and
        # display takes)

        stop_t = ((cv2.getTickCount() - start_t) /
                  cv2.getTickFrequency()) * 1000
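        # as an aside, in the detectMultiScale() call above scaleFactor=1.2
        # shrinks the search image by 20% per pyramid level and minNeighbors=4
        # requires at least 4 overlapping raw detections before a face is
        # reported; a stricter (but slower) variant might be, for example:
        #
        #   faces = face_cascade.detectMultiScale(gray, scaleFactor=1.05,
        #                                         minNeighbors=6,
        #                                         minSize=(60, 60))

        # start the event loop - essential

        # cv2.waitKey() is a keyboard binding function (argument is the time in
        # ms.) It waits for specified milliseconds for any keyboard event.
        # If you press any key in that time, the program continues.
        # If 0 is passed, it waits indefinitely for a key stroke.
        # (bitwise and with 0xFF to extract least significant byte of
        # multi-byte response) here we use a wait time in ms. that takes
        # account of processing time already used in the loop

        # wait 40ms or less depending on processing time taken (i.e.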
1000ms / 200 | # 25 fps = 40 ms) 201 | 202 | key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF 203 | 204 | # It can also be set to detect specific key strokes by recording which 205 | # key is pressed 206 | 207 | # e.g. if user presses "x" then exit / press "f" for fullscreen 208 | # display 209 | 210 | if (key == ord('x')): 211 | keep_processing = False 212 | elif (key == ord('f')): 213 | cv2.setWindowProperty( 214 | window_name, 215 | cv2.WND_PROP_FULLSCREEN, 216 | cv2.WINDOW_FULLSCREEN) 217 | 218 | # close all windows 219 | 220 | cv2.destroyAllWindows() 221 | 222 | else: 223 | print("No video file specified or camera connected.") 224 | -------------------------------------------------------------------------------- /harris.py: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | 3 | # Example : harris feature points from a video file 4 | # specified on the command line (e.g. python FILE.py video_file) or from an 5 | # attached web camera 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # Copyright (c) 2015-2024 Engineering & Computing Science, 10 | # Durham University, UK 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 12 | 13 | ##################################################################### 14 | 15 | import cv2 16 | import argparse 17 | import sys 18 | import numpy as np 19 | 20 | ##################################################################### 21 | 22 | keep_processing = True 23 | 24 | # parse command line arguments for camera ID or video file 25 | 26 | parser = argparse.ArgumentParser( 27 | description='Perform ' + 28 | sys.argv[0] + 29 | ' example operation on incoming camera/video image') 30 | parser.add_argument( 31 | "-c", 32 | "--camera_to_use", 33 | type=int, 34 | help="specify camera to use", 35 | default=0) 36 | parser.add_argument( 37 | "-r", 38 | "--rescale", 39 | type=float, 40 | help="rescale image by this factor", 41 | default=1.0) 42 | parser.add_argument( 43 | "-s", 44 | "--set_resolution", 45 | type=int, 46 | nargs=2, 47 | help='override default camera resolution as H W') 48 | parser.add_argument( 49 | 'video_file', 50 | metavar='video_file', 51 | type=str, 52 | nargs='?', 53 | help='specify optional video file') 54 | args = parser.parse_args() 55 | 56 | ##################################################################### 57 | 58 | # this function is called as a call-back everytime the trackbar is moved 59 | # (here we just do nothing) 60 | 61 | 62 | def nothing(x): 63 | pass 64 | 65 | ##################################################################### 66 | 67 | # define video capture object 68 | 69 | 70 | try: 71 | # to use a non-buffered camera stream (via a separate thread) 72 | 73 | if not (args.video_file): 74 | import camera_stream 75 | cap = camera_stream.CameraVideoStream() # T-API breaks code 76 | else: 77 | cap = cv2.VideoCapture() # not needed for video files 78 | 79 | except BaseException: 80 | # if not then just use OpenCV default 81 | 82 | print("INFO: camera_stream class not found - camera input may be buffered") 83 | cap = cv2.VideoCapture() 84 | 85 | # define display window name 86 | 87 | window_name = "Live Camera Input" # window name 88 | 89 | # if command line arguments are provided try to read video_name 90 | # otherwise default to capture from attached H/W camera 91 | 92 | if (((args.video_file) and (cap.open(str(args.video_file)))) 93 | or (cap.open(args.camera_to_use))): 94 | 95 | # 
95 |     # create window by name (as resizable)
96 |
97 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
98 |
99 |     # add some track bar controllers for settings
100 |
101 |     neighbourhood = 3
102 |     cv2.createTrackbar(
103 |         "neighbourhood, N",
104 |         window_name,
105 |         neighbourhood,
106 |         15,
107 |         nothing)
108 |
109 |     # override default camera resolution
110 |
111 |     if (args.set_resolution is not None):
112 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
113 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
114 |
115 |     print("INFO: input resolution : (",
116 |           int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
117 |           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
118 |
119 |     while (keep_processing):
120 |
121 |         # if video file successfully open then read frame from video
122 |
123 |         if (cap.isOpened()):
124 |             ret, frame = cap.read()
125 |
126 |             # when we reach the end of the video (file) exit cleanly
127 |
128 |             if (ret == 0):
129 |                 keep_processing = False
130 |                 continue
131 |
132 |             # rescale if specified
133 |
134 |             if (args.rescale != 1.0):
135 |                 frame = cv2.resize(
136 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
137 |
138 |         # convert to single channel grayscale image
139 |         # with 32-bit float representation per pixel
140 |
141 |         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
142 |         gray = np.float32(gray)
143 |
144 |         # get parameters from track bars
145 |
146 |         neighbourhood = cv2.getTrackbarPos("neighbourhood, N", window_name)
147 |
148 |         # check neighbourhood is at least 3 and odd
149 |
150 |         neighbourhood = max(3, neighbourhood)
151 |         if not (neighbourhood % 2):
152 |             neighbourhood = neighbourhood + 1
153 |
154 |         # find Harris corners (via the good features to track function)
155 |
156 |         corners = cv2.goodFeaturesToTrack(
157 |             gray,
158 |             maxCorners=500,
159 |             qualityLevel=0.01,
160 |             minDistance=10,
161 |             blockSize=neighbourhood,
162 |             useHarrisDetector=True,
163 |             k=0.01)
164 |         corners = np.intp(corners)
165 |
166 |         for i in corners:
167 |             x, y = i.ravel()
168 |             cv2.circle(frame, (x, y), 3, (0, 255, 0), -1)
169 |
170 |         # alternatively get the raw Harris eigenvalue response
171 |
172 |         # dst = cv2.cornerHarris(gray,neighbourhood,neighbourhood, 0.01)
173 |
174 |         # threshold for an optimal value; it may vary depending on the image
175 |
176 |         # frame[dst>0.005*dst.max()]=[0,255,0]
177 |
178 |         # display image
179 |
180 |         cv2.imshow(window_name, frame)
181 |
182 |         # start the event loop - essential
183 |
184 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
185 |         # milliseconds). It waits for specified milliseconds for any keyboard
186 |         # event. If you press any key in that time, the program continues.
187 |         # If 0 is passed, it waits indefinitely for a key stroke.
188 |         # (bitwise and with 0xFF to extract least significant byte of
189 |         # multi-byte response)
190 |
191 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
192 |
193 |         key = cv2.waitKey(40) & 0xFF
194 |
195 |         # It can also be set to detect specific key strokes by recording which
196 |         # key is pressed
197 |
198 |         # e.g. if user presses "x" then exit / press "f" for fullscreen
199 |         # display
200 |
201 |         if (key == ord('x')):
202 |             keep_processing = False
203 |         elif (key == ord('f')):
204 |             cv2.setWindowProperty(
205 |                 window_name,
206 |                 cv2.WND_PROP_FULLSCREEN,
207 |                 cv2.WINDOW_FULLSCREEN)
208 |
209 |     # close all windows
210 |
211 |     cv2.destroyAllWindows()
212 |
213 | else:
214 |     print("No video file specified or camera connected.")
215 |
216 | #####################################################################
217 |
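A minimal sketch of sub-pixel corner refinement to complement harris.py above: the corner locations returned by cv2.goodFeaturesToTrack() are integer pixel positions, and cv2.cornerSubPix() can refine them where sub-pixel accuracy matters (e.g. calibration or tracking). The window size and termination criteria values here are illustrative choices, not taken from the original file; gray and corners are the variables from the main loop above.

# refine detected corner locations to sub-pixel accuracy (illustrative)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.01)
corners_refined = cv2.cornerSubPix(gray, np.float32(corners),
                                   (5, 5), (-1, -1), criteria)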
-------------------------------------------------------------------------------- /hog.py: --------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : HOG pedestrian detection from a video file
4 | # specified on the command line (e.g. FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2015 / 2016 School of Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import math
19 | import numpy as np
20 |
21 | #####################################################################
22 |
23 | keep_processing = True
24 |
25 | # parse command line arguments for camera ID or video file
26 |
27 | parser = argparse.ArgumentParser(
28 |     description='Perform ' +
29 |     sys.argv[0] +
30 |     ' example operation on incoming camera/video image')
31 | parser.add_argument(
32 |     "-c",
33 |     "--camera_to_use",
34 |     type=int,
35 |     help="specify camera to use",
36 |     default=0)
37 | parser.add_argument(
38 |     "-r",
39 |     "--rescale",
40 |     type=float,
41 |     help="rescale image by this factor",
42 |     default=1.0)
43 | parser.add_argument(
44 |     'video_file',
45 |     metavar='video_file',
46 |     type=str,
47 |     nargs='?',
48 |     help='specify optional video file')
49 | args = parser.parse_args()
50 |
51 | #####################################################################
52 |
53 | # if we have OpenCL H/W acceleration available, use it - we'll need it
54 |
55 | cv2.ocl.setUseOpenCL(True)
56 | print(
57 |     "INFO: OpenCL - available: ",
58 |     cv2.ocl.haveOpenCL(),
59 |     " using: ",
60 |     cv2.ocl.useOpenCL())
61 |
62 | #####################################################################
63 |
64 |
65 | def inside(r, q):
66 |     rx, ry, rw, rh = r
67 |     qx, qy, qw, qh = q
68 |     return rx > qx and ry > qy and rx + rw < qx + qw and ry + rh < qy + qh
69 |
70 |
71 | def draw_detections(img, rects, thickness=1):
72 |     for x, y, w, h in rects:
73 |         # the HOG detector returns slightly larger rectangles than the
74 |         # real objects so we slightly shrink the rectangles to
75 |         # get a nicer output.
76 |         pad_w, pad_h = int(0.15 * w), int(0.05 * h)
77 |         cv2.rectangle(img, (x + pad_w, y + pad_h),
78 |                       (x + w - pad_w, y + h - pad_h), (0, 255, 0), thickness)
79 |
80 | #####################################################################
81 |
82 | # power law transform
83 | # image - colour image
84 | # gamma - "gradient" co-efficient of gamma function
85 |
86 |
87 | def powerlaw_transform(image, gamma):
88 |
89 |     # compute power-law transform
90 |     # remembering not defined for pixel = 0 (!)
91 |
92 |     # handle any overflow in a quick and dirty way using 0-255 clipping
93 |
94 |     image = np.clip(np.power(image, gamma), 0, 255).astype('uint8')
95 |
96 |     return image
97 |
98 |
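# an alternative gamma correction via a normalised 256-entry lookup table
# (an illustrative sketch, not used by the code below): np.power() on the
# raw 0-255 values above saturates towards white for gamma > 1 once
# clipped, whereas this variant applies gamma over the normalised [0, 1]
# range and then rescales back to [0, 255]


def powerlaw_transform_lut(image, gamma):
    table = np.clip(np.power(np.arange(256) / 255.0, gamma) * 255.0,
                    0, 255).astype('uint8')
    return cv2.LUT(image, table)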
99 | #####################################################################
100 |
101 | # this function is called as a callback every time the trackbar is moved
102 | # (here we just do nothing)
103 |
104 | def nothing(x):
105 |     pass
106 |
107 |
108 | #####################################################################
109 |
110 | # define video capture object
111 |
112 |
113 | try:
114 |     # to use a non-buffered camera stream (via a separate thread)
115 |
116 |     if not (args.video_file):
117 |         import camera_stream
118 |         cap = camera_stream.CameraVideoStream()  # T-API done later
119 |     else:
120 |         cap = cv2.VideoCapture()  # not needed for video files
121 |
122 | except BaseException:
123 |     # if not then just use OpenCV default
124 |
125 |     print("INFO: camera_stream class not found - camera input may be buffered")
126 |     cap = cv2.VideoCapture()
127 |
128 | #####################################################################
129 |
130 | # define display window name
131 |
132 | window_name = "HOG pedestrian detection"  # window name
133 |
134 | # if command line arguments are provided try to read video_name
135 | # otherwise default to capture from attached H/W camera
136 |
137 | if (((args.video_file) and (cap.open(str(args.video_file))))
138 |         or (cap.open(args.camera_to_use))):
139 |
140 |     # create window by name (as resizable)
141 |
142 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
143 |
144 |     # set up HoG detector
145 |
146 |     hog = cv2.HOGDescriptor()
147 |     hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
148 |
149 |     # add some track bar controllers for settings
150 |
151 |     neighbourhood = 3
152 |     cv2.createTrackbar("Smoothing : neighbourhood, N", window_name,
153 |                        neighbourhood, 40, nothing)
154 |
155 |     sigma = 1
156 |     cv2.createTrackbar("Smoothing : sigma", window_name, sigma, 10, nothing)
157 |
158 |     gamma = 100  # default gamma = 100 * 0.01 = 1 -> no change
159 |     cv2.createTrackbar("gamma, (* 0.01)", window_name, gamma, 150, nothing)
160 |
161 |     svm_threshold = 0  # by default the SVM's own threshold at the hyperplane
162 |     cv2.createTrackbar("SVM threshold, (distance from hyper-plane, * 0.1)",
163 |                        window_name, svm_threshold, 10, nothing)
164 |
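    # (illustrative addition, not in the original file) the default people
    # detector is a linear SVM over HOG features of a 64x128 pixel window;
    # the length of that feature vector can be confirmed via the API and
    # is 3780 for the default HOGDescriptor parameters

    print("INFO: HOG descriptor size : ", hog.getDescriptorSize())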
165 |     while (keep_processing):
166 |
167 |         # if video file successfully open then read frame from video
168 |
169 |         if (cap.isOpened()):
170 |             ret, frame = cap.read()
171 |
172 |             # when we reach the end of the video (file) exit cleanly
173 |
174 |             if (ret == 0):
175 |                 keep_processing = False
176 |                 continue
177 |
178 |             # rescale if specified
179 |
180 |             if (args.rescale != 1.0):
181 |                 frame = cv2.resize(
182 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
183 |
184 |         # start a timer (to see how long processing and display takes)
185 |
186 |         start_t = cv2.getTickCount()
187 |
188 |         # get parameters from track bars
189 |
190 |         neighbourhood = cv2.getTrackbarPos(
191 |             "Smoothing : neighbourhood, N", window_name)
192 |         sigma = cv2.getTrackbarPos("Smoothing : sigma", window_name)
193 |         gamma = cv2.getTrackbarPos("gamma, (* 0.01)", window_name) * 0.01
194 |         svm_threshold = cv2.getTrackbarPos(
195 |             "SVM threshold, (distance from hyper-plane, * 0.1)",
196 |             window_name) * 0.1
197 |
198 |         # check neighbourhood is at least 3 and odd
199 |
200 |         neighbourhood = max(3, neighbourhood)
201 |         if not (neighbourhood % 2):
202 |             neighbourhood = neighbourhood + 1
203 |
204 |         # use power-law function to perform gamma correction
205 |         # and convert np array to T-API universal array for H/W acceleration
206 |
207 |         frame = cv2.UMat(powerlaw_transform(frame, gamma))
208 |
209 |         # perform Gaussian smoothing using NxN neighbourhood
210 |
211 |         frame = cv2.GaussianBlur(
212 |             frame,
213 |             (neighbourhood, neighbourhood),
214 |             sigma,
215 |             sigma,
216 |             borderType=cv2.BORDER_REPLICATE)
217 |
218 |         # perform HOG based pedestrian detection
219 |
220 |         found, w = hog.detectMultiScale(
221 |             frame, winStride=(8, 8), padding=(32, 32),
222 |             scale=1.05, hitThreshold=svm_threshold)
223 |         found_filtered = []
224 |
225 |         # filter out detections fully enclosed inside another detection
226 |
227 |         for ri, r in enumerate(found):
228 |             for qi, q in enumerate(found):
229 |                 if ri != qi and inside(r, q):
230 |                     break
231 |             else:
232 |                 found_filtered.append(r)
233 |
234 |         draw_detections(frame, found_filtered, 3)
235 |
236 |         # display image
237 |
238 |         cv2.imshow(window_name, frame)
239 |
240 |         # stop the timer and convert to ms. (to see how long processing and
241 |         # display takes)
242 |
243 |         stop_t = ((cv2.getTickCount() - start_t) /
244 |                   cv2.getTickFrequency()) * 1000
245 |
246 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
247 |         # 25 fps = 40 ms)
248 |
249 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
250 |
251 |         # e.g. if user presses "x" then exit / press "f" for fullscreen
252 |         # display
253 |
254 |         if (key == ord('x')):
255 |             keep_processing = False
256 |         elif (key == ord('f')):
257 |             cv2.setWindowProperty(
258 |                 window_name,
259 |                 cv2.WND_PROP_FULLSCREEN,
260 |                 cv2.WINDOW_FULLSCREEN)
261 |
262 |     # close all windows
263 |
264 |     cv2.destroyAllWindows()
265 |
266 | else:
267 |     print("No video file specified or camera connected.")
268 |
269 | #####################################################################
270 |
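For reference, a minimal standalone sketch of the same HOG people detector applied to a single still image; the input and output filenames are illustrative assumptions, and the detector parameters simply mirror those used in hog.py above:

import cv2

hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
img = cv2.imread("pedestrians.jpg")  # hypothetical example input
rects, weights = hog.detectMultiScale(img, winStride=(8, 8),
                                      padding=(32, 32), scale=1.05)
for (x, y, w, h) in rects:
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite("pedestrians-detected.jpg", img)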
-------------------------------------------------------------------------------- /houghlines.py: --------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : Hough line detection based on Canny edge detection
4 | # for a video file specified on the command line (e.g. python FILE.py
5 | # video_file) or from an attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2021 Dept. Computer Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import numpy as np
19 |
20 | #####################################################################
21 |
22 | keep_processing = True
23 | use_probabilistic_hough = False
24 |
25 | # parse command line arguments for camera ID or video file
26 |
27 | parser = argparse.ArgumentParser(
28 |     description='Perform ' +
29 |     sys.argv[0] +
30 |     ' example operation on incoming camera/video image')
31 | parser.add_argument(
32 |     "-c",
33 |     "--camera_to_use",
34 |     type=int,
35 |     help="specify camera to use",
36 |     default=0)
37 | parser.add_argument(
38 |     "-r",
39 |     "--rescale",
40 |     type=float,
41 |     help="rescale image by this factor",
42 |     default=1.0)
43 | parser.add_argument(
44 |     'video_file',
45 |     metavar='video_file',
46 |     type=str,
47 |     nargs='?',
48 |     help='specify optional video file')
49 | args = parser.parse_args()
50 |
51 | #####################################################################
52 |
53 | # this function is called as a callback every time the trackbar is moved
54 | # (here we just do nothing)
55 |
56 |
57 | def nothing(x):
58 |     pass
59 |
60 |
61 | #####################################################################
62 |
63 | # define video capture object
64 |
65 | try:
66 |     # to use a non-buffered camera stream (via a separate thread)
67 |
68 |     if not (args.video_file):
69 |         import camera_stream
70 |         cap = camera_stream.CameraVideoStream()
71 |     else:
72 |         cap = cv2.VideoCapture()  # not needed for video files
73 |
74 | except BaseException:
75 |     # if not then just use OpenCV default
76 |
77 |     print("INFO: camera_stream class not found - camera input may be buffered")
78 |     cap = cv2.VideoCapture()
79 |
80 | # define display window name
81 |
82 | window_name = "Live Camera Input"  # window name
83 | window_name2 = "Hough Lines"  # window name
84 |
85 | # if command line arguments are provided try to read video_name
86 | # otherwise default to capture from attached H/W camera
87 |
88 | if (((args.video_file) and (cap.open(str(args.video_file))))
89 |         or (cap.open(args.camera_to_use))):
90 |
91 |     # create window by name (as resizable)
92 |
93 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
94 |     cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
95 |
96 |     # add some track bar controllers for settings
97 |
98 |     lower_threshold = 25
99 |     cv2.createTrackbar("lower", window_name2, lower_threshold, 255, nothing)
100 |     upper_threshold = 120
101 |     cv2.createTrackbar("upper", window_name2, upper_threshold, 255, nothing)
102 |     smoothing_neighbourhood = 3
103 |     cv2.createTrackbar(
104 |         "smoothing",
105 |         window_name2,
106 |         smoothing_neighbourhood,
107 |         15,
108 |         nothing)
109 |     sobel_size = 3  # greater than 7 seems to crash
110 |     cv2.createTrackbar("sobel size", window_name2, sobel_size, 7, nothing)
111 |
112 |     while (keep_processing):
113 |
114 |         # if video file successfully open then read frame from video
115 |
116 |         if (cap.isOpened()):
117 |             ret, frame = cap.read()
118 |
119 |             # when we reach the end of the video (file) exit cleanly
120 |
121 |             if (ret == 0):
122 |                 keep_processing = False
123 |                 continue
124 |
125 |             # rescale if specified
126 |
127 |             if (args.rescale != 1.0):
128 |                 frame = cv2.resize(
129 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
130 |
131 |         # get parameters from track bars
132 |
133 |         lower_threshold = cv2.getTrackbarPos("lower", window_name2)
134 |         upper_threshold = cv2.getTrackbarPos("upper", window_name2)
135 |         smoothing_neighbourhood = cv2.getTrackbarPos("smoothing", window_name2)
136 |         sobel_size = cv2.getTrackbarPos("sobel size", window_name2)
137 |
138 |         # check neighbourhood is at least 3 and odd
139 |
140 |         smoothing_neighbourhood = max(3, smoothing_neighbourhood)
141 |         if not (smoothing_neighbourhood % 2):
142 |             smoothing_neighbourhood = smoothing_neighbourhood + 1
143 |
144 |         sobel_size = max(3, sobel_size)
145 |         if not (sobel_size % 2):
146 |             sobel_size = sobel_size + 1
147 |
148 |         # convert to grayscale
149 |
150 |         gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
151 |
152 |         # perform smoothing on the image using an NxN Gaussian smoothing
153 |         # mask (see manual entry for GaussianBlur())
154 |
155 |         smoothed = cv2.GaussianBlur(
156 |             gray_frame, (smoothing_neighbourhood, smoothing_neighbourhood), 0)
157 |
158 |         # perform canny edge detection
159 |
160 |         canny = cv2.Canny(
161 |             smoothed,
162 |             lower_threshold,
163 |             upper_threshold,
164 |             apertureSize=sobel_size)
165 |
166 |         # perform hough line detection
167 |         # based on tutorial at:
168 |         # https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_houghlines/py_houghlines.html
169 |
170 |         if not (use_probabilistic_hough):
171 |             lines = cv2.HoughLines(canny, 1, np.pi/180, 40)
172 |             if lines is not None:
173 |                 for rho, theta in lines[:, 0]:
174 |                     a = np.cos(theta)
175 |                     b = np.sin(theta)
176 |                     x0 = a*rho
177 |                     y0 = b*rho
178 |                     x1 = int(x0 + 1000*(-b))
179 |                     y1 = int(y0 + 1000*(a))
180 |                     x2 = int(x0 - 1000*(-b))
181 |                     y2 = int(y0 - 1000*(a))
182 |
183 |                     cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
184 |
185 |         else:
186 |
187 |             # use probabilistic Hough transform
188 |
189 |             min_line_length = 100  # requires tuning
190 |             max_line_gap = 10  # requires tuning
191 |
192 |             lines = cv2.HoughLinesP(canny, 1, np.pi/180, 10,
193 |                                     minLineLength=min_line_length,
194 |                                     maxLineGap=max_line_gap)
195 |             if lines is not None:
196 |                 for x1, y1, x2, y2 in lines[:, 0]:
197 |                     cv2.line(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
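        # note: each line from the standard Hough transform branch above
        # is parameterised as (rho, theta) - rho is the perpendicular
        # distance in pixels from the image origin (top-left) to the
        # line, theta the angle of that perpendicular in radians, so
        # theta = 0 gives a vertical line x = rho and theta = pi/2 a
        # horizontal line y = rho; the endpoint arithmetic above simply
        # extends each line 1000 pixels either side of the foot of the
        # perpendicular at (x0, y0) = (rho * cos(theta), rho * sin(theta))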
198 |         # display image
199 |
200 |         cv2.imshow(window_name, frame)
201 |         cv2.imshow(window_name2, canny)
202 |
203 |         # start the event loop - essential
204 |
205 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
206 |         # milliseconds). It waits for specified milliseconds for any keyboard
207 |         # event. If you press any key in that time, the program continues.
208 |         # If 0 is passed, it waits indefinitely for a key stroke.
209 |         # (bitwise and with 0xFF to extract least significant byte of
210 |         # multi-byte response)
211 |
212 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
213 |         key = cv2.waitKey(40) & 0xFF
214 |
215 |         # It can also be set to detect specific key strokes by recording which
216 |         # key is pressed
217 |
218 |         # e.g. if user presses "x" then exit / press "f" for fullscreen
219 |         # display / press "p" to toggle probabilistic Hough
220 |
221 |         if (key == ord('x')):
222 |             keep_processing = False
223 |         elif (key == ord('f')):
224 |             cv2.setWindowProperty(
225 |                 window_name2,
226 |                 cv2.WND_PROP_FULLSCREEN,
227 |                 cv2.WINDOW_FULLSCREEN)
228 |         elif (key == ord('p')):
229 |             use_probabilistic_hough = not (use_probabilistic_hough)
230 |
231 |     # close all windows
232 |
233 |     cv2.destroyAllWindows()
234 |
235 | else:
236 |     print("No video file specified or camera connected.")
237 |
238 | #####################################################################
239 |
-------------------------------------------------------------------------------- /lbp_cascade_detection.py: --------------------------------------------------------------------------------
1 | # Example : perform LBP cascade detection on live display from a video file
2 | # specified on the command line (e.g. python FILE.py video_file) or from an
3 | # attached web camera
4 |
5 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
6 |
7 | # Copyright (c) 2016 School of Engineering & Computing Science,
8 | # Durham University, UK
9 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
10 |
11 | # based on haar example at:
12 | # http://docs.opencv.org/3.1.0/d7/d8b/tutorial_py_face_detection.html#gsc.tab=0
13 |
14 | # get trained cascade files from:
15 | # https://github.com/opencv/opencv/tree/master/data/
16 |
17 | #####################################################################
18 |
19 | import cv2
20 | import argparse
21 | import sys
22 | import math
23 |
24 | #####################################################################
25 |
26 | keep_processing = True
27 |
28 | # parse command line arguments for camera ID or video file
29 |
30 | parser = argparse.ArgumentParser(
31 |     description='Perform ' +
32 |     sys.argv[0] +
33 |     ' example operation on incoming camera/video image')
34 | parser.add_argument(
35 |     "-c",
36 |     "--camera_to_use",
37 |     type=int,
38 |     help="specify camera to use",
39 |     default=0)
40 | parser.add_argument(
41 |     "-r",
42 |     "--rescale",
43 |     type=float,
44 |     help="rescale image by this factor",
45 |     default=1.0)
46 | parser.add_argument(
47 |     'video_file',
48 |     metavar='video_file',
49 |     type=str,
50 |     nargs='?',
51 |     help='specify optional video file')
52 | args = parser.parse_args()
53 |
54 | #####################################################################
55 |
56 | # define video capture object
57 |
58 | try:
59 |     # to use a non-buffered camera stream (via a separate thread)
60 |
61 |     if not (args.video_file):
62 |         import camera_stream
63 |         cap = camera_stream.CameraVideoStream()
64 |     else:
65 |         cap = cv2.VideoCapture()  # not needed for video files
66 |
67 | except BaseException:
68 |     # if not then just use OpenCV default
69 |
70 |     print("INFO: camera_stream class not found - camera input may be buffered")
71 |     cap = cv2.VideoCapture()
72 |
73 | # define display window name
74 |
75 | window_name = "Face Detection using LBP Cascades"  # window name
76 |
77 | # define LBP cascade classifier objects
78 |
79 | # required cascade classifier files (and many others) available from:
80 | # https://github.com/opencv/opencv/tree/master/data/lbpcascades
81 |
82 | face_cascade = cv2.CascadeClassifier('lbpcascade_frontalface_improved.xml')
83 |
84 | if (face_cascade.empty()):
85 |     print("Failed to load cascade from file.")
86 |
87 |
88 | # if command line arguments are provided try to read video_name
89 | # otherwise default to capture from attached H/W camera
90 |
91 | if (((args.video_file) and (cap.open(str(args.video_file))))
92 |         or (cap.open(args.camera_to_use))):
93 |
94 |     # create window by name (as resizable)
95 |
96 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
97 |
98 |     while (keep_processing):
99 |
100 |         # if video file successfully open then read frame from video
101 |
102 |         if (cap.isOpened()):
103 |             ret, frame = cap.read()
104 |
105 |             # when we reach the end of the video (file) exit cleanly
106 |
107 |             if (ret == 0):
108 |                 keep_processing = False
109 |                 continue
110 |
111 |             # rescale if specified
112 |
113 |             if (args.rescale != 1.0):
114 |                 frame = cv2.resize(
115 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
116 |
117 |         # start a timer (to see how long processing and display takes)
118 |
119 |         start_t = cv2.getTickCount()
120 |
121 |         # convert to grayscale
122 |
123 |         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
124 |
125 |         # detect faces using LBP cascade trained on faces
126 |
127 |         faces = face_cascade.detectMultiScale(
128 |             gray, scaleFactor=1.3, minNeighbors=3, minSize=(30, 30))
129 |
130 |         # for each detected face, draw the bounding box and extract the
131 |         # top half of the face region (e.g. as a search region for the eyes)
132 |
133 |         for (x, y, w, h) in faces:
134 |
135 |             # draw each face bounding box and extract regions of interest (roi)
136 |
137 |             cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
138 |             roi_gray = gray[y:y + math.floor(h * 0.5), x:x + w]
139 |             roi_color = frame[y:y + math.floor(h * 0.5), x:x + w]
140 |
141 |         # display image
142 |
143 |         cv2.imshow(window_name, frame)
144 |
145 |         # stop the timer and convert to ms. (to see how long processing and
146 |         # display takes)
147 |
148 |         stop_t = ((cv2.getTickCount() - start_t) /
149 |                   cv2.getTickFrequency()) * 1000
150 |
151 |         # start the event loop - essential
152 |
153 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
154 |         # ms.) It waits for specified milliseconds for any keyboard event.
155 |         # If you press any key in that time, the program continues.
156 |         # If 0 is passed, it waits indefinitely for a key stroke.
157 |         # (bitwise and with 0xFF to extract least significant byte of
158 |         # multi-byte response) here we use a wait time in ms. that takes
159 |         # account of processing time already used in the loop
160 |
161 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
162 |         # 25 fps = 40 ms)
163 |
164 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
165 |
166 |         # It can also be set to detect specific key strokes by recording which
167 |         # key is pressed
168 |
169 |         # e.g. if user presses "x" then exit / press "f" for fullscreen
170 |         # display
171 |
172 |         if (key == ord('x')):
173 |             keep_processing = False
174 |         elif (key == ord('f')):
175 |             cv2.setWindowProperty(
176 |                 window_name,
177 |                 cv2.WND_PROP_FULLSCREEN,
178 |                 cv2.WINDOW_FULLSCREEN)
179 |
180 |     # close all windows
181 |
182 |     cv2.destroyAllWindows()
183 |
184 | else:
185 |     print("No video file specified or camera connected.")
186 |
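A brief standalone sketch of the two detectMultiScale() parameters that most affect the results above: scaleFactor (the step between successive image scales searched; smaller is slower but finer) and minNeighbors (how many overlapping raw detections are required to retain a face; larger suppresses more false positives). The input filename is an illustrative assumption:

import cv2

cascade = cv2.CascadeClassifier('lbpcascade_frontalface_improved.xml')
img = cv2.imread('faces.jpg')  # hypothetical example input
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
for scale, neighbours in [(1.05, 3), (1.3, 3), (1.3, 6)]:
    faces = cascade.detectMultiScale(gray, scaleFactor=scale,
                                     minNeighbors=neighbours,
                                     minSize=(30, 30))
    print("scaleFactor =", scale, "/ minNeighbors =", neighbours,
          "->", len(faces), "face(s)")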
-------------------------------------------------------------------------------- /mask-rcnn.py: --------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : performs Mask R-CNN object instance segmentation from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2021 Toby Breckon, Durham University, UK
10 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
11 |
12 | # Implements the Mask R-CNN instance segmentation architecture described in:
13 | # Mask R-CNN - Kaiming He, Georgia Gkioxari, Piotr Dollár, Ross Girshick
14 | # https://arxiv.org/abs/1703.06870
15 |
16 | # This code: significant portions based on the example available at:
17 | # https://github.com/opencv/opencv/blob/master/samples/dnn/mask_rcnn.py
18 |
19 | # To use first download and unpack the following files:
20 | # https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_coco.txt
21 | # http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
22 | # https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt
23 | # then unpack and rename as follows:
24 | # tar -xzf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
25 |
26 | ##########################################################################
27 |
28 | import cv2
29 | import argparse
30 | import sys
31 | import math
32 | import numpy as np
33 |
34 | ##########################################################################
35 |
36 | keep_processing = True
37 | colors = None
38 |
39 | # parse command line arguments for camera ID or video file, and Mask
40 | # R-CNN files
41 | parser = argparse.ArgumentParser(
42 |     description='Perform ' +
43 |     sys.argv[0] +
44 |     ' example operation on incoming camera/video image')
45 | parser.add_argument(
46 |     "-c",
47 |     "--camera_to_use",
48 |     type=int,
49 |     help="specify camera to use",
50 |     default=0)
51 | parser.add_argument(
52 |     "-r",
53 |     "--rescale",
54 |     type=float,
55 |     help="rescale image by this factor",
56 |     default=1.0)
57 | parser.add_argument(
58 |     "-fs",
59 |     "--fullscreen",
60 |     action='store_true',
61 |     help="run in full screen mode")
62 | parser.add_argument(
63 |     "-use",
64 |     "--target",
65 |     type=str,
66 |     choices=['cpu', 'gpu', 'opencl'],
67 |     help="select computational backend",
68 |     default='gpu')
69 | parser.add_argument(
70 |     'video_file',
71 |     metavar='video_file',
72 |     type=str,
73 |     nargs='?',
74 |     help='specify optional video file')
75 | parser.add_argument(
76 |     "-cl",
77 |     "--class_file",
78 |     type=str,
79 |     help="list of classes",
80 |     default='object_detection_classes_coco.txt')
81 | parser.add_argument(
82 |     "-cf",
83 |     "--config_file",
84 |     type=str,
85 |     help="network config",
86 |     default='mask_rcnn_inception_v2_coco_2018_01_28.pbtxt')
87 | parser.add_argument(
88 |     "-w",
89 |     "--weights_file",
90 |     type=str,
91 |     help="network weights",
92 |     default="mask_rcnn_inception_v2_coco_2018_01_28/"
93 |             + "frozen_inference_graph.pb")
94 |
95 | args = parser.parse_args()
96 |
97 | ##########################################################################
98 | # dummy on trackbar callback function
99 |
100 |
101 | def on_trackbar(val):
102 |     return
103 |
104 | #####################################################################
105 | # Draw the predicted bounding box on the specified image
106 | # image: image detection performed on
107 | # class_name: string name of detected object_detection
108 | # left, top, right, bottom: rectangle parameters for detection
109 | # colour: to draw detection rectangle in
110 |
111 |
112 | def drawPred(image, class_name, confidence, left, top, right, bottom, colour):
113 |     # Draw a bounding box.
114 |     cv2.rectangle(image, (left, top), (right, bottom), colour, 3)
115 |
116 |     # construct label
117 |     label = '%s:%.2f' % (class_name, confidence)
118 |
119 |     # Display the label at the top of the bounding box
120 |     labelSize, baseLine = cv2.getTextSize(
121 |         label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
122 |     top = max(top, labelSize[1])
123 |     cv2.rectangle(
124 |         image,
125 |         (left, top - round(1.5 * labelSize[1])),
126 |         (left + round(1.5 * labelSize[0]), top + baseLine),
127 |         (255, 255, 255),
128 |         cv2.FILLED)
129 |     cv2.putText(image, label, (left, top),
130 |                 cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 1)
131 |
132 |
143 | ##########################################################################
144 |
145 | # define video capture object
146 |
147 |
148 | try:
149 |     # to use a non-buffered camera stream (via a separate thread)
150 |
151 |     if not (args.video_file):
152 |         import camera_stream
153 |         cap = camera_stream.CameraVideoStream()
154 |     else:
155 |         cap = cv2.VideoCapture()  # not needed for video files
156 |
157 | except BaseException:
158 |     # if not then just use OpenCV default
159 |
160 |     print("INFO: camera_stream class not found - camera input may be buffered")
161 |     cap = cv2.VideoCapture()
162 |
163 | ##########################################################################
164 |
165 | # init Mask R-CNN object detection model
166 |
167 | inpWidth = 800  # Width of network's input image
168 | inpHeight = 800  # Height of network's input image
169 |
170 | # Load names of classes from file
171 |
172 | classesFile = args.class_file
173 | classes = None
174 | with open(classesFile, 'rt') as f:
175 |     classes = f.read().rstrip('\n').split('\n')
176 |
177 | # load configuration and weight files for the model and load the network
178 | # using them
179 |
180 | net = cv2.dnn.readNet(args.config_file, args.weights_file)
181 |
182 | # set up compute target as one of [GPU, OpenCL, CPU]
183 |
184 | if (args.target == 'gpu'):
185 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
186 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
187 | elif (args.target == 'opencl'):
188 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
189 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
190 | else:
191 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
192 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
193 |
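# (illustrative addition, not in the original file) if the default 'gpu'
# target is selected on a build of OpenCV without CUDA support, the later
# net.forward() call will fail; the number of usable CUDA devices can be
# checked up front and used to fall back to the CPU target, e.g.:

if (args.target == 'gpu' and cv2.cuda.getCudaEnabledDeviceCount() < 1):
    print("WARNING: no CUDA device available - falling back to CPU")
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)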
194 | ##########################################################################
195 |
196 | # define display window name + trackbar
197 |
198 | window_name = 'Mask R-CNN instance segmentation: ' + args.weights_file
199 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
200 | trackbarName = 'reporting confidence > (x 0.01)'
201 | cv2.createTrackbar(trackbarName, window_name, 70, 100, on_trackbar)
202 |
203 | ##########################################################################
204 |
205 | # if command line arguments are provided try to read video_name
206 | # otherwise default to capture from attached camera
207 |
208 | if (((args.video_file) and (cap.open(str(args.video_file))))
209 |         or (cap.open(args.camera_to_use))):
210 |
211 |     # create window by name (as resizable)
212 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
213 |
214 |     while (keep_processing):
215 |
216 |         # start a timer (to see how long processing and display takes)
217 |         start_t = cv2.getTickCount()
218 |
219 |         # if camera / video file successfully open then read frame
220 |         if (cap.isOpened()):
221 |             ret, frame = cap.read()
222 |
223 |             # when we reach the end of the video (file) exit cleanly
224 |             if (ret == 0):
225 |                 keep_processing = False
226 |                 continue
227 |
228 |             # rescale if specified
229 |             if (args.rescale != 1.0):
230 |                 frame = cv2.resize(
231 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
232 |
233 |         # get frame dimensions
234 |         frameH = frame.shape[0]
235 |         frameW = frame.shape[1]
236 |
237 |         # create a 4D tensor (OpenCV 'blob') from image frame (pixels not
238 |         # scaled, image resized)
239 |         tensor = cv2.dnn.blobFromImage(
240 |             frame, 1.0, (inpWidth, inpHeight), [0, 0, 0],
241 |             swapRB=True, crop=False)
242 |
243 |         # set the input to the CNN network
244 |         net.setInput(tensor)
245 |
246 |         # runs forward inference to get output of the final output layers
247 |         boxes, masks = net.forward(['detection_out_final', 'detection_masks'])
248 |
249 |         # get confidence threshold from trackbar
250 |         confThreshold = cv2.getTrackbarPos(trackbarName, window_name) / 100
251 |
252 |         # get number of classes detected and number of detections
253 |         numClasses = masks.shape[1]
254 |         numDetections = boxes.shape[2]
255 |
256 |         # draw segmentation - first generate colours if needed
257 |
258 |         if not colors:
259 |             np.random.seed(324)
260 |             colors = [np.array([0, 0, 0], np.uint8)]
261 |             for i in range(1, numClasses + 1):
262 |                 colors.append((colors[i - 1] +
263 |                                np.random.randint(0, 256, [3],
264 |                                                  np.uint8)) / 2
265 |                               )
266 |             del colors[0]
267 |
268 |         # draw segmentation - draw instance segments
269 |
270 |         boxesToDraw = []
271 |         for i in range(numDetections):
272 |             box = boxes[0, 0, i]
273 |             mask = masks[i]
274 |             confidence = box[2]
275 |             if confidence > confThreshold:
276 |
277 |                 # **** draw bounding box (as per Faster R-CNN)
278 |
279 |                 classId = int(box[1])
280 |                 left = int(frameW * box[3])
281 |                 top = int(frameH * box[4])
282 |                 right = int(frameW * box[5])
283 |                 bottom = int(frameH * box[6])
284 |
285 |                 left = max(0, min(left, frameW - 1))
286 |                 top = max(0, min(top, frameH - 1))
287 |                 right = max(0, min(right, frameW - 1))
288 |                 bottom = max(0, min(bottom, frameH - 1))
289 |
290 |                 drawPred(frame, classes[classId], confidence,
291 |                          left, top, right, bottom, (0, 255, 0))
292 |
293 |                 # **** draw object instance mask
294 |                 # get mask, re-size from 28x28 to size of bounding box
295 |                 # then threshold at 0.5
296 |
297 |                 classMask = mask[classId]
298 |                 classMask = cv2.resize(classMask,
299 |                                        (right - left + 1, bottom - top + 1),
300 |                                        interpolation=cv2.INTER_CUBIC)
301 |                 mask = (classMask > 0.5)
302 |
303 |                 roi = frame[top:bottom+1, left:right+1][mask]
304 |                 frame[top:bottom+1, left:right+1][mask] = (
305 |                     0.8 * colors[classId] + 0.2 * roi).astype(np.uint8)
306 |
307 |         # stop the timer and convert to ms. (to see how long processing takes)
308 |
309 |         stop_t = ((cv2.getTickCount() - start_t) /
310 |                   cv2.getTickFrequency()) * 1000
311 |
312 |         # Display efficiency information
313 |
314 |         label = ('Inference time: %.2f ms' % stop_t) + \
315 |             (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
316 |         cv2.putText(frame, label, (0, 15),
317 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
318 |
319 |         # display image
320 |         cv2.imshow(window_name, frame)
321 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
322 |                               cv2.WINDOW_FULLSCREEN & args.fullscreen)
323 |
324 |         # start the event loop + detect specific key strokes
325 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
326 |         # 25 fps = 40 ms)
327 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
328 |
329 |         # if user presses "x" then exit / press "f" for fullscreen display
330 |         if (key == ord('x')):
331 |             keep_processing = False
332 |         elif (key == ord('f')):
333 |             args.fullscreen = not (args.fullscreen)
334 |
335 |     # close all windows
336 |     cv2.destroyAllWindows()
337 |
338 | else:
339 |     print("No video file specified or camera connected.")
340 |
341 | ##########################################################################
342 |
-------------------------------------------------------------------------------- /mog-background-subtraction.py: --------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : perform MoG based foreground/background subtraction from a video
4 | # file specified on the command line (e.g. python FILE.py video_file) or from
5 | # an attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2015-25 Toby Breckon, Engineering & Computer Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import numpy as np
19 |
20 | #####################################################################
21 |
22 | # concatenate two RGB/grayscale images horizontally (left to right)
23 | # handling differing channel numbers or image heights in the input
24 |
25 |
26 | def h_concat(img1, img2):
27 |
28 |     # get size and channels for both images
29 |
30 |     height1 = img1.shape[0]
31 |     # width1 = img1.shape[1]
32 |     if (len(img1.shape) == 2):
33 |         channels1 = 1
34 |     else:
35 |         channels1 = img1.shape[2]
36 |
37 |     height2 = img2.shape[0]
38 |     width2 = img2.shape[1]
39 |     if (len(img2.shape) == 2):
40 |         channels2 = 1
41 |     else:
42 |         channels2 = img2.shape[2]
43 |
44 |     # make all images 3 channel, or assume all same channel
45 |
46 |     if ((channels1 > channels2) and (channels1 == 3)):
47 |         out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
48 |         out1 = img1
49 |     elif ((channels2 > channels1) and (channels2 == 3)):
50 |         out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
51 |         out2 = img2
52 |     else:  # both must be equal
53 |         out1 = img1
54 |         out2 = img2
55 |
56 |     # height of first image is the master height, width remains unchanged
57 |     # (n.b. cv2.resize() takes its target size as (width, height))
58 |     if (height1 != height2):
59 |         out2 = cv2.resize(out2, (width2, height1))
60 |
61 |     return np.hstack((out1, out2))
62 |
63 | #####################################################################
64 |
65 | # concatenate two RGB/grayscale images vertically (top to bottom)
66 | # handling differing channel numbers or image widths in the input
67 |
68 |
69 | def v_concat(img1, img2):
70 |
71 |     # get size and channels for both images
72 |
73 |     # height1 = img1.shape[0]
74 |     width1 = img1.shape[1]
75 |     if (len(img1.shape) == 2):
76 |         channels1 = 1
77 |     else:
78 |         channels1 = img1.shape[2]
79 |
80 |     height2 = img2.shape[0]
81 |     width2 = img2.shape[1]
82 |     if (len(img2.shape) == 2):
83 |         channels2 = 1
84 |     else:
85 |         channels2 = img2.shape[2]
86 |
87 |     # make all images 3 channel, or assume all same channel
88 |
89 |     if ((channels1 > channels2) and (channels1 == 3)):
90 |         out2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
91 |         out1 = img1
92 |     elif ((channels2 > channels1) and (channels2 == 3)):
93 |         out1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
94 |         out2 = img2
95 |     else:  # both must be equal
96 |         out1 = img1
97 |         out2 = img2
98 |
99 |     # width of first image is the master width, height remains unchanged
100 |
101 |     if (width1 != width2):
102 |         out2 = cv2.resize(out2, (width1, height2))
103 |
104 |     return np.vstack((out1, out2))
105 |
106 | #####################################################################
107 |
108 |
109 | keep_processing = True
110 |
111 | # parse command line arguments for camera ID or video file
112 |
113 | parser = argparse.ArgumentParser(
114 |     description='Perform ' +
115 |     sys.argv[0] +
116 |     ' example operation on incoming camera/video image')
117 | parser.add_argument(
118 |     "-c",
119 |     "--camera_to_use",
120 |     type=int,
121 |     help="specify camera to use",
122 |     default=0)
123 | parser.add_argument(
124 |     "-r",
125 |     "--rescale",
126 |     type=float,
127 |     help="rescale image by this factor",
128 |     default=1.0)
129 | parser.add_argument(
130 |     "-s",
131 |     "--set_resolution",
132 |     type=int,
133 |     nargs=2,
134 |     help='override default camera resolution as W H')
135 | parser.add_argument(
136 |     "-fs",
137 |     "--fullscreen",
138 |     action='store_true',
139 |     help="run in full screen mode")
140 | parser.add_argument(
141 |     'video_file',
142 |     metavar='video_file',
143 |     type=str,
144 |     nargs='?',
145 |     help='specify optional video file')
146 | args = parser.parse_args()
147 |
148 | #####################################################################
149 |
150 | # define video capture object
151 |
152 | try:
153 |     # to use a non-buffered camera stream (via a separate thread)
154 |
155 |     if not (args.video_file):
156 |         import camera_stream
157 |         cap = camera_stream.CameraVideoStream()
158 |     else:
159 |         cap = cv2.VideoCapture()  # not needed for video files
160 |
161 | except BaseException:
162 |     # if not then just use OpenCV default
163 |
164 |     print("INFO: camera_stream class not found - camera input may be buffered")
165 |     cap = cv2.VideoCapture()
166 |
167 | # check versions to work around this bug in OpenCV 3.1
168 | # https://github.com/opencv/opencv/issues/6055
169 |
170 | (major, minor, _) = cv2.__version__.split(".")
171 | if ((major == '3') and (minor == '1')):
172 |     cv2.ocl.setUseOpenCL(False)
173 |
174 | # define display window name
175 |
176 | window_name = "Live Camera Input"  # window name
177 | window_nameBG = "Background Model"  # window name
178 | window_nameFG = "Foreground Objects"  # window name
179 | window_nameFGP = "Foreground Probability"  # window name
180 |
181 | # if command line arguments are provided try to read video_name
182 | # otherwise default to capture from attached H/W camera
183 |
184 | if (((args.video_file) and (cap.open(str(args.video_file))))
185 |         or (cap.open(args.camera_to_use))):
186 |
187 |     # create window by name (as resizable)
188 |
189 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
190 |     cv2.namedWindow(window_nameBG, cv2.WINDOW_NORMAL)
191 |     cv2.namedWindow(window_nameFG, cv2.WINDOW_NORMAL)
192 |     cv2.namedWindow(window_nameFGP, cv2.WINDOW_NORMAL)
193 |
194 |     # override default camera resolution
195 |
196 |     if (args.set_resolution is not None):
197 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
198 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
199 |
200 |     # create GMM background subtraction object
201 |     # (using default parameters which are suitable for quick lecture demos
202 |     # - see manual for suitable choice of values to use in anger)
203 |
204 |     mog = cv2.createBackgroundSubtractorMOG2(
205 |         history=2000, varThreshold=16, detectShadows=True)
206 |
207 |     print("\nPress <space> to reset the MoG model ...\n")
208 |
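    # note: mog.apply() in the loop below also accepts an optional
    # learningRate parameter in [0, 1] controlling how quickly the
    # background model adapts; an illustrative (unused) call for
    # reference:
    #
    #   fgmask = mog.apply(frame, learningRate=0.005)
    #
    # learningRate=0 freezes the model, while the default of -1 selects
    # the rate automatically from the history length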
209 |     while (keep_processing):
210 |
211 |         # if video file successfully open then read frame from video
212 |
213 |         if (cap.isOpened()):
214 |             ret, frame = cap.read()
215 |
216 |             # when we reach the end of the video (file) exit cleanly
217 |
218 |             if (ret == 0):
219 |                 keep_processing = False
220 |                 continue
221 |
222 |             # rescale if specified
223 |
224 |             if (args.rescale != 1.0):
225 |                 frame = cv2.resize(
226 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
227 |
228 |         # add current frame to background model and retrieve current foreground
229 |         # objects (use learningRate parameter for tuning, see manual)
230 |
231 |         fgmask = mog.apply(frame)
232 |
233 |         # threshold and clean it up using erosion/dilation w/ elliptic mask
234 |
235 |         fgthres = cv2.threshold(fgmask.copy(), 200, 255, cv2.THRESH_BINARY)[1]
236 |         fgeroded = cv2.erode(
237 |             fgthres, kernel=cv2.getStructuringElement(
238 |                 cv2.MORPH_ELLIPSE, (3, 3)), iterations=3)
239 |         fgdilated = cv2.dilate(
240 |             fgeroded, kernel=cv2.getStructuringElement(
241 |                 cv2.MORPH_ELLIPSE, (3, 3)), iterations=3)
242 |
243 |         # get current background image (representative of current GMM model)
244 |
245 |         bgmodel = mog.getBackgroundImage()
246 |
247 |         # display images - input, background and original
248 |
249 |         if (args.fullscreen):
250 |
251 |             window_name = "[ Live | BG | Pr(FG) | FG ]"
252 |             cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
253 |             cv2.imshow(window_name, v_concat(
254 |                 h_concat(frame, bgmodel),
255 |                 h_concat(fgmask, fgeroded)
256 |             ))
257 |             cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
258 |                                   cv2.WINDOW_FULLSCREEN & args.fullscreen)
259 |
260 |         else:
261 |
262 |             cv2.imshow(window_name, frame)
263 |             cv2.imshow(window_nameFG, fgeroded)
264 |             cv2.imshow(window_nameFGP, fgmask)
265 |             cv2.imshow(window_nameBG, bgmodel)
266 |
267 |         # start the event loop - essential
268 |
269 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
270 |         # ms.) It waits for specified milliseconds for any keyboard event.
271 |         # If you press any key in that time, the program continues.
272 |         # If 0 is passed, it waits indefinitely for a key stroke.
273 |         # (bitwise and with 0xFF to extract least significant byte of
274 |         # multi-byte response) here we use a wait time in ms. that takes
275 |         # account of processing time already used in the loop
276 |
277 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
278 |         key = cv2.waitKey(40) & 0xFF
279 |
280 |         # It can also be set to detect specific key strokes by recording which
281 |         # key is pressed
282 |
283 |         # e.g. 
if user presses "x" then exit, "f" for fullscreen 284 | # or reset MoG model when space is pressed 285 | 286 | if (key == ord('x')): 287 | keep_processing = False 288 | elif (key == ord(' ')): 289 | print("\nResetting MoG background model ...\n") 290 | mog = cv2.createBackgroundSubtractorMOG2( 291 | history=2000, varThreshold=16, detectShadows=True) 292 | elif (key == ord('f')): 293 | args.fullscreen = not (args.fullscreen) 294 | 295 | # close all windows 296 | 297 | cv2.destroyAllWindows() 298 | 299 | else: 300 | print("No video file specified or camera connected.") 301 | 302 | ##################################################################### 303 | -------------------------------------------------------------------------------- /openpose.py: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | 3 | # Example : perform live display of openpose body pose regression from a video 4 | # file specified on the command line (e.g. python FILE.py video_file) or from 5 | # an attached web camera 6 | 7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk 8 | 9 | # Copyright (c) 2019 Toby Breckon, Engineering & Computing Science, 10 | # Durham University, UK 11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html 12 | 13 | # Based heavily on the example provided at: 14 | # https://github.com/opencv/opencv/blob/master/samples/dnn/openpose.py 15 | 16 | ########################################################################## 17 | 18 | # To use download COCO model pose files from: 19 | # https://github.com/CMU-Perceptual-Computing-Lab/openpose/ 20 | # using 21 | # https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/models/getModels.sh 22 | 23 | ########################################################################## 24 | 25 | import cv2 26 | import argparse 27 | import sys 28 | import math 29 | 30 | ########################################################################## 31 | 32 | keep_processing = True 33 | 34 | # parse command line arguments for camera ID or video file 35 | 36 | parser = argparse.ArgumentParser( 37 | description='Perform ' + 38 | sys.argv[0] + 39 | ' example operation on incoming camera/video image') 40 | parser.add_argument( 41 | "-c", 42 | "--camera_to_use", 43 | type=int, 44 | help="specify camera to use", 45 | default=0) 46 | parser.add_argument( 47 | "-r", 48 | "--rescale", 49 | type=float, 50 | help="rescale image by this factor", 51 | default=1.0) 52 | parser.add_argument( 53 | "-fs", 54 | "--fullscreen", 55 | action='store_true', 56 | help="run in full screen mode") 57 | parser.add_argument( 58 | "-use", 59 | "--target", 60 | type=str, 61 | choices=['cpu', 'gpu', 'opencl'], 62 | help="select computational backend", 63 | default='gpu') 64 | parser.add_argument( 65 | 'video_file', 66 | metavar='video_file', 67 | type=str, 68 | nargs='?', 69 | help='specify optional video file') 70 | args = parser.parse_args() 71 | 72 | ########################################################################## 73 | 74 | # define video capture object 75 | 76 | try: 77 | # to use a non-buffered camera stream (via a separate thread) 78 | 79 | if not (args.video_file): 80 | import camera_stream 81 | cap = camera_stream.CameraVideoStream() 82 | else: 83 | cap = cv2.VideoCapture() # not needed for video files 84 | 85 | except BaseException: 86 | # if not then just use OpenCV default 87 | 88 | print("INFO: camera_stream class not found - camera input may be 
buffered") 89 | cap = cv2.VideoCapture() 90 | 91 | ########################################################################## 92 | 93 | # define display window name 94 | 95 | window_name = "OpenPose Body Pose Regression - Live" # window name 96 | 97 | # create window by name (as resizable) 98 | 99 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 100 | 101 | ########################################################################## 102 | 103 | # set pose labels - based on COCO dataset training 104 | 105 | BODY_PARTS = {"Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4, 106 | "LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9, 107 | "RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "REye": 14, 108 | "LEye": 15, "REar": 16, "LEar": 17, "Background": 18} 109 | 110 | POSE_PAIRS = [ 111 | ["Neck", "RShoulder"], ["Neck", "LShoulder"], 112 | ["RShoulder", "RElbow"], ["RElbow", "RWrist"], 113 | ["LShoulder", "LElbow"], ["LElbow", "LWrist"], 114 | ["Neck", "RHip"], ["RHip", "RKnee"], 115 | ["RKnee", "RAnkle"], ["Neck", "LHip"], 116 | ["LHip", "LKnee"], ["LKnee", "LAnkle"], 117 | ["Neck", "Nose"], ["Nose", "REye"], 118 | ["REye", "REar"], ["Nose", "LEye"], 119 | ["LEye", "LEar"] 120 | ] 121 | 122 | ########################################################################## 123 | 124 | # Load CNN model 125 | net = cv2.dnn.readNet( 126 | "pose_iter_440000.caffemodel", 127 | "pose_deploy_linevec.prototxt", 128 | 'caffe') 129 | 130 | # set up compute target as one of [GPU, OpenCL, CPU] 131 | 132 | if (args.target == 'gpu'): 133 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA) 134 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA) 135 | elif (args.target == 'opencl'): 136 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT) 137 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL) 138 | else: 139 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT) 140 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU) 141 | 142 | ########################################################################## 143 | 144 | # if command line arguments are provided try to read video_name 145 | # otherwise default to capture from attached camera 146 | 147 | if (((args.video_file) and (cap.open(str(args.video_file)))) 148 | or (cap.open(args.camera_to_use))): 149 | 150 | while (keep_processing): 151 | 152 | # start a timer (to see how long processing and display takes) 153 | 154 | start_t = cv2.getTickCount() 155 | 156 | # if camera /video file successfully open then read frame 157 | 158 | if (cap.isOpened): 159 | ret, frame = cap.read() 160 | 161 | # when we reach the end of the video (file) exit cleanly 162 | 163 | if (ret == 0): 164 | keep_processing = False 165 | continue 166 | 167 | # rescale if specified 168 | 169 | if (args.rescale != 1.0): 170 | frame = cv2.resize( 171 | frame, (0, 0), fx=args.rescale, fy=args.rescale) 172 | 173 | # create a 4D tensor "blob" from a frame - defaults from OpenCV 174 | # OpenPose example 175 | 176 | blob = cv2.dnn.blobFromImage( 177 | frame, scalefactor=0.003922, size=( 178 | 368, 368), mean=[ 179 | 0, 0, 0], swapRB=False, crop=False) 180 | 181 | # Run forward inference on the model 182 | 183 | net.setInput(blob) 184 | out = net.forward() 185 | 186 | # draw body parts 187 | 188 | if (len(BODY_PARTS) <= out.shape[1]): 189 | 190 | frameWidth = frame.shape[1] 191 | frameHeight = frame.shape[0] 192 | 193 | points = [] 194 | for i in range(len(BODY_PARTS)): 195 | # Slice heatmap of corresponding body's part. 
196 |                 heatMap = out[0, i, :, :]
197 |
198 |                 # The original approach finds all local maxima; to keep
199 |                 # this sample simple we take just the global maximum,
200 |                 # which means only a single pose can be detected at a
201 |                 # time this way.
202 |                 _, conf, _, point = cv2.minMaxLoc(heatMap)
203 |                 x = (frameWidth * point[0]) / out.shape[3]
204 |                 y = (frameHeight * point[1]) / out.shape[2]
205 |
206 |                 # Add a point if its confidence is higher than threshold.
207 |                 points.append((int(x), int(y)) if conf > 0.1 else None)
208 |
209 |             for pair in POSE_PAIRS:
210 |                 partFrom = pair[0]
211 |                 partTo = pair[1]
212 |                 assert (partFrom in BODY_PARTS)
213 |                 assert (partTo in BODY_PARTS)
214 |
215 |                 idFrom = BODY_PARTS[partFrom]
216 |                 idTo = BODY_PARTS[partTo]
217 |
218 |                 if points[idFrom] and points[idTo]:
219 |                     cv2.line(
220 |                         frame, points[idFrom], points[idTo], (0, 255, 0), 3)
221 |                     cv2.ellipse(
222 |                         frame, points[idFrom], (3, 3), 0, 0, 360,
223 |                         (0, 0, 255), cv2.FILLED)
224 |                     cv2.ellipse(
225 |                         frame, points[idTo], (3, 3), 0, 0, 360,
226 |                         (0, 0, 255), cv2.FILLED)
227 |
228 |         # stop the timer and convert to ms.
229 |
230 |         stop_t = ((cv2.getTickCount() - start_t) /
231 |                   cv2.getTickFrequency()) * 1000
232 |
233 |         # add efficiency information
234 |
235 |         label = ('Inference time: %.2f ms' % stop_t) + \
236 |             (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
237 |         cv2.putText(frame, label, (0, 15),
238 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))
239 |
240 |         # display image
241 |
242 |         cv2.imshow(window_name, frame)
243 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
244 |                               cv2.WINDOW_FULLSCREEN & args.fullscreen)
245 |
246 |         # start the event loop - essential
247 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
248 |         # 25 fps = 40 ms)
249 |
250 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
251 |
252 |         # It can also be set to detect specific key strokes by recording which
253 |         # key is pressed
254 |
255 |         # e.g. if user presses "x" then exit / press "f" for fullscreen
256 |         # display
257 |
258 |         if (key == ord('x')):
259 |             keep_processing = False
260 |         elif (key == ord('f')):
261 |             args.fullscreen = not (args.fullscreen)
262 |
263 |     # close all windows
264 |
265 |     cv2.destroyAllWindows()
266 |
267 | else:
268 |     print("No video file specified or camera connected.")
269 |
270 | ##########################################################################
271 |
-------------------------------------------------------------------------------- /opticflow.py: --------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : perform live visualization of optic flow from a video file
4 | # specified on the command line (e.g. 
python FILE.py video_file) or from
5 | # an attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2017 School of Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import numpy as np
19 |
20 | #####################################################################
21 |
22 | keep_processing = True
23 |
24 | # parse command line arguments for camera ID or video file
25 |
26 | parser = argparse.ArgumentParser(
27 |     description='Perform ' +
28 |     sys.argv[0] +
29 |     ' example operation on incoming camera/video image')
30 | parser.add_argument(
31 |     "-c",
32 |     "--camera_to_use",
33 |     type=int,
34 |     help="specify camera to use",
35 |     default=0)
36 | parser.add_argument(
37 |     "-r",
38 |     "--rescale",
39 |     type=float,
40 |     help="rescale image by this factor",
41 |     default=1.0)
42 | parser.add_argument(
43 |     'video_file',
44 |     metavar='video_file',
45 |     type=str,
46 |     nargs='?',
47 |     help='specify optional video file')
48 | args = parser.parse_args()
49 |
50 | #####################################################################
51 |
52 | # draw optic flow visualization on image using a given step size for
53 | # the line glyphs that show the flow vectors on the image
54 |
55 |
56 | def draw_flow(img, flow, step=8):
57 |     h, w = img.shape[:2]
58 |     y, x = np.mgrid[step / 2:h:step, step /
59 |                     2:w:step].reshape(2, -1).astype(int)
60 |     fx, fy = flow[y, x].T
61 |     lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
62 |     lines = np.int32(lines + 0.5)
63 |     vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
64 |     cv2.polylines(vis, lines, 0, (0, 255, 0))
65 |     for (x1, y1), (x2, y2) in lines:
66 |         cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
67 |     return vis
68 |
69 | #####################################################################
70 |
71 | # define video capture object
72 |
73 |
74 | try:
75 |     # to use a non-buffered camera stream (via a separate thread)
76 |
77 |     if not (args.video_file):
78 |         import camera_stream
79 |         cap = camera_stream.CameraVideoStream()
80 |     else:
81 |         cap = cv2.VideoCapture()  # not needed for video files
82 |
83 | except BaseException:
84 |     # if not then just use OpenCV default
85 |
86 |     print("INFO: camera_stream class not found - camera input may be buffered")
87 |     cap = cv2.VideoCapture()
88 |
89 | # define display window name
90 |
91 | window_name = "Dense Optic Flow"  # window name
92 |
93 | # if command line arguments are provided try to read video_name
94 | # otherwise default to capture from attached H/W camera
95 |
96 | if (((args.video_file) and (cap.open(str(args.video_file))))
97 |         or (cap.open(args.camera_to_use))):
98 |
99 |     # create window by name (as resizable)
100 |
101 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
102 |
103 |     # if video file successfully open then read an initial frame from video
104 |
105 |     if (cap.isOpened()):
106 |         ret, frame = cap.read()
107 |
108 |     # rescale if specified
109 |
110 |     if (args.rescale != 1.0):
111 |         frame = cv2.resize(frame, (0, 0), fx=args.rescale, fy=args.rescale)
112 |
113 |     # convert image to grayscale to use as the previous frame
114 |
115 |     prevgray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
116 |
117 |     while (keep_processing):
118 |
119 |         # if video file successfully open then read frame from video
120 |
121 |         if (cap.isOpened()):
122 |             ret, frame = cap.read()
123 |
124 |             # when we reach the end of the video (file) exit cleanly
125 |
126 |             if (ret == 0):
127 |                 keep_processing = False
128 |                 continue
129 |
130 |             # rescale if specified
131 |
132 |             if (args.rescale != 1.0):
133 |                 frame = cv2.resize(
134 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
135 |
136 |         # convert image to grayscale
137 |
138 |         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
139 |
140 |         # compute dense optic flow using technique of Farneback 2003
141 |         # parameters from example (OpenCV 3.2):
142 |         # https://github.com/opencv/opencv/blob/master/samples/python/opt_flow.py
143 |
144 |         flow = cv2.calcOpticalFlowFarneback(
145 |             prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
146 |         prevgray = gray
147 |
148 |         # display image with optic flow overlay
149 |
150 |         cv2.imshow(window_name, draw_flow(gray, flow))
151 |
152 |         # start the event loop - essential
153 |
154 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
155 |         key = cv2.waitKey(40) & 0xFF
156 |
157 |         # It can also be set to detect specific key strokes by recording which
158 |         # key is pressed
159 |
160 |         # e.g. if user presses "x" then exit / press "f" for fullscreen
161 |         # display
162 |
163 |         if (key == ord('x')):
164 |             keep_processing = False
165 |         elif (key == ord('f')):
166 |             cv2.setWindowProperty(
167 |                 window_name,
168 |                 cv2.WND_PROP_FULLSCREEN,
169 |                 cv2.WINDOW_FULLSCREEN)
170 |
171 |     # close all windows
172 |
173 |     cv2.destroyAllWindows()
174 |
175 | else:
176 |     print("No video file specified or camera connected.")
177 |
178 | #####################################################################
179 |
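The line-glyph rendering in draw_flow() above subsamples the flow field every step pixels. A common dense alternative maps flow direction to hue and flow magnitude to brightness; the sketch below is illustrative only (it is not part of opticflow.py) and assumes the float32 two-channel flow field returned by cv2.calcOpticalFlowFarneback():

import cv2
import numpy as np


def draw_flow_hsv(flow):
    # direction -> hue (OpenCV hue range is 0-179), magnitude -> brightness
    h, w = flow.shape[:2]
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    hsv = np.zeros((h, w, 3), dtype=np.uint8)
    hsv[..., 0] = np.uint8(angle * (180 / np.pi / 2))
    hsv[..., 1] = 255
    hsv[..., 2] = np.uint8(
        cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX))
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

Substituting draw_flow_hsv(flow) for draw_flow(gray, flow) in the display call above gives a per-pixel rendering of the same flow field.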
--------------------------------------------------------------------------------
/pyramid.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : perform Gaussian/Laplacian pyramid live display from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2021 Toby Breckon, Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | # Acknowledgements: based in part from tutorial at:
14 | # https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_pyramids/py_pyramids.html
15 |
16 | ##########################################################################
17 |
18 | import cv2
19 | import argparse
20 | import sys
21 | import math
22 | import numpy as np
23 |
24 | ##########################################################################
25 |
26 | keep_processing = True
27 |
28 | # parse command line arguments for camera ID or video file
29 |
30 | parser = argparse.ArgumentParser(
31 |     description='Perform ' +
32 |     sys.argv[0] +
33 |     ' example operation on incoming camera/video image')
34 | parser.add_argument(
35 |     "-c",
36 |     "--camera_to_use",
37 |     type=int,
38 |     help="specify camera to use",
39 |     default=0)
40 | parser.add_argument(
41 |     "-r",
42 |     "--rescale",
43 |     type=float,
44 |     help="rescale image by this factor",
45 |     default=1.0)
46 | parser.add_argument(
47 |     'video_file',
48 |     metavar='video_file',
49 |     type=str,
50 |     nargs='?',
51 |     help='specify optional video file')
52 | args = parser.parse_args()
53 |
54 | #####################################################################
55 |
56 | # define display window name
57 |
58 | window_name = "Live Camera Input"  # window name
59 |
60 | ##########################################################################
61 |
62 | # define video capture object
63 |
64 | try:
65 |     # to use a non-buffered camera stream (via a separate thread)
66 |
67 |     if not (args.video_file):
68 |         import camera_stream
69 |         cap = camera_stream.CameraVideoStream()
70 |     else:
71 |         cap = cv2.VideoCapture()  # not needed for video files
72 |
73 | except BaseException:
74 |     # if not then just use OpenCV default
75 |
76 |     print("INFO: camera_stream class not found - camera input may be buffered")
77 |     cap = cv2.VideoCapture()
78 |
79 | # if command line arguments are provided try to read video_name
80 | # otherwise default to capture from attached camera
81 |
82 | if (((args.video_file) and (cap.open(str(args.video_file))))
83 |         or (cap.open(args.camera_to_use))):
84 |
85 |     # create window by name (as resizable)
86 |
87 |     cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
88 |
89 |     # set initial number of pyramid levels
90 |
91 |     nlevels = 5
92 |
93 |     # print user key commands
94 |
95 |     print()
96 |     print("'-' - reduce pyramid levels")
97 |     print("'+' - increase pyramid levels (max 6 levels)")
98 |     print()
99 |
100 |     while (keep_processing):
101 |
102 |         # start a timer (to see how long processing and display takes)
103 |
104 |         start_t = cv2.getTickCount()
105 |
106 |         # if camera / video file successfully open then read frame
107 |
108 |         if (cap.isOpened()):
109 |             ret, frame = cap.read()
110 |
111 |             # when we reach the end of the video (file) exit cleanly
112 |
113 |             if (ret == 0):
114 |                 keep_processing = False
115 |                 continue
116 |
117 |             # rescale if specified
118 |
119 |             if (args.rescale != 1.0):
120 |                 frame = cv2.resize(
121 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
122 |
123 |         # generate Gaussian pyramid for image frame
124 |
125 |         g_level = frame.copy()
126 |         g_pyramid = [g_level]
127 |         for layer in range(nlevels):
128 |             g_level = cv2.pyrDown(g_level)
129 |             cv2.namedWindow("Gaussian Level: " + str(layer),
130 |                             cv2.WINDOW_AUTOSIZE)
131 |             cv2.imshow("Gaussian Level: " + str(layer), g_level)
132 |             g_pyramid.append(g_level.copy())
133 |
134 |         # generate Laplacian pyramid image frame
135 |
136 |         lp_pyramid = [g_pyramid[nlevels - 1]]
137 |         for layer in range(nlevels, 0, -1):
138 |             g_level_enlarged = cv2.pyrUp(g_pyramid[layer])
139 |
140 |             # catch this rounding error occurrence in image sizes
141 |             if (g_pyramid[layer-1].shape != g_level_enlarged.shape):
142 |                 g_level_enlarged = cv2.resize(
143 |                     g_level_enlarged,
144 |                     tuple(reversed(g_pyramid[layer-1].shape[:2])),
145 |                     interpolation=cv2.INTER_LINEAR)
146 |
147 |             l_level = cv2.subtract(g_pyramid[layer-1], g_level_enlarged)
148 |             cv2.normalize(l_level, l_level, 0, 255, cv2.NORM_MINMAX)
149 |             cv2.namedWindow("Laplacian Level: " + str(layer),
150 |                             cv2.WINDOW_AUTOSIZE)
151 |             cv2.imshow("Laplacian Level: " + str(layer), l_level)
152 |             lp_pyramid.append(l_level.copy())
153 |
154 |         # display image
155 |
156 |         cv2.imshow(window_name, frame)
157 |
158 |         # stop the timer and convert to ms. (to see how long processing and
159 |         # display takes)
160 |
161 |         stop_t = ((cv2.getTickCount() - start_t) /
162 |                   cv2.getTickFrequency()) * 1000
163 |
164 |         # start the event loop - essential
165 |
166 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
167 |         # 25 fps = 40 ms)
168 |
169 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
170 |
171 |         if (key == ord('x')):
172 |             keep_processing = False
173 |         elif (key == ord('+')):
174 |             cv2.destroyAllWindows()
175 |             nlevels = np.min([6, nlevels + 1])
176 |         elif (key == ord('-')):
177 |             cv2.destroyAllWindows()
178 |             nlevels = np.max([0, nlevels - 1])
179 |
180 |     # close all windows
181 |
182 |     cv2.destroyAllWindows()
183 |
184 | else:
185 |     print("No video file specified or camera connected.")
186 |
187 | ##########################################################################
188 |
--------------------------------------------------------------------------------
/selective_search.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : detect live selective search bounding boxes from a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2021 Dept. Computer Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | ##########################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 | import math
19 |
20 | #####################################################################
21 |
22 | # press all the go-faster buttons - i.e. speed-up using multiple threads
23 |
24 | cv2.setUseOptimized(True)
25 | cv2.setNumThreads(4)
26 |
27 | # if we have OpenCL H/W acceleration available, use it - we'll need it
28 |
29 | cv2.ocl.setUseOpenCL(True)
30 | print(
31 |     "INFO: OpenCL - available: ",
32 |     cv2.ocl.haveOpenCL(),
33 |     " using: ",
34 |     cv2.ocl.useOpenCL())
35 |
36 | ##########################################################################
37 |
38 | keep_processing = True
39 |
40 | # parse command line arguments for camera ID or video file
41 |
42 | parser = argparse.ArgumentParser(
43 |     description='Perform ' +
44 |     sys.argv[0] +
45 |     ' example operation on incoming camera/video image')
46 | parser.add_argument(
47 |     "-c",
48 |     "--camera_to_use",
49 |     type=int,
50 |     help="specify camera to use",
51 |     default=0)
52 | parser.add_argument(
53 |     "-r",
54 |     "--rescale",
55 |     type=float,
56 |     help="rescale image by this factor",
57 |     default=1.0)
58 | parser.add_argument(
59 |     "-fs",
60 |     "--fullscreen",
61 |     action='store_true',
62 |     help="run in full screen mode")
63 | parser.add_argument(
64 |     'video_file',
65 |     metavar='video_file',
66 |     type=str,
67 |     nargs='?',
68 |     help='specify optional video file')
69 | args = parser.parse_args()
70 |
71 | ##########################################################################
72 |
73 | # define video capture object
74 |
75 | try:
76 |     # to use a non-buffered camera stream (via a separate thread)
77 |
78 |     if not (args.video_file):
79 |         import camera_stream
80 |         cap = camera_stream.CameraVideoStream()
81 |     else:
82 |         cap = cv2.VideoCapture()  # not needed for video files
83 |
84 | except BaseException:
85 |     # if not then just use OpenCV default
86 |
87 |     print("INFO: camera_stream class not found - camera input may be buffered")
88 |     cap = cv2.VideoCapture()
89 |
90 | # define display window name
91 |
92 | window_name = "Selective Search - Bounding Boxes"  # window name
93 |
94 | # if command line arguments are provided try to read video_name
95 | # otherwise default to capture from attached camera
96 |
97 | if (((args.video_file) and (cap.open(str(args.video_file))))
98 |         or (cap.open(args.camera_to_use))):
99 |
100 |     # create window by name (as resizable)
101 |
102 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
103 |
104 |     #####################################################################
105 |
106 |     # create Selective Search Segmentation Object using default parameters
107 |
108 |     ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
109 |
110 |     while (keep_processing):
111 |
112 |         # start a timer (to see how long processing and display takes)
113 |
114 |         start_t = cv2.getTickCount()
115 |
116 |         # if camera / video file successfully open then read frame
117 |
118 |         if (cap.isOpened()):
119 |             ret, frame = cap.read()
120 |
121 |             # when we reach the end of the video (file) exit cleanly
122 |
123 |             if (ret == 0):
124 |                 keep_processing = False
125 |                 continue
126 |
127 |             # rescale if specified
128 |
129 |             if (args.rescale != 1.0):
130 |                 frame = cv2.resize(
131 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
132 |
133 |         # set input image on which we will run segmentation
134 |
135 |         ss.setBaseImage(frame)
136 |
137 |         # switch to fast but low recall Selective Search method
138 |         ss.switchToSelectiveSearchFast()
139 |
140 |         # switch to high recall but slower Selective Search method
141 |         # ss.switchToSelectiveSearchQuality()
142 |
143 |         # run selective search segmentation on input image
144 |         rects = ss.process()
145 |         print('Total Number of Region Proposals: {}'.format(len(rects)))
146 |
147 |         # number of region proposals to show
148 |         numShowRects = 100
149 |
150 |         # iterate over all the region proposals
151 |         for i, rect in enumerate(rects):
152 |             # draw rectangle for region proposal till numShowRects
153 |             if (i < numShowRects):
154 |                 x, y, w, h = rect
155 |                 cv2.rectangle(frame, (x, y), (x+w, y+h),
156 |                               (0, 255, 0), 1, cv2.LINE_AA)
157 |             else:
158 |                 break
159 |
160 |         # stop the timer and convert to ms. (to see how long processing and
161 |         # display takes)
162 |
163 |         stop_t = ((cv2.getTickCount() - start_t) /
164 |                   cv2.getTickFrequency()) * 1000
165 |
166 |         label = ('Processing time: %.2f ms' % stop_t) + \
167 |             (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
168 |         cv2.putText(frame, label, (0, 15),
169 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
170 |
171 |         # display image
172 |
173 |         cv2.imshow(window_name, frame)
174 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
175 |                               cv2.WINDOW_FULLSCREEN & args.fullscreen)
176 |
177 |         # start the event loop - essential
178 |
179 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
180 |         # milliseconds). It waits for specified milliseconds for any keyboard
181 |         # event. If you press any key in that time, the program continues.
182 |         # If 0 is passed, it waits indefinitely for a key stroke.
183 |         # (bitwise and with 0xFF to extract least significant byte of
184 |         # multi-byte response)
185 |
186 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
187 |         # 25 fps = 40 ms)
188 |
189 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
190 |
191 |         # It can also be set to detect specific key strokes by recording which
192 |         # key is pressed
193 |
194 |         # e.g. if user presses "x" then exit / press "f" for fullscreen
195 |         # display
196 |
197 |         if (key == ord('x')):
198 |             keep_processing = False
199 |         elif (key == ord('f')):
200 |             args.fullscreen = not (args.fullscreen)
201 |
202 |     # close all windows
203 |
204 |     cv2.destroyAllWindows()
205 |
206 | else:
207 |     print("No video file specified or camera connected.")
208 |
209 | ##########################################################################
210 |
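Selective search is a region proposal method rather than a detector: the rectangles drawn above are class-agnostic candidates that are normally filtered and handed on to a classifier. A sketch of that downstream step on a still image (illustrative only; example.jpg is the test image fetched by test_all.sh, and the 227x227 size matches the SqueezeNet input used later in this repository):

import cv2

image = cv2.imread("example.jpg")

ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(image)
ss.switchToSelectiveSearchFast()

# discard tiny proposals, then crop/resize survivors as classifier inputs
min_area = 0.01 * image.shape[0] * image.shape[1]
rois = []
for (x, y, w, h) in ss.process():
    if (w * h) >= min_area:
        rois.append(cv2.resize(image[y:y + h, x:x + w], (227, 227)))

print("kept %d candidate regions" % len(rois))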
--------------------------------------------------------------------------------
/sobel.py:
--------------------------------------------------------------------------------
1 | #####################################################################
2 |
3 | # Example : Sobel edge filtering for a video file
4 | # specified on the command line (e.g. python FILE.py video_file) or from an
5 | # attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2016 School of Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | #####################################################################
14 |
15 | import cv2
16 | import argparse
17 | import sys
18 |
19 | #####################################################################
20 |
21 | keep_processing = True
22 |
23 | # parse command line arguments for camera ID or video file
24 |
25 | parser = argparse.ArgumentParser(
26 |     description='Perform ' +
27 |     sys.argv[0] +
28 |     ' example operation on incoming camera/video image')
29 | parser.add_argument(
30 |     "-c",
31 |     "--camera_to_use",
32 |     type=int,
33 |     help="specify camera to use",
34 |     default=0)
35 | parser.add_argument(
36 |     "-r",
37 |     "--rescale",
38 |     type=float,
39 |     help="rescale image by this factor",
40 |     default=1.0)
41 | parser.add_argument(
42 |     "-s",
43 |     "--set_resolution",
44 |     type=int,
45 |     nargs=2,
46 |     help='override default camera resolution as W H')
47 | parser.add_argument(
48 |     'video_file',
49 |     metavar='video_file',
50 |     type=str,
51 |     nargs='?',
52 |     help='specify optional video file')
53 | args = parser.parse_args()
54 |
55 | #####################################################################
56 |
57 | # this function is called as a call-back every time the trackbar is moved
58 | # (here we just do nothing)
59 |
60 |
61 | def nothing(x):
62 |     pass
63 |
64 | #####################################################################
65 |
66 | # define video capture object
67 |
68 |
69 | try:
70 |     # to use a non-buffered camera stream (via a separate thread)
71 |     # enabling subsequent hardware acceleration where available
72 |
73 |     if not (args.video_file):
74 |         import camera_stream
75 |         cap = camera_stream.CameraVideoStream(use_tapi=True)
76 |     else:
77 |         cap = cv2.VideoCapture()  # not needed for video files
78 |
79 | except BaseException:
80 |     # if not then just use OpenCV default
81 |
82 |     print("INFO: camera_stream class not found - camera input may be buffered")
83 |     cap = cv2.VideoCapture()
84 |
85 | # define display window name
86 |
87 | window_name = "Live Camera Input"  # window name
88 | window_name2 = "Sobel Gradient Edge Response"  # window name
89 |
90 | # if command line arguments are provided try to read video_name
91 | # otherwise default to capture from attached H/W camera
92 |
93 | if (((args.video_file) and (cap.open(str(args.video_file))))
94 |         or (cap.open(args.camera_to_use))):
95 |
96 |     # create window by name (as resizable)
97 |
98 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
99 |     cv2.namedWindow(window_name2, cv2.WINDOW_NORMAL)
100 |
101 |     # add some track bar controllers for settings
102 |
103 |     neighbourhood = 3
104 |     cv2.createTrackbar(
105 |         "neighbourhood, N",
106 |         window_name2,
107 |         neighbourhood,
108 |         15,
109 |         nothing)
110 |
111 |     # override default camera resolution
112 |
113 |     if (args.set_resolution is not None):
114 |         cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.set_resolution[1])
115 |         cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.set_resolution[0])
116 |
117 |     print("INFO: input resolution : (",
118 |           int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), "x",
119 |           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), ")")
120 |
121 |     while (keep_processing):
122 |
123 |         # if video file successfully open then read frame from video
124 |
125 |         if (cap.isOpened()):
126 |             ret, frame = cap.read()
127 |
128 |             # when we reach the end of the video (file) exit cleanly
129 |
130 |             if (ret == 0):
131 |                 keep_processing = False
132 |                 continue
133 |
134 |             # rescale if specified
135 |
136 |             if (args.rescale != 1.0):
137 |                 frame = cv2.resize(
138 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
139 |
140 |         # get parameters from track bars
141 |
142 |         neighbourhood = cv2.getTrackbarPos("neighbourhood, N", window_name2)
143 |
144 |         # check neighbourhood is at least 3 and odd
145 |
146 |         neighbourhood = max(3, neighbourhood)
147 |         if not (neighbourhood % 2):
148 |             neighbourhood = neighbourhood + 1
149 |
150 |         # perform sobel across all three colour channels of the image
151 |         # in both the x and y directions
152 |
153 |         sobel = cv2.Sobel(frame, cv2.CV_8U, 1, 1, ksize=neighbourhood)
154 |
155 |         # display images
156 |
157 |         cv2.imshow(window_name, frame)
158 |         cv2.imshow(window_name2, sobel)
159 |
160 |         # start the event loop - essential
161 |
162 |         # cv2.waitKey() is a keyboard binding function (argument is the time in
163 |         # milliseconds). It waits for specified milliseconds for any keyboard
164 |         # event. If you press any key in that time, the program continues.
165 |         # If 0 is passed, it waits indefinitely for a key stroke.
166 |         # (bitwise and with 0xFF to extract least significant byte of
167 |         # multi-byte response)
168 |
169 |         # wait 40ms (i.e. 1000ms / 25 fps = 40 ms)
170 |         key = cv2.waitKey(40) & 0xFF
171 |
172 |         # It can also be set to detect specific key strokes by recording which
173 |         # key is pressed
174 |
175 |         # e.g. if user presses "x" then exit / press "f" to toggle fullscreen
176 |
177 |         if (key == ord('x')):
178 |             keep_processing = False
179 |         elif (key == ord('f')):
180 |             cv2.setWindowProperty(
181 |                 window_name2,
182 |                 cv2.WND_PROP_FULLSCREEN,
183 |                 cv2.WINDOW_FULLSCREEN &
184 |                 (cv2.getWindowProperty(window_name2,
185 |                                        cv2.WND_PROP_FULLSCREEN) == 0))
186 |
187 |     # close all windows
188 |
189 |     cv2.destroyAllWindows()
190 |
191 | else:
192 |     print("No video file specified or camera connected.")
193 |
194 | #####################################################################
195 |
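One subtlety in sobel.py above: a single cv2.Sobel() call with dx=1, dy=1 computes the mixed second derivative d2I/dxdy, not the gradient magnitude usually meant by a Sobel edge response, and the cv2.CV_8U output type clips negative responses to zero. A sketch of the more conventional formulation (illustrative only, not a change to sobel.py) takes the two first derivatives separately at float precision and combines them:

import cv2
import numpy as np

gray = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2GRAY)

# first derivatives in x and y as separate signed float responses
dx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
dy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)

# gradient magnitude, rescaled to an 8-bit image for display
magnitude = cv2.magnitude(dx, dy)
display = np.uint8(cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX))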
--------------------------------------------------------------------------------
/squeezenet.py:
--------------------------------------------------------------------------------
1 | ##########################################################################
2 |
3 | # Example : perform live display of squeezenet CNN classification from a video
4 | # file specified on the command line (e.g. python FILE.py video_file) or from
5 | # an attached web camera
6 |
7 | # Author : Toby Breckon, toby.breckon@durham.ac.uk
8 |
9 | # Copyright (c) 2019 Toby Breckon, Engineering & Computing Science,
10 | # Durham University, UK
11 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
12 |
13 | # Based heavily on the example provided at:
14 | # https://github.com/opencv/opencv/blob/master/samples/dnn/classification.py
15 |
16 | ##########################################################################
17 |
18 | # To use download the following files:
19 |
20 | # https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt
21 | # -> classification_classes_ILSVRC2012.txt
22 | # https://github.com/forresti/SqueezeNet/raw/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel
23 | # -> squeezenet_v1.1.caffemodel
24 | # https://raw.githubusercontent.com/opencv/opencv_extra/master/testdata/dnn/squeezenet_v1.1.prototxt
25 | # -> squeezenet_v1.1.prototxt
26 |
27 | ##########################################################################
28 |
29 | import cv2
30 | import argparse
31 | import sys
32 | import math
33 | import numpy as np
34 |
35 | ##########################################################################
36 | # dummy on-trackbar callback function
37 |
38 |
39 | def on_trackbar(val):
40 |     return
41 |
42 | ##########################################################################
43 |
44 |
45 | keep_processing = True
46 |
47 | # parse command line arguments for camera ID or video file
48 |
49 | parser = argparse.ArgumentParser(
50 |     description='Perform ' +
51 |     sys.argv[0] +
52 |     ' example operation on incoming camera/video image')
53 | parser.add_argument(
54 |     "-c",
55 |     "--camera_to_use",
56 |     type=int,
57 |     help="specify camera to use",
58 |     default=0)
59 | parser.add_argument(
60 |     "-r",
61 |     "--rescale",
62 |     type=float,
63 |     help="rescale image by this factor",
64 |     default=1.0)
65 | parser.add_argument(
66 |     "-fs",
67 |     "--fullscreen",
68 |     action='store_true',
69 |     help="run in full screen mode")
70 | parser.add_argument(
71 |     "-use",
72 |     "--target",
73 |     type=str,
74 |     choices=['cpu', 'gpu', 'opencl'],
75 |     help="select computational backend",
76 |     default='gpu')
77 | parser.add_argument(
78 |     'video_file',
79 |     metavar='video_file',
80 |     type=str,
81 |     nargs='?',
82 |     help='specify optional video file')
83 | args = parser.parse_args()
84 |
85 | ##########################################################################
86 |
87 | # define video capture object
88 |
89 | try:
90 |     # to use a non-buffered camera stream (via a separate thread)
91 |
92 |     if not (args.video_file):
93 |         import camera_stream
94 |         cap = camera_stream.CameraVideoStream()
95 |     else:
96 |         cap = cv2.VideoCapture()  # not needed for video files
97 |
98 | except BaseException:
99 |     # if not then just use OpenCV default
100 |
101 |     print("INFO: camera_stream class not found - camera input may be buffered")
102 |     cap = cv2.VideoCapture()
103 |
104 | ##########################################################################
105 |
106 | # define display window name
107 |
108 | window_name = "SqueezeNet Image Classification - Live"  # window name
109 |
110 | # create window by name (as resizable)
111 |
112 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
113 | trackbarName = 'reporting confidence > (x 0.01)'
114 | cv2.createTrackbar(trackbarName, window_name, 50, 100, on_trackbar)
115 |
116 | ##########################################################################
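# Aside (a sketch, not part of squeezenet.py): --target defaults to 'gpu',
# i.e. the CUDA backend selected further below; an OpenCV build without
# CUDA support cannot honour that request and, depending on the OpenCV
# version, inference either falls back to CPU or fails. A runtime
# pre-check can pick a workable default instead:

# cv2.cuda.getCudaEnabledDeviceCount() returns 0 on non-CUDA builds
if cv2.cuda.getCudaEnabledDeviceCount() > 0:
    default_target = 'gpu'
elif cv2.ocl.haveOpenCL():
    default_target = 'opencl'
else:
    default_target = 'cpu'

# default_target could then be passed as the default= of --target above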
117 |
118 | # Load names of class labels
119 |
120 | classes = None
121 | with open("classification_classes_ILSVRC2012.txt", 'rt') as f:
122 |     classes = f.read().rstrip('\n').split('\n')
123 |
124 | ##########################################################################
125 |
126 | # Load CNN model
127 |
128 | net = cv2.dnn.readNet(
129 |     "squeezenet_v1.1.caffemodel",
130 |     "squeezenet_v1.1.prototxt",
131 |     'caffe')
132 |
133 | # set up compute target as one of [GPU, OpenCL, CPU]
134 |
135 | if (args.target == 'gpu'):
136 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
137 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
138 | elif (args.target == 'opencl'):
139 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
140 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
141 | else:
142 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
143 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
144 |
145 | ##########################################################################
146 |
147 | # if command line arguments are provided try to read video_name
148 | # otherwise default to capture from attached camera
149 |
150 | if (((args.video_file) and (cap.open(str(args.video_file))))
151 |         or (cap.open(args.camera_to_use))):
152 |
153 |     while (keep_processing):
154 |
155 |         # start a timer (to see how long processing and display takes)
156 |
157 |         start_t = cv2.getTickCount()
158 |
159 |         # if camera / video file successfully open then read frame
160 |
161 |         if (cap.isOpened()):
162 |             ret, frame = cap.read()
163 |
164 |             # when we reach the end of the video (file) exit cleanly
165 |
166 |             if (ret == 0):
167 |                 keep_processing = False
168 |                 continue
169 |
170 |             # rescale if specified
171 |
172 |             if (args.rescale != 1.0):
173 |                 frame = cv2.resize(
174 |                     frame, (0, 0), fx=args.rescale, fy=args.rescale)
175 |
176 |         #######################################################################
177 |         # squeezenet:
178 |         # model: "squeezenet_v1.1.caffemodel"
179 |         # config: "squeezenet_v1.1.prototxt"
180 |         # mean: [0, 0, 0]
181 |         # scale: 1.0
182 |         # width: 227
183 |         # height: 227
184 |         # rgb: false
185 |         # classes: "classification_classes_ILSVRC2012.txt"
186 |         #######################################################################
187 |
188 |         # create a 4D tensor "blob" from a frame
189 |
190 |         blob = cv2.dnn.blobFromImage(
191 |             frame, scalefactor=1.0, size=(
192 |                 227, 227), mean=[
193 |                 0, 0, 0], swapRB=False, crop=False)
194 |
195 |         # Run forward inference on the model
196 |
197 |         net.setInput(blob)
198 |         out = net.forward()
199 |
200 |         # get class label with the highest score from final softmax() layer
201 |
202 |         out = out.flatten()
203 |         classId = np.argmax(out)
204 |         confidence = out[classId]
205 |
206 |         # stop the timer and convert to ms. (to see how long processing takes)
207 |
208 |         stop_t = ((cv2.getTickCount() - start_t) /
209 |                   cv2.getTickFrequency()) * 1000
210 |
211 |         # Display efficiency information
212 |
213 |         label = ('Inference time: %.2f ms' % stop_t) + \
214 |             (' (Framerate: %.2f fps' % (1000 / stop_t)) + ')'
215 |         cv2.putText(frame, label, (0, 15),
216 |                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
217 |
218 |         # get confidence threshold from track bar
219 |         confThreshold = cv2.getTrackbarPos(trackbarName, window_name) / 100
220 |
221 |         # if we are quite confident about the classification then display it
222 |         if (confidence > confThreshold):
223 |             # add predicted class
224 |             label = '%s: %.4f' % (
225 |                 classes[classId]
226 |                 if classes else 'Class #%d' % classId, confidence)
227 |             cv2.putText(frame, label, (0, 40),
228 |                         cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
229 |
230 |         # display image
231 |
232 |         cv2.imshow(window_name, frame)
233 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
234 |                               cv2.WINDOW_FULLSCREEN & args.fullscreen)
235 |
236 |         # start the event loop - essential
237 |
238 |         # wait 40ms or less depending on processing time taken (i.e. 1000ms /
239 |         # 25 fps = 40 ms)
240 |
241 |         key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
242 |
243 |         # It can also be set to detect specific key strokes by recording which
244 |         # key is pressed
245 |
246 |         # e.g. if user presses "x" then exit / press "f" for fullscreen
247 |         # display
248 |
249 |         if (key == ord('x')):
250 |             keep_processing = False
251 |         elif (key == ord('f')):
252 |             args.fullscreen = not (args.fullscreen)
253 |
254 |     # close all windows
255 |
256 |     cv2.destroyAllWindows()
257 |
258 | else:
259 |     print("No video file specified or camera connected.")
260 |
261 | ##########################################################################
262 |
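squeezenet.py above reports only the single highest-scoring (argmax) class. With ImageNet's 1000 fine-grained classes, the top few scores are often more informative; a small sketch (assuming out and classes as produced in the script above):

import numpy as np


def top_k_predictions(out, classes, k=5):
    # out: flattened scores from net.forward(); classes: label strings
    order = np.argsort(out)[::-1][:k]
    return [(classes[i], float(out[i])) for i in order]

Iterating over top_k_predictions(out, classes) and stacking one cv2.putText() line per entry would overlay a top-5 readout in place of the single label above.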
--------------------------------------------------------------------------------
/test_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | ################################################################################
4 |
5 | # run a batch test over all the examples from the bash shell (linux)
6 |
7 | # Copyright (c) 2019 Dept Computer Science,
8 | # Durham University, UK
9 | # License : LGPL - http://www.gnu.org/licenses/lgpl.html
10 |
11 | ################################################################################
12 |
13 | PYTHON_INTERPRETER=python3
14 | CAM_TO_TEST=0
15 | VIDEO_TO_TEST=video.avi
16 |
17 | echo
18 | echo Using $PYTHON_INTERPRETER with camera $CAM_TO_TEST and video $VIDEO_TO_TEST
19 | echo "Running test suite - press 'x' in OpenCV window to exit each example."
20 | echo
21 |
22 | # get testing resources if they do not exist
23 |
24 | [ -f example.jpg ] || { wget https://upload.wikimedia.org/wikipedia/commons/b/b4/JPEG_example_JPG_RIP_100.jpg; mv JPEG_example_JPG_RIP_100.jpg example.jpg; }
25 | [ -f video.avi ] || { wget http://clips.vorwaerts-gmbh.de/big_buck_bunny.mp4; mv big_buck_bunny.mp4 video.avi; }
26 |
27 | ################################################################################
28 |
29 | # run defaults
30 |
31 | echo "Running default tests ..."
32 | echo
33 |
34 | for example in *.py
35 | do
36 |     echo "Testing example: " $example
37 |     $PYTHON_INTERPRETER $example
38 |     echo
39 | done
40 |
41 | ################################################################################
42 |
43 | # run cam test
44 |
45 | echo "Running camera based tests ..."
46 | echo
47 |
48 | for example in *.py
49 | do
50 |     echo "Testing example: " $example -c $CAM_TO_TEST
51 |     $PYTHON_INTERPRETER $example -c $CAM_TO_TEST
52 |     echo
53 | done
54 |
55 | ################################################################################
56 |
57 | # run cam test and resize
58 |
59 | echo "Running camera based tests with resizing ..."
60 | echo
61 |
62 | for example in *.py
63 | do
64 |     echo "Testing example: " $example -c $CAM_TO_TEST -r 0.25
65 |     $PYTHON_INTERPRETER $example -c $CAM_TO_TEST -r 0.25
66 |     echo
67 | done
68 |
69 |
70 | ################################################################################
71 |
72 | # run video file test
73 |
74 | echo "Running video file based tests ..."
75 | echo
76 |
77 | for example in *.py
78 | do
79 |     echo "Testing example: " $example $VIDEO_TO_TEST
80 |     $PYTHON_INTERPRETER $example $VIDEO_TO_TEST
81 |     echo
82 | done
83 |
84 | ################################################################################
85 |
--------------------------------------------------------------------------------