├── .dockerignore ├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── appendix_1 └── numba_iou.ipynb ├── chapter1 ├── chapter1.py ├── pencilsketch_bg.jpg ├── tools.py └── wx_gui.py ├── chapter10 ├── classes.py ├── demo.mp4 ├── frozen_inference_graph.pb ├── main.py ├── sort.py └── ssd_mobilenet_v1_coco_2017_11_17.pbtxt.txt ├── chapter2 ├── chapter2.py ├── frame_reader.py └── gestures.py ├── chapter3 ├── chapter3.py ├── feature_matching.py ├── train.png └── wx_gui.py ├── chapter4 ├── calibrate.py ├── chapter4.py ├── fountain_dense │ ├── 0000.png │ ├── 0001.png │ ├── 0002.png │ ├── 0003.png │ ├── 0004.png │ ├── 0005.png │ ├── 0006.png │ ├── 0007.png │ ├── 0008.png │ ├── 0009.png │ └── 0010.png ├── scene3D.py └── wx_gui.py ├── chapter5 ├── common.py ├── gamma_correct.py ├── hdr.py └── panorama.py ├── chapter6 ├── chapter6.py ├── saliency.py ├── soccer.avi ├── tracking.py └── tracking_api.py ├── chapter7 ├── chapter7.py ├── data │ ├── .gitignore │ ├── __init__.py │ ├── gtsrb.py │ └── process.py └── train_tf2.py ├── chapter8 ├── .gitignore ├── chapter8.py ├── data │ ├── .gitignore │ ├── process.py │ └── store.py ├── detectors.py ├── params │ ├── haarcascade_frontalface_default.xml │ ├── haarcascade_lefteye_2splits.xml │ └── haarcascade_righteye_2splits.xml ├── train_classifier.py └── wx_gui.py ├── chapter9 ├── classification.py ├── data.py ├── inference.py └── localization.py ├── dockerfiles ├── Dockerfile └── gpu.Dockerfile ├── requirements.txt ├── wheels └── wxPython-4.0.7.post2-cp38-cp38-linux_x86_64.whl └── wx_gui.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | .py2.7-cv-blueprints/ 3 | .py3.6-cv-blueprints/ 4 | .py3.7-cv-blueprints/ 5 | .py3.8-cv-blueprints/ 6 | .py3*/ 7 | wheels/ 8 | 9 | chapter6/data/GTSRB* 10 | 11 | .idea/ 12 | .tmp/ 13 | .gitignore 14 | .ipynb_checkpoints 15 | __pycache__/ 16 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | wheels/wxPython-4.0.7.post2-cp38-cp38-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Virtual Environment 2 | .py2.7-cv-blueprints/ 3 | .py3.6-cv-blueprints/ 4 | .py3.7-cv-blueprints/ 5 | .py3.8-cv-blueprints/ 6 | .tmp/ 7 | 8 | .ipynb_checkpoints 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *,cover 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyBuilder 66 | target/ 67 | *.pkl 68 | 69 | 70 | # Editor 71 | *.sw[op] 72 | .idea/ 73 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # OpenCV-4-with-Python-Blueprints-Second-Edition 5 | 6 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.154060.svg)](https://doi.org/10.5281/zenodo.154060) 7 | [![Google group](https://img.shields.io/badge/Google-Discussion%20group-lightgrey.svg)](https://groups.google.com/d/forum/opencv-python-blueprints) 8 | [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](http://www.gnu.org/licenses/gpl-3.0) 9 | 10 | This repository contains all up-to-date source code for the following book: 11 | 12 | 13 | Dr. Menua Gevorgyan, Arsen Mamikonyan, Michael Beyeler
14 | OpenCV 4 with Python Blueprints - Second Edition: Build creative computer vision projects with the latest version of OpenCV 4 and Python 3 15 | 16 | 17 | Packt Publishing Ltd.<br/>
18 | Paperback: 366 pages
19 | ISBN 978-178980-181-1 20 |
21 | 22 | This book demonstrates how to develop a series of intermediate to advanced projects using OpenCV and Python, 23 | rather than teaching the core concepts of OpenCV in theoretical lessons. Instead, the working projects 24 | developed in this book teach the reader how to apply their theoretical knowledge to topics such as 25 | image manipulation, augmented reality, object tracking, 3D scene reconstruction, statistical learning, 26 | and object categorization. 27 | 28 | By the end of this book, readers will be OpenCV experts whose newly gained experience allows them to develop their own advanced computer vision applications. 29 | 30 | If you use either the book or the code in a scholarly publication, please cite it as: 31 | > Menua Gevorgyan, Arsen Mamikonyan, Michael Beyeler (2020). OpenCV with Python Blueprints - Second Edition: Build creative computer vision projects with the latest version of OpenCV 4 and Python 3. Packt Publishing Ltd., London, England, 366 pages, ISBN 978-178980-181-1. 32 | 33 | Or use the following BibTeX entry: 34 | ``` 35 | @book{OpenCVWithPythonBlueprints, 36 |   title = {{OpenCV with Python Blueprints}}, 37 |   subtitle = {Build creative computer vision projects with the latest version of {OpenCV 4} and {Python 3}}, 38 |   author = {Menua Gevorgyan and Arsen Mamikonyan and Michael Beyeler}, 39 |   year = {2020}, 40 |   pages = {366}, 41 |   publisher = {Packt Publishing Ltd.}, 42 |   isbn = {978-178980-181-1} 43 | } 44 | ``` 45 | 46 | Scholarly work referencing the first edition of the book: 47 | - B Zhang et al. (2018). Automatic matching of construction onsite resources under camera views. *Automation in Construction*. 48 | - A Jakubović & J Velagić (2018). Image Feature Matching and Object Detection Using Brute-Force Matchers. *International Symposium ELMAR*. 49 | - B Zhang et al. (2018). Multi-View Matching for Onsite Construction Resources with Combinatorial Optimization. *International Symposium on Automation and Robotics in Construction (ISARC)* 35:1-7. 50 | - LA Marcomini (2018). Identificação automática do comportamento do tráfego a partir de imagens de vídeo. *Escola de Engenharia de São Carlos*, Master's Thesis. 51 | - G Laica et al. (2018). Diseño y construcción de un andador inteligente para el desplazamiento autónomo de los adultos mayores con visión reducida y problemas de movilidad del hogar de vida "Luis Maldonado Tamayo" mediante la investigación de técnicas de visión artificial. *Departamento de Ciencias de la Energía y Mecánica, Universidad de las Fuerzas Armadas ESPE*, Master's Thesis. 52 | - I Huitzil-Velasco et al. (2017). Test of a Myo Armband. *Revista de Ciencias Ambientales y Recursos Naturales* 3(10): 48-56. 53 | - Y Güçlütürk et al. (2016). Convolutional sketch inversion. *European Conference on Computer Vision (ECCV)* 810-824. 54 | 55 | 56 | All code was tested with OpenCV 4.2.0 and Python 3.8 on Ubuntu 18.04, and is available from: 57 | https://github.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/ 58 | 59 | We have also created a Docker file at https://github.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/tree/master/dockerfiles/Dockerfile; the Running With Docker section of this README explains how to build and run the code using Docker. 60 | 61 | 62 | ## Critical Reception (First Edition) 63 | 64 | 65 | 66 | What readers on Amazon have to say: 67 | 68 | > The author does a great job explaining the concepts needed to understand what's happening in the application without 69 | > the need of going into too many details.<br/>
70 | – [Sebastian Montabone](http://www.samontab.com) 71 | 72 | > Excellent book to build practical OpenCV projects! I'm still relatively new to OpenCV, but all examples are well 73 | > laid out and easy to follow. The author does a good job explaining the concepts in detail and shows how they apply 74 | > in real life. As a professional programmer, I especially love that you can just fork the code from GitHub and follow 75 | > along. Strongly recommend to readers with basic knowledge of computer vision, machine learning, and Python! 76 | – Amazon Customer 77 | 78 | > Usually I'm not a big fan of technical books because they are too dull, but this one is written in an engaging 79 | > manner with a few dry jokes here and there. Can only recommend!
80 | – lakesouth 81 | 82 | ## Who This Book Is For 83 | As part of Packt's Blueprints series, this book is for intermediate users of OpenCV who aim to master their skills 84 | by developing advanced practical applications. You should already have some 85 | experience building simple applications, and you are expected to be familiar with 86 | OpenCV's concepts and Python libraries. Basic knowledge of Python programming 87 | is assumed. 88 | 89 | By the end of this book, you will be an OpenCV expert, and your newly gained 90 | experience will allow you to develop your own advanced computer vision 91 | applications. 92 | 93 | 94 | ## Getting Started 95 | All projects can run on Windows, Mac, or Linux. The required packages can be installed with pip, or you can use the Docker images available in the repository to run the chapter scripts. 96 | 97 | ## Installation With Pip 98 | 99 | 100 | ``` 101 | pip install -r requirements.txt 102 | ``` 103 | 104 | ## Running With Docker 105 | 106 | 107 | 108 | ### Build the Image 109 | 110 | The repository contains two Docker images: 111 | 112 | 1. Without GPU acceleration 113 | ``` 114 | docker build -t book dockerfiles 115 | ``` 116 | 2. With GPU (CUDA) acceleration 117 | ``` 118 | docker build -t book dockerfiles -f dockerfiles/gpu.Dockerfile 119 | ``` 120 | 121 | ### Start a Container 122 | 123 | ``` 124 | docker run --device /dev/video0 --env DISPLAY=$DISPLAY -v="/tmp/.X11-unix:/tmp/.X11-unix:rw" -v `pwd`:/book -it book 125 | ``` 126 | 127 | Here, we have allowed Docker to connect to the default camera and to use the X11 server of the host machine to run graphical applications. If you use the GPU version of the image, you also have to pass `--runtime nvidia`. 128 | 129 | ### Run an App 130 | In the container, navigate to the desired chapter: 131 | ``` 132 | cd /book/chapterX 133 | ``` 134 | and run the desired script of that chapter: 135 | ``` 136 | python chapterX.py 137 | ``` 138 | 139 | 140 | 141 | ### Troubleshooting 142 | 143 | #### Could not connect to any X display. 144 | 145 | The X server must allow connections from the Docker container. 146 | 147 | Run `xhost +local:docker`; also check [this discussion](https://forums.docker.com/t/start-a-gui-application-as-root-in-a-ubuntu-container/17069) 148 | 149 | 150 | ## The Following Packages Were Used in the Chapters of the Book 151 | * OpenCV 4.2 or later: Recent 32-bit and 64-bit versions as well as installation instructions are available at 152 | http://opencv.org/downloads.html. Platform-specific installation instructions can be found at 153 | http://docs.opencv.org/doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.html. 154 | * Python 3.8 or later: Recent 32-bit and 64-bit installers are available at https://www.python.org/downloads. The 155 | installation instructions can be found at https://wiki.python.org/moin/BeginnersGuide/Download. 156 | * NumPy 1.18.1 or later: This package for scientific computing officially comes in 32-bit format only, and can be 157 | obtained from http://www.scipy.org/scipylib/download.html. The installation instructions can be found at 158 | http://www.scipy.org/scipylib/building/index.html#building. 159 | 160 | In addition, some chapters require the following free Python modules: 161 | * wxPython 4.0 or later (Chapters 1 to 4, 8): This GUI programming toolkit can be obtained from 162 | http://www.wxpython.org/download.php. 
163 | * matplotlib 3.1 or later (Chapters 4, 5, 6, and 7): This 2D plotting library can be obtained from 164 | http://matplotlib.org/downloads.html. Its installation instructions can be found by going to 165 | http://matplotlib.org/faq/installing_faq.html#how-to-install. 166 | * SciPy 1.4 or later (Chapters 1 and 10): This scientific Python library officially comes in 32-bit only, and can be 167 | obtained from http://www.scipy.org/scipylib/download.html. The installation instructions can be found at 168 | http://www.scipy.org/scipylib/building/index.html#building. 169 | * rawpy 0.14 and ExifRead==2.1.2 (Chapter 5) 170 | * requests==2.22.0 to download data in Chapter 7 171 | 172 | Furthermore, the use of IPython (http://ipython.org/install.html) is highly recommended as it provides a flexible, 173 | interactive console interface. 174 | 175 | 176 | ## License 177 | The software is released under the GNU General Public License (GPL), which is the most commonly used free software 178 | license according to Wikipedia. The GPL allows for commercial use, distribution, modification, patent use, and private use. 179 | 180 | The GPL is a copyleft license, which means that derived works can only be distributed under the same license terms. 181 | For more information, please see the license file. 182 | ### Download a free PDF 183 | 184 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.<br>
Simply click on the link to claim your free PDF.
185 |

https://packt.link/free-ebook/9781789801811

-------------------------------------------------------------------------------- /appendix_1/numba_iou.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import numba" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 3, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "def iou(a: np.ndarray, b: np.ndarray) -> float:\n", 20 | " a_tl, a_br = a[:4].reshape((2, 2))\n", 21 | " b_tl, b_br = b[:4].reshape((2, 2))\n", 22 | " int_tl = np.maximum(a_tl, b_tl)\n", 23 | " int_br = np.minimum(a_br, b_br)\n", 24 | " int_area = np.product(np.maximum(0., int_br - int_tl))\n", 25 | " a_area = np.product(a_br - a_tl)\n", 26 | " b_area = np.product(b_br - b_tl)\n", 27 | " return int_area / (a_area + b_area - int_area)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 4, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def calc_iou_matrix(detections,trackers):\n", 37 | " iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float32)\n", 38 | "\n", 39 | " for d, det in enumerate(detections):\n", 40 | " for t, trk in enumerate(trackers):\n", 41 | " iou_matrix[d, t] = iou(det, trk)\n", 42 | " return iou_matrix" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 5, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "A = np.random.rand(100,4)\n", 52 | "B = np.random.rand(100,4)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 5, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "328 ms ± 26.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "%timeit calc_iou_matrix(A,B)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 6, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "14.5 ms ± 792 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "@numba.jit(nopython=True)\n", 87 | "def product(a):\n", 88 | " result = 1\n", 89 | " for i in range(len(a)):\n", 90 | " result*=a[i]\n", 91 | " return result\n", 92 | "\n", 93 | "@numba.jit(nopython=True)\n", 94 | "def iou(a: np.ndarray, b: np.ndarray) -> float:\n", 95 | " a_tl, a_br = a[0:2],a[2:4]\n", 96 | " b_tl, b_br = b[0:2],b[2:4]\n", 97 | " int_tl = np.maximum(a_tl, b_tl)\n", 98 | " int_br = np.minimum(a_br, b_br)\n", 99 | " int_area = product(np.maximum(0., int_br - int_tl))\n", 100 | " a_area = product(a_br - a_tl)\n", 101 | " b_area = product(b_br - b_tl)\n", 102 | " return int_area / (a_area + b_area - int_area)\n", 103 | "%timeit calc_iou_matrix(A,B)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 7, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "7.08 ms ± 31 µs per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "@numba.jit(nopython=True)\n", 121 | "def calc_iou_matrix(detections,trackers):\n", 122 | " iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float32)\n", 123 | "\n", 124 | " for d in range(len(detections)):\n", 125 | " det = detections[d]\n", 126 | " for t in range(len(trackers)):\n", 127 | " trk = trackers[t]\n", 128 | " iou_matrix[d, t] = iou(det, trk)\n", 129 | "\n", 130 | "%timeit calc_iou_matrix(A,B)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 22, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/plain": [ 141 | "array([[2.84786978e-01, 1.49667930e-01, 4.52272718e-01, 7.64122472e-01],\n", 142 | " [1.29328195e-01, 1.85575401e-01, 2.44202698e-01, 1.62779569e-01],\n", 143 | " [8.79994339e-01, 7.84159368e-01, 8.96072205e-01, 4.64633477e-01],\n", 144 | " [2.34234905e-01, 8.16629721e-02, 9.31725878e-01, 9.15272896e-02],\n", 145 | " [6.17814797e-01, 7.92031790e-01, 5.93997225e-01, 9.17106773e-01],\n", 146 | " [5.46257280e-01, 5.90488641e-01, 2.45114229e-01, 5.27682917e-02],\n", 147 | " [7.13898454e-01, 8.35825322e-01, 6.37713557e-01, 3.53566190e-01],\n", 148 | " [7.24266392e-01, 8.09674239e-01, 6.40267349e-01, 2.10335049e-02],\n", 149 | " [3.60260021e-01, 6.98047814e-01, 7.49194667e-01, 7.24404182e-01],\n", 150 | " [4.54322591e-01, 9.39216612e-01, 1.76701033e-01, 5.49275464e-01],\n", 151 | " [8.48035985e-01, 5.33716147e-01, 9.57733994e-01, 9.79705353e-01],\n", 152 | " [9.78277570e-01, 4.44890125e-01, 9.39733444e-01, 7.50692101e-02],\n", 153 | " [8.22548577e-01, 6.41685401e-01, 7.45234802e-01, 5.09103658e-01],\n", 154 | " [2.60426971e-01, 1.00493773e-01, 1.71217758e-01, 5.76172101e-01],\n", 155 | " [5.19713732e-01, 3.46948301e-01, 2.44690123e-01, 4.19853462e-01],\n", 156 | " [4.68961238e-02, 5.52729107e-02, 6.59324109e-01, 6.09828494e-01],\n", 157 | " [3.85613432e-02, 8.55204783e-01, 9.26012669e-01, 7.09888502e-01],\n", 158 | " [1.29093393e-01, 9.49590162e-01, 3.22063612e-01, 5.29960328e-01],\n", 159 | " [7.08357430e-01, 3.80448216e-01, 7.00744999e-01, 5.06966245e-01],\n", 160 | " [3.26126181e-01, 5.75468101e-01, 6.78994830e-01, 5.54342831e-01],\n", 161 | " [8.12375538e-02, 7.49469620e-01, 2.72455198e-01, 5.22540811e-01],\n", 162 | " [3.85367089e-01, 4.41736512e-01, 5.14454592e-01, 1.19293497e-01],\n", 163 | " [7.64035617e-01, 9.49622911e-01, 5.93891039e-01, 1.53838921e-01],\n", 164 | " [7.97760257e-02, 6.49648923e-01, 9.16435057e-01, 4.88523011e-02],\n", 165 | " [2.29542125e-01, 6.18607365e-01, 9.29588552e-01, 7.52633838e-01],\n", 166 | " [1.32451561e-01, 2.60474882e-01, 8.20534966e-05, 5.34758335e-01],\n", 167 | " [9.26208346e-01, 8.88325527e-01, 5.64661678e-01, 7.51415300e-02],\n", 168 | " [6.23189011e-02, 8.45335974e-01, 9.56306204e-01, 5.80513844e-02],\n", 169 | " [5.12158377e-01, 7.08644649e-01, 1.99225853e-01, 6.40248533e-01],\n", 170 | " [9.71114534e-01, 4.65672329e-01, 4.79785001e-01, 6.06286740e-01],\n", 171 | " [9.02719316e-01, 1.29368742e-01, 1.56595684e-01, 3.48418367e-01],\n", 172 | " [4.26345005e-01, 6.80225657e-01, 7.05097062e-01, 8.16246516e-01],\n", 173 | " [8.43022840e-01, 1.55150328e-01, 1.07014732e-01, 2.66176015e-01],\n", 174 | " [1.43833136e-01, 7.46381201e-01, 9.33459466e-01, 9.75532745e-01],\n", 175 | " [5.94062493e-01, 6.31293475e-01, 8.89775355e-01, 8.74834902e-01],\n", 176 | " [3.48179401e-01, 5.26730661e-01, 2.72650674e-01, 4.06032481e-01],\n", 177 | " [6.47693494e-02, 1.16628348e-01, 3.57464769e-01, 
9.29423610e-01],\n", 178 | " [2.23870691e-01, 6.33822446e-01, 1.76303346e-01, 5.90196201e-01],\n", 179 | " [5.19051931e-01, 1.69239893e-01, 3.92817274e-01, 4.94994064e-01],\n", 180 | " [2.35778432e-01, 8.93753320e-01, 2.52470705e-01, 2.54120126e-01],\n", 181 | " [7.94779449e-03, 2.99115150e-01, 2.62439362e-02, 3.10726596e-01],\n", 182 | " [7.43372632e-01, 3.06148680e-01, 1.41128562e-01, 9.88191773e-01],\n", 183 | " [1.62348419e-02, 3.45068825e-01, 3.87701560e-01, 2.32051568e-01],\n", 184 | " [3.62609599e-01, 8.12818470e-01, 1.79017692e-01, 1.10072390e-01],\n", 185 | " [6.00747111e-01, 5.91610252e-02, 3.52787226e-01, 1.80002683e-01],\n", 186 | " [1.20041811e-01, 8.24721534e-01, 1.54461163e-01, 8.47966775e-01],\n", 187 | " [1.09809920e-01, 1.86146627e-01, 8.04777057e-01, 6.50033285e-01],\n", 188 | " [5.92720751e-01, 5.12866344e-01, 5.47108710e-01, 5.13579475e-01],\n", 189 | " [7.12107962e-01, 2.94623431e-01, 1.67693961e-01, 6.15859883e-01],\n", 190 | " [3.64380859e-01, 2.52832916e-01, 1.19972728e-01, 8.69102115e-01],\n", 191 | " [9.32491659e-01, 5.55593690e-01, 8.67936902e-01, 8.69947870e-01],\n", 192 | " [3.80094240e-01, 7.66910220e-01, 2.27431462e-01, 8.18655906e-02],\n", 193 | " [1.24677930e-01, 1.67478945e-01, 4.77225927e-01, 9.25475390e-01],\n", 194 | " [3.62191294e-01, 6.52753650e-01, 8.71392075e-01, 4.53979568e-01],\n", 195 | " [7.83613408e-01, 3.96295148e-01, 6.68070842e-01, 8.22256827e-01],\n", 196 | " [2.66799295e-01, 1.03738181e-01, 6.99157302e-01, 5.68579542e-01],\n", 197 | " [1.86985863e-01, 2.17226350e-02, 6.55800074e-01, 8.12148503e-01],\n", 198 | " [4.23810156e-01, 9.98200261e-01, 2.60635320e-01, 2.59812443e-01],\n", 199 | " [6.41079589e-01, 9.04388024e-01, 6.57337920e-01, 3.67862770e-01],\n", 200 | " [4.38713382e-01, 8.55237885e-01, 9.46515235e-01, 6.80434047e-01],\n", 201 | " [3.56163917e-01, 2.11996973e-01, 4.18694478e-02, 8.82582875e-01],\n", 202 | " [5.18865711e-01, 2.03951649e-01, 8.24686364e-02, 8.81622400e-01],\n", 203 | " [8.18017015e-01, 9.89771198e-01, 7.12994585e-01, 1.95878071e-01],\n", 204 | " [4.09745356e-01, 3.80620069e-02, 7.09507585e-01, 4.67298871e-01],\n", 205 | " [8.46667850e-01, 7.69717548e-01, 8.35631943e-01, 1.89558165e-01],\n", 206 | " [7.76663279e-01, 2.42167881e-01, 8.27233081e-01, 8.38679793e-01],\n", 207 | " [3.98400180e-01, 6.39092257e-01, 6.49660355e-01, 6.74128981e-01],\n", 208 | " [5.60777039e-01, 3.09213853e-01, 1.05642424e-01, 8.82463125e-01],\n", 209 | " [4.28736238e-01, 1.60264552e-01, 5.24715315e-01, 1.90091999e-01],\n", 210 | " [1.88907251e-01, 6.29227371e-01, 2.26614241e-01, 6.30271581e-01],\n", 211 | " [2.88142634e-02, 1.30737785e-02, 2.86926726e-02, 9.74329017e-01],\n", 212 | " [3.17601670e-01, 5.06013243e-01, 6.81056475e-01, 7.03450623e-01],\n", 213 | " [3.79000387e-01, 2.31957206e-01, 1.21083567e-01, 5.30021306e-01],\n", 214 | " [8.68262590e-02, 4.97116188e-01, 1.43719020e-01, 4.54984100e-01],\n", 215 | " [9.80171576e-01, 7.04657839e-01, 7.03200159e-02, 6.85264033e-01],\n", 216 | " [8.04557409e-01, 9.68377769e-01, 3.71729269e-01, 2.69743301e-01],\n", 217 | " [9.82104829e-02, 6.47122350e-03, 2.11175103e-01, 9.21097412e-01],\n", 218 | " [6.54719561e-02, 8.25470813e-01, 1.23573962e-01, 4.82917056e-01],\n", 219 | " [4.96703114e-01, 9.01691899e-02, 2.20353326e-01, 1.77234820e-01],\n", 220 | " [2.10898552e-01, 2.87533614e-01, 6.62700998e-01, 5.98436081e-01],\n", 221 | " [7.50308497e-02, 2.29851569e-01, 2.89361741e-01, 7.93646032e-01],\n", 222 | " [5.73880841e-01, 2.43526849e-01, 6.33192394e-01, 6.70816044e-01],\n", 223 | " 
[6.34623072e-01, 5.07885907e-01, 7.27704114e-01, 7.48819857e-01],\n", 224 | " [1.88771659e-01, 3.10023429e-01, 7.30603811e-02, 3.29997110e-01],\n", 225 | " [2.14005767e-01, 8.67044002e-01, 9.31721932e-01, 1.14427755e-01],\n", 226 | " [7.28537781e-02, 3.20687446e-02, 9.22391264e-01, 2.47669559e-01],\n", 227 | " [8.62983037e-01, 8.17282328e-01, 9.40907208e-01, 7.70718873e-01],\n", 228 | " [8.26480058e-01, 9.50637853e-01, 4.44769479e-01, 6.71710373e-01],\n", 229 | " [6.45784531e-02, 6.93421874e-01, 4.60386577e-01, 1.16068386e-01],\n", 230 | " [6.37542338e-01, 6.20640364e-01, 1.66622630e-01, 9.30108359e-01],\n", 231 | " [9.82964009e-01, 1.60835035e-01, 5.64917699e-01, 5.78353741e-01],\n", 232 | " [7.58120906e-01, 7.67157309e-01, 4.62495285e-01, 1.85201688e-01],\n", 233 | " [7.56419429e-01, 4.10743083e-01, 7.93451756e-01, 4.15199488e-01],\n", 234 | " [9.52036799e-01, 7.84123616e-01, 4.29499335e-01, 6.80436993e-02],\n", 235 | " [2.41347372e-01, 4.53595425e-01, 9.85869772e-02, 1.38116241e-01],\n", 236 | " [4.05717412e-01, 5.19340460e-01, 2.19914351e-01, 2.65040326e-01],\n", 237 | " [5.27267087e-01, 6.47721544e-01, 8.05855368e-01, 4.49244422e-02],\n", 238 | " [7.30421916e-01, 3.40132741e-01, 9.70534517e-03, 6.76803031e-01],\n", 239 | " [5.14827463e-01, 8.64038613e-01, 9.02144952e-01, 8.70425950e-01],\n", 240 | " [7.77698231e-01, 2.67776473e-01, 5.54169302e-01, 7.32617299e-01]])" 241 | ] 242 | }, 243 | "execution_count": 22, 244 | "metadata": {}, 245 | "output_type": "execute_result" 246 | } 247 | ], 248 | "source": [ 249 | "@numba.vectorize\n", 250 | "def custom_operation(a,b):\n", 251 | " if b == 0:\n", 252 | " return 0\n", 253 | " return a*b if a>b else a/b\n", 254 | "custom_operation(A,B)\n", 255 | "custom_operation(A,np.ones((1,4)))" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 18, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stdout", 265 | "output_type": "stream", 266 | "text": [ 267 | "196 µs ± 2.46 µs per loop (mean ± std. dev. 
of 7 runs, 10000 loops each)\n" 268 | ] 269 | } 270 | ], 271 | "source": [ 272 | "@numba.guvectorize(['(f8[:, :], f8[:, :], f8[:, :])'], '(m,k),(n,k1)->(m, n)')\n", 273 | "def calc_iou_matrix(x, y, z):\n", 274 | " for i in range(x.shape[0]):\n", 275 | " for j in range(y.shape[1]):\n", 276 | " z[i, j] = iou(x[i],y[i])\n", 277 | "\n", 278 | "%timeit calc_iou_matrix(A,B)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 9, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "from numba import cuda" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 10, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "@cuda.jit(device=True)\n", 297 | "def iou(a: np.ndarray, b: np.ndarray) -> float:\n", 298 | " xx1 = max(a[0], b[0])\n", 299 | " yy1 = max(a[1], b[1])\n", 300 | " xx2 = min(a[2], b[2])\n", 301 | " yy2 = min(a[3], b[3])\n", 302 | " w = max(0., xx2 - xx1)\n", 303 | " h = max(0., yy2 - yy1)\n", 304 | " wh = w * h\n", 305 | " result = wh / ((a[2]-a[0])*(a[3]-a[1])\n", 306 | " + (b[2]-b[0])*(b[3]-b[1]) - wh)\n", 307 | " return result" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "@numba.guvectorize(['(f8[:, :], f8[:, :], f8[:, :])'], '(m,k),(n,k1)->(m, n)',target=\"cuda\")\n", 317 | "def calc_iou_matrix(x, y, z):\n", 318 | " for i in range(x.shape[0]):\n", 319 | " for j in range(y.shape[1]):\n", 320 | " z[i, j] = iou(x[i],y[j])" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [] 329 | } 330 | ], 331 | "metadata": { 332 | "kernelspec": { 333 | "display_name": "Python 3", 334 | "language": "python", 335 | "name": "python3" 336 | }, 337 | "language_info": { 338 | "codemirror_mode": { 339 | "name": "ipython", 340 | "version": 3 341 | }, 342 | "file_extension": ".py", 343 | "mimetype": "text/x-python", 344 | "name": "python", 345 | "nbconvert_exporter": "python", 346 | "pygments_lexer": "ipython3", 347 | "version": "3.8.1" 348 | } 349 | }, 350 | "nbformat": 4, 351 | "nbformat_minor": 2 352 | } 353 | -------------------------------------------------------------------------------- /chapter1/chapter1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ OpenCV with Python Blueprints. Chapter 1: Fun with Filters 5 | 6 | An app to apply three different image filter effects to the video stream 7 | of a webcam in real-time. 
8 | 9 | The three effects are: 10 | - Warming/cooling filters 11 | - Black-and-white pencil sketch 12 | - Cartoonizer 13 | """ 14 | 15 | import wx 16 | import cv2 17 | import numpy as np 18 | 19 | from wx_gui import BaseLayout 20 | from tools import apply_hue_filter 21 | from tools import apply_rgb_filters 22 | from tools import load_img_resized 23 | from tools import spline_to_lookup_table 24 | from tools import cartoonize 25 | from tools import pencil_sketch_on_canvas 26 | 27 | 28 | __license__ = "GNU GPL 3.0 or later" 29 | 30 | 31 | INCREASE_LOOKUP_TABLE = spline_to_lookup_table([0, 64, 128, 192, 256], 32 | [0, 70, 140, 210, 256]) 33 | DECREASE_LOOKUP_TABLE = spline_to_lookup_table([0, 64, 128, 192, 256], 34 | [0, 30, 80, 120, 192]) 35 | 36 | 37 | class FilterLayout(BaseLayout): 38 | """ 39 | Custom layout for filter effects 40 | 41 | This class implements a custom layout for applying diverse filter 42 | effects to a camera feed. The layout is based on an abstract base 43 | class BaseLayout. It displays the camera feed (passed to the class as 44 | a cv2.VideoCapture object) in the variable self.panels_vertical. 45 | Additional layout elements can be added by using the Add method (e.g., 46 | self.panels_vertical(wx.Panel(self, -1))). 47 | """ 48 | 49 | def __init__(self, *args, **kwargs): 50 | super().__init__(*args, **kwargs) 51 | color_canvas = load_img_resized('pencilsketch_bg.jpg', 52 | (self.imgWidth, self.imgHeight)) 53 | self.canvas = cv2.cvtColor(color_canvas, cv2.COLOR_RGB2GRAY) 54 | 55 | def augment_layout(self): 56 | """ Add a row of radio buttons below the camera feed. """ 57 | 58 | # create a horizontal layout with all filter modes as radio buttons 59 | pnl = wx.Panel(self, -1) 60 | self.mode_warm = wx.RadioButton(pnl, -1, 'Warming Filter', (10, 10), 61 | style=wx.RB_GROUP) 62 | self.mode_cool = wx.RadioButton(pnl, -1, 'Cooling Filter', (10, 10)) 63 | self.mode_sketch = wx.RadioButton(pnl, -1, 'Pencil Sketch', (10, 10)) 64 | self.mode_cartoon = wx.RadioButton(pnl, -1, 'Cartoon', (10, 10)) 65 | hbox = wx.BoxSizer(wx.HORIZONTAL) 66 | hbox.Add(self.mode_warm, 1) 67 | hbox.Add(self.mode_cool, 1) 68 | hbox.Add(self.mode_sketch, 1) 69 | hbox.Add(self.mode_cartoon, 1) 70 | pnl.SetSizer(hbox) 71 | 72 | # add panel with radio buttons to existing panels in a vertical 73 | # arrangement 74 | self.panels_vertical.Add(pnl, flag=wx.EXPAND | wx.BOTTOM | wx.TOP, 75 | border=1) 76 | 77 | @staticmethod 78 | def _render_warm(rgb_image: np.ndarray) -> np.ndarray: 79 | interim_img = apply_rgb_filters(rgb_image, 80 | red_filter=INCREASE_LOOKUP_TABLE, 81 | blue_filter=DECREASE_LOOKUP_TABLE) 82 | return apply_hue_filter(interim_img, INCREASE_LOOKUP_TABLE) 83 | 84 | @staticmethod 85 | def _render_cool(rgb_image: np.ndarray) -> np.ndarray: 86 | interim_img = apply_rgb_filters(rgb_image, 87 | red_filter=DECREASE_LOOKUP_TABLE, 88 | blue_filter=INCREASE_LOOKUP_TABLE) 89 | return apply_hue_filter(interim_img, DECREASE_LOOKUP_TABLE) 90 | 91 | def process_frame(self, frame_rgb: np.ndarray) -> np.ndarray: 92 | """Process the frame of the camera (or other capture device) 93 | 94 | Choose a filter effect based on the which of the radio buttons 95 | was clicked. 
96 | 97 | :param frame_rgb: Image to process in rgb format, of shape (H, W, 3) 98 | :return: Processed image in rgb format, of shape (H, W, 3) 99 | """ 100 | if self.mode_warm.GetValue(): 101 | return self._render_warm(frame_rgb) 102 | elif self.mode_cool.GetValue(): 103 | return self._render_cool(frame_rgb) 104 | elif self.mode_sketch.GetValue(): 105 | return pencil_sketch_on_canvas(frame_rgb, canvas=self.canvas) 106 | elif self.mode_cartoon.GetValue(): 107 | return cartoonize(frame_rgb) 108 | else: 109 | raise NotImplementedError() 110 | 111 | 112 | def main(): 113 | # open webcam 114 | capture = cv2.VideoCapture(0) 115 | # opening the channel ourselves, if it failed to open. 116 | if not(capture.isOpened()): 117 | capture.open() 118 | 119 | capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640) 120 | capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) 121 | 122 | # start graphical user interface 123 | app = wx.App() 124 | layout = FilterLayout(capture, title='Fun with Filters') 125 | layout.Center() 126 | layout.Show() 127 | app.MainLoop() 128 | 129 | 130 | if __name__ == '__main__': 131 | main() 132 | -------------------------------------------------------------------------------- /chapter1/pencilsketch_bg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter1/pencilsketch_bg.jpg -------------------------------------------------------------------------------- /chapter1/tools.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from functools import lru_cache 4 | from scipy.interpolate import UnivariateSpline 5 | from typing import Tuple 6 | 7 | 8 | def spline_to_lookup_table(spline_breaks: list, break_values: list): 9 | spl = UnivariateSpline(spline_breaks, break_values) 10 | return spl(range(256)) 11 | 12 | 13 | def apply_rgb_filters(rgb_image, *, 14 | red_filter=None, green_filter=None, blue_filter=None): 15 | c_r, c_g, c_b = cv2.split(rgb_image) 16 | if red_filter is not None: 17 | c_r = cv2.LUT(c_r, red_filter).astype(np.uint8) 18 | if green_filter is not None: 19 | c_g = cv2.LUT(c_g, green_filter).astype(np.uint8) 20 | if blue_filter is not None: 21 | c_b = cv2.LUT(c_b, blue_filter).astype(np.uint8) 22 | return cv2.merge((c_r, c_g, c_b)) 23 | 24 | 25 | def apply_hue_filter(rgb_image, hue_filter): 26 | c_h, c_s, c_v = cv2.split(cv2.cvtColor(rgb_image, cv2.COLOR_RGB2HSV)) 27 | c_s = cv2.LUT(c_s, hue_filter).astype(np.uint8) 28 | return cv2.cvtColor(cv2.merge((c_h, c_s, c_v)), cv2.COLOR_HSV2RGB) 29 | 30 | 31 | @lru_cache(maxsize=32) 32 | def load_img_resized(path: str, dimensions: Tuple[int]): 33 | img = cv2.imread(path) 34 | if img is None: 35 | return 36 | return cv2.resize(img, dimensions) 37 | 38 | 39 | def cartoonize(rgb_image, *, 40 | num_pyr_downs=2, num_bilaterals=7): 41 | # STEP 1 -- Apply a bilateral filter to reduce the color palette of 42 | # the image. 
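    # A bilateral filter smooths flat regions while keeping edges sharp, but it
    # is slow on full-resolution frames. The code below therefore shrinks the
    # image with repeated cv2.pyrDown calls, applies the bilateral filter
    # num_bilaterals times on the small image, and finally restores the
    # original resolution with cv2.pyrUp.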
43 | downsampled_img = rgb_image 44 | for _ in range(num_pyr_downs): 45 | downsampled_img = cv2.pyrDown(downsampled_img) 46 | 47 | for _ in range(num_bilaterals): 48 | downsampled_img = cv2.bilateralFilter(downsampled_img, 9, 9, 7) 49 | 50 | filtered_normal_img = downsampled_img 51 | for _ in range(num_pyr_downs): 52 | filtered_normal_img = cv2.pyrUp(filtered_normal_img) 53 | 54 | # make sure resulting image has the same dims as original 55 | if filtered_normal_img.shape != rgb_image.shape: 56 | filtered_normal_img = cv2.resize( 57 | filtered_normal_img, rgb_image.shape[1::-1]) 58 | 59 | # STEP 2 -- Convert the original color image into grayscale. 60 | img_gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY) 61 | # STEP 3 -- Apply a median blur to reduce image noise. 62 | img_blur = cv2.medianBlur(img_gray, 7) 63 | 64 | # STEP 4 -- Use adaptive thresholding to detect and emphasize the edges 65 | # in an edge mask. 66 | gray_edges = cv2.adaptiveThreshold(img_blur, 255, 67 | cv2.ADAPTIVE_THRESH_MEAN_C, 68 | cv2.THRESH_BINARY, 9, 2) 69 | # STEP 5 -- Combine the color image from step 1 with the edge mask 70 | # from step 4. 71 | rgb_edges = cv2.cvtColor(gray_edges, cv2.COLOR_GRAY2RGB) 72 | return cv2.bitwise_and(filtered_normal_img, rgb_edges) 73 | 74 | 75 | def convert_to_pencil_sketch_ordered(rgb_image): 76 | gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY) 77 | inv_gray = 255 - gray_image 78 | blurred_image = cv2.GaussianBlur(inv_gray, (21, 21), 0, 0) 79 | gray_sketch = cv2.divide(gray_image, 255 - blurred_image, scale=256) 80 | return cv2.cvtColor(gray_sketch, cv2.COLOR_GRAY2RGB) 81 | 82 | 83 | def convert_to_pencil_sketch(rgb_image): 84 | gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY) 85 | blurred_image = cv2.GaussianBlur(gray_image, (21, 21), 0, 0) 86 | gray_sketch = cv2.divide(gray_image, blurred_image, scale=256) 87 | return cv2.cvtColor(gray_sketch, cv2.COLOR_GRAY2RGB) 88 | 89 | 90 | def pencil_sketch_on_canvas(rgb_image, canvas=None): 91 | gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY) 92 | blurred_image = cv2.GaussianBlur(gray_image, (21, 21), 0, 0) 93 | gray_sketch = cv2.divide(gray_image, blurred_image, scale=256) 94 | if canvas is not None: 95 | gray_sketch = cv2.multiply(gray_sketch, canvas, scale=1 / 256) 96 | return cv2.cvtColor(gray_sketch, cv2.COLOR_GRAY2RGB) 97 | 98 | 99 | def dodge(image, mask): 100 | print(image.dtype, mask.dtype) 101 | return cv2.divide(image, 255 - mask, scale=256) 102 | 103 | 104 | def dodge_naive(image, mask): 105 | # determine the shape of the input image 106 | width, height = image.shape[:2] 107 | 108 | # prepare output argument with same size as image 109 | blend = np.zeros((width, height), np.uint8) 110 | 111 | for c in range(width): 112 | for r in range(height): 113 | 114 | # shift image pixel value by 8 bits 115 | # divide by the inverse of the mask 116 | result = (image[c, r] << 8) / (255 - mask[c, r]) 117 | 118 | # make sure resulting value stays within bounds 119 | blend[c, r] = min(255, result) 120 | return blend 121 | -------------------------------------------------------------------------------- /chapter1/wx_gui.py: -------------------------------------------------------------------------------- 1 | ../wx_gui.py -------------------------------------------------------------------------------- /chapter10/classes.py: -------------------------------------------------------------------------------- 1 | CLASSES_90 = ["background", "person", "bicycle", "car", "motorcycle", 2 | "airplane", "bus", "train", "truck", 
"boat", "traffic light", "fire hydrant", 3 | "unknown", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", 4 | "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "unknown", "backpack", 5 | "umbrella", "unknown", "unknown", "handbag", "tie", "suitcase", "frisbee", "skis", 6 | "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", 7 | "surfboard", "tennis racket", "bottle", "unknown", "wine glass", "cup", "fork", "knife", 8 | "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", 9 | "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "unknown", "dining table", 10 | "unknown", "unknown", "toilet", "unknown", "tv", "laptop", "mouse", "remote", "keyboard", 11 | "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "unknown", 12 | "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ] 13 | -------------------------------------------------------------------------------- /chapter10/demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter10/demo.mp4 -------------------------------------------------------------------------------- /chapter10/frozen_inference_graph.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter10/frozen_inference_graph.pb -------------------------------------------------------------------------------- /chapter10/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | from classes import CLASSES_90 7 | from sort import Sort 8 | 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("-i", "--input", 12 | help="Video path, stream URI, or camera ID ", default="demo.mp4") 13 | parser.add_argument("-t", "--threshold", type=float, default=0.3, 14 | help="Minimum score to consider") 15 | parser.add_argument("-m", "--mode", choices=['detection', 'tracking'], default="tracking", 16 | help="Either detection or tracking mode") 17 | 18 | args = parser.parse_args() 19 | 20 | if args.input.isdigit(): 21 | args.input = int(args.input) 22 | 23 | 24 | TRACKED_CLASSES = ["car", "person"] 25 | BOX_COLOR = (23, 230, 210) 26 | TEXT_COLOR = (255, 255, 255) 27 | INPUT_SIZE = (300, 300) 28 | 29 | # Read SSD model 30 | config = "./ssd_mobilenet_v1_coco_2017_11_17.pbtxt.txt" 31 | model = "frozen_inference_graph.pb" 32 | detector = cv2.dnn.readNetFromTensorflow(model, config) 33 | 34 | 35 | def illustrate_box(image: np.ndarray, box: np.ndarray, caption: str) -> None: 36 | rows, cols = frame.shape[:2] 37 | points = box.reshape((2, 2)) * np.array([cols, rows]) 38 | p1, p2 = points.astype(np.int32) 39 | cv2.rectangle(image, tuple(p1), tuple(p2), BOX_COLOR, thickness=4) 40 | cv2.putText( 41 | image, 42 | caption, 43 | tuple(p1), 44 | cv2.FONT_HERSHEY_SIMPLEX, 45 | 0.75, 46 | TEXT_COLOR, 47 | 2) 48 | 49 | 50 | def illustrate_detections(dets: np.ndarray, frame: np.ndarray) -> np.ndarray: 51 | class_ids, scores, boxes = dets[:, 0], dets[:, 1], dets[:, 2:6] 52 | for class_id, score, box in zip(class_ids, scores, boxes): 53 | illustrate_box(frame, box, f"{CLASSES_90[int(class_id)]} {score:.2f}") 54 | 
return frame 55 | 56 | 57 | def illustrate_tracking_info(frame: np.ndarray) -> np.ndarray: 58 | for num, (class_id, tracker) in enumerate(trackers.items()): 59 | txt = f"{CLASSES_90[class_id]}:Total:{tracker.count} Now:{len(tracker.trackers)}" 60 | cv2.putText(frame, txt, (0, 50 * (num + 1)), 61 | cv2.FONT_HERSHEY_SIMPLEX, 0.75, TEXT_COLOR, 2) 62 | return frame 63 | 64 | 65 | trackers = {CLASSES_90.index(tracked_class): Sort() 66 | for tracked_class in TRACKED_CLASSES} 67 | 68 | 69 | def track(dets: np.ndarray, 70 | illustration_frame: np.ndarray = None) -> np.ndarray: 71 | for class_id, tracker in trackers.items(): 72 | class_dets = dets[dets[:, 0] == class_id] 73 | # Retuns [box..,id] 74 | sort_boxes = tracker.update(class_dets[:, 2:6]) 75 | if illustration_frame is not None: 76 | for box in sort_boxes: 77 | illustrate_box(illustration_frame, 78 | box[:4], 79 | f"{CLASSES_90[class_id]} {int(box[4])}") 80 | 81 | return illustration_frame 82 | 83 | 84 | cap = cv2.VideoCapture(args.input) 85 | 86 | for res, frame in iter(cap.read, (False, None)): 87 | detector.setInput( 88 | cv2.dnn.blobFromImage( 89 | frame, 90 | size=INPUT_SIZE, 91 | swapRB=True, 92 | crop=False)) 93 | detections = detector.forward()[0, 0, :, 1:] 94 | scores = detections[:, 1] 95 | detections = detections[scores > 0.3] 96 | if args.mode == "detection": 97 | out = illustrate_detections(detections, frame) 98 | else: 99 | out = track(detections, frame) 100 | illustrate_tracking_info(out) 101 | cv2.imshow("out", out) 102 | if cv2.waitKey(1) == 27: 103 | cv2.waitKey(0) 104 | # exit() 105 | -------------------------------------------------------------------------------- /chapter10/sort.py: -------------------------------------------------------------------------------- 1 | """ 2 | SORT: A Simple, Online and Realtime Tracker 3 | Copyright (C) 2016 Alex Bewley alex@dynamicdetection.com 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
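In this file, SORT gives every tracked object its own KalmanBoxTracker (a
constant-velocity Kalman filter over the box centre, scale, and aspect ratio)
and matches the predicted boxes against each new frame's detections by solving
a linear assignment problem on their pairwise IoU.

A minimal usage sketch (the detector that produces `dets`, one row of
[x1, y1, x2, y2, score] per detection, is assumed to exist elsewhere):

    tracker = Sort()
    for dets in detections_per_frame:
        tracks = tracker.update(dets)  # each row is [x1, y1, x2, y2, track_id]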
17 | """ 18 | 19 | import numpy as np 20 | from scipy.optimize import linear_sum_assignment 21 | from typing import Tuple 22 | import cv2 23 | 24 | 25 | def convert_bbox_to_z(bbox): 26 | """ 27 | Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form 28 | [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is 29 | the aspect ratio 30 | """ 31 | w, h = bbox[2:4] - bbox[0:2] 32 | x, y = (bbox[0:2] + bbox[2:4]) / 2 33 | s = w * h # scale is just area 34 | r = w / h 35 | return np.array([x, y, s, r])[:, None].astype(np.float64) 36 | 37 | 38 | def convert_x_to_bbox(x): 39 | """ 40 | Takes a bounding box in the centre form [x,y,s,r] and returns it in the form 41 | [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right 42 | """ 43 | # Shape of x is (7, 1) 44 | x = x[:, 0] 45 | center = x[0:2] 46 | w = np.sqrt(x[2] * x[3]) 47 | h = x[2] / w 48 | half_size = np.array([w, h]) / 2 49 | bbox = np.concatenate((center - half_size, center + half_size)) 50 | return bbox.astype(np.float64) 51 | 52 | 53 | class KalmanBoxTracker: 54 | """ 55 | This class represents the internel state of individual tracked objects observed as bbox. 56 | """ 57 | 58 | def __init__(self, bbox, label): 59 | self.id = label 60 | self.time_since_update = 0 61 | self.hit_streak = 0 62 | 63 | self.kf = cv2.KalmanFilter(dynamParams=7, measureParams=4, type=cv2.CV_64F) 64 | 65 | # define constant velocity model 66 | self.kf.transitionMatrix = np.array( 67 | [[1, 0, 0, 0, 1, 0, 0], 68 | [0, 1, 0, 0, 0, 1, 0], 69 | [0, 0, 1, 0, 0, 0, 1], 70 | [0, 0, 0, 1, 0, 0, 0], 71 | [0, 0, 0, 0, 1, 0, 0], 72 | [0, 0, 0, 0, 0, 1, 0], 73 | [0, 0, 0, 0, 0, 0, 1]], dtype=np.float64) 74 | self.kf.processNoiseCov = np.diag([10, 10, 10, 10, 1e4, 1e4, 1e4]).astype(np.float64) 75 | 76 | # We only observe 77 | self.kf.measurementMatrix = np.array( 78 | [[1, 0, 0, 0, 0, 0, 0], 79 | [0, 1, 0, 0, 0, 0, 0], 80 | [0, 0, 1, 0, 0, 0, 0], 81 | [0, 0, 0, 1, 0, 0, 0]], dtype=np.float64) 82 | self.kf.measurementNoiseCov = np.diag([1, 1, 10, 10]).astype(np.float64) 83 | 84 | # Start the particle at their initial position with 0 velocities. 85 | self.kf.statePost = np.vstack((convert_bbox_to_z(bbox), [[0], [0], [0]])) 86 | self.kf.errorCovPost = np.diag([1, 1, 1, 1, 1e-2, 1e-2, 1e-4]).astype(np.float64) 87 | 88 | def update(self, bbox): 89 | """ 90 | Updates the state vector with observed bbox. 91 | """ 92 | self.time_since_update = 0 93 | self.hit_streak += 1 94 | 95 | self.kf.correct(convert_bbox_to_z(bbox)) 96 | 97 | def predict(self): 98 | """ 99 | Advances the state vector and returns the predicted bounding box estimate. 100 | """ 101 | if self.time_since_update > 0: 102 | self.hit_streak = 0 103 | self.time_since_update += 1 104 | 105 | retval = self.kf.predict() 106 | return convert_x_to_bbox(retval) 107 | 108 | @property 109 | def current_state(self): 110 | """ 111 | Returns the current bounding box estimate. 
112 | """ 113 | return convert_x_to_bbox(self.kf.statePost) 114 | 115 | 116 | def iou(a: np.ndarray, b: np.ndarray) -> float: 117 | """ 118 | Computes IoU between two bboxes in the form [x1,y1,x2,y2] 119 | """ 120 | a_tl, a_br = a[:4].reshape((2, 2)) 121 | b_tl, b_br = b[:4].reshape((2, 2)) 122 | int_tl = np.maximum(a_tl, b_tl) 123 | int_br = np.minimum(a_br, b_br) 124 | int_area = np.product(np.maximum(0., int_br - int_tl)) 125 | a_area = np.product(a_br - a_tl) 126 | b_area = np.product(b_br - b_tl) 127 | return int_area / (a_area + b_area - int_area) 128 | 129 | 130 | def associate_detections_to_trackers(detections: np.ndarray, trackers: np.ndarray, 131 | iou_threshold: float = 0.3) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 132 | """ 133 | Assigns detections to tracked objects (both represented as bounding boxes) 134 | 135 | Returns 3 lists of matches, unmatched_detections and unmatched_trackers 136 | """ 137 | iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float64) 138 | 139 | for d, det in enumerate(detections): 140 | for t, trk in enumerate(trackers): 141 | iou_matrix[d, t] = iou(det, trk) 142 | 143 | row_ind, col_ind = linear_sum_assignment(-iou_matrix) 144 | matched_indices = np.transpose(np.array([row_ind, col_ind])) 145 | 146 | iou_values = np.array([iou_matrix[detection, tracker] 147 | for detection, tracker in matched_indices]) 148 | good_matches = matched_indices[iou_values > iou_threshold] 149 | unmatched_detections = np.array( 150 | [i for i in range(len(detections)) if i not in good_matches[:, 0]]) 151 | unmatched_trackers = np.array( 152 | [i for i in range(len(trackers)) if i not in good_matches[:, 1]]) 153 | return good_matches, unmatched_detections, unmatched_trackers 154 | 155 | 156 | class Sort: 157 | def __init__(self, max_age=10, min_hits=6): 158 | """ 159 | Sets key parameters for SORT 160 | """ 161 | self.max_age = max_age 162 | self.min_hits = min_hits 163 | self.trackers = [] 164 | self.count = 0 165 | 166 | def next_id(self): 167 | self.count += 1 168 | return self.count 169 | 170 | def update(self, dets): 171 | """ 172 | Params: 173 | dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...] 174 | Requires: this method must be called once for each frame even with empty detections. 175 | Returns a similar array, where the last column is the object ID. 176 | 177 | NOTE: The number of objects returned may differ from the number of detections provided. 178 | """ 179 | # Predict new locations and remove trackers with NaNs. 
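        # The rest of update() then proceeds in four steps: collect the
        # surviving trackers' current box estimates, match them to the new
        # detections by solving a linear assignment problem over the IoU matrix
        # (associate_detections_to_trackers above), correct matched trackers
        # with their detections and start a new KalmanBoxTracker for every
        # unmatched detection, and finally report only confirmed tracks
        # (hit_streak >= min_hits, updated this frame) while dropping trackers
        # that have not been updated for more than max_age frames.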
180 | self.trackers = [ 181 | tracker for tracker in self.trackers if not np.any( 182 | np.isnan( 183 | tracker.predict()))] 184 | # get predicted locations 185 | trks = np.array([tracker.current_state for tracker in self.trackers]) 186 | 187 | matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers( 188 | dets, trks) 189 | 190 | # update matched trackers with assigned detections 191 | for detection_num, tracker_num in matched: 192 | self.trackers[tracker_num].update(dets[detection_num]) 193 | 194 | # create and initialise new trackers for unmatched detections 195 | for i in unmatched_dets: 196 | self.trackers.append(KalmanBoxTracker(dets[i, :], self.next_id())) 197 | 198 | ret = np.array([np.concatenate((trk.current_state, [trk.id + 1])) 199 | for trk in self.trackers 200 | if trk.time_since_update < 1 and trk.hit_streak >= self.min_hits]) 201 | # remove dead tracklet 202 | self.trackers = [ 203 | tracker for tracker in self.trackers if tracker.time_since_update <= self.max_age] 204 | return ret 205 | -------------------------------------------------------------------------------- /chapter2/chapter2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """OpenCV with Python Blueprints 5 | Chapter 2: Hand Gesture Recognition Using a Kinect Depth Sensor 6 | 7 | An app to detect and track simple hand gestures in real-time using the 8 | output of a Microsoft Kinect 3D Sensor. 9 | """ 10 | 11 | 12 | 13 | import cv2 14 | import numpy as np 15 | from typing import Tuple 16 | from gestures import recognize 17 | from frame_reader import read_frame 18 | 19 | 20 | def draw_helpers(img_draw: np.ndarray) -> None: 21 | # draw some helpers for correctly placing hand 22 | height, width = img_draw.shape[:2] 23 | color = (0,102,255) 24 | cv2.circle(img_draw, (width // 2, height // 2), 3, color, 2) 25 | cv2.rectangle(img_draw, (width // 3, height // 3), 26 | (width * 2 // 3, height * 2 // 3), color, 2) 27 | 28 | 29 | 30 | def main(): 31 | for _, frame in iter(read_frame, (False, None)): 32 | num_fingers, img_draw = recognize(frame) 33 | # draw some helpers for correctly placing hand 34 | draw_helpers(img_draw) 35 | # print number of fingers on image 36 | cv2.putText(img_draw, str(num_fingers), (30, 30), 37 | cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255)) 38 | cv2.imshow("frame", img_draw) 39 | # Exit on escape 40 | if cv2.waitKey(10) == 27: 41 | break 42 | 43 | 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /chapter2/frame_reader.py: -------------------------------------------------------------------------------- 1 | import freenect 2 | import numpy as np 3 | from typing import Tuple 4 | 5 | def read_frame() -> Tuple[bool,np.ndarray]: 6 | frame, timestamp = freenect.sync_get_depth() 7 | if frame is None: 8 | return False, None 9 | frame = np.clip(frame, 0, 2**10 - 1) 10 | frame >>= 2 11 | return True, frame.astype(np.uint8) 12 | -------------------------------------------------------------------------------- /chapter2/gestures.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """A module containing an algorithm for hand gesture recognition""" 5 | 6 | import numpy as np 7 | import cv2 8 | from typing import Tuple 9 | 10 | __author__ = "Michael Beyeler" 11 | __license__ = "GNU GPL 3.0 or later" 12 | 13 | def 
recognize(img_gray): 14 | """Recognizes hand gesture in a single-channel depth image 15 | 16 | This method estimates the number of extended fingers based on 17 | a single-channel depth image showing a hand and arm region. 18 | :param img_gray: single-channel depth image 19 | :returns: (num_fingers, img_draw) The estimated number of 20 | extended fingers and an annotated RGB image 21 | """ 22 | 23 | # segment arm region 24 | segment = segment_arm(img_gray) 25 | 26 | # find the hull of the segmented area, and based on that find the 27 | # convexity defects 28 | (contour, defects) = find_hull_defects(segment) 29 | 30 | # detect the number of fingers depending on the contours and convexity 31 | # defects, then draw defects that belong to fingers green, others red 32 | img_draw = cv2.cvtColor(segment, cv2.COLOR_GRAY2RGB) 33 | (num_fingers, img_draw) = detect_num_fingers(contour, 34 | defects, img_draw) 35 | 36 | return (num_fingers, img_draw) 37 | 38 | 39 | def segment_arm(frame: np.ndarray, abs_depth_dev: int = 14) -> np.ndarray: 40 | """Segments arm region 41 | 42 | This method accepts a single-channel depth image of an arm and 43 | hand region and extracts the segmented arm region. 44 | It is assumed that the hand is placed in the center of the image. 45 | :param frame: single-channel depth image 46 | :returns: binary image (mask) of segmented arm region, where 47 | arm=255, else=0 48 | """ 49 | height, width = frame.shape 50 | # find center (21x21 pixel) region of imageheight frame 51 | center_half = 10 # half-width of 21 is 21/2-1 52 | center = frame[height // 2 - center_half:height // 2 + center_half, 53 | width // 2 - center_half:width // 2 + center_half] 54 | 55 | # find median depth value of center region 56 | med_val = np.median(center) 57 | 58 | # try this instead: 59 | frame = np.where(abs(frame - med_val) <= abs_depth_dev, 60 | 128, 0).astype(np.uint8) 61 | 62 | # morphological 63 | kernel = np.ones((3, 3), np.uint8) 64 | frame = cv2.morphologyEx(frame, cv2.MORPH_CLOSE, kernel) 65 | 66 | # connected component 67 | small_kernel = 3 68 | frame[height // 2 - small_kernel:height // 2 + small_kernel, 69 | width // 2 - small_kernel:width // 2 + small_kernel] = 128 70 | 71 | mask = np.zeros((height + 2, width + 2), np.uint8) 72 | flood = frame.copy() 73 | cv2.floodFill(flood, mask, (width // 2, height // 2), 255, 74 | flags=4 | (255 << 8)) 75 | 76 | ret, flooded = cv2.threshold(flood, 129, 255, cv2.THRESH_BINARY) 77 | return flooded 78 | 79 | 80 | def find_hull_defects(segment: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 81 | """Find hull defects 82 | 83 | This method finds all defects in the hull of a segmented arm 84 | region. 
85 | :param segment: a binary image (mask) of a segmented arm region, 86 | where arm=255, else=0 87 | :returns: (max_contour, defects) the largest contour in the image 88 | and all corresponding defects 89 | """ 90 | contours, hierarchy = cv2.findContours(segment, cv2.RETR_TREE, 91 | cv2.CHAIN_APPROX_SIMPLE) 92 | 93 | # find largest area contour 94 | max_contour = max(contours, key=cv2.contourArea) 95 | epsilon = 0.01 * cv2.arcLength(max_contour, True) 96 | max_contour = cv2.approxPolyDP(max_contour, epsilon, True) 97 | 98 | # find convexity hull and defects 99 | hull = cv2.convexHull(max_contour, returnPoints=False) 100 | defects = cv2.convexityDefects(max_contour, hull) 101 | 102 | return max_contour, defects 103 | 104 | 105 | 106 | def detect_num_fingers(contour: np.ndarray, defects: np.ndarray, 107 | img_draw: np.ndarray, thresh_deg: float = 80.0) -> Tuple[int, np.ndarray]: 108 | """Detects the number of extended fingers 109 | 110 | This method determines the number of extended fingers based on a 111 | contour and convexity defects. 112 | It will annotate an RGB color image of the segmented arm region 113 | with all relevant defect points and the hull. 114 | :param contours: a list of contours 115 | :param defects: a list of convexity defects 116 | :param img_draw: an RGB color image to be annotated 117 | :returns: (num_fingers, img_draw) the estimated number of extended 118 | fingers and an annotated RGB color image 119 | """ 120 | 121 | # if there are no convexity defects, possibly no hull found or no 122 | # fingers extended 123 | if defects is None: 124 | return [0, img_draw] 125 | 126 | # we assume the wrist will generate two convexity defects (one on each 127 | # side), so if there are no additional defect points, there are no 128 | # fingers extended 129 | if len(defects) <= 2: 130 | return [0, img_draw] 131 | 132 | # if there is a sufficient amount of convexity defects, we will find a 133 | # defect point between two fingers so to get the number of fingers, 134 | # start counting at 1 135 | num_fingers = 1 136 | # Defects are of shape (num_defects,1,4) 137 | for defect in defects[:, 0, :]: 138 | # Each defect is an array of four integers. 139 | # First three indexes of start, end and the furthest 140 | # points respectively 141 | # contour is of shape (num_points,1,2) - 2 for point coordinates 142 | start, end, far = [contour[i][0] for i in defect[:3]] 143 | # draw the hull 144 | cv2.line(img_draw, tuple(start), tuple(end), (0, 255, 0), 2) 145 | 146 | # if angle is below a threshold, defect point belongs to two 147 | # extended fingers 148 | if angle_rad(start - far, end - far) < deg2rad(thresh_deg): 149 | # increment number of fingers 150 | num_fingers += 1 151 | 152 | # draw point as green 153 | cv2.circle(img_draw, tuple(far), 5, (0, 255, 0), -1) 154 | else: 155 | # draw point as red 156 | cv2.circle(img_draw, tuple(far), 5, (0, 0, 255), -1) 157 | 158 | # make sure we cap the number of fingers 159 | return min(5, num_fingers), img_draw 160 | 161 | 162 | def angle_rad(v1, v2): 163 | """Angle in radians between two vectors 164 | 165 | This method returns the angle (in radians) between two array-like 166 | vectors using the cross-product method, which is more accurate for 167 | small angles than the dot-product-acos method. 
168 | """ 169 | return np.arctan2(np.linalg.norm(np.cross(v1, v2)), np.dot(v1, v2)) 170 | 171 | 172 | 173 | 174 | def deg2rad(angle_deg): 175 | """Convert degrees to radians 176 | 177 | This method converts an angle in degrees e[0,360) into radians 178 | e[0,2*np.pi) 179 | """ 180 | return angle_deg / 180.0 * np.pi 181 | -------------------------------------------------------------------------------- /chapter3/chapter3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """OpenCV with Python Blueprints 5 | Chapter 3: Finding Objects Via Feature Matching and Perspective Transforms 6 | 7 | An app to detect and track an object of interest in the video stream of a 8 | webcam, even if the object is viewed at different angles, distances, or 9 | under partial occlusion. 10 | """ 11 | 12 | import cv2 13 | from feature_matching import FeatureMatching 14 | 15 | 16 | def main(): 17 | capture = cv2.VideoCapture(0) 18 | assert capture.isOpened(), "Cannot connect to camera" 19 | 20 | capture.set(cv2.CAP_PROP_FPS, 5) 21 | capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640) 22 | capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) 23 | 24 | train_img = cv2.imread('train.png', cv2.IMREAD_GRAYSCALE) 25 | matching = FeatureMatching(train_img) 26 | 27 | for success, frame in iter(capture.read, (False, None)): 28 | cv2.imshow("frame", frame) 29 | match_success, img_warped, img_flann = matching.match(frame) 30 | if match_success: 31 | cv2.imshow("res", img_warped) 32 | cv2.imshow("flann", img_flann) 33 | if cv2.waitKey(1) & 0xff == 27: 34 | break 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /chapter3/feature_matching.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """A module containing an algorithm for feature matching""" 5 | 6 | import numpy as np 7 | import cv2 8 | from typing import Tuple, Optional, List, Sequence 9 | Point = Tuple[float, float] 10 | 11 | __author__ = "Michael Beyeler" 12 | __license__ = "GNU GPL 3.0 or later" 13 | 14 | 15 | 16 | 17 | class Outlier(Exception): 18 | pass 19 | 20 | 21 | class FeatureMatching: 22 | """Feature matching class 23 | 24 | This class implements an algorithm for feature matching and tracking. 25 | 26 | A SURF descriptor is obtained from a training or template image 27 | (train_image) that shows the object of interest from the front and 28 | upright. 29 | 30 | The algorithm will then search for this object in every image frame 31 | passed to the method FeatureMatching.match. The matching is performed 32 | with a FLANN based matcher. 33 | 34 | Note: If you want to use this code (including SURF) in a commercial 35 | application, you will need to acquire a SURF license. 36 | """ 37 | 38 | def __init__(self, train_image: np.ndarray): 39 | """ 40 | Initialize the SURF descriptor, FLANN matcher, and the tracking 41 | algorithm.
42 | 43 | :param train_image: training or template image showing the object 44 | of interest 45 | """ 46 | # initialize SURF 47 | self.f_extractor = cv2.xfeatures2d_SURF.create(hessianThreshold=400) 48 | # template image: "train" image 49 | # later on compared ot each video frame: "query" image 50 | self.img_obj = train_image 51 | 52 | self.sh_train = self.img_obj.shape[:2] 53 | self.key_train, self.desc_train = \ 54 | self.f_extractor.detectAndCompute(self.img_obj, None) 55 | 56 | # initialize FLANN 57 | FLANN_INDEX_KDTREE = 0 58 | index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5) 59 | search_params = dict(checks=50) 60 | index_params = {"algorithm": 0, "trees": 5} 61 | search_params = {"checks": 50} 62 | self.flann = cv2.FlannBasedMatcher(index_params, search_params) 63 | # self.flann = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_FLANNBASED) 64 | # initialize tracking 65 | self.last_hinv = np.zeros((3, 3)) 66 | self.max_error_hinv = 50. 67 | self.num_frames_no_success = 0 68 | self.max_frames_no_success = 5 69 | 70 | def match(self, 71 | frame: np.ndarray) -> Tuple[bool, 72 | Optional[np.ndarray], 73 | Optional[np.ndarray]]: 74 | """Detects and tracks an object of interest in a video frame 75 | 76 | This method detects and tracks an object of interest (of which a 77 | SURF descriptor was obtained upon initialization) in a video frame. 78 | Correspondence is established with a FLANN based matcher. 79 | 80 | The algorithm then applies a perspective transform on the frame in 81 | order to project the object of interest to the frontal plane. 82 | 83 | Outlier rejection is applied to improve the tracking of the object 84 | from frame to frame. 85 | 86 | :param frame: input (query) image in which to detect the object 87 | :returns: (success, frame) whether the detection was successful and 88 | and the perspective-transformed frame 89 | """ 90 | 91 | # create a working copy (grayscale) of the frame 92 | # and store its shape for convenience 93 | img_query = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 94 | sh_query = img_query.shape # rows,cols 95 | 96 | # --- feature extraction 97 | # detect keypoints in the query image (video frame) 98 | # using SURF descriptor 99 | # key_query, desc_query = self.f_extractor.detectAndCompute( 100 | # img_query, None) 101 | 102 | key_query = self.f_extractor.detect( 103 | img_query) 104 | key_query, desc_query = self.f_extractor.compute(img_query, key_query) 105 | # img_keypoints = cv2.drawKeypoints(img_query, key_query, None, 106 | # (255, 0, 0), 4) 107 | # cv2.imshow("keypoints",img_keypoints) 108 | # --- feature matching 109 | # returns a list of good matches using FLANN 110 | # based on a scene and its feature descriptor 111 | good_matches = self.match_features(desc_query) 112 | train_points = [self.key_train[good_match.queryIdx].pt 113 | for good_match in good_matches] 114 | query_points = [key_query[good_match.trainIdx].pt 115 | for good_match in good_matches] 116 | 117 | try: 118 | # early outlier detection and rejection 119 | if len(good_matches) < 4: 120 | raise Outlier("Too few matches") 121 | 122 | # --- corner point detection 123 | # calculates the homography matrix needed to convert between 124 | # keypoints from the train image and the query image 125 | dst_corners = detect_corner_points( 126 | train_points, query_points, self.sh_train) 127 | # early outlier detection and rejection 128 | # if any corners lie significantly outside the image, skip frame 129 | if np.any((dst_corners < -20) | 130 | (dst_corners > np.array(sh_query) + 20)): 
131 | raise Outlier("Out of image") 132 | # early outlier detection and rejection 133 | # find the area of the quadrilateral that the four corner points 134 | # span 135 | area = 0 136 | for prev, nxt in zip(dst_corners, np.roll( 137 | dst_corners, -1, axis=0)): 138 | area += (prev[0] * nxt[1] - prev[1] * nxt[0]) / 2. 139 | 140 | # early outlier detection and rejection 141 | # reject corner points if area is unreasonable 142 | if not np.prod(sh_query) / 16. < area < np.prod(sh_query) / 2.: 143 | raise Outlier("Area is unreasonably small or large") 144 | 145 | # --- bring object of interest to frontal plane 146 | train_points_scaled = self.scale_and_offset( 147 | train_points, self.sh_train, sh_query) 148 | Hinv, _ = cv2.findHomography( 149 | np.array(query_points), np.array(train_points_scaled), cv2.RANSAC) 150 | # outlier rejection 151 | # if last frame recent: new Hinv must be similar to last one 152 | # else: accept whatever Hinv is found at this point 153 | similar = np.linalg.norm( 154 | Hinv - self.last_hinv) < self.max_error_hinv 155 | recent = self.num_frames_no_success < self.max_frames_no_success 156 | if recent and not similar: 157 | raise Outlier("Not similar transformation") 158 | except Outlier as e: 159 | self.num_frames_no_success += 1 160 | return False, None, None 161 | else: 162 | # reset counters and update Hinv 163 | self.num_frames_no_success = 0 164 | self.last_hinv = Hinv 165 | # outline corner points of train image in query image 166 | img_warped = cv2.warpPerspective( 167 | img_query, Hinv, (sh_query[1], sh_query[0])) 168 | img_flann = draw_good_matches( 169 | self.img_obj, 170 | self.key_train, 171 | img_query, 172 | key_query, 173 | good_matches) 174 | # adjust x-coordinate (col) of corner points so that they can be drawn 175 | # next to the train image (add self.sh_train[1]) 176 | dst_corners[:, 0] += self.sh_train[1] 177 | cv2.polylines( 178 | img_flann, 179 | [dst_corners.astype(int)], 180 | isClosed=True, 181 | color=(0, 255, 0), 182 | thickness=3) 183 | return True, img_warped, img_flann 184 | 185 | def match_features(self, desc_frame: np.ndarray) -> List[cv2.DMatch]: 186 | """Feature matching between train and query image 187 | 188 | This method finds matches between the descriptor of an input 189 | (query) frame and the stored template (train) image. 190 | 191 | The ratio test is applied to distinguish between good matches and 192 | outliers.
193 | 194 | :param desc_frame: descriptor of input (query) image 195 | :returns: list of good matches 196 | """ 197 | # find 2 best matches (kNN with k=2) 198 | matches = self.flann.knnMatch(self.desc_train, desc_frame, k=2) 199 | # discard bad matches, ratio test as per Lowe's paper 200 | good_matches = [x[0] for x in matches 201 | if x[0].distance < 0.7 * x[1].distance] 202 | return good_matches 203 | 204 | @staticmethod 205 | def scale_and_offset(points: Sequence[Point], 206 | source_size: Tuple[int, int], 207 | dst_size: Tuple[int, int], 208 | factor: float = 0.5) -> List[Point]: 209 | dst_size = np.array(dst_size) 210 | scale = 1 / np.array(source_size) * dst_size * factor 211 | bias = dst_size * (1 - factor) / 2 212 | return [tuple(np.array(pt) * scale + bias) for pt in points] 213 | 214 | 215 | def detect_corner_points(src_points: Sequence[Point], 216 | dst_points: Sequence[Point], 217 | sh_src: Tuple[int, int]) -> np.ndarray: 218 | """Detects corner points in an input (query) image 219 | 220 | This method finds the homography matrix to go from the template 221 | (train) image to the input (query) image, and finds the coordinates 222 | of the good matches (from the train image) in the query image. 223 | 224 | :param key_frame: keypoints of the query image 225 | :param good_matches: list of good matches 226 | :returns: coordinates of good matches in transformed query image 227 | """ 228 | 229 | # find homography using RANSAC 230 | H, _ = cv2.findHomography(np.array(src_points), np.array(dst_points), 231 | cv2.RANSAC) 232 | 233 | if H is None: 234 | raise Outlier("Homography not found") 235 | # outline train image in query image 236 | height, width = sh_src 237 | src_corners = np.array([(0, 0), (width, 0), 238 | (width, height), 239 | (0, height)], dtype=np.float32) 240 | return cv2.perspectiveTransform(src_corners[None, :, :], H)[0] 241 | 242 | 243 | def draw_good_matches(img1: np.ndarray, 244 | kp1: Sequence[cv2.KeyPoint], 245 | img2: np.ndarray, 246 | kp2: Sequence[cv2.KeyPoint], 247 | matches: Sequence[cv2.DMatch]) -> np.ndarray: 248 | """Visualizes a list of good matches 249 | 250 | This function visualizes a list of good matches. It is only required in 251 | OpenCV releases that do not ship with the function drawKeypoints. 252 | 253 | The function draws two images (img1 and img2) side-by-side, 254 | highlighting a list of keypoints in both, and connects matching 255 | keypoints in the two images with blue lines. 
256 | 257 | :param img1: first image 258 | :param kp1: list of keypoints for first image 259 | :param img2: second image 260 | :param kp2: list of keypoints for second image 261 | :param matches: list of good matches 262 | :returns: annotated output image 263 | """ 264 | # Create a new output image of a size that will fit the two images together 265 | rows1, cols1 = img1.shape[:2] 266 | rows2, cols2 = img2.shape[:2] 267 | out = np.zeros((max([rows1, rows2]), cols1 + cols2, 3), dtype='uint8') 268 | 269 | # Place the first image on the left 270 | out[:rows1, :cols1, :] = img1[..., None] 271 | 272 | # Place the second image to the right of the first image 273 | out[:rows2, cols1:cols1 + cols2, :] = img2[..., None] 274 | 275 | # For each pair of points we have between both images 276 | # draw circles, then connect a line between them 277 | for m in matches: 278 | # Get the matching keypoints for each of the images 279 | # and convert them to int 280 | c1 = tuple(map(int, kp1[m.queryIdx].pt)) 281 | c2 = tuple(map(int, kp2[m.trainIdx].pt)) 282 | # Shift second center for drawing 283 | c2 = c2[0] + cols1, c2[1] 284 | 285 | radius = 4 286 | BLUE = (255, 0, 0) 287 | thickness = 1 288 | # Draw a small circle at both co-ordinates 289 | cv2.circle(out, c1, radius, BLUE, thickness) 290 | cv2.circle(out, c2, radius, BLUE, thickness) 291 | 292 | # Draw a line in between the two points 293 | cv2.line(out, c1, c2, BLUE, thickness) 294 | 295 | return out 296 | -------------------------------------------------------------------------------- /chapter3/train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter3/train.png -------------------------------------------------------------------------------- /chapter3/wx_gui.py: -------------------------------------------------------------------------------- 1 | ../wx_gui.py -------------------------------------------------------------------------------- /chapter4/calibrate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """A module for camera calibration using a chessboard""" 5 | 6 | 7 | import cv2 8 | import numpy as np 9 | import wx 10 | 11 | from wx_gui import BaseLayout 12 | 13 | 14 | class CameraCalibration(BaseLayout): 15 | """Camera calibration 16 | 17 | Performs camera calibration on a webcam video feed using 18 | the chessboard approach described here: 19 | http://docs.opencv.org/doc/tutorials/calib3d/camera_calibration/camera_calibration.html 20 | """ 21 | 22 | def augment_layout(self): 23 | pnl = wx.Panel(self, -1) 24 | self.button_calibrate = wx.Button(pnl, label='Calibrate Camera') 25 | self.Bind(wx.EVT_BUTTON, self._on_button_calibrate) 26 | hbox = wx.BoxSizer(wx.HORIZONTAL) 27 | hbox.Add(self.button_calibrate) 28 | pnl.SetSizer(hbox) 29 | 30 | self.panels_vertical.Add(pnl, flag=wx.EXPAND | wx.BOTTOM | wx.TOP, 31 | border=1) 32 | 33 | # setting chessboard size (size of grid - 1) 34 | # (7,7) for the standard chessboard 35 | self.chessboard_size = (7, 7) 36 | 37 | # prepare object points 38 | self.objp = np.zeros((np.prod(self.chessboard_size), 3), 39 | dtype=np.float32) 40 | self.objp[:, :2] = np.mgrid[0:self.chessboard_size[0], 41 | 0:self.chessboard_size[1]].T.reshape(-1, 2) 42 | 43 | # prepare recording 44 | self.recording = False 45 | self.record_min_num_frames = 15 46 | 
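        # _reset_recording() clears the collected object/image point buffers
        # and the frame counter; it also runs after every completed
        # calibration and whenever the Calibrate button is pressed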
self._reset_recording() 47 | 48 | def process_frame(self, frame): 49 | """Processes each frame 50 | 51 | If recording mode is on (self.recording==True), this method will 52 | perform all the hard work of the camera calibration process: 53 | - for every frame, until enough frames have been processed: 54 | - find the chessboard corners 55 | - refine the coordinates of the detected corners 56 | - after enough frames have been processed: 57 | - estimate the intrinsic camera matrix and distortion 58 | coefficients 59 | 60 | :param frame: current RGB video frame 61 | :returns: annotated video frame showing detected chessboard corners 62 | """ 63 | # if we are not recording, just display the frame 64 | if not self.recording: 65 | return frame 66 | 67 | # else we're recording 68 | img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype(np.uint8) 69 | if self.record_cnt < self.record_min_num_frames: 70 | # need at least some number of chessboard samples before we can 71 | # calculate the intrinsic matrix 72 | 73 | ret, corners = cv2.findChessboardCorners(img_gray, 74 | self.chessboard_size, 75 | None) 76 | if ret: 77 | print(f"{self.record_min_num_frames - self.record_cnt} chessboards remain") 78 | cv2.drawChessboardCorners(frame, self.chessboard_size, corners, ret) 79 | 80 | # refine found corners 81 | criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 82 | 30, 0.01) 83 | cv2.cornerSubPix(img_gray, corners, (9, 9), (-1, -1), criteria) 84 | 85 | self.obj_points.append(self.objp) 86 | self.img_points.append(corners) 87 | self.record_cnt += 1 88 | 89 | else: 90 | # we have already collected enough frames, so now we want to 91 | # calculate the intrinsic camera matrix (K) and the distortion 92 | # vector (dist) 93 | print("Calibrating...") 94 | ret, K, dist, rvecs, tvecs = cv2.calibrateCamera(self.obj_points, 95 | self.img_points, 96 | (self.imgHeight, 97 | self.imgWidth), 98 | None, None) 99 | print("K=", K) 100 | print("dist=", dist) 101 | 102 | # double-check reconstruction error (should be as close to zero as 103 | # possible) 104 | mean_error = 0 105 | for obj_point, rvec, tvec, img_point in zip( 106 | self.obj_points, rvecs, tvecs, self.img_points): 107 | img_points2, _ = cv2.projectPoints( 108 | obj_point, rvec, tvec, K, dist) 109 | error = cv2.norm(img_point, img_points2, 110 | cv2.NORM_L2) / len(img_points2) 111 | mean_error += error 112 | 113 | print("mean error=", mean_error) 114 | 115 | self.recording = False 116 | self._reset_recording() 117 | self.button_calibrate.Enable() 118 | return frame 119 | 120 | def _on_button_calibrate(self, event): 121 | """Enable recording mode upon pushing the button""" 122 | self.button_calibrate.Disable() 123 | self.recording = True 124 | self._reset_recording() 125 | 126 | def _reset_recording(self): 127 | """Disable recording mode and reset data structures""" 128 | self.record_cnt = 0 129 | self.obj_points = [] 130 | self.img_points = [] 131 | 132 | 133 | def main(): 134 | capture = cv2.VideoCapture(0) 135 | assert capture.isOpened(), "Can not connect to camera" 136 | capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640) 137 | capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) 138 | 139 | # start graphical user interface 140 | app = wx.App() 141 | layout = CameraCalibration(capture, title='Camera Calibration', fps=2) 142 | layout.Show(True) 143 | app.MainLoop() 144 | 145 | 146 | if __name__ == '__main__': 147 | main() 148 | -------------------------------------------------------------------------------- /chapter4/chapter4.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | OpenCV with Python Blueprints 6 | Chapter 4: 3D Scene Reconstruction Using Structure From Motion 7 | 8 | An app to detect and extract structure from motion on a pair of images 9 | using stereo vision. We will assume that the two images have been taken 10 | with the same camera, of which we know the internal camera parameters. If 11 | these parameters are not known, use calibrate.py to estimate them. 12 | 13 | The result is a point cloud that shows the 3D real-world coordinates 14 | of points in the scene. 15 | """ 16 | 17 | import numpy as np 18 | 19 | from scene3D import SceneReconstruction3D 20 | 21 | 22 | def main(): 23 | # camera matrix and distortion coefficients 24 | # can be recovered with calibrate.py 25 | # but the examples used here are already undistorted, taken with a camera 26 | # of known K 27 | K = np.array([[2759.48 / 4, 0, 1520.69 / 4, 0, 2764.16 / 4, 28 | 1006.81 / 4, 0, 0, 1]]).reshape(3, 3) 29 | d = np.array([0.0, 0.0, 0.0, 0.0, 0.0]).reshape(1, 5) 30 | scene = SceneReconstruction3D(K, d) 31 | 32 | # load a pair of images for which to perform SfM 33 | scene.load_image_pair("fountain_dense/0004.png", "fountain_dense/0005.png") 34 | 35 | # draw 3D point cloud of fountain 36 | # use "pan axes" button in pyplot to inspect the cloud (rotate and zoom 37 | # to convince you of the result) 38 | # scene.draw_epipolar_lines() 39 | # scene.plot_rectified_images() 40 | scene.plot_optic_flow() 41 | scene.plot_point_cloud() 42 | 43 | 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /chapter4/fountain_dense/0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0000.png -------------------------------------------------------------------------------- /chapter4/fountain_dense/0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0001.png -------------------------------------------------------------------------------- /chapter4/fountain_dense/0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0002.png -------------------------------------------------------------------------------- /chapter4/fountain_dense/0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0003.png -------------------------------------------------------------------------------- /chapter4/fountain_dense/0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0004.png 
-------------------------------------------------------------------------------- /chapter4/fountain_dense/0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0005.png -------------------------------------------------------------------------------- /chapter4/fountain_dense/0006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0006.png -------------------------------------------------------------------------------- /chapter4/fountain_dense/0007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0007.png -------------------------------------------------------------------------------- /chapter4/fountain_dense/0008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0008.png -------------------------------------------------------------------------------- /chapter4/fountain_dense/0009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0009.png -------------------------------------------------------------------------------- /chapter4/fountain_dense/0010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0010.png -------------------------------------------------------------------------------- /chapter4/scene3D.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """A module that contains an algorithm for 3D scene reconstruction """ 5 | 6 | import cv2 7 | import numpy as np 8 | import sys 9 | 10 | from mpl_toolkits.mplot3d import Axes3D 11 | import matplotlib.pyplot as plt 12 | from matplotlib import cm 13 | 14 | class SceneReconstruction3D: 15 | """3D scene reconstruction 16 | 17 | This class implements an algorithm for 3D scene reconstruction using 18 | stereo vision and structure-from-motion techniques. 19 | 20 | A 3D scene is reconstructed from a pair of images that show the same 21 | real-world scene from two different viewpoints. Feature matching is 22 | performed either with rich feature descriptors or based on optic flow. 23 | 3D coordinates are obtained via triangulation. 24 | 25 | Note that a complete structure-from-motion pipeline typically includes 26 | bundle adjustment and geometry fitting, which are out of scope for 27 | this project. 28 | """ 29 | 30 | def __init__(self, K, dist): 31 | """Constructor 32 | 33 | This method initializes the scene reconstruction algorithm. 
34 | 35 | :param K: 3x3 intrinsic camera matrix 36 | :param dist: vector of distortion coefficients 37 | """ 38 | self.K = K 39 | self.K_inv = np.linalg.inv(K) # store inverse for fast access 40 | self.d = dist 41 | 42 | def load_image_pair( 43 | self, 44 | img_path1: str, 45 | img_path2: str, 46 | use_pyr_down: bool = True) -> None: 47 | 48 | self.img1, self.img2 = [ 49 | cv2.undistort( 50 | self.load_image( 51 | path, use_pyr_down), self.K, self.d) for path in ( 52 | img_path1, img_path2)] 53 | 54 | @staticmethod 55 | def load_image( 56 | img_path: str, 57 | use_pyr_down: bool, 58 | target_width: int = 600) -> np.ndarray: 59 | """Loads pair of images 60 | 61 | This method loads the two images for which the 3D scene should be 62 | reconstructed. The two images should show the same real-world scene 63 | from two different viewpoints. 64 | 65 | :param img_path1: path to first image 66 | :param img_path2: path to second image 67 | :param use_pyr_down: flag whether to downscale the images to 68 | roughly 600px width (True) or not (False) 69 | """ 70 | 71 | img = cv2.imread(img_path, cv2.CV_8UC3) 72 | 73 | # make sure image is valid 74 | assert img is not None, f"Image {img_path} could not be loaded." 75 | if len(img.shape) == 2: 76 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 77 | 78 | # scale down image if necessary 79 | while use_pyr_down and img.shape[1] > 2 * target_width: 80 | img = cv2.pyrDown(img) 81 | return img 82 | 83 | def plot_optic_flow(self): 84 | """Plots optic flow field 85 | 86 | This method plots the optic flow between the first and second 87 | image. 88 | """ 89 | self._extract_keypoints_flow() 90 | 91 | img = np.copy(self.img1) 92 | for pt1, pt2 in zip(self.match_pts1, self.match_pts2): 93 | cv2.arrowedLine(img, tuple(pt1), tuple(pt2), 94 | color=(255, 0, 0)) 95 | 96 | cv2.imshow("imgFlow", img) 97 | cv2.waitKey() 98 | 99 | def draw_epipolar_lines(self, feat_mode: str = "SIFT"): 100 | """Draws epipolar lines 101 | 102 | This method computes and draws the epipolar lines of the two 103 | loaded images. 104 | 105 | :param feat_mode: whether to use rich descriptors for feature 106 | matching ("sift") or optic flow ("flow") 107 | """ 108 | self._extract_keypoints(feat_mode) 109 | self._find_fundamental_matrix() 110 | # Find epilines corresponding to points in right image (second image) 111 | # and drawing its lines on left image 112 | pts2re = self.match_pts2.reshape(-1, 1, 2) 113 | lines1 = cv2.computeCorrespondEpilines(pts2re, 2, self.F) 114 | lines1 = lines1.reshape(-1, 3) 115 | img3, img4 = self._draw_epipolar_lines_helper(self.img1, self.img2, 116 | lines1, self.match_pts1, 117 | self.match_pts2) 118 | 119 | # Find epilines corresponding to points in left image (first image) and 120 | # drawing its lines on right image 121 | pts1re = self.match_pts1.reshape(-1, 1, 2) 122 | lines2 = cv2.computeCorrespondEpilines(pts1re, 1, self.F) 123 | lines2 = lines2.reshape(-1, 3) 124 | img1, img2 = self._draw_epipolar_lines_helper(self.img2, self.img1, 125 | lines2, self.match_pts2, 126 | self.match_pts1) 127 | 128 | cv2.imshow("left", img1) 129 | cv2.imshow("right", img3) 130 | cv2.waitKey() 131 | 132 | def plot_rectified_images(self, feat_mode: str = "SIFT"): 133 | """Plots rectified images 134 | 135 | This method computes and plots a rectified version of the two 136 | images side by side. 
137 | 138 | :param feat_mode: whether to use rich descriptors for feature 139 | matching ("sift") or optic flow ("flow") 140 | """ 141 | self._extract_keypoints(feat_mode) 142 | self._find_fundamental_matrix() 143 | self._find_essential_matrix() 144 | self._find_camera_matrices_rt() 145 | 146 | R = self.Rt2[:, :3] 147 | T = self.Rt2[:, 3] 148 | # perform the rectification 149 | R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(self.K, self.d, 150 | self.K, self.d, 151 | self.img1.shape[:2], 152 | R, T, alpha=1.0) 153 | mapx1, mapy1 = cv2.initUndistortRectifyMap(self.K, self.d, R1, self.K, 154 | self.img1.shape[:2], 155 | cv2.CV_32F) 156 | mapx2, mapy2 = cv2.initUndistortRectifyMap(self.K, self.d, R2, self.K, 157 | self.img2.shape[:2], 158 | cv2.CV_32F) 159 | img_rect1 = cv2.remap(self.img1, mapx1, mapy1, cv2.INTER_LINEAR) 160 | img_rect2 = cv2.remap(self.img2, mapx2, mapy2, cv2.INTER_LINEAR) 161 | 162 | # draw the images side by side 163 | total_size = (max(img_rect1.shape[0], img_rect2.shape[0]), 164 | img_rect1.shape[1] + img_rect2.shape[1], 3) 165 | img = np.zeros(total_size, dtype=np.uint8) 166 | img[:img_rect1.shape[0], :img_rect1.shape[1]] = img_rect1 167 | img[:img_rect2.shape[0], img_rect1.shape[1]:] = img_rect2 168 | 169 | # draw horizontal lines every 25 px accross the side by side image 170 | for i in range(20, img.shape[0], 25): 171 | cv2.line(img, (0, i), (img.shape[1], i), (255, 0, 0)) 172 | 173 | cv2.imshow('imgRectified', img) 174 | cv2.waitKey() 175 | 176 | def plot_point_cloud(self, feat_mode="sift"): 177 | """Plots 3D point cloud 178 | 179 | This method generates and plots a 3D point cloud of the recovered 180 | 3D scene. 181 | 182 | :param feat_mode: whether to use rich descriptors for feature 183 | matching ("sift") or optic flow ("flow") 184 | """ 185 | self._extract_keypoints(feat_mode) 186 | self._find_fundamental_matrix() 187 | self._find_essential_matrix() 188 | self._find_camera_matrices_rt() 189 | 190 | # triangulate points 191 | first_inliers = np.array(self.match_inliers1)[:, :2] 192 | second_inliers = np.array(self.match_inliers2)[:, :2] 193 | pts4D = cv2.triangulatePoints(self.Rt1, self.Rt2, first_inliers.T, 194 | second_inliers.T).T 195 | 196 | # convert from homogeneous coordinates to 3D 197 | pts3D = pts4D[:, :3] / pts4D[:, 3, None] 198 | 199 | # plot with matplotlib 200 | Xs, Zs, Ys = [pts3D[:, i] for i in range(3)] 201 | 202 | fig = plt.figure() 203 | ax = fig.add_subplot(111, projection='3d') 204 | ax.scatter(Xs, Ys, Zs, c=Ys,cmap=cm.hsv, marker='o') 205 | ax.set_xlabel('X') 206 | ax.set_ylabel('Y') 207 | ax.set_zlabel('Z') 208 | plt.title('3D point cloud: Use pan axes button below to inspect') 209 | plt.show() 210 | 211 | def _extract_keypoints(self, feat_mode): 212 | """Extracts keypoints 213 | 214 | This method extracts keypoints for feature matching based on 215 | a specified mode: 216 | - "sift": use rich sift descriptor 217 | - "flow": use optic flow 218 | 219 | :param feat_mode: keypoint extraction mode ("sift" or "flow") 220 | """ 221 | # extract features 222 | if feat_mode.lower() == "sift": 223 | # feature matching via sift and BFMatcher 224 | self._extract_keypoints_sift() 225 | elif feat_mode.lower() == "flow": 226 | # feature matching via optic flow 227 | self._extract_keypoints_flow() 228 | else: 229 | sys.exit(f"Unknown feat_mode {feat_mode}. 
Use 'sift' or 'FLOW'") 230 | 231 | def _extract_keypoints_sift(self): 232 | """Extracts keypoints via sift descriptors""" 233 | # extract keypoints and descriptors from both images 234 | # detector = cv2.xfeatures2d.SIFT_create(contrastThreshold=0.11, edgeThreshold=10) 235 | detector = cv2.xfeatures2d.SIFT_create() 236 | first_key_points, first_desc = detector.detectAndCompute(self.img1, 237 | None) 238 | second_key_points, second_desc = detector.detectAndCompute(self.img2, 239 | None) 240 | # match descriptors 241 | matcher = cv2.BFMatcher(cv2.NORM_L1, True) 242 | matches = matcher.match(first_desc, second_desc) 243 | 244 | # generate lists of point correspondences 245 | self.match_pts1 = np.array( 246 | [first_key_points[match.queryIdx].pt for match in matches]) 247 | self.match_pts2 = np.array( 248 | [second_key_points[match.trainIdx].pt for match in matches]) 249 | 250 | def _extract_keypoints_flow(self): 251 | """Extracts keypoints via optic flow""" 252 | # find FAST features 253 | fast = cv2.FastFeatureDetector_create() 254 | first_key_points = fast.detect(self.img1) 255 | 256 | first_key_list = [i.pt for i in first_key_points] 257 | first_key_arr = np.array(first_key_list).astype(np.float32) 258 | 259 | second_key_arr, status, err = cv2.calcOpticalFlowPyrLK( 260 | self.img1, self.img2, first_key_arr, None) 261 | 262 | # filter out the points with high error 263 | # keep only entries with status=1 and small error 264 | condition = (status == 1) * (err < 5.) 265 | concat = np.concatenate((condition, condition), axis=1) 266 | first_match_points = first_key_arr[concat].reshape(-1, 2) 267 | second_match_points = second_key_arr[concat].reshape(-1, 2) 268 | 269 | self.match_pts1 = first_match_points 270 | self.match_pts2 = second_match_points 271 | 272 | def _find_fundamental_matrix(self): 273 | """Estimates fundamental matrix """ 274 | self.F, self.Fmask = cv2.findFundamentalMat(self.match_pts1, 275 | self.match_pts2, 276 | cv2.FM_RANSAC, 0.1, 0.99) 277 | 278 | def _find_essential_matrix(self): 279 | """Estimates essential matrix based on fundamental matrix """ 280 | self.E = self.K.T.dot(self.F).dot(self.K) 281 | 282 | def _find_camera_matrices_rt(self): 283 | """Finds the [R|t] camera matrix""" 284 | # decompose essential matrix into R, t (See Hartley and Zisserman 9.13) 285 | U, S, Vt = np.linalg.svd(self.E) 286 | W = np.array([0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 287 | 1.0]).reshape(3, 3) 288 | 289 | # iterate over all point correspondences used in the estimation of the 290 | # fundamental matrix 291 | first_inliers = [] 292 | second_inliers = [] 293 | for pt1, pt2, mask in zip( 294 | self.match_pts1, self.match_pts2, self.Fmask): 295 | if mask: 296 | # normalize and homogenize the image coordinates 297 | first_inliers.append(self.K_inv.dot([pt1[0], pt1[1], 1.0])) 298 | second_inliers.append(self.K_inv.dot([pt2[0], pt2[1], 1.0])) 299 | 300 | # Determine the correct choice of second camera matrix 301 | # only in one of the four configurations will all the points be in 302 | # front of both cameras 303 | 304 | R = T = None 305 | R = U.dot(W.T).dot(Vt) 306 | T = U[:, 2] 307 | for r in (U.dot(W).dot(Vt), U.dot(W.T).dot(Vt)): 308 | for t in (U[:, 2], -U[:, 2]): 309 | if self._in_front_of_both_cameras( 310 | first_inliers, second_inliers, r, t): 311 | R, T = r, t 312 | 313 | assert R is not None, "Camera matricies were never found" 314 | 315 | self.match_inliers1 = first_inliers 316 | self.match_inliers2 = second_inliers 317 | self.Rt1 = np.hstack((np.eye(3), np.zeros((3, 1)))) 318 | 
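        # the first camera is taken as the world reference, so its projection
        # matrix is simply [I | 0]; the second camera's [R | t] is the pose
        # recovered from the essential-matrix decomposition above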
self.Rt2 = np.hstack((R, T.reshape(3, 1))) 319 | 320 | def _draw_epipolar_lines_helper(self, img1, img2, lines, pts1, pts2): 321 | """Helper method to draw epipolar lines and features """ 322 | if img1.shape[2] == 1: 323 | img1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR) 324 | if img2.shape[2] == 1: 325 | img2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR) 326 | 327 | c = img1.shape[1] 328 | for r, pt1, pt2 in zip(lines, pts1, pts2): 329 | color = tuple(np.random.randint(0, 255, 3).tolist()) 330 | x0, y0 = map(int, [0, -r[2] / r[1]]) 331 | x1, y1 = map(int, [c, -(r[2] + r[0] * c) / r[1]]) 332 | cv2.line(img1, (x0, y0), (x1, y1), color, 1) 333 | cv2.circle(img1, tuple(pt1), 5, color, -1) 334 | cv2.circle(img2, tuple(pt2), 5, color, -1) 335 | return img1, img2 336 | 337 | def _in_front_of_both_cameras(self, first_points, second_points, rot, 338 | trans): 339 | """Determines whether point correspondences are in front of both 340 | images""" 341 | print("start") 342 | rot_inv = rot 343 | for first, second in zip(first_points, second_points): 344 | first_z = np.dot(rot[0, :] - second[0] * rot[2, :], 345 | trans) / np.dot(rot[0, :] - second[0] * rot[2, :], 346 | second) 347 | first_3d_point = np.array([first[0] * first_z, 348 | second[0] * first_z, first_z]) 349 | second_3d_point = np.dot(rot.T, first_3d_point) - np.dot(rot.T, 350 | trans) 351 | 352 | print(first_3d_point,second_3d_point) 353 | if first_3d_point[2] < 0 or second_3d_point[2] < 0: 354 | return False 355 | 356 | return True 357 | 358 | def _linear_ls_triangulation(self, u1, P1, u2, P2): 359 | """Triangulation via Linear-LS method""" 360 | # build A matrix for homogeneous equation system Ax=0 361 | # assume X = (x,y,z,1) for Linear-LS method 362 | # which turns it into AX=B system, where A is 4x3, X is 3x1 & B is 4x1 363 | A = np.array([u1[0] * P1[2, 0] - P1[0, 0], u1[0] * P1[2, 1] - P1[0, 1], 364 | u1[0] * P1[2, 2] - P1[0, 2], u1[1] * P1[2, 0] - P1[1, 0], 365 | u1[1] * P1[2, 1] - P1[1, 1], u1[1] * P1[2, 2] - P1[1, 2], 366 | u2[0] * P2[2, 0] - P2[0, 0], u2[0] * P2[2, 1] - P2[0, 1], 367 | u2[0] * P2[2, 2] - P2[0, 2], u2[1] * P2[2, 0] - P2[1, 0], 368 | u2[1] * P2[2, 1] - P2[1, 1], 369 | u2[1] * P2[2, 2] - P2[1, 2]]).reshape(4, 3) 370 | 371 | B = np.array([-(u1[0] * P1[2, 3] - P1[0, 3]), 372 | -(u1[1] * P1[2, 3] - P1[1, 3]), 373 | -(u2[0] * P2[2, 3] - P2[0, 3]), 374 | -(u2[1] * P2[2, 3] - P2[1, 3])]).reshape(4, 1) 375 | 376 | ret, X = cv2.solve(A, B, flags=cv2.DECOMP_SVD) 377 | return X.reshape(1, 3) 378 | -------------------------------------------------------------------------------- /chapter4/wx_gui.py: -------------------------------------------------------------------------------- 1 | ../wx_gui.py -------------------------------------------------------------------------------- /chapter5/common.py: -------------------------------------------------------------------------------- 1 | import rawpy 2 | import cv2 3 | import numpy as np 4 | 5 | 6 | def load_image(path, bps=16): 7 | if path.suffix == '.CR2': 8 | with rawpy.imread(str(path)) as raw: 9 | data = raw.postprocess(no_auto_bright=True, 10 | gamma=(1, 1), 11 | output_bps=bps) 12 | return data 13 | else: 14 | return cv2.imread(str(path)) 15 | 16 | 17 | def load_14bit_gray(path): 18 | img = load_image(path, bps=16) 19 | return (cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) / 4).astype(np.uint16) 20 | -------------------------------------------------------------------------------- /chapter5/gamma_correct.py: -------------------------------------------------------------------------------- 1 | import 
argparse 2 | from pathlib import Path 3 | import itertools 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from common import load_image, load_14bit_gray 7 | import functools 8 | 9 | 10 | @functools.lru_cache(maxsize=None) 11 | def gamma_transform(x, gamma, bps=14): 12 | return np.clip(pow(x / 2**bps, gamma) * 255.0, 0, 255) 13 | 14 | 15 | def apply_gamma(img, gamma, bps=14): 16 | corrected = img.copy() 17 | for i, j in itertools.product(range(corrected.shape[0]), 18 | range(corrected.shape[1])): 19 | corrected[i, j] = gamma_transform(corrected[i, j], gamma, bps=bps) 20 | return corrected 21 | 22 | 23 | if __name__ == '__main__': 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('raw_image', type=Path, 26 | help='Location of a .CR2 file.') 27 | parser.add_argument('--gamma', type=float, default=0.3) 28 | args = parser.parse_args() 29 | 30 | gray = load_14bit_gray(args.raw_image) 31 | 32 | normal = np.clip(gray / 64, 0, 255).astype(np.uint8) 33 | 34 | corrected = apply_gamma(gray, args.gamma) 35 | 36 | fig, axes = plt.subplots(2, 2, sharey=False) 37 | 38 | for i, img in enumerate([normal, corrected]): 39 | axes[1, i].hist(img.flatten(), bins=256) 40 | axes[1, i].set_ylim(top=1.5e-2 * len(img.flatten())) 41 | axes[1, i].set_xlabel('Brightness (8 bits)') 42 | axes[1, i].set_ylabel('Number of pixels') 43 | axes[0, i].imshow(img, cmap='gray', vmax=255) 44 | plt.title('Histogram of pixel values') 45 | plt.savefig('histogram.png') 46 | plt.show() 47 | -------------------------------------------------------------------------------- /chapter5/hdr.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from matplotlib import cm 3 | import itertools 4 | import numpy as np 5 | from pathlib import Path 6 | from matplotlib import pyplot as plt 7 | import cv2 8 | from common import load_image 9 | 10 | import exifread 11 | 12 | 13 | MARKERS = ['o', '+', 'x', '*', '.', 'X', '1', 'v', 'D'] 14 | 15 | 16 | def thumbnail(img_rgb, long_edge=400): 17 | original_long_edge = max(img_rgb.shape[:2]) 18 | dimensions = tuple([int(x / original_long_edge * long_edge) for x in img_rgb.shape[:2]][::-1]) 19 | print('dimensions', dimensions) 20 | return cv2.resize(img_rgb, dimensions, interpolation=cv2.INTER_AREA) 21 | 22 | 23 | def exposure_strength(path, iso_ref=100, f_stop_ref=6.375): 24 | with open(path, 'rb') as infile: 25 | tags = exifread.process_file(infile) 26 | [f_stop] = tags['EXIF ApertureValue'].values 27 | [iso_speed] = tags['EXIF ISOSpeedRatings'].values 28 | [exposure_time] = tags['EXIF ExposureTime'].values 29 | 30 | rel_aperture_area = 1 / (f_stop.num / f_stop.den / f_stop_ref) ** 2 31 | exposure_time_float = exposure_time.num / exposure_time.den 32 | 33 | score = rel_aperture_area * exposure_time_float * iso_speed / iso_ref 34 | return score, np.log2(score) 35 | 36 | 37 | def lowe_match(descriptors1, descriptors2): 38 | FLANN_INDEX_KDTREE = 0 39 | index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5) 40 | search_params = dict(checks=50) 41 | flann = cv2.FlannBasedMatcher(index_params, search_params) 42 | 43 | matches = flann.knnMatch(descriptors1, descriptors2, k=2) 44 | # discard bad matches, ratio test as per Lowe's paper 45 | good_matches = [m for m, n in matches 46 | if m.distance < 0.7 * n.distance] 47 | return good_matches 48 | 49 | 50 | def save_8bit(img, name): 51 | img_8bit = np.clip(img * 255, 0, 255).astype('uint8') 52 | cv2.imwrite(name, img_8bit) 53 | return img_8bit 54 | 55 | 56 | OPEN_CV_COLORS = 'bgr' 
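# Note on exposure_strength() above: the relative exposure used for HDR
# merging follows the photographic relation  exposure ~ t * ISO / N^2
# (shutter time t, ISO speed, f-number N), normalized by the iso_ref and
# f_stop_ref defaults. A minimal usage sketch (the .CR2 file name below is
# hypothetical):
#
#     score, log2_score = exposure_strength('IMG_0001.CR2')
#     # doubling the shutter time doubles `score` and raises `log2_score` by 1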
57 | 58 | 59 | def plot_crf(crf, colors=OPEN_CV_COLORS): 60 | for i, c in enumerate(colors): 61 | plt.plot(crf_debevec[:, 0, i], color=c) 62 | 63 | 64 | if __name__ == '__main__': 65 | parser = argparse.ArgumentParser() 66 | img_group = parser.add_mutually_exclusive_group(required=True) 67 | img_group.add_argument('--image-dir', type=Path) 68 | img_group.add_argument('--images', type=Path, nargs='+') 69 | parser.add_argument('--show-steps', action='store_true') 70 | parser.add_argument('--random-seed', type=int, default=43) 71 | parser.add_argument('--num-pixels', type=int, default=100) 72 | parser.add_argument('--align-images', action='store_true') 73 | parser.add_argument('--debug-color', choices=OPEN_CV_COLORS, default='g') 74 | args = parser.parse_args() 75 | 76 | if args.image_dir: 77 | args.images = sorted(args.image_dir.iterdir()) 78 | 79 | args.color_i = OPEN_CV_COLORS.find(args.debug_color) 80 | 81 | images = [load_image(p, bps=8) for p in args.images] 82 | times = [exposure_strength(p)[0] for p in args.images] 83 | times_array = np.array(times, dtype=np.float32) 84 | print('times', times_array) 85 | 86 | if args.show_steps: 87 | np.random.seed(args.random_seed) 88 | pixel_values = {} 89 | while len(pixel_values) < args.num_pixels: 90 | i = np.random.randint(0, high=images[0].shape[0] - 1) 91 | j = np.random.randint(0, high=images[0].shape[1] - 1) 92 | 93 | new_val = images[0][i, j, args.color_i] 94 | good_pixel = True 95 | for vv in pixel_values.values(): 96 | if np.abs(vv[0].astype(int) - new_val.astype(int)) < 100 // args.num_pixels: 97 | good_pixel = False 98 | break 99 | 100 | if good_pixel: 101 | pixel_values[(i, j)] = [img[i, j, args.color_i] for img in images] 102 | 103 | log_ts = [np.log2(t) for t in times] 104 | 105 | for [(i, j), vv], marker in zip(pixel_values.items(), MARKERS): 106 | plt.scatter(vv, log_ts, marker=marker, label=f'Pixel [{i}, {j}]') 107 | plt.xlabel('Output Pixel value (8-bit)') 108 | plt.ylabel('log exposure') 109 | plt.legend() 110 | plt.show() 111 | 112 | cal_debevec = cv2.createCalibrateDebevec(samples=200) 113 | print('Calibrated Debevec') 114 | crf_debevec = cal_debevec.process(images, times=times_array) 115 | 116 | merge_debevec = cv2.createMergeDebevec() 117 | hdr_debevec = merge_debevec.process(images, times=times_array.copy(), response=crf_debevec) 118 | 119 | print("merged") 120 | 121 | if args.show_steps: 122 | for [(i, j), vv], marker in zip(pixel_values.items(), MARKERS): 123 | e = hdr_debevec[i, j, args.color_i] 124 | plt.scatter(vv, np.array(log_ts) + np.log(e) + 1.6, 125 | marker=marker, 126 | label=f'Pixel [{i}, {j}]') 127 | plt.plot(np.log(crf_debevec[:, 0, args.color_i]), 128 | color=OPEN_CV_COLORS[args.color_i]) 129 | plt.tight_layout() 130 | plt.legend() 131 | plt.show() 132 | # Tonemap HDR image 133 | tonemap1 = cv2.createTonemap(gamma=2.2) 134 | res_debevec = tonemap1.process(hdr_debevec.copy()) 135 | x = save_8bit(res_debevec, 'res_debevec.jpg') 136 | plt.imshow(x) 137 | plt.show() 138 | 139 | if args.show_steps: 140 | merge_robertson = cv2.createMergeRobertson() 141 | hdr_robertson = merge_robertson.process(images, times=times_array.copy()) 142 | # Tonemap HDR image 143 | tonemap1 = cv2.createTonemap(gamma=2.2) 144 | res_robertson = tonemap1.process(hdr_robertson) 145 | save_8bit(res_robertson, 'res_robertson.jpg') 146 | 147 | # Exposure fusion using Mertens 148 | merge_mertens = cv2.createMergeMertens() 149 | res_mertens = merge_mertens.process(images) 150 | save_8bit(res_mertens, 'res_mertens.jpg') 151 | 
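# ---------------------------------------------------------------------------
# A minimal, self-contained sketch of the Debevec HDR pipeline used in the
# script above, with the plotting and debugging steps stripped out. The file
# names and exposure times below are made up for illustration; the script
# itself takes them from the command line and from EXIF data via
# exposure_strength().
#
#     import cv2
#     import numpy as np
#
#     imgs = [cv2.imread(p) for p in ('short.jpg', 'mid.jpg', 'long.jpg')]
#     times = np.array([1 / 250, 1 / 60, 1 / 15], dtype=np.float32)
#
#     crf = cv2.createCalibrateDebevec().process(imgs, times=times)
#     hdr = cv2.createMergeDebevec().process(imgs, times=times, response=crf)
#     ldr = cv2.createTonemap(gamma=2.2).process(hdr)
#     cv2.imwrite('hdr_preview.jpg', np.clip(ldr * 255, 0, 255).astype('uint8'))
# ---------------------------------------------------------------------------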
-------------------------------------------------------------------------------- /chapter5/panorama.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | import numpy as np 4 | from hdr import load_image 5 | import cv2 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser() 10 | img_group = parser.add_mutually_exclusive_group(required=True) 11 | img_group.add_argument('--image-dir', type=Path) 12 | img_group.add_argument('--images', type=Path, nargs='+') 13 | parser.add_argument('--show-steps', action='store_true') 14 | args = parser.parse_args() 15 | 16 | if args.image_dir: 17 | args.images = sorted(args.image_dir.iterdir()) 18 | return args 19 | 20 | 21 | def largest_connected_subset(images): 22 | finder = cv2.xfeatures2d_SURF.create() 23 | all_img_features = [cv2.detail.computeImageFeatures2(finder, img) 24 | for img in images] 25 | 26 | matcher = cv2.detail.BestOf2NearestMatcher_create(False, 0.6) 27 | pair_matches = matcher.apply2(all_img_features) 28 | matcher.collectGarbage() 29 | 30 | _conn_indices = cv2.detail.leaveBiggestComponent(all_img_features, pair_matches, 0.4) 31 | conn_indices = [i for [i] in _conn_indices] 32 | if len(conn_indices) < 2: 33 | raise RuntimeError("Need 2 or more connected images.") 34 | 35 | conn_features = np.array([all_img_features[i] for i in conn_indices]) 36 | conn_images = [images[i] for i in conn_indices] 37 | 38 | if len(conn_images) < len(images): 39 | pair_matches = matcher.apply2(conn_features) 40 | matcher.collectGarbage() 41 | 42 | return conn_images, conn_features, pair_matches 43 | 44 | 45 | def find_camera_parameters(features, pair_matches): 46 | estimator = cv2.detail_HomographyBasedEstimator() 47 | success, cameras = estimator.apply(features, pair_matches, None) 48 | if not success: 49 | raise RuntimeError("Homography estimation failed.") 50 | 51 | for cam in cameras: 52 | cam.R = cam.R.astype(np.float32) 53 | 54 | adjuster = cv2.detail_BundleAdjusterRay() 55 | adjuster.setConfThresh(0.8) 56 | 57 | refine_mask = np.array([[1, 1, 1], 58 | [0, 1, 1], 59 | [0, 0, 0]], dtype=np.uint8) 60 | adjuster.setRefinementMask(refine_mask) 61 | 62 | success, cameras = adjuster.apply(features, p, cameras) 63 | 64 | if not success: 65 | raise RuntimeError("Camera parameters adjusting failed.") 66 | 67 | print(cameras) 68 | return cameras 69 | 70 | 71 | if __name__ == '__main__': 72 | args = parse_args() 73 | all_images = [load_image(p, bps=8) for p in args.images] 74 | 75 | 76 | conn_images, features, p = largest_connected_subset(all_images) 77 | 78 | cameras = find_camera_parameters(features, p) 79 | 80 | focals = [cam.focal for cam in cameras] 81 | warped_image_scale = np.mean(focals) 82 | 83 | # corners, sizes, images_warped, masks_warped = [], [], [], [] 84 | 85 | # warper = cv2.PyRotationWarper('plane', warped_image_scale) 86 | # for i, img in enumerate(conn_images): 87 | # K = cameras[i].K().astype(np.float32) 88 | # corner, image_wp = warper.warp(img, K, cameras[i].R, 89 | # cv2.INTER_LINEAR, cv2.BORDER_REFLECT) 90 | 91 | # corners.append(corner) 92 | # sizes.append((image_wp.shape[1], image_wp.shape[0])) 93 | # images_warped.append(image_wp) 94 | # mask = cv2.UMat(255 * np.ones((img.shape[0], img.shape[1]), np.uint8)) 95 | # p, mask_wp = warper.warp(mask, K, cameras[i].R, 96 | # cv2.INTER_NEAREST, cv2.BORDER_CONSTANT) 97 | 98 | # # masks_warped.append(mask_wp.get()) 99 | 100 | # images_warped_f = [img.astype(np.float32) for im in images_warped] 101 | 
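    # the commented-out lines above and below sketch an optional exposure
    # compensation / seam finding stage (as in OpenCV's stitching_detailed
    # sample); the final pipeline further below skips it and feeds the warped
    # images straight into the multi-band blender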
102 | # compensator = cv2.detail.ExposureCompensator_createDefault( 103 | # cv2.detail.ExposureCompensator_NO) 104 | # compensator.feed(corners=corners, images=images_warped, masks=masks_warped) 105 | 106 | # seam_finder = cv2.detail.SeamFinder_createDefault(cv2.detail.SeamFinder_NO) 107 | # seam_finder.find(images_warped_f, corners, masks_warped) 108 | 109 | stitch_sizes, stitch_corners = [], [] 110 | 111 | warper = cv2.PyRotationWarper('plane', warped_image_scale) 112 | for i, img in enumerate(conn_images): 113 | sz = img.shape[1], img.shape[0] 114 | K = cameras[i].K().astype(np.float32) 115 | roi = warper.warpRoi(sz, K, cameras[i].R) 116 | stitch_corners.append(roi[0:2]) 117 | stitch_sizes.append(roi[2:4]) 118 | 119 | canvas_size = cv2.detail.resultRoi(corners=stitch_corners, 120 | sizes=stitch_sizes) 121 | 122 | blend_width = np.sqrt(canvas_size[2] * canvas_size[3]) * 5 / 100 123 | if blend_width < 1: 124 | blender = cv2.detail.Blender_createDefault(cv2.detail.Blender_NO) 125 | else: 126 | blender = cv2.detail_MultiBandBlender() 127 | blender.setNumBands((np.log(blend_width) / np.log(2.) - 1.).astype(np.int)) 128 | blender.prepare(canvas_size) 129 | 130 | for i, img in enumerate(conn_images): 131 | 132 | K = cameras[i].K().astype(np.float32) 133 | 134 | corner, image_wp = warper.warp(img, K, cameras[i].R, 135 | cv2.INTER_LINEAR, cv2.BORDER_REFLECT) 136 | 137 | mask = 255 * np.ones((img.shape[0], img.shape[1]), np.uint8) 138 | _, mask_wp = warper.warp(mask, K, cameras[i].R, 139 | cv2.INTER_NEAREST, cv2.BORDER_CONSTANT) 140 | 141 | # compensator.apply(i, stitch_corners[i], image_wp, mask_wp) 142 | image_warped_s = image_wp.astype(np.int16) 143 | # image_wp = [] 144 | 145 | # dilated_mask = cv2.dilate(masks_warped[i], None) 146 | # seam_mask = cv2.resize(dilated_mask, 147 | # (mask_wp.shape[1], mask_wp.shape[0]), 148 | # 0, 149 | # 0, 150 | # cv2.INTER_LINEAR_EXACT) 151 | # mask_warped = cv2.bitwise_and(seam_mask, mask_wp) 152 | # mask_warped = mask_wp 153 | 154 | blender.feed(cv2.UMat(image_warped_s), mask_wp, stitch_corners[i]) 155 | 156 | result, result_mask = blender.blend(None, None) 157 | cv2.imwrite('result.jpg', result) 158 | 159 | zoomx = 600.0 / result.shape[1] 160 | dst = cv2.normalize(src=result, dst=None, alpha=255., 161 | norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U) 162 | dst = cv2.resize(dst, dsize=None, fx=zoomx, fy=zoomx) 163 | cv2.imwrite('dst.png', dst) 164 | cv2.imwrite('dst.jpeg', dst) 165 | cv2.imshow('panorama', dst) 166 | cv2.waitKey() 167 | -------------------------------------------------------------------------------- /chapter6/chapter6.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | OpenCV with Python Blueprints 6 | Chapter 5: Tracking Visually Salient Objects 7 | 8 | An app to track multiple visually salient objects in a video sequence. 
9 | """ 10 | 11 | import cv2 12 | from os import path 13 | 14 | from saliency import get_saliency_map, get_proto_objects_map 15 | from tracking import MultipleObjectsTracker 16 | 17 | import time 18 | 19 | def main(video_file='soccer.avi', roi=((140, 100), (500, 600))): 20 | if not path.isfile(video_file): 21 | print(f'File "{video_file}" does not exist.') 22 | raise SystemExit 23 | 24 | # open video file 25 | video = cv2.VideoCapture(video_file) 26 | 27 | # initialize tracker 28 | mot = MultipleObjectsTracker() 29 | 30 | for _, img in iter(video.read, (False, None)): 31 | if roi: 32 | # original video is too big: grab some meaningful ROI 33 | img = img[roi[0][0]:roi[1][0], roi[0][1]:roi[1][1]] 34 | 35 | # generate saliency map 36 | saliency = get_saliency_map(img, use_numpy_fft=False, 37 | gauss_kernel=(3, 3)) 38 | objects = get_proto_objects_map(saliency, use_otsu=False) 39 | cv2.imshow('original', img) 40 | cv2.imshow('saliency', saliency) 41 | cv2.imshow('objects', objects) 42 | cv2.imshow('tracker', mot.advance_frame(img, objects,saliency)) 43 | # time.sleep(1) 44 | if cv2.waitKey(100) & 0xFF == ord('q'): 45 | break 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /chapter6/saliency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | A module to generate a saliency map from an RGB image 6 | 7 | This code is based on the approach described in: 8 | [1] X. Hou and L. Zhang (2007). Saliency Detection: A Spectral Residual 9 | Approach. IEEE Transactions on Computer Vision and Pattern Recognition 10 | (CVPR), p.1-8. doi: 10.1109/CVPR.2007.383267 11 | """ 12 | 13 | import cv2 14 | import numpy as np 15 | from matplotlib import pyplot as plt 16 | from typing import Tuple 17 | 18 | 19 | def _calc_channel_sal_magn(channel: np.ndarray, 20 | use_numpy_fft: bool = True) -> np.ndarray: 21 | """ 22 | Calculate the log-magnitude of the Fourier spectrum 23 | of a single-channel image. This image could be a regular grayscale 24 | image, or a single color channel of an RGB image. 
25 | 26 | :param channel: single-channel input image 27 | :returns: log-magnitude of Fourier spectrum 28 | """ 29 | # do FFT and get log-spectrum 30 | if use_numpy_fft: 31 | img_dft = np.fft.fft2(channel) 32 | magnitude, angle = cv2.cartToPolar(np.real(img_dft), 33 | np.imag(img_dft)) 34 | else: 35 | img_dft = cv2.dft(np.float32(channel), 36 | flags=cv2.DFT_COMPLEX_OUTPUT) 37 | magnitude, angle = cv2.cartToPolar(img_dft[:, :, 0], 38 | img_dft[:, :, 1]) 39 | 40 | # get log amplitude 41 | log_ampl = np.log10(magnitude.clip(min=1e-9)) 42 | 43 | # blur log amplitude with avg filter 44 | log_ampl_blur = cv2.blur(log_ampl, (3, 3)) 45 | 46 | # residual 47 | residual = np.exp(log_ampl - log_ampl_blur) 48 | 49 | # back to cartesian frequency domain 50 | if use_numpy_fft: 51 | real_part, imag_part = cv2.polarToCart(residual, angle) 52 | img_combined = np.fft.ifft2(real_part + 1j * imag_part) 53 | magnitude, _ = cv2.cartToPolar(np.real(img_combined), 54 | np.imag(img_combined)) 55 | else: 56 | img_dft[:, :, 0], img_dft[:, :, 1] = cv2.polarToCart(residual, 57 | angle) 58 | img_combined = cv2.idft(img_dft) 59 | magnitude, _ = cv2.cartToPolar(img_combined[:, :, 0], 60 | img_combined[:, :, 1]) 61 | 62 | return magnitude 63 | 64 | 65 | def get_saliency_map(frame: np.ndarray, 66 | small_shape: Tuple[int] = (64, 64), 67 | gauss_kernel: Tuple[int] = (5, 5), 68 | use_numpy_fft: bool = True) -> np.ndarray: 69 | """ 70 | Returns a saliency map 71 | 72 | This function generates a saliency map for the image that was 73 | passed to the class constructor. 74 | 75 | :returns: grayscale saliency map 76 | """ 77 | frame_small = cv2.resize(frame, small_shape) 78 | if len(frame.shape) == 2: 79 | # single channelsmall_shape[1::-1] 80 | sal = _calc_channel_sal_magn(frame, use_numpy_fft) 81 | else: 82 | # multiple channels: consider each channel independently 83 | sal = np.zeros_like(frame_small).astype(np.float32) 84 | for c in range(frame_small.shape[2]): 85 | small = frame_small[:, :, c] 86 | sal[:, :, c] = _calc_channel_sal_magn(small, use_numpy_fft) 87 | 88 | # overall saliency: channel mean 89 | sal = np.mean(sal, axis=2) 90 | 91 | # postprocess: blur, normalize, and square 92 | if gauss_kernel is not None: 93 | sal = cv2.GaussianBlur(sal, gauss_kernel, sigmaX=8, sigmaY=0) 94 | 95 | sal /= np.max(sal) 96 | return cv2.resize(sal ** 2, frame.shape[1::-1]) 97 | 98 | 99 | def get_proto_objects_map(saliency: np.ndarray, use_otsu=True) -> np.ndarray: 100 | """ 101 | Generate the proto-objects map of an RGB image 102 | 103 | Proto-objects are saliency hot spots, generated by thresholding 104 | the saliency map. 105 | 106 | :param use_otsu: flag whether to use Otsu thresholding (True) or 107 | a hardcoded threshold value (False) 108 | :saliency grayscale saliency map 109 | :returns: proto-objects map 110 | """ 111 | saliency = np.uint8(saliency * 255) 112 | if use_otsu: 113 | thresh_type = cv2.THRESH_OTSU 114 | # For threshold value, simply pass zero. 
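        # (cv2.threshold ignores the passed threshold value when
        # cv2.THRESH_OTSU is set and derives the optimal value from the
        # image histogram instead.)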
115 | thresh_value = 0 116 | else: 117 | thresh_type = cv2.THRESH_BINARY 118 | thresh_value = np.mean(saliency) * 3 119 | 120 | _, img_objects = cv2.threshold(saliency, 121 | thresh_value, 255, thresh_type) 122 | return img_objects 123 | 124 | 125 | def plot_power_spectrum(frame: np.ndarray, use_numpy_fft=True) -> None: 126 | """Plot the power spectrum of image""" 127 | 128 | # convert the frame to grayscale if necessary 129 | if len(frame.shape) > 2: 130 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 131 | 132 | # expand the image to an optimal size for FFT 133 | rows, cols = frame.shape 134 | nrows = cv2.getOptimalDFTSize(rows) 135 | ncols = cv2.getOptimalDFTSize(cols) 136 | frame = cv2.copyMakeBorder(frame, 0, ncols - cols, 0, nrows - rows, 137 | cv2.BORDER_CONSTANT, value=0) 138 | 139 | # do FFT and get log-spectrum 140 | if use_numpy_fft: 141 | img_dft = np.fft.fft2(frame) 142 | spectrum = np.log10(np.real(np.abs(img_dft))**2) 143 | else: 144 | img_dft = cv2.dft(np.float32(frame), flags=cv2.DFT_COMPLEX_OUTPUT) 145 | spectrum = np.log10(img_dft[:, :, 0]**2 + img_dft[:, :, 1]**2) 146 | 147 | # radial average 148 | L = max(frame.shape) 149 | freqs = np.fft.fftfreq(L)[:L // 2] 150 | dists = np.sqrt(np.fft.fftfreq(frame.shape[0])[:, np.newaxis]**2 + 151 | np.fft.fftfreq(frame.shape[1])**2) 152 | dcount = np.histogram(dists.ravel(), bins=freqs)[0] 153 | histo, bins = np.histogram(dists.ravel(), bins=freqs, 154 | weights=spectrum.ravel()) 155 | 156 | centers = (bins[:-1] + bins[1:]) / 2 157 | plt.plot(centers, histo / dcount) 158 | plt.xlabel('frequency') 159 | plt.ylabel('log-spectrum') 160 | plt.show() 161 | 162 | 163 | def calc_magnitude_spectrum(img: np.ndarray): 164 | """Plot the magnitude spectrum 165 | This method calculates the magnitude spectrum of the image passed 166 | to the class constructor. 
167 | :returns: magnitude spectrum 168 | """ 169 | # convert the frame to grayscale if necessary 170 | if len(img.shape) > 2: 171 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 172 | 173 | # expand the image to an optimal size for FFT 174 | rows, cols = img.shape 175 | nrows = cv2.getOptimalDFTSize(rows) 176 | ncols = cv2.getOptimalDFTSize(cols) 177 | frame = cv2.copyMakeBorder(img, 0, ncols - cols, 0, nrows - rows, 178 | cv2.BORDER_CONSTANT, value=0) 179 | 180 | # do FFT and get log-spectrum 181 | img_dft = np.fft.fft2(img) 182 | spectrum = np.log10(np.abs(np.fft.fftshift(img_dft))) 183 | 184 | # return normalized 185 | return spectrum / np.max(spectrum) 186 | 187 | 188 | if __name__ == '__main__': 189 | video = cv2.VideoCapture('soccer.avi') 190 | _, im = video.read() 191 | plt.imshow(im) 192 | plot_power_spectrum(im) 193 | plt.imshow(calc_magnitude_spectrum(cv2.imread("test.jpeg"))) 194 | -------------------------------------------------------------------------------- /chapter6/soccer.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter6/soccer.avi -------------------------------------------------------------------------------- /chapter6/tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """A module that contains an algorithm for multiple-objects tracking""" 5 | 6 | import cv2 7 | import numpy as np 8 | import copy 9 | import itertools 10 | 11 | 12 | class MultipleObjectsTracker: 13 | """ 14 | Multiple-objects tracker 15 | 16 | This class implements an algorithm for tracking multiple objects in 17 | a video sequence. 18 | The algorithm combines a saliency map for object detection and 19 | mean-shift tracking for object tracking. 20 | """ 21 | 22 | def __init__(self, min_object_area: int = 400, 23 | min_speed_per_pix: float = 0.02): 24 | """ 25 | Constructor 26 | 27 | This method initializes the multiple-objects tracking algorithm. 
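        Boxes whose average per-frame displacement (normalized by the box
        width) stays below min_speed_per_pix are treated as static and are
        not drawn.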
28 | 29 | :param min_area: Minimum area for a proto-object contour to be 30 | considered a real object 31 | """ 32 | self.object_boxes = [] 33 | self.min_object_area = min_object_area 34 | self.min_speed_per_pix = min_speed_per_pix 35 | self.num_frame_tracked = 0 36 | # Setup the termination criteria, either 100 iteration or move by at 37 | # least 1 pt 38 | self.term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 39 | 5, 1) 40 | 41 | def advance_frame(self, 42 | frame: np.ndarray, 43 | proto_objects_map: np.ndarray, 44 | saliency: np.ndarray) -> np.ndarray: 45 | """ 46 | Advance the algorithm by a single frame 47 | 48 | certain targets are discarded: 49 | - targets that are too small 50 | - targets that don't move 51 | 52 | :param frame: New input RGB frame 53 | :param proto_objects_map: corresponding proto-objects map of the 54 | frame 55 | :param saliency: TODO: EXPLAIN 56 | :returns: frame annotated with bounding boxes around all objects 57 | that are being tracked 58 | """ 59 | print(f"Objects are tracked for {self.num_frame_tracked} frame") 60 | 61 | # Build a list all bounding boxes found from the 62 | # current proto-objects map 63 | object_contours, _ = cv2.findContours(proto_objects_map, 1, 2) 64 | object_boxes = [cv2.boundingRect(contour) 65 | for contour in object_contours 66 | if cv2.contourArea(contour) > self.min_object_area] 67 | 68 | if len(self.object_boxes) >= len(object_boxes): 69 | # Continue tracking with meanshift if number of salient objects 70 | # didn't increase 71 | object_boxes = [cv2.meanShift(saliency, box, self.term_crit)[1] 72 | for box in self.object_boxes] 73 | self.num_frame_tracked += 1 74 | else: 75 | # Otherwise restart tracking 76 | self.num_frame_tracked = 0 77 | self.object_initial_centers = [ 78 | (x + w / 2, y + h / 2) for (x, y, w, h) in object_boxes] 79 | 80 | # Remember current objects 81 | self.object_boxes = object_boxes 82 | 83 | return self.draw_good_boxes(copy.deepcopy(frame)) 84 | 85 | def draw_good_boxes(self, frame: np.ndarray) -> np.ndarray: 86 | # Find total displacement length for each object 87 | # and normalize by object size 88 | displacements = [((x + w / 2 - cx)**2 + (y + w / 2 - cy)**2)**0.5 / w 89 | for (x, y, w, h), (cx, cy) 90 | in zip(self.object_boxes, self.object_initial_centers)] 91 | # Draw objects that move and their numbers 92 | for (x, y, w, h), displacement, i in zip( 93 | self.object_boxes, displacements, itertools.count()): 94 | # Draw only those which have some avarage speed 95 | if displacement / (self.num_frame_tracked + 0.01) > self.min_speed_per_pix: 96 | cv2.rectangle(frame, (x, y), (x + w, y + h), 97 | (0, 255, 0), 2) 98 | cv2.putText(frame, str(i), (x, y), 99 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) 100 | return frame 101 | -------------------------------------------------------------------------------- /chapter6/tracking_api.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | # Define Constants 8 | FONT = cv2.FONT_HERSHEY_SIMPLEX 9 | GREEN = (20, 200, 20) 10 | RED = (20, 20, 255) 11 | 12 | # Define trackers 13 | trackers = { 14 | 'BOOSTING': cv2.TrackerBoosting_create, 15 | 'MIL': cv2.TrackerMIL_create, 16 | 'KCF': cv2.TrackerKCF_create, 17 | 'TLD': cv2.TrackerTLD_create, 18 | 'MEDIANFLOW': cv2.TrackerMedianFlow_create, 19 | 'GOTURN': cv2.TrackerGOTURN_create, 20 | 'MOSSE': cv2.TrackerMOSSE_create, 21 | 'CSRT': cv2.TrackerCSRT_create 22 | 23 | } 24 | 25 | # Parse 
arguments 26 | parser = argparse.ArgumentParser(description='Tracking API demo.') 27 | parser.add_argument( 28 | '--tracker', 29 | default="KCF", 30 | help=f"One of {trackers.keys()}") 31 | parser.add_argument( 32 | '--video', 33 | help="Video file to use", 34 | default="videos/test.mp4") 35 | args = parser.parse_args() 36 | 37 | 38 | tracker_name = args.tracker.upper() 39 | assert tracker_name in trackers, f"Tracker should be one of {trackers.keys()}" 40 | # Open the video and read the first frame 41 | video = cv2.VideoCapture(args.video) 42 | assert video.isOpened(), "Could not open video" 43 | ok, frame = video.read() 44 | assert ok, "Video file is not readable" 45 | 46 | # Select bounding box 47 | bbox = cv2.selectROI(frame, False) 48 | 49 | # Initialize the tracker 50 | tracker = trackers[tracker_name]() 51 | tracker.init(frame, bbox) 52 | 53 | for ok, frame in iter(video.read, (False, None)): 54 | # Time in seconds 55 | start_time = time.time() 56 | 57 | # Update tracker 58 | ok, bbox = tracker.update(frame) 59 | 60 | # Calculate FPS 61 | fps = 1 / (time.time() - start_time) 62 | 63 | # Display tracking info and show frame 64 | if ok: 65 | # Draw bounding box 66 | x, y, w, h = np.array(bbox, dtype=np.int) 67 | cv2.rectangle(frame, (x, y), (x + w, y + w), GREEN, 2, 1) 68 | else: 69 | # Tracking failure 70 | cv2.putText(frame, "Tracking failed", (100, 80), FONT, 0.7, RED, 2) 71 | cv2.putText(frame, f"{tracker_name} Tracker", 72 | (100, 20), FONT, 0.7, GREEN, 2) 73 | cv2.putText(frame, f"FPS : {fps:.0f}", (100, 50), FONT, 0.7, GREEN, 2) 74 | cv2.imshow("Tracking", frame) 75 | 76 | # Exit if ESC pressed 77 | if cv2.waitKey(1) & 0xff == 27: 78 | break 79 | -------------------------------------------------------------------------------- /chapter7/chapter7.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | OpenCV with Python Blueprints 6 | Chapter 7: Learning to Recognize Traffic Signs 7 | 8 | Traffic sign recognition using support vector machines (SVMs). 
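The main() routine loads a subset of GTSRB classes, converts the images
into feature vectors (HOG, grayscale, HSV, or SURF), trains an SVM on each
representation, and compares the resulting test accuracies in a bar plot.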
9 | """ 10 | 11 | import cv2 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | 15 | from data.gtsrb import load_training_data 16 | from data.gtsrb import load_test_data 17 | from data.process import surf_featurize, hog_featurize 18 | from data.process import hsv_featurize, grayscale_featurize 19 | 20 | 21 | def train_MLP(X_train, y_train): 22 | mlp = cv2.ml.ANN_MLP_create() 23 | mlp.setLayerSizes(np.array([784, 512, 512, 10])) 24 | mlp.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM, 2.5, 1.0) 25 | mlp.setTrainingMethod(cv2.ml.ANN_MLP.BACKPROP) 26 | mlp.train(X_train, cv2.ml.ROW_SAMPLE, y_train) 27 | return mlp 28 | 29 | 30 | def train_one_vs_all_SVM(X_train, y_train): 31 | single_svm = cv2.ml.SVM_create() 32 | single_svm.setKernel(cv2.ml.SVM_LINEAR) 33 | single_svm.setType(cv2.ml.SVM_C_SVC) 34 | single_svm.setC(2.67) 35 | single_svm.setGamma(5.383) 36 | single_svm.train(X_train, cv2.ml.ROW_SAMPLE, y_train) 37 | return single_svm 38 | 39 | 40 | def accuracy(y_predicted, y_true): 41 | return sum(y_predicted == y_true) / len(y_true) 42 | 43 | 44 | def precision(y_predicted, y_true, positive_label): 45 | cm = confusion_matrix(y_predicted, y_true) 46 | true_positives = cm[positive_label, positive_label] 47 | total_positives = sum(cm[positive_label]) 48 | return true_positives / total_positives 49 | 50 | 51 | def recall(y_predicted, y_true, positive_label): 52 | cm = confusion_matrix(y_predicted, y_true) 53 | true_positives = cm[positive_label, positive_label] 54 | class_members = sum(cm[:, positive_label]) 55 | return true_positives / class_members 56 | 57 | 58 | def confusion_matrix(y_predicted, y_true): 59 | num_classes = max(max(y_predicted), max(y_true)) + 1 60 | conf_matrix = np.zeros((num_classes, num_classes)) 61 | for r, c in zip(y_predicted, y_true): 62 | conf_matrix[r, c] += 1 63 | return conf_matrix 64 | 65 | 66 | def train_sklearn_random_forest(X_train, y_train): 67 | pass 68 | 69 | 70 | def main(labels=[0, 10, 20, 30, 40]): 71 | train_data, train_labels = load_training_data(labels) 72 | test_data, test_labels = load_test_data(labels) 73 | 74 | y_train = np.array(train_labels) 75 | y_test = np.array(test_labels) 76 | 77 | accuracies = {} 78 | 79 | for featurize in [hog_featurize, grayscale_featurize, 80 | hsv_featurize, surf_featurize]: 81 | x_train = featurize(train_data) 82 | print(x_train.shape) 83 | model = train_one_vs_all_SVM(x_train, y_train) 84 | 85 | x_test = featurize(test_data) 86 | res = model.predict(x_test) 87 | y_predict = res[1].flatten() 88 | np.save(f'y_predict_{featurize.__name__}', y_predict) 89 | np.save('y_true', y_test) 90 | accuracies[featurize.__name__] = accuracy(y_predict, y_test) 91 | 92 | print(accuracies) 93 | 94 | plt.bar(accuracies.keys(), accuracies.values()) 95 | plt.axes().xaxis.set_tick_params(rotation=20) 96 | plt.ylim([0, 1]) 97 | plt.grid() 98 | plt.title('Test accuracy for different featurize functions') 99 | plt.show() 100 | 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /chapter7/data/.gitignore: -------------------------------------------------------------------------------- 1 | *.zip 2 | -------------------------------------------------------------------------------- /chapter7/data/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter7/data/__init__.py -------------------------------------------------------------------------------- /chapter7/data/gtsrb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | A module to load the German Traffic Sign Recognition Benchmark (GTSRB) 6 | 7 | The dataset contains more than 50,000 images of traffic signs belonging 8 | to more than 40 classes. The dataset can be freely obtained from: 9 | http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset. 10 | """ 11 | 12 | from pathlib import Path 13 | import requests 14 | from io import TextIOWrapper 15 | import hashlib 16 | import cv2 17 | import numpy as np 18 | from zipfile import ZipFile 19 | 20 | import csv 21 | from matplotlib import cm 22 | from matplotlib import pyplot as plt 23 | 24 | 25 | ARCHIVE_PATH = 'https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/' # noqa 26 | 27 | 28 | def _download(filename, *, md5sum=None): 29 | ''' 30 | GTSRB_Final_Training_Images.zip 31 | 32 | ''' 33 | write_path = Path(__file__).parent / filename 34 | if write_path.exists() and _md5sum_matches(write_path, md5sum): 35 | return write_path 36 | response = requests.get(f'{ARCHIVE_PATH}/{filename}') 37 | response.raise_for_status() 38 | with open(write_path, 'wb') as outfile: 39 | outfile.write(response.content) 40 | return write_path 41 | 42 | 43 | def _md5sum_matches(file_path, checksum): 44 | if checksum is None: 45 | return True 46 | hash_md5 = hashlib.md5() 47 | with open(file_path, "rb") as f: 48 | for chunk in iter(lambda: f.read(4096), b""): 49 | hash_md5.update(chunk) 50 | return checksum == hash_md5.hexdigest() 51 | 52 | 53 | def _load_data(filepath, labels): 54 | data, targets = [], [] 55 | 56 | with ZipFile(filepath) as data_zip: 57 | for path in data_zip.namelist(): 58 | if not path.endswith('.csv'): 59 | continue 60 | # Only iterate over annotations files 61 | *dir_path, csv_filename = path.split('/') 62 | label_str = dir_path[-1] 63 | if labels is not None and int(label_str) not in labels: 64 | continue 65 | with data_zip.open(path, 'r') as csvfile: 66 | reader = csv.DictReader(TextIOWrapper(csvfile), delimiter=';') 67 | for img_info in reader: 68 | img_path = '/'.join([*dir_path, img_info['Filename']]) 69 | raw_data = data_zip.read(img_path) 70 | img = cv2.imdecode(np.frombuffer(raw_data, np.uint8), 1) 71 | 72 | x1, y1 = np.int(img_info['Roi.X1']), np.int(img_info['Roi.Y1']) 73 | x2, y2 = np.int(img_info['Roi.X2']), np.int(img_info['Roi.Y2']) 74 | 75 | data.append(img[y1: y2, x1: x2]) 76 | targets.append(np.int(img_info['ClassId'])) 77 | return data, targets 78 | 79 | 80 | def load_test_data(labels=[0, 10]): 81 | filepath = _download('GTSRB_Online-Test-Images-Sorted.zip', 82 | md5sum='b7bba7dad2a4dc4bc54d6ba2716d163b') 83 | return _load_data(filepath, labels) 84 | 85 | 86 | def load_training_data(labels=[0, 10]): 87 | filepath = _download('GTSRB-Training_fixed.zip', 88 | md5sum='513f3c79a4c5141765e10e952eaa2478') 89 | return _load_data(filepath, labels) 90 | 91 | 92 | if __name__ == '__main__': 93 | train_data, train_labels = load_training_data(labels=None) 94 | np.random.seed(75) 95 | for _ in range(100): 96 | indices = np.arange(len(train_data)) 97 | np.random.shuffle(indices) 98 | for r in range(3): 99 | for c in range(5): 100 | i = 5 * r + c 101 | ax = plt.subplot(3, 5, 
1 + i) 102 | sample = train_data[indices[i]] 103 | ax.imshow(cv2.resize(sample, (32, 32)), cmap=cm.Greys_r) 104 | ax.axis('off') 105 | plt.tight_layout() 106 | plt.show() 107 | np.random.seed(np.random.randint(len(indices))) 108 | -------------------------------------------------------------------------------- /chapter7/data/process.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import itertools 4 | 5 | 6 | def hog_featurize(data, *, scale_size=(32, 32)): 7 | """ 8 | Featurize using histogram of gradients. 9 | 10 | For each image: 11 | 1. resize all images to have the same (usually smaller size) 12 | 2. Calculate HOG values using same HOGDescriptor for all images, 13 | so we get same number of gradients for each image 14 | 3. Return a flattened list of gradients as a final feature. 15 | 16 | """ 17 | block_size = (scale_size[0] // 2, scale_size[1] // 2) 18 | block_stride = (scale_size[0] // 4, scale_size[1] // 4) 19 | cell_size = block_stride 20 | hog = cv2.HOGDescriptor(scale_size, block_size, block_stride, 21 | cell_size, 9) 22 | 23 | resized_images = (cv2.resize(x, scale_size) for x in data) 24 | 25 | return np.array([hog.compute(x).flatten() for x in resized_images]) 26 | 27 | 28 | def surf_featurize(data, *, scale_size=(16, 16), num_surf_features=100): 29 | all_kp = [cv2.KeyPoint(float(x), float(y), 1) 30 | for x, y in itertools.product(range(scale_size[0]), 31 | range(scale_size[1]))] 32 | surf = cv2.xfeatures2d_SURF.create(hessianThreshold=400) 33 | kp_des = (surf.compute(x, all_kp) for x in data) 34 | return np.array([d.flatten()[:num_surf_features] 35 | for _, d in kp_des]).astype(np.float32) 36 | 37 | 38 | def hsv_featurize(data, *, scale_size=(16, 16)): 39 | """ 40 | Featurize by calculating HSV values of the data 41 | 42 | For each image: 43 | 1. resize all images to have the same (usually smaller size) 44 | 2. Convert the image to HSV (values in 0 - 255 range) 45 | 3. Convert each image to have pixel value in (0, 1) and flatten 46 | 4. Subtract average pixel value of the flattened vector. 47 | """ 48 | resized_images = (cv2.resize(x, scale_size) for x in data) 49 | hsv_data = (cv2.cvtColor(x, cv2.COLOR_BGR2HSV) for x in resized_images) 50 | scaled_data = (np.array(x).astype(np.float32).flatten() / 255 51 | for x in hsv_data) 52 | return np.vstack([x - x.mean() for x in scaled_data]) 53 | 54 | 55 | def grayscale_featurize(data, *, scale_size=(16, 16)): 56 | """ 57 | Featurize by calculating grayscale values of the data 58 | 59 | For each image: 60 | 1. resize all images to have the same (usually smaller size) 61 | 2. Convert the image to grayscale (values in 0 - 255 range) 62 | 3. Convert each image to have pixel value in (0, 1) and flatten 63 | 4. Subtract average pixel value of the flattened vector. 
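    Illustrative example (shape check only; any list of BGR images works):

        >>> imgs = [np.zeros((40, 50, 3), np.uint8)] * 3
        >>> grayscale_featurize(imgs).shape
        (3, 256)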
64 | """ 65 | resized_images = (cv2.resize(x, scale_size) for x in data) 66 | gray_data = (cv2.cvtColor(x, cv2.COLOR_BGR2GRAY) for x in resized_images) 67 | scaled_data = (np.array(x).astype(np.float32).flatten() / 255 68 | for x in gray_data) 69 | return np.vstack([x - x.mean() for x in scaled_data]) 70 | 71 | 72 | if __name__ == '__main__': 73 | from data.gtsrb import load_training_data 74 | import matplotlib.pyplot as plt 75 | 76 | train_data, train_labels = load_training_data(labels=[13]) 77 | 78 | i = 80 79 | 80 | [f] = hog_featurize([train_data[i]]) 81 | print(len(f)) 82 | 83 | plt.imshow(train_data[i]) 84 | plt.show() 85 | -------------------------------------------------------------------------------- /chapter7/train_tf2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import cv2 4 | # https://answers.opencv.org/question/175699/readnetfromtensorflow-fails-on-retrained-nn/ 5 | 6 | # https://jeanvitor.com/tensorflow-object-detecion-opencv/ 7 | 8 | # https://heartbeat.fritz.ai/real-time-object-detection-on-raspberry-pi-using-opencv-dnn-98827255fa60 9 | 10 | from data.gtsrb import load_training_data 11 | from data.gtsrb import load_test_data 12 | 13 | UNIFORM_SIZE = (32, 32) 14 | 15 | 16 | def normalize(x): 17 | """ 18 | Do minimum pre-processing 19 | 1. resize to UNIFORM_SIZE 20 | 2. scale to (0, 1) range 21 | 3. subtract the mean of all pixel values 22 | """ 23 | one_size = cv2.resize(x, UNIFORM_SIZE).astype(np.float32) / 255 24 | return one_size - one_size.mean() 25 | 26 | 27 | def train_tf_model(X_train, y_train): 28 | model = tf.keras.models.Sequential([ 29 | tf.keras.layers.Conv2D(20, (8, 8), 30 | input_shape=list(UNIFORM_SIZE) + [3], 31 | activation='relu'), 32 | tf.keras.layers.MaxPooling2D(pool_size=(4, 4), strides=4), 33 | tf.keras.layers.Dropout(0.15), 34 | tf.keras.layers.Flatten(), 35 | tf.keras.layers.Dense(64, activation='relu'), 36 | tf.keras.layers.Dropout(0.15), 37 | tf.keras.layers.Dense(43, activation='softmax') 38 | ]) 39 | 40 | model.compile(optimizer='adam', 41 | loss='sparse_categorical_crossentropy', 42 | metrics=['accuracy']) 43 | model.fit(x_train, np.array(train_labels), epochs=2) 44 | return model 45 | 46 | 47 | if __name__ == '__main__': 48 | train_data, train_labels = load_training_data(labels=None) 49 | test_data, test_labels = load_test_data(labels=None) 50 | 51 | x_train = np.array([normalize(x) for x in train_data]) 52 | model = train_tf_model(x_train, train_labels) 53 | x_test = np.array([normalize(x) for x in test_data]) 54 | 55 | y_hat = model.predict_classes(x_test) 56 | 57 | acc = sum(y_hat == np.array(test_labels)) / len(test_labels) 58 | print(f'Accuracy = {acc:.3f}') 59 | -------------------------------------------------------------------------------- /chapter8/.gitignore: -------------------------------------------------------------------------------- 1 | media/ 2 | -------------------------------------------------------------------------------- /chapter8/chapter8.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | """ 6 | OpenCV with Python Blueprints 7 | Chapter 8: Learning to Recognize Emotion in Faces 8 | 9 | An app that combines both face detection and face recognition, with a 10 | focus on recognizing emotional expressions in the detected faces. 11 | 12 | The process flow is as follows: 13 | * Run the GUI in Training Mode to assemble a training set. 
Upon exiting 14 | the app will dump all assembled training samples to a pickle file 15 | "datasets/faces_training.pkl". 16 | * Run the script train_test_mlp.py to train a MLP classifier on the 17 | dataset. This file will store the parameters of the trained MLP in 18 | a file "params/mlp.xml" and dump the preprocessed dataset to a 19 | pickle file "datasets/faces_preprocessed.pkl". 20 | * Run the GUI in Testing Mode to apply the pre-trained MLP classifier 21 | to the live stream of the webcam. 22 | """ 23 | 24 | import argparse 25 | import cv2 26 | import numpy as np 27 | 28 | import wx 29 | from pathlib import Path 30 | 31 | from data.store import save_datum, pickle_load 32 | from data.process import _pca_featurize 33 | from detectors import FaceDetector 34 | from wx_gui import BaseLayout 35 | 36 | 37 | class FacialExpressionRecognizerLayout(BaseLayout): 38 | def __init__(self, *args, 39 | clf_path=None, 40 | **kwargs): 41 | super().__init__(*args, **kwargs) 42 | self.clf = cv2.ml.ANN_MLP_load(str(clf_path / 'mlp.xml')) 43 | 44 | self.index_to_label = pickle_load(clf_path / 'index_to_label') 45 | self.pca_args = pickle_load(clf_path / 'pca_args') 46 | 47 | self.face_detector = FaceDetector( 48 | face_cascade='params/haarcascade_frontalface_default.xml', 49 | eye_cascade='params/haarcascade_lefteye_2splits.xml') 50 | 51 | def featurize_head(self, head): 52 | return _pca_featurize(head[None], *self.pca_args) 53 | 54 | def augment_layout(self): 55 | pass 56 | 57 | def process_frame(self, frame_rgb: np.ndarray) -> np.ndarray: 58 | success, frame, self.head, (x, y) = self.face_detector.detect_face( 59 | frame_rgb) 60 | if not success: 61 | return frame 62 | 63 | success, head = self.face_detector.align_head(self.head) 64 | if not success: 65 | return frame 66 | 67 | # We have to pass [1 x n] array predict. 68 | _, output = self.clf.predict(self.featurize_head(head)) 69 | label = self.index_to_label[np.argmax(output)] 70 | 71 | # Draw predicted label above the bounding box. 
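        # ((x, y) is the top-left corner of the face box returned by
        # detect_face, so the label sits 20 pixels above the face.)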
72 | cv2.putText(frame, label, (x, y - 20), 73 | cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2) 74 | 75 | return frame 76 | 77 | 78 | class DataCollectorLayout(BaseLayout): 79 | 80 | def __init__(self, *args, 81 | training_data='data/cropped_faces.csv', 82 | **kwargs): 83 | super().__init__(*args, **kwargs) 84 | self.face_detector = FaceDetector( 85 | face_cascade='params/haarcascade_frontalface_default.xml', 86 | eye_cascade='params/haarcascade_lefteye_2splits.xml') 87 | 88 | self.training_data = training_data 89 | 90 | def augment_layout(self): 91 | """Initializes GUI""" 92 | # initialize data structure 93 | self.samples = [] 94 | self.labels = [] 95 | 96 | # create a horizontal layout with all buttons 97 | pnl2 = wx.Panel(self, -1) 98 | self.neutral = wx.RadioButton(pnl2, -1, 'neutral', (10, 10), 99 | style=wx.RB_GROUP) 100 | self.happy = wx.RadioButton(pnl2, -1, 'happy') 101 | self.sad = wx.RadioButton(pnl2, -1, 'sad') 102 | self.surprised = wx.RadioButton(pnl2, -1, 'surprised') 103 | self.angry = wx.RadioButton(pnl2, -1, 'angry') 104 | self.disgusted = wx.RadioButton(pnl2, -1, 'disgusted') 105 | hbox2 = wx.BoxSizer(wx.HORIZONTAL) 106 | hbox2.Add(self.neutral, 1) 107 | hbox2.Add(self.happy, 1) 108 | hbox2.Add(self.sad, 1) 109 | hbox2.Add(self.surprised, 1) 110 | hbox2.Add(self.angry, 1) 111 | hbox2.Add(self.disgusted, 1) 112 | pnl2.SetSizer(hbox2) 113 | 114 | # create horizontal layout with single snapshot button 115 | pnl3 = wx.Panel(self, -1) 116 | self.snapshot = wx.Button(pnl3, -1, 'Take Snapshot') 117 | self.Bind(wx.EVT_BUTTON, self._on_snapshot, self.snapshot) 118 | hbox3 = wx.BoxSizer(wx.HORIZONTAL) 119 | hbox3.Add(self.snapshot, 1) 120 | pnl3.SetSizer(hbox3) 121 | 122 | # arrange all horizontal layouts vertically 123 | self.panels_vertical.Add(pnl2, flag=wx.EXPAND | wx.BOTTOM, border=1) 124 | self.panels_vertical.Add(pnl3, flag=wx.EXPAND | wx.BOTTOM, border=1) 125 | 126 | def process_frame(self, frame_rgb: np.ndarray) -> np.ndarray: 127 | """ 128 | Add a bounding box around the face if a face is detected. 129 | """ 130 | _, frame, self.head, _ = self.face_detector.detect_face(frame_rgb) 131 | return frame 132 | 133 | def _on_snapshot(self, evt): 134 | """Takes a snapshot of the current frame 135 | 136 | This method takes a snapshot of the current frame, preprocesses 137 | it to extract the head region, and upon success adds the data 138 | sample to the training set. 139 | """ 140 | if self.neutral.GetValue(): 141 | label = 'neutral' 142 | elif self.happy.GetValue(): 143 | label = 'happy' 144 | elif self.sad.GetValue(): 145 | label = 'sad' 146 | elif self.surprised.GetValue(): 147 | label = 'surprised' 148 | elif self.angry.GetValue(): 149 | label = 'angry' 150 | elif self.disgusted.GetValue(): 151 | label = 'disgusted' 152 | 153 | if self.head is None: 154 | print("No face detected") 155 | else: 156 | success, aligned_head = self.face_detector.align_head(self.head) 157 | if success: 158 | save_datum(self.training_data, label, aligned_head) 159 | print(f"Saved {label} training datum.") 160 | else: 161 | print("Could not align head (eye detection failed?)") 162 | 163 | 164 | def run_layout(layout_cls, **kwargs): 165 | # open webcam 166 | capture = cv2.VideoCapture(0) 167 | # opening the channel ourselves, if it failed to open. 
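    # (VideoCapture.open expects a device index or filename,
    # e.g. capture.open(0).)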
168 | if not(capture.isOpened()): 169 | capture.open() 170 | 171 | capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640) 172 | capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) 173 | 174 | # start graphical user interface 175 | app = wx.App() 176 | layout = layout_cls(capture, **kwargs) 177 | layout.Center() 178 | layout.Show() 179 | app.MainLoop() 180 | 181 | 182 | if __name__ == '__main__': 183 | parser = argparse.ArgumentParser() 184 | parser.add_argument('mode', choices=['collect', 'demo']) 185 | parser.add_argument('--classifier', type=Path) 186 | args = parser.parse_args() 187 | 188 | if args.mode == 'collect': 189 | run_layout(DataCollectorLayout, title='Collect Data') 190 | elif args.mode == 'demo': 191 | assert args.classifier is not None, 'you have to provide --classifier' 192 | run_layout(FacialExpressionRecognizerLayout, 193 | title='Facial Expression Recognizer', 194 | clf_path=args.classifier) 195 | -------------------------------------------------------------------------------- /chapter8/data/.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.csv.[0-9] 3 | -------------------------------------------------------------------------------- /chapter8/data/process.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | from typing import Callable 4 | import cv2 5 | 6 | 7 | def featurize(datum): 8 | return np.array(datum, dtype=np.float32).flatten() 9 | 10 | 11 | EMOTIONS = { 12 | 'neutral': 0, 13 | 'surprised': 1, 14 | 'angry': 2, 15 | 'happy': 3, 16 | 'sad': 4, 17 | 'disgusted': 5 18 | } 19 | 20 | REVERSE_EMOTIONS = {v: k for k, v in EMOTIONS.items()} 21 | 22 | 23 | def int_encode(label): 24 | return EMOTIONS[label] 25 | 26 | 27 | def int_decode(value): 28 | return REVERSE_EMOTIONS[value] 29 | 30 | 31 | def one_hot_encode(all_labels) -> (np.ndarray, Callable): 32 | unique_lebels = list(sorted(set(all_labels))) 33 | index_to_label = dict(enumerate(unique_lebels)) 34 | label_to_index = {v: k for k, v in index_to_label.items()} 35 | 36 | y = np.zeros((len(all_labels), len(unique_lebels))).astype(np.float32) 37 | for i, label in enumerate(all_labels): 38 | y[i, label_to_index[label]] = 1 39 | 40 | return y, index_to_label 41 | 42 | 43 | def train_test_split(n, train_portion=0.8, seed=None): 44 | if seed: 45 | np.random.seed(seed) 46 | indices = np.arange(n) 47 | np.random.shuffle(indices) 48 | N = int(n * train_portion) 49 | return indices[:N], indices[N:] 50 | 51 | 52 | def _pca_featurize(data, center, top_vecs): 53 | return np.array([np.dot(top_vecs, np.array(datum).flatten() - center) 54 | for datum in data]).astype(np.float32) 55 | 56 | 57 | def pca_featurize(training_data, *, num_components=20): 58 | x_arr = np.array(training_data).reshape((len(training_data), -1)).astype(np.float32) 59 | mean, eigvecs = cv2.PCACompute(x_arr, mean=None) 60 | 61 | # Take only first num_components eigenvectors. 
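    # _pca_featurize then projects each mean-centered, flattened sample
    # onto these eigenvectors, yielding a num_components-dimensional
    # feature vector per face.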
62 | top_vecs = eigvecs[:num_components] 63 | center = mean.flatten() 64 | 65 | args = (center, top_vecs) 66 | return _pca_featurize(training_data, *args), args 67 | 68 | 69 | if __name__ == '__main__': 70 | print(train_test_split(10, 0.8)) 71 | from data.store import load_collected_data 72 | data, targets = load_collected_data('data/cropped_faces.csv') 73 | X, f = pca_featurize(data) 74 | print(X.shape) 75 | 76 | -------------------------------------------------------------------------------- /chapter8/data/store.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import pickle 3 | import json 4 | from enum import IntEnum, auto, unique 5 | import sys 6 | csv.field_size_limit(sys.maxsize) 7 | 8 | 9 | def load_collected_data(path): 10 | data, targets = [], [] 11 | with open(path, 'r', newline='') as infile: 12 | reader = csv.reader(infile) 13 | for label, sample in reader: 14 | targets.append(label) 15 | data.append(json.loads(sample)) 16 | return data, targets 17 | 18 | 19 | def save_datum(path, label, img): 20 | with open(path, 'a', newline='') as outfile: 21 | writer = csv.writer(outfile) 22 | writer.writerow([label, img.tolist()]) 23 | 24 | 25 | def pickle_dump(f, path): 26 | with open(path, 'wb') as outfile: 27 | return pickle.dump(f, outfile) 28 | 29 | 30 | def pickle_load(path): 31 | with open(path, 'rb') as infile: 32 | return pickle.load(infile) 33 | 34 | 35 | if __name__ == '__main__': 36 | td = load_collected_data('data/cropped_faces.csv') 37 | print([len(x) for x in td]) 38 | -------------------------------------------------------------------------------- /chapter8/detectors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """A module that contains various detectors""" 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | class FaceDetector: 11 | """Face Detector 12 | 13 | This class implements a face detection algorithm using a face cascade 14 | and two eye cascades. 15 | """ 16 | 17 | def __init__(self, *, 18 | face_cascade='params/haarcascade_frontalface_default.xml', 19 | eye_cascade='params/haarcascade_lefteye_2splits.xml', 20 | scale_factor=4): 21 | # resize images before detection 22 | self.scale_factor = scale_factor 23 | 24 | # load pre-trained cascades 25 | self.face_clf = cv2.CascadeClassifier(face_cascade) 26 | if self.face_clf.empty(): 27 | raise ValueError(f'Could not load face cascade "{face_cascade}"') 28 | self.eye_clf = cv2.CascadeClassifier(eye_cascade) 29 | if self.eye_clf.empty(): 30 | raise ValueError( 31 | f'Could not load eye cascade "{eye_cascade}"') 32 | 33 | def detect_face(self, rgb_img, *, outline=True): 34 | """Performs face detection 35 | 36 | This method detects faces in an RGB input image. 37 | The method returns True upon success (else False), draws the 38 | bounding box of the head onto the input image (frame), and 39 | extracts the head region (head). 
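        Detection runs on a grayscale copy downscaled by scale_factor for
        speed; the returned bounding box is scaled back to the original
        image coordinates.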
40 | 41 | :param frame: RGB input image 42 | :returns: success, frame, head 43 | """ 44 | frameCasc = cv2.cvtColor(cv2.resize(rgb_img, (0, 0), 45 | fx=1.0 / self.scale_factor, 46 | fy=1.0 / self.scale_factor), 47 | cv2.COLOR_RGB2GRAY) 48 | faces = self.face_clf.detectMultiScale( 49 | frameCasc, 50 | scaleFactor=1.1, 51 | minNeighbors=3, 52 | flags=cv2.CASCADE_SCALE_IMAGE) * self.scale_factor 53 | 54 | # if face is found: extract head region from bounding box 55 | for (x, y, w, h) in faces: 56 | if outline: 57 | cv2.rectangle(rgb_img, (x, y), (x + w, y + h), (100, 255, 0), 58 | thickness=2) 59 | head = cv2.cvtColor(rgb_img[y:y + h, x:x + w], 60 | cv2.COLOR_RGB2GRAY) 61 | return True, rgb_img, head, (x, y) 62 | 63 | return False, rgb_img, None, (None, None) 64 | 65 | def eye_centers(self, head, *, outline=False): 66 | height, width = head.shape[:2] 67 | 68 | eyes = self.eye_clf.detectMultiScale(head, 69 | scaleFactor=1.1, 70 | minNeighbors=3, 71 | flags=cv2.CASCADE_SCALE_IMAGE) 72 | if len(eyes) != 2: 73 | raise RuntimeError(f'Number of eyes {len(eyes)} != 2') 74 | eye_centers = [] 75 | for x, y, w, h in eyes: 76 | # find the center of the detected eye region 77 | eye_centers.append(np.array([x + w / 2, y + h / 2])) 78 | if outline: 79 | cv2.rectangle(head, (x, y), (x + w, y + h), (10, 55, 0), 80 | thickness=2) 81 | return eye_centers 82 | 83 | def align_head(self, head): 84 | """Aligns a head region using affine transformations 85 | 86 | This method preprocesses an extracted head region by rotating 87 | and scaling it so that the face appears centered and up-right. 88 | 89 | The method returns True on success (else False) and the aligned 90 | head region (head). Possible reasons for failure are that one or 91 | both eye detectors fail, maybe due to poor lighting conditions. 
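        The affine transform rotates the face around the midpoint between
        the eyes, scales it so that the inter-eye distance spans
        (1 - 2 * desired_eye_x) of the output width, and shifts the eyes to
        25% / 20% of the 200 x 200 output image.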
92 | 93 | :param head: extracted head region 94 | :returns: success, head 95 | """ 96 | # we want the eye to be at 25% of the width, and 20% of the height 97 | # resulting image should be square (desired_img_width, 98 | # desired_img_height) 99 | desired_eye_x = 0.25 100 | desired_eye_y = 0.2 101 | desired_img_width = desired_img_height = 200 102 | 103 | try: 104 | eye_centers = self.eye_centers(head) 105 | except RuntimeError: 106 | return False, head 107 | 108 | if eye_centers[0][0] < eye_centers[0][1]: 109 | left_eye, right_eye = eye_centers 110 | else: 111 | right_eye, left_eye = eye_centers 112 | 113 | # scale distance between eyes to desired length 114 | eye_dist = np.linalg.norm(left_eye - right_eye) 115 | eyeSizeScale = (1.0 - desired_eye_x * 2) * desired_img_width / eye_dist 116 | 117 | # get rotation matrix 118 | # get center point between the two eyes and calculate angle 119 | eye_angle_deg = 180 / np.pi * np.arctan2(right_eye[1] - left_eye[1], 120 | right_eye[0] - left_eye[0]) 121 | eye_midpoint = (left_eye + right_eye) / 2 122 | rot_mat = cv2.getRotationMatrix2D(tuple(eye_midpoint), eye_angle_deg, 123 | eyeSizeScale) 124 | 125 | # shift center of the eyes to be centered in the image 126 | rot_mat[0, 2] += desired_img_width * 0.5 - eye_midpoint[0] 127 | rot_mat[1, 2] += desired_eye_y * desired_img_height - eye_midpoint[1] 128 | 129 | # warp perspective to make eyes aligned on horizontal line and scaled 130 | # to right size 131 | res = cv2.warpAffine(head, rot_mat, (desired_img_width, 132 | desired_img_width)) 133 | 134 | # return success 135 | return True, res 136 | -------------------------------------------------------------------------------- /chapter8/train_classifier.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import numpy as np 4 | from pathlib import Path 5 | from collections import Counter 6 | from data.store import load_collected_data 7 | from data.process import train_test_split 8 | from data.process import pca_featurize, _pca_featurize 9 | from data.process import one_hot_encode 10 | from data.store import pickle_dump 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--data', required=True) 16 | parser.add_argument('--save', type=Path) 17 | parser.add_argument('--num-components', type=int, 18 | default=20) 19 | args = parser.parse_args() 20 | 21 | data, targets = load_collected_data(args.data) 22 | 23 | train, test = train_test_split(len(data), 0.8) 24 | x_train, pca_args = pca_featurize(np.array(data)[train], 25 | num_components=args.num_components) 26 | 27 | encoded_targets, index_to_label = one_hot_encode(targets) 28 | 29 | last_layer_count = len(encoded_targets[0]) 30 | mlp = cv2.ml.ANN_MLP_create() 31 | mlp.setLayerSizes(np.array([args.num_components, 10, last_layer_count], dtype=np.uint8)) 32 | mlp.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.1) 33 | mlp.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM) 34 | mlp.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 30, 0.000001 )) 35 | 36 | y_train = encoded_targets[train] 37 | 38 | mlp.train(x_train, cv2.ml.ROW_SAMPLE, y_train) 39 | 40 | x_test = _pca_featurize(np.array(data)[test], *pca_args) 41 | _, predicted = mlp.predict(x_test) 42 | 43 | y_hat = np.array([index_to_label[np.argmax(y)] for y in predicted]) 44 | y_true = np.array(targets)[test] 45 | 46 | print('Training Accuracy:') 47 | print(sum(y_hat == y_true) / len(y_hat)) 48 | 49 | if args.save: 50 | x_all, 
pca_args = pca_featurize(np.array(data), num_components=args.num_components) 51 | mlp.train(x_all, cv2.ml.ROW_SAMPLE, encoded_targets) 52 | args.save.mkdir(exist_ok=True) 53 | mlp.save(str(args.save / 'mlp.xml')) 54 | pickle_dump(index_to_label, args.save / 'index_to_label') 55 | pickle_dump(pca_args, args.save / 'pca_args') 56 | -------------------------------------------------------------------------------- /chapter8/wx_gui.py: -------------------------------------------------------------------------------- 1 | ../wx_gui.py -------------------------------------------------------------------------------- /chapter9/classification.py: -------------------------------------------------------------------------------- 1 | import tensorflow.keras as K 2 | 3 | from data import ds 4 | 5 | base_model = K.applications.MobileNetV2(input_shape=(224,224, 3), include_top=False) 6 | 7 | # Freeze layers 8 | for layer in base_model.layers: 9 | layer.trainable = False 10 | 11 | x = K.layers.GlobalAveragePooling2D()(base_model.output) 12 | 13 | is_breeds = True 14 | if is_breeds: 15 | out = K.layers.Dense(37,activation="softmax")(x) 16 | inp_ds = ds.map(lambda d: (d.image,d.breed)) 17 | else: 18 | out = K.layers.Dense(2,activation="softmax")(x) 19 | inp_ds = ds.map(lambda d: (d.image,d.type)) 20 | 21 | model = K.Model(inputs=base_model.input, outputs=out) 22 | model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["categorical_accuracy","top_k_categorical_accuracy"]) 23 | 24 | valid = inp_ds.take(1000) 25 | train = inp_ds.skip(1000).shuffle(10**4) 26 | 27 | model.fit(train.batch(32), epochs=4) 28 | model.evaluate(valid.batch(1)) 29 | -------------------------------------------------------------------------------- /chapter9/data.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | from itertools import count 5 | from collections import defaultdict, namedtuple 6 | 7 | import cv2 8 | import numpy as np 9 | import tensorflow as tf 10 | import xml.etree.ElementTree as ET 11 | 12 | DATASET_DIR = "dataset" 13 | for type in ("annotations", "images"): 14 | tf.keras.utils.get_file( 15 | type, 16 | f"https://www.robots.ox.ac.uk/~vgg/data/pets/data/{type}.tar.gz", 17 | untar=True, 18 | cache_dir=".", 19 | cache_subdir=DATASET_DIR) 20 | 21 | IMAGE_SIZE = 224 22 | IMAGE_ROOT = os.path.join(DATASET_DIR, "images") 23 | XML_ROOT = os.path.join(DATASET_DIR, "annotations") 24 | 25 | Data = namedtuple("Data", "image,box,size,type,breed") 26 | 27 | types = defaultdict(count().__next__) 28 | breeds = defaultdict(count().__next__) 29 | 30 | 31 | def parse_xml(path: str) -> Data: 32 | with open(path) as f: 33 | xml_string = f.read() 34 | root = ET.fromstring(xml_string) 35 | img_name = root.find("./filename").text 36 | breed_name = img_name[:img_name.rindex("_")] 37 | breed_id = breeds[breed_name] 38 | type_id = types[root.find("./object/name").text] 39 | box = np.array([int(root.find(f"./object/bndbox/{tag}").text) 40 | for tag in "xmin,ymin,xmax,ymax".split(",")]) 41 | size = np.array([int(root.find(f"./size/{tag}").text) 42 | for tag in "width,height".split(",")]) 43 | normed_box = (box.reshape((2, 2)) / size).reshape((4)) 44 | return Data(img_name, normed_box, size, type_id, breed_id) 45 | 46 | 47 | xml_paths = glob.glob(os.path.join(XML_ROOT, "xmls", "*.xml")) 48 | xml_paths.sort() 49 | 50 | parsed = np.array([parse_xml(path) for path in xml_paths]) 51 | 52 | print(f"{len(types)} TYPES:", *types.keys(), sep=", ") 53 | print(f"{len(breeds)} 
BREEDS:", *breeds.keys(), sep=", ") 54 | 55 | np.random.seed(1) 56 | np.random.shuffle(parsed) 57 | 58 | ds = tuple(np.array(list(i)) for i in np.transpose(parsed)) 59 | ds_slices = tf.data.Dataset.from_tensor_slices(ds) 60 | 61 | for el in ds_slices.take(1): 62 | print(el) 63 | # check boxes 64 | for el in ds_slices: 65 | b = el[1].numpy() 66 | if(np.any((b > 1) | (b < 0)) or np.any(b[2:] < b[:2])): 67 | print(f"Invalid box found {b}") 68 | 69 | 70 | def prepare(image, box, size, type, breed): 71 | image = tf.io.read_file(IMAGE_ROOT + "/" + image) 72 | image = tf.image.decode_png(image, channels=3) 73 | image = tf.image.resize(image, (IMAGE_SIZE, IMAGE_SIZE)) 74 | image /= 255 75 | return Data(image, box, size, tf.one_hot( 76 | type, len(types)), tf.one_hot(breed, len(breeds))) 77 | 78 | 79 | ds = ds_slices.map(prepare).prefetch(128) 80 | 81 | if __name__ == "__main__": 82 | def illustrate(sample): 83 | breed_num = np.argmax(sample.breed) 84 | for breed, num in breeds.items(): 85 | if num == breed_num: 86 | break 87 | image = sample.image.numpy() 88 | pt1, pt2 = (sample.box.numpy().reshape( 89 | (2, 2)) * IMAGE_SIZE).astype(np.int32) 90 | cv2.rectangle(image, tuple(pt1), tuple(pt2), (0, 1, 0)) 91 | cv2.putText(image, breed, (10, 10), 92 | cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 1, 0)) 93 | return image 94 | samples_image = np.concatenate([illustrate(sample) 95 | for sample in ds.take(3)], axis=1) 96 | cv2.imshow("samples", samples_image) 97 | cv2.waitKey(0) 98 | -------------------------------------------------------------------------------- /chapter9/inference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import tensorflow.keras as K 4 | 5 | def draw_box(frame: np.ndarray, box: np.ndarray) -> np.ndarray: 6 | h, w = frame.shape[0:2] 7 | pts = (box.reshape((2, 2)) * np.array([w, h])).astype(np.int) 8 | cv2.rectangle(frame, tuple(pts[0]), tuple(pts[1]), (0, 255, 0), 2) 9 | return frame 10 | 11 | model = K.models.load_model("localization.h5") 12 | 13 | cap = cv2.VideoCapture(0) 14 | 15 | for _, frame in iter(cap.read, (False, None)): 16 | input = cv2.resize(frame, (224, 224)) 17 | input = cv2.cvtColor(input, cv2.COLOR_BGR2RGB) 18 | box, = model.predict(input[None] / 255) 19 | draw_box(frame, box) 20 | cv2.imshow("res", frame) 21 | if(cv2.waitKey(1) == 27): 22 | break 23 | -------------------------------------------------------------------------------- /chapter9/localization.py: -------------------------------------------------------------------------------- 1 | import tensorflow.keras as K 2 | 3 | from data import ds 4 | 5 | base_model = K.applications.MobileNetV2( 6 | input_shape=(224, 224, 3), include_top=False) 7 | 8 | conv_opts = dict( 9 | activation='relu', 10 | padding='same', 11 | kernel_regularizer="l2") 12 | 13 | x = K.layers.Conv2D(256, (1, 1), **conv_opts)(base_model.output) 14 | x = K.layers.Conv2D(256, (3, 3), strides=2, **conv_opts)(x) 15 | out = K.layers.Flatten()(x) 16 | out = K.layers.Dense(4, activation="sigmoid")(out) 17 | 18 | model = K.Model(inputs=base_model.input, outputs=out) 19 | 20 | inp_ds = ds.map(lambda d: (d.image, d.box)) 21 | valid = inp_ds.take(1000) 22 | train = inp_ds.skip(1000).shuffle(10000) 23 | 24 | model.compile( 25 | loss="mean_squared_error", 26 | optimizer="adam", 27 | metrics=[ 28 | K.metrics.RootMeanSquaredError(), 29 | "mae"]) 30 | 31 | checkpoint = K.callbacks.ModelCheckpoint("localization.h5", monitor='val_root_mean_squared_error', 32 | save_best_only=True, verbose=1) 
33 | 34 | model.fit( 35 | train.batch(32), 36 | epochs=12, 37 | validation_data=valid.batch(1), 38 | callbacks=[checkpoint]) 39 | -------------------------------------------------------------------------------- /dockerfiles/Dockerfile: -------------------------------------------------------------------------------- 1 | #FROM tensorflow/tensorflow:latest-py3 2 | 3 | # FROM tensorflow/tensorflow:latest-gpu-py3 4 | # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/dockerfiles/dockerfiles 5 | 6 | FROM python:3.8 7 | 8 | RUN apt-get update && apt-get install -y \ 9 | build-essential \ 10 | cmake \ 11 | git \ 12 | wget \ 13 | unzip \ 14 | yasm \ 15 | pkg-config \ 16 | libswscale-dev \ 17 | libtbb2 \ 18 | libtbb-dev \ 19 | libjpeg-dev \ 20 | libpng-dev \ 21 | libtiff-dev \ 22 | libavformat-dev \ 23 | libpq-dev \ 24 | libgtk2.0-dev \ 25 | # Optional 26 | libtbb2 libtbb-dev \ 27 | libjpeg-dev \ 28 | libpng-dev \ 29 | libtiff-dev \ 30 | libv4l-dev \ 31 | libdc1394-22-dev \ 32 | qt4-default \ 33 | # Missing libraries for GTK and wxPython dependencies 34 | libatk-adaptor \ 35 | libcanberra-gtk-module \ 36 | x11-apps \ 37 | libgtk-3-dev \ 38 | # Tools 39 | imagemagick \ 40 | && rm -rf /var/lib/apt/lists/* 41 | 42 | ENV OPENCV_VERSION="4.2.0" 43 | 44 | WORKDIR / 45 | RUN wget --output-document cv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip \ 46 | && unzip cv.zip \ 47 | && wget --output-document contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip \ 48 | && unzip contrib.zip \ 49 | && mkdir /opencv-${OPENCV_VERSION}/cmake_binary 50 | 51 | # Install numpy, since1. it's required for OpenCV 52 | RUN pip install --upgrade pip && pip install --no-cache-dir numpy==1.18.1 53 | 54 | RUN cd /opencv-${OPENCV_VERSION}/cmake_binary \ 55 | && cmake -DBUILD_TIFF=ON \ 56 | -DBUILD_opencv_java=OFF \ 57 | -DWITH_CUDA=OFF \ 58 | -DWITH_OPENGL=ON \ 59 | -DWITH_OPENCL=ON \ 60 | -DWITH_IPP=ON \ 61 | -DWITH_TBB=ON \ 62 | -DWITH_EIGEN=ON \ 63 | -DWITH_V4L=ON \ 64 | -DBUILD_TESTS=OFF \ 65 | -DBUILD_PERF_TESTS=OFF \ 66 | -DCMAKE_BUILD_TYPE=RELEASE \ 67 | -D OPENCV_EXTRA_MODULES_PATH=/opencv_contrib-${OPENCV_VERSION}/modules \ 68 | -D OPENCV_ENABLE_NONFREE=ON \ 69 | -DCMAKE_INSTALL_PREFIX=$(python -c "import sys; print(sys.prefix)") \ 70 | -DPYTHON_EXECUTABLE=$(which python) \ 71 | -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \ 72 | -DPYTHON_PACKAGES_PATH=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") \ 73 | .. 
\ 74 | && make install \ 75 | && rm /cv.zip /contrib.zip \ 76 | && rm -r /opencv-${OPENCV_VERSION} /opencv_contrib-${OPENCV_VERSION} 77 | 78 | RUN ln -s \ 79 | /usr/local/python/cv2/python-3.8/cv2.cpython-38m-x86_64-linux-gnu.so \ 80 | /usr/local/lib/python3.8/site-packages/cv2.so 81 | 82 | RUN pip install --upgrade pip && pip install --no-cache-dir pathlib2 wxPython==4.0.5 83 | 84 | RUN pip install --upgrade pip && pip install --no-cache-dir scipy==1.4.1 matplotlib==3.1.2 requests==2.22.0 ipython numba==0.48.0 jupyterlab==1.2.6 rawpy==0.14.0 # Rawpy is required for HDR & Panorama (processing .CR2 files) 85 | RUN pip install ExifRead==2.1.2 86 | 87 | 88 | CMD bash 89 | -------------------------------------------------------------------------------- /dockerfiles/gpu.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.1.0-gpu-py3 2 | RUN apt-get update && apt-get install -y \ 3 | build-essential \ 4 | cmake \ 5 | git \ 6 | wget \ 7 | unzip \ 8 | yasm \ 9 | pkg-config \ 10 | libswscale-dev \ 11 | libtbb2 \ 12 | libtbb-dev \ 13 | libjpeg-dev \ 14 | libpng-dev \ 15 | libtiff-dev \ 16 | libavformat-dev \ 17 | libpq-dev \ 18 | libgtk2.0-dev \ 19 | # Optional 20 | libtbb2 libtbb-dev \ 21 | libjpeg-dev \ 22 | libpng-dev \ 23 | libtiff-dev \ 24 | libv4l-dev \ 25 | libdc1394-22-dev \ 26 | qt4-default \ 27 | # Missing libraries for GTK and wxPython dependencies 28 | libatk-adaptor \ 29 | libcanberra-gtk-module \ 30 | x11-apps \ 31 | libgtk-3-dev \ 32 | # Tools 33 | imagemagick \ 34 | && rm -rf /var/lib/apt/lists/* 35 | 36 | ENV OPENCV_VERSION="4.2.0" 37 | 38 | WORKDIR / 39 | RUN wget --output-document cv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip \ 40 | && unzip cv.zip \ 41 | && wget --output-document contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip \ 42 | && unzip contrib.zip \ 43 | && mkdir /opencv-${OPENCV_VERSION}/cmake_binary 44 | 45 | # Install numpy, since1. it's required for OpenCV 46 | RUN pip install --upgrade pip && pip install --no-cache-dir numpy==1.18.1 47 | 48 | RUN cd /opencv-${OPENCV_VERSION}/cmake_binary \ 49 | && cmake -DBUILD_TIFF=ON \ 50 | -DBUILD_opencv_java=OFF \ 51 | -DWITH_CUDA=OFF \ 52 | -DWITH_OPENGL=ON \ 53 | -DWITH_OPENCL=ON \ 54 | -DWITH_IPP=ON \ 55 | -DWITH_TBB=ON \ 56 | -DWITH_EIGEN=ON \ 57 | -DWITH_V4L=ON \ 58 | -DBUILD_TESTS=OFF \ 59 | -DBUILD_PERF_TESTS=OFF \ 60 | -DCMAKE_BUILD_TYPE=RELEASE \ 61 | -D OPENCV_EXTRA_MODULES_PATH=/opencv_contrib-${OPENCV_VERSION}/modules \ 62 | -D OPENCV_ENABLE_NONFREE=ON \ 63 | -DCMAKE_INSTALL_PREFIX=$(python -c "import sys; print(sys.prefix)") \ 64 | -DPYTHON_EXECUTABLE=$(which python) \ 65 | -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \ 66 | -DPYTHON_PACKAGES_PATH=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") \ 67 | .. 
\ 68 | && make install \ 69 | && rm /cv.zip /contrib.zip \ 70 | && rm -r /opencv-${OPENCV_VERSION} /opencv_contrib-${OPENCV_VERSION} 71 | 72 | # RUN ln -s \ 73 | # /usr/local/python/cv2/python-3.8/cv2.cpython-38m-x86_64-linux-gnu.so \ 74 | # /usr/local/lib/python3.8/site-packages/cv2.so 75 | 76 | RUN pip install --upgrade pip && pip install --no-cache-dir pathlib2 wxPython==4.0.5 77 | 78 | RUN pip install --upgrade pip && pip install --no-cache-dir scipy==1.4.1 matplotlib==3.1.2 requests==2.22.0 ipython numba==0.48.0 jupyterlab==1.2.6 rawpy==0.14.0 # Rawpy is required for HDR & Panorama (processing .CR2 files) 79 | 80 | 81 | CMD bash 82 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.18.1 2 | scipy==1.4.1 3 | matplotlib==3.1.2 4 | requests==2.22.0 5 | opencv-contrib-python==4.2.0.32 6 | opencv-python==4.2.0.32 7 | rawpy==0.13.1 8 | ExifRead==2.1.2 9 | wheels/wxPython-4.0.7.post2-cp38-cp38-linux_x86_64.whl 10 | -------------------------------------------------------------------------------- /wheels/wxPython-4.0.7.post2-cp38-cp38-linux_x86_64.whl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9aa4f425952c7312d89a2247341be50f81372fef2c6a9c1021739fed4b976444 3 | size 134361990 4 | -------------------------------------------------------------------------------- /wx_gui.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | A module containing simple GUI layouts using wxPython 6 | 7 | This file is heavily based on the work of Michael Beyeler. 8 | """ 9 | 10 | __license__ = "GNU GPL 3.0 or later" 11 | 12 | import numpy as np 13 | import wx 14 | import cv2 15 | 16 | 17 | class BaseLayout(wx.Frame): 18 | """ Abstract base class for all layouts in the book. 19 | 20 | A custom layout needs to implement the 2 methods below 21 | - augment_layout 22 | - process_frame 23 | """ 24 | 25 | def __init__(self, 26 | capture: cv2.VideoCapture, 27 | title: str = None, 28 | parent=None, 29 | window_id: int = -1, # default value 30 | fps: int = 10): 31 | """ 32 | Initialize all necessary parameters and generate a basic GUI layout 33 | that can then be augmented using `self.augment_layout`. 34 | 35 | :param parent: A wx.Frame parent (often Null). If it is non-Null, 36 | the frame will be minimized when its parent is minimized and 37 | restored when it is restored. 38 | :param window_id: The window identifier. 39 | :param title: The caption to be displayed on the frame's title bar. 40 | :param capture: Original video source to get the frames from. 41 | :param fps: Frames per second at which to display camera feed. 42 | """ 43 | # Make sure the capture device could be set up 44 | self.capture = capture 45 | success, frame = self._acquire_frame() 46 | if not success: 47 | print("Could not acquire frame from camera.") 48 | raise SystemExit() 49 | self.imgHeight, self.imgWidth = frame.shape[:2] 50 | 51 | super().__init__(parent, window_id, title, 52 | size=(self.imgWidth, self.imgHeight + 20)) 53 | self.fps = fps 54 | self.bmp = wx.Bitmap.FromBuffer(self.imgWidth, self.imgHeight, frame) 55 | 56 | # set up periodic screen capture 57 | self.timer = wx.Timer(self) 58 | self.timer.Start(1000. 
/ self.fps) 59 | self.Bind(wx.EVT_TIMER, self._on_next_frame) 60 | 61 | # set up video stream 62 | self.video_pnl = wx.Panel(self, size=(self.imgWidth, self.imgHeight)) 63 | self.video_pnl.SetBackgroundColour(wx.BLACK) 64 | self.video_pnl.Bind(wx.EVT_PAINT, self._on_paint) 65 | 66 | # display the button layout beneath the video stream 67 | self.panels_vertical = wx.BoxSizer(wx.VERTICAL) 68 | self.panels_vertical.Add(self.video_pnl, 1, flag=wx.EXPAND | wx.TOP, 69 | border=1) 70 | 71 | self.augment_layout() 72 | 73 | # round off the layout by expanding and centering 74 | self.SetMinSize((self.imgWidth, self.imgHeight)) 75 | self.SetSizer(self.panels_vertical) 76 | self.Centre() 77 | 78 | def augment_layout(self): 79 | """ Augment custom layout elements to the GUI. 80 | 81 | This method is called in the class constructor, after initializing 82 | common parameters. Every GUI contains the camera feed in the variable 83 | `self.video_pnl`. Additional layout elements can be added below 84 | the camera feed by means of the method `self.panels_vertical.Add` 85 | """ 86 | raise NotImplementedError() 87 | 88 | def _on_next_frame(self, event): 89 | """ 90 | Capture a new frame from the capture device, 91 | send an RGB version to `self.process_frame`, refresh. 92 | """ 93 | success, frame = self._acquire_frame() 94 | if success: 95 | # process current frame 96 | frame = self.process_frame(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 97 | 98 | # update buffer and paint (EVT_PAINT triggered by Refresh) 99 | self.bmp.CopyFromBuffer(frame) 100 | self.Refresh(eraseBackground=False) 101 | 102 | def _on_paint(self, event): 103 | """ Draw the camera frame stored in `self.bmp` onto `self.video_pnl`. 104 | """ 105 | wx.BufferedPaintDC(self.video_pnl).DrawBitmap(self.bmp, 0, 0) 106 | 107 | def _acquire_frame(self) -> (bool, np.ndarray): 108 | """ Capture a new frame from the input device 109 | 110 | :return: (success, frame) 111 | Whether acquiring was successful and current frame. 112 | """ 113 | return self.capture.read() 114 | 115 | def process_frame(self, frame_rgb: np.ndarray) -> np.ndarray: 116 | """Process the frame of the camera (or other capture device) 117 | 118 | :param frame_rgb: Image to process in rgb format, of shape (H, W, 3) 119 | :return: Processed image in rgb format, of shape (H, W, 3) 120 | """ 121 | raise NotImplementedError() 122 | --------------------------------------------------------------------------------
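Usage note (not part of the repository): wx_gui.BaseLayout is abstract, so a subclass must implement augment_layout() and process_frame(), and is then driven the same way run_layout() does it in chapter8/chapter8.py. The sketch below illustrates that pattern; the EdgeLayout class and its Canny thresholds are hypothetical and not taken from the book's code.

#!/usr/bin/env python
"""Minimal usage sketch for wx_gui.BaseLayout (illustrative only)."""

import cv2
import numpy as np
import wx

from wx_gui import BaseLayout


class EdgeLayout(BaseLayout):
    """Hypothetical layout that overlays Canny edges on the camera feed."""

    def augment_layout(self):
        # No extra widgets below the video panel in this sketch.
        pass

    def process_frame(self, frame_rgb: np.ndarray) -> np.ndarray:
        gray = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2GRAY)
        edges = cv2.Canny(gray, 100, 200)
        # Paint the detected edges green on top of the original frame.
        frame_rgb[edges > 0] = (0, 255, 0)
        return frame_rgb


if __name__ == '__main__':
    capture = cv2.VideoCapture(0)
    capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    app = wx.App()
    layout = EdgeLayout(capture, title='Edge demo')
    layout.Center()
    layout.Show()
    app.MainLoop()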