├── .dockerignore
├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── appendix_1
│   └── numba_iou.ipynb
├── chapter1
│   ├── chapter1.py
│   ├── pencilsketch_bg.jpg
│   ├── tools.py
│   └── wx_gui.py
├── chapter10
│   ├── classes.py
│   ├── demo.mp4
│   ├── frozen_inference_graph.pb
│   ├── main.py
│   ├── sort.py
│   └── ssd_mobilenet_v1_coco_2017_11_17.pbtxt.txt
├── chapter2
│   ├── chapter2.py
│   ├── frame_reader.py
│   └── gestures.py
├── chapter3
│   ├── chapter3.py
│   ├── feature_matching.py
│   ├── train.png
│   └── wx_gui.py
├── chapter4
│   ├── calibrate.py
│   ├── chapter4.py
│   ├── fountain_dense
│   │   ├── 0000.png
│   │   ├── 0001.png
│   │   ├── 0002.png
│   │   ├── 0003.png
│   │   ├── 0004.png
│   │   ├── 0005.png
│   │   ├── 0006.png
│   │   ├── 0007.png
│   │   ├── 0008.png
│   │   ├── 0009.png
│   │   └── 0010.png
│   ├── scene3D.py
│   └── wx_gui.py
├── chapter5
│   ├── common.py
│   ├── gamma_correct.py
│   ├── hdr.py
│   └── panorama.py
├── chapter6
│   ├── chapter6.py
│   ├── saliency.py
│   ├── soccer.avi
│   ├── tracking.py
│   └── tracking_api.py
├── chapter7
│   ├── chapter7.py
│   ├── data
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── gtsrb.py
│   │   └── process.py
│   └── train_tf2.py
├── chapter8
│   ├── .gitignore
│   ├── chapter8.py
│   ├── data
│   │   ├── .gitignore
│   │   ├── process.py
│   │   └── store.py
│   ├── detectors.py
│   ├── params
│   │   ├── haarcascade_frontalface_default.xml
│   │   ├── haarcascade_lefteye_2splits.xml
│   │   └── haarcascade_righteye_2splits.xml
│   ├── train_classifier.py
│   └── wx_gui.py
├── chapter9
│   ├── classification.py
│   ├── data.py
│   ├── inference.py
│   └── localization.py
├── dockerfiles
│   ├── Dockerfile
│   └── gpu.Dockerfile
├── requirements.txt
├── wheels
│   └── wxPython-4.0.7.post2-cp38-cp38-linux_x86_64.whl
└── wx_gui.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git/
2 | .py2.7-cv-blueprints/
3 | .py3.6-cv-blueprints/
4 | .py3.7-cv-blueprints/
5 | .py3.8-cv-blueprints/
6 | .py3*/
7 | wheels/
8 |
9 | chapter6/data/GTSRB*
10 |
11 | .idea/
12 | .tmp/
13 | .gitignore
14 | .ipynb_checkpoints
15 | __pycache__/
16 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | wheels/wxPython-4.0.7.post2-cp38-cp38-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Virtual Environment
2 | .py2.7-cv-blueprints/
3 | .py3.6-cv-blueprints/
4 | .py3.7-cv-blueprints/
5 | .py3.8-cv-blueprints/
6 | .tmp/
7 |
8 | .ipynb_checkpoints
9 |
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | env/
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | eggs/
25 | .eggs/
26 | lib/
27 | lib64/
28 | parts/
29 | sdist/
30 | var/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *,cover
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 |
62 | # Sphinx documentation
63 | docs/_build/
64 |
65 | # PyBuilder
66 | target/
67 | *.pkl
68 |
69 |
70 | # Editor
71 | *.sw[op]
72 | .idea/
73 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Packt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # OpenCV-4-with-Python-Blueprints-Second-Edition
5 |
6 | [DOI](https://doi.org/10.5281/zenodo.154060)
7 | [Google group](https://groups.google.com/d/forum/opencv-python-blueprints)
8 | [License: GPL v3](http://www.gnu.org/licenses/gpl-3.0)
9 |
10 | This repository contains all up-to-date source code for the following book:
11 |
12 |
13 | Dr. Menua Gevorgyan, Arsen Mamikonyan, Michael Beyeler
14 | OpenCV 4 with Python Blueprints - Second Edition: Build creative computer vision projects with the latest version of OpenCV 4 and Python 3
15 |
16 |
17 | Packt Publishing Ltd.
18 | Paperback: 366 pages
19 | ISBN 978-178980-181-1
20 |
21 |
22 | This book demonstrates how to develop a series of intermediate to advanced projects using OpenCV and Python,
23 | rather than teaching the core concepts of OpenCV in theoretical lessons. Instead, the working projects
24 | developed in this book teach the reader how to apply their theoretical knowledge to topics such as
25 | image manipulation, augmented reality, object tracking, 3D scene reconstruction, statistical learning,
26 | and object categorization.
27 |
28 | By the end of this book, readers will be OpenCV experts whose newly gained experience allows them to develop their own advanced computer vision applications.
29 |
30 | If you use either the book or the code in a scholarly publication, please cite it as:
31 | > Menua Gevorgyan, Arsen Mamikonyan, Michael Beyeler, (2020). OpenCV with Python Blueprints - Second Edition: Build creative computer vision projects with the latest version of OpenCV 4 and Python 3. Packt Publishing Ltd., London, England, 366 pages, ISBN 978-178980-181-1.
32 |
33 | Or use the following bibtex:
34 | ```
35 | @book{OpenCVWithPythonBlueprints,
36 | title = {{OpenCV with Python Blueprints}},
37 | subtitle = {Build creative computer vision projects with the latest version of {OpenCV 4} and {Python 3}},
38 |   author = {Menua Gevorgyan and Arsen Mamikonyan and Michael Beyeler},
39 | year = {2020},
40 | pages = {366},
41 | publisher = {Packt Publishing Ltd.},
42 | isbn = {978-178980-181-1}
43 | }
44 | ```
45 |
46 | Scholarly work referencing the first edition of the book:
47 | - B Zhang et al. (2018). Automatic matching of construction onsite resources under camera views. *Automation in Construction*.
48 | - A Jakubović & J Velagić (2018). Image Feature Matching and Object Detection Using Brute-Force Matchers. *International Symposium ELMAR*.
49 | - B Zhang et al. (2018). Multi-View Matching for Onsite Construction Resources with Combinatorial Optimization. *International Symposium on Automation and Robotics in Construction (ISARC)* 35:1-7.
50 | - LA Marcomini (2018). Identificação automática do comportamento do tráfego a partir de imagens de vídeo. *Escola de Engenharia de São Carlos*, Master's Thesis.
51 | - G Laica et al. (2018). Diseño y construcción de un andador inteligente para el desplazamiento autónomo de los adultos mayores con visión reducida y problemas de movilidad del hogar de vida "Luis Maldonado Tamayo" mediante la investigación de técnicas de visión artificial. *Departamento de Ciencias de la Energía y Mecánica, Universidad de las Fuerzas Armadas ESPE*, Master's Thesis.
52 | - I Huitzil-Velasco et al. (2017). Test of a Myo Armband. *Revista de Ciencias Ambientales y Recursos Naturales* 3(10): 48-56.
53 | - Y Güçlütürk et al. (2016). Convolutional sketch inversion. *European Conference on Computer Vision (ECCV)* 810-824.
54 |
55 |
56 | All code was tested with OpenCV 4.2.0 and Python 3.8 on Ubuntu 18.04, and is available from:
57 | https://github.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/
58 |
59 | We have also created a Dockerfile, available at https://github.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/tree/master/dockerfiles/Dockerfile, which you can use to build and run the code; see the Docker instructions in this README.
60 |
61 |
62 | ## Critical Reception (First Edition)
63 |
64 |
65 |
66 | What readers on Amazon have to say:
67 |
68 | > The author does a great job explaining the concepts needed to understand what's happening in the application without
69 | > the need of going into too many details.
70 | – [Sebastian Montabone](http://www.samontab.com)
71 |
72 | > Excellent book to build practical OpenCV projects! I'm still relatively new to OpenCV, but all examples are well
73 | > laid out and easy to follow. The author does a good job explaining the concepts in detail and shows how they apply
74 | > in real life. As a professional programmer, I especially love that you can just fork the code from GitHub and follow
75 | > along. Strongly recommend to readers with basic knowledge of computer vision, machine learning, and Python!
76 | – Amazon Customer
77 |
78 | > Usually I'm not a big fan of technical books because they are too dull, but this one is written in an engaging
79 | > manner with a few dry jokes here and there. Can only recommend!
80 | – lakesouth
81 |
82 | ## Who This Book Is For
83 | As part of Packt's Blueprints series, this book is for intermediate users of OpenCV who aim to master their skills
84 | by developing advanced practical applications. You should already have some
85 | experience of building simple applications and are expected to be familiar with
86 | OpenCV's concepts and Python libraries. Basic knowledge of Python programming
87 | is assumed.
88 |
89 | By the end of this book, you will be an OpenCV expert, and your newly gained
90 | experience will allow you to develop your own advanced computer vision
91 | applications.
92 |
93 |
94 | ## Getting Started
95 | All projects can run on Windows, Mac, or Linux. The required packages can be installed with pip, or you can use the Docker images provided in the repository to run the chapter scripts.
96 |
97 | ## Installation With Pip
98 |
99 |
100 | ```
101 | pip install -r requirements.txt
102 | ```
103 |
104 | ## Running With Docker
105 |
106 |
107 |
108 | ### Build the Image
109 |
110 | The repository contains two Dockerfiles:
111 |
112 | 1. Without GPU acceleration
113 | ```
114 | docker build -t book dockerfiles
115 | ```
116 | 2. With GPU (CUDA) acceleration
117 | ```
118 | docker build -t book dockerfiles -f dockerfiles/gpu.Dockerfile
119 | ```
120 |
121 | ### Start a Container
122 |
123 | ```
124 | docker run --device /dev/video0 --env DISPLAY=$DISPLAY -v="/tmp/.X11-unix:/tmp/.X11-unix:rw" -v `pwd`:/book -it book
125 | ```
126 |
127 | Here, we allow Docker to access the default camera and to use the host machine's X11 server to run graphical applications. If you use the GPU version of the image, you also have to pass `--runtime nvidia`.
128 |
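For example, a container built from the GPU image could be started with the same options plus the NVIDIA runtime; the following combined command is only a sketch based on the two commands above:

```
docker run --runtime nvidia --device /dev/video0 --env DISPLAY=$DISPLAY -v="/tmp/.X11-unix:/tmp/.X11-unix:rw" -v `pwd`:/book -it book
```
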
129 | ### Run an App
130 | Inside the container, change to the directory of the desired chapter:
131 | ```
132 | cd /book/chapterX
133 | ```
134 | and run the desired script of that chapter:
135 | ```
136 | python chapterX.py
137 | ```
138 |
139 |
140 |
141 | ### Troubleshooting
142 |
143 | #### Could not connect to any X display.
144 |
145 | The X server must be configured to allow connections from the Docker container.
146 | 
147 | Run `xhost +local:docker`; see also [this forum thread](https://forums.docker.com/t/start-a-gui-application-as-root-in-a-ubuntu-container/17069).
148 |
149 |
150 | ## Packages Used in the Chapters of the Book
151 | * OpenCV 4.2 or later: Recent 32-bit and 64-bit versions as well as installation instructions are available at
152 | http://opencv.org/downloads.html. Platform-specific installation instructions can be found at
153 | http://docs.opencv.org/doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.html.
154 | * Python 3.8 or later: Recent 32-bit and 64-bit installers are available at https://www.python.org/downloads. The
155 | installation instructions can be found at https://wiki.python.org/moin/BeginnersGuide/Download.
156 | * NumPy 1.18.1 or later: This package for scientific computing is available in 32-bit and 64-bit versions, and can be
157 | obtained from http://www.scipy.org/scipylib/download.html. The installation instructions can be found at
158 | http://www.scipy.org/scipylib/building/index.html#building.
159 |
160 | In addition, some chapters require the following free Python modules:
161 | * wxPython 4.0 or later (Chapters 1 to 4, 8): This GUI programming toolkit can be obtained from
162 | http://www.wxpython.org/download.php.
163 | * matplotlib 3.1 or later (Chapters 4, 5, 6, and 7): This 2D plotting library can be obtained from
164 | http://matplotlib.org/downloads.html. Its installation instructions can be found by going to
165 | http://matplotlib.org/faq/installing_faq.html#how-to-install.
166 | * SciPy 1.4 or later (Chapters 1 and 10): This scientific Python library is available in 32-bit and 64-bit versions, and can be
167 | obtained from http://www.scipy.org/scipylib/download.html. The installation instructions can be found at
168 | http://www.scipy.org/scipylib/building/index.html#building.
169 | * rawpy 0.14 and ExifRead==2.1.2 (Chapter 5)
170 | * requests==2.22.0 (Chapter 7): used to download the chapter's data
171 |
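If you prefer not to use the Docker images, these chapter-specific modules can also be installed directly with pip; the following command is only a sketch, assuming the versions listed above:

```
pip install "wxPython>=4.0" "matplotlib>=3.1" "scipy>=1.4" "rawpy>=0.14" ExifRead==2.1.2 requests==2.22.0
```
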
172 | Furthermore, the use of IPython (http://ipython.org/install.html) is highly recommended as it provides a flexible,
173 | interactive console interface.
174 |
175 |
176 | ## License
177 | The software is released under the GNU General Public License (GPL), which is the most commonly used free software
178 | license according to Wikipedia. GPL allows for commercial use, distribution, modification, patent use, and private use.
179 |
180 | The GPL is a copyleft license, which means that derived works can only be distributed under the same license terms.
181 | For more information, please see the license file.
182 | ### Download a free PDF
183 |
184 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost. Simply click on the link to claim your free PDF.
185 | 
186 | https://packt.link/free-ebook/9781789801811
--------------------------------------------------------------------------------
/appendix_1/numba_iou.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import numba"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 3,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "def iou(a: np.ndarray, b: np.ndarray) -> float:\n",
20 | " a_tl, a_br = a[:4].reshape((2, 2))\n",
21 | " b_tl, b_br = b[:4].reshape((2, 2))\n",
22 | " int_tl = np.maximum(a_tl, b_tl)\n",
23 | " int_br = np.minimum(a_br, b_br)\n",
24 | " int_area = np.product(np.maximum(0., int_br - int_tl))\n",
25 | " a_area = np.product(a_br - a_tl)\n",
26 | " b_area = np.product(b_br - b_tl)\n",
27 | " return int_area / (a_area + b_area - int_area)"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 4,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "def calc_iou_matrix(detections,trackers):\n",
37 | " iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float32)\n",
38 | "\n",
39 | " for d, det in enumerate(detections):\n",
40 | " for t, trk in enumerate(trackers):\n",
41 | " iou_matrix[d, t] = iou(det, trk)\n",
42 | " return iou_matrix"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 5,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "A = np.random.rand(100,4)\n",
52 | "B = np.random.rand(100,4)"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 5,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "name": "stdout",
62 | "output_type": "stream",
63 | "text": [
64 | "328 ms ± 26.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
65 | ]
66 | }
67 | ],
68 | "source": [
69 | "%timeit calc_iou_matrix(A,B)"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 6,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "name": "stdout",
79 | "output_type": "stream",
80 | "text": [
81 | "14.5 ms ± 792 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
82 | ]
83 | }
84 | ],
85 | "source": [
86 | "@numba.jit(nopython=True)\n",
87 | "def product(a):\n",
88 | " result = 1\n",
89 | " for i in range(len(a)):\n",
90 | " result*=a[i]\n",
91 | " return result\n",
92 | "\n",
93 | "@numba.jit(nopython=True)\n",
94 | "def iou(a: np.ndarray, b: np.ndarray) -> float:\n",
95 | " a_tl, a_br = a[0:2],a[2:4]\n",
96 | " b_tl, b_br = b[0:2],b[2:4]\n",
97 | " int_tl = np.maximum(a_tl, b_tl)\n",
98 | " int_br = np.minimum(a_br, b_br)\n",
99 | " int_area = product(np.maximum(0., int_br - int_tl))\n",
100 | " a_area = product(a_br - a_tl)\n",
101 | " b_area = product(b_br - b_tl)\n",
102 | " return int_area / (a_area + b_area - int_area)\n",
103 | "%timeit calc_iou_matrix(A,B)"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 7,
109 | "metadata": {},
110 | "outputs": [
111 | {
112 | "name": "stdout",
113 | "output_type": "stream",
114 | "text": [
115 | "7.08 ms ± 31 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
116 | ]
117 | }
118 | ],
119 | "source": [
120 | "@numba.jit(nopython=True)\n",
121 | "def calc_iou_matrix(detections,trackers):\n",
122 | " iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float32)\n",
123 | "\n",
124 | " for d in range(len(detections)):\n",
125 | " det = detections[d]\n",
126 | " for t in range(len(trackers)):\n",
127 | " trk = trackers[t]\n",
128 | " iou_matrix[d, t] = iou(det, trk)\n",
129 | "    return iou_matrix\n",
130 | "%timeit calc_iou_matrix(A,B)"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 22,
136 | "metadata": {},
137 | "outputs": [
138 | {
139 | "data": {
140 | "text/plain": [
141 | "array([[2.84786978e-01, 1.49667930e-01, 4.52272718e-01, 7.64122472e-01],\n",
142 | " [1.29328195e-01, 1.85575401e-01, 2.44202698e-01, 1.62779569e-01],\n",
143 | " [8.79994339e-01, 7.84159368e-01, 8.96072205e-01, 4.64633477e-01],\n",
144 | " [2.34234905e-01, 8.16629721e-02, 9.31725878e-01, 9.15272896e-02],\n",
145 | " [6.17814797e-01, 7.92031790e-01, 5.93997225e-01, 9.17106773e-01],\n",
146 | " [5.46257280e-01, 5.90488641e-01, 2.45114229e-01, 5.27682917e-02],\n",
147 | " [7.13898454e-01, 8.35825322e-01, 6.37713557e-01, 3.53566190e-01],\n",
148 | " [7.24266392e-01, 8.09674239e-01, 6.40267349e-01, 2.10335049e-02],\n",
149 | " [3.60260021e-01, 6.98047814e-01, 7.49194667e-01, 7.24404182e-01],\n",
150 | " [4.54322591e-01, 9.39216612e-01, 1.76701033e-01, 5.49275464e-01],\n",
151 | " [8.48035985e-01, 5.33716147e-01, 9.57733994e-01, 9.79705353e-01],\n",
152 | " [9.78277570e-01, 4.44890125e-01, 9.39733444e-01, 7.50692101e-02],\n",
153 | " [8.22548577e-01, 6.41685401e-01, 7.45234802e-01, 5.09103658e-01],\n",
154 | " [2.60426971e-01, 1.00493773e-01, 1.71217758e-01, 5.76172101e-01],\n",
155 | " [5.19713732e-01, 3.46948301e-01, 2.44690123e-01, 4.19853462e-01],\n",
156 | " [4.68961238e-02, 5.52729107e-02, 6.59324109e-01, 6.09828494e-01],\n",
157 | " [3.85613432e-02, 8.55204783e-01, 9.26012669e-01, 7.09888502e-01],\n",
158 | " [1.29093393e-01, 9.49590162e-01, 3.22063612e-01, 5.29960328e-01],\n",
159 | " [7.08357430e-01, 3.80448216e-01, 7.00744999e-01, 5.06966245e-01],\n",
160 | " [3.26126181e-01, 5.75468101e-01, 6.78994830e-01, 5.54342831e-01],\n",
161 | " [8.12375538e-02, 7.49469620e-01, 2.72455198e-01, 5.22540811e-01],\n",
162 | " [3.85367089e-01, 4.41736512e-01, 5.14454592e-01, 1.19293497e-01],\n",
163 | " [7.64035617e-01, 9.49622911e-01, 5.93891039e-01, 1.53838921e-01],\n",
164 | " [7.97760257e-02, 6.49648923e-01, 9.16435057e-01, 4.88523011e-02],\n",
165 | " [2.29542125e-01, 6.18607365e-01, 9.29588552e-01, 7.52633838e-01],\n",
166 | " [1.32451561e-01, 2.60474882e-01, 8.20534966e-05, 5.34758335e-01],\n",
167 | " [9.26208346e-01, 8.88325527e-01, 5.64661678e-01, 7.51415300e-02],\n",
168 | " [6.23189011e-02, 8.45335974e-01, 9.56306204e-01, 5.80513844e-02],\n",
169 | " [5.12158377e-01, 7.08644649e-01, 1.99225853e-01, 6.40248533e-01],\n",
170 | " [9.71114534e-01, 4.65672329e-01, 4.79785001e-01, 6.06286740e-01],\n",
171 | " [9.02719316e-01, 1.29368742e-01, 1.56595684e-01, 3.48418367e-01],\n",
172 | " [4.26345005e-01, 6.80225657e-01, 7.05097062e-01, 8.16246516e-01],\n",
173 | " [8.43022840e-01, 1.55150328e-01, 1.07014732e-01, 2.66176015e-01],\n",
174 | " [1.43833136e-01, 7.46381201e-01, 9.33459466e-01, 9.75532745e-01],\n",
175 | " [5.94062493e-01, 6.31293475e-01, 8.89775355e-01, 8.74834902e-01],\n",
176 | " [3.48179401e-01, 5.26730661e-01, 2.72650674e-01, 4.06032481e-01],\n",
177 | " [6.47693494e-02, 1.16628348e-01, 3.57464769e-01, 9.29423610e-01],\n",
178 | " [2.23870691e-01, 6.33822446e-01, 1.76303346e-01, 5.90196201e-01],\n",
179 | " [5.19051931e-01, 1.69239893e-01, 3.92817274e-01, 4.94994064e-01],\n",
180 | " [2.35778432e-01, 8.93753320e-01, 2.52470705e-01, 2.54120126e-01],\n",
181 | " [7.94779449e-03, 2.99115150e-01, 2.62439362e-02, 3.10726596e-01],\n",
182 | " [7.43372632e-01, 3.06148680e-01, 1.41128562e-01, 9.88191773e-01],\n",
183 | " [1.62348419e-02, 3.45068825e-01, 3.87701560e-01, 2.32051568e-01],\n",
184 | " [3.62609599e-01, 8.12818470e-01, 1.79017692e-01, 1.10072390e-01],\n",
185 | " [6.00747111e-01, 5.91610252e-02, 3.52787226e-01, 1.80002683e-01],\n",
186 | " [1.20041811e-01, 8.24721534e-01, 1.54461163e-01, 8.47966775e-01],\n",
187 | " [1.09809920e-01, 1.86146627e-01, 8.04777057e-01, 6.50033285e-01],\n",
188 | " [5.92720751e-01, 5.12866344e-01, 5.47108710e-01, 5.13579475e-01],\n",
189 | " [7.12107962e-01, 2.94623431e-01, 1.67693961e-01, 6.15859883e-01],\n",
190 | " [3.64380859e-01, 2.52832916e-01, 1.19972728e-01, 8.69102115e-01],\n",
191 | " [9.32491659e-01, 5.55593690e-01, 8.67936902e-01, 8.69947870e-01],\n",
192 | " [3.80094240e-01, 7.66910220e-01, 2.27431462e-01, 8.18655906e-02],\n",
193 | " [1.24677930e-01, 1.67478945e-01, 4.77225927e-01, 9.25475390e-01],\n",
194 | " [3.62191294e-01, 6.52753650e-01, 8.71392075e-01, 4.53979568e-01],\n",
195 | " [7.83613408e-01, 3.96295148e-01, 6.68070842e-01, 8.22256827e-01],\n",
196 | " [2.66799295e-01, 1.03738181e-01, 6.99157302e-01, 5.68579542e-01],\n",
197 | " [1.86985863e-01, 2.17226350e-02, 6.55800074e-01, 8.12148503e-01],\n",
198 | " [4.23810156e-01, 9.98200261e-01, 2.60635320e-01, 2.59812443e-01],\n",
199 | " [6.41079589e-01, 9.04388024e-01, 6.57337920e-01, 3.67862770e-01],\n",
200 | " [4.38713382e-01, 8.55237885e-01, 9.46515235e-01, 6.80434047e-01],\n",
201 | " [3.56163917e-01, 2.11996973e-01, 4.18694478e-02, 8.82582875e-01],\n",
202 | " [5.18865711e-01, 2.03951649e-01, 8.24686364e-02, 8.81622400e-01],\n",
203 | " [8.18017015e-01, 9.89771198e-01, 7.12994585e-01, 1.95878071e-01],\n",
204 | " [4.09745356e-01, 3.80620069e-02, 7.09507585e-01, 4.67298871e-01],\n",
205 | " [8.46667850e-01, 7.69717548e-01, 8.35631943e-01, 1.89558165e-01],\n",
206 | " [7.76663279e-01, 2.42167881e-01, 8.27233081e-01, 8.38679793e-01],\n",
207 | " [3.98400180e-01, 6.39092257e-01, 6.49660355e-01, 6.74128981e-01],\n",
208 | " [5.60777039e-01, 3.09213853e-01, 1.05642424e-01, 8.82463125e-01],\n",
209 | " [4.28736238e-01, 1.60264552e-01, 5.24715315e-01, 1.90091999e-01],\n",
210 | " [1.88907251e-01, 6.29227371e-01, 2.26614241e-01, 6.30271581e-01],\n",
211 | " [2.88142634e-02, 1.30737785e-02, 2.86926726e-02, 9.74329017e-01],\n",
212 | " [3.17601670e-01, 5.06013243e-01, 6.81056475e-01, 7.03450623e-01],\n",
213 | " [3.79000387e-01, 2.31957206e-01, 1.21083567e-01, 5.30021306e-01],\n",
214 | " [8.68262590e-02, 4.97116188e-01, 1.43719020e-01, 4.54984100e-01],\n",
215 | " [9.80171576e-01, 7.04657839e-01, 7.03200159e-02, 6.85264033e-01],\n",
216 | " [8.04557409e-01, 9.68377769e-01, 3.71729269e-01, 2.69743301e-01],\n",
217 | " [9.82104829e-02, 6.47122350e-03, 2.11175103e-01, 9.21097412e-01],\n",
218 | " [6.54719561e-02, 8.25470813e-01, 1.23573962e-01, 4.82917056e-01],\n",
219 | " [4.96703114e-01, 9.01691899e-02, 2.20353326e-01, 1.77234820e-01],\n",
220 | " [2.10898552e-01, 2.87533614e-01, 6.62700998e-01, 5.98436081e-01],\n",
221 | " [7.50308497e-02, 2.29851569e-01, 2.89361741e-01, 7.93646032e-01],\n",
222 | " [5.73880841e-01, 2.43526849e-01, 6.33192394e-01, 6.70816044e-01],\n",
223 | " [6.34623072e-01, 5.07885907e-01, 7.27704114e-01, 7.48819857e-01],\n",
224 | " [1.88771659e-01, 3.10023429e-01, 7.30603811e-02, 3.29997110e-01],\n",
225 | " [2.14005767e-01, 8.67044002e-01, 9.31721932e-01, 1.14427755e-01],\n",
226 | " [7.28537781e-02, 3.20687446e-02, 9.22391264e-01, 2.47669559e-01],\n",
227 | " [8.62983037e-01, 8.17282328e-01, 9.40907208e-01, 7.70718873e-01],\n",
228 | " [8.26480058e-01, 9.50637853e-01, 4.44769479e-01, 6.71710373e-01],\n",
229 | " [6.45784531e-02, 6.93421874e-01, 4.60386577e-01, 1.16068386e-01],\n",
230 | " [6.37542338e-01, 6.20640364e-01, 1.66622630e-01, 9.30108359e-01],\n",
231 | " [9.82964009e-01, 1.60835035e-01, 5.64917699e-01, 5.78353741e-01],\n",
232 | " [7.58120906e-01, 7.67157309e-01, 4.62495285e-01, 1.85201688e-01],\n",
233 | " [7.56419429e-01, 4.10743083e-01, 7.93451756e-01, 4.15199488e-01],\n",
234 | " [9.52036799e-01, 7.84123616e-01, 4.29499335e-01, 6.80436993e-02],\n",
235 | " [2.41347372e-01, 4.53595425e-01, 9.85869772e-02, 1.38116241e-01],\n",
236 | " [4.05717412e-01, 5.19340460e-01, 2.19914351e-01, 2.65040326e-01],\n",
237 | " [5.27267087e-01, 6.47721544e-01, 8.05855368e-01, 4.49244422e-02],\n",
238 | " [7.30421916e-01, 3.40132741e-01, 9.70534517e-03, 6.76803031e-01],\n",
239 | " [5.14827463e-01, 8.64038613e-01, 9.02144952e-01, 8.70425950e-01],\n",
240 | " [7.77698231e-01, 2.67776473e-01, 5.54169302e-01, 7.32617299e-01]])"
241 | ]
242 | },
243 | "execution_count": 22,
244 | "metadata": {},
245 | "output_type": "execute_result"
246 | }
247 | ],
248 | "source": [
249 | "@numba.vectorize\n",
250 | "def custom_operation(a,b):\n",
251 | " if b == 0:\n",
252 | " return 0\n",
253 | " return a*b if a>b else a/b\n",
254 | "custom_operation(A,B)\n",
255 | "custom_operation(A,np.ones((1,4)))"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": 18,
261 | "metadata": {},
262 | "outputs": [
263 | {
264 | "name": "stdout",
265 | "output_type": "stream",
266 | "text": [
267 | "196 µs ± 2.46 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
268 | ]
269 | }
270 | ],
271 | "source": [
272 | "@numba.guvectorize(['(f8[:, :], f8[:, :], f8[:, :])'], '(m,k),(n,k1)->(m, n)')\n",
273 | "def calc_iou_matrix(x, y, z):\n",
274 | " for i in range(x.shape[0]):\n",
275 | "        for j in range(y.shape[0]):\n",
276 | "            z[i, j] = iou(x[i],y[j])\n",
277 | "\n",
278 | "%timeit calc_iou_matrix(A,B)"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": 9,
284 | "metadata": {},
285 | "outputs": [],
286 | "source": [
287 | "from numba import cuda"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 10,
293 | "metadata": {},
294 | "outputs": [],
295 | "source": [
296 | "@cuda.jit(device=True)\n",
297 | "def iou(a: np.ndarray, b: np.ndarray) -> float:\n",
298 | " xx1 = max(a[0], b[0])\n",
299 | " yy1 = max(a[1], b[1])\n",
300 | " xx2 = min(a[2], b[2])\n",
301 | " yy2 = min(a[3], b[3])\n",
302 | " w = max(0., xx2 - xx1)\n",
303 | " h = max(0., yy2 - yy1)\n",
304 | " wh = w * h\n",
305 | " result = wh / ((a[2]-a[0])*(a[3]-a[1])\n",
306 | " + (b[2]-b[0])*(b[3]-b[1]) - wh)\n",
307 | " return result"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": null,
313 | "metadata": {},
314 | "outputs": [],
315 | "source": [
316 | "@numba.guvectorize(['(f8[:, :], f8[:, :], f8[:, :])'], '(m,k),(n,k1)->(m, n)',target=\"cuda\")\n",
317 | "def calc_iou_matrix(x, y, z):\n",
318 | " for i in range(x.shape[0]):\n",
319 | "        for j in range(y.shape[0]):\n",
320 | " z[i, j] = iou(x[i],y[j])"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": null,
326 | "metadata": {},
327 | "outputs": [],
328 | "source": []
329 | }
330 | ],
331 | "metadata": {
332 | "kernelspec": {
333 | "display_name": "Python 3",
334 | "language": "python",
335 | "name": "python3"
336 | },
337 | "language_info": {
338 | "codemirror_mode": {
339 | "name": "ipython",
340 | "version": 3
341 | },
342 | "file_extension": ".py",
343 | "mimetype": "text/x-python",
344 | "name": "python",
345 | "nbconvert_exporter": "python",
346 | "pygments_lexer": "ipython3",
347 | "version": "3.8.1"
348 | }
349 | },
350 | "nbformat": 4,
351 | "nbformat_minor": 2
352 | }
353 |
--------------------------------------------------------------------------------
/chapter1/chapter1.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """ OpenCV with Python Blueprints. Chapter 1: Fun with Filters
5 |
6 | An app to apply three different image filter effects to the video stream
7 | of a webcam in real-time.
8 |
9 | The three effects are:
10 | - Warming/cooling filters
11 | - Black-and-white pencil sketch
12 | - Cartoonizer
13 | """
14 |
15 | import wx
16 | import cv2
17 | import numpy as np
18 |
19 | from wx_gui import BaseLayout
20 | from tools import apply_hue_filter
21 | from tools import apply_rgb_filters
22 | from tools import load_img_resized
23 | from tools import spline_to_lookup_table
24 | from tools import cartoonize
25 | from tools import pencil_sketch_on_canvas
26 |
27 |
28 | __license__ = "GNU GPL 3.0 or later"
29 |
30 |
31 | INCREASE_LOOKUP_TABLE = spline_to_lookup_table([0, 64, 128, 192, 256],
32 | [0, 70, 140, 210, 256])
33 | DECREASE_LOOKUP_TABLE = spline_to_lookup_table([0, 64, 128, 192, 256],
34 | [0, 30, 80, 120, 192])
35 |
36 |
37 | class FilterLayout(BaseLayout):
38 | """
39 | Custom layout for filter effects
40 |
41 | This class implements a custom layout for applying diverse filter
42 | effects to a camera feed. The layout is based on an abstract base
43 | class BaseLayout. It displays the camera feed (passed to the class as
44 | a cv2.VideoCapture object) in the variable self.panels_vertical.
45 | Additional layout elements can be added by using the Add method (e.g.,
46 |     self.panels_vertical.Add(wx.Panel(self, -1))).
47 | """
48 |
49 | def __init__(self, *args, **kwargs):
50 | super().__init__(*args, **kwargs)
51 | color_canvas = load_img_resized('pencilsketch_bg.jpg',
52 | (self.imgWidth, self.imgHeight))
53 | self.canvas = cv2.cvtColor(color_canvas, cv2.COLOR_RGB2GRAY)
54 |
55 | def augment_layout(self):
56 | """ Add a row of radio buttons below the camera feed. """
57 |
58 | # create a horizontal layout with all filter modes as radio buttons
59 | pnl = wx.Panel(self, -1)
60 | self.mode_warm = wx.RadioButton(pnl, -1, 'Warming Filter', (10, 10),
61 | style=wx.RB_GROUP)
62 | self.mode_cool = wx.RadioButton(pnl, -1, 'Cooling Filter', (10, 10))
63 | self.mode_sketch = wx.RadioButton(pnl, -1, 'Pencil Sketch', (10, 10))
64 | self.mode_cartoon = wx.RadioButton(pnl, -1, 'Cartoon', (10, 10))
65 | hbox = wx.BoxSizer(wx.HORIZONTAL)
66 | hbox.Add(self.mode_warm, 1)
67 | hbox.Add(self.mode_cool, 1)
68 | hbox.Add(self.mode_sketch, 1)
69 | hbox.Add(self.mode_cartoon, 1)
70 | pnl.SetSizer(hbox)
71 |
72 | # add panel with radio buttons to existing panels in a vertical
73 | # arrangement
74 | self.panels_vertical.Add(pnl, flag=wx.EXPAND | wx.BOTTOM | wx.TOP,
75 | border=1)
76 |
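    # The warming filter below boosts reds and suppresses blues before
    # increasing the saturation (apply_hue_filter applies its lookup table
    # to the saturation channel); the cooling filter does the opposite.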
77 | @staticmethod
78 | def _render_warm(rgb_image: np.ndarray) -> np.ndarray:
79 | interim_img = apply_rgb_filters(rgb_image,
80 | red_filter=INCREASE_LOOKUP_TABLE,
81 | blue_filter=DECREASE_LOOKUP_TABLE)
82 | return apply_hue_filter(interim_img, INCREASE_LOOKUP_TABLE)
83 |
84 | @staticmethod
85 | def _render_cool(rgb_image: np.ndarray) -> np.ndarray:
86 | interim_img = apply_rgb_filters(rgb_image,
87 | red_filter=DECREASE_LOOKUP_TABLE,
88 | blue_filter=INCREASE_LOOKUP_TABLE)
89 | return apply_hue_filter(interim_img, DECREASE_LOOKUP_TABLE)
90 |
91 | def process_frame(self, frame_rgb: np.ndarray) -> np.ndarray:
92 | """Process the frame of the camera (or other capture device)
93 |
94 |         Choose a filter effect based on which of the radio buttons
95 | was clicked.
96 |
97 | :param frame_rgb: Image to process in rgb format, of shape (H, W, 3)
98 | :return: Processed image in rgb format, of shape (H, W, 3)
99 | """
100 | if self.mode_warm.GetValue():
101 | return self._render_warm(frame_rgb)
102 | elif self.mode_cool.GetValue():
103 | return self._render_cool(frame_rgb)
104 | elif self.mode_sketch.GetValue():
105 | return pencil_sketch_on_canvas(frame_rgb, canvas=self.canvas)
106 | elif self.mode_cartoon.GetValue():
107 | return cartoonize(frame_rgb)
108 | else:
109 | raise NotImplementedError()
110 |
111 |
112 | def main():
113 | # open webcam
114 | capture = cv2.VideoCapture(0)
115 |     # try to open the default channel ourselves if it failed to open
116 |     if not capture.isOpened():
117 |         capture.open(0)
118 |
119 | capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
120 | capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
121 |
122 | # start graphical user interface
123 | app = wx.App()
124 | layout = FilterLayout(capture, title='Fun with Filters')
125 | layout.Center()
126 | layout.Show()
127 | app.MainLoop()
128 |
129 |
130 | if __name__ == '__main__':
131 | main()
132 |
--------------------------------------------------------------------------------
/chapter1/pencilsketch_bg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter1/pencilsketch_bg.jpg
--------------------------------------------------------------------------------
/chapter1/tools.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from functools import lru_cache
4 | from scipy.interpolate import UnivariateSpline
5 | from typing import Tuple
6 |
7 |
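# Fit a smoothing spline through the given control points and sample it at
# all 256 intensity values to build a lookup table suitable for cv2.LUT.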
8 | def spline_to_lookup_table(spline_breaks: list, break_values: list):
9 | spl = UnivariateSpline(spline_breaks, break_values)
10 | return spl(range(256))
11 |
12 |
13 | def apply_rgb_filters(rgb_image, *,
14 | red_filter=None, green_filter=None, blue_filter=None):
15 | c_r, c_g, c_b = cv2.split(rgb_image)
16 | if red_filter is not None:
17 | c_r = cv2.LUT(c_r, red_filter).astype(np.uint8)
18 | if green_filter is not None:
19 | c_g = cv2.LUT(c_g, green_filter).astype(np.uint8)
20 | if blue_filter is not None:
21 | c_b = cv2.LUT(c_b, blue_filter).astype(np.uint8)
22 | return cv2.merge((c_r, c_g, c_b))
23 |
24 |
25 | def apply_hue_filter(rgb_image, hue_filter):
26 | c_h, c_s, c_v = cv2.split(cv2.cvtColor(rgb_image, cv2.COLOR_RGB2HSV))
27 | c_s = cv2.LUT(c_s, hue_filter).astype(np.uint8)
28 | return cv2.cvtColor(cv2.merge((c_h, c_s, c_v)), cv2.COLOR_HSV2RGB)
29 |
30 |
31 | @lru_cache(maxsize=32)
32 | def load_img_resized(path: str, dimensions: Tuple[int]):
33 | img = cv2.imread(path)
34 | if img is None:
35 | return
36 | return cv2.resize(img, dimensions)
37 |
38 |
39 | def cartoonize(rgb_image, *,
40 | num_pyr_downs=2, num_bilaterals=7):
41 | # STEP 1 -- Apply a bilateral filter to reduce the color palette of
42 | # the image.
43 | downsampled_img = rgb_image
44 | for _ in range(num_pyr_downs):
45 | downsampled_img = cv2.pyrDown(downsampled_img)
46 |
47 |     for _ in range(num_bilaterals):
48 |         downsampled_img = cv2.bilateralFilter(downsampled_img, 9, 9, 7)
49 | 
50 |     filtered_normal_img = downsampled_img
51 | for _ in range(num_pyr_downs):
52 | filtered_normal_img = cv2.pyrUp(filtered_normal_img)
53 |
54 | # make sure resulting image has the same dims as original
55 | if filtered_normal_img.shape != rgb_image.shape:
56 |         filtered_normal_img = cv2.resize(
57 |             filtered_normal_img, rgb_image.shape[1::-1])
58 |
59 | # STEP 2 -- Convert the original color image into grayscale.
60 | img_gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
61 |     # STEP 3 -- Apply a median blur to reduce image noise.
62 | img_blur = cv2.medianBlur(img_gray, 7)
63 |
64 | # STEP 4 -- Use adaptive thresholding to detect and emphasize the edges
65 | # in an edge mask.
66 | gray_edges = cv2.adaptiveThreshold(img_blur, 255,
67 | cv2.ADAPTIVE_THRESH_MEAN_C,
68 | cv2.THRESH_BINARY, 9, 2)
69 | # STEP 5 -- Combine the color image from step 1 with the edge mask
70 | # from step 4.
71 | rgb_edges = cv2.cvtColor(gray_edges, cv2.COLOR_GRAY2RGB)
72 | return cv2.bitwise_and(filtered_normal_img, rgb_edges)
73 |
74 |
75 | def convert_to_pencil_sketch_ordered(rgb_image):
76 | gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
77 | inv_gray = 255 - gray_image
78 | blurred_image = cv2.GaussianBlur(inv_gray, (21, 21), 0, 0)
79 | gray_sketch = cv2.divide(gray_image, 255 - blurred_image, scale=256)
80 | return cv2.cvtColor(gray_sketch, cv2.COLOR_GRAY2RGB)
81 |
82 |
83 | def convert_to_pencil_sketch(rgb_image):
84 | gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
85 | blurred_image = cv2.GaussianBlur(gray_image, (21, 21), 0, 0)
86 | gray_sketch = cv2.divide(gray_image, blurred_image, scale=256)
87 | return cv2.cvtColor(gray_sketch, cv2.COLOR_GRAY2RGB)
88 |
89 |
90 | def pencil_sketch_on_canvas(rgb_image, canvas=None):
91 | gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
92 | blurred_image = cv2.GaussianBlur(gray_image, (21, 21), 0, 0)
93 | gray_sketch = cv2.divide(gray_image, blurred_image, scale=256)
94 | if canvas is not None:
95 | gray_sketch = cv2.multiply(gray_sketch, canvas, scale=1 / 256)
96 | return cv2.cvtColor(gray_sketch, cv2.COLOR_GRAY2RGB)
97 |
98 |
99 | def dodge(image, mask):
100 | print(image.dtype, mask.dtype)
101 | return cv2.divide(image, 255 - mask, scale=256)
102 |
103 |
104 | def dodge_naive(image, mask):
105 | # determine the shape of the input image
106 | width, height = image.shape[:2]
107 |
108 | # prepare output argument with same size as image
109 | blend = np.zeros((width, height), np.uint8)
110 |
111 | for c in range(width):
112 | for r in range(height):
113 |
114 | # shift image pixel value by 8 bits
115 | # divide by the inverse of the mask
116 | result = (image[c, r] << 8) / (255 - mask[c, r])
117 |
118 | # make sure resulting value stays within bounds
119 | blend[c, r] = min(255, result)
120 | return blend
121 |
--------------------------------------------------------------------------------
/chapter1/wx_gui.py:
--------------------------------------------------------------------------------
1 | ../wx_gui.py
--------------------------------------------------------------------------------
/chapter10/classes.py:
--------------------------------------------------------------------------------
1 | CLASSES_90 = ["background", "person", "bicycle", "car", "motorcycle",
2 | "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
3 | "unknown", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
4 | "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "unknown", "backpack",
5 | "umbrella", "unknown", "unknown", "handbag", "tie", "suitcase", "frisbee", "skis",
6 | "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
7 | "surfboard", "tennis racket", "bottle", "unknown", "wine glass", "cup", "fork", "knife",
8 | "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
9 | "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "unknown", "dining table",
10 | "unknown", "unknown", "toilet", "unknown", "tv", "laptop", "mouse", "remote", "keyboard",
11 | "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "unknown",
12 | "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ]
13 |
--------------------------------------------------------------------------------
/chapter10/demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter10/demo.mp4
--------------------------------------------------------------------------------
/chapter10/frozen_inference_graph.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter10/frozen_inference_graph.pb
--------------------------------------------------------------------------------
/chapter10/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import cv2
4 | import numpy as np
5 |
6 | from classes import CLASSES_90
7 | from sort import Sort
8 |
9 |
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument("-i", "--input",
12 | help="Video path, stream URI, or camera ID ", default="demo.mp4")
13 | parser.add_argument("-t", "--threshold", type=float, default=0.3,
14 | help="Minimum score to consider")
15 | parser.add_argument("-m", "--mode", choices=['detection', 'tracking'], default="tracking",
16 | help="Either detection or tracking mode")
17 |
18 | args = parser.parse_args()
19 |
20 | if args.input.isdigit():
21 | args.input = int(args.input)
22 |
23 |
24 | TRACKED_CLASSES = ["car", "person"]
25 | BOX_COLOR = (23, 230, 210)
26 | TEXT_COLOR = (255, 255, 255)
27 | INPUT_SIZE = (300, 300)
28 |
29 | # Read SSD model
30 | config = "./ssd_mobilenet_v1_coco_2017_11_17.pbtxt.txt"
31 | model = "frozen_inference_graph.pb"
32 | detector = cv2.dnn.readNetFromTensorflow(model, config)
33 |
34 |
35 | def illustrate_box(image: np.ndarray, box: np.ndarray, caption: str) -> None:
36 |     rows, cols = image.shape[:2]
37 | points = box.reshape((2, 2)) * np.array([cols, rows])
38 | p1, p2 = points.astype(np.int32)
39 | cv2.rectangle(image, tuple(p1), tuple(p2), BOX_COLOR, thickness=4)
40 | cv2.putText(
41 | image,
42 | caption,
43 | tuple(p1),
44 | cv2.FONT_HERSHEY_SIMPLEX,
45 | 0.75,
46 | TEXT_COLOR,
47 | 2)
48 |
49 |
50 | def illustrate_detections(dets: np.ndarray, frame: np.ndarray) -> np.ndarray:
51 |     class_ids, scores, boxes = dets[:, 0], dets[:, 1], dets[:, 2:6]
52 | for class_id, score, box in zip(class_ids, scores, boxes):
53 | illustrate_box(frame, box, f"{CLASSES_90[int(class_id)]} {score:.2f}")
54 | return frame
55 |
56 |
57 | def illustrate_tracking_info(frame: np.ndarray) -> np.ndarray:
58 | for num, (class_id, tracker) in enumerate(trackers.items()):
59 | txt = f"{CLASSES_90[class_id]}:Total:{tracker.count} Now:{len(tracker.trackers)}"
60 | cv2.putText(frame, txt, (0, 50 * (num + 1)),
61 | cv2.FONT_HERSHEY_SIMPLEX, 0.75, TEXT_COLOR, 2)
62 | return frame
63 |
64 |
65 | trackers = {CLASSES_90.index(tracked_class): Sort()
66 | for tracked_class in TRACKED_CLASSES}
67 |
68 |
69 | def track(dets: np.ndarray,
70 | illustration_frame: np.ndarray = None) -> np.ndarray:
71 | for class_id, tracker in trackers.items():
72 | class_dets = dets[dets[:, 0] == class_id]
73 |         # update() returns rows of [x1, y1, x2, y2, id]
74 | sort_boxes = tracker.update(class_dets[:, 2:6])
75 | if illustration_frame is not None:
76 | for box in sort_boxes:
77 | illustrate_box(illustration_frame,
78 | box[:4],
79 | f"{CLASSES_90[class_id]} {int(box[4])}")
80 |
81 | return illustration_frame
82 |
83 |
84 | cap = cv2.VideoCapture(args.input)
85 |
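# iter() with a sentinel keeps calling cap.read() until it returns
# (False, None), i.e. until the camera or video file yields no more frames.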
86 | for res, frame in iter(cap.read, (False, None)):
87 | detector.setInput(
88 | cv2.dnn.blobFromImage(
89 | frame,
90 | size=INPUT_SIZE,
91 | swapRB=True,
92 | crop=False))
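    # The raw SSD output has shape (1, 1, N, 7); dropping the first two
    # dimensions and the image-id column leaves rows of
    # [class_id, score, x1, y1, x2, y2] with relative coordinates.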
93 |     detections = detector.forward()[0, 0, :, 1:]
94 |     scores = detections[:, 1]
95 |     detections = detections[scores > args.threshold]
96 |     if args.mode == "detection":
97 |         out = illustrate_detections(detections, frame)
98 |     else:
99 |         out = track(detections, frame)
100 |         illustrate_tracking_info(out)
101 |     cv2.imshow("out", out)
102 | if cv2.waitKey(1) == 27:
103 | cv2.waitKey(0)
104 | # exit()
105 |
--------------------------------------------------------------------------------
/chapter10/sort.py:
--------------------------------------------------------------------------------
1 | """
2 | SORT: A Simple, Online and Realtime Tracker
3 | Copyright (C) 2016 Alex Bewley alex@dynamicdetection.com
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | """
18 |
19 | import numpy as np
20 | from scipy.optimize import linear_sum_assignment
21 | from typing import Tuple
22 | import cv2
23 |
24 |
25 | def convert_bbox_to_z(bbox):
26 | """
27 | Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
28 | [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
29 | the aspect ratio
30 | """
31 | w, h = bbox[2:4] - bbox[0:2]
32 | x, y = (bbox[0:2] + bbox[2:4]) / 2
33 | s = w * h # scale is just area
34 | r = w / h
35 | return np.array([x, y, s, r])[:, None].astype(np.float64)
36 |
37 |
38 | def convert_x_to_bbox(x):
39 | """
40 | Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
41 | [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
42 | """
43 | # Shape of x is (7, 1)
44 | x = x[:, 0]
45 | center = x[0:2]
46 | w = np.sqrt(x[2] * x[3])
47 | h = x[2] / w
48 | half_size = np.array([w, h]) / 2
49 | bbox = np.concatenate((center - half_size, center + half_size))
50 | return bbox.astype(np.float64)
51 |
52 |
53 | class KalmanBoxTracker:
54 | """
55 |     This class represents the internal state of individual tracked objects observed as bbox.
56 | """
57 |
58 | def __init__(self, bbox, label):
59 | self.id = label
60 | self.time_since_update = 0
61 | self.hit_streak = 0
62 |
63 | self.kf = cv2.KalmanFilter(dynamParams=7, measureParams=4, type=cv2.CV_64F)
64 |
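        # The 7-dimensional state is [x, y, s, r, vx, vy, vs]: box centre,
        # scale (area) and aspect ratio plus the velocities of the first
        # three (the aspect ratio is assumed constant).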
65 | # define constant velocity model
66 | self.kf.transitionMatrix = np.array(
67 | [[1, 0, 0, 0, 1, 0, 0],
68 | [0, 1, 0, 0, 0, 1, 0],
69 | [0, 0, 1, 0, 0, 0, 1],
70 | [0, 0, 0, 1, 0, 0, 0],
71 | [0, 0, 0, 0, 1, 0, 0],
72 | [0, 0, 0, 0, 0, 1, 0],
73 | [0, 0, 0, 0, 0, 0, 1]], dtype=np.float64)
74 | self.kf.processNoiseCov = np.diag([10, 10, 10, 10, 1e4, 1e4, 1e4]).astype(np.float64)
75 |
76 | # We only observe
77 | self.kf.measurementMatrix = np.array(
78 | [[1, 0, 0, 0, 0, 0, 0],
79 | [0, 1, 0, 0, 0, 0, 0],
80 | [0, 0, 1, 0, 0, 0, 0],
81 | [0, 0, 0, 1, 0, 0, 0]], dtype=np.float64)
82 | self.kf.measurementNoiseCov = np.diag([1, 1, 10, 10]).astype(np.float64)
83 |
84 | # Start the particle at their initial position with 0 velocities.
85 | self.kf.statePost = np.vstack((convert_bbox_to_z(bbox), [[0], [0], [0]]))
86 | self.kf.errorCovPost = np.diag([1, 1, 1, 1, 1e-2, 1e-2, 1e-4]).astype(np.float64)
87 |
88 | def update(self, bbox):
89 | """
90 | Updates the state vector with observed bbox.
91 | """
92 | self.time_since_update = 0
93 | self.hit_streak += 1
94 |
95 | self.kf.correct(convert_bbox_to_z(bbox))
96 |
97 | def predict(self):
98 | """
99 | Advances the state vector and returns the predicted bounding box estimate.
100 | """
101 | if self.time_since_update > 0:
102 | self.hit_streak = 0
103 | self.time_since_update += 1
104 |
105 | retval = self.kf.predict()
106 | return convert_x_to_bbox(retval)
107 |
108 | @property
109 | def current_state(self):
110 | """
111 | Returns the current bounding box estimate.
112 | """
113 | return convert_x_to_bbox(self.kf.statePost)
114 |
115 |
116 | def iou(a: np.ndarray, b: np.ndarray) -> float:
117 | """
118 |     Computes IoU between two bboxes in the form [x1,y1,x2,y2]
119 | """
120 | a_tl, a_br = a[:4].reshape((2, 2))
121 | b_tl, b_br = b[:4].reshape((2, 2))
122 | int_tl = np.maximum(a_tl, b_tl)
123 | int_br = np.minimum(a_br, b_br)
124 | int_area = np.product(np.maximum(0., int_br - int_tl))
125 | a_area = np.product(a_br - a_tl)
126 | b_area = np.product(b_br - b_tl)
127 | return int_area / (a_area + b_area - int_area)
128 |
129 |
130 | def associate_detections_to_trackers(detections: np.ndarray, trackers: np.ndarray,
131 | iou_threshold: float = 0.3) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
132 | """
133 | Assigns detections to tracked object (both represented as bounding boxes)
134 |
135 | Returns 3 lists of matches, unmatched_detections and unmatched_trackers
136 | """
137 | iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float64)
138 |
139 | for d, det in enumerate(detections):
140 | for t, trk in enumerate(trackers):
141 | iou_matrix[d, t] = iou(det, trk)
142 |
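    # linear_sum_assignment minimises total cost, so the IoU matrix is
    # negated to obtain the assignment with maximum total overlap.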
143 | row_ind, col_ind = linear_sum_assignment(-iou_matrix)
144 | matched_indices = np.transpose(np.array([row_ind, col_ind]))
145 |
146 | iou_values = np.array([iou_matrix[detection, tracker]
147 | for detection, tracker in matched_indices])
148 |     good_matches = matched_indices[iou_values > iou_threshold]
149 | unmatched_detections = np.array(
150 | [i for i in range(len(detections)) if i not in good_matches[:, 0]])
151 | unmatched_trackers = np.array(
152 | [i for i in range(len(trackers)) if i not in good_matches[:, 1]])
153 | return good_matches, unmatched_detections, unmatched_trackers
154 |
155 |
156 | class Sort:
157 | def __init__(self, max_age=10, min_hits=6):
158 | """
159 | Sets key parameters for SORT
160 | """
161 | self.max_age = max_age
162 | self.min_hits = min_hits
163 | self.trackers = []
164 | self.count = 0
165 |
166 | def next_id(self):
167 | self.count += 1
168 | return self.count
169 |
170 | def update(self, dets):
171 | """
172 | Params:
173 | dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
174 | Requires: this method must be called once for each frame even with empty detections.
175 |         Returns a similar array, where the last column is the object ID.
176 |
177 | NOTE: The number of objects returned may differ from the number of detections provided.
178 | """
179 |         # Predict new locations and remove trackers with NaNs.
180 | self.trackers = [
181 | tracker for tracker in self.trackers if not np.any(
182 | np.isnan(
183 | tracker.predict()))]
184 | # get predicted locations
185 | trks = np.array([tracker.current_state for tracker in self.trackers])
186 |
187 | matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(
188 | dets, trks)
189 |
190 | # update matched trackers with assigned detections
191 | for detection_num, tracker_num in matched:
192 | self.trackers[tracker_num].update(dets[detection_num])
193 |
194 | # create and initialise new trackers for unmatched detections
195 | for i in unmatched_dets:
196 | self.trackers.append(KalmanBoxTracker(dets[i, :], self.next_id()))
197 |
198 | ret = np.array([np.concatenate((trk.current_state, [trk.id + 1]))
199 | for trk in self.trackers
200 | if trk.time_since_update < 1 and trk.hit_streak >= self.min_hits])
201 | # remove dead tracklet
202 | self.trackers = [
203 | tracker for tracker in self.trackers if tracker.time_since_update <= self.max_age]
204 | return ret
205 |
--------------------------------------------------------------------------------
/chapter2/chapter2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """OpenCV with Python Blueprints
5 | Chapter 2: Hand Gesture Recognition Using a Kinect Depth Sensor
6 |
7 | An app to detect and track simple hand gestures in real-time using the
8 | output of a Microsoft Kinect 3D Sensor.
9 | """
10 |
11 |
12 |
13 | import cv2
14 | import numpy as np
15 | from typing import Tuple
16 | from gestures import recognize
17 | from frame_reader import read_frame
18 |
19 |
20 | def draw_helpers(img_draw: np.ndarray) -> None:
21 | # draw some helpers for correctly placing hand
22 | height, width = img_draw.shape[:2]
23 | color = (0,102,255)
24 | cv2.circle(img_draw, (width // 2, height // 2), 3, color, 2)
25 | cv2.rectangle(img_draw, (width // 3, height // 3),
26 | (width * 2 // 3, height * 2 // 3), color, 2)
27 |
28 |
29 |
30 | def main():
31 | for _, frame in iter(read_frame, (False, None)):
32 | num_fingers, img_draw = recognize(frame)
33 | # draw some helpers for correctly placing hand
34 | draw_helpers(img_draw)
35 | # print number of fingers on image
36 | cv2.putText(img_draw, str(num_fingers), (30, 30),
37 | cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255))
38 | cv2.imshow("frame", img_draw)
39 | # Exit on escape
40 | if cv2.waitKey(10) == 27:
41 | break
42 |
43 |
44 | if __name__ == '__main__':
45 | main()
46 |
--------------------------------------------------------------------------------
/chapter2/frame_reader.py:
--------------------------------------------------------------------------------
1 | import freenect
2 | import numpy as np
3 | from typing import Tuple
4 |
5 | def read_frame() -> Tuple[bool,np.ndarray]:
6 | frame, timestamp = freenect.sync_get_depth()
7 | if frame is None:
8 | return False, None
9 | frame = np.clip(frame, 0, 2**10 - 1)
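    # After clipping to 10 bits, shift right by 2 so the depth values fit
    # into 8 bits (0-255) and can be handled as a grayscale image.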
10 | frame >>= 2
11 | return True, frame.astype(np.uint8)
12 |
--------------------------------------------------------------------------------
/chapter2/gestures.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """A module containing an algorithm for hand gesture recognition"""
5 |
6 | import numpy as np
7 | import cv2
8 | from typing import Tuple
9 |
10 | __author__ = "Michael Beyeler"
11 | __license__ = "GNU GPL 3.0 or later"
12 |
13 | def recognize(img_gray):
14 | """Recognizes hand gesture in a single-channel depth image
15 |
16 | This method estimates the number of extended fingers based on
17 | a single-channel depth image showing a hand and arm region.
18 | :param img_gray: single-channel depth image
19 | :returns: (num_fingers, img_draw) The estimated number of
20 | extended fingers and an annotated RGB image
21 | """
22 |
23 | # segment arm region
24 | segment = segment_arm(img_gray)
25 |
26 | # find the hull of the segmented area, and based on that find the
27 | # convexity defects
28 | (contour, defects) = find_hull_defects(segment)
29 |
30 | # detect the number of fingers depending on the contours and convexity
31 | # defects, then draw defects that belong to fingers green, others red
32 | img_draw = cv2.cvtColor(segment, cv2.COLOR_GRAY2RGB)
33 | (num_fingers, img_draw) = detect_num_fingers(contour,
34 | defects, img_draw)
35 |
36 | return (num_fingers, img_draw)
37 |
38 |
39 | def segment_arm(frame: np.ndarray, abs_depth_dev: int = 14) -> np.ndarray:
40 | """Segments arm region
41 |
42 | This method accepts a single-channel depth image of an arm and
43 | hand region and extracts the segmented arm region.
44 | It is assumed that the hand is placed in the center of the image.
45 | :param frame: single-channel depth image
46 | :returns: binary image (mask) of segmented arm region, where
47 | arm=255, else=0
48 | """
49 | height, width = frame.shape
50 |     # find center (21x21 pixel) region of the frame
51 | center_half = 10 # half-width of 21 is 21/2-1
52 | center = frame[height // 2 - center_half:height // 2 + center_half,
53 | width // 2 - center_half:width // 2 + center_half]
54 |
55 | # find median depth value of center region
56 | med_val = np.median(center)
57 |
58 |     # mark all pixels within abs_depth_dev of the median depth value
59 | frame = np.where(abs(frame - med_val) <= abs_depth_dev,
60 | 128, 0).astype(np.uint8)
61 |
62 |     # apply morphological closing to smooth the segmentation mask
63 | kernel = np.ones((3, 3), np.uint8)
64 | frame = cv2.morphologyEx(frame, cv2.MORPH_CLOSE, kernel)
65 |
66 |     # connected component: seed the center region for the flood fill
67 | small_kernel = 3
68 | frame[height // 2 - small_kernel:height // 2 + small_kernel,
69 | width // 2 - small_kernel:width // 2 + small_kernel] = 128
70 |
71 | mask = np.zeros((height + 2, width + 2), np.uint8)
72 | flood = frame.copy()
73 | cv2.floodFill(flood, mask, (width // 2, height // 2), 255,
74 | flags=4 | (255 << 8))
75 |
76 | ret, flooded = cv2.threshold(flood, 129, 255, cv2.THRESH_BINARY)
77 | return flooded
78 |
79 |
80 | def find_hull_defects(segment: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
81 | """Find hull defects
82 |
83 | This method finds all defects in the hull of a segmented arm
84 | region.
85 | :param segment: a binary image (mask) of a segmented arm region,
86 | where arm=255, else=0
87 | :returns: (max_contour, defects) the largest contour in the image
88 | and all corresponding defects
89 | """
90 | contours, hierarchy = cv2.findContours(segment, cv2.RETR_TREE,
91 | cv2.CHAIN_APPROX_SIMPLE)
92 |
93 | # find largest area contour
94 | max_contour = max(contours, key=cv2.contourArea)
95 | epsilon = 0.01 * cv2.arcLength(max_contour, True)
96 | max_contour = cv2.approxPolyDP(max_contour, epsilon, True)
97 |
98 | # find convexity hull and defects
99 | hull = cv2.convexHull(max_contour, returnPoints=False)
100 | defects = cv2.convexityDefects(max_contour, hull)
101 |
102 | return max_contour, defects
103 |
104 |
105 |
106 | def detect_num_fingers(contour: np.ndarray, defects: np.ndarray,
107 | img_draw: np.ndarray, thresh_deg: float = 80.0) -> Tuple[int, np.ndarray]:
108 | """Detects the number of extended fingers
109 |
110 | This method determines the number of extended fingers based on a
111 | contour and convexity defects.
112 | It will annotate an RGB color image of the segmented arm region
113 | with all relevant defect points and the hull.
114 | :param contour: a contour of the segmented arm region
115 | :param defects: a list of convexity defects
116 | :param img_draw: an RGB color image to be annotated
117 | :returns: (num_fingers, img_draw) the estimated number of extended
118 | fingers and an annotated RGB color image
119 | """
120 |
121 | # if there are no convexity defects, possibly no hull found or no
122 | # fingers extended
123 | if defects is None:
124 | return 0, img_draw
125 |
126 | # we assume the wrist will generate two convexity defects (one on each
127 | # side), so if there are no additional defect points, there are no
128 | # fingers extended
129 | if len(defects) <= 2:
130 | return 0, img_draw
131 |
132 | # if there is a sufficient amount of convexity defects, we will find a
133 | # defect point between two fingers so to get the number of fingers,
134 | # start counting at 1
135 | num_fingers = 1
136 | # Defects are of shape (num_defects,1,4)
137 | for defect in defects[:, 0, :]:
138 | # Each defect is an array of four integers.
139 | # The first three are the indices of the start, end, and
140 | # farthest points, respectively.
141 | # contour is of shape (num_points,1,2) - 2 for point coordinates
142 | start, end, far = [contour[i][0] for i in defect[:3]]
143 | # draw the hull
144 | cv2.line(img_draw, tuple(start), tuple(end), (0, 255, 0), 2)
145 |
146 | # if angle is below a threshold, defect point belongs to two
147 | # extended fingers
148 | if angle_rad(start - far, end - far) < deg2rad(thresh_deg):
149 | # increment number of fingers
150 | num_fingers += 1
151 |
152 | # draw point as green
153 | cv2.circle(img_draw, tuple(far), 5, (0, 255, 0), -1)
154 | else:
155 | # draw point as red
156 | cv2.circle(img_draw, tuple(far), 5, (0, 0, 255), -1)
157 |
158 | # make sure we cap the number of fingers
159 | return min(5, num_fingers), img_draw
160 |
161 |
162 | def angle_rad(v1, v2):
163 | """Angle in radians between two vectors
164 |
165 | This method returns the angle (in radians) between two array-like
166 | vectors using the cross-product method, which is more accurate for
167 | small angles than the dot-product-acos method.
168 | """
169 | return np.arctan2(np.linalg.norm(np.cross(v1, v2)), np.dot(v1, v2))
170 |
171 |
172 |
173 |
174 | def deg2rad(angle_deg):
175 | """Convert degrees to radians
176 |
177 | This method converts an angle given in degrees [0, 360) into
178 | radians [0, 2*np.pi)
179 | """
180 | return angle_deg / 180.0 * np.pi
181 |
--------------------------------------------------------------------------------
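detect_num_fingers() counts a convexity defect as a gap between two extended fingers when the angle between the vectors from the defect's farthest point to its start and end points is below thresh_deg (80 degrees by default). A minimal sketch (illustrative only; run it from the chapter2 directory so gestures can be imported) that classifies a single hand-picked defect the same way:

    import numpy as np
    from gestures import angle_rad, deg2rad

    start = np.array([100, 40])  # hull point where the defect starts
    end = np.array([140, 40])    # hull point where the defect ends
    far = np.array([120, 90])    # deepest point of the defect

    angle = angle_rad(start - far, end - far)
    print(np.degrees(angle))      # roughly 43.6 degrees
    print(angle < deg2rad(80.0))  # True: counts as a gap between two fingers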
/chapter3/chapter3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """OpenCV with Python Blueprints
5 | Chapter 3: Finding Objects Via Feature Matching and Perspective Transforms
6 |
7 | An app to detect and track an object of interest in the video stream of a
8 | webcam, even if the object is viewed at different angles, distances, or
9 | under partial occlusion.
10 | """
11 |
12 | import cv2
13 | from feature_matching import FeatureMatching
14 |
15 |
16 | def main():
17 | capture = cv2.VideoCapture(0)
18 | assert capture.isOpened(), "Cannot connect to camera"
19 |
20 | capture.set(cv2.CAP_PROP_FPS, 5)
21 | capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
22 | capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
23 |
24 | train_img = cv2.imread('train.png', cv2.IMREAD_GRAYSCALE)
25 | matching = FeatureMatching(train_img)
26 |
27 | for success, frame in iter(capture.read, (False, None)):
28 | cv2.imshow("frame", frame)
29 | match_success, img_warped, img_flann = matching.match(frame)
30 | if match_success:
31 | cv2.imshow("res", img_warped)
32 | cv2.imshow("flann", img_flann)
33 | if cv2.waitKey(1) & 0xff == 27:
34 | break
35 |
36 |
37 | if __name__ == '__main__':
38 | main()
39 |
--------------------------------------------------------------------------------
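The capture loop above relies on iter() with a sentinel: capture.read is called repeatedly and iteration stops as soon as it returns (False, None). A sketch of the equivalent explicit loop (illustrative only), in case the idiom is unfamiliar:

    import cv2

    capture = cv2.VideoCapture(0)
    while True:
        success, frame = capture.read()
        if not success:  # the same condition the (False, None) sentinel encodes
            break
        cv2.imshow("frame", frame)
        if cv2.waitKey(1) & 0xff == 27:  # exit on Esc
            break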
/chapter3/feature_matching.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """A module containing an algorithm for feature matching"""
5 |
6 | import numpy as np
7 | import cv2
8 | from typing import Tuple, Optional, List, Sequence
9 | Point = Tuple[float, float]
10 |
11 | __author__ = "Michael Beyeler"
12 | __license__ = "GNU GPL 3.0 or later"
13 |
14 |
15 |
16 |
17 | class Outlier(Exception):
18 | pass
19 |
20 |
21 | class FeatureMatching:
22 | """Feature matching class
23 |
24 | This class implements an algorithm for feature matching and tracking.
25 |
26 | A SURF descriptor is obtained from a training or template image
27 | (train_image) that shows the object of interest from the front and
28 | upright.
29 |
30 | The algorithm will then search for this object in every image frame
31 | passed to the method FeatureMatching.match. The matching is performed
32 | with a FLANN based matcher.
33 |
34 | Note: If you want to use this code (including SURF) in a non-commercial
35 | application, you will need to acquire a SURF license.
36 | """
37 |
38 | def __init__(self, train_image: np.ndarray):
39 | """
40 | Initialize the SURF descriptor, FLANN matcher, and the tracking
41 | algorithm.
42 |
43 | :param train_image: training or template image showing the object
44 | of interest
45 | """
46 | # initialize SURF
47 | self.f_extractor = cv2.xfeatures2d_SURF.create(hessianThreshold=400)
48 | # template image: "train" image
49 | # later on compared to each video frame: "query" image
50 | self.img_obj = train_image
51 |
52 | self.sh_train = self.img_obj.shape[:2]
53 | self.key_train, self.desc_train = \
54 | self.f_extractor.detectAndCompute(self.img_obj, None)
55 |
56 | # initialize FLANN
57 | FLANN_INDEX_KDTREE = 0
58 | index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
59 | search_params = dict(checks=50)
60 | # the two dicts above are handed straight to the FLANN matcher:
61 | # a kd-tree index with 5 trees and 50 checks per query
62 | self.flann = cv2.FlannBasedMatcher(index_params, search_params)
63 | # self.flann = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_FLANNBASED)
64 | # initialize tracking
65 | self.last_hinv = np.zeros((3, 3))
66 | self.max_error_hinv = 50.
67 | self.num_frames_no_success = 0
68 | self.max_frames_no_success = 5
69 |
70 | def match(self,
71 | frame: np.ndarray) -> Tuple[bool,
72 | Optional[np.ndarray],
73 | Optional[np.ndarray]]:
74 | """Detects and tracks an object of interest in a video frame
75 |
76 | This method detects and tracks an object of interest (of which a
77 | SURF descriptor was obtained upon initialization) in a video frame.
78 | Correspondence is established with a FLANN based matcher.
79 |
80 | The algorithm then applies a perspective transform on the frame in
81 | order to project the object of interest to the frontal plane.
82 |
83 | Outlier rejection is applied to improve the tracking of the object
84 | from frame to frame.
85 |
86 | :param frame: input (query) image in which to detect the object
87 | :returns: (success, frame) whether the detection was successful
88 | and the perspective-transformed frame
89 | """
90 |
91 | # create a working copy (grayscale) of the frame
92 | # and store its shape for convenience
93 | img_query = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
94 | sh_query = img_query.shape # rows,cols
95 |
96 | # --- feature extraction
97 | # detect keypoints in the query image (video frame)
98 | # using SURF descriptor
99 | # key_query, desc_query = self.f_extractor.detectAndCompute(
100 | # img_query, None)
101 |
102 | key_query = self.f_extractor.detect(
103 | img_query)
104 | key_query, desc_query = self.f_extractor.compute(img_query, key_query)
105 | # img_keypoints = cv2.drawKeypoints(img_query, key_query, None,
106 | # (255, 0, 0), 4)
107 | # cv2.imshow("keypoints",img_keypoints)
108 | # --- feature matching
109 | # returns a list of good matches using FLANN
110 | # based on a scene and its feature descriptor
111 | good_matches = self.match_features(desc_query)
112 | train_points = [self.key_train[good_match.queryIdx].pt
113 | for good_match in good_matches]
114 | query_points = [key_query[good_match.trainIdx].pt
115 | for good_match in good_matches]
116 |
117 | try:
118 | # early outlier detection and rejection
119 | if len(good_matches) < 4:
120 | raise Outlier("Too few matches")
121 |
122 | # --- corner point detection
123 | # calculates the homography matrix needed to convert between
124 | # keypoints from the train image and the query image
125 | dst_corners = detect_corner_points(
126 | train_points, query_points, self.sh_train)
127 | # early outlier detection and rejection
128 | # if any corners lie significantly outside the image, skip frame
129 | if np.any((dst_corners < -20) |
130 | (dst_corners > np.array(sh_query) + 20)):
131 | raise Outlier("Out of image")
132 | # early outlier detection and rejection
133 | # find the area of the quadrilateral that the four corner points
134 | # span
135 | area = 0
136 | for prev, nxt in zip(dst_corners, np.roll(
137 | dst_corners, -1, axis=0)):
138 | area += (prev[0] * nxt[1] - prev[1] * nxt[0]) / 2.
139 |
140 | # early outlier detection and rejection
141 | # reject corner points if area is unreasonable
142 | if not np.prod(sh_query) / 16. < area < np.prod(sh_query) / 2.:
143 | raise Outlier("Area is unreasonably small or large")
144 |
145 | # --- bring object of interest to frontal plane
146 | train_points_scaled = self.scale_and_offset(
147 | train_points, self.sh_train, sh_query)
148 | Hinv, _ = cv2.findHomography(
149 | np.array(query_points), np.array(train_points_scaled), cv2.RANSAC)
150 | # outlier rejection
151 | # if last frame recent: new Hinv must be similar to last one
152 | # else: accept whatever Hinv is found at this point
153 | similar = np.linalg.norm(
154 | Hinv - self.last_hinv) < self.max_error_hinv
155 | recent = self.num_frames_no_success < self.max_frames_no_success
156 | if recent and not similar:
157 | raise Outlier("Not similar transformation")
158 | except Outlier as e:
159 | self.num_frames_no_success += 1
160 | return False, None, None
161 | else:
162 | # reset counters and update Hinv
163 | self.num_frames_no_success = 0
164 | self.last_hinv = Hinv
165 | # outline corner points of train image in query image
166 | img_warped = cv2.warpPerspective(
167 | img_query, Hinv, (sh_query[1], sh_query[0]))
168 | img_flann = draw_good_matches(
169 | self.img_obj,
170 | self.key_train,
171 | img_query,
172 | key_query,
173 | good_matches)
174 | # adjust x-coordinate (col) of corner points so that they can be drawn
175 | # next to the train image (add self.sh_train[1])
176 | dst_corners[:, 0] += self.sh_train[1]
177 | cv2.polylines(
178 | img_flann,
179 | [dst_corners.astype(np.int32)],
180 | isClosed=True,
181 | color=(0, 255, 0),
182 | thickness=3)
183 | return True, img_warped, img_flann
184 |
185 | def match_features(self, desc_frame: np.ndarray) -> List[cv2.DMatch]:
186 | """Feature matching between train and query image
187 |
188 | This method finds matches between the descriptor of an input
189 | (query) frame and the stored template (train) image.
190 |
191 | The ratio test is applied to distinguish between good matches and
192 | outliers.
193 |
194 | :param desc_frame: descriptor of input (query) image
195 | :returns: list of good matches
196 | """
197 | # find 2 best matches (kNN with k=2)
198 | matches = self.flann.knnMatch(self.desc_train, desc_frame, k=2)
199 | # discard bad matches, ratio test as per Lowe's paper
200 | good_matches = [x[0] for x in matches
201 | if x[0].distance < 0.7 * x[1].distance]
202 | return good_matches
203 |
204 | @staticmethod
205 | def scale_and_offset(points: Sequence[Point],
206 | source_size: Tuple[int, int],
207 | dst_size: Tuple[int, int],
208 | factor: float = 0.5) -> List[Point]:
209 | dst_size = np.array(dst_size)
210 | scale = 1 / np.array(source_size) * dst_size * factor
211 | bias = dst_size * (1 - factor) / 2
212 | return [tuple(np.array(pt) * scale + bias) for pt in points]
213 |
214 |
215 | def detect_corner_points(src_points: Sequence[Point],
216 | dst_points: Sequence[Point],
217 | sh_src: Tuple[int, int]) -> np.ndarray:
218 | """Detects corner points in an input (query) image
219 |
220 | This method finds the homography matrix to go from the template
221 | (train) image to the input (query) image, and uses it to project the
222 | corner points of the train image into the query image.
223 |
224 | :param src_points: coordinates of good matches in the train image
225 | :param dst_points: coordinates of the same matches in the query image
226 | :returns: coordinates of the train image corners in the query image
227 | """
228 |
229 | # find homography using RANSAC
230 | H, _ = cv2.findHomography(np.array(src_points), np.array(dst_points),
231 | cv2.RANSAC)
232 |
233 | if H is None:
234 | raise Outlier("Homography not found")
235 | # outline train image in query image
236 | height, width = sh_src
237 | src_corners = np.array([(0, 0), (width, 0),
238 | (width, height),
239 | (0, height)], dtype=np.float32)
240 | return cv2.perspectiveTransform(src_corners[None, :, :], H)[0]
241 |
242 |
243 | def draw_good_matches(img1: np.ndarray,
244 | kp1: Sequence[cv2.KeyPoint],
245 | img2: np.ndarray,
246 | kp2: Sequence[cv2.KeyPoint],
247 | matches: Sequence[cv2.DMatch]) -> np.ndarray:
248 | """Visualizes a list of good matches
249 |
250 | This function visualizes a list of good matches. It is only required in
251 | OpenCV releases that do not ship with the function drawMatches.
252 |
253 | The function draws two images (img1 and img2) side-by-side,
254 | highlighting a list of keypoints in both, and connects matching
255 | keypoints in the two images with blue lines.
256 |
257 | :param img1: first image
258 | :param kp1: list of keypoints for first image
259 | :param img2: second image
260 | :param kp2: list of keypoints for second image
261 | :param matches: list of good matches
262 | :returns: annotated output image
263 | """
264 | # Create a new output image of a size that will fit the two images together
265 | rows1, cols1 = img1.shape[:2]
266 | rows2, cols2 = img2.shape[:2]
267 | out = np.zeros((max([rows1, rows2]), cols1 + cols2, 3), dtype='uint8')
268 |
269 | # Place the first image on the left
270 | out[:rows1, :cols1, :] = img1[..., None]
271 |
272 | # Place the second image to the right of the first image
273 | out[:rows2, cols1:cols1 + cols2, :] = img2[..., None]
274 |
275 | # For each pair of points we have between both images
276 | # draw circles, then connect a line between them
277 | for m in matches:
278 | # Get the matching keypoints for each of the images
279 | # and convert them to int
280 | c1 = tuple(map(int, kp1[m.queryIdx].pt))
281 | c2 = tuple(map(int, kp2[m.trainIdx].pt))
282 | # Shift second center for drawing
283 | c2 = c2[0] + cols1, c2[1]
284 |
285 | radius = 4
286 | BLUE = (255, 0, 0)
287 | thickness = 1
288 | # Draw a small circle at both co-ordinates
289 | cv2.circle(out, c1, radius, BLUE, thickness)
290 | cv2.circle(out, c2, radius, BLUE, thickness)
291 |
292 | # Draw a line in between the two points
293 | cv2.line(out, c1, c2, BLUE, thickness)
294 |
295 | return out
296 |
--------------------------------------------------------------------------------
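One of the outlier-rejection steps in FeatureMatching.match computes the area of the quadrilateral spanned by the four projected corner points with the shoelace formula and rejects frames whose area is implausibly small or large. A standalone sketch of that area computation (illustrative only):

    import numpy as np

    def shoelace_area(corners):
        # signed area of a polygon given its vertices in order
        area = 0.0
        for prev, nxt in zip(corners, np.roll(corners, -1, axis=0)):
            area += (prev[0] * nxt[1] - prev[1] * nxt[0]) / 2.0
        return area

    # a 200x100 axis-aligned rectangle; the sign depends on the winding order
    corners = np.array([(0, 0), (200, 0), (200, 100), (0, 100)], dtype=float)
    print(shoelace_area(corners))  # 20000.0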
/chapter3/train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter3/train.png
--------------------------------------------------------------------------------
/chapter3/wx_gui.py:
--------------------------------------------------------------------------------
1 | ../wx_gui.py
--------------------------------------------------------------------------------
/chapter4/calibrate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """A module for camera calibration using a chessboard"""
5 |
6 |
7 | import cv2
8 | import numpy as np
9 | import wx
10 |
11 | from wx_gui import BaseLayout
12 |
13 |
14 | class CameraCalibration(BaseLayout):
15 | """Camera calibration
16 |
17 | Performs camera calibration on a webcam video feed using
18 | the chessboard approach described here:
19 | http://docs.opencv.org/doc/tutorials/calib3d/camera_calibration/camera_calibration.html
20 | """
21 |
22 | def augment_layout(self):
23 | pnl = wx.Panel(self, -1)
24 | self.button_calibrate = wx.Button(pnl, label='Calibrate Camera')
25 | self.Bind(wx.EVT_BUTTON, self._on_button_calibrate)
26 | hbox = wx.BoxSizer(wx.HORIZONTAL)
27 | hbox.Add(self.button_calibrate)
28 | pnl.SetSizer(hbox)
29 |
30 | self.panels_vertical.Add(pnl, flag=wx.EXPAND | wx.BOTTOM | wx.TOP,
31 | border=1)
32 |
33 | # setting chessboard size (size of grid - 1)
34 | # (7,7) for the standard chessboard
35 | self.chessboard_size = (7, 7)
36 |
37 | # prepare object points
38 | self.objp = np.zeros((np.prod(self.chessboard_size), 3),
39 | dtype=np.float32)
40 | self.objp[:, :2] = np.mgrid[0:self.chessboard_size[0],
41 | 0:self.chessboard_size[1]].T.reshape(-1, 2)
42 |
43 | # prepare recording
44 | self.recording = False
45 | self.record_min_num_frames = 15
46 | self._reset_recording()
47 |
48 | def process_frame(self, frame):
49 | """Processes each frame
50 |
51 | If recording mode is on (self.recording==True), this method will
52 | perform all the hard work of the camera calibration process:
53 | - for every frame, until enough frames have been processed:
54 | - find the chessboard corners
55 | - refine the coordinates of the detected corners
56 | - after enough frames have been processed:
57 | - estimate the intrinsic camera matrix and distortion
58 | coefficients
59 |
60 | :param frame: current RGB video frame
61 | :returns: annotated video frame showing detected chessboard corners
62 | """
63 | # if we are not recording, just display the frame
64 | if not self.recording:
65 | return frame
66 |
67 | # else we're recording
68 | img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype(np.uint8)
69 | if self.record_cnt < self.record_min_num_frames:
70 | # need at least some number of chessboard samples before we can
71 | # calculate the intrinsic matrix
72 |
73 | ret, corners = cv2.findChessboardCorners(img_gray,
74 | self.chessboard_size,
75 | None)
76 | if ret:
77 | print(f"{self.record_min_num_frames - self.record_cnt} chessboards remain")
78 | cv2.drawChessboardCorners(frame, self.chessboard_size, corners, ret)
79 |
80 | # refine found corners
81 | criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER,
82 | 30, 0.01)
83 | cv2.cornerSubPix(img_gray, corners, (9, 9), (-1, -1), criteria)
84 |
85 | self.obj_points.append(self.objp)
86 | self.img_points.append(corners)
87 | self.record_cnt += 1
88 |
89 | else:
90 | # we have already collected enough frames, so now we want to
91 | # calculate the intrinsic camera matrix (K) and the distortion
92 | # vector (dist)
93 | print("Calibrating...")
94 | ret, K, dist, rvecs, tvecs = cv2.calibrateCamera(self.obj_points,
95 | self.img_points,
96 | (self.imgWidth,
97 | self.imgHeight),
98 | None, None)
99 | print("K=", K)
100 | print("dist=", dist)
101 |
102 | # double-check reconstruction error (should be as close to zero as
103 | # possible)
104 | mean_error = 0
105 | for obj_point, rvec, tvec, img_point in zip(
106 | self.obj_points, rvecs, tvecs, self.img_points):
107 | img_points2, _ = cv2.projectPoints(
108 | obj_point, rvec, tvec, K, dist)
109 | error = cv2.norm(img_point, img_points2,
110 | cv2.NORM_L2) / len(img_points2)
111 | mean_error += error
112 |
113 | print("mean error=", mean_error)
114 |
115 | self.recording = False
116 | self._reset_recording()
117 | self.button_calibrate.Enable()
118 | return frame
119 |
120 | def _on_button_calibrate(self, event):
121 | """Enable recording mode upon pushing the button"""
122 | self.button_calibrate.Disable()
123 | self.recording = True
124 | self._reset_recording()
125 |
126 | def _reset_recording(self):
127 | """Disable recording mode and reset data structures"""
128 | self.record_cnt = 0
129 | self.obj_points = []
130 | self.img_points = []
131 |
132 |
133 | def main():
134 | capture = cv2.VideoCapture(0)
135 | assert capture.isOpened(), "Cannot connect to camera"
136 | capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
137 | capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
138 |
139 | # start graphical user interface
140 | app = wx.App()
141 | layout = CameraCalibration(capture, title='Camera Calibration', fps=2)
142 | layout.Show(True)
143 | app.MainLoop()
144 |
145 |
146 | if __name__ == '__main__':
147 | main()
148 |
--------------------------------------------------------------------------------
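Once cv2.calibrateCamera has produced the intrinsic matrix K and the distortion vector dist, the typical next step is to undistort new frames with them. A minimal sketch (the numbers and the file name are placeholders, not real calibration output):

    import cv2
    import numpy as np

    # placeholder intrinsics; substitute the K and dist printed by calibrate.py
    K = np.array([[800.0, 0.0, 320.0],
                  [0.0, 800.0, 240.0],
                  [0.0, 0.0, 1.0]])
    dist = np.array([0.1, -0.05, 0.0, 0.0, 0.0])

    frame = cv2.imread('some_frame.png')        # hypothetical input image
    undistorted = cv2.undistort(frame, K, dist)
    cv2.imwrite('undistorted.png', undistorted)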
/chapter4/chapter4.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | OpenCV with Python Blueprints
6 | Chapter 4: 3D Scene Reconstruction Using Structure From Motion
7 |
8 | An app to detect and extract structure from motion on a pair of images
9 | using stereo vision. We will assume that the two images have been taken
10 | with the same camera, of which we know the internal camera parameters. If
11 | these parameters are not known, use calibrate.py to estimate them.
12 |
13 | The result is a point cloud that shows the 3D real-world coordinates
14 | of points in the scene.
15 | """
16 |
17 | import numpy as np
18 |
19 | from scene3D import SceneReconstruction3D
20 |
21 |
22 | def main():
23 | # camera matrix and distortion coefficients
24 | # can be recovered with calibrate.py
25 | # but the examples used here are already undistorted, taken with a camera
26 | # of known K
27 | K = np.array([[2759.48 / 4, 0, 1520.69 / 4, 0, 2764.16 / 4,
28 | 1006.81 / 4, 0, 0, 1]]).reshape(3, 3)
29 | d = np.array([0.0, 0.0, 0.0, 0.0, 0.0]).reshape(1, 5)
30 | scene = SceneReconstruction3D(K, d)
31 |
32 | # load a pair of images for which to perform SfM
33 | scene.load_image_pair("fountain_dense/0004.png", "fountain_dense/0005.png")
34 |
35 | # draw 3D point cloud of fountain
36 | # use "pan axes" button in pyplot to inspect the cloud (rotate and zoom
37 | # to convince yourself of the result)
38 | # scene.draw_epipolar_lines()
39 | # scene.plot_rectified_images()
40 | scene.plot_optic_flow()
41 | scene.plot_point_cloud()
42 |
43 |
44 | if __name__ == '__main__':
45 | main()
46 |
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0000.png
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0001.png
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0002.png
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0003.png
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0004.png
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0005.png
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0006.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0006.png
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0007.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0007.png
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0008.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0008.png
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0009.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0009.png
--------------------------------------------------------------------------------
/chapter4/fountain_dense/0010.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter4/fountain_dense/0010.png
--------------------------------------------------------------------------------
/chapter4/scene3D.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """A module that contains an algorithm for 3D scene reconstruction """
5 |
6 | import cv2
7 | import numpy as np
8 | import sys
9 |
10 | from mpl_toolkits.mplot3d import Axes3D
11 | import matplotlib.pyplot as plt
12 | from matplotlib import cm
13 |
14 | class SceneReconstruction3D:
15 | """3D scene reconstruction
16 |
17 | This class implements an algorithm for 3D scene reconstruction using
18 | stereo vision and structure-from-motion techniques.
19 |
20 | A 3D scene is reconstructed from a pair of images that show the same
21 | real-world scene from two different viewpoints. Feature matching is
22 | performed either with rich feature descriptors or based on optic flow.
23 | 3D coordinates are obtained via triangulation.
24 |
25 | Note that a complete structure-from-motion pipeline typically includes
26 | bundle adjustment and geometry fitting, which are out of scope for
27 | this project.
28 | """
29 |
30 | def __init__(self, K, dist):
31 | """Constructor
32 |
33 | This method initializes the scene reconstruction algorithm.
34 |
35 | :param K: 3x3 intrinsic camera matrix
36 | :param dist: vector of distortion coefficients
37 | """
38 | self.K = K
39 | self.K_inv = np.linalg.inv(K) # store inverse for fast access
40 | self.d = dist
41 |
42 | def load_image_pair(
43 | self,
44 | img_path1: str,
45 | img_path2: str,
46 | use_pyr_down: bool = True) -> None:
47 |
48 | self.img1, self.img2 = [
49 | cv2.undistort(
50 | self.load_image(
51 | path, use_pyr_down), self.K, self.d) for path in (
52 | img_path1, img_path2)]
53 |
54 | @staticmethod
55 | def load_image(
56 | img_path: str,
57 | use_pyr_down: bool,
58 | target_width: int = 600) -> np.ndarray:
59 | """Loads pair of images
60 |
61 | This method loads the two images for which the 3D scene should be
62 | reconstructed. The two images should show the same real-world scene
63 | from two different viewpoints.
64 |
65 | :param img_path1: path to first image
66 | :param img_path2: path to second image
67 | :param use_pyr_down: flag whether to downscale the images to
68 | roughly 600px width (True) or not (False)
69 | """
70 |
71 | img = cv2.imread(img_path, cv2.CV_8UC3)
72 |
73 | # make sure image is valid
74 | assert img is not None, f"Image {img_path} could not be loaded."
75 | if len(img.shape) == 2:
76 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
77 |
78 | # scale down image if necessary
79 | while use_pyr_down and img.shape[1] > 2 * target_width:
80 | img = cv2.pyrDown(img)
81 | return img
82 |
83 | def plot_optic_flow(self):
84 | """Plots optic flow field
85 |
86 | This method plots the optic flow between the first and second
87 | image.
88 | """
89 | self._extract_keypoints_flow()
90 |
91 | img = np.copy(self.img1)
92 | for pt1, pt2 in zip(self.match_pts1, self.match_pts2):
93 | cv2.arrowedLine(img, tuple(pt1), tuple(pt2),
94 | color=(255, 0, 0))
95 |
96 | cv2.imshow("imgFlow", img)
97 | cv2.waitKey()
98 |
99 | def draw_epipolar_lines(self, feat_mode: str = "SIFT"):
100 | """Draws epipolar lines
101 |
102 | This method computes and draws the epipolar lines of the two
103 | loaded images.
104 |
105 | :param feat_mode: whether to use rich descriptors for feature
106 | matching ("sift") or optic flow ("flow")
107 | """
108 | self._extract_keypoints(feat_mode)
109 | self._find_fundamental_matrix()
110 | # Find epilines corresponding to points in right image (second image)
111 | # and drawing its lines on left image
112 | pts2re = self.match_pts2.reshape(-1, 1, 2)
113 | lines1 = cv2.computeCorrespondEpilines(pts2re, 2, self.F)
114 | lines1 = lines1.reshape(-1, 3)
115 | img3, img4 = self._draw_epipolar_lines_helper(self.img1, self.img2,
116 | lines1, self.match_pts1,
117 | self.match_pts2)
118 |
119 | # Find epilines corresponding to points in left image (first image) and
120 | # drawing its lines on right image
121 | pts1re = self.match_pts1.reshape(-1, 1, 2)
122 | lines2 = cv2.computeCorrespondEpilines(pts1re, 1, self.F)
123 | lines2 = lines2.reshape(-1, 3)
124 | img1, img2 = self._draw_epipolar_lines_helper(self.img2, self.img1,
125 | lines2, self.match_pts2,
126 | self.match_pts1)
127 |
128 | cv2.imshow("left", img1)
129 | cv2.imshow("right", img3)
130 | cv2.waitKey()
131 |
132 | def plot_rectified_images(self, feat_mode: str = "SIFT"):
133 | """Plots rectified images
134 |
135 | This method computes and plots a rectified version of the two
136 | images side by side.
137 |
138 | :param feat_mode: whether to use rich descriptors for feature
139 | matching ("sift") or optic flow ("flow")
140 | """
141 | self._extract_keypoints(feat_mode)
142 | self._find_fundamental_matrix()
143 | self._find_essential_matrix()
144 | self._find_camera_matrices_rt()
145 |
146 | R = self.Rt2[:, :3]
147 | T = self.Rt2[:, 3]
148 | # perform the rectification
149 | R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(self.K, self.d,
150 | self.K, self.d,
151 | self.img1.shape[:2],
152 | R, T, alpha=1.0)
153 | mapx1, mapy1 = cv2.initUndistortRectifyMap(self.K, self.d, R1, self.K,
154 | self.img1.shape[:2],
155 | cv2.CV_32F)
156 | mapx2, mapy2 = cv2.initUndistortRectifyMap(self.K, self.d, R2, self.K,
157 | self.img2.shape[:2],
158 | cv2.CV_32F)
159 | img_rect1 = cv2.remap(self.img1, mapx1, mapy1, cv2.INTER_LINEAR)
160 | img_rect2 = cv2.remap(self.img2, mapx2, mapy2, cv2.INTER_LINEAR)
161 |
162 | # draw the images side by side
163 | total_size = (max(img_rect1.shape[0], img_rect2.shape[0]),
164 | img_rect1.shape[1] + img_rect2.shape[1], 3)
165 | img = np.zeros(total_size, dtype=np.uint8)
166 | img[:img_rect1.shape[0], :img_rect1.shape[1]] = img_rect1
167 | img[:img_rect2.shape[0], img_rect1.shape[1]:] = img_rect2
168 |
169 | # draw horizontal lines every 25 px across the side-by-side image
170 | for i in range(20, img.shape[0], 25):
171 | cv2.line(img, (0, i), (img.shape[1], i), (255, 0, 0))
172 |
173 | cv2.imshow('imgRectified', img)
174 | cv2.waitKey()
175 |
176 | def plot_point_cloud(self, feat_mode="sift"):
177 | """Plots 3D point cloud
178 |
179 | This method generates and plots a 3D point cloud of the recovered
180 | 3D scene.
181 |
182 | :param feat_mode: whether to use rich descriptors for feature
183 | matching ("sift") or optic flow ("flow")
184 | """
185 | self._extract_keypoints(feat_mode)
186 | self._find_fundamental_matrix()
187 | self._find_essential_matrix()
188 | self._find_camera_matrices_rt()
189 |
190 | # triangulate points
191 | first_inliers = np.array(self.match_inliers1)[:, :2]
192 | second_inliers = np.array(self.match_inliers2)[:, :2]
193 | pts4D = cv2.triangulatePoints(self.Rt1, self.Rt2, first_inliers.T,
194 | second_inliers.T).T
195 |
196 | # convert from homogeneous coordinates to 3D
197 | pts3D = pts4D[:, :3] / pts4D[:, 3, None]
198 |
199 | # plot with matplotlib
200 | Xs, Zs, Ys = [pts3D[:, i] for i in range(3)]
201 |
202 | fig = plt.figure()
203 | ax = fig.add_subplot(111, projection='3d')
204 | ax.scatter(Xs, Ys, Zs, c=Ys, cmap=cm.hsv, marker='o')
205 | ax.set_xlabel('X')
206 | ax.set_ylabel('Y')
207 | ax.set_zlabel('Z')
208 | plt.title('3D point cloud: Use pan axes button below to inspect')
209 | plt.show()
210 |
211 | def _extract_keypoints(self, feat_mode):
212 | """Extracts keypoints
213 |
214 | This method extracts keypoints for feature matching based on
215 | a specified mode:
216 | - "sift": use rich sift descriptor
217 | - "flow": use optic flow
218 |
219 | :param feat_mode: keypoint extraction mode ("sift" or "flow")
220 | """
221 | # extract features
222 | if feat_mode.lower() == "sift":
223 | # feature matching via sift and BFMatcher
224 | self._extract_keypoints_sift()
225 | elif feat_mode.lower() == "flow":
226 | # feature matching via optic flow
227 | self._extract_keypoints_flow()
228 | else:
229 | sys.exit(f"Unknown feat_mode {feat_mode}. Use 'sift' or 'flow'")
230 |
231 | def _extract_keypoints_sift(self):
232 | """Extracts keypoints via sift descriptors"""
233 | # extract keypoints and descriptors from both images
234 | # detector = cv2.xfeatures2d.SIFT_create(contrastThreshold=0.11, edgeThreshold=10)
235 | detector = cv2.xfeatures2d.SIFT_create()
236 | first_key_points, first_desc = detector.detectAndCompute(self.img1,
237 | None)
238 | second_key_points, second_desc = detector.detectAndCompute(self.img2,
239 | None)
240 | # match descriptors
241 | matcher = cv2.BFMatcher(cv2.NORM_L1, True)
242 | matches = matcher.match(first_desc, second_desc)
243 |
244 | # generate lists of point correspondences
245 | self.match_pts1 = np.array(
246 | [first_key_points[match.queryIdx].pt for match in matches])
247 | self.match_pts2 = np.array(
248 | [second_key_points[match.trainIdx].pt for match in matches])
249 |
250 | def _extract_keypoints_flow(self):
251 | """Extracts keypoints via optic flow"""
252 | # find FAST features
253 | fast = cv2.FastFeatureDetector_create()
254 | first_key_points = fast.detect(self.img1)
255 |
256 | first_key_list = [i.pt for i in first_key_points]
257 | first_key_arr = np.array(first_key_list).astype(np.float32)
258 |
259 | second_key_arr, status, err = cv2.calcOpticalFlowPyrLK(
260 | self.img1, self.img2, first_key_arr, None)
261 |
262 | # filter out the points with high error
263 | # keep only entries with status=1 and small error
264 | condition = (status == 1) * (err < 5.)
265 | concat = np.concatenate((condition, condition), axis=1)
266 | first_match_points = first_key_arr[concat].reshape(-1, 2)
267 | second_match_points = second_key_arr[concat].reshape(-1, 2)
268 |
269 | self.match_pts1 = first_match_points
270 | self.match_pts2 = second_match_points
271 |
272 | def _find_fundamental_matrix(self):
273 | """Estimates fundamental matrix """
274 | self.F, self.Fmask = cv2.findFundamentalMat(self.match_pts1,
275 | self.match_pts2,
276 | cv2.FM_RANSAC, 0.1, 0.99)
277 |
278 | def _find_essential_matrix(self):
279 | """Estimates essential matrix based on fundamental matrix """
280 | self.E = self.K.T.dot(self.F).dot(self.K)
281 |
282 | def _find_camera_matrices_rt(self):
283 | """Finds the [R|t] camera matrix"""
284 | # decompose essential matrix into R, t (See Hartley and Zisserman 9.13)
285 | U, S, Vt = np.linalg.svd(self.E)
286 | W = np.array([0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0,
287 | 1.0]).reshape(3, 3)
288 |
289 | # iterate over all point correspondences used in the estimation of the
290 | # fundamental matrix
291 | first_inliers = []
292 | second_inliers = []
293 | for pt1, pt2, mask in zip(
294 | self.match_pts1, self.match_pts2, self.Fmask):
295 | if mask:
296 | # normalize and homogenize the image coordinates
297 | first_inliers.append(self.K_inv.dot([pt1[0], pt1[1], 1.0]))
298 | second_inliers.append(self.K_inv.dot([pt2[0], pt2[1], 1.0]))
299 |
300 | # Determine the correct choice of second camera matrix
301 | # only in one of the four configurations will all the points be in
302 | # front of both cameras
303 |
304 | # start with no solution; the assertion below fires if none is valid
305 | R = T = None
306 |
307 | for r in (U.dot(W).dot(Vt), U.dot(W.T).dot(Vt)):
308 | for t in (U[:, 2], -U[:, 2]):
309 | if self._in_front_of_both_cameras(
310 | first_inliers, second_inliers, r, t):
311 | R, T = r, t
312 |
313 | assert R is not None, "Camera matrices were never found"
314 |
315 | self.match_inliers1 = first_inliers
316 | self.match_inliers2 = second_inliers
317 | self.Rt1 = np.hstack((np.eye(3), np.zeros((3, 1))))
318 | self.Rt2 = np.hstack((R, T.reshape(3, 1)))
319 |
320 | def _draw_epipolar_lines_helper(self, img1, img2, lines, pts1, pts2):
321 | """Helper method to draw epipolar lines and features """
322 | if img1.shape[2] == 1:
323 | img1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
324 | if img2.shape[2] == 1:
325 | img2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
326 |
327 | c = img1.shape[1]
328 | for r, pt1, pt2 in zip(lines, pts1, pts2):
329 | color = tuple(np.random.randint(0, 255, 3).tolist())
330 | x0, y0 = map(int, [0, -r[2] / r[1]])
331 | x1, y1 = map(int, [c, -(r[2] + r[0] * c) / r[1]])
332 | cv2.line(img1, (x0, y0), (x1, y1), color, 1)
333 | cv2.circle(img1, tuple(pt1), 5, color, -1)
334 | cv2.circle(img2, tuple(pt2), 5, color, -1)
335 | return img1, img2
336 |
337 | def _in_front_of_both_cameras(self, first_points, second_points, rot,
338 | trans):
339 | """Determines whether point correspondences are in front of both
340 | images"""
341 | print("start")
342 | rot_inv = rot
343 | for first, second in zip(first_points, second_points):
344 | first_z = np.dot(rot[0, :] - second[0] * rot[2, :],
345 | trans) / np.dot(rot[0, :] - second[0] * rot[2, :],
346 | second)
347 | first_3d_point = np.array([first[0] * first_z,
348 | second[0] * first_z, first_z])
349 | second_3d_point = np.dot(rot.T, first_3d_point) - np.dot(rot.T,
350 | trans)
351 |
352 |
353 | if first_3d_point[2] < 0 or second_3d_point[2] < 0:
354 | return False
355 |
356 | return True
357 |
358 | def _linear_ls_triangulation(self, u1, P1, u2, P2):
359 | """Triangulation via Linear-LS method"""
360 | # build A matrix for homogeneous equation system Ax=0
361 | # assume X = (x,y,z,1) for Linear-LS method
362 | # which turns it into AX=B system, where A is 4x3, X is 3x1 & B is 4x1
363 | A = np.array([u1[0] * P1[2, 0] - P1[0, 0], u1[0] * P1[2, 1] - P1[0, 1],
364 | u1[0] * P1[2, 2] - P1[0, 2], u1[1] * P1[2, 0] - P1[1, 0],
365 | u1[1] * P1[2, 1] - P1[1, 1], u1[1] * P1[2, 2] - P1[1, 2],
366 | u2[0] * P2[2, 0] - P2[0, 0], u2[0] * P2[2, 1] - P2[0, 1],
367 | u2[0] * P2[2, 2] - P2[0, 2], u2[1] * P2[2, 0] - P2[1, 0],
368 | u2[1] * P2[2, 1] - P2[1, 1],
369 | u2[1] * P2[2, 2] - P2[1, 2]]).reshape(4, 3)
370 |
371 | B = np.array([-(u1[0] * P1[2, 3] - P1[0, 3]),
372 | -(u1[1] * P1[2, 3] - P1[1, 3]),
373 | -(u2[0] * P2[2, 3] - P2[0, 3]),
374 | -(u2[1] * P2[2, 3] - P2[1, 3])]).reshape(4, 1)
375 |
376 | ret, X = cv2.solve(A, B, flags=cv2.DECOMP_SVD)
377 | return X.reshape(1, 3)
378 |
--------------------------------------------------------------------------------
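plot_point_cloud converts the homogeneous 4D output of cv2.triangulatePoints into 3D coordinates by dividing each row by its fourth component. A small standalone sketch of that conversion step (illustrative only):

    import numpy as np

    # each row is (x, y, z, w); the Euclidean point is (x/w, y/w, z/w)
    pts4D = np.array([[2.0, 4.0, 6.0, 2.0],
                      [1.0, 1.0, 1.0, 0.5]])
    pts3D = pts4D[:, :3] / pts4D[:, 3, None]
    print(pts3D)  # [[1. 2. 3.]
                  #  [2. 2. 2.]]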
/chapter4/wx_gui.py:
--------------------------------------------------------------------------------
1 | ../wx_gui.py
--------------------------------------------------------------------------------
/chapter5/common.py:
--------------------------------------------------------------------------------
1 | import rawpy
2 | import cv2
3 | import numpy as np
4 |
5 |
6 | def load_image(path, bps=16):
7 | if path.suffix == '.CR2':
8 | with rawpy.imread(str(path)) as raw:
9 | data = raw.postprocess(no_auto_bright=True,
10 | gamma=(1, 1),
11 | output_bps=bps)
12 | return data
13 | else:
14 | return cv2.imread(str(path))
15 |
16 |
17 | def load_14bit_gray(path):
18 | img = load_image(path, bps=16)
19 | return (cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) / 4).astype(np.uint16)
20 |
--------------------------------------------------------------------------------
/chapter5/gamma_correct.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 | import itertools
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | from common import load_image, load_14bit_gray
7 | import functools
8 |
9 |
10 | @functools.lru_cache(maxsize=None)
11 | def gamma_transform(x, gamma, bps=14):
12 | return np.clip(pow(x / 2**bps, gamma) * 255.0, 0, 255)
13 |
14 |
15 | def apply_gamma(img, gamma, bps=14):
16 | corrected = img.copy()
17 | for i, j in itertools.product(range(corrected.shape[0]),
18 | range(corrected.shape[1])):
19 | corrected[i, j] = gamma_transform(corrected[i, j], gamma, bps=bps)
20 | return corrected
21 |
22 |
23 | if __name__ == '__main__':
24 | parser = argparse.ArgumentParser()
25 | parser.add_argument('raw_image', type=Path,
26 | help='Location of a .CR2 file.')
27 | parser.add_argument('--gamma', type=float, default=0.3)
28 | args = parser.parse_args()
29 |
30 | gray = load_14bit_gray(args.raw_image)
31 |
32 | normal = np.clip(gray / 64, 0, 255).astype(np.uint8)
33 |
34 | corrected = apply_gamma(gray, args.gamma)
35 |
36 | fig, axes = plt.subplots(2, 2, sharey=False)
37 |
38 | for i, img in enumerate([normal, corrected]):
39 | axes[1, i].hist(img.flatten(), bins=256)
40 | axes[1, i].set_ylim(top=1.5e-2 * len(img.flatten()))
41 | axes[1, i].set_xlabel('Brightness (8 bits)')
42 | axes[1, i].set_ylabel('Number of pixels')
43 | axes[0, i].imshow(img, cmap='gray', vmax=255)
44 | plt.title('Histogram of pixel values')
45 | plt.savefig('histogram.png')
46 | plt.show()
47 |
--------------------------------------------------------------------------------
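apply_gamma above walks over the image pixel by pixel, which is easy to follow but slow on a full-resolution raw file. A vectorized NumPy equivalent of the same transform (a sketch, not a drop-in from the book):

    import numpy as np

    def apply_gamma_vectorized(img, gamma, bps=14):
        # same transform as gamma_transform(), applied to the whole array at once
        out = np.clip((img / 2**bps) ** gamma * 255.0, 0, 255)
        return out.astype(img.dtype)

    gray = (np.random.rand(4, 4) * 2**14).astype(np.uint16)  # fake 14-bit image
    print(apply_gamma_vectorized(gray, 0.3))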
/chapter5/hdr.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from matplotlib import cm
3 | import itertools
4 | import numpy as np
5 | from pathlib import Path
6 | from matplotlib import pyplot as plt
7 | import cv2
8 | from common import load_image
9 |
10 | import exifread
11 |
12 |
13 | MARKERS = ['o', '+', 'x', '*', '.', 'X', '1', 'v', 'D']
14 |
15 |
16 | def thumbnail(img_rgb, long_edge=400):
17 | original_long_edge = max(img_rgb.shape[:2])
18 | dimensions = tuple([int(x / original_long_edge * long_edge) for x in img_rgb.shape[:2]][::-1])
19 | print('dimensions', dimensions)
20 | return cv2.resize(img_rgb, dimensions, interpolation=cv2.INTER_AREA)
21 |
22 |
23 | def exposure_strength(path, iso_ref=100, f_stop_ref=6.375):
24 | with open(path, 'rb') as infile:
25 | tags = exifread.process_file(infile)
26 | [f_stop] = tags['EXIF ApertureValue'].values
27 | [iso_speed] = tags['EXIF ISOSpeedRatings'].values
28 | [exposure_time] = tags['EXIF ExposureTime'].values
29 |
30 | rel_aperture_area = 1 / (f_stop.num / f_stop.den / f_stop_ref) ** 2
31 | exposure_time_float = exposure_time.num / exposure_time.den
32 |
33 | score = rel_aperture_area * exposure_time_float * iso_speed / iso_ref
34 | return score, np.log2(score)
35 |
36 |
37 | def lowe_match(descriptors1, descriptors2):
38 | FLANN_INDEX_KDTREE = 0
39 | index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
40 | search_params = dict(checks=50)
41 | flann = cv2.FlannBasedMatcher(index_params, search_params)
42 |
43 | matches = flann.knnMatch(descriptors1, descriptors2, k=2)
44 | # discard bad matches, ratio test as per Lowe's paper
45 | good_matches = [m for m, n in matches
46 | if m.distance < 0.7 * n.distance]
47 | return good_matches
48 |
49 |
50 | def save_8bit(img, name):
51 | img_8bit = np.clip(img * 255, 0, 255).astype('uint8')
52 | cv2.imwrite(name, img_8bit)
53 | return img_8bit
54 |
55 |
56 | OPEN_CV_COLORS = 'bgr'
57 |
58 |
59 | def plot_crf(crf, colors=OPEN_CV_COLORS):
60 | for i, c in enumerate(colors):
61 | plt.plot(crf[:, 0, i], color=c)
62 |
63 |
64 | if __name__ == '__main__':
65 | parser = argparse.ArgumentParser()
66 | img_group = parser.add_mutually_exclusive_group(required=True)
67 | img_group.add_argument('--image-dir', type=Path)
68 | img_group.add_argument('--images', type=Path, nargs='+')
69 | parser.add_argument('--show-steps', action='store_true')
70 | parser.add_argument('--random-seed', type=int, default=43)
71 | parser.add_argument('--num-pixels', type=int, default=100)
72 | parser.add_argument('--align-images', action='store_true')
73 | parser.add_argument('--debug-color', choices=OPEN_CV_COLORS, default='g')
74 | args = parser.parse_args()
75 |
76 | if args.image_dir:
77 | args.images = sorted(args.image_dir.iterdir())
78 |
79 | args.color_i = OPEN_CV_COLORS.find(args.debug_color)
80 |
81 | images = [load_image(p, bps=8) for p in args.images]
82 | times = [exposure_strength(p)[0] for p in args.images]
83 | times_array = np.array(times, dtype=np.float32)
84 | print('times', times_array)
85 |
86 | if args.show_steps:
87 | np.random.seed(args.random_seed)
88 | pixel_values = {}
89 | while len(pixel_values) < args.num_pixels:
90 | i = np.random.randint(0, high=images[0].shape[0] - 1)
91 | j = np.random.randint(0, high=images[0].shape[1] - 1)
92 |
93 | new_val = images[0][i, j, args.color_i]
94 | good_pixel = True
95 | for vv in pixel_values.values():
96 | if np.abs(vv[0].astype(int) - new_val.astype(int)) < 100 // args.num_pixels:
97 | good_pixel = False
98 | break
99 |
100 | if good_pixel:
101 | pixel_values[(i, j)] = [img[i, j, args.color_i] for img in images]
102 |
103 | log_ts = [np.log2(t) for t in times]
104 |
105 | for [(i, j), vv], marker in zip(pixel_values.items(), MARKERS):
106 | plt.scatter(vv, log_ts, marker=marker, label=f'Pixel [{i}, {j}]')
107 | plt.xlabel('Output Pixel value (8-bit)')
108 | plt.ylabel('log exposure')
109 | plt.legend()
110 | plt.show()
111 |
112 | cal_debevec = cv2.createCalibrateDebevec(samples=200)
113 | print('Calibrated Debevec')
114 | crf_debevec = cal_debevec.process(images, times=times_array)
115 |
116 | merge_debevec = cv2.createMergeDebevec()
117 | hdr_debevec = merge_debevec.process(images, times=times_array.copy(), response=crf_debevec)
118 |
119 | print("merged")
120 |
121 | if args.show_steps:
122 | for [(i, j), vv], marker in zip(pixel_values.items(), MARKERS):
123 | e = hdr_debevec[i, j, args.color_i]
124 | plt.scatter(vv, np.array(log_ts) + np.log(e) + 1.6,
125 | marker=marker,
126 | label=f'Pixel [{i}, {j}]')
127 | plt.plot(np.log(crf_debevec[:, 0, args.color_i]),
128 | color=OPEN_CV_COLORS[args.color_i])
129 | plt.tight_layout()
130 | plt.legend()
131 | plt.show()
132 | # Tonemap HDR image
133 | tonemap1 = cv2.createTonemap(gamma=2.2)
134 | res_debevec = tonemap1.process(hdr_debevec.copy())
135 | x = save_8bit(res_debevec, 'res_debevec.jpg')
136 | plt.imshow(x)
137 | plt.show()
138 |
139 | if args.show_steps:
140 | merge_robertson = cv2.createMergeRobertson()
141 | hdr_robertson = merge_robertson.process(images, times=times_array.copy())
142 | # Tonemap HDR image
143 | tonemap1 = cv2.createTonemap(gamma=2.2)
144 | res_robertson = tonemap1.process(hdr_robertson)
145 | save_8bit(res_robertson, 'res_robertson.jpg')
146 |
147 | # Exposure fusion using Mertens
148 | merge_mertens = cv2.createMergeMertens()
149 | res_mertens = merge_mertens.process(images)
150 | save_8bit(res_mertens, 'res_mertens.jpg')
151 |
--------------------------------------------------------------------------------
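exposure_strength turns the EXIF aperture, shutter time, and ISO of each photograph into a single relative exposure score (and its base-2 logarithm), which is what the Debevec calibration receives as its exposure times. A standalone sketch of the same arithmetic with hard-coded values (illustrative only; the script reads them from EXIF tags):

    import numpy as np

    def exposure_score(f_stop, exposure_time, iso_speed,
                       iso_ref=100, f_stop_ref=6.375):
        # wider apertures, longer exposures, and higher ISO all collect more light
        rel_aperture_area = 1 / (f_stop / f_stop_ref) ** 2
        score = rel_aperture_area * exposure_time * iso_speed / iso_ref
        return score, np.log2(score)

    print(exposure_score(f_stop=6.375, exposure_time=1 / 30, iso_speed=100))
    # (0.0333..., -4.906...)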
/chapter5/panorama.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 | import numpy as np
4 | from hdr import load_image
5 | import cv2
6 |
7 |
8 | def parse_args():
9 | parser = argparse.ArgumentParser()
10 | img_group = parser.add_mutually_exclusive_group(required=True)
11 | img_group.add_argument('--image-dir', type=Path)
12 | img_group.add_argument('--images', type=Path, nargs='+')
13 | parser.add_argument('--show-steps', action='store_true')
14 | args = parser.parse_args()
15 |
16 | if args.image_dir:
17 | args.images = sorted(args.image_dir.iterdir())
18 | return args
19 |
20 |
21 | def largest_connected_subset(images):
22 | finder = cv2.xfeatures2d_SURF.create()
23 | all_img_features = [cv2.detail.computeImageFeatures2(finder, img)
24 | for img in images]
25 |
26 | matcher = cv2.detail.BestOf2NearestMatcher_create(False, 0.6)
27 | pair_matches = matcher.apply2(all_img_features)
28 | matcher.collectGarbage()
29 |
30 | _conn_indices = cv2.detail.leaveBiggestComponent(all_img_features, pair_matches, 0.4)
31 | conn_indices = [i for [i] in _conn_indices]
32 | if len(conn_indices) < 2:
33 | raise RuntimeError("Need 2 or more connected images.")
34 |
35 | conn_features = np.array([all_img_features[i] for i in conn_indices])
36 | conn_images = [images[i] for i in conn_indices]
37 |
38 | if len(conn_images) < len(images):
39 | pair_matches = matcher.apply2(conn_features)
40 | matcher.collectGarbage()
41 |
42 | return conn_images, conn_features, pair_matches
43 |
44 |
45 | def find_camera_parameters(features, pair_matches):
46 | estimator = cv2.detail_HomographyBasedEstimator()
47 | success, cameras = estimator.apply(features, pair_matches, None)
48 | if not success:
49 | raise RuntimeError("Homography estimation failed.")
50 |
51 | for cam in cameras:
52 | cam.R = cam.R.astype(np.float32)
53 |
54 | adjuster = cv2.detail_BundleAdjusterRay()
55 | adjuster.setConfThresh(0.8)
56 |
57 | refine_mask = np.array([[1, 1, 1],
58 | [0, 1, 1],
59 | [0, 0, 0]], dtype=np.uint8)
60 | adjuster.setRefinementMask(refine_mask)
61 |
62 | success, cameras = adjuster.apply(features, pair_matches, cameras)
63 |
64 | if not success:
65 | raise RuntimeError("Camera parameters adjusting failed.")
66 |
67 | print(cameras)
68 | return cameras
69 |
70 |
71 | if __name__ == '__main__':
72 | args = parse_args()
73 | all_images = [load_image(p, bps=8) for p in args.images]
74 |
75 |
76 | conn_images, features, p = largest_connected_subset(all_images)
77 |
78 | cameras = find_camera_parameters(features, p)
79 |
80 | focals = [cam.focal for cam in cameras]
81 | warped_image_scale = np.mean(focals)
82 |
83 | # corners, sizes, images_warped, masks_warped = [], [], [], []
84 |
85 | # warper = cv2.PyRotationWarper('plane', warped_image_scale)
86 | # for i, img in enumerate(conn_images):
87 | # K = cameras[i].K().astype(np.float32)
88 | # corner, image_wp = warper.warp(img, K, cameras[i].R,
89 | # cv2.INTER_LINEAR, cv2.BORDER_REFLECT)
90 |
91 | # corners.append(corner)
92 | # sizes.append((image_wp.shape[1], image_wp.shape[0]))
93 | # images_warped.append(image_wp)
94 | # mask = cv2.UMat(255 * np.ones((img.shape[0], img.shape[1]), np.uint8))
95 | # p, mask_wp = warper.warp(mask, K, cameras[i].R,
96 | # cv2.INTER_NEAREST, cv2.BORDER_CONSTANT)
97 |
98 | # # masks_warped.append(mask_wp.get())
99 |
100 | # images_warped_f = [img.astype(np.float32) for im in images_warped]
101 |
102 | # compensator = cv2.detail.ExposureCompensator_createDefault(
103 | # cv2.detail.ExposureCompensator_NO)
104 | # compensator.feed(corners=corners, images=images_warped, masks=masks_warped)
105 |
106 | # seam_finder = cv2.detail.SeamFinder_createDefault(cv2.detail.SeamFinder_NO)
107 | # seam_finder.find(images_warped_f, corners, masks_warped)
108 |
109 | stitch_sizes, stitch_corners = [], []
110 |
111 | warper = cv2.PyRotationWarper('plane', warped_image_scale)
112 | for i, img in enumerate(conn_images):
113 | sz = img.shape[1], img.shape[0]
114 | K = cameras[i].K().astype(np.float32)
115 | roi = warper.warpRoi(sz, K, cameras[i].R)
116 | stitch_corners.append(roi[0:2])
117 | stitch_sizes.append(roi[2:4])
118 |
119 | canvas_size = cv2.detail.resultRoi(corners=stitch_corners,
120 | sizes=stitch_sizes)
121 |
122 | blend_width = np.sqrt(canvas_size[2] * canvas_size[3]) * 5 / 100
123 | if blend_width < 1:
124 | blender = cv2.detail.Blender_createDefault(cv2.detail.Blender_NO)
125 | else:
126 | blender = cv2.detail_MultiBandBlender()
127 | blender.setNumBands(int(np.log(blend_width) / np.log(2.) - 1.))
128 | blender.prepare(canvas_size)
129 |
130 | for i, img in enumerate(conn_images):
131 |
132 | K = cameras[i].K().astype(np.float32)
133 |
134 | corner, image_wp = warper.warp(img, K, cameras[i].R,
135 | cv2.INTER_LINEAR, cv2.BORDER_REFLECT)
136 |
137 | mask = 255 * np.ones((img.shape[0], img.shape[1]), np.uint8)
138 | _, mask_wp = warper.warp(mask, K, cameras[i].R,
139 | cv2.INTER_NEAREST, cv2.BORDER_CONSTANT)
140 |
141 | # compensator.apply(i, stitch_corners[i], image_wp, mask_wp)
142 | image_warped_s = image_wp.astype(np.int16)
143 | # image_wp = []
144 |
145 | # dilated_mask = cv2.dilate(masks_warped[i], None)
146 | # seam_mask = cv2.resize(dilated_mask,
147 | # (mask_wp.shape[1], mask_wp.shape[0]),
148 | # 0,
149 | # 0,
150 | # cv2.INTER_LINEAR_EXACT)
151 | # mask_warped = cv2.bitwise_and(seam_mask, mask_wp)
152 | # mask_warped = mask_wp
153 |
154 | blender.feed(cv2.UMat(image_warped_s), mask_wp, stitch_corners[i])
155 |
156 | result, result_mask = blender.blend(None, None)
157 | cv2.imwrite('result.jpg', result)
158 |
159 | zoomx = 600.0 / result.shape[1]
160 | dst = cv2.normalize(src=result, dst=None, alpha=255.,
161 | norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
162 | dst = cv2.resize(dst, dsize=None, fx=zoomx, fy=zoomx)
163 | cv2.imwrite('dst.png', dst)
164 | cv2.imwrite('dst.jpeg', dst)
165 | cv2.imshow('panorama', dst)
166 | cv2.waitKey()
167 |
--------------------------------------------------------------------------------
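panorama.py builds the panorama from the low-level cv2.detail pipeline (feature matching, keeping the biggest connected component, bundle adjustment, warping, and multi-band blending). For comparison, OpenCV also ships a one-call high-level API; a minimal sketch of that alternative (the image paths are placeholders):

    import cv2

    paths = ['img0.jpg', 'img1.jpg', 'img2.jpg']   # hypothetical input images
    images = [cv2.imread(p) for p in paths]

    stitcher = cv2.Stitcher_create(cv2.Stitcher_PANORAMA)
    status, pano = stitcher.stitch(images)
    if status == cv2.Stitcher_OK:
        cv2.imwrite('panorama.jpg', pano)
    else:
        print('Stitching failed with status', status)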
/chapter6/chapter6.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | OpenCV with Python Blueprints
6 | Chapter 6: Tracking Visually Salient Objects
7 |
8 | An app to track multiple visually salient objects in a video sequence.
9 | """
10 |
11 | import cv2
12 | from os import path
13 |
14 | from saliency import get_saliency_map, get_proto_objects_map
15 | from tracking import MultipleObjectsTracker
16 |
17 | import time
18 |
19 | def main(video_file='soccer.avi', roi=((140, 100), (500, 600))):
20 | if not path.isfile(video_file):
21 | print(f'File "{video_file}" does not exist.')
22 | raise SystemExit
23 |
24 | # open video file
25 | video = cv2.VideoCapture(video_file)
26 |
27 | # initialize tracker
28 | mot = MultipleObjectsTracker()
29 |
30 | for _, img in iter(video.read, (False, None)):
31 | if roi:
32 | # original video is too big: grab some meaningful ROI
33 | img = img[roi[0][0]:roi[1][0], roi[0][1]:roi[1][1]]
34 |
35 | # generate saliency map
36 | saliency = get_saliency_map(img, use_numpy_fft=False,
37 | gauss_kernel=(3, 3))
38 | objects = get_proto_objects_map(saliency, use_otsu=False)
39 | cv2.imshow('original', img)
40 | cv2.imshow('saliency', saliency)
41 | cv2.imshow('objects', objects)
42 |         cv2.imshow('tracker', mot.advance_frame(img, objects, saliency))
43 | # time.sleep(1)
44 | if cv2.waitKey(100) & 0xFF == ord('q'):
45 | break
46 |
47 |
48 | if __name__ == '__main__':
49 | main()
50 |
--------------------------------------------------------------------------------
/chapter6/saliency.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | A module to generate a saliency map from an RGB image
6 |
7 | This code is based on the approach described in:
8 | [1] X. Hou and L. Zhang (2007). Saliency Detection: A Spectral Residual
9 | Approach. IEEE Transactions on Computer Vision and Pattern Recognition
10 | (CVPR), p.1-8. doi: 10.1109/CVPR.2007.383267
11 | """
12 |
13 | import cv2
14 | import numpy as np
15 | from matplotlib import pyplot as plt
16 | from typing import Tuple
17 |
18 |
19 | def _calc_channel_sal_magn(channel: np.ndarray,
20 | use_numpy_fft: bool = True) -> np.ndarray:
21 | """
22 | Calculate the log-magnitude of the Fourier spectrum
23 | of a single-channel image. This image could be a regular grayscale
24 | image, or a single color channel of an RGB image.
25 |
26 | :param channel: single-channel input image
27 | :returns: log-magnitude of Fourier spectrum
28 | """
29 | # do FFT and get log-spectrum
30 | if use_numpy_fft:
31 | img_dft = np.fft.fft2(channel)
32 | magnitude, angle = cv2.cartToPolar(np.real(img_dft),
33 | np.imag(img_dft))
34 | else:
35 | img_dft = cv2.dft(np.float32(channel),
36 | flags=cv2.DFT_COMPLEX_OUTPUT)
37 | magnitude, angle = cv2.cartToPolar(img_dft[:, :, 0],
38 | img_dft[:, :, 1])
39 |
40 | # get log amplitude
41 | log_ampl = np.log10(magnitude.clip(min=1e-9))
42 |
43 | # blur log amplitude with avg filter
44 | log_ampl_blur = cv2.blur(log_ampl, (3, 3))
45 |
46 | # residual
47 | residual = np.exp(log_ampl - log_ampl_blur)
48 |
49 | # back to cartesian frequency domain
50 | if use_numpy_fft:
51 | real_part, imag_part = cv2.polarToCart(residual, angle)
52 | img_combined = np.fft.ifft2(real_part + 1j * imag_part)
53 | magnitude, _ = cv2.cartToPolar(np.real(img_combined),
54 | np.imag(img_combined))
55 | else:
56 | img_dft[:, :, 0], img_dft[:, :, 1] = cv2.polarToCart(residual,
57 | angle)
58 | img_combined = cv2.idft(img_dft)
59 | magnitude, _ = cv2.cartToPolar(img_combined[:, :, 0],
60 | img_combined[:, :, 1])
61 |
62 | return magnitude
63 |
64 |
65 | def get_saliency_map(frame: np.ndarray,
66 |                      small_shape: Tuple[int, int] = (64, 64),
67 |                      gauss_kernel: Tuple[int, int] = (5, 5),
68 | use_numpy_fft: bool = True) -> np.ndarray:
69 | """
70 | Returns a saliency map
71 |
72 |     This function generates a saliency map for the input frame using
73 |     the spectral residual approach described in the module docstring.
74 |
75 | :returns: grayscale saliency map
76 | """
77 | frame_small = cv2.resize(frame, small_shape)
78 |     if len(frame.shape) == 2:
79 |         # single channel
80 |         sal = _calc_channel_sal_magn(frame_small, use_numpy_fft)
81 | else:
82 | # multiple channels: consider each channel independently
83 | sal = np.zeros_like(frame_small).astype(np.float32)
84 | for c in range(frame_small.shape[2]):
85 | small = frame_small[:, :, c]
86 | sal[:, :, c] = _calc_channel_sal_magn(small, use_numpy_fft)
87 |
88 | # overall saliency: channel mean
89 | sal = np.mean(sal, axis=2)
90 |
91 | # postprocess: blur, normalize, and square
92 | if gauss_kernel is not None:
93 | sal = cv2.GaussianBlur(sal, gauss_kernel, sigmaX=8, sigmaY=0)
94 |
95 | sal /= np.max(sal)
96 | return cv2.resize(sal ** 2, frame.shape[1::-1])
97 |
98 |
99 | def get_proto_objects_map(saliency: np.ndarray, use_otsu=True) -> np.ndarray:
100 | """
101 | Generate the proto-objects map of an RGB image
102 |
103 | Proto-objects are saliency hot spots, generated by thresholding
104 | the saliency map.
105 |
106 |     :param saliency: grayscale saliency map
107 |     :param use_otsu: flag whether to use Otsu thresholding (True) or
108 |         a threshold at three times the mean saliency (False)
109 |     :returns: proto-objects map
110 | """
111 | saliency = np.uint8(saliency * 255)
112 | if use_otsu:
113 | thresh_type = cv2.THRESH_OTSU
114 | # For threshold value, simply pass zero.
115 | thresh_value = 0
116 | else:
117 | thresh_type = cv2.THRESH_BINARY
118 | thresh_value = np.mean(saliency) * 3
119 |
120 | _, img_objects = cv2.threshold(saliency,
121 | thresh_value, 255, thresh_type)
122 | return img_objects
123 |
124 |
125 | def plot_power_spectrum(frame: np.ndarray, use_numpy_fft=True) -> None:
126 | """Plot the power spectrum of image"""
127 |
128 | # convert the frame to grayscale if necessary
129 | if len(frame.shape) > 2:
130 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
131 |
132 | # expand the image to an optimal size for FFT
133 | rows, cols = frame.shape
134 | nrows = cv2.getOptimalDFTSize(rows)
135 | ncols = cv2.getOptimalDFTSize(cols)
136 |     frame = cv2.copyMakeBorder(frame, 0, nrows - rows, 0, ncols - cols,
137 | cv2.BORDER_CONSTANT, value=0)
138 |
139 | # do FFT and get log-spectrum
140 | if use_numpy_fft:
141 | img_dft = np.fft.fft2(frame)
142 | spectrum = np.log10(np.real(np.abs(img_dft))**2)
143 | else:
144 | img_dft = cv2.dft(np.float32(frame), flags=cv2.DFT_COMPLEX_OUTPUT)
145 | spectrum = np.log10(img_dft[:, :, 0]**2 + img_dft[:, :, 1]**2)
146 |
147 | # radial average
148 | L = max(frame.shape)
149 | freqs = np.fft.fftfreq(L)[:L // 2]
150 | dists = np.sqrt(np.fft.fftfreq(frame.shape[0])[:, np.newaxis]**2 +
151 | np.fft.fftfreq(frame.shape[1])**2)
152 | dcount = np.histogram(dists.ravel(), bins=freqs)[0]
153 | histo, bins = np.histogram(dists.ravel(), bins=freqs,
154 | weights=spectrum.ravel())
155 |
156 | centers = (bins[:-1] + bins[1:]) / 2
157 | plt.plot(centers, histo / dcount)
158 | plt.xlabel('frequency')
159 | plt.ylabel('log-spectrum')
160 | plt.show()
161 |
162 |
163 | def calc_magnitude_spectrum(img: np.ndarray):
164 |     """Calculate the magnitude spectrum
165 |     This function calculates the normalized magnitude spectrum of the
166 |     input image.
167 | :returns: magnitude spectrum
168 | """
169 | # convert the frame to grayscale if necessary
170 | if len(img.shape) > 2:
171 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
172 |
173 | # expand the image to an optimal size for FFT
174 | rows, cols = img.shape
175 | nrows = cv2.getOptimalDFTSize(rows)
176 | ncols = cv2.getOptimalDFTSize(cols)
177 |     frame = cv2.copyMakeBorder(img, 0, nrows - rows, 0, ncols - cols,
178 |                                cv2.BORDER_CONSTANT, value=0)
179 |
180 |     # do FFT and get log-spectrum
181 |     img_dft = np.fft.fft2(frame)
182 | spectrum = np.log10(np.abs(np.fft.fftshift(img_dft)))
183 |
184 | # return normalized
185 | return spectrum / np.max(spectrum)
186 |
187 |
188 | if __name__ == '__main__':
189 | video = cv2.VideoCapture('soccer.avi')
190 | _, im = video.read()
191 |     plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
192 |     plot_power_spectrum(im)
193 |     plt.imshow(calc_magnitude_spectrum(im), cmap='gray')
194 |     plt.show()
195 |
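A minimal usage sketch of the two entry points above, applied to a single frame of the video shipped with this chapter (this mirrors what chapter6.py does inside its main loop):

import cv2
from saliency import get_saliency_map, get_proto_objects_map

video = cv2.VideoCapture('soccer.avi')
ok, frame = video.read()
assert ok, 'could not read a frame'

# Spectral-residual saliency in [0, 1], then threshold it into proto-objects.
saliency = get_saliency_map(frame, use_numpy_fft=False, gauss_kernel=(3, 3))
objects = get_proto_objects_map(saliency, use_otsu=False)

cv2.imshow('saliency', saliency)
cv2.imshow('proto-objects', objects)
cv2.waitKey()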
--------------------------------------------------------------------------------
/chapter6/soccer.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter6/soccer.avi
--------------------------------------------------------------------------------
/chapter6/tracking.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """A module that contains an algorithm for multiple-objects tracking"""
5 |
6 | import cv2
7 | import numpy as np
8 | import copy
9 | import itertools
10 |
11 |
12 | class MultipleObjectsTracker:
13 | """
14 | Multiple-objects tracker
15 |
16 | This class implements an algorithm for tracking multiple objects in
17 | a video sequence.
18 | The algorithm combines a saliency map for object detection and
19 | mean-shift tracking for object tracking.
20 | """
21 |
22 | def __init__(self, min_object_area: int = 400,
23 | min_speed_per_pix: float = 0.02):
24 | """
25 | Constructor
26 |
27 | This method initializes the multiple-objects tracking algorithm.
28 |
30 |         :param min_object_area: minimum contour area for a real object
31 |         :param min_speed_per_pix: minimum normalized speed to draw a box
31 | """
32 | self.object_boxes = []
33 | self.min_object_area = min_object_area
34 | self.min_speed_per_pix = min_speed_per_pix
35 | self.num_frame_tracked = 0
36 |         # Set up the termination criteria: either 5 iterations or move
37 |         # by at least 1 pt
38 | self.term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
39 | 5, 1)
40 |
41 | def advance_frame(self,
42 | frame: np.ndarray,
43 | proto_objects_map: np.ndarray,
44 | saliency: np.ndarray) -> np.ndarray:
45 | """
46 | Advance the algorithm by a single frame
47 |
48 | certain targets are discarded:
49 | - targets that are too small
50 | - targets that don't move
51 |
52 | :param frame: New input RGB frame
53 | :param proto_objects_map: corresponding proto-objects map of the
54 | frame
55 |         :param saliency: saliency map used as the mean-shift weight image
56 | :returns: frame annotated with bounding boxes around all objects
57 | that are being tracked
58 | """
59 |         print(f"Objects have been tracked for {self.num_frame_tracked} frames")
60 |
61 |         # Build a list of all bounding boxes found in the
62 | # current proto-objects map
63 | object_contours, _ = cv2.findContours(proto_objects_map, 1, 2)
64 | object_boxes = [cv2.boundingRect(contour)
65 | for contour in object_contours
66 | if cv2.contourArea(contour) > self.min_object_area]
67 |
68 | if len(self.object_boxes) >= len(object_boxes):
69 | # Continue tracking with meanshift if number of salient objects
70 | # didn't increase
71 | object_boxes = [cv2.meanShift(saliency, box, self.term_crit)[1]
72 | for box in self.object_boxes]
73 | self.num_frame_tracked += 1
74 | else:
75 | # Otherwise restart tracking
76 | self.num_frame_tracked = 0
77 | self.object_initial_centers = [
78 | (x + w / 2, y + h / 2) for (x, y, w, h) in object_boxes]
79 |
80 | # Remember current objects
81 | self.object_boxes = object_boxes
82 |
83 | return self.draw_good_boxes(copy.deepcopy(frame))
84 |
85 | def draw_good_boxes(self, frame: np.ndarray) -> np.ndarray:
86 | # Find total displacement length for each object
87 | # and normalize by object size
88 |         displacements = [((x + w / 2 - cx)**2 + (y + h / 2 - cy)**2)**0.5 / w
89 | for (x, y, w, h), (cx, cy)
90 | in zip(self.object_boxes, self.object_initial_centers)]
91 | # Draw objects that move and their numbers
92 | for (x, y, w, h), displacement, i in zip(
93 | self.object_boxes, displacements, itertools.count()):
94 |             # Draw only those that have some average speed
95 | if displacement / (self.num_frame_tracked + 0.01) > self.min_speed_per_pix:
96 | cv2.rectangle(frame, (x, y), (x + w, y + h),
97 | (0, 255, 0), 2)
98 | cv2.putText(frame, str(i), (x, y),
99 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
100 | return frame
101 |
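For reference, the mean-shift update inside advance_frame treats the saliency map as a weight image: each box is shifted toward the local centre of saliency mass. A minimal sketch of a single update, where saliency and box stand in for a map from get_saliency_map and a box from cv2.boundingRect:

import cv2

term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 5, 1)
# `saliency` is a float32 map in [0, 1]; `box` is an (x, y, w, h) tuple.
n_iter, new_box = cv2.meanShift(saliency, box, term_crit)
print(f'mean-shift stopped after {n_iter} iterations -> {new_box}')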
--------------------------------------------------------------------------------
/chapter6/tracking_api.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 |
4 | import cv2
5 | import numpy as np
6 |
7 | # Define Constants
8 | FONT = cv2.FONT_HERSHEY_SIMPLEX
9 | GREEN = (20, 200, 20)
10 | RED = (20, 20, 255)
11 |
12 | # Define trackers
13 | trackers = {
14 | 'BOOSTING': cv2.TrackerBoosting_create,
15 | 'MIL': cv2.TrackerMIL_create,
16 | 'KCF': cv2.TrackerKCF_create,
17 | 'TLD': cv2.TrackerTLD_create,
18 | 'MEDIANFLOW': cv2.TrackerMedianFlow_create,
19 | 'GOTURN': cv2.TrackerGOTURN_create,
20 | 'MOSSE': cv2.TrackerMOSSE_create,
21 | 'CSRT': cv2.TrackerCSRT_create
22 |
23 | }
24 |
25 | # Parse arguments
26 | parser = argparse.ArgumentParser(description='Tracking API demo.')
27 | parser.add_argument(
28 | '--tracker',
29 | default="KCF",
30 | help=f"One of {trackers.keys()}")
31 | parser.add_argument(
32 | '--video',
33 | help="Video file to use",
34 | default="videos/test.mp4")
35 | args = parser.parse_args()
36 |
37 |
38 | tracker_name = args.tracker.upper()
39 | assert tracker_name in trackers, f"Tracker should be one of {trackers.keys()}"
40 | # Open the video and read the first frame
41 | video = cv2.VideoCapture(args.video)
42 | assert video.isOpened(), "Could not open video"
43 | ok, frame = video.read()
44 | assert ok, "Video file is not readable"
45 |
46 | # Select bounding box
47 | bbox = cv2.selectROI(frame, False)
48 |
49 | # Initialize the tracker
50 | tracker = trackers[tracker_name]()
51 | tracker.init(frame, bbox)
52 |
53 | for ok, frame in iter(video.read, (False, None)):
54 | # Time in seconds
55 | start_time = time.time()
56 |
57 | # Update tracker
58 | ok, bbox = tracker.update(frame)
59 |
60 | # Calculate FPS
61 | fps = 1 / (time.time() - start_time)
62 |
63 | # Display tracking info and show frame
64 | if ok:
65 | # Draw bounding box
66 |         x, y, w, h = np.array(bbox, dtype=int)
67 |         cv2.rectangle(frame, (x, y), (x + w, y + h), GREEN, 2, 1)
68 | else:
69 | # Tracking failure
70 | cv2.putText(frame, "Tracking failed", (100, 80), FONT, 0.7, RED, 2)
71 | cv2.putText(frame, f"{tracker_name} Tracker",
72 | (100, 20), FONT, 0.7, GREEN, 2)
73 | cv2.putText(frame, f"FPS : {fps:.0f}", (100, 50), FONT, 0.7, GREEN, 2)
74 | cv2.imshow("Tracking", frame)
75 |
76 | # Exit if ESC pressed
77 | if cv2.waitKey(1) & 0xff == 27:
78 | break
79 |
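The tracker factories above match the OpenCV 4.2 contrib build pinned in requirements.txt; in OpenCV 4.5.1 and later several of these constructors moved to the cv2.legacy namespace, so the dict would raise an AttributeError at import time. A hedged fallback sketch for newer builds:

import cv2

def get_tracker_factory(name: str):
    # Try the main namespace first, then cv2.legacy (present in OpenCV >= 4.5.1).
    for module in (cv2, getattr(cv2, 'legacy', None)):
        if module is None:
            continue
        factory = getattr(module, f'Tracker{name}_create', None)
        if factory is not None:
            return factory
    raise ValueError(f'No Tracker{name}_create in this OpenCV build')

# e.g. get_tracker_factory('KCF')() or get_tracker_factory('MedianFlow')()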
--------------------------------------------------------------------------------
/chapter7/chapter7.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | OpenCV with Python Blueprints
6 | Chapter 7: Learning to Recognize Traffic Signs
7 |
8 | Traffic sign recognition using support vector machines (SVMs).
9 | """
10 |
11 | import cv2
12 | import numpy as np
13 | import matplotlib.pyplot as plt
14 |
15 | from data.gtsrb import load_training_data
16 | from data.gtsrb import load_test_data
17 | from data.process import surf_featurize, hog_featurize
18 | from data.process import hsv_featurize, grayscale_featurize
19 |
20 |
21 | def train_MLP(X_train, y_train):
22 | mlp = cv2.ml.ANN_MLP_create()
23 | mlp.setLayerSizes(np.array([784, 512, 512, 10]))
24 | mlp.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM, 2.5, 1.0)
25 | mlp.setTrainingMethod(cv2.ml.ANN_MLP.BACKPROP)
26 | mlp.train(X_train, cv2.ml.ROW_SAMPLE, y_train)
27 | return mlp
28 |
29 |
30 | def train_one_vs_all_SVM(X_train, y_train):
31 | single_svm = cv2.ml.SVM_create()
32 | single_svm.setKernel(cv2.ml.SVM_LINEAR)
33 | single_svm.setType(cv2.ml.SVM_C_SVC)
34 | single_svm.setC(2.67)
35 | single_svm.setGamma(5.383)
36 | single_svm.train(X_train, cv2.ml.ROW_SAMPLE, y_train)
37 | return single_svm
38 |
39 |
40 | def accuracy(y_predicted, y_true):
41 | return sum(y_predicted == y_true) / len(y_true)
42 |
43 |
44 | def precision(y_predicted, y_true, positive_label):
45 | cm = confusion_matrix(y_predicted, y_true)
46 | true_positives = cm[positive_label, positive_label]
47 | total_positives = sum(cm[positive_label])
48 | return true_positives / total_positives
49 |
50 |
51 | def recall(y_predicted, y_true, positive_label):
52 | cm = confusion_matrix(y_predicted, y_true)
53 | true_positives = cm[positive_label, positive_label]
54 | class_members = sum(cm[:, positive_label])
55 | return true_positives / class_members
56 |
57 |
58 | def confusion_matrix(y_predicted, y_true):
59 | num_classes = max(max(y_predicted), max(y_true)) + 1
60 | conf_matrix = np.zeros((num_classes, num_classes))
61 | for r, c in zip(y_predicted, y_true):
62 | conf_matrix[r, c] += 1
63 | return conf_matrix
64 |
65 |
66 | def train_sklearn_random_forest(X_train, y_train):
67 | pass
68 |
69 |
70 | def main(labels=[0, 10, 20, 30, 40]):
71 | train_data, train_labels = load_training_data(labels)
72 | test_data, test_labels = load_test_data(labels)
73 |
74 | y_train = np.array(train_labels)
75 | y_test = np.array(test_labels)
76 |
77 | accuracies = {}
78 |
79 | for featurize in [hog_featurize, grayscale_featurize,
80 | hsv_featurize, surf_featurize]:
81 | x_train = featurize(train_data)
82 | print(x_train.shape)
83 | model = train_one_vs_all_SVM(x_train, y_train)
84 |
85 | x_test = featurize(test_data)
86 | res = model.predict(x_test)
87 | y_predict = res[1].flatten()
88 | np.save(f'y_predict_{featurize.__name__}', y_predict)
89 | np.save('y_true', y_test)
90 | accuracies[featurize.__name__] = accuracy(y_predict, y_test)
91 |
92 | print(accuracies)
93 |
94 | plt.bar(accuracies.keys(), accuracies.values())
95 |     plt.gca().xaxis.set_tick_params(rotation=20)
96 | plt.ylim([0, 1])
97 | plt.grid()
98 | plt.title('Test accuracy for different featurize functions')
99 | plt.show()
100 |
101 |
102 | if __name__ == '__main__':
103 | main()
104 |
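The precision, recall, and confusion-matrix helpers above are not called from main(), but they can be applied offline to the prediction arrays that main() saves. A minimal sketch, assuming a previous run left y_predict_grayscale_featurize.npy and y_true.npy in the working directory:

import numpy as np
from chapter7 import accuracy, precision, recall

y_true = np.load('y_true.npy')
# The SVM returns float labels, so cast before using them as matrix indices.
y_pred = np.load('y_predict_grayscale_featurize.npy').astype(int)

print('accuracy:', accuracy(y_pred, y_true))
for label in (0, 10, 20, 30, 40):
    print(f'class {label}: precision={precision(y_pred, y_true, label):.2f}, '
          f'recall={recall(y_pred, y_true, label):.2f}')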
--------------------------------------------------------------------------------
/chapter7/data/.gitignore:
--------------------------------------------------------------------------------
1 | *.zip
2 |
--------------------------------------------------------------------------------
/chapter7/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/OpenCV-4-with-Python-Blueprints-Second-Edition/33880272e8fef550a9f85fcc1b0d7509d2a84b7f/chapter7/data/__init__.py
--------------------------------------------------------------------------------
/chapter7/data/gtsrb.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | A module to load the German Traffic Sign Recognition Benchmark (GTSRB)
6 |
7 | The dataset contains more than 50,000 images of traffic signs belonging
8 | to more than 40 classes. The dataset can be freely obtained from:
9 | http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset.
10 | """
11 |
12 | from pathlib import Path
13 | import requests
14 | from io import TextIOWrapper
15 | import hashlib
16 | import cv2
17 | import numpy as np
18 | from zipfile import ZipFile
19 |
20 | import csv
21 | from matplotlib import cm
22 | from matplotlib import pyplot as plt
23 |
24 |
25 | ARCHIVE_PATH = 'https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/' # noqa
26 |
27 |
28 | def _download(filename, *, md5sum=None):
29 |     '''
30 |     Download `filename` from the GTSRB archive unless a local copy with
31 |     a matching md5sum already exists; return the local file path.
32 |     '''
33 | write_path = Path(__file__).parent / filename
34 | if write_path.exists() and _md5sum_matches(write_path, md5sum):
35 | return write_path
36 | response = requests.get(f'{ARCHIVE_PATH}/{filename}')
37 | response.raise_for_status()
38 | with open(write_path, 'wb') as outfile:
39 | outfile.write(response.content)
40 | return write_path
41 |
42 |
43 | def _md5sum_matches(file_path, checksum):
44 | if checksum is None:
45 | return True
46 | hash_md5 = hashlib.md5()
47 | with open(file_path, "rb") as f:
48 | for chunk in iter(lambda: f.read(4096), b""):
49 | hash_md5.update(chunk)
50 | return checksum == hash_md5.hexdigest()
51 |
52 |
53 | def _load_data(filepath, labels):
54 | data, targets = [], []
55 |
56 | with ZipFile(filepath) as data_zip:
57 | for path in data_zip.namelist():
58 | if not path.endswith('.csv'):
59 | continue
60 | # Only iterate over annotations files
61 | *dir_path, csv_filename = path.split('/')
62 | label_str = dir_path[-1]
63 | if labels is not None and int(label_str) not in labels:
64 | continue
65 | with data_zip.open(path, 'r') as csvfile:
66 | reader = csv.DictReader(TextIOWrapper(csvfile), delimiter=';')
67 | for img_info in reader:
68 | img_path = '/'.join([*dir_path, img_info['Filename']])
69 | raw_data = data_zip.read(img_path)
70 | img = cv2.imdecode(np.frombuffer(raw_data, np.uint8), 1)
71 |
72 | x1, y1 = np.int(img_info['Roi.X1']), np.int(img_info['Roi.Y1'])
73 | x2, y2 = np.int(img_info['Roi.X2']), np.int(img_info['Roi.Y2'])
74 |
75 | data.append(img[y1: y2, x1: x2])
76 | targets.append(np.int(img_info['ClassId']))
77 | return data, targets
78 |
79 |
80 | def load_test_data(labels=[0, 10]):
81 | filepath = _download('GTSRB_Online-Test-Images-Sorted.zip',
82 | md5sum='b7bba7dad2a4dc4bc54d6ba2716d163b')
83 | return _load_data(filepath, labels)
84 |
85 |
86 | def load_training_data(labels=[0, 10]):
87 | filepath = _download('GTSRB-Training_fixed.zip',
88 | md5sum='513f3c79a4c5141765e10e952eaa2478')
89 | return _load_data(filepath, labels)
90 |
91 |
92 | if __name__ == '__main__':
93 | train_data, train_labels = load_training_data(labels=None)
94 | np.random.seed(75)
95 | for _ in range(100):
96 | indices = np.arange(len(train_data))
97 | np.random.shuffle(indices)
98 | for r in range(3):
99 | for c in range(5):
100 | i = 5 * r + c
101 | ax = plt.subplot(3, 5, 1 + i)
102 | sample = train_data[indices[i]]
103 | ax.imshow(cv2.resize(sample, (32, 32)), cmap=cm.Greys_r)
104 | ax.axis('off')
105 | plt.tight_layout()
106 | plt.show()
107 | np.random.seed(np.random.randint(len(indices)))
108 |
--------------------------------------------------------------------------------
/chapter7/data/process.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import itertools
4 |
5 |
6 | def hog_featurize(data, *, scale_size=(32, 32)):
7 | """
8 | Featurize using histogram of gradients.
9 |
10 | For each image:
11 |     1. Resize all images to the same (usually smaller) size
12 |     2. Calculate HOG values using the same HOGDescriptor for all images,
13 |        so we get the same number of gradients for each image
14 |     3. Return the flattened list of gradients as the final feature vector.
15 |
16 | """
17 | block_size = (scale_size[0] // 2, scale_size[1] // 2)
18 | block_stride = (scale_size[0] // 4, scale_size[1] // 4)
19 | cell_size = block_stride
20 | hog = cv2.HOGDescriptor(scale_size, block_size, block_stride,
21 | cell_size, 9)
22 |
23 | resized_images = (cv2.resize(x, scale_size) for x in data)
24 |
25 | return np.array([hog.compute(x).flatten() for x in resized_images])
26 |
27 |
28 | def surf_featurize(data, *, scale_size=(16, 16), num_surf_features=100):
29 | all_kp = [cv2.KeyPoint(float(x), float(y), 1)
30 | for x, y in itertools.product(range(scale_size[0]),
31 | range(scale_size[1]))]
32 | surf = cv2.xfeatures2d_SURF.create(hessianThreshold=400)
33 | kp_des = (surf.compute(x, all_kp) for x in data)
34 | return np.array([d.flatten()[:num_surf_features]
35 | for _, d in kp_des]).astype(np.float32)
36 |
37 |
38 | def hsv_featurize(data, *, scale_size=(16, 16)):
39 | """
40 | Featurize by calculating HSV values of the data
41 |
42 | For each image:
43 |     1. Resize all images to the same (usually smaller) size
44 |     2. Convert the image to HSV (values in the 0 - 255 range)
45 |     3. Scale pixel values into (0, 1) and flatten the image
46 |     4. Subtract the average pixel value of the flattened vector.
47 | """
48 | resized_images = (cv2.resize(x, scale_size) for x in data)
49 | hsv_data = (cv2.cvtColor(x, cv2.COLOR_BGR2HSV) for x in resized_images)
50 | scaled_data = (np.array(x).astype(np.float32).flatten() / 255
51 | for x in hsv_data)
52 | return np.vstack([x - x.mean() for x in scaled_data])
53 |
54 |
55 | def grayscale_featurize(data, *, scale_size=(16, 16)):
56 | """
57 | Featurize by calculating grayscale values of the data
58 |
59 | For each image:
60 |     1. Resize all images to the same (usually smaller) size
61 |     2. Convert the image to grayscale (values in the 0 - 255 range)
62 |     3. Scale pixel values into (0, 1) and flatten the image
63 |     4. Subtract the average pixel value of the flattened vector.
64 | """
65 | resized_images = (cv2.resize(x, scale_size) for x in data)
66 | gray_data = (cv2.cvtColor(x, cv2.COLOR_BGR2GRAY) for x in resized_images)
67 | scaled_data = (np.array(x).astype(np.float32).flatten() / 255
68 | for x in gray_data)
69 | return np.vstack([x - x.mean() for x in scaled_data])
70 |
71 |
72 | if __name__ == '__main__':
73 | from data.gtsrb import load_training_data
74 | import matplotlib.pyplot as plt
75 |
76 | train_data, train_labels = load_training_data(labels=[13])
77 |
78 | i = 80
79 |
80 | [f] = hog_featurize([train_data[i]])
81 | print(len(f))
82 |
83 | plt.imshow(train_data[i])
84 | plt.show()
85 |
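For reference, the HOG feature length produced by hog_featurize follows directly from the descriptor geometry chosen above; a small sketch verifying the arithmetic for the default 32x32 configuration:

import cv2
import numpy as np

scale_size = (32, 32)
block_size = (16, 16)    # scale_size // 2
block_stride = (8, 8)    # scale_size // 4
cell_size = block_stride
nbins = 9

# Block positions per axis: (32 - 16) / 8 + 1 = 3, so 3 x 3 = 9 blocks, each
# holding (16 / 8)**2 = 4 cells of 9 bins each -> 9 * 4 * 9 = 324 values.
hog = cv2.HOGDescriptor(scale_size, block_size, block_stride, cell_size, nbins)
print(hog.compute(np.zeros(scale_size, np.uint8)).size)  # expected: 324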
--------------------------------------------------------------------------------
/chapter7/train_tf2.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import cv2
4 | # https://answers.opencv.org/question/175699/readnetfromtensorflow-fails-on-retrained-nn/
5 |
6 | # https://jeanvitor.com/tensorflow-object-detecion-opencv/
7 |
8 | # https://heartbeat.fritz.ai/real-time-object-detection-on-raspberry-pi-using-opencv-dnn-98827255fa60
9 |
10 | from data.gtsrb import load_training_data
11 | from data.gtsrb import load_test_data
12 |
13 | UNIFORM_SIZE = (32, 32)
14 |
15 |
16 | def normalize(x):
17 | """
18 | Do minimum pre-processing
19 | 1. resize to UNIFORM_SIZE
20 | 2. scale to (0, 1) range
21 | 3. subtract the mean of all pixel values
22 | """
23 | one_size = cv2.resize(x, UNIFORM_SIZE).astype(np.float32) / 255
24 | return one_size - one_size.mean()
25 |
26 |
27 | def train_tf_model(X_train, y_train):
28 | model = tf.keras.models.Sequential([
29 | tf.keras.layers.Conv2D(20, (8, 8),
30 | input_shape=list(UNIFORM_SIZE) + [3],
31 | activation='relu'),
32 | tf.keras.layers.MaxPooling2D(pool_size=(4, 4), strides=4),
33 | tf.keras.layers.Dropout(0.15),
34 | tf.keras.layers.Flatten(),
35 | tf.keras.layers.Dense(64, activation='relu'),
36 | tf.keras.layers.Dropout(0.15),
37 | tf.keras.layers.Dense(43, activation='softmax')
38 | ])
39 |
40 | model.compile(optimizer='adam',
41 | loss='sparse_categorical_crossentropy',
42 | metrics=['accuracy'])
43 |     model.fit(X_train, np.array(y_train), epochs=2)
44 | return model
45 |
46 |
47 | if __name__ == '__main__':
48 | train_data, train_labels = load_training_data(labels=None)
49 | test_data, test_labels = load_test_data(labels=None)
50 |
51 | x_train = np.array([normalize(x) for x in train_data])
52 | model = train_tf_model(x_train, train_labels)
53 | x_test = np.array([normalize(x) for x in test_data])
54 |
55 | y_hat = model.predict_classes(x_test)
56 |
57 | acc = sum(y_hat == np.array(test_labels)) / len(test_labels)
58 | print(f'Accuracy = {acc:.3f}')
59 |
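Note that predict_classes is specific to Sequential models and was removed in later TensorFlow 2.x releases; a drop-in sketch of the equivalent call for newer versions (model and x_test are the objects defined in the __main__ block above):

y_hat = np.argmax(model.predict(x_test), axis=1)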
--------------------------------------------------------------------------------
/chapter8/.gitignore:
--------------------------------------------------------------------------------
1 | media/
2 |
--------------------------------------------------------------------------------
/chapter8/chapter8.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 |
5 | """
6 | OpenCV with Python Blueprints
7 | Chapter 8: Learning to Recognize Emotion in Faces
8 |
9 | An app that combines both face detection and face recognition, with a
10 | focus on recognizing emotional expressions in the detected faces.
11 |
12 | The process flow is as follows:
13 | * Run the GUI in collect mode ("python chapter8.py collect") to assemble
14 |   a training set. Each snapshot appends a labeled, aligned head region to
15 |   the CSV file "data/cropped_faces.csv".
16 | * Run "python train_classifier.py --data data/cropped_faces.csv --save <dir>"
17 |   to train an MLP classifier on the collected data. The script stores the
18 |   trained network in "<dir>/mlp.xml" together with the label mapping and
19 |   the PCA parameters.
20 | * Run the GUI in demo mode ("python chapter8.py demo --classifier <dir>") to
21 |   apply the pre-trained MLP classifier to the live webcam stream.
22 | """
23 |
24 | import argparse
25 | import cv2
26 | import numpy as np
27 |
28 | import wx
29 | from pathlib import Path
30 |
31 | from data.store import save_datum, pickle_load
32 | from data.process import _pca_featurize
33 | from detectors import FaceDetector
34 | from wx_gui import BaseLayout
35 |
36 |
37 | class FacialExpressionRecognizerLayout(BaseLayout):
38 | def __init__(self, *args,
39 | clf_path=None,
40 | **kwargs):
41 | super().__init__(*args, **kwargs)
42 | self.clf = cv2.ml.ANN_MLP_load(str(clf_path / 'mlp.xml'))
43 |
44 | self.index_to_label = pickle_load(clf_path / 'index_to_label')
45 | self.pca_args = pickle_load(clf_path / 'pca_args')
46 |
47 | self.face_detector = FaceDetector(
48 | face_cascade='params/haarcascade_frontalface_default.xml',
49 | eye_cascade='params/haarcascade_lefteye_2splits.xml')
50 |
51 | def featurize_head(self, head):
52 | return _pca_featurize(head[None], *self.pca_args)
53 |
54 | def augment_layout(self):
55 | pass
56 |
57 | def process_frame(self, frame_rgb: np.ndarray) -> np.ndarray:
58 | success, frame, self.head, (x, y) = self.face_detector.detect_face(
59 | frame_rgb)
60 | if not success:
61 | return frame
62 |
63 | success, head = self.face_detector.align_head(self.head)
64 | if not success:
65 | return frame
66 |
67 |         # We have to pass a [1 x n] array to predict().
68 | _, output = self.clf.predict(self.featurize_head(head))
69 | label = self.index_to_label[np.argmax(output)]
70 |
71 | # Draw predicted label above the bounding box.
72 | cv2.putText(frame, label, (x, y - 20),
73 | cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
74 |
75 | return frame
76 |
77 |
78 | class DataCollectorLayout(BaseLayout):
79 |
80 | def __init__(self, *args,
81 | training_data='data/cropped_faces.csv',
82 | **kwargs):
83 | super().__init__(*args, **kwargs)
84 | self.face_detector = FaceDetector(
85 | face_cascade='params/haarcascade_frontalface_default.xml',
86 | eye_cascade='params/haarcascade_lefteye_2splits.xml')
87 |
88 | self.training_data = training_data
89 |
90 | def augment_layout(self):
91 | """Initializes GUI"""
92 | # initialize data structure
93 | self.samples = []
94 | self.labels = []
95 |
96 | # create a horizontal layout with all buttons
97 | pnl2 = wx.Panel(self, -1)
98 | self.neutral = wx.RadioButton(pnl2, -1, 'neutral', (10, 10),
99 | style=wx.RB_GROUP)
100 | self.happy = wx.RadioButton(pnl2, -1, 'happy')
101 | self.sad = wx.RadioButton(pnl2, -1, 'sad')
102 | self.surprised = wx.RadioButton(pnl2, -1, 'surprised')
103 | self.angry = wx.RadioButton(pnl2, -1, 'angry')
104 | self.disgusted = wx.RadioButton(pnl2, -1, 'disgusted')
105 | hbox2 = wx.BoxSizer(wx.HORIZONTAL)
106 | hbox2.Add(self.neutral, 1)
107 | hbox2.Add(self.happy, 1)
108 | hbox2.Add(self.sad, 1)
109 | hbox2.Add(self.surprised, 1)
110 | hbox2.Add(self.angry, 1)
111 | hbox2.Add(self.disgusted, 1)
112 | pnl2.SetSizer(hbox2)
113 |
114 | # create horizontal layout with single snapshot button
115 | pnl3 = wx.Panel(self, -1)
116 | self.snapshot = wx.Button(pnl3, -1, 'Take Snapshot')
117 | self.Bind(wx.EVT_BUTTON, self._on_snapshot, self.snapshot)
118 | hbox3 = wx.BoxSizer(wx.HORIZONTAL)
119 | hbox3.Add(self.snapshot, 1)
120 | pnl3.SetSizer(hbox3)
121 |
122 | # arrange all horizontal layouts vertically
123 | self.panels_vertical.Add(pnl2, flag=wx.EXPAND | wx.BOTTOM, border=1)
124 | self.panels_vertical.Add(pnl3, flag=wx.EXPAND | wx.BOTTOM, border=1)
125 |
126 | def process_frame(self, frame_rgb: np.ndarray) -> np.ndarray:
127 | """
128 | Add a bounding box around the face if a face is detected.
129 | """
130 | _, frame, self.head, _ = self.face_detector.detect_face(frame_rgb)
131 | return frame
132 |
133 | def _on_snapshot(self, evt):
134 | """Takes a snapshot of the current frame
135 |
136 | This method takes a snapshot of the current frame, preprocesses
137 | it to extract the head region, and upon success adds the data
138 | sample to the training set.
139 | """
140 | if self.neutral.GetValue():
141 | label = 'neutral'
142 | elif self.happy.GetValue():
143 | label = 'happy'
144 | elif self.sad.GetValue():
145 | label = 'sad'
146 | elif self.surprised.GetValue():
147 | label = 'surprised'
148 | elif self.angry.GetValue():
149 | label = 'angry'
150 | elif self.disgusted.GetValue():
151 | label = 'disgusted'
152 |
153 | if self.head is None:
154 | print("No face detected")
155 | else:
156 | success, aligned_head = self.face_detector.align_head(self.head)
157 | if success:
158 | save_datum(self.training_data, label, aligned_head)
159 | print(f"Saved {label} training datum.")
160 | else:
161 | print("Could not align head (eye detection failed?)")
162 |
163 |
164 | def run_layout(layout_cls, **kwargs):
165 | # open webcam
166 | capture = cv2.VideoCapture(0)
167 |     # try opening the channel ourselves if it failed to open
168 |     if not capture.isOpened():
169 |         capture.open(0)
170 |
171 | capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
172 | capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
173 |
174 | # start graphical user interface
175 | app = wx.App()
176 | layout = layout_cls(capture, **kwargs)
177 | layout.Center()
178 | layout.Show()
179 | app.MainLoop()
180 |
181 |
182 | if __name__ == '__main__':
183 | parser = argparse.ArgumentParser()
184 | parser.add_argument('mode', choices=['collect', 'demo'])
185 | parser.add_argument('--classifier', type=Path)
186 | args = parser.parse_args()
187 |
188 | if args.mode == 'collect':
189 | run_layout(DataCollectorLayout, title='Collect Data')
190 | elif args.mode == 'demo':
191 | assert args.classifier is not None, 'you have to provide --classifier'
192 | run_layout(FacialExpressionRecognizerLayout,
193 | title='Facial Expression Recognizer',
194 | clf_path=args.classifier)
195 |
--------------------------------------------------------------------------------
/chapter8/data/.gitignore:
--------------------------------------------------------------------------------
1 | *.csv
2 | *.csv.[0-9]
3 |
--------------------------------------------------------------------------------
/chapter8/data/process.py:
--------------------------------------------------------------------------------
1 | import json
2 | import numpy as np
3 | from typing import Callable
4 | import cv2
5 |
6 |
7 | def featurize(datum):
8 | return np.array(datum, dtype=np.float32).flatten()
9 |
10 |
11 | EMOTIONS = {
12 | 'neutral': 0,
13 | 'surprised': 1,
14 | 'angry': 2,
15 | 'happy': 3,
16 | 'sad': 4,
17 | 'disgusted': 5
18 | }
19 |
20 | REVERSE_EMOTIONS = {v: k for k, v in EMOTIONS.items()}
21 |
22 |
23 | def int_encode(label):
24 | return EMOTIONS[label]
25 |
26 |
27 | def int_decode(value):
28 | return REVERSE_EMOTIONS[value]
29 |
30 |
31 | def one_hot_encode(all_labels) -> (np.ndarray, dict):
32 |     unique_labels = list(sorted(set(all_labels)))
33 |     index_to_label = dict(enumerate(unique_labels))
34 |     label_to_index = {v: k for k, v in index_to_label.items()}
35 |
36 |     y = np.zeros((len(all_labels), len(unique_labels))).astype(np.float32)
37 | for i, label in enumerate(all_labels):
38 | y[i, label_to_index[label]] = 1
39 |
40 | return y, index_to_label
41 |
42 |
43 | def train_test_split(n, train_portion=0.8, seed=None):
44 | if seed:
45 | np.random.seed(seed)
46 | indices = np.arange(n)
47 | np.random.shuffle(indices)
48 | N = int(n * train_portion)
49 | return indices[:N], indices[N:]
50 |
51 |
52 | def _pca_featurize(data, center, top_vecs):
53 | return np.array([np.dot(top_vecs, np.array(datum).flatten() - center)
54 | for datum in data]).astype(np.float32)
55 |
56 |
57 | def pca_featurize(training_data, *, num_components=20):
58 | x_arr = np.array(training_data).reshape((len(training_data), -1)).astype(np.float32)
59 | mean, eigvecs = cv2.PCACompute(x_arr, mean=None)
60 |
61 | # Take only first num_components eigenvectors.
62 | top_vecs = eigvecs[:num_components]
63 | center = mean.flatten()
64 |
65 | args = (center, top_vecs)
66 | return _pca_featurize(training_data, *args), args
67 |
68 |
69 | if __name__ == '__main__':
70 | print(train_test_split(10, 0.8))
71 | from data.store import load_collected_data
72 | data, targets = load_collected_data('data/cropped_faces.csv')
73 | X, f = pca_featurize(data)
74 | print(X.shape)
75 |
76 |
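The projection performed by _pca_featurize above is an ordinary dot product against the retained eigenvectors; a small sketch verifying that on random stand-in data (run from the chapter8 directory):

import numpy as np
from data.process import pca_featurize, _pca_featurize

rng = np.random.RandomState(0)
fake_faces = rng.rand(50, 200 * 200).astype(np.float32)  # 50 flattened 200x200 heads

X, (center, top_vecs) = pca_featurize(fake_faces, num_components=20)
print(X.shape, top_vecs.shape)  # (50, 20) and (20, 40000)

# Featurizing one new sample: subtract the training mean, then project onto
# the top eigenvectors.
manual = np.dot(top_vecs, fake_faces[0] - center)
assert np.allclose(manual, _pca_featurize(fake_faces[:1], center, top_vecs)[0])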
--------------------------------------------------------------------------------
/chapter8/data/store.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import pickle
3 | import json
4 | from enum import IntEnum, auto, unique
5 | import sys
6 | csv.field_size_limit(sys.maxsize)
7 |
8 |
9 | def load_collected_data(path):
10 | data, targets = [], []
11 | with open(path, 'r', newline='') as infile:
12 | reader = csv.reader(infile)
13 | for label, sample in reader:
14 | targets.append(label)
15 | data.append(json.loads(sample))
16 | return data, targets
17 |
18 |
19 | def save_datum(path, label, img):
20 | with open(path, 'a', newline='') as outfile:
21 | writer = csv.writer(outfile)
22 | writer.writerow([label, img.tolist()])
23 |
24 |
25 | def pickle_dump(f, path):
26 | with open(path, 'wb') as outfile:
27 | return pickle.dump(f, outfile)
28 |
29 |
30 | def pickle_load(path):
31 | with open(path, 'rb') as infile:
32 | return pickle.load(infile)
33 |
34 |
35 | if __name__ == '__main__':
36 | td = load_collected_data('data/cropped_faces.csv')
37 | print([len(x) for x in td])
38 |
--------------------------------------------------------------------------------
/chapter8/detectors.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """A module that contains various detectors"""
5 |
6 | import cv2
7 | import numpy as np
8 |
9 |
10 | class FaceDetector:
11 | """Face Detector
12 |
13 | This class implements a face detection algorithm using a face cascade
14 | and two eye cascades.
15 | """
16 |
17 | def __init__(self, *,
18 | face_cascade='params/haarcascade_frontalface_default.xml',
19 | eye_cascade='params/haarcascade_lefteye_2splits.xml',
20 | scale_factor=4):
21 | # resize images before detection
22 | self.scale_factor = scale_factor
23 |
24 | # load pre-trained cascades
25 | self.face_clf = cv2.CascadeClassifier(face_cascade)
26 | if self.face_clf.empty():
27 | raise ValueError(f'Could not load face cascade "{face_cascade}"')
28 | self.eye_clf = cv2.CascadeClassifier(eye_cascade)
29 | if self.eye_clf.empty():
30 | raise ValueError(
31 | f'Could not load eye cascade "{eye_cascade}"')
32 |
33 | def detect_face(self, rgb_img, *, outline=True):
34 | """Performs face detection
35 |
36 | This method detects faces in an RGB input image.
37 | The method returns True upon success (else False), draws the
38 | bounding box of the head onto the input image (frame), and
39 | extracts the head region (head).
40 |
41 |         :param rgb_img: RGB input image
42 |         :returns: success, frame, head, (x, y)
43 | """
44 | frameCasc = cv2.cvtColor(cv2.resize(rgb_img, (0, 0),
45 | fx=1.0 / self.scale_factor,
46 | fy=1.0 / self.scale_factor),
47 | cv2.COLOR_RGB2GRAY)
48 | faces = self.face_clf.detectMultiScale(
49 | frameCasc,
50 | scaleFactor=1.1,
51 | minNeighbors=3,
52 | flags=cv2.CASCADE_SCALE_IMAGE) * self.scale_factor
53 |
54 | # if face is found: extract head region from bounding box
55 | for (x, y, w, h) in faces:
56 | if outline:
57 | cv2.rectangle(rgb_img, (x, y), (x + w, y + h), (100, 255, 0),
58 | thickness=2)
59 | head = cv2.cvtColor(rgb_img[y:y + h, x:x + w],
60 | cv2.COLOR_RGB2GRAY)
61 | return True, rgb_img, head, (x, y)
62 |
63 | return False, rgb_img, None, (None, None)
64 |
65 | def eye_centers(self, head, *, outline=False):
66 | height, width = head.shape[:2]
67 |
68 | eyes = self.eye_clf.detectMultiScale(head,
69 | scaleFactor=1.1,
70 | minNeighbors=3,
71 | flags=cv2.CASCADE_SCALE_IMAGE)
72 | if len(eyes) != 2:
73 | raise RuntimeError(f'Number of eyes {len(eyes)} != 2')
74 | eye_centers = []
75 | for x, y, w, h in eyes:
76 | # find the center of the detected eye region
77 | eye_centers.append(np.array([x + w / 2, y + h / 2]))
78 | if outline:
79 | cv2.rectangle(head, (x, y), (x + w, y + h), (10, 55, 0),
80 | thickness=2)
81 | return eye_centers
82 |
83 | def align_head(self, head):
84 | """Aligns a head region using affine transformations
85 |
86 | This method preprocesses an extracted head region by rotating
87 | and scaling it so that the face appears centered and up-right.
88 |
89 | The method returns True on success (else False) and the aligned
90 | head region (head). Possible reasons for failure are that one or
91 | both eye detectors fail, maybe due to poor lighting conditions.
92 |
93 | :param head: extracted head region
94 | :returns: success, head
95 | """
96 | # we want the eye to be at 25% of the width, and 20% of the height
97 | # resulting image should be square (desired_img_width,
98 | # desired_img_height)
99 | desired_eye_x = 0.25
100 | desired_eye_y = 0.2
101 | desired_img_width = desired_img_height = 200
102 |
103 | try:
104 | eye_centers = self.eye_centers(head)
105 | except RuntimeError:
106 | return False, head
107 |
108 |         if eye_centers[0][0] < eye_centers[1][0]:
109 | left_eye, right_eye = eye_centers
110 | else:
111 | right_eye, left_eye = eye_centers
112 |
113 | # scale distance between eyes to desired length
114 | eye_dist = np.linalg.norm(left_eye - right_eye)
115 | eyeSizeScale = (1.0 - desired_eye_x * 2) * desired_img_width / eye_dist
116 |
117 | # get rotation matrix
118 | # get center point between the two eyes and calculate angle
119 | eye_angle_deg = 180 / np.pi * np.arctan2(right_eye[1] - left_eye[1],
120 | right_eye[0] - left_eye[0])
121 | eye_midpoint = (left_eye + right_eye) / 2
122 | rot_mat = cv2.getRotationMatrix2D(tuple(eye_midpoint), eye_angle_deg,
123 | eyeSizeScale)
124 |
125 | # shift center of the eyes to be centered in the image
126 | rot_mat[0, 2] += desired_img_width * 0.5 - eye_midpoint[0]
127 | rot_mat[1, 2] += desired_eye_y * desired_img_height - eye_midpoint[1]
128 |
129 | # warp perspective to make eyes aligned on horizontal line and scaled
130 | # to right size
131 | res = cv2.warpAffine(head, rot_mat, (desired_img_width,
132 |                                              desired_img_height))
133 |
134 | # return success
135 | return True, res
136 |
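A minimal sketch of driving the detector outside the GUI, on a single image read from disk (the image file name is a placeholder; the cascade paths are the ones shipped in this chapter's params/ directory):

import cv2
from detectors import FaceDetector

detector = FaceDetector(
    face_cascade='params/haarcascade_frontalface_default.xml',
    eye_cascade='params/haarcascade_lefteye_2splits.xml')

bgr = cv2.imread('some_face.jpg')            # placeholder file name
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)   # detect_face expects RGB
success, annotated, head, (x, y) = detector.detect_face(rgb)
if success:
    aligned_ok, aligned = detector.align_head(head)
    print('face found at', (x, y), '| eyes aligned:', aligned_ok)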
--------------------------------------------------------------------------------
/chapter8/train_classifier.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import cv2
3 | import numpy as np
4 | from pathlib import Path
5 | from collections import Counter
6 | from data.store import load_collected_data
7 | from data.process import train_test_split
8 | from data.process import pca_featurize, _pca_featurize
9 | from data.process import one_hot_encode
10 | from data.store import pickle_dump
11 |
12 |
13 | if __name__ == '__main__':
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--data', required=True)
16 | parser.add_argument('--save', type=Path)
17 | parser.add_argument('--num-components', type=int,
18 | default=20)
19 | args = parser.parse_args()
20 |
21 | data, targets = load_collected_data(args.data)
22 |
23 | train, test = train_test_split(len(data), 0.8)
24 | x_train, pca_args = pca_featurize(np.array(data)[train],
25 | num_components=args.num_components)
26 |
27 | encoded_targets, index_to_label = one_hot_encode(targets)
28 |
29 | last_layer_count = len(encoded_targets[0])
30 | mlp = cv2.ml.ANN_MLP_create()
31 | mlp.setLayerSizes(np.array([args.num_components, 10, last_layer_count], dtype=np.uint8))
32 | mlp.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.1)
33 | mlp.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
34 | mlp.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 30, 0.000001 ))
35 |
36 | y_train = encoded_targets[train]
37 |
38 | mlp.train(x_train, cv2.ml.ROW_SAMPLE, y_train)
39 |
40 | x_test = _pca_featurize(np.array(data)[test], *pca_args)
41 | _, predicted = mlp.predict(x_test)
42 |
43 | y_hat = np.array([index_to_label[np.argmax(y)] for y in predicted])
44 | y_true = np.array(targets)[test]
45 |
46 |     print('Test Accuracy:')
47 | print(sum(y_hat == y_true) / len(y_hat))
48 |
49 | if args.save:
50 | x_all, pca_args = pca_featurize(np.array(data), num_components=args.num_components)
51 | mlp.train(x_all, cv2.ml.ROW_SAMPLE, encoded_targets)
52 | args.save.mkdir(exist_ok=True)
53 | mlp.save(str(args.save / 'mlp.xml'))
54 | pickle_dump(index_to_label, args.save / 'index_to_label')
55 | pickle_dump(pca_args, args.save / 'pca_args')
56 |
--------------------------------------------------------------------------------
/chapter8/wx_gui.py:
--------------------------------------------------------------------------------
1 | ../wx_gui.py
--------------------------------------------------------------------------------
/chapter9/classification.py:
--------------------------------------------------------------------------------
1 | import tensorflow.keras as K
2 |
3 | from data import ds
4 |
5 | base_model = K.applications.MobileNetV2(input_shape=(224,224, 3), include_top=False)
6 |
7 | # Freeze layers
8 | for layer in base_model.layers:
9 | layer.trainable = False
10 |
11 | x = K.layers.GlobalAveragePooling2D()(base_model.output)
12 |
13 | is_breeds = True
14 | if is_breeds:
15 | out = K.layers.Dense(37,activation="softmax")(x)
16 | inp_ds = ds.map(lambda d: (d.image,d.breed))
17 | else:
18 | out = K.layers.Dense(2,activation="softmax")(x)
19 | inp_ds = ds.map(lambda d: (d.image,d.type))
20 |
21 | model = K.Model(inputs=base_model.input, outputs=out)
22 | model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["categorical_accuracy","top_k_categorical_accuracy"])
23 |
24 | valid = inp_ds.take(1000)
25 | train = inp_ds.skip(1000).shuffle(10**4)
26 |
27 | model.fit(train.batch(32), epochs=4)
28 | model.evaluate(valid.batch(1))
29 |
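A hedged sketch of querying the trained breed classifier on one image, continuing in the same session as the code above (it assumes is_breeds = True and reuses the breeds mapping that data.py builds while parsing the annotations; the image path is a placeholder):

import cv2
import numpy as np
from data import breeds  # name -> index mapping populated by data.py

index_to_breed = {v: k for k, v in breeds.items()}

img = cv2.imread('my_cat.jpg')  # placeholder path
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (224, 224)).astype(np.float32) / 255  # same scaling as prepare()

probs = model.predict(img[None])[0]  # `model` is the trained network above
print('predicted breed:', index_to_breed[int(np.argmax(probs))])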
--------------------------------------------------------------------------------
/chapter9/data.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 |
4 | from itertools import count
5 | from collections import defaultdict, namedtuple
6 |
7 | import cv2
8 | import numpy as np
9 | import tensorflow as tf
10 | import xml.etree.ElementTree as ET
11 |
12 | DATASET_DIR = "dataset"
13 | for type in ("annotations", "images"):
14 | tf.keras.utils.get_file(
15 | type,
16 | f"https://www.robots.ox.ac.uk/~vgg/data/pets/data/{type}.tar.gz",
17 | untar=True,
18 | cache_dir=".",
19 | cache_subdir=DATASET_DIR)
20 |
21 | IMAGE_SIZE = 224
22 | IMAGE_ROOT = os.path.join(DATASET_DIR, "images")
23 | XML_ROOT = os.path.join(DATASET_DIR, "annotations")
24 |
25 | Data = namedtuple("Data", "image,box,size,type,breed")
26 |
27 | types = defaultdict(count().__next__)
28 | breeds = defaultdict(count().__next__)
29 |
30 |
31 | def parse_xml(path: str) -> Data:
32 | with open(path) as f:
33 | xml_string = f.read()
34 | root = ET.fromstring(xml_string)
35 | img_name = root.find("./filename").text
36 | breed_name = img_name[:img_name.rindex("_")]
37 | breed_id = breeds[breed_name]
38 | type_id = types[root.find("./object/name").text]
39 | box = np.array([int(root.find(f"./object/bndbox/{tag}").text)
40 | for tag in "xmin,ymin,xmax,ymax".split(",")])
41 | size = np.array([int(root.find(f"./size/{tag}").text)
42 | for tag in "width,height".split(",")])
43 | normed_box = (box.reshape((2, 2)) / size).reshape((4))
44 | return Data(img_name, normed_box, size, type_id, breed_id)
45 |
46 |
47 | xml_paths = glob.glob(os.path.join(XML_ROOT, "xmls", "*.xml"))
48 | xml_paths.sort()
49 |
50 | parsed = np.array([parse_xml(path) for path in xml_paths])
51 |
52 | print(f"{len(types)} TYPES:", *types.keys(), sep=", ")
53 | print(f"{len(breeds)} BREEDS:", *breeds.keys(), sep=", ")
54 |
55 | np.random.seed(1)
56 | np.random.shuffle(parsed)
57 |
58 | ds = tuple(np.array(list(i)) for i in np.transpose(parsed))
59 | ds_slices = tf.data.Dataset.from_tensor_slices(ds)
60 |
61 | for el in ds_slices.take(1):
62 | print(el)
63 | # check boxes
64 | for el in ds_slices:
65 | b = el[1].numpy()
66 | if(np.any((b > 1) | (b < 0)) or np.any(b[2:] < b[:2])):
67 | print(f"Invalid box found {b}")
68 |
69 |
70 | def prepare(image, box, size, type, breed):
71 | image = tf.io.read_file(IMAGE_ROOT + "/" + image)
72 | image = tf.image.decode_png(image, channels=3)
73 | image = tf.image.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
74 | image /= 255
75 | return Data(image, box, size, tf.one_hot(
76 | type, len(types)), tf.one_hot(breed, len(breeds)))
77 |
78 |
79 | ds = ds_slices.map(prepare).prefetch(128)
80 |
81 | if __name__ == "__main__":
82 | def illustrate(sample):
83 | breed_num = np.argmax(sample.breed)
84 | for breed, num in breeds.items():
85 | if num == breed_num:
86 | break
87 | image = sample.image.numpy()
88 | pt1, pt2 = (sample.box.numpy().reshape(
89 | (2, 2)) * IMAGE_SIZE).astype(np.int32)
90 | cv2.rectangle(image, tuple(pt1), tuple(pt2), (0, 1, 0))
91 | cv2.putText(image, breed, (10, 10),
92 | cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 1, 0))
93 | return image
94 | samples_image = np.concatenate([illustrate(sample)
95 | for sample in ds.take(3)], axis=1)
96 | cv2.imshow("samples", samples_image)
97 | cv2.waitKey(0)
98 |
--------------------------------------------------------------------------------
/chapter9/inference.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import tensorflow.keras as K
4 |
5 | def draw_box(frame: np.ndarray, box: np.ndarray) -> np.ndarray:
6 | h, w = frame.shape[0:2]
7 | pts = (box.reshape((2, 2)) * np.array([w, h])).astype(np.int)
8 | cv2.rectangle(frame, tuple(pts[0]), tuple(pts[1]), (0, 255, 0), 2)
9 | return frame
10 |
11 | model = K.models.load_model("localization.h5")
12 |
13 | cap = cv2.VideoCapture(0)
14 |
15 | for _, frame in iter(cap.read, (False, None)):
16 | input = cv2.resize(frame, (224, 224))
17 | input = cv2.cvtColor(input, cv2.COLOR_BGR2RGB)
18 | box, = model.predict(input[None] / 255)
19 | draw_box(frame, box)
20 | cv2.imshow("res", frame)
21 | if(cv2.waitKey(1) == 27):
22 | break
23 |
--------------------------------------------------------------------------------
/chapter9/localization.py:
--------------------------------------------------------------------------------
1 | import tensorflow.keras as K
2 |
3 | from data import ds
4 |
5 | base_model = K.applications.MobileNetV2(
6 | input_shape=(224, 224, 3), include_top=False)
7 |
8 | conv_opts = dict(
9 | activation='relu',
10 | padding='same',
11 | kernel_regularizer="l2")
12 |
13 | x = K.layers.Conv2D(256, (1, 1), **conv_opts)(base_model.output)
14 | x = K.layers.Conv2D(256, (3, 3), strides=2, **conv_opts)(x)
15 | out = K.layers.Flatten()(x)
16 | out = K.layers.Dense(4, activation="sigmoid")(out)
17 |
18 | model = K.Model(inputs=base_model.input, outputs=out)
19 |
20 | inp_ds = ds.map(lambda d: (d.image, d.box))
21 | valid = inp_ds.take(1000)
22 | train = inp_ds.skip(1000).shuffle(10000)
23 |
24 | model.compile(
25 | loss="mean_squared_error",
26 | optimizer="adam",
27 | metrics=[
28 | K.metrics.RootMeanSquaredError(),
29 | "mae"])
30 |
31 | checkpoint = K.callbacks.ModelCheckpoint("localization.h5", monitor='val_root_mean_squared_error',
32 | save_best_only=True, verbose=1)
33 |
34 | model.fit(
35 | train.batch(32),
36 | epochs=12,
37 | validation_data=valid.batch(1),
38 | callbacks=[checkpoint])
39 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile:
--------------------------------------------------------------------------------
1 | #FROM tensorflow/tensorflow:latest-py3
2 |
3 | # FROM tensorflow/tensorflow:latest-gpu-py3
4 | # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/dockerfiles/dockerfiles
5 |
6 | FROM python:3.8
7 |
8 | RUN apt-get update && apt-get install -y \
9 | build-essential \
10 | cmake \
11 | git \
12 | wget \
13 | unzip \
14 | yasm \
15 | pkg-config \
16 | libswscale-dev \
17 | libtbb2 \
18 | libtbb-dev \
19 | libjpeg-dev \
20 | libpng-dev \
21 | libtiff-dev \
22 | libavformat-dev \
23 | libpq-dev \
24 | libgtk2.0-dev \
25 | # Optional
26 | libtbb2 libtbb-dev \
27 | libjpeg-dev \
28 | libpng-dev \
29 | libtiff-dev \
30 | libv4l-dev \
31 | libdc1394-22-dev \
32 | qt4-default \
33 | # Missing libraries for GTK and wxPython dependencies
34 | libatk-adaptor \
35 | libcanberra-gtk-module \
36 | x11-apps \
37 | libgtk-3-dev \
38 | # Tools
39 | imagemagick \
40 | && rm -rf /var/lib/apt/lists/*
41 |
42 | ENV OPENCV_VERSION="4.2.0"
43 |
44 | WORKDIR /
45 | RUN wget --output-document cv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip \
46 | && unzip cv.zip \
47 | && wget --output-document contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip \
48 | && unzip contrib.zip \
49 | && mkdir /opencv-${OPENCV_VERSION}/cmake_binary
50 |
51 | # Install numpy, since it's required for building OpenCV
52 | RUN pip install --upgrade pip && pip install --no-cache-dir numpy==1.18.1
53 |
54 | RUN cd /opencv-${OPENCV_VERSION}/cmake_binary \
55 | && cmake -DBUILD_TIFF=ON \
56 | -DBUILD_opencv_java=OFF \
57 | -DWITH_CUDA=OFF \
58 | -DWITH_OPENGL=ON \
59 | -DWITH_OPENCL=ON \
60 | -DWITH_IPP=ON \
61 | -DWITH_TBB=ON \
62 | -DWITH_EIGEN=ON \
63 | -DWITH_V4L=ON \
64 | -DBUILD_TESTS=OFF \
65 | -DBUILD_PERF_TESTS=OFF \
66 | -DCMAKE_BUILD_TYPE=RELEASE \
67 | -D OPENCV_EXTRA_MODULES_PATH=/opencv_contrib-${OPENCV_VERSION}/modules \
68 | -D OPENCV_ENABLE_NONFREE=ON \
69 | -DCMAKE_INSTALL_PREFIX=$(python -c "import sys; print(sys.prefix)") \
70 | -DPYTHON_EXECUTABLE=$(which python) \
71 | -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \
72 | -DPYTHON_PACKAGES_PATH=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") \
73 | .. \
74 | && make install \
75 | && rm /cv.zip /contrib.zip \
76 | && rm -r /opencv-${OPENCV_VERSION} /opencv_contrib-${OPENCV_VERSION}
77 |
78 | RUN ln -s \
79 | /usr/local/python/cv2/python-3.8/cv2.cpython-38m-x86_64-linux-gnu.so \
80 | /usr/local/lib/python3.8/site-packages/cv2.so
81 |
82 | RUN pip install --upgrade pip && pip install --no-cache-dir pathlib2 wxPython==4.0.5
83 |
84 | RUN pip install --upgrade pip && pip install --no-cache-dir scipy==1.4.1 matplotlib==3.1.2 requests==2.22.0 ipython numba==0.48.0 jupyterlab==1.2.6 rawpy==0.14.0 # Rawpy is required for HDR & Panorama (processing .CR2 files)
85 | RUN pip install ExifRead==2.1.2
86 |
87 |
88 | CMD bash
89 |
--------------------------------------------------------------------------------
/dockerfiles/gpu.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM tensorflow/tensorflow:2.1.0-gpu-py3
2 | RUN apt-get update && apt-get install -y \
3 | build-essential \
4 | cmake \
5 | git \
6 | wget \
7 | unzip \
8 | yasm \
9 | pkg-config \
10 | libswscale-dev \
11 | libtbb2 \
12 | libtbb-dev \
13 | libjpeg-dev \
14 | libpng-dev \
15 | libtiff-dev \
16 | libavformat-dev \
17 | libpq-dev \
18 | libgtk2.0-dev \
19 | # Optional
20 | libtbb2 libtbb-dev \
21 | libjpeg-dev \
22 | libpng-dev \
23 | libtiff-dev \
24 | libv4l-dev \
25 | libdc1394-22-dev \
26 | qt4-default \
27 | # Missing libraries for GTK and wxPython dependencies
28 | libatk-adaptor \
29 | libcanberra-gtk-module \
30 | x11-apps \
31 | libgtk-3-dev \
32 | # Tools
33 | imagemagick \
34 | && rm -rf /var/lib/apt/lists/*
35 |
36 | ENV OPENCV_VERSION="4.2.0"
37 |
38 | WORKDIR /
39 | RUN wget --output-document cv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip \
40 | && unzip cv.zip \
41 | && wget --output-document contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip \
42 | && unzip contrib.zip \
43 | && mkdir /opencv-${OPENCV_VERSION}/cmake_binary
44 |
45 | # Install numpy, since it's required for building OpenCV
46 | RUN pip install --upgrade pip && pip install --no-cache-dir numpy==1.18.1
47 |
48 | RUN cd /opencv-${OPENCV_VERSION}/cmake_binary \
49 | && cmake -DBUILD_TIFF=ON \
50 | -DBUILD_opencv_java=OFF \
51 | -DWITH_CUDA=OFF \
52 | -DWITH_OPENGL=ON \
53 | -DWITH_OPENCL=ON \
54 | -DWITH_IPP=ON \
55 | -DWITH_TBB=ON \
56 | -DWITH_EIGEN=ON \
57 | -DWITH_V4L=ON \
58 | -DBUILD_TESTS=OFF \
59 | -DBUILD_PERF_TESTS=OFF \
60 | -DCMAKE_BUILD_TYPE=RELEASE \
61 | -D OPENCV_EXTRA_MODULES_PATH=/opencv_contrib-${OPENCV_VERSION}/modules \
62 | -D OPENCV_ENABLE_NONFREE=ON \
63 | -DCMAKE_INSTALL_PREFIX=$(python -c "import sys; print(sys.prefix)") \
64 | -DPYTHON_EXECUTABLE=$(which python) \
65 | -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \
66 | -DPYTHON_PACKAGES_PATH=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") \
67 | .. \
68 | && make install \
69 | && rm /cv.zip /contrib.zip \
70 | && rm -r /opencv-${OPENCV_VERSION} /opencv_contrib-${OPENCV_VERSION}
71 |
72 | # RUN ln -s \
73 | # /usr/local/python/cv2/python-3.8/cv2.cpython-38m-x86_64-linux-gnu.so \
74 | # /usr/local/lib/python3.8/site-packages/cv2.so
75 |
76 | RUN pip install --upgrade pip && pip install --no-cache-dir pathlib2 wxPython==4.0.5
77 |
78 | RUN pip install --upgrade pip && pip install --no-cache-dir scipy==1.4.1 matplotlib==3.1.2 requests==2.22.0 ipython numba==0.48.0 jupyterlab==1.2.6 rawpy==0.14.0 # Rawpy is required for HDR & Panorama (processing .CR2 files)
79 |
80 |
81 | CMD bash
82 |
--------------------------------------------------------------------------------
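
The GPU variant can be built and run the same way; a sketch follows, again with an assumed tag name and mount point. Exposing the GPU to the container requires the NVIDIA container toolkit on the host; with Docker 19.03 or newer this is done via `--gpus all`.

    docker build -f dockerfiles/gpu.Dockerfile -t cv-blueprints-gpu .
    docker run -it --rm --gpus all \
        -e DISPLAY=$DISPLAY \
        -v /tmp/.X11-unix:/tmp/.X11-unix \
        -v "$(pwd)":/book -w /book \
        cv-blueprints-gpu
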
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.18.1
2 | scipy==1.4.1
3 | matplotlib==3.1.2
4 | requests==2.22.0
5 | opencv-contrib-python==4.2.0.32
6 | opencv-python==4.2.0.32
7 | rawpy==0.13.1
8 | ExifRead==2.1.2
9 | wheels/wxPython-4.0.7.post2-cp38-cp38-linux_x86_64.whl
10 |
--------------------------------------------------------------------------------
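
For a local, non-Docker setup, the following is a sketch of installing the pinned dependencies into a virtual environment; the environment name is arbitrary. The last requirements line points at the bundled wxPython wheel, which is tracked with Git LFS and built for CPython 3.8 on Linux x86_64, so a Python 3.8 interpreter and a prior `git lfs pull` are assumed.

    git lfs pull                    # materialize the wxPython wheel (otherwise it is only an LFS pointer)
    python3.8 -m venv .venv         # any environment name works
    source .venv/bin/activate
    pip install --upgrade pip
    pip install -r requirements.txt
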
/wheels/wxPython-4.0.7.post2-cp38-cp38-linux_x86_64.whl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:9aa4f425952c7312d89a2247341be50f81372fef2c6a9c1021739fed4b976444
3 | size 134361990
4 |
--------------------------------------------------------------------------------
/wx_gui.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | A module containing simple GUI layouts using wxPython
6 |
7 | This file is heavily based on the work of Michael Beyeler.
8 | """
9 |
10 | __license__ = "GNU GPL 3.0 or later"
11 |
12 | import numpy as np
13 | import wx
14 | import cv2
15 |
16 |
17 | class BaseLayout(wx.Frame):
18 | """ Abstract base class for all layouts in the book.
19 |
20 | A custom layout needs to implement the 2 methods below
21 | - augment_layout
22 | - process_frame
23 | """
24 |
25 | def __init__(self,
26 | capture: cv2.VideoCapture,
27 | title: str = None,
28 | parent=None,
29 | window_id: int = -1, # default value
30 | fps: int = 10):
31 | """
32 | Initialize all necessary parameters and generate a basic GUI layout
33 | that can then be augmented using `self.augment_layout`.
34 |
35 |         :param parent: A wx.Frame parent (often None). If it is not None,
36 |             the frame will be minimized when its parent is minimized and
37 |             restored when its parent is restored.
38 | :param window_id: The window identifier.
39 | :param title: The caption to be displayed on the frame's title bar.
40 | :param capture: Original video source to get the frames from.
41 | :param fps: Frames per second at which to display camera feed.
42 | """
43 | # Make sure the capture device could be set up
44 | self.capture = capture
45 | success, frame = self._acquire_frame()
46 | if not success:
47 | print("Could not acquire frame from camera.")
48 | raise SystemExit()
49 | self.imgHeight, self.imgWidth = frame.shape[:2]
50 |
51 | super().__init__(parent, window_id, title,
52 | size=(self.imgWidth, self.imgHeight + 20))
53 | self.fps = fps
54 | self.bmp = wx.Bitmap.FromBuffer(self.imgWidth, self.imgHeight, frame)
55 |
56 | # set up periodic screen capture
57 | self.timer = wx.Timer(self)
58 | self.timer.Start(1000. / self.fps)
59 | self.Bind(wx.EVT_TIMER, self._on_next_frame)
60 |
61 | # set up video stream
62 | self.video_pnl = wx.Panel(self, size=(self.imgWidth, self.imgHeight))
63 | self.video_pnl.SetBackgroundColour(wx.BLACK)
64 | self.video_pnl.Bind(wx.EVT_PAINT, self._on_paint)
65 |
66 | # display the button layout beneath the video stream
67 | self.panels_vertical = wx.BoxSizer(wx.VERTICAL)
68 | self.panels_vertical.Add(self.video_pnl, 1, flag=wx.EXPAND | wx.TOP,
69 | border=1)
70 |
71 | self.augment_layout()
72 |
73 | # round off the layout by expanding and centering
74 | self.SetMinSize((self.imgWidth, self.imgHeight))
75 | self.SetSizer(self.panels_vertical)
76 | self.Centre()
77 |
78 | def augment_layout(self):
79 |         """ Add custom layout elements to the GUI.
80 |
81 | This method is called in the class constructor, after initializing
82 | common parameters. Every GUI contains the camera feed in the variable
83 | `self.video_pnl`. Additional layout elements can be added below
84 |         the camera feed using the method `self.panels_vertical.Add`.
85 | """
86 | raise NotImplementedError()
87 |
88 | def _on_next_frame(self, event):
89 | """
90 | Capture a new frame from the capture device,
91 | send an RGB version to `self.process_frame`, refresh.
92 | """
93 | success, frame = self._acquire_frame()
94 | if success:
95 | # process current frame
96 | frame = self.process_frame(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
97 |
98 | # update buffer and paint (EVT_PAINT triggered by Refresh)
99 | self.bmp.CopyFromBuffer(frame)
100 | self.Refresh(eraseBackground=False)
101 |
102 | def _on_paint(self, event):
103 | """ Draw the camera frame stored in `self.bmp` onto `self.video_pnl`.
104 | """
105 | wx.BufferedPaintDC(self.video_pnl).DrawBitmap(self.bmp, 0, 0)
106 |
107 | def _acquire_frame(self) -> (bool, np.ndarray):
108 | """ Capture a new frame from the input device
109 |
110 | :return: (success, frame)
111 |             Whether the acquisition was successful, and the current frame.
112 | """
113 | return self.capture.read()
114 |
115 | def process_frame(self, frame_rgb: np.ndarray) -> np.ndarray:
116 | """Process the frame of the camera (or other capture device)
117 |
118 |         :param frame_rgb: Image to process, in RGB format, of shape (H, W, 3)
119 |         :return: Processed image, in RGB format, of shape (H, W, 3)
120 | """
121 | raise NotImplementedError()
122 |
--------------------------------------------------------------------------------
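
To illustrate how the BaseLayout API above is meant to be subclassed, here is a minimal hypothetical example; the `EdgeLayout` class and its Canny-edge processing are illustrative only and are not part of the repository.

    #!/usr/bin/env python
    import cv2
    import numpy as np
    import wx

    from wx_gui import BaseLayout


    class EdgeLayout(BaseLayout):
        """Example layout: displays the camera feed as Canny edges."""

        def augment_layout(self):
            # No extra widgets below the video panel in this minimal example.
            pass

        def process_frame(self, frame_rgb: np.ndarray) -> np.ndarray:
            # Convert to grayscale, detect edges, and return a 3-channel RGB
            # image of the same size, as BaseLayout expects.
            gray = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2GRAY)
            edges = cv2.Canny(gray, 50, 150)
            return cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)


    if __name__ == '__main__':
        capture = cv2.VideoCapture(0)  # assumes a webcam at index 0
        app = wx.App()
        layout = EdgeLayout(capture, title='Edge demo')
        layout.Show(True)
        app.MainLoop()
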