├── 1. Basics.ipynb
├── 2. OpenCV - Basics.ipynb
├── 3. Operations on Images .ipynb
├── 4. Image Processing.ipynb
├── 5. Feature Detection.ipynb
├── 6. Video Analysis.ipynb
├── 7. Camera Calibration and 3D Reconstruction.ipynb
├── 8. Image Denoising and Inpainting.ipynb
├── Barcode Detector.ipynb
├── Digit Recognition.ipynb
├── Document Scanner.ipynb
├── Face Detection using Haar Cascades .ipynb
├── FloodFill.ipynb
├── Human Detection.ipynb
├── Optical Character Recognition using K Nearest Neighbours.ipynb
├── Optical Character Recognition using Support Vector Machines.ipynb
├── Path Planning and Obstacle Detection.ipynb
├── README.md
├── Shape Detection.ipynb
├── Texture Flow.ipynb
├── Zooming.ipynb
├── captures
│   ├── 1.png
│   ├── ataritm.png
│   ├── barcodedetection.png
│   ├── batman1.png
│   ├── br.png
│   ├── br2.png
│   ├── bthresh.png
│   ├── circles.png
│   ├── ck.png
│   ├── closing.png
│   ├── corners2.png
│   ├── cs.png
│   ├── denoise1.png
│   ├── digitrecognizer.png
│   ├── facedetection.png
│   ├── featuresmatched.png
│   ├── featuresmatched2.png
│   ├── floodfill.png
│   ├── foreground.png
│   ├── gradient.png
│   ├── hc.png
│   ├── hista.png
│   ├── histb.png
│   ├── histc.png
│   ├── histd.png
│   ├── histe.png
│   ├── humandetection1.png
│   ├── humandetection2.png
│   ├── imagepyramid.png
│   ├── joker2.png
│   ├── mask.png
│   ├── mpl.png
│   ├── opticalflow.png
│   ├── original.png
│   ├── res.png
│   ├── resizedip.png
│   ├── sat.png
│   ├── shapes.png
│   ├── shapesdetected.png
│   ├── shapesntext.jpg
│   ├── shapesthresh.png
│   ├── sparrow.png
│   ├── sparrows.png
│   ├── step1ds.png
│   ├── step2ds.png
│   ├── step3ds.png
│   ├── thres1.png
│   ├── trackbar.png
│   └── zoom.png
├── datasets
│   ├── digits.png
│   └── letter-recognition.data
├── images
│   ├── 9ball.jpg
│   ├── K.JPG
│   ├── S.JPG
│   ├── atari.jpg
│   ├── atarit.png
│   ├── batman.png
│   ├── boundingrect.png
│   ├── building.jpg
│   ├── calib_pattern.jpg
│   ├── calib_radial.jpg
│   ├── calib_result.jpg
│   ├── camshift_result.jpg
│   ├── chess.png
│   ├── circumcircle.png
│   ├── coins.png
│   ├── contour.jpg
│   ├── contourapprox.jpg
│   ├── contours.png
│   ├── cube.png
│   ├── cubeedge.jpg
│   ├── cubeedge.png
│   ├── denoise.png
│   ├── denoisedimage.jpg
│   ├── diamond.png
│   ├── fast_kp.jpg
│   ├── filter.jpg
│   ├── fitellipse.png
│   ├── fitline.jpg
│   ├── floodfillshapes.png
│   ├── flower - Copy.jpg
│   ├── flower.jpg
│   ├── google.jpg
│   ├── googlelogo.jpg
│   ├── grabcut_output1.jpg
│   ├── grad.png
│   ├── grad2.png
│   ├── gray.jpg
│   ├── grayscale.jpg
│   ├── hd.png
│   ├── hdigits.jpg
│   ├── high_contrast.jpg
│   ├── hist.png
│   ├── ij1.jpg
│   ├── im2.jpg
│   ├── im3.jpg
│   ├── im4.jpg
│   ├── image1.jpg
│   ├── image2.jpg
│   ├── inpaint_result.jpg
│   ├── invertedstar.png
│   ├── joker (1).png
│   ├── joker.png
│   ├── left08.jpg
│   ├── letters.JPG
│   ├── m1left.jpg
│   ├── m2right.jpg
│   ├── messi.png
│   ├── minion1.jpg
│   ├── minionleft.jpg
│   ├── minionright.jpg
│   ├── minions.jpg
│   ├── noise.jpg
│   ├── noise.png
│   ├── noise1.jpg
│   ├── noiseimage.jpg
│   ├── noisyim.jpg
│   ├── opencv_logo.jpg
│   ├── photo_1.jpg
│   ├── photo_2.jpg
│   ├── pokemon_games.png
│   ├── pose_1.jpg
│   ├── pose_2.jpg
│   ├── rect.png
│   ├── sat_noisy.jpg
│   ├── shapes.png
│   ├── shitomasi_block1.jpg
│   ├── sift_keypoints.jpg
│   ├── skew.png
│   ├── star.png
│   ├── star2.png
│   ├── starry_night.jpg
│   ├── surf_kp1.jpg
│   ├── surf_kp2.jpg
│   ├── template.jpg
│   ├── th.png
│   ├── th2.png
│   ├── th3.png
│   ├── th4.png
│   ├── triangle.png
│   ├── water_dt.jpg
│   ├── water_fgbg.jpg
│   ├── water_marker.jpg
│   ├── water_result.jpg
│   └── water_thresh.jpg
├── results
│   ├── Directblending.jpg
│   ├── Pyramidblending.jpg
│   ├── building1.png
│   ├── chessboard1.png
│   └── textureflow.jpg
└── videos
    ├── mean_shift.webm
    ├── meanshiftoutput.mp4
    ├── people-walking.mp4
    ├── slow_traffic.mp4
    └── sparrow.mp4
/1. 
Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Programming Computer Vision : Basics\n", 8 | "___" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## Reading Images\n", 16 | "Images can be read using Image class of Python library **PIL** [(Python Imaging Library)](http://www.pythonware.com/products/pil/)." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "from PIL import Image\n", 28 | "im = Image.open('images/flower.jpg')" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "The return value 'im' is a PIL image object. Thus the following image would be read.\n", 36 | "" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "___\n", 44 | "## Color conversions\n", 45 | "We can use the `convert()` method for Color conversions. An image can be converted to grayscale using the .convert('L') function where 'L' simply is a mode that defines images as 8-bit pixels of black & white. To learn about other modes, you can visit http://pillow.readthedocs.org/en/3.1.x/handbook/concepts.html.\n", 46 | "The library supports transformations between each supported mode and the 'L' and 'RGB' modes. To convert between other modes, you may have to use an intermediate image (typically an “RGB” image)." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "gray = im.convert('L')\n", 58 | "gray.show()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "___\n", 73 | "## Enhancement\n", 74 | "The ImageEnhance module can be used for image enhancement. Once created from an image, an enhancement object can be used to quickly try out different settings.\n", 75 | "You can adjust contrast, brightness, color balance and sharpness in this way." 
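The next cell demonstrates `ImageEnhance.Contrast`; the other adjustments mentioned here (brightness, color balance, sharpness) follow the same create-enhancer-then-`enhance(factor)` pattern. A minimal supplementary sketch, assuming the `im` object opened earlier (the enhancement factors are illustrative only, not prescribed by this notebook):

```python
from PIL import Image, ImageEnhance

im = Image.open('images/flower.jpg')

# Each enhancer wraps the image and exposes enhance(factor);
# a factor of 1.0 returns the original, larger values strengthen the effect.
ImageEnhance.Brightness(im).enhance(1.3).show()   # 30% brighter
ImageEnhance.Color(im).enhance(1.5).show()        # stronger color balance
ImageEnhance.Sharpness(im).enhance(2.0).show()    # sharpened
```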
76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "from PIL import ImageEnhance\n", 87 | "\n", 88 | "enh = ImageEnhance.Contrast(im)\n", 89 | "enh.enhance(1.4).show(\"30% more contrast\")\n" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "___\n", 104 | "\n", 105 | "## Converting into other file format" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "from __future__ import print_function\n", 117 | "import os, sys\n", 118 | "from PIL import Image\n", 119 | "\n", 120 | "def convertToJPEG():\n", 121 | " for infile in sys.argv[1:]:\n", 122 | " f, e = os.path.splitext(infile)\n", 123 | " outfile = f + \".jpg\"\n", 124 | " if infile != outfile:\n", 125 | " try:\n", 126 | " Image.open(infile).save(outfile)\n", 127 | " except IOError:\n", 128 | " print(\"cannot convert\", infile)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "This is a function that converts the images in our specified file format. The PIL function open() creates a PIL image object and the save() method saves the image to a file with the given filename.\n", 136 | "___\n", 137 | "## Creating Thumbnails" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "from __future__ import print_function\n", 149 | "import os, sys\n", 150 | "from PIL import Image\n", 151 | "\n", 152 | "size = (128, 128)\n", 153 | "\n", 154 | "def createThumbnails():\n", 155 | " for infile in sys.argv[1:]:\n", 156 | " outfile = os.path.splitext(infile)[0] + \".thumbnail\"\n", 157 | " if infile != outfile:\n", 158 | " try:\n", 159 | " im = Image.open(infile)\n", 160 | " im.thumbnail(size)\n", 161 | " im.save(outfile, \"JPEG\")\n", 162 | " except IOError:\n", 163 | " print(\"cannot create thumbnail for\", infile)\n" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "The thumbnail() method takes a tuple specifying the new size and converts the image to a thumbnail image with size that fits within the tuple.\n", 171 | "\n", 172 | "___\n", 173 | "## Copy and paste regions\n", 174 | "Cropping a region from an image is done using the crop() method." 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": true 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "box = (100,100,400,400)\n", 186 | "region = im.crop(box)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "\n", 194 | "The region is defined by a 4-tuple, where coordinates are (left, upper, right, lower). PIL uses a coordinate system with (0, 0) in the upper left corner. 
The extracted region can for example be rotated and then put back using the paste() method like this:" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "collapsed": true 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "region = region.transpose(Image.ROTATE_180)\n", 206 | "im.paste(region,box)\n" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "____\n", 214 | "## Resize and rotate\n", 215 | "To resize an image, call resize() with a tuple giving the new size." 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": { 222 | "collapsed": true 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "out = im.resize((128,128))" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "To rotate an image, use counter clockwise angles and rotate() like this:" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": true 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "out = im.rotate(45)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "___\n", 252 | "\n", 253 | "## Using Matplotlib to plot images, points and lines:" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 4, 259 | "metadata": { 260 | "collapsed": true 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "from PIL import Image\n", 265 | "from pylab import *\n", 266 | "# read image to array\n", 267 | "im = array(Image.open('images/flower.jpg'))\n", 268 | "# plot the image\n", 269 | "imshow(im)\n", 270 | "# some points\n", 271 | "x = [100,100,400,400]\n", 272 | "y = [200,500,200,500]\n", 273 | "# plot the points with red star-markers\n", 274 | "plot(x,y,'r*')\n", 275 | "# line plot connecting the first two points\n", 276 | "plot(x[:2],y[:2])\n", 277 | "# add title and show the plot\n", 278 | "title('Plotting: \"flower.jpg')\n", 279 | "show()" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "### Image Contours" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 7, 299 | "metadata": { 300 | "collapsed": false 301 | }, 302 | "outputs": [ 303 | { 304 | "data": { 305 | "text/plain": [ 306 | "(0.0, 400.0, 0.0, 300.0)" 307 | ] 308 | }, 309 | "execution_count": 7, 310 | "metadata": {}, 311 | "output_type": "execute_result" 312 | } 313 | ], 314 | "source": [ 315 | "from PIL import Image\n", 316 | "from pylab import *\n", 317 | "# read image to array\n", 318 | "im = array(Image.open('images/flower.jpg').convert('L'))\n", 319 | "# create a new figure\n", 320 | "figure()\n", 321 | "# don’t use colors\n", 322 | "gray()\n", 323 | "# show contours with origin upper left corner\n", 324 | "contour(im, origin='image')\n", 325 | "axis('equal')\n", 326 | "axis('off')\n" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "\n", 334 | "### Histograms:" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 8, 340 | "metadata": { 341 | "collapsed": true 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "figure()\n", 346 | "hist(im.flatten(),128)\n", 347 | "show()" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | 
"source": [ 354 | "\n", 355 | "This shows the distribution of pixel values. A number of bins is specified for the span of values and each bin gets a count of how many pixels have values in the bin’s range. The visualization of the (graylevel) image histogram is done using the hist() function.\n", 356 | "The second argument specifies the number of bins to use. Note that the image needs to be flattened first, because hist() takes a one-dimensional array as input. The method flatten() converts any array to a one-dimensional array with values taken row-wise.\n", 357 | "___\n", 358 | "\n", 359 | "## Graylevel transforms using NumPy" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 9, 365 | "metadata": { 366 | "collapsed": true 367 | }, 368 | "outputs": [], 369 | "source": [ 370 | "from PIL import Image\n", 371 | "from numpy import *\n", 372 | "im = array(Image.open('images/flower.jpg').convert('L'))\n", 373 | "im2 = 255 - im #invert image\n", 374 | "im3 = (100.0/255) * im + 100 #clamp to interval 100...200\n", 375 | "im4 = 255.0 * (im/255.0)**2 #squared" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "Converting these numpy arrays back into our grayscale images:" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": { 389 | "collapsed": true 390 | }, 391 | "outputs": [], 392 | "source": [ 393 | "npim2 = Image.fromarray(uint8(im2))\n", 394 | "npim2.show()\n", 395 | "npim3 = Image.fromarray(uint8(im3))\n", 396 | "npim3.show()\n", 397 | "npim4 = Image.fromarray(uint8(im4))\n", 398 | "npim4.show()" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "Thus the three transformed grayscale images can be compared as follows:\n", 406 | "" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "___\n", 414 | "## Image De-noising\n", 415 | "Image de-noising is the process of removing image noise while at the same time trying to preserve details and structures" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": { 422 | "collapsed": true 423 | }, 424 | "outputs": [], 425 | "source": [ 426 | "from numpy import *\n", 427 | "\n", 428 | "def denoise(im, U_init, tolerance=0.1, tau=0.125, tv_weight=100):\n", 429 | " \"\"\" An implementation of the Rudin-Osher-Fatemi (ROF) denoising model\n", 430 | " using the numerical procedure presented in Eq. (11) of A. Chambolle\n", 431 | " (2005). 
Implemented using periodic boundary conditions \n", 432 | " (essentially turning the rectangular image domain into a torus!).\n", 433 | " \n", 434 | " Input:\n", 435 | " im - noisy input image (grayscale)\n", 436 | " U_init - initial guess for U\n", 437 | " tv_weight - weight of the TV-regularizing term\n", 438 | " tau - steplength in the Chambolle algorithm\n", 439 | " tolerance - tolerance for determining the stop criterion\n", 440 | " \n", 441 | " Output:\n", 442 | " U - denoised and detextured image (also the primal variable)\n", 443 | " T - texture residual\"\"\"\n", 444 | " \n", 445 | " #---Initialization\n", 446 | " m,n = im.shape #size of noisy image\n", 447 | "\n", 448 | " U = U_init\n", 449 | " Px = im #x-component to the dual field\n", 450 | " Py = im #y-component of the dual field\n", 451 | " error = 1 \n", 452 | " iteration = 0\n", 453 | "\n", 454 | " #---Main iteration\n", 455 | " while (error > tolerance):\n", 456 | " Uold = U\n", 457 | "\n", 458 | " #Gradient of primal variable\n", 459 | " LyU = vstack((U[1:,:],U[0,:])) #Left translation w.r.t. the y-direction\n", 460 | " LxU = hstack((U[:,1:],U.take([0],axis=1))) #Left translation w.r.t. the x-direction\n", 461 | "\n", 462 | " GradUx = LxU-U #x-component of U's gradient\n", 463 | " GradUy = LyU-U #y-component of U's gradient\n", 464 | "\n", 465 | " #First we update the dual varible\n", 466 | " PxNew = Px + (tau/tv_weight)*GradUx #Non-normalized update of x-component (dual)\n", 467 | " PyNew = Py + (tau/tv_weight)*GradUy #Non-normalized update of y-component (dual)\n", 468 | " NormNew = maximum(1,sqrt(PxNew**2+PyNew**2))\n", 469 | "\n", 470 | " Px = PxNew/NormNew #Update of x-component (dual)\n", 471 | " Py = PyNew/NormNew #Update of y-component (dual)\n", 472 | "\n", 473 | " #Then we update the primal variable\n", 474 | " RxPx =hstack((Px.take([-1],axis=1),Px[:,0:-1])) #Right x-translation of x-component\n", 475 | " RyPy = vstack((Py[-1,:],Py[0:-1,:])) #Right y-translation of y-component\n", 476 | " DivP = (Px-RxPx)+(Py-RyPy) #Divergence of the dual field.\n", 477 | " U = im + tv_weight*DivP #Update of the primal variable\n", 478 | "\n", 479 | " #Update of error-measure\n", 480 | " error = linalg.norm(U-Uold)/sqrt(n*m);\n", 481 | " iteration += 1;\n", 482 | "\n", 483 | " print iteration, error\n", 484 | "\n", 485 | " #The texture residual\n", 486 | " T = im - U\n", 487 | " print 'Number of ROF iterations: ', iteration\n", 488 | " \n", 489 | " return U,T" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "metadata": {}, 495 | "source": [ 496 | "In this example, we used the function roll(), which as the name suggests, \"rolls\" the values of an array cyclically around an axis. This is very convenient for computing neighbor differences, in this case for derivatives. 
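The cyclic translations built above with `vstack`/`hstack` are exactly the periodic shifts that NumPy's `roll()` computes, which is why the text refers to it. A small equivalence sketch (assuming any 2-D array `U`, as in the function body):

```python
import numpy as np

U = np.random.rand(4, 5)

# Left translations (used for the gradient of the primal variable)
assert np.allclose(np.roll(U, -1, axis=0), np.vstack((U[1:, :], U[0, :])))
assert np.allclose(np.roll(U, -1, axis=1), np.hstack((U[:, 1:], U.take([0], axis=1))))

# Right translations (used for the divergence of the dual field)
assert np.allclose(np.roll(U, 1, axis=0), np.vstack((U[-1, :], U[0:-1, :])))
assert np.allclose(np.roll(U, 1, axis=1), np.hstack((U.take([-1], axis=1), U[:, 0:-1])))
```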
We also used linalg.norm() which measures the difference between two arrays (in this case the image matrices U and Uold)\n", 497 | "\n", 498 | "We can now use the denoise function to remove noise from a real image This is the image to be tested:\n", 499 | "" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": { 506 | "collapsed": true 507 | }, 508 | "outputs": [], 509 | "source": [ 510 | "from PIL import Image\n", 511 | "import pylab\n", 512 | "\n", 513 | "im = array(Image.open('images/noiseimage.jpg').convert('L'))\n", 514 | "U,T = denoise(im,im)\n", 515 | "\n", 516 | "pylab.figure()\n", 517 | "pylab.gray()\n", 518 | "pylab.imshow(U)\n", 519 | "pylab.axis('equal')\n", 520 | "pylab.axis('off')\n", 521 | "pylab.show()" 522 | ] 523 | }, 524 | { 525 | "cell_type": "markdown", 526 | "metadata": {}, 527 | "source": [ 528 | "The resulting de-noised image is:\n", 529 | " \n", 530 | " ___" 531 | ] 532 | }, 533 | { 534 | "cell_type": "markdown", 535 | "metadata": {}, 536 | "source": [ 537 | "Thus we are done with the basics of Computer Vision. Next we would level up a bit by exploring the OpenCV library.\n", 538 | "
" 539 | ] 540 | } 541 | ], 542 | "metadata": { 543 | "kernelspec": { 544 | "display_name": "Python 3", 545 | "language": "python", 546 | "name": "python3" 547 | }, 548 | "language_info": { 549 | "codemirror_mode": { 550 | "name": "ipython", 551 | "version": 3 552 | }, 553 | "file_extension": ".py", 554 | "mimetype": "text/x-python", 555 | "name": "python", 556 | "nbconvert_exporter": "python", 557 | "pygments_lexer": "ipython3", 558 | "version": "3.5.2" 559 | } 560 | }, 561 | "nbformat": 4, 562 | "nbformat_minor": 0 563 | } 564 | -------------------------------------------------------------------------------- /2. OpenCV - Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# OpenCV : Basics\n", 8 | "\n", 9 | "___" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "OpenCV (Open Source Computer Vision) is an image and video processing library of programming functions mainly aimed at real-time computer vision. OpenCV has bindings in C++, C, Python, Java and MATLAB/OCTAVE.\n", 17 | "\n", 18 | "**Applications** of OpenCV include variety of image and video analysis techniques like :" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "\n" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "and lots of others.\n", 46 | "\n", 47 | "___\n", 48 | "\n", 49 | "In the next set of examples, we will primarily be working on Python. Installing OpenCV for python requires two main libraries, with an optional third. Below Python packages are to be downloaded and installed to their default locations.\n", 50 | "\n", 51 | "1. Python-2.7.x.\n", 52 | "2. Numpy.\n", 53 | "3. Matplotlib (Matplotlib is optional, but recommended since we use it a lot in our tutorials).\n", 54 | "\n", 55 | "## Windows Users:\n", 56 | "Download the appropriate wheel (.whl) file of opencv for your corresponding operating system from https://www.lfd.uci.edu/~gohlke/pythonlibs/#opencv\n", 57 | "\n", 58 | "Then open Command Prompt and direct to the Scripts folder and install the modules using pip:\n", 59 | ">> `C:/Python34/Scripts`\n", 60 | "\n", 61 | ">> `pip install _youropencvwhlfile_.whl`\n", 62 | "\n", 63 | ">> `pip install numpy`\n", 64 | "\n", 65 | ">> `pip install matplotlib`\n", 66 | "\n", 67 | "If this method doesn't work, here's an alternative : \n", 68 | "\n", 69 | "* Download latest OpenCV release from [here](http://sourceforge.net/projects/opencvlibrary/files/opencv-win/2.4.6/OpenCV-2.4.6.0.exe/download) and double-click to extract it.\n", 70 | "* Goto opencv/build/python/2.7 folder.\n", 71 | "* Copy cv2.pyd to C:/Python27/lib/site-packages.\n", 72 | "* Open Python IDLE and type following codes in Python terminal." 
73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "import cv2\n", 84 | "print cv2.__version__" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "If the results are printed out without any errors then you have successfully installed OpenCV-Python.\n", 92 | "\n", 93 | "## Linux / Mac Users:\n", 94 | ">> `pip3 install numpy` or `apt-get install python3-numpy`.\n", 95 | "\n", 96 | "You may need to apt-get install python3-pip.\n", 97 | "\n", 98 | ">> `pip3 install matplotlib` or `apt-get install python3-matplotlib`.\n", 99 | "\n", 100 | ">> `apt-get install python-OpenCV`.\n", 101 | "\n", 102 | "Matplotlib is an optional choice for visualizing video or image frames . Numpy will be primarily used for its array functionality. Finally, we will be using the python-specific bindings for OpenCV called python-OpenCV.\n", 103 | "\n", 104 | "**[Here](http://www.pyimagesearch.com/2016/10/24/ubuntu-16-04-how-to-install-opencv/)'s an alternative solution to build and install OpenCV in Ubuntu.**\n", 105 | "\n", 106 | "\n", 107 | "Once installed, Run the following python module imports:" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 3, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "import cv2\n", 119 | "import matplotlib\n", 120 | "import numpy" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "If there are no errors then we are good to go!\n", 128 | "___" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Getting started with images" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "### Reading an image:\n", 143 | "\n", 144 | "Use the function _cv2.imread()_ to read an image. The image should be in the working directory or a full path of image should be given. I highly encourage you to use your own images as examples to increase fun as well as the learning curve.\n", 145 | "\n", 146 | "Second argument is a flag which specifies the way image should be read.\n", 147 | "\n", 148 | "cv2.IMREAD_COLOR : Loads a color image. Any transparency of image will be neglected. It is the default flag.\n", 149 | "cv2.IMREAD_GRAYSCALE : Loads image in grayscale mode\n", 150 | "cv2.IMREAD_UNCHANGED : Loads image as such including alpha channel" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 8, 156 | "metadata": { 157 | "collapsed": true 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "import numpy as np\n", 162 | "import cv2\n", 163 | "# Load an color image in grayscale\n", 164 | "img = cv2.imread('images/flower.jpg',0)\n", 165 | "# Warning: Even if the image path is wrong, it won’t throw any error, but print img will give you None\n" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "### Displaying an image:\n", 173 | "Use the function _cv2.imshow()_ to display an image in a window. The window automatically fits to the image size.\n", 174 | "First argument is a window name which is a string. second argument is our image. 
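Before the `cv2.imshow` example that follows, here is a slightly more defensive load-and-display sketch: it passes the read flag by name rather than the bare `0` used above and guards against the silent `None` return mentioned in the warning (the file name and window title are only illustrative):

```python
import cv2

img = cv2.imread('images/flower.jpg', cv2.IMREAD_GRAYSCALE)

# cv2.imread() does not raise on a bad path; it silently returns None.
if img is None:
    raise IOError('could not read images/flower.jpg')

cv2.imshow('grayscale flower', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
```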
" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 9, 180 | "metadata": { 181 | "collapsed": true 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "cv2.imshow('image',img)\n", 186 | "cv2.waitKey(0)\n", 187 | "cv2.destroyAllWindows()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "A GUI will open as a result and would look like:\n", 195 | "" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "_cv2.waitKey()_ is a keyboard binding function. Its argument is the time in milliseconds. The function waits for\n", 203 | "specified milliseconds for any keyboard event. If you press any key in that time, the program continues. If 0 is passed,\n", 204 | "it waits indefinitely for a key stroke. It can also be set to detect specific key strokes like, if key a is pressed etc which\n", 205 | "we will discuss below.\n", 206 | "\n", 207 | "_cv2.destroyAllWindows()_ simply destroys all the windows we created. If you want to destroy any specific window,\n", 208 | "use the function cv2.destroyWindow() where you pass the exact window name as the argument.\n", 209 | "\n", 210 | "Note: There is a special case where you can already create a window and load image to it later. In that case, you can\n", 211 | "specify whether window is resizable or not. It is done with the function cv2.namedWindow(). By default, the flag is\n", 212 | "cv2.WINDOW_AUTOSIZE. But if you specify flag to be cv2.WINDOW_NORMAL, you can resize window. It will be\n", 213 | "helpful when image is too large in dimension and adding track bar to windows.\n", 214 | "\n", 215 | "This can be done using:" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 11, 221 | "metadata": { 222 | "collapsed": false 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "cv2.namedWindow('image', cv2.WINDOW_NORMAL)\n", 227 | "cv2.imshow('image',img)\n", 228 | "cv2.waitKey(0)\n", 229 | "cv2.destroyAllWindows()" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "### Write an image\n", 237 | "Use the function _cv2.imwrite()_ to save an image.\n", 238 | "First argument is the file name, second argument is the image you want to save." 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 12, 244 | "metadata": { 245 | "collapsed": false 246 | }, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "True" 252 | ] 253 | }, 254 | "execution_count": 12, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "cv2.imwrite('flowergray.png',img)\n", 261 | "# This will save the image in PNG format in the working directory." 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "___\n", 269 | "## Getting started with Videos\n" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "OpenCV provides a very simple interface to capture live stream with our own cameras. \n", 277 | "\n", 278 | "To capture a video, you need to create a VideoCapture object. Its argument can be either the device index or the name\n", 279 | "of a video file. Device index is just the number to specify which camera. 
\n", 280 | "If there are multiple cameras connected to your computer passing index as 0 or -1 would start the first camera; passing 1 as index would start the second camera and so on.\n", 281 | "\n", 282 | "After starting the respective camera, you can capture frame-by-frame. And at the end of capturing, we release the capture." 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": { 289 | "collapsed": true 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "import numpy as np\n", 294 | "import cv2\n", 295 | "cap = cv2.VideoCapture(0)\n", 296 | "while(True):\n", 297 | " # Capture frame-by-frame\n", 298 | " ret, frame = cap.read()\n", 299 | " # Our operations on the frame come here\n", 300 | " gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n", 301 | " # Display the resulting frame\n", 302 | " cv2.imshow('frame',gray)\n", 303 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 304 | " break\n", 305 | "# When everything done, release the capture\n", 306 | "cap.release()\n", 307 | "cv2.destroyAllWindows()" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": {}, 313 | "source": [ 314 | "This will capture a\n", 315 | "video from the camera (in this case the in-built webcam of my laptop), convert it into grayscale video and display it.\n", 316 | "\n", 317 | "cap.read() returns a bool (True/False). If frame is read correctly, it will be True. So you can check end of the\n", 318 | "video by checking this return value.\n", 319 | "Sometimes, cap may not have initialized the capture. In that case, this code shows error. You can check whether it is\n", 320 | "initialized or not by the method cap.isOpened(). If it is True, OK. Otherwise open it using cap.open()." 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "### Playing Video from file\n", 328 | "It is same as capturing from Camera, just change camera index with video file name. Also while displaying the frame,\n", 329 | "use appropriate time for _cv2.waitKey()_. If it is too less, video will be very fast and if it is too high, video will be\n", 330 | "slow (Well, that is how you can display videos in slow motion). 25 milliseconds will be OK in normal cases" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "collapsed": true 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "import numpy as np\n", 342 | "import cv2\n", 343 | "cap = cv2.VideoCapture('videos/people-walking.mp4')\n", 344 | "while(cap.isOpened()):\n", 345 | " ret, frame = cap.read()\n", 346 | " gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n", 347 | " cv2.imshow('frame',gray)\n", 348 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 349 | " break\n", 350 | "cap.release()\n", 351 | "cv2.destroyAllWindows()" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "### Saving a Video\n", 359 | "What if we want to save the video after we capture it and process it frame-by-frame? For images, it is very simple, just\n", 360 | "use cv2.imwrite().\n", 361 | "\n", 362 | "This time we create a VideoWriter object. We should specify the output file name (eg: output.avi). Then we should\n", 363 | "specify the FourCC code . Then number of frames per second (fps) and frame size should\n", 364 | "be passed. And last one is isColor flag. 
If it is True, encoder expect color frame, otherwise it works with grayscale\n", 365 | "frame.\n", 366 | "\n", 367 | "FourCC is a 4-byte code used to specify the video codec. The list of available codes can be found in fourcc.org. It is\n", 368 | "platform dependent.\n", 369 | "\n", 370 | "FourCC code is passed as cv2.VideoWriter_fourcc(’M’,’J’,’P’,’G’) or\n", 371 | "cv2.VideoWriter_fourcc(*’MJPG) for MJPG.\n", 372 | "\n", 373 | "Below code captures from a Camera, flip every frame in vertical direction and saves it." 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": { 380 | "collapsed": true 381 | }, 382 | "outputs": [], 383 | "source": [ 384 | "import numpy as np\n", 385 | "import cv2\n", 386 | "cap = cv2.VideoCapture(0)\n", 387 | "# Define the codec and create VideoWriter object\n", 388 | "fourcc = cv2.VideoWriter_fourcc(*'XVID')\n", 389 | "out = cv2.VideoWriter('output.avi',fourcc, 20.0, (640,480))\n", 390 | "while(cap.isOpened()):\n", 391 | " ret, frame = cap.read()\n", 392 | " if ret==True:\n", 393 | " frame = cv2.flip(frame,0)\n", 394 | " # write the flipped frame\n", 395 | " out.write(frame)\n", 396 | " cv2.imshow('frame',frame)\n", 397 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 398 | " break\n", 399 | " else:\n", 400 | " break\n", 401 | "# Release everything if job is finished\n", 402 | "cap.release()\n", 403 | "out.release()\n", 404 | "cv2.destroyAllWindows()" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "___" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "## Drawing and Writing Text on Images\n", 419 | "To draw different shapes using OpenCV we would be using functions like:\n", 420 | "_cv2.line(), cv2.circle() , cv2.rectangle(), cv2.ellipse() etc\n", 421 | "\n", 422 | "In all the above functions, you will see some common arguments as given below:\n", 423 | "* img : The image where you want to draw the shapes\n", 424 | "* color : Color of the shape. for BGR, pass it as a tuple, eg: (255,0,0) for blue. For grayscale, just pass the scalar value.\n", 425 | "* thickness : Thickness of the line or circle etc. If -1 is passed for closed figures like circles, it will fill the shape. default thickness = 1\n", 426 | "* lineType : Type of line, whether 8-connected, anti-aliased line etc. By default, it is 8-connected. cv2.LINE_AA gives anti-aliased line which looks great for curves.\n", 427 | "\n", 428 | "To add text to images you need to specify following things:\n", 429 | "* Text data that you want to write \n", 430 | "* Position coordinates of where you want put it (i.e. bottom-left corner where data starts). \n", 431 | "* Font type (Check cv2.putText() docs for supported fonts)\n", 432 | "* Font Scale (specifies the size of font)\n", 433 | "* regular things like color, thickness, lineType etc. 
For better look, lineType = cv2.LINE_AA is recommended.\n" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 5, 439 | "metadata": { 440 | "collapsed": true 441 | }, 442 | "outputs": [], 443 | "source": [ 444 | "import numpy as np\n", 445 | "import cv2\n", 446 | "\n", 447 | "img = cv2.imread('images/flower.jpg',cv2.IMREAD_COLOR)\n", 448 | "\n", 449 | "cv2.line(img,(0,0),(150,150),(255,255,255),15) # line\n", 450 | "# To draw a line, you need to pass starting and ending coordinates of line.\n", 451 | "\n", 452 | "cv2.rectangle(img,(15,25),(200,150),(0,0,255),15) # red rect \n", 453 | "# To draw a rectangle, you need top-left corner and bottom-right corner of rectangle.\n", 454 | "\n", 455 | "cv2.circle(img,(100,63), 55, (0,255,0), -1) #circle\n", 456 | "# To draw a circle, you need its center coordinates and radius.\n", 457 | "\n", 458 | "cv2.ellipse(img,(256,256),(100,50),0,0,180,255,-1) #elipse\n", 459 | "# To draw the ellipse, we need to pass follwing arguments : 1.center location (x,y); 2.axes lengths (major axis length, minor axis length).\n", 460 | "# then the angle of rotation of ellipse in anti-clockwise direction.\n", 461 | "# startAngle and endAngle denotes the starting and ending of ellipse arc measured in clockwise direction from major axis.\n", 462 | "# i.e. giving values 0 and 360 gives the full ellipse\n", 463 | "\n", 464 | "pts = np.array([[10,5],[20,30],[70,20],[50,10]], np.int32) # polygon\n", 465 | "# To draw a polygon, first you need coordinates of vertices. Make those points into an array of shape ROWSx1x2 where\n", 466 | "# ROWS are number of vertices and it should be of type int32.\n", 467 | "pts = pts.reshape((-1,1,2))\n", 468 | "cv2.polylines(img, [pts], True, (0,255,255), 3)\n", 469 | "\n", 470 | "# writing\n", 471 | "font = cv2.FONT_HERSHEY_SIMPLEX\n", 472 | "cv2.putText(img,'Text!',(0,130), font, 1, (200,255,155), 2, cv2.LINE_AA)\n", 473 | "\n", 474 | "cv2.imshow('image',img)\n", 475 | "cv2.waitKey(0)\n", 476 | "cv2.destroyAllWindows()\n", 477 | "\n" 478 | ] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "metadata": {}, 483 | "source": [ 484 | "The output would be : \n", 485 | "" 486 | ] 487 | }, 488 | { 489 | "cell_type": "markdown", 490 | "metadata": {}, 491 | "source": [ 492 | "## Using Mouse as a Paint Brush:" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "metadata": {}, 498 | "source": [ 499 | "Here, we create a simple application which draws a circle on an image wherever we double-click on it.\n", 500 | "\n", 501 | "First we create a mouse callback function which is executed when a mouse event take place. Mouse event can be\n", 502 | "anything related to mouse like left-button down, left-button up, left-button double-click etc. It gives us the coordinates\n", 503 | "(x,y) for every mouse event. With this event and location, we can do whatever we like. 
To list all available events\n", 504 | "available, run the following code in Python terminal:" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 6, 510 | "metadata": { 511 | "collapsed": false 512 | }, 513 | "outputs": [ 514 | { 515 | "name": "stdout", 516 | "output_type": "stream", 517 | "text": [ 518 | "['EVENT_FLAG_ALTKEY', 'EVENT_FLAG_CTRLKEY', 'EVENT_FLAG_LBUTTON', 'EVENT_FLAG_MBUTTON', 'EVENT_FLAG_RBUTTON', 'EVENT_FLAG_SHIFTKEY', 'EVENT_LBUTTONDBLCLK', 'EVENT_LBUTTONDOWN', 'EVENT_LBUTTONUP', 'EVENT_MBUTTONDBLCLK', 'EVENT_MBUTTONDOWN', 'EVENT_MBUTTONUP', 'EVENT_MOUSEHWHEEL', 'EVENT_MOUSEMOVE', 'EVENT_MOUSEWHEEL', 'EVENT_RBUTTONDBLCLK', 'EVENT_RBUTTONDOWN', 'EVENT_RBUTTONUP']\n" 519 | ] 520 | } 521 | ], 522 | "source": [ 523 | "import cv2\n", 524 | "events = [i for i in dir(cv2) if 'EVENT' in i]\n", 525 | "print events" 526 | ] 527 | }, 528 | { 529 | "cell_type": "markdown", 530 | "metadata": {}, 531 | "source": [ 532 | "Creating mouse callback function has a specific format which is same everywhere. It differs only in what the function\n", 533 | "does. So our mouse callback function does one thing, it draws a circle where we double-click." 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": null, 539 | "metadata": { 540 | "collapsed": true 541 | }, 542 | "outputs": [], 543 | "source": [ 544 | "import cv2\n", 545 | "import numpy as np\n", 546 | "# mouse callback function\n", 547 | "def draw_circle(event,x,y,flags,param):\n", 548 | " if event == cv2.EVENT_LBUTTONDBLCLK:\n", 549 | " cv2.circle(img,(x,y),100,(255,0,0),-1)\n", 550 | " \n", 551 | "# Create a black image, a window and bind the function to window\n", 552 | "img = np.zeros((512,512,3), np.uint8)\n", 553 | "cv2.namedWindow('image')\n", 554 | "cv2.setMouseCallback('image',draw_circle)\n", 555 | "\n", 556 | "while(1):\n", 557 | " cv2.imshow('image',img)\n", 558 | " if cv2.waitKey(20) & 0xFF == 27:\n", 559 | " break\n", 560 | "cv2.destroyAllWindows()" 561 | ] 562 | }, 563 | { 564 | "cell_type": "markdown", 565 | "metadata": {}, 566 | "source": [ 567 | "Now we go for much more better application. In this, we draw either rectangles or circles (depending on the mode we\n", 568 | "select) by dragging the mouse like we do in Paint application. So our mouse callback function has two parts, one to\n", 569 | "draw rectangle and other to draw the circles. This specific example will be really helpful in creating and understanding\n", 570 | "some interactive applications like object tracking, image segmentation etc." 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "metadata": { 577 | "collapsed": true 578 | }, 579 | "outputs": [], 580 | "source": [ 581 | "import cv2\n", 582 | "import numpy as np\n", 583 | "drawing = False # true if mouse is pressed\n", 584 | "mode = True # if True, draw rectangle. 
Press 'm' to toggle to curve\n", 585 | "ix,iy = -1,-1\n", 586 | "# mouse callback function\n", 587 | "def draw_circle(event,x,y,flags,param):\n", 588 | " global ix,iy,drawing,mode\n", 589 | " if event == cv2.EVENT_LBUTTONDOWN:\n", 590 | " drawing = True\n", 591 | " ix,iy = x,y\n", 592 | " elif event == cv2.EVENT_MOUSEMOVE:\n", 593 | " if drawing == True:\n", 594 | " if mode == True:\n", 595 | " cv2.rectangle(img,(ix,iy),(x,y),(0,255,0),-1)\n", 596 | " else:\n", 597 | " cv2.circle(img,(x,y),5,(0,0,255),-1)\n", 598 | " elif event == cv2.EVENT_LBUTTONUP:\n", 599 | " drawing = False\n", 600 | " if mode == True:\n", 601 | " cv2.rectangle(img,(ix,iy),(x,y),(0,255,0),-1)\n", 602 | " else:\n", 603 | " cv2.circle(img,(x,y),5,(0,0,255),-1)\n", 604 | "# Next we have to bind this mouse callback function to OpenCV window. In the main loop, we should set a keyboard binding for key ‘m’ to toggle between rectangle and circle. \n", 605 | "\n", 606 | "img = np.zeros((512,512,3), np.uint8)\n", 607 | "cv2.namedWindow('image')\n", 608 | "cv2.setMouseCallback('image',draw_circle)\n", 609 | "while(1):\n", 610 | " cv2.imshow('image',img)\n", 611 | " k = cv2.waitKey(1) & 0xFF\n", 612 | " if k == ord('m'):\n", 613 | " mode = not mode\n", 614 | " elif k == 27:\n", 615 | " break\n", 616 | "cv2.destroyAllWindows()" 617 | ] 618 | }, 619 | { 620 | "cell_type": "markdown", 621 | "metadata": {}, 622 | "source": [ 623 | "## Trackbar as the Color Palette" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": {}, 629 | "source": [ 630 | "Here we will create a simple application which shows the color you specify. You have a window which shows the\n", 631 | "color and three trackbars to specify each of B,G,R colors. You slide the trackbar and correspondingly window color\n", 632 | "changes. By default, initial color will be set to Black.\n", 633 | "\n", 634 | "For _cv2.getTrackbarPos()_ function, first argument is the trackbar name, second one is the window name to which it is\n", 635 | "attached, third argument is the default value, fourth one is the maximum value and fifth one is the callback function which is executed everytime trackbar value changes. The callback function always has a default argument which is\n", 636 | "the trackbar position. In our case, function does nothing, so we simply pass.\n", 637 | "\n", 638 | "Another important application of trackbar is to use it as a button or switch. OpenCV, by default, doesn’t have button\n", 639 | "functionality. So you can use trackbar to get such functionality. In our application, we have created one switch in\n", 640 | "which application works only if switch is ON, otherwise screen is always black." 
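For reference, the five-argument description above corresponds to `cv2.createTrackbar`, which registers the slider and its callback; `cv2.getTrackbarPos` itself takes only the trackbar name and the window name and returns the current position. A minimal sketch of the pair (the window and trackbar names are illustrative):

```python
import cv2

def on_change(value):
    # Called by OpenCV whenever the slider moves; value is the new position.
    pass

cv2.namedWindow('palette')
# createTrackbar(trackbar_name, window_name, initial_value, max_value, callback)
cv2.createTrackbar('R', 'palette', 0, 255, on_change)

# getTrackbarPos(trackbar_name, window_name) -> current slider position
r = cv2.getTrackbarPos('R', 'palette')
```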
641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": null, 646 | "metadata": { 647 | "collapsed": true 648 | }, 649 | "outputs": [], 650 | "source": [ 651 | "import cv2\n", 652 | "import numpy as np\n", 653 | "\n", 654 | "def nothing(x):\n", 655 | " pass\n", 656 | "\n", 657 | "# Create a black image, a window\n", 658 | "img = np.zeros((300,512,3), np.uint8)\n", 659 | "cv2.namedWindow('image')\n", 660 | "\n", 661 | "# create trackbars for color change\n", 662 | "cv2.createTrackbar('R','image',0,255,nothing)\n", 663 | "cv2.createTrackbar('G','image',0,255,nothing)\n", 664 | "cv2.createTrackbar('B','image',0,255,nothing)\n", 665 | "\n", 666 | "# create switch for ON/OFF functionality\n", 667 | "switch = '0 : OFF \\n1 : ON'\n", 668 | "cv2.createTrackbar(switch, 'image',0,1,nothing)\n", 669 | "\n", 670 | "while(1):\n", 671 | " cv2.imshow('image',img)\n", 672 | " k = cv2.waitKey(1) & 0xFF\n", 673 | " if k == 27:\n", 674 | " break\n", 675 | " # get current positions of four trackbars\n", 676 | " r = cv2.getTrackbarPos('R','image')\n", 677 | " g = cv2.getTrackbarPos('G','image')\n", 678 | " b = cv2.getTrackbarPos('B','image')\n", 679 | " s = cv2.getTrackbarPos(switch,'image')\n", 680 | " \n", 681 | " if s == 0:\n", 682 | " img[:] = 0\n", 683 | " else:\n", 684 | " img[:] = [b,g,r]\n", 685 | " \n", 686 | "cv2.destroyAllWindows()\n" 687 | ] 688 | }, 689 | { 690 | "cell_type": "markdown", 691 | "metadata": {}, 692 | "source": [ 693 | "Our application would look something like this:\n", 694 | "\n", 695 | "" 696 | ] 697 | }, 698 | { 699 | "cell_type": "markdown", 700 | "metadata": { 701 | "collapsed": true 702 | }, 703 | "source": [ 704 | "___" 705 | ] 706 | } 707 | ], 708 | "metadata": { 709 | "kernelspec": { 710 | "display_name": "Python 3", 711 | "language": "python", 712 | "name": "python3" 713 | }, 714 | "language_info": { 715 | "codemirror_mode": { 716 | "name": "ipython", 717 | "version": 3 718 | }, 719 | "file_extension": ".py", 720 | "mimetype": "text/x-python", 721 | "name": "python", 722 | "nbconvert_exporter": "python", 723 | "pygments_lexer": "ipython3", 724 | "version": "3.5.2" 725 | } 726 | }, 727 | "nbformat": 4, 728 | "nbformat_minor": 0 729 | } 730 | -------------------------------------------------------------------------------- /7. Camera Calibration and 3D Reconstruction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "

Camera Calibration and 3D Reconstruction

\n", 11 | "___" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "deletable": true, 18 | "editable": true 19 | }, 20 | "source": [ 21 | "\n", 22 | "# Camera Calibration\n", 23 | "The everyday used pinhole cameras introduce a lot of distortion to images. Two major distortions are radial distortion and tangential distortion.\n", 24 | "\n", 25 | "Due to radial distortion, straight lines will appear curved. Its effect is more as we move away from the center of image. For example, one image is shown below, where two edges of a chess board are marked with red lines. But you can see that border is not a straight line and doesn't match with the red line. All the expected straight lines are bulged out.\n", 26 | "" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "deletable": true, 33 | "editable": true 34 | }, 35 | "source": [ 36 | "To understand these distortions in depth and get a matematical understanding, you can visit: Distortion(optics)\n", 37 | "\n", 38 | "For the stereo applications, these distortions need to be corrected first. To find the intrinsic and extrinsic parameters of camera, we have to provide some sample images of a well defined pattern like a chess board. We find some specific points in it ( square corners in chess board). We know its coordinates in real world space and we know its coordinates in image. With these data, some mathematical problem is solved in background to get the distortion coefficients. For better results, we need atleast 10 test patterns.\n", 39 | "\n", 40 | "We will use the image of chess board (see samples/cpp/left01.jpg – left14.jpg) that come with OpenCV istelf.\n", 41 | "\n", 42 | "Example image is shown below:\n" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": { 48 | "collapsed": true, 49 | "deletable": true, 50 | "editable": true 51 | }, 52 | "source": [ 53 | "" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": { 59 | "deletable": true, 60 | "editable": true 61 | }, 62 | "source": [ 63 | "Important input datas needed for camera calibration is a set of 3D real world points and its corresponding 2D image points. 2D image points are OK which we can easily find from the image. (These image points are locations where two black squares touch each other in chess boards)\n", 64 | "\n", 65 | "What about the 3D points from real world space? Those images are taken from a static camera and chess boards are placed at different locations and orientations. So we need to know (X,Y,Z) values. But for simplicity, we can say chess board was kept stationary at XY plane, (so Z=0 always) and camera was moved accordingly. This consideration helps us to find only X,Y values. Now for X,Y values, we can simply pass the points as (0,0), (1,0), (2,0), ... which denotes the location of points. In this case, the results we get will be in the scale of size of chess board square. But if we know the square size, (say 30 mm), and we can pass the values as (0,0),(30,0),(60,0),..., we get the results in mm.\n", 66 | "\n", 67 | "3D points are called object points and 2D image points are called image points.\n", 68 | "\n", 69 | "So to find pattern in chess board, we use the function, cv2.findChessboardCorners(). We also need to pass what kind of pattern we are looking, like 8x8 grid, 5x5 grid etc. In this example, we use 7x6 grid. (Normally a chess board has 8x8 squares and 7x7 internal corners). It returns the corner points and retval which will be True if pattern is obtained. 
These corners will be placed in an order (from left-to-right, top-to-bottom)\n", 70 | "\n", 71 | "\n", 72 | "This function may not be able to find the required pattern in all the images. So one good option is to write the code such that, it starts the camera and check each frame for required pattern. Once pattern is obtained, find the corners and store it in a list. Also provides some interval before reading next frame so that we can adjust our chess board in different direction. Continue this process until required number of good patterns are obtained. Even in the example provided here, we are not sure out of 14 images given, how many are good. So we read all the images and take the good ones.\n", 73 | "Instead of chess board, we can use some circular grid, but then use the function _cv2.findCirclesGrid()_ to find the pattern. It is said that less number of images are enough when using circular grid.\n", 74 | "Once we find the corners, we can increase their accuracy using cv2.cornerSubPix(). We can also draw the pattern using cv2.drawChessboardCorners(). All these steps are included in below code:" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": true, 82 | "deletable": true, 83 | "editable": true, 84 | "scrolled": true 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "import numpy as np\n", 89 | "import cv2\n", 90 | "import glob\n", 91 | "\n", 92 | "# termination criteria\n", 93 | "criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)\n", 94 | "\n", 95 | "# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)\n", 96 | "objp = np.zeros((6*7,3), np.float32)\n", 97 | "objp[:,:2] = np.mgrid[0:7,0:6].T.reshape(-1,2)\n", 98 | "\n", 99 | "# Arrays to store object points and image points from all the images.\n", 100 | "objpoints = [] # 3d point in real world space\n", 101 | "imgpoints = [] # 2d points in image plane.\n", 102 | "\n", 103 | "# You'll have to store the chessboard images in the directory of this script\n", 104 | "images = glob.glob('*.jpg')\n", 105 | "\n", 106 | "for fname in images:\n", 107 | " img = cv2.imread(fname)\n", 108 | " gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)\n", 109 | " \n", 110 | " # Find the chess board corners\n", 111 | " ret, corners = cv2.findChessboardCorners(gray, (7,6),None)\n", 112 | " \n", 113 | " # If found, add object points, image points (after refining them)\n", 114 | " if ret == True:\n", 115 | " objpoints.append(objp)\n", 116 | " corners2 = cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)\n", 117 | " imgpoints.append(corners2)\n", 118 | " \n", 119 | " # Draw and display the corners\n", 120 | " img = cv2.drawChessboardCorners(img, (7,6), corners2,ret)\n", 121 | " cv2.imshow('img',img)\n", 122 | " cv2.waitKey(500)\n", 123 | " \n", 124 | "cv2.destroyAllWindows()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "deletable": true, 131 | "editable": true 132 | }, 133 | "source": [ 134 | "One image with pattern drawn on it is shown below:\n", 135 | "" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": { 141 | "deletable": true, 142 | "editable": true 143 | }, 144 | "source": [ 145 | "## Calibration:\n", 146 | "\n", 147 | "So now we have our object points and image points we are ready to go for calibration. For that we use the function, cv2.calibrateCamera(). 
It returns the camera matrix, distortion coefficients, rotation and translation vectors etc.\n" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": { 154 | "collapsed": false, 155 | "deletable": true, 156 | "editable": true 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1],None,None)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": { 166 | "deletable": true, 167 | "editable": true 168 | }, 169 | "source": [ 170 | "## Undistortion:\n", 171 | "\n", 172 | "We have got what we were trying. Now we can take an image and undistort it. OpenCV comes with two methods, we will see both. But before that, we can refine the camera matrix based on a free scaling parameter using cv2.getOptimalNewCameraMatrix(). If the scaling parameter alpha=0, it returns undistorted image with minimum unwanted pixels. So it may even remove some pixels at image corners. If alpha=1, all pixels are retained with some extra black images. It also returns an image ROI which can be used to crop the result.\n", 173 | "\n", 174 | "So we take a new image (left12.jpg in this case. That is the first image in this chapter)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false, 182 | "deletable": true, 183 | "editable": true, 184 | "scrolled": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "img = cv2.imread('left12.jpg')\n", 189 | "h, w = img.shape[:2]\n", 190 | "newcameramtx, roi=cv2.getOptimalNewCameraMatrix(mtx,dist,(w,h),1,(w,h))" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "deletable": true, 197 | "editable": true 198 | }, 199 | "source": [ 200 | "### 1. Using cv2.undistort()\n", 201 | "\n", 202 | "This is the shortest path. Just call the function and use ROI obtained above to crop the result." 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "collapsed": true, 210 | "deletable": true, 211 | "editable": true 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "# undistort\n", 216 | "dst = cv2.undistort(img, mtx, dist, None, newcameramtx)\n", 217 | "# crop the image\n", 218 | "x,y,w,h = roi\n", 219 | "dst = dst[y:y+h, x:x+w]\n", 220 | "cv2.imwrite('calibresult.png',dst)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": { 226 | "deletable": true, 227 | "editable": true 228 | }, 229 | "source": [ 230 | "### 2. Using remapping\n", 231 | "\n", 232 | "This is curved path. First find a mapping function from distorted image to undistorted image. Then use the remap function." 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": { 239 | "collapsed": true, 240 | "deletable": true, 241 | "editable": true 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "# undistort\n", 246 | "mapx,mapy = cv2.initUndistortRectifyMap(mtx,dist,None,newcameramtx,(w,h),5)\n", 247 | "dst = cv2.remap(img,mapx,mapy,cv2.INTER_LINEAR)\n", 248 | "# crop the image\n", 249 | "x,y,w,h = roi\n", 250 | "dst = dst[y:y+h, x:x+w]\n", 251 | "cv2.imwrite('calibresult.png',dst)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": { 257 | "deletable": true, 258 | "editable": true 259 | }, 260 | "source": [ 261 | "Both the methods give the same result. 
See the result below:\n", 262 | "\n", 263 | "\n", 264 | "You can see in the result that all the edges are straight.\n", 265 | "\n", 266 | "Now you can store the camera matrix and distortion coefficients using write functions in Numpy (np.savez, np.savetxt etc) for future uses." 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "
\n", 274 | "\n", 275 | "Following is the complete script that implements camera calibration for distorted images with chess board samples.\n", 276 | "It reads distorted images, calculates the calibration and write undistorted images to a folder nameed 'output'.\n", 277 | "\n", 278 | "#### usage:\n", 279 | " ```calibrate.py [--debug ] [--square_size] []```\n", 280 | "\n", 281 | "#### default values:\n", 282 | " --debug: ./output/\n", 283 | " --square_size: 1.0\n", 284 | " defaults to ../data/left*.jpg\n" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "### calibrate.py" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "collapsed": true 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "#!/usr/bin/env python\n", 303 | "# Python 2/3 compatibility\n", 304 | "from __future__ import print_function\n", 305 | "\n", 306 | "import numpy as np\n", 307 | "import cv2\n", 308 | "\n", 309 | "# local modules\n", 310 | "from common import splitfn\n", 311 | "\n", 312 | "# built-in modules\n", 313 | "import os\n", 314 | "\n", 315 | "if __name__ == '__main__':\n", 316 | " import sys\n", 317 | " import getopt\n", 318 | " from glob import glob\n", 319 | "\n", 320 | " args, img_mask = getopt.getopt(sys.argv[1:], '', ['debug=', 'square_size='])\n", 321 | " args = dict(args)\n", 322 | " args.setdefault('--debug', './output/')\n", 323 | " args.setdefault('--square_size', 1.0)\n", 324 | " if not img_mask:\n", 325 | " img_mask = 'images/left*.jpg' # default\n", 326 | " else:\n", 327 | " img_mask = img_mask[0]\n", 328 | "\n", 329 | " img_names = glob(img_mask)\n", 330 | " debug_dir = args.get('--debug')\n", 331 | " if not os.path.isdir(debug_dir):\n", 332 | " os.mkdir(debug_dir)\n", 333 | " square_size = float(args.get('--square_size'))\n", 334 | "\n", 335 | " pattern_size = (9, 6)\n", 336 | " pattern_points = np.zeros((np.prod(pattern_size), 3), np.float32)\n", 337 | " pattern_points[:, :2] = np.indices(pattern_size).T.reshape(-1, 2)\n", 338 | " pattern_points *= square_size\n", 339 | "\n", 340 | " obj_points = []\n", 341 | " img_points = []\n", 342 | " h, w = 0, 0\n", 343 | " img_names_undistort = []\n", 344 | " for fn in img_names:\n", 345 | " print('processing %s... 
' % fn, end='')\n", 346 | " img = cv2.imread(fn, 0)\n", 347 | " if img is None:\n", 348 | " print(\"Failed to load\", fn)\n", 349 | " continue\n", 350 | "\n", 351 | " h, w = img.shape[:2]\n", 352 | " found, corners = cv2.findChessboardCorners(img, pattern_size)\n", 353 | " if found:\n", 354 | " term = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_COUNT, 30, 0.1)\n", 355 | " cv2.cornerSubPix(img, corners, (5, 5), (-1, -1), term)\n", 356 | "\n", 357 | " if debug_dir:\n", 358 | " vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)\n", 359 | " cv2.drawChessboardCorners(vis, pattern_size, corners, found)\n", 360 | " path, name, ext = splitfn(fn)\n", 361 | " outfile = debug_dir + name + '_chess.png'\n", 362 | " cv2.imwrite(outfile, vis)\n", 363 | " if found:\n", 364 | " img_names_undistort.append(outfile)\n", 365 | "\n", 366 | " if not found:\n", 367 | " print('chessboard not found')\n", 368 | " continue\n", 369 | "\n", 370 | " img_points.append(corners.reshape(-1, 2))\n", 371 | " obj_points.append(pattern_points)\n", 372 | "\n", 373 | " print('ok')\n", 374 | "\n", 375 | " # calculate camera distortion\n", 376 | " rms, camera_matrix, dist_coefs, rvecs, tvecs = cv2.calibrateCamera(obj_points, img_points, (w, h), None, None)\n", 377 | "\n", 378 | " print(\"\\nRMS:\", rms)\n", 379 | " print(\"camera matrix:\\n\", camera_matrix)\n", 380 | " print(\"distortion coefficients: \", dist_coefs.ravel())\n", 381 | "\n", 382 | " # undistort the image with the calibration\n", 383 | " print('')\n", 384 | " for img_found in img_names_undistort:\n", 385 | " img = cv2.imread(img_found)\n", 386 | "\n", 387 | " h, w = img.shape[:2]\n", 388 | " newcameramtx, roi = cv2.getOptimalNewCameraMatrix(camera_matrix, dist_coefs, (w, h), 1, (w, h))\n", 389 | "\n", 390 | " dst = cv2.undistort(img, camera_matrix, dist_coefs, None, newcameramtx)\n", 391 | "\n", 392 | " # crop and save the image\n", 393 | " x, y, w, h = roi\n", 394 | " dst = dst[y:y+h, x:x+w]\n", 395 | " outfile = img_found + '_undistorted.png'\n", 396 | " print('Undistorted image written to: %s' % outfile)\n", 397 | " cv2.imwrite(outfile, dst)" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "
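Note that calibrate.py imports splitfn from common, a helper module that ships with the OpenCV Python samples. If that file is not available, a minimal stand-in (assuming, as its use in the script suggests, that it splits a file name into directory, base name and extension) could look like this:

```python
import os

def splitfn(fn):
    # e.g. 'images/left08.jpg' -> ('images', 'left08', '.jpg')
    path, base = os.path.split(fn)
    name, ext = os.path.splitext(base)
    return path, name, ext
```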
" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "\n", 412 | "## Re-projection Error:\n", 413 | "\n", 414 | "Re-projection error gives a good estimation of just how exact is the found parameters. This should be as close to zero as possible. Given the intrinsic, distortion, rotation and translation matrices, we first transform the object point to image point using cv2.projectPoints(). Then we calculate the absolute norm between what we got with our transformation and the corner finding algorithm. To find the average error we calculate the arithmetical mean of the errors calculate for all the calibration images." 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": { 421 | "collapsed": true, 422 | "deletable": true, 423 | "editable": true 424 | }, 425 | "outputs": [], 426 | "source": [ 427 | "mean_error = 0\n", 428 | "for i in xrange(len(objpoints)):\n", 429 | " imgpoints2, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)\n", 430 | " error = cv2.norm(imgpoints[i],imgpoints2, cv2.NORM_L2)/len(imgpoints2)\n", 431 | " tot_error += error\n", 432 | " \n", 433 | "print \"total error: \", mean_error/len(objpoints)" 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": { 439 | "deletable": true, 440 | "editable": true 441 | }, 442 | "source": [ 443 | "___" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": { 449 | "deletable": true, 450 | "editable": true 451 | }, 452 | "source": [ 453 | "# Pose Estimation\n", 454 | "___" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": { 460 | "deletable": true, 461 | "editable": true 462 | }, 463 | "source": [ 464 | "Here we will learn to exploit calib3d module to create some 3D effects in images.\n", 465 | "\n", 466 | "During the previous session on camera calibration, we found the camera matrix, distortion coefficients etc. Given a pattern image, we can utilize the above information to calculate its pose, or how the object is situated in space, like how it is rotated, how it is displaced etc. For a planar object, we can assume Z=0, such that, the problem now becomes how camera is placed in space to see our pattern image. So, if we know how the object lies in the space, we can draw some 2D diagrams in it to simulate the 3D effect. Let's see how to do it.\n", 467 | "\n", 468 | "Our problem is, we want to draw our 3D coordinate axis (X, Y, Z axes) on our chessboard's first corner. X axis in blue color, Y axis in green color and Z axis in red color. 
So in-effect, Z axis should feel like it is perpendicular to our chessboard plane.\n", 469 | "\n", 470 | "First, let's load the camera matrix and distortion coefficients from the previous calibration result:" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "metadata": { 477 | "collapsed": false, 478 | "deletable": true, 479 | "editable": true 480 | }, 481 | "outputs": [], 482 | "source": [ 483 | "import cv2\n", 484 | "import numpy as np\n", 485 | "import glob\n", 486 | "# Load previously saved data\n", 487 | "with np.load('B.npz') as X:\n", 488 | " mtx, dist, _, _ = [X[i] for i in ('mtx','dist','rvecs','tvecs')]" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": { 494 | "deletable": true, 495 | "editable": true 496 | }, 497 | "source": [ 498 | "Now let’s create a function, draw which takes the corners in the chessboard (obtained using cv2.findChessboardCorners()) and axis points to draw a 3D axis." 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": null, 504 | "metadata": { 505 | "collapsed": true, 506 | "deletable": true, 507 | "editable": true 508 | }, 509 | "outputs": [], 510 | "source": [ 511 | "def draw(img, corners, imgpts):\n", 512 | " corner = tuple(corners[0].ravel())\n", 513 | " img = cv2.line(img, corner, tuple(imgpts[0].ravel()), (255,0,0), 5)\n", 514 | " img = cv2.line(img, corner, tuple(imgpts[1].ravel()), (0,255,0), 5)\n", 515 | " img = cv2.line(img, corner, tuple(imgpts[2].ravel()), (0,0,255), 5)\n", 516 | " return img" 517 | ] 518 | }, 519 | { 520 | "cell_type": "markdown", 521 | "metadata": { 522 | "deletable": true, 523 | "editable": true 524 | }, 525 | "source": [ 526 | "Then as in previous case, we create termination criteria, object points (3D points of corners in chessboard) and axis\n", 527 | "points. Axis points are points in 3D space for drawing the axis. We draw axis of length 3 (units will be in terms of\n", 528 | "chess square size since we calibrated based on that size). So our X axis is drawn from (0,0,0) to (3,0,0), so for Y axis.\n", 529 | "For Z axis, it is drawn from (0,0,0) to (0,0,-3). Negative denotes it is drawn towards the camera." 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": { 536 | "collapsed": true, 537 | "deletable": true, 538 | "editable": true 539 | }, 540 | "outputs": [], 541 | "source": [ 542 | "criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)\n", 543 | "objp = np.zeros((6*7,3), np.float32)\n", 544 | "objp[:,:2] = np.mgrid[0:7,0:6].T.reshape(-1,2)\n", 545 | "axis = np.float32([[3,0,0], [0,3,0], [0,0,-3]]).reshape(-1,3)" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": { 551 | "deletable": true, 552 | "editable": true 553 | }, 554 | "source": [ 555 | "Now, as usual, we load each image. Search for 7x6 grid. If found, we refine it with subcorner pixels. Then to calculate\n", 556 | "the rotation and translation, we use the function, cv2.solvePnPRansac(). Once we those transformation matrices,\n", 557 | "we use them to project our axis points to the image plane. In simple words, we find the points on image plane\n", 558 | "corresponding to each of (3,0,0),(0,3,0),(0,0,3) in 3D space. Once we get them, we draw lines from the first corner to\n", 559 | "each of these points using our draw() function. Done!" 
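One practical caveat about the loop in the next cell: the number of values returned by cv2.solvePnPRansac() differs between OpenCV releases (the 2.4 series returns three values, the 3.x series prepends a success flag). If the unpacking in the loop fails on your installation, a hedged, version-tolerant variant of that single call is:

```python
# Version-tolerant unpacking (objp, corners2, mtx, dist as in the loop below)
result = cv2.solvePnPRansac(objp, corners2, mtx, dist)
if len(result) == 4:                     # OpenCV 3.x: (retval, rvec, tvec, inliers)
    _, rvecs, tvecs, inliers = result
else:                                    # OpenCV 2.4.x: (rvec, tvec, inliers)
    rvecs, tvecs, inliers = result
```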
560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": null, 565 | "metadata": { 566 | "collapsed": true, 567 | "deletable": true, 568 | "editable": true 569 | }, 570 | "outputs": [], 571 | "source": [ 572 | "for fname in glob.glob('left*.jpg'):\n", 573 | " img = cv2.imread(fname)\n", 574 | " gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)\n", 575 | " ret, corners = cv2.findChessboardCorners(gray, (7,6),None)\n", 576 | " if ret == True:\n", 577 | " corners2 = cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)\n", 578 | " \n", 579 | " # Find the rotation and translation vectors.\n", 580 | " rvecs, tvecs, inliers = cv2.solvePnPRansac(objp, corners2, mtx, dist)\n", 581 | " \n", 582 | " # project 3D points to image plane\n", 583 | " imgpts, jac = cv2.projectPoints(axis, rvecs, tvecs, mtx, dist)\n", 584 | " img = draw(img,corners2,imgpts)\n", 585 | " cv2.imshow('img',img)\n", 586 | " k = cv2.waitKey(0) & 0xff\n", 587 | " if k == 's':\n", 588 | " cv2.imwrite(fname[:6]+'.png', img)\n", 589 | "cv2.destroyAllWindows()" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": { 595 | "deletable": true, 596 | "editable": true 597 | }, 598 | "source": [ 599 | "See some results below. Notice that each axis is 3 squares long.:\n", 600 | "\n", 601 | "" 602 | ] 603 | }, 604 | { 605 | "cell_type": "markdown", 606 | "metadata": { 607 | "deletable": true, 608 | "editable": true 609 | }, 610 | "source": [ 611 | "___\n", 612 | "## Render a Cube\n", 613 | "\n", 614 | "If you want to draw a cube, modify the draw() function and axis points as follows.\n", 615 | "\n", 616 | "Modified draw() function:" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": null, 622 | "metadata": { 623 | "collapsed": false, 624 | "deletable": true, 625 | "editable": true 626 | }, 627 | "outputs": [], 628 | "source": [ 629 | "def draw(img, corners, imgpts):\n", 630 | " imgpts = np.int32(imgpts).reshape(-1,2)\n", 631 | " \n", 632 | " # draw ground floor in green\n", 633 | " img = cv2.drawContours(img, [imgpts[:4]],-1,(0,255,0),-3)\n", 634 | " \n", 635 | " # draw pillars in blue color\n", 636 | " for i,j in zip(range(4),range(4,8)):\n", 637 | " img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]),(255),3)\n", 638 | " \n", 639 | " # draw top layer in red color\n", 640 | " img = cv2.drawContours(img, [imgpts[4:]],-1,(0,0,255),3)" 641 | ] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | "metadata": { 646 | "deletable": true, 647 | "editable": true 648 | }, 649 | "source": [ 650 | "Modified axis points. 
They are the 8 corners of a cube in 3D space:" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": null, 656 | "metadata": { 657 | "collapsed": true, 658 | "deletable": true, 659 | "editable": true 660 | }, 661 | "outputs": [], 662 | "source": [ 663 | "axis = np.float32([[0,0,0], [0,3,0], [3,3,0], [3,0,0], [0,0,-3],[0,3,-3],[3,3,-3],[3,0,-3] ])" 664 | ] 665 | }, 666 | { 667 | "cell_type": "markdown", 668 | "metadata": { 669 | "deletable": true, 670 | "editable": true 671 | }, 672 | "source": [ 673 | "And look at the result below:\n", 674 | "" 675 | ] 676 | }, 677 | { 678 | "cell_type": "markdown", 679 | "metadata": { 680 | "deletable": true, 681 | "editable": true 682 | }, 683 | "source": [ 684 | "If you are interested in graphics, augmented reality etc, you can use OpenGL to render more complicated figures.\n", 685 | "___" 686 | ] 687 | } 688 | ], 689 | "metadata": { 690 | "kernelspec": { 691 | "display_name": "Python 2", 692 | "language": "python", 693 | "name": "python2" 694 | }, 695 | "language_info": { 696 | "codemirror_mode": { 697 | "name": "ipython", 698 | "version": 2 699 | }, 700 | "file_extension": ".py", 701 | "mimetype": "text/x-python", 702 | "name": "python", 703 | "nbconvert_exporter": "python", 704 | "pygments_lexer": "ipython2", 705 | "version": "2.7.12" 706 | } 707 | }, 708 | "nbformat": 4, 709 | "nbformat_minor": 0 710 | } 711 | -------------------------------------------------------------------------------- /8. Image Denoising and Inpainting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Computational Photography : Image Denoising and Inpainting\n", 8 | "___\n", 9 | "_Computational photography or computational imaging refers to digital image capture and processing techniques that use digital computation instead of optical processes. Computational photography can improve the capabilities of a camera, or introduce features that were not possible with film based photography_" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Image Denoising\n", 17 | "In earlier examples, we have seen many image smoothing techniques like Gaussian Blurring, Median Blurring etc and they were good to some extent in removing small quantities of noise. In those techniques, we took a small neighbourhood around a pixel and did some operations like gaussian weighted average, median of the values etc to replace the central element. In short, noise removal at a pixel was local to its neighbourhood.\n", 18 | "\n", 19 | "Here we look at the Non-local Means Denoising algorithm to remove noise in the image and learn about functions included in openCV that can be directly used for Image Denoising." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "OpenCV provides four variations of this technique.\n", 27 | "\n", 28 | "* cv2.fastNlMeansDenoising() - works with a single grayscale images\n", 29 | "\n", 30 | "* cv2.fastNlMeansDenoisingColored() - works with a color image.\n", 31 | "\n", 32 | "* cv2.fastNlMeansDenoisingMulti() - works with image sequence captured in short period of time (grayscale images)\n", 33 | "\n", 34 | "* cv2.fastNlMeansDenoisingColoredMulti() - same as above, but for color images.\n", 35 | "\n", 36 | "Common arguments are:\n", 37 | "\n", 38 | "* h : parameter deciding filter strength. 
Higher h value removes noise better, but removes details of image also. (10 is ok)\n", 39 | "* hForColorComponents : same as h, but for color images only. (normally same as h)\n", 40 | "* templateWindowSize : should be odd. (recommended 7)\n", 41 | "* searchWindowSize : should be odd. (recommended 21)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "___\n", 49 | "Here we will look at examples for colored images and image sequences:" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | " 2. cv2.fastNlMeansDenoisingColored() : \n", 57 | "\n", 58 | "As mentioned above it is used to remove noise from color images. (Noise is expected to be gaussian). See the example below:" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 8, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "import numpy as np\n", 70 | "import cv2\n", 71 | "from matplotlib import pyplot as plt\n", 72 | "img = cv2.imread('images/noise1.jpg')\n", 73 | "dst = cv2.fastNlMeansDenoisingColored(img,None,10,10,7,21)\n", 74 | "plt.subplot(121),plt.imshow(img)\n", 75 | "plt.subplot(122),plt.imshow(dst)\n", 76 | "plt.show()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "After de-noising, the resulting image would look like:\n", 84 | "\n", 85 | "___" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "source": [ 94 | "2. cv2.fastNlMeansDenoisingMulti()\n", 95 | "\n", 96 | "Now we will apply the same method to a video. The first argument is the list of noisy frames. Second argument imgToDenoiseIndex specifies which frame we need to denoise, for that we pass the index of frame in our input list. Third is the temporalWindowSize which specifies the number of nearby frames to be used for denoising. It should be odd. In that case, a total of temporalWindowSize frames are used where central frame is the frame to be denoised. For example, you passed a list of 5 frames as input. Let imgToDenoiseIndex = 2 and temporalWindowSize = 3. Then frame-1, frame-2 and frame-3 are used to denoise frame-2. Let's see an example." 
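The list of variants above also includes cv2.fastNlMeansDenoising() for a single grayscale image, which is not demonstrated anywhere in this notebook. For completeness, here is a minimal sketch of it before moving on to the image-sequence example below; the input path is a placeholder (any noisy grayscale image will do) and the parameter values are just the recommended defaults listed above.

```python
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('images/noise.png', 0)                 # placeholder path, read as grayscale
dst = cv2.fastNlMeansDenoising(img, None, 10, 7, 21)    # h=10, templateWindowSize=7, searchWindowSize=21

plt.subplot(121), plt.imshow(img, 'gray')
plt.subplot(122), plt.imshow(dst, 'gray')
plt.show()
```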
97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "import numpy as np\n", 108 | "import cv2\n", 109 | "from matplotlib import pyplot as plt\n", 110 | "cap = cv2.VideoCapture('vtest.avi')\n", 111 | "# create a list of first 5 frames\n", 112 | "img = [cap.read()[1] for i in xrange(5)]\n", 113 | "# convert all to grayscale\n", 114 | "gray = [cv2.cvtColor(i, cv2.COLOR_BGR2GRAY) for i in img]\n", 115 | "# convert all to float64\n", 116 | "gray = [np.float64(i) for i in gray]\n", 117 | "# create a noise of variance 25\n", 118 | "noise = np.random.randn(*gray[1].shape)*10\n", 119 | "# Add this noise to images\n", 120 | "noisy = [i+noise for i in gray]\n", 121 | "# Convert back to uint8\n", 122 | "noisy = [np.uint8(np.clip(i,0,255)) for i in noisy]\n", 123 | "# Denoise 3rd frame considering all the 5 frames\n", 124 | "dst = cv2.fastNlMeansDenoisingMulti(noisy, 2, 5, None, 4, 7, 35)\n", 125 | "plt.subplot(131),plt.imshow(gray[2],'gray')\n", 126 | "plt.subplot(132),plt.imshow(noisy[2],'gray')\n", 127 | "plt.subplot(133),plt.imshow(dst,'gray')\n", 128 | "plt.show()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "It takes considerable amount of time for computation. In the result, first image is the original frame, second is the\n", 136 | "noisy one, third is the denoised image." 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "___" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "## Image Inpainting\n", 151 | "\n", 152 | "Inpainting is used to remove small noises, strokes etc in old photographs. Almost everyone has encountered old degraded photos with some black spots, some strokes etc on it. How can we restore them back? We can’t simply erase them in a paint tool because it is will simply replace black structures with white structures which is of no use. In these cases, a technique called image inpainting is used.\n", 153 | "\n", 154 | "In the digital world, inpainting (also known as image interpolation or video interpolation) refers to the application of sophisticated algorithms to replace lost or corrupted parts of the image data (mainly small regions or to remove small defects)\n", 155 | "\n", 156 | "The basic idea is simple: Replace those bad marks with its neighbouring pixels so that it looks like the neigbourhood.\n", 157 | "\n", 158 | "We need to create a mask of same size as that of input image, where non-zero pixels corresponds to the area which\n", 159 | "is to be inpainted. Everything else is simple. My image is degraded with some black strokes (I added manually). I\n", 160 | "created a corresponding strokes with Paint tool." 
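The example in the next cell loads a hand-drawn mask from disk. If you would rather not paint one manually, a rough mask can sometimes be generated programmatically; the sketch below simply thresholds the very dark strokes, which only works when the defects are clearly darker than everything else in the photo (file names are placeholders matching the example that follows).

```python
import cv2

img = cv2.imread('messi_2.jpg')                       # degraded input (placeholder)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Mark pixels darker than 10 as "to be inpainted" (non-zero in the mask)
_, mask = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY_INV)

# Grow the strokes slightly so their borders are covered as well
mask = cv2.dilate(mask, cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)))
cv2.imwrite('mask_auto.png', mask)
```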
161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 1, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "import numpy as np\n", 172 | "import cv2\n", 173 | "img = cv2.imread('messi_2.jpg')\n", 174 | "mask = cv2.imread('mask2.png',0)\n", 175 | "dst = cv2.inpaint(img,mask,3,cv2.INPAINT_TELEA)\n", 176 | "dst2 = cv2.inpaint(img,mask,3,cv2.INPAINT_NS)\n", 177 | "cv2.imshow('dst',dst)\n", 178 | "cv2.waitKey(0)\n", 179 | "cv2.destroyAllWindows()" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "collapsed": true 186 | }, 187 | "source": [ 188 | "The resulting output would look like this. First image shows degraded input. Second image is the mask. Third and the Fourth images are the result of the following two inpainting algorithms pre-defined in OpenCV:\n", 189 | "* INPAINT_TELEA Method by Alexandru Telea [119\n", 190 | "] \n", 191 | "* INPAINT_NS Navier-Stokes based method [Navier01]\n", 192 | "\n", 193 | "The function _cv2.inpaint()_ reconstructs the selected image area from the pixel near the area boundary. The function may be used to remove dust and scratches from a scanned photo, or to remove undesirable objects from still images or video.\n", 194 | "\n", 195 | "\n", 196 | "___" 197 | ] 198 | } 199 | ], 200 | "metadata": { 201 | "kernelspec": { 202 | "display_name": "Python 2", 203 | "language": "python", 204 | "name": "python2" 205 | }, 206 | "language_info": { 207 | "codemirror_mode": { 208 | "name": "ipython", 209 | "version": 2 210 | }, 211 | "file_extension": ".py", 212 | "mimetype": "text/x-python", 213 | "name": "python", 214 | "nbconvert_exporter": "python", 215 | "pygments_lexer": "ipython2", 216 | "version": "2.7.11" 217 | } 218 | }, 219 | "nbformat": 4, 220 | "nbformat_minor": 0 221 | } 222 | -------------------------------------------------------------------------------- /Barcode Detector.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Barcode Detector\n", 8 | "___" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "Here we will demonstrate a basic implementation of barcode detection using computer vision and image processing techniques. Although this code won't work on all barcodes and the accuracy of detecting barcodes might be low, it will surely give you a ffair intuition on what steps should be followed to make a barcode detector." 
16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "# import the necessary packages\n", 27 | "import numpy as np\n", 28 | "import argparse\n", 29 | "import cv2\n", 30 | "\n", 31 | "# load the image and convert it to grayscale\n", 32 | "image = cv2.imread(\"images/barcode_02.jpg\")\n", 33 | "gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", 34 | "\n", 35 | "# compute the Scharr gradient magnitude representation of the images\n", 36 | "# in both the x and y direction\n", 37 | "gradX = cv2.Sobel(gray, ddepth = cv2.CV_32F, dx = 1, dy = 0, ksize = -1)\n", 38 | "gradY = cv2.Sobel(gray, ddepth = cv2.CV_32F, dx = 0, dy = 1, ksize = -1)\n", 39 | "\n", 40 | "# subtract the y-gradient from the x-gradient\n", 41 | "gradient = cv2.subtract(gradX, gradY)\n", 42 | "gradient = cv2.convertScaleAbs(gradient)\n", 43 | "\n", 44 | "# blur and threshold the image\n", 45 | "blurred = cv2.blur(gradient, (9, 9))\n", 46 | "(_, thresh) = cv2.threshold(blurred, 225, 255, cv2.THRESH_BINARY)\n", 47 | "\n", 48 | "# construct a closing kernel and apply it to the thresholded image\n", 49 | "kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 7))\n", 50 | "closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n", 51 | "\n", 52 | "# perform a series of erosions and dilations\n", 53 | "closed = cv2.erode(closed, None, iterations = 4)\n", 54 | "closed = cv2.dilate(closed, None, iterations = 4)\n", 55 | "\n", 56 | "# find the contours in the thresholded image, then sort the contours\n", 57 | "# by their area, keeping only the largest one\n", 58 | "(_,cnts, _) = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL,\n", 59 | " cv2.CHAIN_APPROX_SIMPLE)\n", 60 | "c = sorted(cnts, key = cv2.contourArea, reverse = True)[0]\n", 61 | "\n", 62 | "# compute the rotated bounding box of the largest contour\n", 63 | "rect = cv2.minAreaRect(c)\n", 64 | "box = np.int0(cv2.boxPoints(rect))\n", 65 | "\n", 66 | "# draw a bounding box arounded the detected barcode and display the\n", 67 | "# image\n", 68 | "cv2.drawContours(image, [box], -1, (0, 255, 0), 3)\n", 69 | "cv2.imshow(\"Image\", image)\n", 70 | "cv2.waitKey(0)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "As always we import the required modules, load our image and convert it to grayscale.\n", 78 | "\n", 79 | "Then, we use the Scharr operator (specified using ksize = -1 ) to construct the gradient magnitude representation of the grayscale image in the horizontal and vertical directions .\n", 80 | "\n", 81 | "From there, we subtract the y-gradient of the Scharr operator from the x-gradient of the Scharr operator. By performing this subtraction we are left with regions of the image that have high horizontal gradients and low vertical gradients.\n", 82 | "Our gradient representation of our original image above looks like:\n", 83 | "\n", 84 | "" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Notice how the barcoded region of the image has been detected by our gradient operations. The next steps will be to filter out the noise in the image and focus solely on the barcode region.\n", 92 | "\n", 93 | "The first thing we’ll do is apply an average blur to the gradient image using a 9 x 9 kernel. This will help smooth out high frequency noise in the gradient representation of the image. We’ll then threshold the blurred image. 
Any pixel in the gradient image that is not greater than 225 is set to 0 (black). Otherwise, the pixel is set to 255 (white).\n", 94 | "\n", 95 | "The output of the blurring and thresholding looks like this:\n", 96 | "\n", 97 | "" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "However, as you can see in the threshold image above, there are gaps between the vertical bars of the barcode. In order to close these gaps and make it easier for our algorithm to detect the “blob”-like region of the barcode, we’ll performed basic morphological operations. We constructedg a rectangular kernel using the cv2.getStructuringElement. This kernel has a width that is larger than the height, thus allowing us to close the gaps between vertical stripes of the barcode.\n", 105 | "\n", 106 | "We then perform our morphological operation by applying our kernel to our thresholded image, thus attempting to close the the gaps between the bars. to remove these small blobs, that are not part of the actual barcode, but may interfere with our contour detection, we performed 4 iterations of erosions, followed by 4 iterations of dilations. An erosion will “erode” the white pixels in the image, thus removing the small blobs, whereas a dilation will “dilate” the remaining white pixels and grow the white regions back out. Provided that the small blobs were removed during the erosion, they will not reappear during the dilation.\n", 107 | "\n", 108 | "After our series of erosions and dilations you can see that the small blobs have been successfully removed and we are left with the barcode region:\n", 109 | "\n", 110 | "" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "Then ultimately we find the contours of the barcoded region of the image. We simply find the largest contour in the image, should correspond to the barcoded region. We then determine the minimum bounding box for the largest contour and finally display the detected barcode .\n", 118 | "\n", 119 | "As you can see in the following image, we have successfully detected the barcode:\n", 120 | "\n", 121 | "" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "___" 129 | ] 130 | } 131 | ], 132 | "metadata": { 133 | "kernelspec": { 134 | "display_name": "Python 2", 135 | "language": "python", 136 | "name": "python2" 137 | }, 138 | "language_info": { 139 | "codemirror_mode": { 140 | "name": "ipython", 141 | "version": 2 142 | }, 143 | "file_extension": ".py", 144 | "mimetype": "text/x-python", 145 | "name": "python", 146 | "nbconvert_exporter": "python", 147 | "pygments_lexer": "ipython2", 148 | "version": "2.7.11" 149 | } 150 | }, 151 | "nbformat": 4, 152 | "nbformat_minor": 0 153 | } 154 | -------------------------------------------------------------------------------- /Digit Recognition.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Digit Recognition using OpenCV and Scikit-Learn\n", 8 | "___" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## The Problem:\n", 16 | "Digit recognition is the ability of a computer to receive and interpret intelligible handwritten numerical input from sources such as paper documents, images, touch-screens and other devices. 
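A practical note for the training script that follows: it downloads MNIST with sklearn.datasets.fetch_mldata("MNIST Original"), which has been removed from recent scikit-learn releases. If that call fails on your installation, a roughly equivalent replacement (a hedged sketch, not verified against every version; as_frame needs scikit-learn >= 0.22) uses fetch_openml:

```python
from sklearn.datasets import fetch_openml
import numpy as np

# Rough replacement for fetch_mldata("MNIST Original") on newer scikit-learn releases
dataset = fetch_openml('mnist_784', version=1, as_frame=False)
features = np.array(dataset.data, 'int16')
labels = np.array(dataset.target, 'int')   # targets arrive as strings, hence the cast
```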
In this example, we will see how to build a digit recognizer application that takes the input of an image and recognizes the handwritten digits in that image. " 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## The Data Set:\n", 24 | "The data used for this problem is the classical MNIST (\"Modified National Institute of Standards and Technology\") dataset which is extensively studied in the Machine Learning Community. \n", 25 | "\n", 26 | "The MNIST database is a set of 70000 samples of handwritten digits where each sample consists of 28×28 sized grayscale images. We will be using sklearn.datasets package to download the MNIST database. " 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## Step 1. Training the Digit Classifier:\n", 34 | "Here, we will : –\n", 35 | "\n", 36 | "1. Calculate the Histogram of Oriented Gaussians(HOG) features for each sample in the database.\n", 37 | "2. Train a multi-class linear SVM with the HOG features of each sample along with the corresponding label.\n", 38 | "3. Save the classifier in a file so that we can use the classifier again without performing training each time." 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "Count of digits in dataset Counter({1: 7877, 7: 7293, 3: 7141, 2: 6990, 9: 6958, 0: 6903, 6: 6876, 8: 6825, 4: 6824, 5: 6313})\n" 53 | ] 54 | }, 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "['digits_cls.pkl']" 59 | ] 60 | }, 61 | "execution_count": 1, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "# Importing the modules\n", 68 | "from sklearn.externals import joblib\n", 69 | "from sklearn import datasets\n", 70 | "from skimage.feature import hog\n", 71 | "from sklearn.svm import LinearSVC\n", 72 | "import numpy as np\n", 73 | "from collections import Counter\n", 74 | "\n", 75 | "# Load the dataset\n", 76 | "# This might take some time as a process of downloading about 55mb of data would be going on.\n", 77 | "dataset = datasets.fetch_mldata(\"MNIST Original\")\n", 78 | "\n", 79 | "# Once, the dataset is downloaded we will save the images of the digits in a numpy array features and the corresponding labels\n", 80 | "# i.e. the digit in another numpy array labels\n", 81 | "# Extract the features and labels\n", 82 | "features = np.array(dataset.data, 'int16') \n", 83 | "labels = np.array(dataset.target, 'int')\n", 84 | "\n", 85 | "# Calculate the HOG features for each image in the database and save them in another numpy array named hog_feature.\n", 86 | "list_hog_fd = []\n", 87 | "for feature in features:\n", 88 | " fd = hog(feature.reshape((28, 28)), orientations=9, pixels_per_cell=(14, 14), cells_per_block=(1, 1), visualise=False)\n", 89 | " list_hog_fd.append(fd)\n", 90 | "hog_features = np.array(list_hog_fd, 'float64')\n", 91 | "\n", 92 | "print \"Count of digits in dataset\", Counter(labels)\n", 93 | "\n", 94 | "# The next step is to create a Linear SVM object. Since there are 10 digits, we need a multi-class classifier. 
\n", 95 | "# The Linear SVM that comes with sklearn can perform multi-class classification.\n", 96 | "clf = LinearSVC()\n", 97 | "\n", 98 | "# Perform the training using the fit function of clf\n", 99 | "clf.fit(hog_features, labels)\n", 100 | "\n", 101 | "# Save the classifier\n", 102 | "joblib.dump(clf, \"digits_cls.pkl\", compress=3)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "The crux of this code to tain our digit classifier after the initial loading of digit dataset and extracting the features and labels, is extracting the HOG features.\n", 110 | "\n", 111 | "The arguments passed in the _hog()_ functions are explained below:
\n", 112 | "We set the number of cells in each block equal to one and each individual cell is of size 14×14. Since our image is of size 28×28, we will have four blocks/cells of size 14×14 each. Also, we set the size of orientation vector equal to 9. So our HOG feature vector for each sample will be of size 4×9 = 36. We are not interesting in visualizing the HOG feature image, so we will set the visualise parameter to false. \n", 113 | "\n", 114 | "After this step we create the _LinearSVM()_ object to do multi-classification.\n", 115 | "\n", 116 | "Then we train our classifier using the fit() function which takes two parameters:\n", 117 | "1. an array of the HOG features of the handwritten digit earlier calculated \n", 118 | "2. Corresponding array of labels. \n", 119 | "Each label value is from the set — [0, 1, 2, 3,…, 8, 9]. \n", 120 | "\n", 121 | "When the training finishes, we will save the classifier in a file named digits_cls.pkl using _joblib.dump()_ function which has parameters of:\n", 122 | "1. The classifier object\n", 123 | "2. Filename where we want to save the classifier\n", 124 | "3. The compression degree ranging from 0-9. 0 means no compression whereas higher degree means more compression althoug poor computation time. Results have show compression = 3 proves to be a good trade-off.\n", 125 | "\n", 126 | "**Thus we have successfully trained our digits classifier.**\n", 127 | "\n" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Step2. Recognizing digits using our classifier:\n", 135 | "Now that our classifeir is ready, we can test it on an input of actual digits." 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 2, 141 | "metadata": { 142 | "collapsed": false, 143 | "scrolled": true 144 | }, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "-1" 150 | ] 151 | }, 152 | "execution_count": 2, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "import cv2\n", 159 | "from sklearn.externals import joblib\n", 160 | "from skimage.feature import hog\n", 161 | "import numpy as np\n", 162 | "\n", 163 | "# Load the classifier\n", 164 | "clf = joblib.load(\"digits_cls.pkl\")\n", 165 | "\n", 166 | "# Read the input image \n", 167 | "im = cv2.imread(\"images/hdigits.jpg\")\n", 168 | "\n", 169 | "# Convert to grayscale and apply Gaussian filtering\n", 170 | "im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)\n", 171 | "im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)\n", 172 | "\n", 173 | "# Threshold the image\n", 174 | "ret, im_th = cv2.threshold(im_gray, 90, 255, cv2.THRESH_BINARY_INV)\n", 175 | "\n", 176 | "# Find contours in the image\n", 177 | "_,ctrs,_ = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", 178 | "\n", 179 | "# Get rectangles contains each contour\n", 180 | "rects = [cv2.boundingRect(ctr) for ctr in ctrs]\n", 181 | "\n", 182 | "# For each rectangular region, calculate HOG features and predict\n", 183 | "# the digit using Linear SVM.\n", 184 | "for rect in rects:\n", 185 | " # Draw the rectangles\n", 186 | " cv2.rectangle(im, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3) \n", 187 | " # Make the rectangular region around the digit\n", 188 | " leng = int(rect[3] * 1.6)\n", 189 | " pt1 = int(rect[1] + rect[3] // 2 - leng // 2)\n", 190 | " pt2 = int(rect[0] + rect[2] // 2 - leng // 2)\n", 191 | " roi = im_th[pt1:pt1+leng, pt2:pt2+leng]\n", 192 | " # Resize the image\n", 
193 | " roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)\n", 194 | " roi = cv2.dilate(roi, (3, 3))\n", 195 | " # Calculate the HOG features\n", 196 | " roi_hog_fd = hog(roi, orientations=9, pixels_per_cell=(14, 14), cells_per_block=(1, 1), visualise=False)\n", 197 | " nbr = clf.predict(np.array([roi_hog_fd], 'float64'))\n", 198 | " cv2.putText(im, str(int(nbr[0])), (rect[0], rect[1]),cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3)\n", 199 | "\n", 200 | "cv2.imshow(\"Digit Recognizer\", im)\n", 201 | "cv2.waitKey()" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "For testing our calssifier on real input, we loaded the classifier from the file digits_cls.pkl which we had saved in the previous script.\n", 209 | "\n", 210 | "Then we load the test image, convert it to a grayscale image as we have seen before and then apply a Gaussian filter to it so for smoothing. \n", 211 | "\n", 212 | "Next we convert our grayscale image into a binary image using a threshold value of 90. All the pixel locations with grayscale values greater than 90 are set to 0(black)in the binary image and all the pixel locations with grayscale values less than 90 are set to 255(white) in the binary image. \n", 213 | "\n", 214 | "We calculate the contours in the image, calculate the bounding box for each contour and then generate a bounding square around each contour for each corresponding bounding box. \n", 215 | "\n", 216 | "Next we then resize each bounding square to a size of 28×28 and dilate it.\n", 217 | "\n", 218 | "We calculate the HOG features for each bounding square. (The HOG feature vector for each bounding square should be of the same size for which the classifier was trained, else you will get an error). \n", 219 | "\n", 220 | "Finally, we predict the digit using our classifier. We also draw the bounding box and the predicted digit on the input image. and then display the image.\n", 221 | "\n", 222 | "I tested the classifier on this image - \n", 223 | "\n", 224 | "\n" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "The resulting output with the digits recognized looked like this:\n", 232 | "\n", 233 | "\n", 234 | "\n", 235 | "___" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "**NOTE**: While using your own images for testing:\n", 243 | "\n", 244 | "Make sure each is at a sufficient distance from each other. Otherwise if the digits are too close, they will interfere in the square region around each digit. In this case, we will need to create a new square image and then we need to copy the contour in that square image.\n", 245 | "\n", 246 | "For the images I used in testing, fixed thresholding worked pretty well. In most real world images, fixed thresholding does not produce good results. In this case, we need to use adaptive thresholding.\n", 247 | "\n", 248 | "In the pre-processing step, we only did Gaussian blurring. In most situations, on the binary image we will need to open and close the image to remove small noise pixels and fill small holes ie perform appropriate Image Denoising and Inpainting.\n", 249 | "\n", 250 | "Thus here we discussed how we can recognize handwritten digits using OpenCV and Scikit-Learn. 
We trained a Linear SVM with the HOG features of each sample and then ultimately tested our code.\n", 251 | "___" 252 | ] 253 | } 254 | ], 255 | "metadata": { 256 | "kernelspec": { 257 | "display_name": "Python 2", 258 | "language": "python", 259 | "name": "python2" 260 | }, 261 | "language_info": { 262 | "codemirror_mode": { 263 | "name": "ipython", 264 | "version": 2 265 | }, 266 | "file_extension": ".py", 267 | "mimetype": "text/x-python", 268 | "name": "python", 269 | "nbconvert_exporter": "python", 270 | "pygments_lexer": "ipython2", 271 | "version": "2.7.11" 272 | } 273 | }, 274 | "nbformat": 4, 275 | "nbformat_minor": 0 276 | } 277 | -------------------------------------------------------------------------------- /Document Scanner.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Document Scanner\n", 8 | "___" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "In this example, we would be making a document scanner application. This is one of those posts which are inspired by amazing computer vision blogs at pyimagesearch.com. For detailed explanations and cool computer vision projects, this website is strongly recommended. While going through examples from Pyimageserach or even this website, make sure you understand and make good use of the example projects. Think about how can you modify them and put them into use to solve real world problems. \n", 16 | "\n", 17 | "Coming back to our example, you should know scanning a document can be basically broken down into three simple steps:\n", 18 | "1. Detect edges.\n", 19 | "2. Use the edges in the image to find the contour (outline) representing the piece of paper being scanned.\n", 20 | "3. Apply a perspective transform to obtain the top-down view of the document." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Given below is the code of the document scanner:\n", 28 | "We would be using two modules for this app\n", 29 | "1. imutils.py (a slightly modified version to suit our purpose)\n", 30 | "2. transform.py\n", 31 | "Store these scripts in a folder named modules in your project directory." 
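One compatibility note before the scripts: scan.py below imports threshold_adaptive from skimage.filter, which only exists in older scikit-image releases. In newer versions the module is skimage.filters and the function has been replaced by threshold_local, which returns a threshold surface rather than a boolean image. A hedged equivalent of that single thresholding step would be:

```python
# Replacement for: warped = threshold_adaptive(warped, 250, offset=10)
from skimage.filters import threshold_local

T = threshold_local(warped, 251, offset=10, method='gaussian')  # block size must be odd
warped = (warped > T).astype('uint8') * 255
```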
32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "___\n", 39 | "## _modules/imutils.py_ " 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# Import the necessary packages\n", 51 | "import numpy as np\n", 52 | "import cv2\n", 53 | "\n", 54 | "def translate(image, x, y):\n", 55 | " # Define the translation matrix and perform the translation\n", 56 | " M = np.float32([[1, 0, x], [0, 1, y]])\n", 57 | " shifted = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))\n", 58 | "\n", 59 | " # Return the translated image\n", 60 | " return shifted\n", 61 | "\n", 62 | "def rotate(image, angle, center = None, scale = 1.0):\n", 63 | " # Grab the dimensions of the image\n", 64 | " (h, w) = image.shape[:2]\n", 65 | "\n", 66 | " # If the center is None, initialize it as the center of the image\n", 67 | " if center is None:\n", 68 | " center = (w / 2, h / 2)\n", 69 | "\n", 70 | " # Perform the rotation\n", 71 | " M = cv2.getRotationMatrix2D(center, angle, scale)\n", 72 | " rotated = cv2.warpAffine(image, M, (w, h))\n", 73 | "\n", 74 | " # Return the rotated image\n", 75 | " return rotated\n", 76 | "\n", 77 | "def resize(image, width = None, height = None, inter = cv2.INTER_AREA):\n", 78 | " # initialize the dimensions of the image to be resized and grab the image size\n", 79 | " dim = None\n", 80 | " (h, w) = image.shape[:2]\n", 81 | "\n", 82 | " # if both the width and height are None, then return the original image\n", 83 | " if width is None and height is None:\n", 84 | " return image\n", 85 | "\n", 86 | " # check to see if the width is None\n", 87 | " if width is None:\n", 88 | " # calculate the ratio of the height and construct the dimensions\n", 89 | " r = height / float(h)\n", 90 | " dim = (int(w * r), height)\n", 91 | "\n", 92 | " # otherwise, the height is None\n", 93 | " else:\n", 94 | " # calculate the ratio of the width and construct the dimensions\n", 95 | " r = width / float(w)\n", 96 | " dim = (width, int(h * r))\n", 97 | "\n", 98 | " # resize the image\n", 99 | " resized = cv2.resize(image, dim, interpolation = inter)\n", 100 | "\n", 101 | " # return the resized image\n", 102 | " return resized" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "___\n", 110 | "## _modules/transform.py_" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": true 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "# import the necessary packages\n", 122 | "import numpy as np\n", 123 | "import cv2\n", 124 | "\n", 125 | "def order_points(pts):\n", 126 | " # initialzie a list of coordinates that will be ordered such that the first entry in the list is the top-left,\n", 127 | " # the second entry is the top-right, the third is the bottom-right, and the fourth is the bottom-left\n", 128 | " rect = np.zeros((4, 2), dtype = \"float32\")\n", 129 | "\n", 130 | " # the top-left point will have the smallest sum, whereas the bottom-right point will have the largest sum\n", 131 | " s = pts.sum(axis = 1)\n", 132 | " rect[0] = pts[np.argmin(s)]\n", 133 | " rect[2] = pts[np.argmax(s)]\n", 134 | "\n", 135 | " # now, compute the difference between the points, \n", 136 | " # the top-right point will have the smallest difference, whereas the bottom-left will have the largest difference\n", 137 | " diff = np.diff(pts, axis = 1)\n", 138 | " 
rect[1] = pts[np.argmin(diff)]\n", 139 | " rect[3] = pts[np.argmax(diff)]\n", 140 | "\n", 141 | " # return the ordered coordinates\n", 142 | " return rect\n", 143 | "\n", 144 | "def four_point_transform(image, pts):\n", 145 | " # obtain a consistent order of the points and unpack them individually\n", 146 | " rect = order_points(pts)\n", 147 | " (tl, tr, br, bl) = rect\n", 148 | "\n", 149 | " # compute the width of the new image, which will be the maximum distance between bottom-right and bottom-left\n", 150 | " # x-coordiates or the top-right and top-left x-coordinates\n", 151 | " widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))\n", 152 | " widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))\n", 153 | " maxWidth = max(int(widthA), int(widthB))\n", 154 | "\n", 155 | " # compute the height of the new image, which will be the maximum distance between the top-right and bottom-right\n", 156 | " # y-coordinates or the top-left and bottom-left y-coordinates\n", 157 | " heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))\n", 158 | " heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))\n", 159 | " maxHeight = max(int(heightA), int(heightB))\n", 160 | "\n", 161 | " # now that we have the dimensions of the new image, construct the set of destination points to obtain a \"birds eye view\",\n", 162 | " # (i.e. top-down view) of the image, again specifying points in the top-left, top-right, bottom-right, and bottom-left order\n", 163 | " dst = np.array([\n", 164 | " [0, 0],\n", 165 | " [maxWidth - 1, 0],\n", 166 | " [maxWidth - 1, maxHeight - 1],\n", 167 | " [0, maxHeight - 1]], dtype = \"float32\")\n", 168 | "\n", 169 | " # compute the perspective transform matrix and then apply it\n", 170 | " M = cv2.getPerspectiveTransform(rect, dst)\n", 171 | " warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))\n", 172 | "\n", 173 | " # return the warped image\n", 174 | " return warped" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "___\n", 182 | "And then finally our main script which imports these above two scripts as modules.\n", 183 | "## _scan.py_" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": true 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "# import the necessary packages\n", 195 | "from modules.transform import four_point_transform\n", 196 | "from modules import imutils\n", 197 | "from skimage.filter import threshold_adaptive\n", 198 | "import numpy as np\n", 199 | "import argparse\n", 200 | "import cv2\n", 201 | "\n", 202 | "# load the image and compute the ratio of the old height\n", 203 | "# to the new height, clone it, and resize it\n", 204 | "image = cv2.imread(\"images/doc.jpg\")\n", 205 | "ratio = image.shape[0] / 500.0\n", 206 | "orig = image.copy()\n", 207 | "image = imutils.resize(image, height = 500)\n", 208 | "\n", 209 | "# convert the image to grayscale, blur it, and find edges\n", 210 | "# in the image\n", 211 | "gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", 212 | "gray = cv2.GaussianBlur(gray, (5, 5), 0)\n", 213 | "edged = cv2.Canny(gray, 75, 200)\n", 214 | "\n", 215 | "# show the original image and the edge detected image\n", 216 | "print(\"STEP 1: Edge Detection\")\n", 217 | "cv2.imshow(\"Image\", image)\n", 218 | "cv2.imshow(\"Edged\", edged)\n", 219 | "cv2.waitKey(0)\n", 220 | "cv2.destroyAllWindows()\n", 221 | "\n", 222 | "# find the contours in the edged image, keeping only 
the\n", 223 | "# largest ones, and initialize the screen contour\n", 224 | "(_,cnts, _) = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n", 225 | "cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:5]\n", 226 | "\n", 227 | "# loop over the contours\n", 228 | "for c in cnts:\n", 229 | " # approximate the contour\n", 230 | " peri = cv2.arcLength(c, True)\n", 231 | " approx = cv2.approxPolyDP(c, 0.02 * peri, True)\n", 232 | "\n", 233 | " # if our approximated contour has four points, then we\n", 234 | " # can assume that we have found our screen\n", 235 | " if len(approx) == 4:\n", 236 | " screenCnt = approx\n", 237 | " break\n", 238 | "\n", 239 | "# show the contour (outline) of the piece of paper\n", 240 | "print(\"STEP 2: Find contours of paper\")\n", 241 | "cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)\n", 242 | "cv2.imshow(\"Outline\", image)\n", 243 | "cv2.waitKey(0)\n", 244 | "cv2.destroyAllWindows()\n", 245 | "\n", 246 | "# apply the four point transform to obtain a top-down\n", 247 | "# view of the original image\n", 248 | "warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)\n", 249 | "\n", 250 | "# convert the warped image to grayscale, then threshold it\n", 251 | "# to give it that 'black and white' paper effect\n", 252 | "warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)\n", 253 | "warped = threshold_adaptive(warped, 250, offset = 10)\n", 254 | "warped = warped.astype(\"uint8\") * 255\n", 255 | "\n", 256 | "# show the original and scanned images\n", 257 | "print(\"STEP 3: Apply perspective transform\")\n", 258 | "cv2.imshow(\"Original\", imutils.resize(orig, height = 650))\n", 259 | "cv2.imshow(\"Scanned\", imutils.resize(warped, height = 650))\n", 260 | "cv2.waitKey(0)\n" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "___\n", 268 | "### The resulting output of the subsequent steps are:\n", 269 | "\n", 270 | "" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "___" 292 | ] 293 | } 294 | ], 295 | "metadata": { 296 | "kernelspec": { 297 | "display_name": "Python 2", 298 | "language": "python", 299 | "name": "python2" 300 | }, 301 | "language_info": { 302 | "codemirror_mode": { 303 | "name": "ipython", 304 | "version": 2 305 | }, 306 | "file_extension": ".py", 307 | "mimetype": "text/x-python", 308 | "name": "python", 309 | "nbconvert_exporter": "python", 310 | "pygments_lexer": "ipython2", 311 | "version": "2.7.11" 312 | } 313 | }, 314 | "nbformat": 4, 315 | "nbformat_minor": 0 316 | } 317 | -------------------------------------------------------------------------------- /Face Detection using Haar Cascades .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Face Detection using Haar Cascades\n", 8 | "___" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "Here we're going to discuss an interesting and a classical application of Computer Vision: Object Detection.\n", 16 | "We'll do face and eye detection with OpenCV using Haar Cascades. In order to do object recognition/detection with cascade files, you first need cascade files. 
For the extremely popular tasks, these already exist. Detecting things like faces, eyes, smiles, cars, and license plates for example are all pretty popular and Haar Cascades of these are readily available.\n", 17 | "\n", 18 | "You can find such popular cascade files from https://github.com/Itseez/opencv/tree/master/data/haarcascades \n", 19 | "\n", 20 | "You can also use Google to find various Haar Cascades of things you may want to detect. We will use a Face cascade and Eye cascade. You can find a few more at the root directory of Haar cascades. Note the license for using/distributing these Haar Cascades.\n", 21 | "\n", 22 | "Now we are doing face and eye detection, and thus we need face and eye haar cascade files.\n", 23 | "\n", 24 | "You can download these files from: \n", 25 | "* https://github.com/Itseez/opencv/blob/master/data/haarcascades/haarcascade_frontalface_default.xml\n", 26 | "* https://github.com/Itseez/opencv/blob/master/data/haarcascades/haarcascade_eye.xml\n", 27 | "\n", 28 | "Just right click on raw and save the link in the same directory of your python file." 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "First we need to load the required XML classifiers. Then load our input image (or video) in grayscale mode." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import numpy as np\n", 47 | "import cv2\n", 48 | "\n", 49 | "face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')\n", 50 | "eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')\n", 51 | "\n", 52 | "img = cv2.imread('ij.jpg')\n", 53 | "gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "Now we find the faces in the image. If faces are found, it returns the positions of detected faces as Rect(x,y,w,h).\n", 61 | "The function _detectMultiScale()_ basically finds faces in the images. You can find the full description on how the cascade classifier works, here: http://docs.opencv.org/2.4/modules/objdetect/doc/cascade_classification.html\n", 62 | "\n", 63 | "Once we get these locations, we can create a ROI for the face and apply eye detection on this ROI (since eyes are always on the face! 
).\n", 64 | "Most eye detection uses the surrounding skin, eye lids, eye lashes, and eye brows to also make the detection.\n", 65 | "Thus, our next step is to break down the faces first, before getting to the eyes:\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "faces = face_cascade.detectMultiScale(gray, 1.3, 5)\n", 77 | "\n", 78 | "# break down the faces first, before getting to the eyes:\n", 79 | "\n", 80 | "for (x,y,w,h) in faces:\n", 81 | " cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)\n", 82 | " roi_gray = gray[y:y+h, x:x+w]\n", 83 | " roi_color = img[y:y+h, x:x+w]\n", 84 | "\n", 85 | " # Here, we're finding faces, their sizes, drawing rectangles, and noting the ROI.\n", 86 | " \n", 87 | "\n", 88 | " eyes = eye_cascade.detectMultiScale(roi_gray)\n", 89 | "\n", 90 | " # If we find those, we'll go ahead and make some more rectangles.\n", 91 | " for (ex,ey,ew,eh) in eyes:\n", 92 | " cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2)\n", 93 | " \n", 94 | "cv2.imshow('img',img)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "**The Complete Code:**" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "import numpy as np\n", 113 | "import cv2\n", 114 | "\n", 115 | "face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')\n", 116 | "eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')\n", 117 | "\n", 118 | "\n", 119 | "img = cv2.imread('ij.jpg')\n", 120 | "#ret, img = cap.read()\n", 121 | "gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", 122 | "faces = face_cascade.detectMultiScale(gray, 1.3, 5)\n", 123 | "\n", 124 | " # break down the faces first, before getting to the eyes:\n", 125 | "\n", 126 | "for (x,y,w,h) in faces:\n", 127 | " cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)\n", 128 | " roi_gray = gray[y:y+h, x:x+w]\n", 129 | " roi_color = img[y:y+h, x:x+w]\n", 130 | "\n", 131 | " # Here, we're finding faces, their sizes, drawing rectangles, and noting the ROI.\n", 132 | " # Next, we poke around for some eyes:\n", 133 | "\n", 134 | " eyes = eye_cascade.detectMultiScale(roi_gray)\n", 135 | " # If we find those, we'll go ahead and make some more rectangles.\n", 136 | " for (ex,ey,ew,eh) in eyes:\n", 137 | " cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2)\n", 138 | " \n", 139 | "cv2.imshow('img',img)\n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "The resulting images with face and eyes detected would look like:\n", 147 | "" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "___\n", 155 | "Pretty cool right? :D \n", 156 | "\n", 157 | "However you might have noticed sometimes some eyes in your images might have been left out. Also sometimes mouths might also be detected pretty often as eyes. Facial hair and other things can often fool this basic face detection, and even advanced. Skin color can also cause trouble, as we often try to simplify images as much as possible, thus losing a lot of color values.\n", 158 | "\n", 159 | "This was about face and eye recognition.\n", 160 | "If you want to take a step futher you can actually train your own classifier for any object like car, planes etc. you can use OpenCV to create one. 
Full details are given in the OpenCV documentation under Cascade Classifier Training.\n", 161 | "___\n" 162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python 2", 168 | "language": "python", 169 | "name": "python2" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 2 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython2", 181 | "version": "2.7.11" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 0 186 | } 187 | -------------------------------------------------------------------------------- /FloodFill.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# FloodFill demo\n", 8 | "
\n", 9 | "\n", 10 | "Click on the image to set seed point. This example imitates the FloodFill colour functionality of the MS Windows Paint program.\n", 11 | "\n", 12 | "#### Usage: \n", 13 | "\n", 14 | "Keys to handle the GUI functions:\n", 15 | "* f - toggle floating range\n", 16 | "* c - toggle 4/8 connectivity\n", 17 | "* ESC - exit" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "I used the following sample image and coloured it using the following script. The script outputs a GUI containing trackbars to choose the RGB value and also to set the resolution. You have to click on a portion in the image to 'floodfill' it with a colour corresponding to the chosen RGB value." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "#!/usr/bin/env python\n", 43 | "# Python 2/3 compatibility\n", 44 | "from __future__ import print_function\n", 45 | "\n", 46 | "import numpy as np\n", 47 | "import cv2\n", 48 | "\n", 49 | "if __name__ == '__main__':\n", 50 | " import sys\n", 51 | " \n", 52 | " fn = 'floodfillshapes.png'\n", 53 | " img = cv2.imread(fn, True)\n", 54 | " if img is None:\n", 55 | " print('Failed to load image file:', fn)\n", 56 | " sys.exit(1)\n", 57 | "\n", 58 | " h, w = img.shape[:2]\n", 59 | " mask = np.zeros((h+2, w+2), np.uint8)\n", 60 | " seed_pt = None\n", 61 | " fixed_range = True\n", 62 | " connectivity = 4\n", 63 | "\n", 64 | " def update(dummy=None):\n", 65 | " if seed_pt is None:\n", 66 | " cv2.imshow('floodfill', img)\n", 67 | " return\n", 68 | " flooded = img.copy()\n", 69 | " mask[:] = 0\n", 70 | "\n", 71 | " # get current positions of trackbars\n", 72 | " r = cv2.getTrackbarPos('R','floodfill')\n", 73 | " g = cv2.getTrackbarPos('G','floodfill')\n", 74 | " b = cv2.getTrackbarPos('B','floodfill')\n", 75 | " \n", 76 | " lo = cv2.getTrackbarPos('lo', 'floodfill')\n", 77 | " hi = cv2.getTrackbarPos('hi', 'floodfill')\n", 78 | " flags = connectivity\n", 79 | " if fixed_range:\n", 80 | " flags |= cv2.FLOODFILL_FIXED_RANGE\n", 81 | " cv2.floodFill(flooded, mask, seed_pt, (b, g, r), (lo,)*3, (hi,)*3, flags)\n", 82 | " cv2.circle(flooded, seed_pt, 2, (0, 0, 255), -1)\n", 83 | " cv2.imshow('floodfill', flooded)\n", 84 | "\n", 85 | " def onmouse(event, x, y, flags, param):\n", 86 | " global seed_pt\n", 87 | " if flags & cv2.EVENT_FLAG_LBUTTON:\n", 88 | " seed_pt = x, y\n", 89 | " update()\n", 90 | "\n", 91 | " update()\n", 92 | " cv2.setMouseCallback('floodfill', onmouse)\n", 93 | "\n", 94 | " cv2.createTrackbar('R','floodfill',0,255,update)\n", 95 | " cv2.createTrackbar('G','floodfill',0,255,update)\n", 96 | " cv2.createTrackbar('B','floodfill',0,255,update)\n", 97 | "\n", 98 | " cv2.createTrackbar('lo', 'floodfill', 20, 255, update)\n", 99 | " cv2.createTrackbar('hi', 'floodfill', 20, 255, update)\n", 100 | "\n", 101 | " while True:\n", 102 | " ch = 0xFF & cv2.waitKey()\n", 103 | " if ch == 27:\n", 104 | " break\n", 105 | " if ch == ord('f'):\n", 106 | " fixed_range = not fixed_range\n", 107 | " print('using %s range' % ('floating', 'fixed')[fixed_range])\n", 108 | " update()\n", 109 | " if ch == ord('c'):\n", 110 | " connectivity = 12-connectivity\n", 111 | " print('connectivity =', connectivity)\n", 112 | " update()\n", 113 | " cv2.destroyAllWindows()\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | 
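As a brief aside before the output below, the same cv2.floodFill call can also be used without the interactive GUI. The following is only a minimal sketch under stated assumptions (the input path, seed point, fill colour and tolerance of 20 are placeholders, not part of the original script); it mirrors the mask and flags handling of the script above:

```python
from __future__ import print_function
import numpy as np
import cv2

fn = 'images/floodfillshapes.png'          # placeholder input path
img = cv2.imread(fn)
if img is None:
    raise IOError('Failed to load image file: %s' % fn)

h, w = img.shape[:2]
mask = np.zeros((h + 2, w + 2), np.uint8)  # floodFill needs a mask 2 pixels larger than the image
seed_pt = (10, 10)                         # placeholder seed point (x, y)
flags = 4 | cv2.FLOODFILL_FIXED_RANGE      # 4-connectivity, compare pixels against the seed value

# fill in place with red, allowing a difference of up to 20 per channel
cv2.floodFill(img, mask, seed_pt, (0, 0, 255), (20,) * 3, (20,) * 3, flags)
cv2.imwrite('flooded.png', img)
print('filled region written to flooded.png')
```

Dropping cv2.FLOODFILL_FIXED_RANGE from the flags switches to the floating range that the 'f' key toggles in the GUI script above.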
"metadata": {}, 119 | "source": [ 120 | "The output looks like this : \n", 121 | " \n", 122 | "" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "
" 130 | ] 131 | } 132 | ], 133 | "metadata": { 134 | "kernelspec": { 135 | "display_name": "Python 2", 136 | "language": "python", 137 | "name": "python2" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 2 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython2", 149 | "version": "2.7.12" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 2 154 | } 155 | -------------------------------------------------------------------------------- /Human Detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Human Detection\n", 8 | "___" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "Here we would see how we can use computer vision to carry out the task of human or pedestrian detection.\n", 16 | "\n", 17 | "OpenCV has a built-in pre-trained HOG + Linear SVM model that can be used to perform human detection in both images and video streams. We implement the non-maxima suppression (NMS) algorithm which in short takes multiple, overlapping bounding boxes and reduces them to only a single bounding box; so as to reduce the number of false-positives reported by the final object detector. The following code performs the task of pedestrian detection:" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "# import the necessary packages\n", 29 | "from __future__ import print_function\n", 30 | "from imutils.object_detection import non_max_suppression\n", 31 | "from imutils import paths\n", 32 | "import numpy as np\n", 33 | "import argparse\n", 34 | "import imutils\n", 35 | "import cv2\n", 36 | "\n", 37 | "# initialize the HOG descriptor/person detector\n", 38 | "hog = cv2.HOGDescriptor()\n", 39 | "hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())\n", 40 | "\n", 41 | "# loop over the image paths\n", 42 | "imagePaths = list(paths.list_images(\"images\"))\n", 43 | "\n", 44 | "for imagePath in imagePaths:\n", 45 | " # load the image and resize it to (1) reduce detection time\n", 46 | " # and (2) improve detection accuracy\n", 47 | " image = cv2.imread(imagePath)\n", 48 | " image = imutils.resize(image, width=min(400, image.shape[1]))\n", 49 | " orig = image.copy()\n", 50 | "\n", 51 | " # detect people in the image\n", 52 | " (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),\n", 53 | " padding=(8, 8), scale=1.05)\n", 54 | "\n", 55 | " # draw the original bounding boxes\n", 56 | " for (x, y, w, h) in rects:\n", 57 | " cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)\n", 58 | "\n", 59 | " # apply non-maxima suppression to the bounding boxes using a\n", 60 | " # fairly large overlap threshold to try to maintain overlapping\n", 61 | " # boxes that are still people\n", 62 | " rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])\n", 63 | " pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)\n", 64 | "\n", 65 | " # draw the final bounding boxes\n", 66 | " for (xA, yA, xB, yB) in pick:\n", 67 | " cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)\n", 68 | "\n", 69 | " # show some information on the number of bounding boxes\n", 70 | " filename = 
imagePath[imagePath.rfind(\"/\") + 1:]\n", 71 | " print(\"[INFO] {}: {} original boxes, {} after suppression\".format(\n", 72 | " filename, len(rects), len(pick)))\n", 73 | "\n", 74 | " # show the output images\n", 75 | " cv2.imshow(\"Before NMS\", orig)\n", 76 | " cv2.imshow(\"After NMS\", image)\n", 77 | " cv2.waitKey(0)\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "First, after importing the required modules and loading the folder that contains the pedestrian images, we initialize our pedestrian detector. We make a call to _hog = cv2.HOGDescriptor()_ which initializes the Histogram of Oriented Gradients descriptor. Then, we call _setSVMDetector_ to set the Support Vector Machine to be the pre-trained pedestrian detector, loaded via the cv2.HOGDescriptor_getDefaultPeopleDetector() function.\n", 85 | "\n", 86 | "We loop over the images in our images directory. You can download the dataset of pedestrian images from the popular INRIA Person Dataset (specifically, from the GRAZ-01 subset).\n", 87 | "\n", 88 | "From there, we handle loading our image off disk and resizing it to have a maximum width of 400 pixels. We reduce our image dimensions because:\n", 89 | "\n", 90 | "1. Reducing image size ensures that fewer sliding windows in the image pyramid need to be evaluated (i.e., have HOG features extracted from them and then passed on to the Linear SVM), thus reducing detection time (and increasing overall detection throughput).\n", 91 | "2. Resizing our image also improves the overall accuracy of our pedestrian detection (i.e., fewer false positives).\n", 92 | "\n", 93 | "We use the detectMultiScale method of the HOG descriptor, which constructs an image pyramid with scale=1.05 and a sliding window step size of (4, 4) pixels in the x and y directions respectively. The size of the sliding window is fixed at 64 x 128 pixels. The detectMultiScale function returns a 2-tuple of rects, the bounding box (x, y)-coordinates of each person in the image, and weights, the confidence value returned by the SVM for each detection.\n", 94 | "\n", 95 | "A larger scale size will evaluate fewer layers in the image pyramid, which can make the algorithm faster to run. However, having too large a scale (i.e., fewer layers in the image pyramid) can lead to pedestrians not being detected. Similarly, having too small a scale size dramatically increases the number of image pyramid layers that need to be evaluated. Not only can this be computationally expensive, it can also increase the number of false positives detected by the pedestrian detector.\n", 96 | "\n", 97 | "Then we take our initial bounding boxes and draw them on our image. However, for some images you’ll notice that there are multiple, overlapping bounding boxes detected for each person. In this case, we have two options. We can detect if one bounding box is fully contained within another. 
Or we can apply non-maxima suppression and suppress bounding boxes that overlap with a significant threshold.\n", 98 | "\n", 99 | "After applying non-maxima suppression, we draw the finalized bounding boxes, display some basic information about the image and number of bounding boxes, and finally display our output images to our screen.\n", 100 | "___" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "The resulting output before and after non-maximum suppression of two sample images from the dataset looks like:\n", 108 | " \n", 109 | "\n", 110 | "\n", 111 | "The above image serves an example of why applying non-maxima suppression is important. The detectMultiScale function falsely detected two bounding boxes (along with the correct bounding box), both overlapping the true person in the image. By applying non-maxima suppression we were able to suppress the extraneous bounding boxes, leaving us with the true detection\n", 112 | "___\n", 113 | "\n", 114 | "\n", 115 | "Here our HOG method is able to detect the people. The larger overlapThresh in the non_maxima_suppression function ensures that the bounding boxes are not suppressed, even though they do partially overlap." 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "___" 123 | ] 124 | } 125 | ], 126 | "metadata": { 127 | "kernelspec": { 128 | "display_name": "Python 2", 129 | "language": "python", 130 | "name": "python2" 131 | }, 132 | "language_info": { 133 | "codemirror_mode": { 134 | "name": "ipython", 135 | "version": 2 136 | }, 137 | "file_extension": ".py", 138 | "mimetype": "text/x-python", 139 | "name": "python", 140 | "nbconvert_exporter": "python", 141 | "pygments_lexer": "ipython2", 142 | "version": "2.7.11" 143 | } 144 | }, 145 | "nbformat": 4, 146 | "nbformat_minor": 0 147 | } 148 | -------------------------------------------------------------------------------- /Optical Character Recognition using K Nearest Neighbours.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Optical Character Recognition using K Nearest Neighbours\n", 11 | "___" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "deletable": true, 18 | "editable": true 19 | }, 20 | "source": [ 21 | "## The Problem:\n", 22 | "We will use our knowledge on kNN to build a basic OCR application. The goal of our OCR application is to read and recognize the handwritten digits.\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "deletable": true, 29 | "editable": true 30 | }, 31 | "source": [ 32 | "## The Dataset:\n", 33 | "The dataset required to deal with the problem is already available in OpenCV docs which is an image file named as digits.png (in the folder opencv/samples/python2/data/) which has 5000 handwritten digits (500 for each digit). Each digit is a 20x20 image.\n", 34 | "\n", 35 | "You can download the digits.png file from https://github.com/Itseez/opencv/blob/master/samples/data/digits.png by just right-clicking raw and saving the link in your project directory.\n", 36 | "\n", 37 | "After downloading the file we have to split our data into train_data and test_data. So our first step is to split this image into 5000 different digits. For each digit, we flatten it into a single row with 400 pixels. 
That is our feature set, ie intensity values of all pixels. It is the simplest feature set we can create. We use first 250 samples of each digit as train_data, and next 250 samples as test_data." 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": { 43 | "deletable": true, 44 | "editable": true 45 | }, 46 | "source": [ 47 | "## Prcoessing the data:" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 12, 53 | "metadata": { 54 | "collapsed": false, 55 | "deletable": true, 56 | "editable": true 57 | }, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "" 63 | ] 64 | }, 65 | "execution_count": 12, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "import cv2\n", 72 | "#cv2.__version__\n", 73 | "cv2.ml.KNearest_create" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false, 81 | "deletable": true, 82 | "editable": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "import numpy as np\n", 87 | "import cv2\n", 88 | "cv2.ml.KNearest_create()\n", 89 | "from matplotlib import pyplot as plt\n", 90 | "img = cv2.imread('datasets/digits.png')\n", 91 | "gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)\n", 92 | "\n", 93 | "# Now we split the image to 5000 cells, each 20x20 size\n", 94 | "cells = [np.hsplit(row,100) for row in np.vsplit(gray,50)]\n", 95 | "\n", 96 | "# Make it into a Numpy array. It size will be (50,100,20,20)\n", 97 | "x = np.array(cells)\n", 98 | "\n", 99 | "# Now we prepare train_data and test_data.\n", 100 | "train = x[:,:50].reshape(-1,400).astype(np.float32) # Size = (2500,400)\n", 101 | "test = x[:,50:100].reshape(-1,400).astype(np.float32) # Size = (2500,400)\n", 102 | "\n", 103 | "# Create labels for train and test data\n", 104 | "k = np.arange(10)\n", 105 | "train_labels = np.repeat(k,250)[:,np.newaxis]\n", 106 | "test_labels = train_labels.copy()\n", 107 | "\n", 108 | "# Initiate kNN, train the data, then test it with test data for k=1\n", 109 | "knn = cv2.KNearest()\n", 110 | "knn.train(train,train_labels)\n", 111 | "ret,result,neighbours,dist = knn.find_nearest(test,k=5)\n", 112 | "\n", 113 | "# Now we check the accuracy of classification\n", 114 | "# For that, compare the result with test_labels and check which are wrong\n", 115 | "matches = result==test_labels\n", 116 | "correct = np.count_nonzero(matches)\n", 117 | "accuracy = correct*100.0/result.size\n", 118 | "print accuracy\n" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": { 124 | "deletable": true, 125 | "editable": true 126 | }, 127 | "source": [ 128 | "So our basic OCR app is ready. This particular example gave me an accuracy of 91%. One option improve accuracy is to add more data for training, especially the wrong ones. So instead of finding this training data everytime I start application, I better save it, so that next time, I directly read this data from a file and start classification. You can do it with the help of some Numpy functions like np.savetxt, np.savez, np.load etc. Please check their docs for more details." 
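As a quick side note (this is a sketch, not part of the original notebook), the trained model can also classify a single sample. The snippet below assumes the knn, test, test_labels and np variables from the cell above are still in scope; any 20x20 digit flattened to a 1x400 float32 row would work the same way:

```python
# Sketch: classify one sample with the kNN model trained above.
# Assumes knn, test, test_labels and np from the previous cell are in scope.
sample = test[0].reshape(1, 400).astype(np.float32)   # one flattened 20x20 digit
ret, result, neighbours, dist = knn.find_nearest(sample, k=5)
print('predicted digit: %d' % int(result[0][0]))
print('true digit:      %d' % int(test_labels[0][0]))
```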
129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": true, 136 | "deletable": true, 137 | "editable": true 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "np.savez('knn_data.npz',train=train, train_labels=train_labels)\n", 142 | "# Now load the data\n", 143 | "with np.load('knn_data.npz') as data:\n", 144 | "    print data.files\n", 145 | "    train = data['train']\n", 146 | "    train_labels = data['train_labels']" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": { 152 | "deletable": true, 153 | "editable": true 154 | }, 155 | "source": [ 156 | "## OCR of English Alphabets\n", 157 | "\n", 158 | "Next we will do the same for English alphabets, but there is a slight change in the data and feature set. Here, instead of images, OpenCV comes with a data file, letter-recognition.data, in the opencv/samples/cpp/ folder. If you open it, you will see 20000 lines which may, at first sight, look like garbage. Actually, in each row, the first column is a letter, which is our label. The next 16 numbers following it are its different features. These features are obtained from the UCI Machine Learning Repository. You can find the details of these features and download the dataset from http://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/.\n", 159 | "\n", 160 | "There are 20000 samples available, so we take the first 10000 as training samples and the remaining 10000 as test samples. We also convert each letter to a number (its offset from 'A') because we can't work with the characters directly." 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": true, 168 | "deletable": true, 169 | "editable": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "import cv2\n", 174 | "import numpy as np\n", 175 | "import matplotlib.pyplot as plt\n", 176 | "\n", 177 | "# Load the data; converters convert the letter to a number\n", 178 | "data= np.loadtxt('letter-recognition.data', dtype= 'float32', delimiter = ',',\n", 179 | "                 converters= {0: lambda ch: ord(ch)-ord('A')})\n", 180 | "\n", 181 | "# split the data in two, 10000 each for train and test\n", 182 | "train, test = np.vsplit(data,2)\n", 183 | "\n", 184 | "# split trainData and testData into features and responses\n", 185 | "responses, trainData = np.hsplit(train,[1])\n", 186 | "labels, testData = np.hsplit(test,[1])\n", 187 | "\n", 188 | "# Initiate the kNN, classify, measure accuracy.\n", 189 | "knn = cv2.KNearest()\n", 190 | "knn.train(trainData, responses)\n", 191 | "ret, result, neighbours, dist = knn.find_nearest(testData, k=5)\n", 192 | "correct = np.count_nonzero(result == labels)\n", 193 | "accuracy = correct*100.0/10000\n", 194 | "print accuracy" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": { 200 | "deletable": true, 201 | "editable": true 202 | }, 203 | "source": [ 204 | "It gives me an accuracy of 93.22%. 
Again, if you want to increase accuracy, you can iteratively add error data in each\n", 205 | "level.\n", 206 | "___" 207 | ] 208 | } 209 | ], 210 | "metadata": { 211 | "kernelspec": { 212 | "display_name": "Python 2", 213 | "language": "python", 214 | "name": "python2" 215 | }, 216 | "language_info": { 217 | "codemirror_mode": { 218 | "name": "ipython", 219 | "version": 2 220 | }, 221 | "file_extension": ".py", 222 | "mimetype": "text/x-python", 223 | "name": "python", 224 | "nbconvert_exporter": "python", 225 | "pygments_lexer": "ipython2", 226 | "version": "2.7.13rc1" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 0 231 | } 232 | -------------------------------------------------------------------------------- /Optical Character Recognition using Support Vector Machines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Optical Character Recognition using Support Vector Machines\n", 8 | "___" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## OCR of Hand-written Digits\n", 16 | "\n", 17 | "In kNN, we directly used pixel intensity as the feature vector. This time we will use Histogram of Oriented Gradients (HOG) as feature vectors.\n", 18 | "\n", 19 | "Here, before finding the HOG, we deskew the image using its second order moments. So we first define a function deskew() which takes a digit image and deskew it. Below is the deskew() function:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "collapsed": false 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "def deskew(img):\n", 31 | " m = cv2.moments(img)\n", 32 | " if abs(m['mu02']) < 1e-2:\n", 33 | " return img.copy()\n", 34 | " skew = m['mu11']/m['mu02']\n", 35 | " M = np.float32([[1, skew, -0.5*SZ*skew], [0, 1, 0]])\n", 36 | " img = cv2.warpAffine(img,M,(SZ, SZ),flags=affine_flags)\n", 37 | " return img" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "Below image shows above deskew function applied to an image of zero. Left image is the original image and right\n", 45 | "image is the deskewed image.\n", 46 | "\n", 47 | "" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "Next we have to find the HOG Descriptor of each cell. For that, we find Sobel derivatives of each cell in X and Y\n", 55 | "direction. Then find their magnitude and direction of gradient at each pixel. This gradient is quantized to 16 integer\n", 56 | "values. Divide this image to four sub-squares. For each sub-square, calculate the histogram of direction (16 bins)\n", 57 | "weighted with their magnitude. So each sub-square gives you a vector containing 16 values. Four such vectors (of\n", 58 | "four sub-squares) together gives us a feature vector containing 64 values. This is the feature vector we use to train our\n", 59 | "data." 
60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "def hog(img):\n", 71 | " gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)\n", 72 | " gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)\n", 73 | " mag, ang = cv2.cartToPolar(gx, gy)\n", 74 | " # quantizing binvalues in (0...16)\n", 75 | " bins = np.int32(bin_n*ang/(2*np.pi))\n", 76 | " # Divide to 4 sub-squares\n", 77 | " bin_cells = bins[:10,:10], bins[10:,:10], bins[:10,10:], bins[10:,10:]\n", 78 | " mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]\n", 79 | " hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]\n", 80 | " hist = np.hstack(hists)\n", 81 | " return hist" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "Finally, as in the previous case, we start by splitting our big dataset into individual cells. For every digit, 250 cells are\n", 89 | "reserved for training data and remaining 250 data is reserved for testing. Full code is given below:" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "import cv2\n", 101 | "import numpy as np\n", 102 | "SZ=20\n", 103 | "bin_n = 16 # Number of bins\n", 104 | "svm_params = dict( kernel_type = cv2.SVM_LINEAR, svm_type = cv2.SVM_C_SVC, C=2.67, gamma=5.383 )\n", 105 | "affine_flags = cv2.WARP_INVERSE_MAP|cv2.INTER_LINEAR\n", 106 | "\n", 107 | "def deskew(img):\n", 108 | " m = cv2.moments(img)\n", 109 | " if abs(m['mu02']) < 1e-2:\n", 110 | " return img.copy()\n", 111 | " skew = m['mu11']/m['mu02']\n", 112 | " M = np.float32([[1, skew, -0.5*SZ*skew], [0, 1, 0]])\n", 113 | " img = cv2.warpAffine(img,M,(SZ, SZ),flags=affine_flags)\n", 114 | " return img\n", 115 | "\n", 116 | "def hog(img):\n", 117 | " gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)\n", 118 | " gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)\n", 119 | " mag, ang = cv2.cartToPolar(gx, gy)\n", 120 | " bins = np.int32(bin_n*ang/(2*np.pi)) # quantizing binvalues in (0...16)\n", 121 | " bin_cells = bins[:10,:10], bins[10:,:10], bins[:10,10:], bins[10:,10:]\n", 122 | " mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]\n", 123 | " hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]\n", 124 | " hist = np.hstack(hists) # hist is a 64 bit vector\n", 125 | " return hist\n", 126 | "\n", 127 | "img = cv2.imread('digits.png',0)\n", 128 | "cells = [np.hsplit(row,100) for row in np.vsplit(img,50)]\n", 129 | "\n", 130 | "# First half is trainData, remaining is testData\n", 131 | "train_cells = [ i[:50] for i in cells ]\n", 132 | "test_cells = [ i[50:] for i in cells]\n", 133 | "\n", 134 | "###### Now training ########################\n", 135 | "deskewed = [map(deskew,row) for row in train_cells]\n", 136 | "hogdata = [map(hog,row) for row in deskewed]\n", 137 | "trainData = np.float32(hogdata).reshape(-1,64)\n", 138 | "responses = np.float32(np.repeat(np.arange(10),250)[:,np.newaxis])\n", 139 | "svm = cv2.SVM()\n", 140 | "svm.train(trainData,responses, params=svm_params)\n", 141 | "svm.save('svm_data.dat')\n", 142 | "\n", 143 | "###### Now testing ########################\n", 144 | "deskewed = [map(deskew,row) for row in test_cells]\n", 145 | "hogdata = [map(hog,row) for row in deskewed]\n", 146 | "testData = np.float32(hogdata).reshape(-1,bin_n*4)\n", 147 | "result = svm.predict_all(testData)\n", 148 | 
"\n", 149 | "####### Check Accuracy ########################\n", 150 | "mask = result==responses\n", 151 | "correct = np.count_nonzero(mask)\n", 152 | "print correct*100.0/result.size" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "This particular technique gave me nearly 94% accuracy. You can try different values for various parameters of SVM\n", 160 | "to check if higher accuracy is possible. Or you can read technical papers on this area and try to implement them." 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "___" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python 2", 174 | "language": "python", 175 | "name": "python2" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 2 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython2", 187 | "version": "2.7.11" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 0 192 | } 193 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Computer-Vision 2 | 3 | This repository contains computer vision notebooks from my website [appliedprogramming.net](http://www.appliedprogramming.net/computer-vision/home.html). 4 | 5 | | Computer Vision | 6 | |-----------------------| 7 | |[1. Basics](http://www.appliedprogramming.net/computer-vision/basics.html)| 8 | |[2. OpenCV - Basics](http://www.appliedprogramming.net/computer-vision/opencvbasics.html)| 9 | |[3. Operations on Images](http://www.appliedprogramming.net/computer-vision/imageoperations.html)| 10 | |[4. Image Processing](http://www.appliedprogramming.net/computer-vision/imageprocessing.html)| 11 | |[5. Feature Detection](http://www.appliedprogramming.net/computer-vision/featuredetection.html)| 12 | |[6. Video Analysis](http://www.appliedprogramming.net/computer-vision/videoanalysis.html)| 13 | |[7. Camera Calibration and 3D Reconstruction](http://www.appliedprogramming.net/computer-vision/cameracalibration.html)| 14 | |[Texture flow](http://www.appliedprogramming.net/computer-vision/textureflow.html)| 15 | |[Flood-fill demo](http://www.appliedprogramming.net/computer-vision/floodfill.html)| 16 | |[Object Detection and Path Planning](http://www.appliedprogramming.net/computer-vision/pathplanning.html)| 17 | |[Barcode Detector](http://www.appliedprogramming.net/computer-vision/barcodedetection.html)| 18 | |[Face Detection using Haar Cascades](http://www.appliedprogramming.net/computer-vision/facedetection.html)| 19 | |[Human Detection](http://www.appliedprogramming.net/computer-vision/humandetection.html)| 20 | |[Digit Recognition](http://www.appliedprogramming.net/computer-vision/digitrecognition.html)| 21 | |[Optical Character Recognition using K Nearest Neighbours](http://www.appliedprogramming.net/computer-vision/ocr-using-k-nearest-neighbours.html)| 22 | |[Optical Character Recognition using Support Vector Machines](http://www.appliedprogramming.net/computer-vision/ocr-using-support-vector-machines.html)| 23 | |[Shape Detection](http://www.appliedprogramming.net/computer-vision/shapedetection.html)| 24 | |[Zooming](http://www.appliedprogramming.net/computer-vision/zooming.html)| 25 | 26 |
-------------------------------------------------------------------------------- /Shape Detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Shape Detectors\n", 8 | "___\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "In this example, we would make a shape detector using OpenCV and Python.\n", 16 | "Following as you will see, we will use the _detect.py_ main python script that we’ll use to load an image, analyze it for shapes, and then perform shape detection and identification via the ShapeDetector class from the _shapedetector.py_ script." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "Given below is the _shapedetector.py_ script. Store this script in a folder named modules inside your project directory. " 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## _modules/shapedetector.py_" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "# import the necessary packages\n", 42 | "import cv2\n", 43 | "\n", 44 | "class ShapeDetector:\n", 45 | " def __init__(self):\n", 46 | " pass\n", 47 | "\n", 48 | " def detect(self, c):\n", 49 | " # initialize the shape name and approximate the contour\n", 50 | " shape = \"unidentified\"\n", 51 | " peri = cv2.arcLength(c, True)\n", 52 | " approx = cv2.approxPolyDP(c, 0.04 * peri, True)\n", 53 | "\n", 54 | " # if the shape is a triangle, it will have 3 vertices\n", 55 | " if len(approx) == 3:\n", 56 | " shape = \"triangle\"\n", 57 | "\n", 58 | " # if the shape has 4 vertices, it is either a square or a rectangle\n", 59 | " elif len(approx) == 4:\n", 60 | " # compute the bounding box of the contour and use the bounding box to compute the aspect ratio\n", 61 | " (x, y, w, h) = cv2.boundingRect(approx)\n", 62 | " ar = w / float(h)\n", 63 | "\n", 64 | " # a square will have an aspect ratio that is approximately equal to one, otherwise, the shape is a rectangle\n", 65 | " shape = \"square\" if ar >= 0.95 and ar <= 1.05 else \"rectangle\"\n", 66 | "\n", 67 | " # if the shape is a pentagon, it will have 5 vertices\n", 68 | " elif len(approx) == 5:\n", 69 | " shape = \"pentagon\"\n", 70 | "\n", 71 | " # otherwise, we assume the shape is a circle\n", 72 | " else:\n", 73 | " shape = \"circle\"\n", 74 | "\n", 75 | " # return the name of the shape\n", 76 | " return shape" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "To perform shape detection, we’ll be using contour approximation. The _ShapeDetector_ class inside this script has a _detect_ method that takes one argument 'c' , the contour (i.e., outline) of the shape we are trying to identify.\n", 84 | "\n", 85 | "Contour approximation can be implemented by using the predefined OpenCV method _cv2.approxPolyDP()_. Common values for the second parameter to cv2.approxPolyDP are normally in the range of 1-5% of the original contour perimeter. \n", 86 | "\n", 87 | "Here in the code, we first compute the perimeter of the contour, followed by constructing the actual contour approximation.\n", 88 | "And thereafter, we can move on to performing shape detection. \n", 89 | "\n", 90 | "A contour consists of a list of vertices. 
We can check the number of entries in this list to determine the shape of an object.\n", 91 | "For example, if the approximated contour has three vertices, then it must be a triangle. If a contour has four vertices, then it must be either a square or a rectangle. To determine which, we compute the aspect ratio of the shape, which is simply the width of the contour bounding box divided by the height. If the aspect ratio is ~1.0, then we are examining a square (since all sides have approximately equal length). Otherwise, the shape is a rectangle. Labelling a shape as a pentagon is straightforward, we just have to detect it has 5 sides. Ultimately, by process of elimination, we can assume that the analyzed shape is a circle.\n", 92 | "\n", 93 | "Finally, we return the detected shape to the calling method.\n", 94 | "___" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "We would use the shapedetector script as a module and import it. Given below is the code of our main script _detect()_ that would use the ShapeDetector class from our shapedetector script.\n", 102 | "\n", 103 | "## _detect()_" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": true 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "# import the necessary packages\n", 115 | "from modules.shapedetector import ShapeDetector\n", 116 | "import argparse\n", 117 | "import imutils\n", 118 | "import numpy as np\n", 119 | "import cv2\n", 120 | "\n", 121 | "# load the image and resize it to a smaller factor so that the shapes can be approximated better\n", 122 | "image = cv2.imread(\"images/shapes_and_colors.png\")\n", 123 | "resized = imutils.resize(image, width=300)\n", 124 | "ratio = (image.shape[0] / float(resized.shape[0]))\n", 125 | "\n", 126 | "# convert the resized image to grayscale, blur it slightly, and threshold it\n", 127 | "gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)\n", 128 | "blurred = cv2.GaussianBlur(gray, (5, 5), 0)\n", 129 | "thresh = cv2.threshold(blurred, 60, 255, cv2.THRESH_BINARY)[1]\n", 130 | "\n", 131 | "# find contours in the thresholded image and initialize the shape detector\n", 132 | "cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)\n", 133 | "cnts = cnts[0] if imutils.is_cv2() else cnts[1]\n", 134 | "sd = ShapeDetector()\n", 135 | "\n", 136 | "# loop over the contours\n", 137 | "for c in cnts:\n", 138 | " # compute the center of the contour, then detect the name of the ashape using only the contour\n", 139 | " M = cv2.moments(c)\n", 140 | " cX = int((M[\"m10\"] / M[\"m00\"]) * ratio)\n", 141 | " cY = int((M[\"m01\"] / M[\"m00\"]) * ratio)\n", 142 | " shape = sd.detect(c)\n", 143 | "\n", 144 | " # multiply the contour (x, y)-coordinates by the resize ratio,\n", 145 | " # then draw the contours and the name of the shape on the image\n", 146 | " c *= np.int32(ratio)\n", 147 | " cv2.drawContours(image, [c], -1, (0, 255, 0), 2)\n", 148 | " cv2.putText(image, shape, (cX, cY), cv2.FONT_HERSHEY_SIMPLEX,\n", 149 | " 0.5, (255, 255, 255), 2)\n", 150 | "\n", 151 | " # show the output image\n", 152 | " cv2.imshow(\"Image\", image)\n", 153 | " cv2.waitKey(0)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "Here we first load our image and then resize it. 
We keep track of the ratio of the old height to the new resized height.\n", 161 | "From there we perform basic image processing: converting the resized image to grayscale, smoothing it to reduce high frequency noise, and finally thresholding it to reveal the shapes in the image.\n", 162 | "\n", 163 | "Lastly, we find contours in our threshold image, handle grabbing the correct tuple value from _cv2.findContours()_, and finally initialize our ShapeDetector:\n", 164 | "We start looping over each of the individual contours. For each of them, we compute the center of the contour, followed by performing shape detection and labeling.\n", 165 | "\n", 166 | "Since we are processing the contours extracted from the resized image (rather than the original image), we need to multiply the contours and center (x, y)-coordinates by our resize ratio. This will give us the correct (x, y)-coordinates for both the contours and centroid of the original image.\n", 167 | "\n", 168 | "Lastly, we draw the contours and the labeled shape on our image, followed by displaying our results." 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "I used the following image for testing the code. Feel free to make your own images with shapes to try and test it.\n", 176 | "\n", 177 | "" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "The resulting output with the threshold image and the final image with all the shapes detected looked like: \n", 185 | "\n", 186 | "\n" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "___\n", 194 | "\n", 195 | "" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "___" 203 | ] 204 | } 205 | ], 206 | "metadata": { 207 | "kernelspec": { 208 | "display_name": "Python 2", 209 | "language": "python", 210 | "name": "python2" 211 | }, 212 | "language_info": { 213 | "codemirror_mode": { 214 | "name": "ipython", 215 | "version": 2 216 | }, 217 | "file_extension": ".py", 218 | "mimetype": "text/x-python", 219 | "name": "python", 220 | "nbconvert_exporter": "python", 221 | "pygments_lexer": "ipython2", 222 | "version": "2.7.11" 223 | } 224 | }, 225 | "nbformat": 4, 226 | "nbformat_minor": 0 227 | } 228 | -------------------------------------------------------------------------------- /Texture Flow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Texture flow direction estimation.\n", 8 | "\n", 9 | "
\n", 10 | "\n", 11 | "This example shows how we can use cv2.cornerEigenValsAndVecs function to estimate image texture flow direction.\n", 12 | "\n", 13 | "Here's our input image:" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "![Input_Image](https://raw.githubusercontent.com/Applied-Programming/Computer-Vision/master/images/starry_night.jpg)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "# Python 2/3 compatibility\n", 32 | "from __future__ import print_function\n", 33 | "\n", 34 | "import numpy as np\n", 35 | "import cv2\n", 36 | "\n", 37 | "if __name__ == '__main__':\n", 38 | " import sys\n", 39 | " fn = 'images/starry_night.jpg'\n", 40 | "\n", 41 | " img = cv2.imread(fn)\n", 42 | " if img is None:\n", 43 | " print('Failed to load image file:', fn)\n", 44 | " sys.exit(1)\n", 45 | "\n", 46 | " gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", 47 | " h, w = img.shape[:2]\n", 48 | "\n", 49 | " eigen = cv2.cornerEigenValsAndVecs(gray, 15, 3)\n", 50 | " eigen = eigen.reshape(h, w, 3, 2) # [[e1, e2], v1, v2]\n", 51 | " flow = eigen[:,:,2]\n", 52 | "\n", 53 | " vis = img.copy()\n", 54 | " vis[:] = (192 + np.uint32(vis)) / 2\n", 55 | " d = 12\n", 56 | " points = np.dstack( np.mgrid[d/2:w:d, d/2:h:d] ).reshape(-1, 2)\n", 57 | " for x, y in np.int32(points):\n", 58 | " vx, vy = np.int32(flow[y, x]*d)\n", 59 | " cv2.line(vis, (x-vx, y-vy), (x+vx, y+vy), (0, 0, 0), 1, cv2.LINE_AA)\n", 60 | " cv2.imshow('input', img)\n", 61 | " cv2.imshow('flow', vis)\n", 62 | " cv2.waitKey()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "By running the script, we get the following output image which shows the texture flow as we intended." 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "![Output](https://raw.githubusercontent.com/Applied-Programming/Computer-Vision/master/results/textureflow.jpg)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "
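As a side note (this check is not part of the original sample), cv2.cornerEigenValsAndVecs stores six channels per pixel: the two eigenvalues of the local gradient covariation matrix followed by their eigenvectors, (lambda1, lambda2, x1, y1, x2, y2). The script above keeps the (x2, y2) pair as the flow direction. A minimal sketch that inspects one pixel:

```python
from __future__ import print_function
import cv2

img = cv2.imread('images/starry_night.jpg')        # same input as the script above
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
eigen = cv2.cornerEigenValsAndVecs(gray, 15, 3)    # same blockSize and ksize as above
h, w = gray.shape[:2]
l1, l2, x1, y1, x2, y2 = eigen[h // 2, w // 2]     # the six values at the centre pixel
print('eigenvalues  : %g, %g' % (l1, l2))
print('eigenvector 1: (%g, %g)' % (x1, y1))
print('eigenvector 2: (%g, %g)  <- used as the flow direction above' % (x2, y2))
```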
" 84 | ] 85 | } 86 | ], 87 | "metadata": { 88 | "kernelspec": { 89 | "display_name": "Python 2", 90 | "language": "python", 91 | "name": "python2" 92 | }, 93 | "language_info": { 94 | "codemirror_mode": { 95 | "name": "ipython", 96 | "version": 2 97 | }, 98 | "file_extension": ".py", 99 | "mimetype": "text/x-python", 100 | "name": "python", 101 | "nbconvert_exporter": "python", 102 | "pygments_lexer": "ipython2", 103 | "version": "2.7.12" 104 | } 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 2 108 | } 109 | -------------------------------------------------------------------------------- /Zooming.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Zooming\n", 11 | "
\n", 12 | "\n", 13 | "This example shows how to implement simple high-resolution image navigation using OpenCV for Python.\n", 14 | "We simply use the cv2.getRectSubPix() function, which takes the following parameters (a minimal standalone call is sketched just after the list):\n", 15 | "\n", 16 | "Parameters:\n", 17 | "* src – Source image.\n", 18 | "* patchSize – Size of the extracted patch.\n", 19 | "* center – Floating point coordinates of the center of the extracted rectangle within the source image. The center must be inside the image.\n", 20 | "\n", 21 | "
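As a minimal illustration of cv2.getRectSubPix on its own (a sketch, separate from the navigation script below; the image path and patch size are placeholders), the call below extracts a 100x100 patch centred on the middle of an image:

```python
from __future__ import print_function
import cv2

img = cv2.imread('images/flower.jpg')       # any image from the images folder will do
h, w = img.shape[:2]

# extract a 100x100 patch centred on the middle of the image;
# the centre is passed as floating point (x, y) coordinates
patch = cv2.getRectSubPix(img, (100, 100), (w / 2.0, h / 2.0))
print(patch.shape)                          # -> (100, 100, 3)
```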
\n", 22 | "\n", 23 | "\n", 24 | "## zoom.py\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": true, 32 | "deletable": true, 33 | "editable": true 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "#!/usr/bin/env python\n", 38 | "# Python 2/3 compatibility\n", 39 | "from __future__ import print_function\n", 40 | "import sys\n", 41 | "PY3 = sys.version_info[0] == 3\n", 42 | "\n", 43 | "if PY3:\n", 44 | " xrange = range\n", 45 | "\n", 46 | "import numpy as np\n", 47 | "import cv2\n", 48 | "\n", 49 | "# built-in modules\n", 50 | "import sys\n", 51 | "\n", 52 | "if __name__ == '__main__':\n", 53 | " print('This example shows how to implement a simple hi resolution image navigation.')\n", 54 | " print('USAGE: browse.py [image filename]')\n", 55 | " print()\n", 56 | "\n", 57 | " fn = 'images/hi-res-nyc.jpg'\n", 58 | " print('loading %s ...' % fn)\n", 59 | " img = cv2.imread(fn)\n", 60 | " if img is None:\n", 61 | " print('Failed to load fn:', fn)\n", 62 | " sys.exit(1)\n", 63 | " \n", 64 | " small = img\n", 65 | " for i in xrange(3):\n", 66 | " small = cv2.pyrDown(small)\n", 67 | "\n", 68 | " def onmouse(event, x, y, flags, param):\n", 69 | " h, w = img.shape[:2]\n", 70 | " h1, w1 = small.shape[:2]\n", 71 | " x, y = 1.0*x*h/h1, 1.0*y*h/h1\n", 72 | " zoom = cv2.getRectSubPix(img, (800, 600), (x+0.5, y+0.5))\n", 73 | " cv2.imshow('zoom', zoom)\n", 74 | "\n", 75 | " cv2.imshow('preview', small)\n", 76 | " cv2.setMouseCallback('preview', onmouse)\n", 77 | " cv2.waitKey()\n", 78 | " cv2.destroyAllWindows()\n" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": { 84 | "deletable": true, 85 | "editable": true 86 | }, 87 | "source": [ 88 | "The output of this script is as follows:\n", 89 | "\n", 90 | "![Output Image](https://raw.githubusercontent.com/Applied-Programming/Computer-Vision/master/captures/zoom.png \"Zooming\")" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": { 96 | "deletable": true, 97 | "editable": true 98 | }, 99 | "source": [ 100 | "The program zooms that patch of the hi-res image where the cursor is currently positioned." 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": { 106 | "deletable": true, 107 | "editable": true 108 | }, 109 | "source": [ 110 | "
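Since the preview is built by three successive cv2.pyrDown calls, and each call roughly halves the width and height, the preview window is about one eighth of the original resolution in each dimension; the onmouse handler then scales the cursor position back up by the same factor. A quick sketch (the image path is just a placeholder) to confirm the sizes:

```python
from __future__ import print_function
import cv2

img = cv2.imread('images/flower.jpg')       # placeholder image
small = img
for i in range(3):
    small = cv2.pyrDown(small)              # each call roughly halves both dimensions

print('original size:', img.shape[:2])
print('preview size :', small.shape[:2])    # roughly 1/8 of the original in each dimension
```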
" 111 | ] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 2", 117 | "language": "python", 118 | "name": "python2" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 2 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython2", 130 | "version": "2.7.13rc1" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 2 135 | } 136 | -------------------------------------------------------------------------------- /captures/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/1.png -------------------------------------------------------------------------------- /captures/ataritm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/ataritm.png -------------------------------------------------------------------------------- /captures/barcodedetection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/barcodedetection.png -------------------------------------------------------------------------------- /captures/batman1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/batman1.png -------------------------------------------------------------------------------- /captures/br.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/br.png -------------------------------------------------------------------------------- /captures/br2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/br2.png -------------------------------------------------------------------------------- /captures/bthresh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/bthresh.png -------------------------------------------------------------------------------- /captures/circles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/circles.png -------------------------------------------------------------------------------- /captures/ck.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/ck.png -------------------------------------------------------------------------------- /captures/closing.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/closing.png -------------------------------------------------------------------------------- /captures/corners2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/corners2.png -------------------------------------------------------------------------------- /captures/cs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/cs.png -------------------------------------------------------------------------------- /captures/denoise1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/denoise1.png -------------------------------------------------------------------------------- /captures/digitrecognizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/digitrecognizer.png -------------------------------------------------------------------------------- /captures/facedetection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/facedetection.png -------------------------------------------------------------------------------- /captures/featuresmatched.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/featuresmatched.png -------------------------------------------------------------------------------- /captures/featuresmatched2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/featuresmatched2.png -------------------------------------------------------------------------------- /captures/floodfill.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/floodfill.png -------------------------------------------------------------------------------- /captures/foreground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/foreground.png -------------------------------------------------------------------------------- /captures/gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/captures/gradient.png -------------------------------------------------------------------------------- /captures/hc.png: -------------------------------------------------------------------------------- 
[Binary assets omitted: the remaining entries under `captures/`, `datasets/`, `images/`, `results/`, and `videos/` are image and video files only. Each is available at `https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/b70574ce66a633c9c2e650c1f07d5e5e17d8f742/<path>`, where `<path>` is the file's repository path.]
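For readers working through the notebooks outside a full clone, a minimal sketch of downloading one of the listed assets from the raw URL base above is shown below. The helper name `fetch_asset`, the local destination layout, and the choice of `images/flower.jpg` as the example path are illustrative assumptions, not part of the original repository code.

```python
# Illustrative sketch (not part of the original notebooks): download a single
# binary asset from the raw.githubusercontent.com base listed above so that
# relative paths like 'images/flower.jpg' resolve locally.
import os
from urllib.request import urlretrieve

BASE = ("https://raw.githubusercontent.com/Aniruddha-Tapas/Computer-Vision/"
        "b70574ce66a633c9c2e650c1f07d5e5e17d8f742/")

def fetch_asset(rel_path, dest_root="."):
    """Fetch a repository asset (e.g. 'images/flower.jpg') if it is not already present."""
    dest = os.path.join(dest_root, rel_path)
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    if not os.path.exists(dest):
        urlretrieve(BASE + rel_path, dest)  # simple one-shot download, no retries
    return dest

# Example: fetch one of the images listed above.
fetch_asset("images/flower.jpg")
```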