├── .gitattributes
├── .gitignore
├── Chapter01
│   ├── 01_HSV.py
│   ├── 01_gray.py
│   ├── 01_gray_3D.py
│   ├── 01_hls.py
│   ├── 01_image_io.py
│   └── 01_lab.py
├── Chapter02
│   ├── 02_keras_cifar10.py
│   └── 02_mnist_plot.py
├── Chapter03
│   ├── 03_affine_transform.py
│   ├── 03_clahe_hist_equalization.py
│   ├── 03_convolution.py
│   ├── 03_fourier_transform.py
│   ├── 03_gaussian_blur.py
│   ├── 03_hist_equalize.py
│   ├── 03_image_derivatives.py
│   ├── 03_median_filtering.py
│   ├── 03_plot_image.py
│   ├── 03_point_operations.py
│   ├── 03_projective_tranformed.py
│   ├── 03_pyramid_down_smaple.py
│   ├── 03_smoothing.py
│   └── 03_transformation.py
├── Chapter04
│   ├── 04_base.py
│   ├── 04_fast_feature.py
│   ├── 04_feature_match.py
│   ├── 04_flann_feature_match.py
│   ├── 04_orb_detections.py
│   ├── 04_sift_features.py
│   ├── 04_sk_orb_features.py
│   └── 04_template_matching.py
├── Chapter05
│   ├── 05_nn1.py
│   ├── 05_nn2.py
│   ├── 05_nn_mnist.py
│   ├── 05_nn_vis.py
│   ├── 05_print_activation.py
│   ├── 05_print_conv_out.py
│   ├── 05_print_dense.py
│   ├── 05_print_inceptionv3.py
│   ├── 05_print_pooling.py
│   ├── 05_print_resnet.py
│   └── 05_print_vgg16.py
├── Chapter06
│   ├── 06_face_detection_webcam.py
│   ├── 06_mscoco_seg_vis.py
│   └── 06_youtube_ssd_demo.py
├── Chapter07
│   └── 07_fcn_32s_keras.py
├── Chapter08
│   └── 08_compute_F_mat.py
├── LICENSE
└── README.md

/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto

# Custom for Visual Studio
*.cs diff=csharp

# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Windows image file caches
Thumbs.db
ehthumbs.db

# Folder config file
Desktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows Installer files
*.cab
*.msi
*.msm
*.msp

# Windows shortcuts
*.lnk

# =========================
# Operating System Files
# =========================

# OSX
# =========================

.DS_Store
.AppleDouble
.LSOverride

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

--------------------------------------------------------------------------------
/Chapter01/01_HSV.py:
--------------------------------------------------------------------------------
import cv2

# loads and reads an image from the given file path
img = cv2.imread('../figures/flower.png')

# convert the color to hsv
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# displays the converted image
cv2.imshow("Image", hsv)

# keeps the window open until a key is pressed
cv2.waitKey(0)

# clears all window buffers
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/Chapter01/01_gray.py:
--------------------------------------------------------------------------------
import cv2

# loads and reads an image from the given file path
img = cv2.imread('../figures/flower.png')

# convert the color to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# displays the converted image
cv2.imshow("Image", gray)

# keeps the window open until a key is pressed
cv2.waitKey(0)

# clears all window buffers
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/Chapter01/01_gray_3D.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import cv2


# loads and reads an image from the given file path
img = cv2.imread('../figures/building_sm.png')

# convert the color to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.resize(gray, (160, 120))

# apply smoothing operation
gray = cv2.blur(gray, (3,3))

# create grid to plot using numpy
xx, yy = np.mgrid[0:gray.shape[0], 0:gray.shape[1]]

# create the figure
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(xx, yy, gray, rstride=1, cstride=1, cmap=plt.cm.gray,
                linewidth=1)
# show it
plt.show()

--------------------------------------------------------------------------------
/Chapter01/01_hls.py:
--------------------------------------------------------------------------------
import cv2

# loads and reads an image from the given file path
img = cv2.imread('../figures/flower.png')

# convert the color to hls
hls = cv2.cvtColor(img, cv2.COLOR_BGR2HLS)

# displays the converted image
cv2.imshow("Image", hls)

# keeps the window open until a key is pressed
cv2.waitKey(0)

# clears all window buffers
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/Chapter01/01_image_io.py:
--------------------------------------------------------------------------------
import cv2

# loads and reads an image from the given file path
img = cv2.imread('../figures/flower.png')

# displays the loaded image
cv2.imshow("Image", img)

# keeps the window open until a key is pressed
cv2.waitKey(0)

# clears all window buffers
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/Chapter01/01_lab.py:
--------------------------------------------------------------------------------
import cv2

# loads and reads an image from the given file path
img = cv2.imread('../figures/flower.png')

# convert the color to lab
lab = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)

# displays the converted image
cv2.imshow("Image", lab)

# keeps the window open until a key is pressed
cv2.waitKey(0)

# clears all window buffers
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/Chapter02/02_keras_cifar10.py:
--------------------------------------------------------------------------------
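"""Loads the CIFAR-10 dataset with Keras and displays a sample image with its label."""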
from __future__ import print_function

from keras.datasets import cifar10
import matplotlib.pyplot as plt

# Download and load dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# to know the size of data
print("Train data shape:", x_train.shape, "Test data shape:", x_test.shape)

# plot sample image
idx = 1500
print("Label:", labels[y_train[idx][0]])
plt.imshow(x_train[idx])
plt.axis('off')
plt.show()

--------------------------------------------------------------------------------
/Chapter02/02_mnist_plot.py:
--------------------------------------------------------------------------------
from __future__ import print_function

from keras.datasets import mnist
import matplotlib.pyplot as plt

# Download and load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# to know the size of data
print("Train data shape:", x_train.shape, "Test data shape:", x_test.shape)

# plot sample image
idx = 0
print("Label:", y_train[idx])
plt.imshow(x_train[idx], cmap='gray')
plt.axis('off')
plt.show()

--------------------------------------------------------------------------------
/Chapter03/03_affine_transform.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_cv_img(input_image):
    """
    Converts an image from BGR to RGB and plots
    """
    # change color channels order for matplotlib
    plt.imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))

    # For easier view, turn off axis around image
    plt.axis('off')
    plt.show()


def main():
    # read an image
    img = cv2.imread('../figures/building.jpg')

    # create transformation matrix from preselected points
    pts1 = np.float32([[50,50],[200,50],[50,200]])
    pts2 = np.float32([[10,100],[200,50],[100,250]])
    affine_tr = cv2.getAffineTransform(pts1, pts2)

    transformed = cv2.warpAffine(img, affine_tr, (img.shape[1]*2, img.shape[0]*2))

    # Do plot
    plot_cv_img(transformed)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_clahe_hist_equalization.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_gray(input_image):
    """
    plot grayscale image with no axis
    """
    # plot grayscale image with gray colormap
    plt.imshow(input_image, cmap='gray')

    # turn off axis for easier view
    plt.axis('off')
    plt.show()

def plot_hist_cdf(cdf_normalized, img):
    plt.plot(cdf_normalized, color = 'b')
    plt.hist(img.flatten(), 256, [0,256], color = 'r')
    plt.xlim([0,256])
    plt.legend(('cdf','histogram'), loc = 'upper left')
    plt.show()



def main():
    # read an image
    img = cv2.imread('../figures/flower.png')
    crop_gray = cv2.cvtColor(img[100:400, 100:400], cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    cl1 = clahe.apply(crop_gray)
    res = np.hstack((crop_gray, cl1))
    plot_gray(res)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_convolution.py:
--------------------------------------------------------------------------------
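"""Applies a 5x5 averaging-kernel convolution (cv2.filter2D) to a grayscale image and plots the input and the result side by side."""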
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_cv_img(input_image, output_image):
    """
    Plots input and convolved images side by side, in grayscale
    """
    # create a figure with two subplots
    fig, ax = plt.subplots(nrows=1, ncols=2)

    ax[0].imshow(input_image, cmap='gray')
    ax[0].set_title('Input Image')
    ax[0].axis('off')

    ax[1].imshow(output_image, cmap='gray')
    ax[1].set_title('Convolution')
    ax[1].axis('off')

    plt.savefig('../figures/03_convolution.png')

    plt.show()


def main():
    # read an image
    img = cv2.imread('../figures/flower.png')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # initialize noise image with zeros
    noise = np.zeros((400, 600))

    # fill the image with random numbers in given range
    cv2.randu(noise, 0, 255)

    noisy_gray = gray + np.array(0.2*noise, dtype=np.int32)

    kernel = np.ones((5,5), np.float32)/25
    dst = cv2.filter2D(gray, -1, kernel)

    # Do plot
    plot_cv_img(gray, dst)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_fourier_transform.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_gray(input_image):
    """
    plot grayscale image with no axis
    """
    # plot grayscale image with gray colormap
    plt.imshow(input_image, cmap='gray')

    # turn off axis for easier view
    plt.axis('off')
    plt.show()


def plot_dft(crop_gray, magnitude_spectrum):
    plt.subplot(121), plt.imshow(crop_gray, cmap = 'gray')
    plt.title('Input Image'), plt.xticks([]), plt.yticks([])
    plt.subplot(122), plt.imshow(magnitude_spectrum, cmap = 'gray')
    plt.title('Magnitude Spectrum'), plt.xticks([]), plt.yticks([])
    plt.show()



def main():
    # read an image
    img = cv2.imread('../figures/flower.png')

    # create cropped grayscale image from the original image
    crop_gray = cv2.cvtColor(img[100:400, 100:400], cv2.COLOR_BGR2GRAY)

    # take discrete fourier transform
    dft = cv2.dft(np.float32(crop_gray), flags = cv2.DFT_COMPLEX_OUTPUT)
    dft_shift = np.fft.fftshift(dft)
    magnitude_spectrum = 20*np.log(cv2.magnitude(dft_shift[:,:,0], dft_shift[:,:,1]))

    # plot results
    plot_dft(crop_gray, magnitude_spectrum)



if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_gaussian_blur.py:
--------------------------------------------------------------------------------
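"""Applies a 5x5 Gaussian blur to an image and plots the input and the blurred result side by side."""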
""" 12 | 13 | fig, ax = plt.subplots(nrows=1, ncols=2) 14 | 15 | ax[0].imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)) 16 | ax[0].set_title('Input Image') 17 | ax[0].axis('off') 18 | 19 | ax[1].imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)) 20 | ax[1].set_title('Gaussian Blurred') 21 | ax[1].axis('off') 22 | 23 | plt.savefig('../figures/03_gaussian_blur.png') 24 | 25 | plt.show() 26 | 27 | 28 | def main(): 29 | # read an image 30 | img = cv2.imread('../figures/flower.png') 31 | 32 | blur = cv2.GaussianBlur(img,(5,5),0) 33 | 34 | # Do plot 35 | plot_cv_img(img, blur) 36 | 37 | if __name__ == '__main__': 38 | main() -------------------------------------------------------------------------------- /Chapter03/03_hist_equalize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import cv2 4 | # With jupyter notebook uncomment below line 5 | # %matplotlib inline 6 | # This plots figures inside the notebook 7 | 8 | def plot_gray(input_image, output_image1, output_image2): 9 | """ 10 | Converts an image from BGR to RGB and plots 11 | """ 12 | # change color channels order for matplotlib 13 | fig, ax = plt.subplots(nrows=1, ncols=3) 14 | 15 | ax[0].imshow(input_image, cmap='gray') 16 | ax[0].set_title('Input Image') 17 | ax[0].axis('off') 18 | 19 | ax[1].imshow(output_image1, cmap='gray') 20 | ax[1].set_title('Histogram Equalized ') 21 | ax[1].axis('off') 22 | 23 | ax[2].imshow(output_image2, cmap='gray') 24 | ax[2].set_title('Histogram Equalized ') 25 | ax[2].axis('off') 26 | 27 | # plt.savefig('../figures/03_histogram_equalized.png') 28 | 29 | plt.show() 30 | 31 | def plot_hist_cdf(cdf_normalized, img): 32 | plt.plot(cdf_normalized, color = 'b') 33 | plt.hist(img.flatten(),256,[0,256], color = 'r') 34 | plt.xlim([0,256]) 35 | plt.legend(('cdf','histogram'), loc = 'upper left') 36 | plt.show() 37 | 38 | 39 | 40 | def main(): 41 | # read an image 42 | img = cv2.imread('../figures/_DSC2126.jpg') 43 | img = cv2.resize(img, (600,400)) 44 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 45 | 46 | 47 | # hist,bins = np.histogram(img[100:400, 100:400].flatten(),256,[0,256]) 48 | # cdf = hist.cumsum() 49 | # cdf_normalized = cdf * hist.max()/ cdf.max() 50 | 51 | # # plot hist normalized 52 | # plot_hist_cdf(cdf_normalized, img[100:400, 100:400]) 53 | 54 | equ = cv2.equalizeHist(gray) 55 | 56 | # create a CLAHE object (Arguments are optional). 
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_cv_img(input_image, output_image1, output_image2):
    """
    Plots the input image, Laplacian, and Laplacian of Gaussian side by side, in grayscale
    """

    fig, ax = plt.subplots(nrows=1, ncols=3)

    ax[0].imshow(input_image, cmap='gray')
    ax[0].set_title('Input Image')
    ax[0].axis('off')

    ax[1].imshow(output_image1, cmap='gray')
    ax[1].set_title('Laplacian Image')
    ax[1].axis('off')

    ax[2].imshow(output_image2, cmap = 'gray')
    ax[2].set_title('Laplacian of Gaussian')
    ax[2].axis('off')

    # ax[3].imshow(output_image3, cmap = 'gray')
    # ax[3].set_title('Sharpened Image')
    # ax[3].axis('off')

    plt.savefig('../figures/03_image_derivatives_log.png')

    plt.show()


def main():
    # read an image
    img = cv2.imread('../figures/building_crop.jpg')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)



    # sobel
    x_sobel = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=5)
    y_sobel = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=5)

    # laplacian
    lapl = cv2.Laplacian(img, cv2.CV_64F, ksize=5)

    # gaussian blur
    blur = cv2.GaussianBlur(img, (5,5), 0)
    # laplacian of gaussian
    log = cv2.Laplacian(blur, cv2.CV_64F, ksize=5)

    # res = np.hstack([img, x_sobel, y_sobel])
    # plt.imshow(res, cmap='gray')
    # plt.axis('off')
    # plt.show()
    # lapl = np.asarray(lapl, dtype= np.uint)
    # Do plot
    plot_cv_img(img, lapl, log)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_median_filtering.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_cv_img(input_image, output_image1, output_image2, output_image3):
    """
    Converts an image from BGR to RGB and plots
    """

    fig, ax = plt.subplots(nrows=1, ncols=4)

    ax[0].imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))
    ax[0].set_title('Input Image')
    ax[0].axis('off')

    ax[1].imshow(cv2.cvtColor(output_image1, cv2.COLOR_BGR2RGB))
    ax[1].set_title('Median Filter (3,3)')
    ax[1].axis('off')

    ax[2].imshow(cv2.cvtColor(output_image2, cv2.COLOR_BGR2RGB))
    ax[2].set_title('Median Filter (5,5)')
    ax[2].axis('off')

    ax[3].imshow(cv2.cvtColor(output_image3, cv2.COLOR_BGR2RGB))
    ax[3].set_title('Median Filter (7,7)')
    ax[3].axis('off')

    # plt.savefig('../figures/03_median_filter.png')

    plt.show()


def main():
    # read an image
    img = cv2.imread('../figures/flower.png')


    median1 = cv2.medianBlur(img, 3)
    median2 = cv2.medianBlur(img, 5)
    median3 = cv2.medianBlur(img, 7)


    # Do plot
    plot_cv_img(img, median1, median2, median3)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_plot_image.py:
--------------------------------------------------------------------------------
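"""Reads an image with OpenCV and plots it with matplotlib after BGR-to-RGB conversion."""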
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_cv_img(input_image):
    """
    Converts an image from BGR to RGB and plots
    """
    # change color channels order for matplotlib
    plt.imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))

    # For easier view, turn off axis around image
    plt.axis('off')
    plt.show()


def main():
    # read an image
    img = cv2.imread('../figures/flower.png')

    # Do plot
    plot_cv_img(img)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_point_operations.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook



def point_operation(img, K, L):
    """
    Applies point operation to given grayscale image
    """
    img = np.asarray(img, dtype=np.float32)
    img = img*K + L
    img[img > 255] = 255
    img[img < 0] = 0
    return np.asarray(img, dtype=np.uint8)

def main():
    # read an image
    img = cv2.imread('../figures/flower.png')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # k = 0.5, l = 0
    out1 = point_operation(gray, 0.5, 0)

    # k = 1., l = 10
    out2 = point_operation(gray, 1., 10)

    # k = 0.7, l = 25
    out3 = point_operation(gray, 0.7, 25)

    res = np.hstack([gray, out1, out2, out3])
    plt.imshow(res, cmap='gray')
    plt.axis('off')

    plt.show()


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_projective_tranformed.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_cv_img(input_image):
    """
    Converts an image from BGR to RGB and plots
    """
    # change color channels order for matplotlib
    plt.imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))

    # For easier view, turn off axis around image
    plt.axis('off')
    plt.show()


def main():
    # read an image
    img = cv2.imread('../figures/building.jpg')

    # create transformation matrix from preselected points
    pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]])
    pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]])

    perspective_tr = cv2.getPerspectiveTransform(pts1, pts2)

    transformed = cv2.warpPerspective(img, perspective_tr, (300,300))

    # Do plot
    plot_cv_img(transformed)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_pyramid_down_smaple.py:
--------------------------------------------------------------------------------
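"""Builds a Gaussian image pyramid with cv2.pyrDown / cv2.pyrUp and plots the lower- and higher-resolution results."""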
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_lr_img(input_image, l1, l2, l3):
    """
    Converts an image from BGR to RGB and plots
    """

    fig, ax = plt.subplots(nrows=1, ncols=4)

    ax[0].imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))
    ax[0].set_title('Input Image (400,600)')
    ax[0].axis('off')

    ax[1].imshow(cv2.cvtColor(l1, cv2.COLOR_BGR2RGB))
    ax[1].set_title('Lower Resolution (200, 300)')
    ax[1].axis('off')

    ax[2].imshow(cv2.cvtColor(l2, cv2.COLOR_BGR2RGB))
    ax[2].set_title('Lower Resolution (100, 150)')
    ax[2].axis('off')

    ax[3].imshow(cv2.cvtColor(l3, cv2.COLOR_BGR2RGB))
    ax[3].set_title('Lower Resolution (50, 75)')
    ax[3].axis('off')

    # plt.savefig('../figures/03_pyr_down_sample.png')

    plt.show()


def plot_hy_img(input_image, h1, h2, h3):
    """
    Converts an image from BGR to RGB and plots
    """

    fig, ax = plt.subplots(nrows=1, ncols=4)

    ax[0].imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))
    ax[0].set_title('Input Image (50,75)')
    ax[0].axis('off')

    ax[1].imshow(cv2.cvtColor(h1, cv2.COLOR_BGR2RGB))
    ax[1].set_title('Higher Resolution (100, 150)')
    ax[1].axis('off')

    ax[2].imshow(cv2.cvtColor(h2, cv2.COLOR_BGR2RGB))
    ax[2].set_title('Higher Resolution (200, 300)')
    ax[2].axis('off')

    ax[3].imshow(cv2.cvtColor(h3, cv2.COLOR_BGR2RGB))
    ax[3].set_title('Higher Resolution (400, 600)')
    ax[3].axis('off')

    # plt.savefig('../figures/03_pyr_down_sample.png')

    plt.show()


def main():
    # read an image
    img = cv2.imread('../figures/flower.png')
    print(img.shape)

    lower_resolution1 = cv2.pyrDown(img)
    print(lower_resolution1.shape)

    lower_resolution2 = cv2.pyrDown(lower_resolution1)
    print(lower_resolution2.shape)

    lower_resolution3 = cv2.pyrDown(lower_resolution2)
    print(lower_resolution3.shape)

    higher_resolution3 = cv2.pyrUp(lower_resolution3)
    print(higher_resolution3.shape)

    higher_resolution2 = cv2.pyrUp(higher_resolution3)
    print(higher_resolution2.shape)

    higher_resolution1 = cv2.pyrUp(higher_resolution2)
    print(higher_resolution1.shape)


    # Do plot
    plot_lr_img(img, lower_resolution1, lower_resolution2, lower_resolution3)
    plot_hy_img(lower_resolution3, higher_resolution3, higher_resolution2, higher_resolution1)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_smoothing.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_cv_img(input_image, output_image):
    """
    Converts an image from BGR to RGB and plots
    """

    fig, ax = plt.subplots(nrows=1, ncols=2)

    ax[0].imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))
    ax[0].set_title('Input Image')
    ax[0].axis('off')

    ax[1].imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
    ax[1].set_title('Box Filter (5,5)')
    ax[1].axis('off')

    # plt.savefig('../figures/03_box_blur_55.png')

    plt.show()


def main():
    # read an image
    img = cv2.imread('../figures/_DSC0426.jpg')
    print(img.shape)
    img = cv2.resize(img, (1200,800))
    cv2.imwrite('../figures/building_sm.png', img)
    blur = cv2.blur(img, (5,5))

    # Do plot
    plot_cv_img(img, blur)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter03/03_transformation.py:
--------------------------------------------------------------------------------
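"""Translates an image by (160, 40) pixels using an affine transformation matrix."""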
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_cv_img(input_image):
    """
    Converts an image from BGR to RGB and plots
    """
    # change color channels order for matplotlib
    plt.imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))

    # For easier view, turn off axis around image
    plt.axis('off')
    plt.show()


def main():
    # read an image
    img = cv2.imread('../figures/flower.png')

    # create transformation matrix
    translation_matrix = np.float32([[1,0,160],[0,1,40]])
    transformed = cv2.warpAffine(img, translation_matrix, (img.shape[1], img.shape[0]))

    # Do plot
    plot_cv_img(transformed)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter04/04_base.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook

def plot_cv_img(input_image1, input_image2, input_image3):
    """
    Converts an image from BGR to RGB and plots
    """
    # change color channels order for matplotlib
    fig, ax = plt.subplots(nrows=1, ncols=3)
    input_image1 = cv2.cvtColor(input_image1, cv2.COLOR_BGR2RGB)
    input_image2 = cv2.cvtColor(input_image2, cv2.COLOR_BGR2RGB)
    input_image3 = cv2.cvtColor(input_image3, cv2.COLOR_BGR2RGB)


    ax[0].imshow(input_image1)
    ax[0].set_title('Image1')
    ax[0].axis('off')

    ax[1].imshow(input_image2)
    ax[1].set_title('Image2')
    ax[1].axis('off')

    ax[2].imshow(input_image3)
    ax[2].set_title('Image3')
    ax[2].axis('off')

    plt.savefig('../figures/04_harris_corners1.png')

    plt.show()

def compute_harris_corners(input):
    gray = cv2.cvtColor(input, cv2.COLOR_BGR2GRAY)
    gray = np.float32(gray)
    dst = cv2.cornerHarris(gray, 2, 5, 0.04)
    # result is dilated for marking the corners, not important
    dst = cv2.dilate(dst, None)
    # Threshold for an optimal value, it may vary depending on the image.
    input[dst>0.01*dst.max()] = [0,255,0]
    plt.figure(figsize=(12, 8))
    plt.imshow(cv2.cvtColor(input, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()

def display_harris_corners(input_img):
    """
    Computes Harris corners in a color image and marks them for plotting.
    """
    # first convert to grayscale with float32 values
    gray = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
    gray = np.float32(gray)

    # using opencv harris corner implementation
    corners = cv2.cornerHarris(gray, 2, 7, 0.04)

    # # result is dilated for marking the corners, not important
    # dst = cv2.dilate(dst,None)

    # additional thresholding and marking corners for plotting
    input_img[corners>0.01*corners.max()] = [255,0,0]

    return input_img
    # # plot image
    # plt.figure(figsize=(12, 8))
    # plt.imshow(cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB))
    # plt.axis('off')

def main():
    # read an image
    img1 = cv2.imread('../figures/building2.jpg')
    img2 = cv2.imread('../figures/flower.png')
    img3 = cv2.imread('../figures/building_crop.jpg')
    #gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


    # compute harris corners and display
    out1 = display_harris_corners(img1)
    out2 = display_harris_corners(img2)
    out3 = display_harris_corners(img3)


    # Do plot
    plot_cv_img(out1, out2, out3)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter04/04_fast_feature.py:
--------------------------------------------------------------------------------
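"""Detects FAST keypoints at two different thresholds and plots the detections side by side."""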
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook



def plot_imgs(img1, img2):
    """
    Converts an image from BGR to RGB and plots
    """

    fig, ax = plt.subplots(nrows=1, ncols=2)

    ax[0].imshow(cv2.cvtColor(img1, cv2.COLOR_BGR2RGB))
    ax[0].set_title('FAST points on Image (th=10)')
    ax[0].axis('off')

    ax[1].imshow(cv2.cvtColor(img2, cv2.COLOR_BGR2RGB))
    ax[1].set_title('FAST points on Image (th=30)')
    ax[1].axis('off')

    # ax[2].imshow(cv2.cvtColor(img3, cv2.COLOR_BGR2RGB))
    # ax[2].set_title('FAST Points(th=15)')
    # ax[2].axis('off')

    # ax[3].imshow(cv2.cvtColor(img4, cv2.COLOR_BGR2RGB))
    # ax[3].set_title('FAST Points(th=50)')
    # ax[3].axis('off')

    # plt.savefig('../figures/04_fast_features_thres.png')

    plt.show()

def compute_fast_det(filename, is_nms=True, thresh=10):

    img = cv2.imread(filename)

    # Initiate FAST object with default values
    fast = cv2.FastFeatureDetector_create()  # FastFeatureDetector()

    # find and draw the keypoints
    if not is_nms:
        fast.setNonmaxSuppression(0)

    fast.setThreshold(thresh)

    kp = fast.detect(img, None)
    cv2.drawKeypoints(img, kp, img, color=(255,0,0))

    return img


def main():
    # read an image
    #img = cv2.imread('../figures/flower.png')
    #gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    filename1 = '../figures/flower.png'
    filename2 = '../figures/building_sm.png'
    filename3 = '../figures/outdoor.jpg'
    # compute fast features and display
    img1 = compute_fast_det(filename1, thresh=10)
    img2 = compute_fast_det(filename2, thresh=30)
    #img3 = compute_fast_det(filename, thresh = 10)
    #img4 = compute_fast_det(filename, thresh = 10)

    # Do plot
    plot_imgs(img1, img2)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter04/04_feature_match.py:
--------------------------------------------------------------------------------
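"""Matches ORB descriptors between two images with a brute-force Hamming matcher and draws the best matches."""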
import numpy as np
import matplotlib.pyplot as plt
import cv2
print(cv2.__version__)
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook




def compute_orb_keypoints(filename):
    """
    Takes in filename to read and computes ORB keypoints
    Returns image, keypoints and descriptors
    """
    img = cv2.imread(filename)
    img = cv2.pyrDown(img)
    img = cv2.pyrDown(img)
    # img = cv2.pyrDown(img)
    # img = cv2.pyrDown(img)
    # create orb object
    orb = cv2.ORB_create()

    # set parameters
    orb.setScoreType(cv2.FAST_FEATURE_DETECTOR_TYPE_9_16)
    orb.setWTA_K(3)

    kp = orb.detect(img, None)

    kp, des = orb.compute(img, kp)
    return img, kp, des


def draw_keyp(img, kp):
    """
    Draws color around keypoint pixels
    """
    cv2.drawKeypoints(img, kp, img, color=(255,0,0), flags=2)
    return img


def plot_orb(filename):
    """
    Plots ORB keypoints from filename
    """
    img, kp, des = compute_orb_keypoints(filename)
    img = draw_keyp(img, kp)
    plot_img(img)


def plot_img(img):
    """
    Generic plotting of opencv image
    """
    fig = plt.figure(figsize=(16,12))
    ax = fig.add_subplot(1,1,1)
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()

def compute_img_matches(filename1, filename2, thres=10):
    """
    Extracts ORB features from given filenames
    Computes ORB matches and plots them side by side
    """
    img1, kp1, des1 = compute_orb_keypoints(filename1)
    img2, kp2, des2 = compute_orb_keypoints(filename2)

    matches = brute_force_matcher(des1, des2)
    draw_matches(img1, img2, kp1, kp2, matches, thres)

def brute_force_matcher(des1, des2):
    """
    Brute force matcher to match ORB feature descriptors
    """
    # create BFMatcher object
    bf = cv2.BFMatcher(cv2.NORM_HAMMING2, crossCheck=True)
    # Match descriptors.
    matches = bf.match(des1, des2)

    # Sort them in the order of their distance.
    matches = sorted(matches, key=lambda x: x.distance)

    return matches

def draw_matches(img1, img2, kp1, kp2, matches, thres=10):
    """
    Utility function to draw lines connecting matches between two images.
    """
    draw_params = dict(matchColor=(0,255,0),
                       singlePointColor=(255,0,0),
                       flags=0)

    # Draw first thres matches.
    img3 = cv2.drawMatches(img1, kp1, img2, kp2, matches[:thres], None, **draw_params)
    plot_img(img3)



def main():
    # input image filenames
    filename2 = '../figures/building_7.JPG'
    filename1 = '../figures/building_crop.jpg'
    compute_img_matches(filename1, filename2, thres=20)



if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter04/04_flann_feature_match.py:
--------------------------------------------------------------------------------
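"""Matches ORB descriptors between two images using a FLANN-based matcher and draws the best matches."""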
import numpy as np
import matplotlib.pyplot as plt
import cv2
print(cv2.__version__)
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook




def compute_orb_keypoints(filename):
    """
    Takes in filename to read and computes ORB keypoints
    Returns image, keypoints and descriptors
    """
    img = cv2.imread(filename)
    # create orb object
    orb = cv2.ORB_create()

    # set parameters
    orb.setScoreType(cv2.FAST_FEATURE_DETECTOR_TYPE_9_16)
    orb.setWTA_K(3)

    kp = orb.detect(img, None)

    kp, des = orb.compute(img, kp)
    return img, kp, des


def draw_keyp(img, kp):
    """
    Draws color around keypoint pixels
    """
    cv2.drawKeypoints(img, kp, img, color=(255,0,0), flags=2)
    return img


def plot_orb(filename):
    """
    Plots ORB keypoints from filename
    """
    img, kp, des = compute_orb_keypoints(filename)
    img = draw_keyp(img, kp)
    plot_img(img)


def plot_img(img):
    """
    Generic plotting of opencv image
    """
    fig = plt.figure(figsize=(16,12))
    ax = fig.add_subplot(1,1,1)
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()

def compute_img_matches(filename1, filename2, thres=10):
    """
    Extracts ORB features from given filenames
    Computes FLANN-based matches and plots them side by side
    """
    img1, kp1, des1 = compute_orb_keypoints(filename1)
    img2, kp2, des2 = compute_orb_keypoints(filename2)

    matches = flann_matcher(des1, des2)
    draw_matches(img1, img2, kp1, kp2, matches, thres)

def flann_matcher(des1, des2):
    """
    FLANN-based matcher with an LSH index to match binary ORB descriptors
    """
    # create FLANN matcher with LSH index parameters (algorithm=6 is FLANN_INDEX_LSH)
    index_params = dict(algorithm=6, table_number=6, key_size=12, multi_probe_level=1)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    # Match descriptors.
    matches = flann.match(des1, des2)

    # Sort them in the order of their distance.
    matches = sorted(matches, key=lambda x: x.distance)

    return matches

def draw_matches(img1, img2, kp1, kp2, matches, thres=10):
    """
    Utility function to draw lines connecting matches between two images.
    """
    draw_params = dict(matchColor=(0,255,0),
                       singlePointColor=(255,0,0),
                       flags=0)

    # Draw first thres matches.
    img3 = cv2.drawMatches(img1, kp1, img2, kp2, matches[:thres], None, **draw_params)
    plot_img(img3)



def main():
    # input image filenames
    filename1 = '../figures/building_crop.jpg'
    filename2 = '../figures/building.jpg'

    compute_img_matches(filename1, filename2)



if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter04/04_orb_detections.py:
--------------------------------------------------------------------------------
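"""Detects ORB keypoints in an image and plots them."""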
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook



def plot_multiple(img1, img2):
    """
    Converts an image from BGR to RGB and plots
    """

    fig, ax = plt.subplots(nrows=1, ncols=2)

    ax[0].imshow(cv2.cvtColor(img1, cv2.COLOR_BGR2RGB))
    ax[0].set_title('Image 1')
    ax[0].axis('off')

    ax[1].imshow(cv2.cvtColor(img2, cv2.COLOR_BGR2RGB))
    ax[1].set_title('Image 2')
    ax[1].axis('off')

    plt.show()



def compute_orb_keypoints(filename):
    """
    Reads image from filename and computes ORB keypoints
    Returns image, keypoints and descriptors.
    """
    # load image
    img = cv2.imread(filename)

    # create orb object
    orb = cv2.ORB_create()

    # set parameters
    orb.setScoreType(cv2.FAST_FEATURE_DETECTOR_TYPE_9_16)
    orb.setWTA_K(3)

    # detect keypoints
    kp = orb.detect(img, None)

    # for detected keypoints compute descriptors.
    kp, des = orb.compute(img, kp)
    return img, kp, des

def draw_keyp(img, kp):
    """
    Takes an image and keypoints and draws the keypoints on the same image.
    Does not display it.
    """
    cv2.drawKeypoints(img, kp, img, color=(255,0,0), flags=2)
    return img


def plot_img(img, figsize=(12,8)):
    """
    Plots image using matplotlib for the given figsize
    """
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1,1,1)

    # image needs to be converted to RGB format for plotting
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title('ORB keypoints')
    plt.show()



def main():
    # read an image
    filename = '../figures/flower.png'
    # compute ORB keypoints
    img1, kp1, des1 = compute_orb_keypoints(filename)
    # draw keypoints on image
    img1 = draw_keyp(img1, kp1)
    # plot one image with keypoints
    plot_img(img1)


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter04/04_sift_features.py:
--------------------------------------------------------------------------------
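"""Detects SIFT keypoints (via the opencv-contrib xfeatures2d module) and plots them on the grayscale image."""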
import numpy as np
import matplotlib.pyplot as plt
import cv2
# With jupyter notebook uncomment below line
# %matplotlib inline
# This plots figures inside the notebook



def compute_sift_features(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    sift = cv2.xfeatures2d.SIFT_create()
    kp = sift.detect(gray, None)
    img = cv2.drawKeypoints(gray, kp, None)
    plt.figure(figsize=(12, 8))
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()



def compute_fast_det(img, is_nms=True, thresh=10):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Initiate FAST object with default values
    fast = cv2.FastFeatureDetector_create()  # FastFeatureDetector()

    # # find and draw the keypoints
    if not is_nms:
        fast.setNonmaxSuppression(0)

    fast.setThreshold(thresh)

    kp = fast.detect(img, None)
    cv2.drawKeypoints(img, kp, img, color=(255,0,0))



    sift = cv2.xfeatures2d.SIFT_create()
    kp = sift.detect(gray, None)

    img = cv2.drawKeypoints(gray, kp, None)

    plt.figure(figsize=(12, 8))
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()


def main():
    # read an image
    img = cv2.imread('../figures/flower.png')
    #gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


    # compute sift features and display
    compute_sift_features(img)

    # Do plot
    #plot_cv_img(gray, dst)

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/Chapter04/04_sk_orb_features.py:
--------------------------------------------------------------------------------
from skimage import data
from skimage import transform as tf
from skimage.feature import (match_descriptors, corner_harris,
                             corner_peaks, ORB, plot_matches)
from skimage.color import rgb2gray
import matplotlib.pyplot as plt


img1 = rgb2gray(data.astronaut())
img2 = tf.rotate(img1, 180)
tform = tf.AffineTransform(scale=(1.3, 1.1), rotation=0.5,
                           translation=(0, -200))
img3 = tf.warp(img1, tform)

descriptor_extractor = ORB(n_keypoints=200)

descriptor_extractor.detect_and_extract(img1)
keypoints1 = descriptor_extractor.keypoints
descriptors1 = descriptor_extractor.descriptors

descriptor_extractor.detect_and_extract(img2)
keypoints2 = descriptor_extractor.keypoints
descriptors2 = descriptor_extractor.descriptors

descriptor_extractor.detect_and_extract(img3)
keypoints3 = descriptor_extractor.keypoints
descriptors3 = descriptor_extractor.descriptors

matches12 = match_descriptors(descriptors1, descriptors2, cross_check=True)
matches13 = match_descriptors(descriptors1, descriptors3, cross_check=True)

fig, ax = plt.subplots(nrows=2, ncols=1)

plt.gray()

plot_matches(ax[0], img1, img2, keypoints1, keypoints2, matches12)
ax[0].axis('off')
ax[0].set_title("Original Image vs. Rotated Image")

plot_matches(ax[1], img1, img3, keypoints1, keypoints3, matches13)
ax[1].axis('off')
ax[1].set_title("Original Image vs. Affine Transformed Image")


plt.show()

--------------------------------------------------------------------------------
/Chapter04/04_template_matching.py:
--------------------------------------------------------------------------------
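"""Runs OpenCV template matching with all six comparison methods and plots the match map and detected location for each."""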
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('../figures/building.jpg', 0)
img2 = img.copy()
template = cv2.imread('../figures/building_crop.jpg', 0)
w, h = template.shape[::-1]

# All the 6 methods for comparison in a list
methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR',
           'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']

for meth in methods:
    img = img2.copy()
    method = eval(meth)
    print(method)

    # Apply template Matching
    res = cv2.matchTemplate(img, template, method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

    # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
    if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
        top_left = min_loc
    else:
        top_left = max_loc
    bottom_right = (top_left[0] + w, top_left[1] + h)
    cv2.rectangle(img, top_left, bottom_right, 255, 2)

    plt.subplot(121), plt.imshow(res, cmap='gray')
    plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
    plt.subplot(122), plt.imshow(img, cmap='gray')
    plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
    plt.suptitle(meth)
    plt.show()

--------------------------------------------------------------------------------
/Chapter05/05_nn1.py:
--------------------------------------------------------------------------------
import numpy as np

dim_x = 1000  # input dims
dim_y = 2  # output dims
batch = 10  # batch size for training
lr = 1e-4  # learning rate for weight update
steps = 5000  # steps for learning

# create random input and targets
x = np.random.randn(batch, dim_x)
y = np.random.randn(batch, dim_y)

# initialize weight matrix
w = np.random.randn(dim_x, dim_y)

def net(x, w):
    """
    A simple neural net that performs non-linear transformation
    Function : 1 / (1 + e^(-w*x))
    x: inputs
    w: weight matrix
    Returns the function value
    """
    return 1/(1+np.exp(-x.dot(w)))

def compute_loss(y, y_pred):
    """
    Loss function : mean((y_pred - y)**2)
    y: ground truth targets
    y_pred: predicted target values
    """
    return np.mean((y_pred-y)**2)

def backprop(y, y_pred, w, x):
    """
    Backpropagation to compute w gradients
    y : ground truth targets
    y_pred : predicted targets
    w : weights for the network
    x : inputs to the net
    """
    # start from the outermost layer
    y_grad = 2.0 * (y_pred - y)

    # inner layer grads
    w_grad = x.T.dot(y_grad * y_pred * (1 - y_pred))
    return w_grad

for i in range(steps):

    # feed forward pass
    y_pred = net(x, w)

    # compute loss
    loss = compute_loss(y, y_pred)
    print("Loss:", loss, "at step:", i)

    # compute grads using backprop on given net
    w_grad = backprop(y, y_pred, w, x)

    # update weights with some learning rate
    w -= lr * w_grad

--------------------------------------------------------------------------------
/Chapter05/05_nn2.py:
--------------------------------------------------------------------------------
'''Train a simple deep CNN on the CIFAR10 small images dataset.

It gets to 75% validation accuracy in 25 epochs, and 79% after 50 epochs.
(it's still underfitting at that point, though).
'''

from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import numpy as np
import os

batch_size = 32
num_classes = 10
epochs = 100
data_augmentation = False
num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keras_cifar10_trained_model.h5'

# The data, shuffled and split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, (5, 5), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
# model.add(Dense(512))
model.add(Activation('relu'))
# model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        steps_per_epoch=int(np.ceil(x_train.shape[0] / float(batch_size))),
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=4)

# Save model and weights
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

--------------------------------------------------------------------------------
/Chapter05/05_nn_mnist.py:
--------------------------------------------------------------------------------
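"""Trains a small CNN on Fashion-MNIST with Keras, checkpointing the model after each epoch, and reports test loss and accuracy."""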
import keras
import keras.backend as K
from keras.layers import Dense, Conv2D, Input, MaxPooling2D, Flatten, Dropout
from keras.models import Model
from keras.datasets import fashion_mnist
from keras.callbacks import ModelCheckpoint


# setup parameters
batch_sz = 64
nb_class = 10
nb_epochs = 10
img_h, img_w = 28, 28


def get_dataset():
    """
    Return processed and reshaped dataset for training
    In this case, the Fashion-MNIST dataset.
    """
    # load fashion mnist dataset
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

    # test and train datasets
    print("Nb Train:", x_train.shape[0], "Nb test:", x_test.shape[0])
    x_train = x_train.reshape(x_train.shape[0], img_h, img_w, 1)
    x_test = x_test.reshape(x_test.shape[0], img_h, img_w, 1)
    in_shape = (img_h, img_w, 1)

    # normalize inputs
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.0
    x_test /= 255.0

    # convert to one hot vectors
    y_train = keras.utils.to_categorical(y_train, nb_class)
    y_test = keras.utils.to_categorical(y_test, nb_class)
    return x_train, x_test, y_train, y_test

x_train, x_test, y_train, y_test = get_dataset()

def conv3x3(input_x, nb_filters):
    """
    Wrapper around convolution layer
    Inputs:
        input_x: input layer / tensor
        nb_filter: Number of filters for convolution
    """
    return Conv2D(nb_filters, kernel_size=(3,3), use_bias=False,
                  activation='relu', padding="same")(input_x)

def create_model(img_h=28, img_w=28):
    """
    Creates a CNN model for training.
    Inputs:
        img_h: input image height
        img_w: input image width
    Returns:
        Model structure
    """
    inputs = Input(shape=(img_h, img_w, 1))

    x = conv3x3(inputs, 32)
    x = conv3x3(x, 32)
    x = MaxPooling2D(pool_size=(2,2))(x)
    x = conv3x3(x, 64)
    x = conv3x3(x, 64)
    x = MaxPooling2D(pool_size=(2,2))(x)
    x = conv3x3(x, 128)
    x = MaxPooling2D(pool_size=(2,2))(x)
    x = Flatten()(x)
    x = Dense(128, activation="relu")(x)
    preds = Dense(nb_class, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=preds)
    print(model.summary())
    return model

model = create_model()

# setup optimizer, loss function and metrics for model
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

# To save model after each epoch of training
callback = ModelCheckpoint('mnist_cnn.h5')

# start training
model.fit(x_train, y_train,
          batch_size=batch_sz,
          epochs=nb_epochs,
          verbose=1,
          validation_data=(x_test, y_test),
          callbacks=[callback])

# Evaluate and print accuracy
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

--------------------------------------------------------------------------------
/Chapter05/05_nn_vis.py:
--------------------------------------------------------------------------------
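"""Trains a small CNN on Fashion-MNIST with SGD, checkpointing the model after each epoch so it can be inspected and visualized."""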
import keras
import keras.backend as K
from keras.layers import Dense, Conv2D, Input, MaxPooling2D, Flatten
from keras.models import Model
from keras.datasets import fashion_mnist
from keras.callbacks import ModelCheckpoint


# setup parameters
batch_sz = 128
nb_class = 10
nb_epochs = 10

img_h, img_w = 28, 28
print(K.image_data_format())

# input image dimensions
img_rows, img_cols = 28, 28

def get_dataset():
    """
    Return processed and reshaped dataset for training
    In this case, the Fashion-MNIST dataset.
    """
    # load fashion mnist dataset
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

    # test and train datasets
    print("Nb Train:", x_train.shape[0], "Nb test:", x_test.shape[0])
    x_train = x_train.reshape(x_train.shape[0], img_h, img_w, 1)
    x_test = x_test.reshape(x_test.shape[0], img_h, img_w, 1)
    in_shape = (img_h, img_w, 1)

    # normalize inputs
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.0
    x_test /= 255.0

    # convert to one hot vectors
    y_train = keras.utils.to_categorical(y_train, nb_class)
    y_test = keras.utils.to_categorical(y_test, nb_class)
    return x_train, x_test, y_train, y_test

x_train, x_test, y_train, y_test = get_dataset()

def create_model(img_h=28, img_w=28):
    inputs = Input(shape=(img_h, img_w, 1))
    x = Conv2D(32, kernel_size=(3,3), activation='relu')(inputs)  # 32C 3K 1S VP RELU
    x = Conv2D(32, kernel_size=(3,3), activation='relu')(x)  # 32C 3K 1S VP RELU
    x = MaxPooling2D(pool_size=(2,2))(x)  # pool2
    x = Conv2D(64, kernel_size=(3,3), activation='relu')(x)  # 64C 3K 1S VP RELU
    x = Conv2D(64, kernel_size=(3,3), activation='relu')(x)  # 64C 3K 1S VP RELU
    x = MaxPooling2D(pool_size=(2,2))(x)  # pool2
    x = Flatten()(x)
    preds = Dense(nb_class, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=preds)
    print(model.summary())
    return model

model = create_model()

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.SGD(lr=0.001),
              metrics=['accuracy'])

# To save model after each epoch of training (ModelCheckpoint requires a filepath)
callback = ModelCheckpoint('mnist_cnn.h5')

# start training
model.fit(x_train, y_train,
          batch_size=batch_sz,
          epochs=nb_epochs,
          verbose=1,
          validation_data=(x_test, y_test), callbacks=[callback])

# Evaluate
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

--------------------------------------------------------------------------------
/Chapter05/05_print_activation.py:
--------------------------------------------------------------------------------
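"""Builds a single convolution + ReLU model and prints its summary to inspect output shapes and parameter counts."""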
from keras.layers import Conv2D, Input, Activation
from keras.models import Model

def print_model():
    """
    Creates a sample model and prints output shape
    Use this to analyse convolution parameters
    """
    # create input with given shape
    x = Input(shape=(512,512,3))

    # create a convolution layer
    conv = Conv2D(filters=32,
                  kernel_size=(5,5),
                  strides=1, padding="same",
                  use_bias=True)(x)
    y = Activation('relu')(conv)

    # create model
    model = Model(inputs=x, outputs=y)

    # prints our model created
    model.summary()

print_model()

--------------------------------------------------------------------------------
/Chapter05/05_print_conv_out.py:
--------------------------------------------------------------------------------
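"""Builds a single convolution-layer model and prints its summary to inspect the output shape."""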
24 | """ 25 | # load mnist dataset 26 | (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() 27 | 28 | # test and train datasets 29 | print("Nb Train:", x_train.shape[0], "Nb test:",x_test.shape[0]) 30 | x_train = x_train.reshape(x_train.shape[0], img_h, img_w, 1) 31 | x_test = x_test.reshape(x_test.shape[0], img_h, img_w, 1) 32 | in_shape = (img_h, img_w, 1) 33 | 34 | # normalize inputs 35 | x_train = x_train.astype('float32') 36 | x_test = x_test.astype('float32') 37 | x_train /= 255.0 38 | x_test /= 255.0 39 | 40 | # convert to one hot vectors 41 | y_train = keras.utils.to_categorical(y_train, nb_class) 42 | y_test = keras.utils.to_categorical(y_test, nb_class) 43 | return x_train, x_test, y_train, y_test 44 | 45 | x_train, x_test, y_train, y_test = get_dataset() 46 | 47 | def create_model(img_h=28, img_w=28): 48 | inputs = Input(shape=(img_h, img_w, 1)) 49 | x = Conv2D(32, kernel_size=(3,3), activation='relu')(inputs) # 32C 3K 1S VP RELU 50 | x = Conv2D(32, kernel_size=(3,3), activation='relu')(x) # 64C 3K 1S VP RELU 51 | x = MaxPooling2D(pool_size=(2,2))(x) # pool2 52 | x = Conv2D(64, kernel_size=(3,3), activation='relu')(x) # 32C 3K 1S VP RELU 53 | x = Conv2D(64, kernel_size=(3,3), activation='relu')(x) # 64C 3K 1S VP RELU 54 | x = MaxPooling2D(pool_size=(2,2))(x) # pool2 55 | x = Flatten()(x) 56 | preds = Dense(nb_class, activation='softmax')(x) 57 | model = Model(inputs=inputs, outputs=preds) 58 | print(model.summary()) 59 | return model 60 | 61 | model = create_model() 62 | 63 | model.compile(loss=keras.losses.categorical_crossentropy, 64 | optimizer=keras.optimizers.SGD(lr=0.001), 65 | metrics=['accuracy']) 66 | 67 | callback = ModelCheckpoint() 68 | 69 | # start training 70 | model.fit(x_train, y_train, 71 | batch_size=batch_sz, 72 | epochs=nb_epochs, 73 | verbose=1, 74 | validation_data=(x_test, y_test), callbacks=[callback]) 75 | 76 | # Evaluate 77 | score = model.evaluate(x_test, y_test, verbose=0) 78 | print('Test loss:', score[0]) 79 | print('Test accuracy:', score[1]) 80 | 81 | -------------------------------------------------------------------------------- /Chapter05/05_print_activation.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Conv2D, Input, Activation 2 | from keras.models import Model 3 | 4 | def print_model(): 5 | """ 6 | Creates a sample model and prints output shape 7 | Use this to analyse convolution parameters 8 | """ 9 | # create input with given shape 10 | x = Input(shape=(512,512,3)) 11 | 12 | # create a convolution layer 13 | conv = Conv2D(filters=32, 14 | kernel_size=(5,5), 15 | strides=1, padding="same", 16 | use_bias=True)(x) 17 | y = Activation('relu')(conv) 18 | 19 | # create model 20 | model = Model(inputs=x, outputs=y) 21 | 22 | # prints our model created 23 | model.summary() 24 | 25 | print_model() -------------------------------------------------------------------------------- /Chapter05/05_print_conv_out.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Conv2D, Input 2 | from keras.models import Model 3 | 4 | def print_model(): 5 | """ 6 | Creates a sample model and prints output shape 7 | Use this to analyse convolution parameters 8 | """ 9 | # create input with given shape 10 | x = Input(shape=(512,512,3)) 11 | 12 | # create a convolution layer 13 | y = Conv2D(filters=32, 14 | kernel_size=(5,5), 15 | strides=1, padding="same", 16 | use_bias=True)(x) 17 | 18 | # create model 19 | model = Model(inputs=x, 
--------------------------------------------------------------------------------
/Chapter05/05_print_activation.py:
--------------------------------------------------------------------------------
from keras.layers import Conv2D, Input, Activation
from keras.models import Model

def print_model():
    """
    Creates a sample model and prints the output shape.
    Use this to analyse convolution parameters.
    """
    # create input with given shape
    x = Input(shape=(512,512,3))

    # create a convolution layer
    conv = Conv2D(filters=32,
                  kernel_size=(5,5),
                  strides=1, padding="same",
                  use_bias=True)(x)
    y = Activation('relu')(conv)

    # create model
    model = Model(inputs=x, outputs=y)

    # print the model we created
    model.summary()

print_model()
--------------------------------------------------------------------------------
/Chapter05/05_print_conv_out.py:
--------------------------------------------------------------------------------
from keras.layers import Conv2D, Input
from keras.models import Model

def print_model():
    """
    Creates a sample model and prints the output shape.
    Use this to analyse convolution parameters.
    """
    # create input with given shape
    x = Input(shape=(512,512,3))

    # create a convolution layer
    y = Conv2D(filters=32,
               kernel_size=(5,5),
               strides=1, padding="same",
               use_bias=True)(x)

    # create model
    model = Model(inputs=x, outputs=y)

    # print the model we created
    model.summary()

print_model()
--------------------------------------------------------------------------------
/Chapter05/05_print_dense.py:
--------------------------------------------------------------------------------
from keras.layers import Dense, Input
from keras.models import Model

def print_model():
    """
    Creates a sample model and prints the output shape.
    Use this to analyse dense / fully connected parameters.
    """
    # create input with given shape
    x = Input(shape=(512,))

    # create a fully connected layer
    y = Dense(32)(x)

    # create model
    model = Model(inputs=x, outputs=y)

    # print the model we created
    model.summary()

print_model()
--------------------------------------------------------------------------------
/Chapter05/05_print_inceptionv3.py:
--------------------------------------------------------------------------------
from keras.applications.inception_v3 import InceptionV3

def print_model():
    """
    Loads InceptionV3 and prints the model structure.
    """

    # create model
    model = InceptionV3(weights='imagenet')

    # print the model we created
    model.summary()

print_model()
--------------------------------------------------------------------------------
/Chapter05/05_print_pooling.py:
--------------------------------------------------------------------------------
from keras.layers import Conv2D, Input, MaxPooling2D
from keras.models import Model

def print_model():
    """
    Creates a sample model and prints the output shape.
    Use this to analyse pooling parameters.
    """
    # create input with given shape
    x = Input(shape=(512,512,3))

    # create a convolution layer
    conv = Conv2D(filters=32,
                  kernel_size=(5,5), activation="relu",
                  strides=1, padding="same",
                  use_bias=True)(x)

    pool = MaxPooling2D(pool_size=(2,2))(conv)

    # create model
    model = Model(inputs=x, outputs=pool)

    # print the model we created
    model.summary()

print_model()
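
# --- Hedged worked example (added): the shapes printed by the summaries
# above follow out = floor((in - k + 2p) / s) + 1, where k is kernel size,
# s is stride and p is padding; "same" padding picks p so the spatial size
# is preserved at stride 1.
def out_size(n, k, s, p):
    # output spatial size of a convolution or pooling window
    return (n - k + 2 * p) // s + 1

print(out_size(512, 5, 1, 2))  # 5x5 conv, stride 1, "same" padding -> 512
print(out_size(512, 2, 2, 0))  # 2x2 max pooling, stride 2 -> 256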
--------------------------------------------------------------------------------
/Chapter05/05_print_resnet.py:
--------------------------------------------------------------------------------
from keras.applications.resnet50 import ResNet50
import numpy as np
import cv2
from keras.applications.resnet50 import preprocess_input, decode_predictions
import time

def get_model():
    """
    Loads ResNet50 and prints the model structure.
    """

    # create model
    model = ResNet50(weights='imagenet')

    # print the model we loaded
    model.summary()
    return model

def preprocess_img(img):
    # apply opencv preprocessing
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img[np.newaxis, :, :, :]
    img = np.asarray(img, dtype=np.float32)

    # further use imagenet-specific preprocessing,
    # which applies per-channel mean normalization
    x = preprocess_input(img)
    print(x.shape)
    return x

# read input image and preprocess
img = cv2.imread('../figures/train1.png')
input_x = preprocess_img(img)

# create model with pre-trained weights
resnet_model = get_model()

# run predictions only, no training
start = time.time()
preds = resnet_model.predict(input_x)
print(time.time() - start)

# decode predictions to class indices, top 5 predictions
print('Predicted:', decode_predictions(preds, top=5)[0])
--------------------------------------------------------------------------------
/Chapter05/05_print_vgg16.py:
--------------------------------------------------------------------------------
from keras.applications.vgg16 import VGG16

def print_model():
    """
    Loads VGG16 and prints the model structure.
    """

    # create model
    model = VGG16(weights='imagenet')

    # print the model we created
    model.summary()

print_model()
--------------------------------------------------------------------------------
/Chapter06/06_face_detection_webcam.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2

# create cascaded classifier with pre-learned weights
# for other objects, change the cascade file here
face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        print("No frame captured")
        continue  # skip processing when no frame is available

    # frame = cv2.resize(frame, (640, 480))
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # detect faces
    faces = face_cascade.detectMultiScale(gray)

    # plot results
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

    cv2.imshow('img', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
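
# --- Hedged tuning note (added): detectMultiScale accepts optional
# parameters that trade detection rate against false positives; the values
# below are common starting points, not ones taken from the original script.
# faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3,
#                                       minNeighbors=5, minSize=(30, 30))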
--------------------------------------------------------------------------------
/Chapter06/06_mscoco_seg_vis.py:
--------------------------------------------------------------------------------
import numpy as np
import os
import sys
# import tensorflow as tf
import cv2
from matplotlib import pyplot as plt
# inside jupyter uncomment next line
# %matplotlib inline
import random
import time
from pycocotools.coco import COCO
import skimage.io as io


def draw_segmentation_mask(img, anns):
    # fill each annotated polygon with white and return a grayscale mask
    for ann in anns:
        for seg in ann['segmentation']:
            poly = np.array(seg).reshape((int(len(seg)/2), 2))
            cv2.fillConvexPoly(img, np.int32([poly]), color=(255, 255, 255))
    return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

def draw_segmentation_boundary(img, anns):
    # draw each annotated polygon as a closed green outline
    for ann in anns:
        for seg in ann['segmentation']:
            poly = np.array(seg).reshape((int(len(seg)/2), 2))
            cv2.polylines(img, np.int32([poly]), True, color=(0, 255, 0))
    return img

def main():
    annFile = 'annotations/instances_train2017.json'
    coco = COCO(annFile)
    # display COCO categories and supercategories
    cats = coco.loadCats(coco.getCatIds())
    nms = [cat['name'] for cat in cats]
    print('COCO categories: \n{}\n'.format(' '.join(nms)))

    nms = set([cat['supercategory'] for cat in cats])
    print('COCO supercategories: \n{}'.format(' '.join(nms)))

    # get all images containing the given categories, select one at random
    catIds = coco.getCatIds(catNms=['person', 'dog'])
    imgIds = coco.getImgIds(catIds=catIds)
    # imgIds = coco.getImgIds(imgIds=[324158])
    img_meta = coco.loadImgs(imgIds[np.random.randint(0, len(imgIds))])[0]

    I = io.imread(img_meta['coco_url'])
    cv_img = I.copy()
    plt.imshow(cv_img)
    plt.axis('off')
    plt.show()

    annIds = coco.getAnnIds(imgIds=img_meta['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)
    # print(anns)

    # create a mask of zeros and draw annotation boundaries on it
    mask = np.zeros(cv_img.shape, dtype=np.uint8)
    mask = draw_segmentation_boundary(mask, anns)
    plt.imshow(mask, cmap='gray')
    plt.axis('off')
    plt.show()


if __name__ == '__main__':
    main()
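
# --- Hedged usage sketch (added): the draw_segmentation_mask helper above is
# defined but never called in main(); a filled mask for the same annotations
# could be produced like this (anns and cv_img are local to main()):
# filled = np.zeros(cv_img.shape, dtype=np.uint8)
# filled = draw_segmentation_mask(filled, anns)
# plt.imshow(filled, cmap='gray'); plt.axis('off'); plt.show()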
--------------------------------------------------------------------------------
/Chapter06/06_youtube_ssd_demo.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import pafy
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
import random
import time
sys.path.append("..")
from utils import label_map_util
from utils import visualization_utils as vis_util

# Which model to download; alternatives include:
# ssd_inception_v2_coco_2017_11_17
# faster_rcnn_inception_v2_coco_2017_11_08
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
# MODEL_NAME = 'faster_rcnn_inception_v2_coco_2017_11_08'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to the frozen detection graph. This is the actual model used for object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings used to add the correct label to each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90

# create youtube capture
url = 'https://www.youtube.com/watch?v=fq-X9UZMLRk'
videoPafy = pafy.new(url)


def load_label_dict(PATH_TO_LABELS):
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # print(category_index)
    return category_index


def show_cv_img_with_detections(img, dets, scores, classes, thres=0.4):
    height = img.shape[0]
    width = img.shape[1]
    colors = dict()

    for i in range(dets.shape[0]):
        cls_id = int(classes[i])
        if cls_id >= 0:
            score = scores[i]

            if score > thres:
                if cls_id not in colors:
                    colors[cls_id] = (random.random(), random.random(), random.random())
                # boxes are normalized [ymin, xmin, ymax, xmax]
                xmin = int(dets[i, 1] * width)
                ymin = int(dets[i, 0] * height)
                xmax = int(dets[i, 3] * width)
                ymax = int(dets[i, 2] * height)

                cv2.rectangle(img, (xmin, ymin), (xmax, ymax), colors[cls_id])
                class_name = str(category_index[cls_id]['name'])
                cv2.putText(img, '{:s} {:.3f}'.format(class_name, score),
                            (xmin, ymin), cv2.FONT_HERSHEY_PLAIN, 0.5, colors[cls_id])

    return img


def show_mpl_img_with_detections(img, dets, scores, classes, thres=0.6,
                                 filename='detections.png'):
    # the filename parameter is an addition; the original code saved the
    # figure through an undefined name
    plt.figure(figsize=(8, 6))
    plt.imshow(img)
    height = img.shape[0]
    width = img.shape[1]
    colors = dict()
    for i in range(dets.shape[0]):
        cls_id = int(classes[i])
        if cls_id >= 0:
            score = scores[i]

            if score > thres:
                if cls_id not in colors:
                    colors[cls_id] = (random.random(), random.random(), random.random())
                xmin = int(dets[i, 1] * width)
                ymin = int(dets[i, 0] * height)
                xmax = int(dets[i, 3] * width)
                ymax = int(dets[i, 2] * height)
                rect = plt.Rectangle((xmin, ymin), xmax - xmin,
                                     ymax - ymin, fill=False,
                                     edgecolor=colors[cls_id],
                                     linewidth=2.5)
                plt.gca().add_patch(rect)
                class_name = str(category_index[cls_id]['name'])

                plt.gca().text(xmin, ymin - 2,
                               '{:s} {:.3f}'.format(class_name, score),
                               bbox=dict(facecolor=colors[cls_id], alpha=0.5),
                               fontsize=8, color='white')
    plt.axis('off')
    plt.savefig(filename)
    plt.close()
    plt.pause(0.001)
    return

# download the model archive and extract the frozen graph
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
    file_name = os.path.basename(file.name)
    if 'frozen_inference_graph.pb' in file_name:
        tar_file.extract(file, os.getcwd())

# import frozen graph
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# load labels
category_index = load_label_dict(PATH_TO_LABELS)


best = videoPafy.getbest(preftype="webm")
cap = cv2.VideoCapture(videoPafy.videostreams[2].url)
# cap = cv2.VideoCapture(best.url)
# run session
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Define the input and output tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the confidence for the corresponding object.
        # The score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        skip = 5
        while True:
            # capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                break
            # process only every fifth frame; the original decremented and
            # then unconditionally reset skip, which skipped nothing
            if skip != 0:
                skip -= 1
                continue
            skip = 5
            frame_bgr = frame
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # the array-based representation of the image is used to prepare
            # the result image with boxes and labels on it
            image_np = np.asarray(frame, dtype=np.uint8)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # draw detections on the BGR frame; in the original these two
            # steps were commented out, so the demo displayed raw frames only
            out = show_cv_img_with_detections(frame_bgr, boxes[0], scores[0],
                                              classes[0], thres=0.45)

            cv2.imshow('frame', out)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/Chapter07/07_fcn_32s_keras.py:
--------------------------------------------------------------------------------
from keras.models import *
from keras.layers import *
from keras.applications.vgg16 import VGG16

def create_model_fcn32(nb_class, input_w=256):
    """
    Create an FCN-32s model for segmentation.
    Input:
        nb_class: number of detection categories
        input_w: input width, using square images

    Returns the model created for training.
    """
    img_input = Input(shape=(input_w, input_w, 3))

    # initialize the feature extractor excluding the fully connected layers;
    # here we use the VGG16 model with pre-trained weights
    vgg = VGG16(include_top=False, weights='imagenet', input_tensor=img_input)

    # create the further network
    x = Conv2D(4096, kernel_size=(7,7), use_bias=False,
               activation='relu', padding="same")(vgg.output)
    x = Dropout(0.5)(x)
    x = Conv2D(4096, kernel_size=(1,1), use_bias=False,
               activation='relu', padding="same")(x)
    x = Dropout(0.5)(x)
    x = Conv2D(nb_class, kernel_size=(1,1), use_bias=False,
               padding="same")(x)
    # upsampling back to the input image size
    x = Conv2DTranspose(nb_class,
                        kernel_size=(64,64),
                        strides=(32,32),
                        use_bias=False, padding='same')(x)

    x = Activation('softmax')(x)
    model = Model(img_input, x)
    model.summary()
    return model

# Create model for pascal voc image segmentation for 21 classes
model = create_model_fcn32(21)
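
# --- Hedged check (added): the transposed convolution above upsamples by its
# stride, so the 32x-downsampled VGG16 feature map is restored to the input
# resolution; a quick sanity check of the arithmetic:
for w in (256, 512):
    print(w, '-> feature map', w // 32, '-> upsampled', (w // 32) * 32)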
--------------------------------------------------------------------------------
/Chapter08/08_compute_F_mat.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import cv2
print(cv2.__version__)
import glob
# With jupyter notebook uncomment the line below;
# it plots figures inside the notebook
# %matplotlib inline


def compute_orb_keypoints(filename):
    """
    Reads an image from filename and computes ORB keypoints.
    Returns the image, keypoints and descriptors.
    """
    # load image
    img = cv2.imread(filename)

    # create orb object
    orb = cv2.ORB_create()

    # set parameters
    orb.setScoreType(cv2.FAST_FEATURE_DETECTOR_TYPE_9_16)
    orb.setWTA_K(3)

    # detect keypoints
    kp = orb.detect(img, None)

    # for the detected keypoints compute descriptors
    kp, des = orb.compute(img, kp)

    return img, kp, des


def brute_force_matcher(des1, des2):
    """
    Brute force matcher to match ORB feature descriptors.
    """
    # create BFMatcher object
    bf = cv2.BFMatcher(cv2.NORM_HAMMING2, crossCheck=True)
    # match descriptors
    matches = bf.match(des1, des2)

    # sort them in the order of their distance
    matches = sorted(matches, key=lambda x: x.distance)

    return matches

def compute_fundamental_matrix(filename1, filename2):
    """
    Takes the filenames of two input images and returns the
    fundamental matrix computed using the 8-point algorithm.
    """
    # compute ORB keypoints and descriptors for each image
    img1, kp1, des1 = compute_orb_keypoints(filename1)
    img2, kp2, des2 = compute_orb_keypoints(filename2)

    # compute keypoint matches using the descriptors
    matches = brute_force_matcher(des1, des2)

    # extract points, keeping only strong matches
    pts1 = []
    pts2 = []
    for m in matches:
        if m.distance < 20:
            pts2.append(kp2[m.trainIdx].pt)
            pts1.append(kp1[m.queryIdx].pt)
    pts1 = np.asarray(pts1)
    pts2 = np.asarray(pts2)

    # compute the fundamental matrix
    F, mask = cv2.findFundamentalMat(pts1, pts2, cv2.FM_8POINT)
    return F


def main():
    # read the list of images from the directory in sorted order
    image_dir = '/Users/mac/Documents/dinoRing/'
    file_list = sorted(glob.glob(image_dir + '*.png'))

    # compute the F matrix between two images
    print(compute_fundamental_matrix(file_list[0], file_list[2]))


if __name__ == '__main__':
    main()
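
# --- Hedged follow-up sketch (added, not in the original script): once F is
# known, epipolar lines in the second image for points from the first can be
# computed with OpenCV; pts1 stands for an (N, 2) float array of matched
# points like the one built inside compute_fundamental_matrix above.
def epipolar_lines_for(pts1, F):
    # each returned row holds (a, b, c) for the line ax + by + c = 0
    return cv2.computeCorrespondEpilines(pts1.reshape(-1, 1, 2), 1, F)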
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Packt

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Practical Computer Vision
This is the code repository for [Practical Computer Vision](https://www.packtpub.com/big-data-and-business-intelligence/practical-computer-vision?utm_source=github&utm_medium=repository&utm_campaign=9781788297684), published by [Packt](https://www.packtpub.com/?utm_source=github). It contains all the supporting project files necessary to work through the book from start to finish.

## About the Book
In this book, you will find several recently proposed methods in various domains of computer vision. You will start by setting up the proper Python environment to work on practical applications. This includes setting up libraries such as OpenCV, TensorFlow, and Keras using Anaconda. Using these libraries, you'll start to understand the concepts of image transformation and filtering. You will find a detailed explanation of feature detectors such as FAST and ORB, and you'll use them to find similar-looking objects.

With an introduction to convolutional neural nets, you will learn how to build a deep neural net using Keras and how to use it to classify the Fashion-MNIST dataset. With regard to object detection, you will learn the implementation of a simple face detector as well as the workings of complex deep-learning-based object detectors such as Faster R-CNN and SSD using TensorFlow. You'll get started with semantic segmentation using FCN models and track objects with Deep SORT. Not only this, you will also use Visual SLAM techniques such as ORB-SLAM on a standard dataset.

By the end of this book, you will have a firm understanding of the different computer vision techniques and how to apply them in your applications.

## Instructions and Navigation
All of the code is organized into folders. Each folder starts with a number followed by the application name. For example, Chapter02.

The code will look like the following:
```
import numpy as np
import matplotlib.pyplot as plt
import cv2
```

The list of software needed for this book is as follows:
* Anaconda distribution v5.0.1
* OpenCV v3.3.0
* TensorFlow v1.4.0
* Keras v2.1.2

To run all of the code effectively, Ubuntu 16.04 is preferable, with an Nvidia GPU and at least 4 GB of RAM. The code will also run without GPU support.

## Related Products
* [Mastering OpenCV with Practical Computer Vision Projects](https://www.packtpub.com/application-development/mastering-opencv-practical-computer-vision-projects?utm_source=github&utm_medium=repository&utm_campaign=9781849517829)

* [OpenCV 3 Computer Vision with Python Cookbook](https://www.packtpub.com/application-development/opencv-3-computer-vision-python-cookbook?utm_source=github&utm_medium=repository&utm_campaign=9781788474443)

* [Practical Industrial Internet of Things Security](https://www.packtpub.com/business/practical-industrial-internet-things-security?utm_source=github&utm_medium=repository&utm_campaign=9781788832687)

### Suggestions and Feedback
[Click here](https://docs.google.com/forms/d/e/1FAIpQLSe5qwunkGf6PUvzPirPDtuy1Du5Rlzew23UBp2S-P3wB-GcwQ/viewform) if you have any feedback or suggestions.

### Download a free PDF
If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.

https://packt.link/free-ebook/9781788297684
--------------------------------------------------------------------------------