├── .github
│   ├── issues_labeler.yml
│   └── workflows
│       ├── greetings.yml
│       └── issues_labeler.yml
├── .gitignore
├── 01. Basic operations
│   ├── drawingOnImage.py
│   ├── gettingStarted.py
│   ├── gettingStartedWithVideos.py
│   ├── mouseEvents.py
│   ├── savingVideosFromCamera.py
│   ├── usingMatplotlib.py
│   └── usingSkimage.py
├── 02. Core operations
│   ├── README.md
│   ├── arithmaticOperations.py
│   ├── basicImageMerge.py
│   ├── binaryThresholding.py
│   ├── cropping.py
│   └── resizing.py
├── 03. Edge Detection
│   ├── CannyEdgeRealTime.py
│   ├── README.md
│   └── cannyEdgeDetection.py
├── 04. Image Filter
│   ├── SkImageFilter.py
│   ├── bilateralFilter.py
│   └── colorFiltering.py
├── 05. Corner Detection
│   └── cornerDetection.py
├── 06. Background subtraction
│   ├── liveBackgroundSubtraction.py
│   ├── mogBackgroundDetection.py
│   └── runningAverage.py
├── 07. Face Detection
│   ├── README.md
│   ├── blurTheFace.py
│   ├── faceDetectionDNN.py
│   ├── faceDetectionHaarCascade.py
│   ├── realTimeFaceDetection.py
│   ├── realTimeFaceDetectionDNN.py
│   └── smileDetection.py
├── 08. Object Detection
│   ├── README.md
│   └── ojectDetectionCVLIB.py
├── 09. Template Matching
│   ├── README.md
│   └── portMatching.py
├── 10. Invisible_Cloak
│   └── invisiblecloak.py
├── 11. Optical Flow
│   └── opticalFlow.py
├── 12. Blob Detection
│   └── blobDetection.py
├── 13. contouring
│   ├── README.md
│   ├── contouring.py
│   ├── liveContourDetection.py
│   └── shapeDetection.py
├── 14. ImageOperations
│   ├── colvolutionaFeature.py
│   ├── filterVGG16.py
│   ├── imageEnocdingDecoding.py
│   └── simpleImageOperations.py
├── 15. VirtualPen
│   └── README.md
├── 16. EyeBall Tracking
│   └── README.md
├── 17. Color Trackbar
│   └── colorTrackbar.py
├── 18. SIFT Feature Extraction
│   ├── README.md
│   ├── basic.py
│   └── compareFeatures.py
├── 19. Hog Feature Extraction
│   ├── README.md
│   └── featureExtractor.py
├── 20. Image Segmentation
│   ├── KmeansImageSegmentation.py
│   ├── README.md
│   └── waterShedAlgorithm.py
├── 21. Facial Recognition
│   ├── FaceRec.py
│   ├── README.md
│   └── images
│       └── obama.jpg
├── 22. Optical Character Recognition
│   └── README.md
├── 23. PixelLib Segmentation
│   ├── README.md
│   └── instanceSegmentationExample.py
├── 24. Road Lane Detection
│   ├── README.md
│   └── laneDetection.py
├── CONTRIBUTING.md
├── LICENSE
├── Media
│   ├── Man_United.jpeg
│   ├── Shape_Detected.png
│   ├── Shapes.png
│   ├── apple.jpeg
│   ├── bnw.jfif
│   ├── book.png
│   ├── book_on_table.jpeg
│   ├── coins.jpg
│   ├── coins_hog.png
│   ├── corner_detection.jpg
│   ├── cropped image1.png
│   ├── edge-detection.jpg
│   ├── face-001.jpg
│   ├── face-detected-dnn.jpeg
│   ├── face-detected.jpeg
│   ├── nature.png
│   ├── nature_output.png
│   ├── opencv-logo-white.png
│   ├── pieboard-templatematching.jpg
│   ├── port-detected.jpeg
│   ├── port-templatematching.jpg
│   ├── road.jpg
│   ├── road_segmentation.jpg
│   ├── sample.jpeg
│   ├── sample2.jpeg
│   ├── thumbs_up_down.jpg
│   └── thumbs_up_down_countour.jpg
├── README.md
├── _config.yml
├── assets
│   ├── deploy.prototxt.txt
│   ├── haarcascade_eye.xml
│   ├── haarcascade_frontalface_default.xml
│   └── haarcascade_smile.xml
├── requirements.txt
└── utils.py

--------------------------------------------------------------------------------
/.github/issues_labeler.yml:
--------------------------------------------------------------------------------
APIs:
  - "(api|APIs|apis|API|APIS)"
BASH:
  - "(Bash|bash|BASH)"
javascript:
  - "(JavaScript|JAVASCRIPT|Javascript|javascript|JS|js|Js)"
python:
  - "(Python|PYTHON|python)"
enhancement:
  - "(enhancement|Enhancement|enhance)"
difficulty-easy:
  - "(Easy|easy)"
difficulty-medium:
  - "(Medium|medium)"
difficulty-hard:
  - "(Hard|hard)"
git:
  - "(Git|git|GIT)"
github-actions:
  - "(GitHub actions|GIT actions|github actions)"
hacktoberfest:
  - "(hacktoberfest|Hacktoberfest|Hacktober fest|Hacktoberfest2020)"
bug:
  - "(bug|Bug|BUG)"
up-for-grab:
  - "(up for grab)"
urgent:
  - "(urgent|URGENT|Urgent)"
documentation:
  - "(documentation|Documentation)"
no-Code:
  - "(No Code)"

--------------------------------------------------------------------------------
/.github/workflows/greetings.yml:
--------------------------------------------------------------------------------
name: Greetings

on: [pull_request_target, issues]

jobs:
  greeting:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/first-interaction@v1
        with:
          repo-token: "${{ secrets.GITHUB_TOKEN }}"
          issue-message: 'Hey @${{ github.actor }}, congratulations 🎉 on creating your first issue! Please wait for admin approval; once the issue is assigned, you can start working on it. Do give a star ⭐ if you like this project.'
          pr-message: 'Congratulations 🎉 @${{ github.actor }} on making your first PR! An admin will review the changes soon and merge them. 😊 Do give a star ⭐ if you like this project.'

--------------------------------------------------------------------------------
/.github/workflows/issues_labeler.yml:
--------------------------------------------------------------------------------
name: "Issue Labeler"
on:
  issues:
    types: [opened, edited]

jobs:
  triage:
    name: Automate Issue
    runs-on: ubuntu-latest
    steps:
      - uses: github/issue-labeler@v2.0
        with:
          repo-token: "${{ secrets.GITHUB_TOKEN }}"
          configuration-path: .github/issues_labeler.yml
          enable-versioned-regex: 0

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# others
test.py
*.h5
*.caffemodel
*.mp4
inference_model/

--------------------------------------------------------------------------------
/01. Basic operations/drawingOnImage.py:
--------------------------------------------------------------------------------
""" Drawing functions in OpenCV """

import cv2

img = cv2.imread("./Media/apple.jpeg")
if img is not None:
    """ Draw a line from (10, 10) to (180, 100) """
    img = cv2.line(img,
                   pt1=(10, 10),
                   pt2=(180, 100),
                   color=(255, 255, 255),
                   thickness=2)

    img = cv2.arrowedLine(img,
                          pt1=(20, 20),
                          pt2=(300, 300),
                          color=(0, 255, 255),
                          thickness=2)

    img = cv2.rectangle(img,
                        pt1=(250, 0),
                        pt2=(450, 250),
                        color=(0, 255, 0),
                        thickness=2)

    img = cv2.circle(img,
                     center=(100, 100),
                     radius=50,
                     color=(255, 0, 255),
                     thickness=-1)

    cv2.imshow("output", img)
else:
    print('file not found.')

cv2.waitKey(0)
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/01. Basic operations/gettingStarted.py:
--------------------------------------------------------------------------------
''' Getting started with OpenCV

Reading and saving an image with OpenCV using waitKey
'''
import cv2

# load a color image in grayscale
img = cv2.imread('./Media/sample.jpeg', 0)

cv2.imshow('image', img)
k = cv2.waitKey(0) & 0xFF

# esc key to exit
if k == 27:
    cv2.destroyAllWindows()

# s key to save and exit
elif k == ord('s'):
    cv2.imwrite("./Media/sample2.jpeg", img)
    cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/01. Basic operations/gettingStartedWithVideos.py:
--------------------------------------------------------------------------------
''' Getting started with Videos in OpenCV '''
import cv2

cap = cv2.VideoCapture(0)

while True:
    # capture frame by frame
    check, frame = cap.read()
    if not check:  # stop instead of spinning if the camera returns no frame
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # display the result
    cv2.imshow('frame', gray)

    # break the while loop with 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# release the cap
cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/01. Basic operations/mouseEvents.py:
--------------------------------------------------------------------------------
''' Mouse Events in OpenCV '''

import cv2
import numpy as np

# mouse callback function
def draw_circle(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDBLCLK:
        cv2.circle(img, (x, y), 100, (255, 0, 0), -1)

# create a black image and a window, and bind the function to the window
img = np.zeros((512, 512, 3), np.uint8)
cv2.namedWindow('image')
cv2.setMouseCallback('image', draw_circle)

while True:
    cv2.imshow('image', img)
    if cv2.waitKey(20) & 0xFF == 27:
        break
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/01. Basic operations/savingVideosFromCamera.py:
--------------------------------------------------------------------------------
''' Saving videos from Camera '''
import cv2

cap = cv2.VideoCapture(0)

# define codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter("./Media/output.avi", fourcc, 20.0, (640, 480))

while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        frame = cv2.flip(frame, 0)

        # write the flipped frame
        out.write(frame)

        cv2.imshow('video', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break

# release video capture
cap.release()
out.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/01. Basic operations/usingMatplotlib.py:
--------------------------------------------------------------------------------
''' Showing output using matplotlib '''

import cv2
from matplotlib import pyplot as plt

img = cv2.imread('./Media/sample.jpeg', 0)

plt.imshow(img, cmap="gray")
plt.title('Sample Image')
plt.xticks([])
plt.yticks([])
plt.show()

--------------------------------------------------------------------------------
/01. Basic operations/usingSkimage.py:
--------------------------------------------------------------------------------
import cv2
from skimage import io

# reading the image
img = cv2.imread('./Media/sample.jpeg')

# changing image from BGR to RGB for correct output
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# showing output
io.imshow(img)
io.show()

--------------------------------------------------------------------------------
/02. Core operations/README.md:
--------------------------------------------------------------------------------
# Image Cropper
This code allows you to crop images dynamically and save them. Mouse Events in OpenCV are used to achieve this, as the sketch below shows.
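A condensed sketch of the mechanism (names here are illustrative; the full implementation lives in `cropping.py` in this folder):

```python
import cv2

points = []  # corners of the selected region

def on_mouse(event, x, y, flags, param):
    # record the corner where the button is pressed and the one where it is released
    if event == cv2.EVENT_LBUTTONDOWN:
        points[:] = [(x, y)]
    elif event == cv2.EVENT_LBUTTONUP:
        points.append((x, y))

cv2.namedWindow("frame")
cv2.setMouseCallback("frame", on_mouse)  # OpenCV now calls on_mouse for every mouse event in "frame"
```

Once both corners are recorded, the crop itself is plain NumPy slicing: `roi = img[y1:y2, x1:x2]`.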

### Executing the code
1. Run the .py file by running the command ``` python3 cropping.py``` in the terminal or cmd.
2. Use the **Left** mouse button to drag out a rectangular region of the image you want to crop. **Release** the button once you are done.
3. The selected rectangle is shown on the image. You can press **r** to reset your selection.
4. Press **c** to crop the image. A new window opens up.
   a) Press **s** to save the cropped image.
   b) Press **r** to reset and return to the original image.
5. Repeat from step 2 to crop more images.

### Demo
![](https://github.com/Pranjalmishra30/OpenCV-Rep/blob/master/Mini-Projects/Cropping_Images/Data/crop-DEMO.gif)

### References
1. Mouse events [tutorial](https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_gui/py_mouse_handling/py_mouse_handling.html)
2. Pyimagesearch [tutorial](https://www.pyimagesearch.com/2015/03/09/capturing-mouse-click-events-with-python-and-opencv/)

--------------------------------------------------------------------------------
/02. Core operations/arithmaticOperations.py:
--------------------------------------------------------------------------------
""" Adding two images using addWeighted() """
import cv2

img1 = cv2.imread('./Media/sample.jpeg')
img2 = cv2.imread('./Media/opencv-logo-white.png')

# resizing the images so the arithmetic operations line up pixel to pixel
img1 = cv2.resize(img1, (500, 500))
img2 = cv2.resize(img2, (500, 500))

# blend: 0.7 * img1 + 0.3 * img2 + 0
output_img = cv2.addWeighted(img1, 0.7, img2, 0.3, 0)

cv2.imshow("final image", output_img)
if cv2.waitKey(0) == ord('q'):
    cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/02. Core operations/basicImageMerge.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2
from matplotlib import pyplot as plt

BLUE = [255, 0, 0]

img1 = cv2.imread("./Media/opencv-logo-white.png")

replicate = cv2.copyMakeBorder(img1, 20, 20, 20, 20, cv2.BORDER_REPLICATE)
reflect = cv2.copyMakeBorder(img1, 20, 20, 20, 20, cv2.BORDER_REFLECT)
reflect101 = cv2.copyMakeBorder(img1, 20, 20, 20, 20, cv2.BORDER_REFLECT_101)
wrap = cv2.copyMakeBorder(img1, 20, 20, 20, 20, cv2.BORDER_WRAP)
constant = cv2.copyMakeBorder(img1, 20, 20, 20, 20, cv2.BORDER_CONSTANT, value=BLUE)

plt.subplot(321), plt.imshow(img1, 'gray'), plt.title('original')
plt.subplot(322), plt.imshow(replicate, 'gray'), plt.title('replicate')
plt.subplot(323), plt.imshow(reflect, 'gray'), plt.title('reflect')
plt.subplot(324), plt.imshow(reflect101, 'gray'), plt.title('reflect101')
plt.subplot(325), plt.imshow(wrap, 'gray'), plt.title('wrap')
plt.subplot(326), plt.imshow(constant, 'gray'), plt.title('constant')

plt.show()

--------------------------------------------------------------------------------
/02. Core operations/binaryThresholding.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('./Media/bnw.jfif')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# denoise before thresholding
gray = cv2.medianBlur(gray, 5)

ret, th1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

plt.imshow(th1, 'gray')
plt.title('Binary Thresholding')
plt.xticks([])
plt.yticks([])
plt.show()

--------------------------------------------------------------------------------
/02. Core operations/cropping.py:
--------------------------------------------------------------------------------
""" Image cropping using opencv """

import cv2
refpt = []  # List of reference points

def select_roi(event, x, y, flags, param):
    global refpt  # Global references

    if event == cv2.EVENT_LBUTTONDOWN:  # When the left mouse button is clicked
        refpt = [(x, y)]

    elif event == cv2.EVENT_LBUTTONUP:  # When the left mouse button is released
        refpt.append((x, y))  # recording the last coordinates
        cv2.rectangle(img_main, refpt[0], refpt[1], (0, 255, 0), 2)
        cv2.imshow("frame", img_main)
        print("Selection Successful")

img = cv2.imread("./Media/Man_United.jpeg")
img_main = cv2.resize(img, (400, 400))  # Resizing image

clone = img_main.copy()   # To reset the image after cropping
clone2 = img_main.copy()  # To crop a section out without affecting the original image

cv2.namedWindow("frame")
cv2.setMouseCallback("frame", select_roi)

i = 1  # Numbering for saving images

while True:
    cv2.imshow("frame", img_main)
    var = cv2.waitKey(0)

    '''
    Instructions
    - Select a region, then press c to crop that portion.
    - Press r to reset your selection.
    - In the crop mode, press s to save your cropped image or press r to reset the selection.
    - Press q to exit the program.
    '''

    if var == ord('c'):  # Crop selected images

        if len(refpt) == 2:
            roi = clone2[refpt[0][1]:refpt[1][1], refpt[0][0]:refpt[1][0]]  # [y1:y2, x1:x2]
            cv2.namedWindow("Crop")
            cv2.imshow("Crop", roi)
            print("Cropped")

            var2 = cv2.waitKey(0)

            if var2 == ord('s'):  # Saving cropped image
                cv2.imwrite("./Media/cropped image{}.png".format(i), roi)  # Name of the saved image
                i = i + 1
                print("image saved\n")
                cv2.destroyWindow("Crop")
                img_main = clone.copy()

            elif var2 == ord('r'):  # Reset
                cv2.destroyWindow("Crop")
                print("Reset\n")
                img_main = clone.copy()

    elif var == ord('r'):  # Reset
        print("Reset\n")
        img_main = clone.copy()

    elif var == ord('q'):  # Exit the loop
        print("Exiting ...")
        break

cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/02. Core operations/resizing.py:
--------------------------------------------------------------------------------
""" Image Resizing

- cv2.INTER_AREA: This is used when we need to shrink an image.
- cv2.INTER_CUBIC: This is slower but produces higher-quality results.
- cv2.INTER_LINEAR: This is primarily used when zooming is required. This is the default interpolation technique in OpenCV.
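
For example (a usage sketch): resized = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
halves each dimension; the fx/fy scale factors are an alternative to passing an explicit (width, height).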

https://www.geeksforgeeks.org/image-resizing-using-opencv-python/
"""

import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('./Media/Man_United.jpeg')

if img is not None:
    interpolation_area = cv2.resize(img,
                                    (480, 480),
                                    interpolation=cv2.INTER_AREA)

    interpolation_nearest = cv2.resize(img,
                                       (480, 480),
                                       interpolation=cv2.INTER_NEAREST)

    bigger = cv2.resize(img,
                        (2048, 2048))
    titles = ['Original', 'INTER_AREA', "INTER_NEAREST", "bigger"]
    images = [img, interpolation_area, interpolation_nearest, bigger]
    count = 4

    for i in range(count):
        plt.subplot(2, 2, i+1)
        plt.title(titles[i])
        plt.imshow(images[i])
    plt.show()
else:
    print("Check the file path again.")

--------------------------------------------------------------------------------
/03. Edge Detection/CannyEdgeRealTime.py:
--------------------------------------------------------------------------------
""" Real time edge detection """
import cv2
import numpy as np

cap = cv2.VideoCapture(0)

while True:

    check, frame = cap.read()

    if check:
        # convert to grayscale, then apply the canny edge transformation
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, threshold1=30, threshold2=100)

        # showing the output frame
        cv2.imshow('Original', frame)
        cv2.imshow('Edges', edges)

    if cv2.waitKey(5) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/03. Edge Detection/README.md:
--------------------------------------------------------------------------------
## Edge Detection

**Canny Edge Detection** - The Canny edge detector is an edge detection operator that uses a multi-stage algorithm to detect a wide range of edges in images. It was developed by John F. Canny in 1986. Canny also produced a computational theory of edge detection explaining why the technique works.

--------------------------------------------------------------------------------
/03. Edge Detection/cannyEdgeDetection.py:
--------------------------------------------------------------------------------
'''
Edge detection with the Canny algorithm
edges = cv2.Canny(img, threshold1, threshold2, apertureSize, L2gradient)

Canny() takes the image array (typically grayscale), not a file path.
L2gradient: Its default value is False. If True, Canny() uses a more computationally expensive equation to detect edges,
which provides more accuracy at the cost of resources.
'''
import cv2

img = cv2.imread(r'./Media/face-001.jpg')
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# applying canny edge transformations
edges = cv2.Canny(gray_img, threshold1=30, threshold2=100)

# showing the output frame
cv2.imshow("Edge Detected Image", edges)

k = cv2.waitKey(0) & 0xFF

if k == 27:
    cv2.destroyAllWindows()
elif k == ord('s'):
    cv2.imwrite("./Media/edge-detection.jpg", edges)
    cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/04. Image Filter/SkImageFilter.py:
--------------------------------------------------------------------------------
from skimage import data, io, filters

image = data.coins()
# ... or any other NumPy array!
edges = filters.sobel(image)
io.imshow(edges)
io.show()

--------------------------------------------------------------------------------
/04. Image Filter/bilateralFilter.py:
--------------------------------------------------------------------------------
'''
OpenCV provides the bilateralFilter() function to apply the bilateral filter on the image. The bilateral filter can reduce unwanted noise very
well while keeping edges sharp. The syntax of the function is given below:

    cv2.bilateralFilter(src, d, sigmaColor, sigmaSpace) -> dst

src - It denotes the source image. It can be an 8-bit or floating-point, 1-channel image.
dst - It denotes the destination image of the same size. Its type will be the same as the src image.
d - It denotes the diameter of the pixel neighborhood (integer type) that is used during filtering. If its value is negative, then it is computed from sigmaSpace.
sigmaColor - It denotes the filter sigma in the color space.
sigmaSpace - It denotes the filter sigma in the coordinate space.
'''

import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('./Media/sample.jpeg', 1)

blur = cv2.bilateralFilter(img, 9, 75, 75)

plt.subplot(121), plt.imshow(img), plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122), plt.imshow(blur), plt.title('Bilateral Filter')
plt.xticks([]), plt.yticks([])
plt.show()

--------------------------------------------------------------------------------
/04. Image Filter/colorFiltering.py:
--------------------------------------------------------------------------------
''' color filtering in live videos using color thresholding '''

import cv2
import numpy as np

cap = cv2.VideoCapture(0)

while True:
    cam, frame = cap.read()
    if cam is True:
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        lower_blue = np.array([101, 50, 38])
        upper_blue = np.array([110, 255, 255])

        lower_red = np.array([160, 20, 70])
        upper_red = np.array([190, 255, 255])

        lower_green = np.array([36, 25, 25])
        upper_green = np.array([86, 255, 255])

        mask_blue = cv2.inRange(hsv, lower_blue, upper_blue)
        res_blue = cv2.bitwise_and(frame, frame, mask=mask_blue)

        mask_red = cv2.inRange(hsv, lower_red, upper_red)
        res_red = cv2.bitwise_and(frame, frame, mask=mask_red)

        mask_green = cv2.inRange(hsv, lower_green, upper_green)
        res_green = cv2.bitwise_and(frame, frame, mask=mask_green)

        cv2.imshow('frame', frame)
        cv2.imshow('Blue', res_blue)
        cv2.imshow('Red', res_red)
        cv2.imshow('green', res_green)

    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/05. Corner Detection/cornerDetection.py:
--------------------------------------------------------------------------------
'''
Corner detection in Python using goodFeaturesToTrack()
'''
import cv2
import numpy as np

img = cv2.imread('./Media/corner_detection.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = np.float32(gray)

corners = cv2.goodFeaturesToTrack(gray, 100, 0.01, 10)
corners = corners.astype(int)  # np.int0 is deprecated in recent NumPy

for corner in corners:
    x, y = corner.ravel()
    cv2.circle(img, (x, y), 3, 255, -1)

cv2.imshow('Corner', img)

cv2.waitKey(0) & 0xFF
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/06. Background subtraction/liveBackgroundSubtraction.py:
--------------------------------------------------------------------------------
""" Live Background subtraction using opencv """

import numpy as np
import cv2

cap = cv2.VideoCapture(0)
fgbg = cv2.createBackgroundSubtractorKNN()

while True:
    ret, frame = cap.read()
    if ret:
        fgmask = fgbg.apply(frame)

        cv2.imshow('frame', fgmask)

    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/06. Background subtraction/mogBackgroundDetection.py:
--------------------------------------------------------------------------------
""" Background subtraction using opencv """

import numpy as np
import cv2

cap = cv2.VideoCapture('./Media/people-walking.mp4')
fgbg = cv2.createBackgroundSubtractorMOG2()

while True:
    ret, frame = cap.read()
    if not ret:  # stop when the video ends
        break

    fgmask = fgbg.apply(frame)

    cv2.imshow('frame', fgmask)

    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/06. Background subtraction/runningAverage.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np

cap = cv2.VideoCapture(0)

# read the frame from the camera
_, frame = cap.read()

# converting data type to float32
averageValue = np.float32(frame)

while True:
    # read the frame from camera
    _, frame = cap.read()

    # accumulateWeighted is used to update the running weights
    cv2.accumulateWeighted(frame, averageValue, 0.02)

    resultingFrame = cv2.convertScaleAbs(averageValue)

    cv2.imshow('Original Window', frame)
    cv2.imshow('averageValue', resultingFrame)

    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/07. Face Detection/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/07. Face Detection/README.md

--------------------------------------------------------------------------------
/07. Face Detection/blurTheFace.py:
--------------------------------------------------------------------------------
''' Real time face blurring using webcam '''

import cv2
import numpy as np
import time

# https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt
prototxt_path = "./assets/deploy.prototxt.txt"
# https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel
model_path = "./assets/res10_300x300_ssd_iter_140000.caffemodel"

# load Caffe model
model = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

cap = cv2.VideoCapture(0)

while True:
    start = time.time()
    _, image = cap.read()
    # get width and height of the image
    h, w = image.shape[:2]
    kernel_width = (w // 7) | 1
    kernel_height = (h // 7) | 1
    # preprocess the image: resize and perform mean subtraction
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))
    # set the image into the input of the neural network
    model.setInput(blob)
    # perform inference and get the result
    output = np.squeeze(model.forward())
    for i in range(0, output.shape[0]):
        # get the confidence
        confidence = output[i, 2]
        # if confidence is above 40%, then blur the bounding box (face)
        if confidence > 0.4:
            # get the surrounding box coordinates and upscale them to the original image
            box = output[i, 3:7] * np.array([w, h, w, h])
            # convert to integers (np.int was removed from recent NumPy)
            start_x, start_y, end_x, end_y = box.astype(int)
            # get the face image
            face = image[start_y: end_y, start_x: end_x]
            # apply gaussian blur to this face
            face = cv2.GaussianBlur(face, (kernel_width, kernel_height), 0)
            # put the blurred face into the original image
            image[start_y: end_y, start_x: end_x] = face
    cv2.imshow("image", image)
    if cv2.waitKey(1) == ord("q"):
        break
    time_elapsed = time.time() - start
    fps = 1 / time_elapsed
    print("FPS:", fps)

cv2.destroyAllWindows()
cap.release()

--------------------------------------------------------------------------------
/07. Face Detection/faceDetectionDNN.py:
--------------------------------------------------------------------------------
""" Face detection using deep learning and opencv """
import numpy as np
import cv2

# Global Declarations
# https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt
prototxt_path = "./assets/deploy.prototxt.txt"
# https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel
model_path = "./assets/res10_300x300_ssd_iter_140000.caffemodel"

confThresh = 0.8
net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

def detectFace(imgPath):
    img = cv2.imread(imgPath)

    (h, w) = img.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(img, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))

    net.setInput(blob)
    detections = net.forward()
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence < confThresh:
            continue

        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.rectangle(img, (startX, startY), (endX, endY), (0, 0, 255), 2)

    cv2.imshow("Output", img)
    key = cv2.waitKey(0)
    if key == 27:
        cv2.destroyAllWindows()
    elif key == ord('s'):
        cv2.imwrite('./Media/face-detected-dnn.jpeg', img)
        cv2.destroyAllWindows()


path_img = './Media/face-001.jpg'
detectFace(path_img)

--------------------------------------------------------------------------------
/07. Face Detection/faceDetectionHaarCascade.py:
--------------------------------------------------------------------------------
""" Face detection using haarcascade_frontalface and eye classifier """

import cv2

# Path
face_cascade = cv2.CascadeClassifier('./assets/haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('./assets/haarcascade_eye.xml')


def detectedFace(img):
    img = cv2.imread(img)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(gray, 1.4, 5)

    for face in faces:
        x, y, width, height = face
        # draw a rectangle for detection
        cv2.rectangle(
            img,
            (x, y),
            (x + width, y + height),
            (0, 0, 255),
            1,
        )
        roi_gray = gray[y:y+height, x:x+width]
        roi_color = img[y:y+height, x:x+width]

        eyes = eye_cascade.detectMultiScale(roi_gray)
        for (ex, ey, ew, eh) in eyes:
            cv2.rectangle(
                roi_color,
                (ex, ey),
                (ex+ew, ey+eh),
                (0, 255, 0),
                2,
            )

    cv2.imshow('Face Detection', img)
    k = cv2.waitKey(0) & 0xFF

    if k == 27:
        cv2.destroyAllWindows()
    elif k == ord('s'):
        cv2.imwrite('./Media/face-detected.jpeg', img)
        cv2.destroyAllWindows()


path_img = './Media/face-001.jpg'
detectedFace(path_img)

--------------------------------------------------------------------------------
/07. Face Detection/realTimeFaceDetection.py:
--------------------------------------------------------------------------------
""" Real time face detection using haarcascade classifier """

import numpy as np
import cv2

face_cascade = cv2.CascadeClassifier('./assets/haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('./assets/haarcascade_eye.xml')

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = frame[y:y+h, x:x+w]

        eyes = eye_cascade.detectMultiScale(roi_gray)
        for (ex, ey, ew, eh) in eyes:
            cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)

    cv2.imshow('face and eyes detection', frame)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/07. Face Detection/realTimeFaceDetectionDNN.py:
--------------------------------------------------------------------------------
""" Real time face detection using deep learning """

import numpy as np
import cv2

# https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt
prototxt_path = "./assets/deploy.prototxt.txt"
# https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel
model_path = "./assets/res10_300x300_ssd_iter_140000.caffemodel"

confThresh = 0.5
net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

cam = cv2.VideoCapture(0)

while True:
    ret, frame = cam.read()
    frame = cv2.flip(frame, 1)
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))

    net.setInput(blob)
    detections = net.forward()
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence < confThresh:
            continue

        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 0, 255), 2)

    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1)
    if key == 27:
        break

cam.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/07. Face Detection/smileDetection.py:
--------------------------------------------------------------------------------
""" Real time smile detection using haarcascade classifiers """

import numpy as np
import cv2

face_cascade = cv2.CascadeClassifier('./assets/haarcascade_frontalface_default.xml')
smile_cascade = cv2.CascadeClassifier('./assets/haarcascade_smile.xml')

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        roi_gray = gray[y: y + h, x: x + w]
        roi_color = frame[y: y + h, x: x + w]

        smiles = smile_cascade.detectMultiScale(roi_gray, 1.3, 5)
        for (sx, sy, sw, sh) in smiles:
            cv2.rectangle(roi_color, (sx, sy), (sx+sw, sy+sh), (0, 0, 255), 2)

    cv2.imshow('Smile Detection', frame)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/08. Object Detection/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/08. Object Detection/README.md

--------------------------------------------------------------------------------
/08. Object Detection/ojectDetectionCVLIB.py:
--------------------------------------------------------------------------------
""" Common object detection using cvlib and YOLOv3 """

import cv2
import matplotlib.pyplot as plt
import cvlib as cv
from cvlib.object_detection import draw_bbox

img = cv2.imread('./Media/apple.jpeg')

# detect on the BGR image itself; the detector expects a 3-channel image
bbox, label, conf = cv.detect_common_objects(img)

output_image = draw_bbox(img, bbox, label, conf)

# convert BGR -> RGB so matplotlib shows the colors correctly
output_image = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)

plt.imshow(output_image)
plt.savefig("./Media/apple-detected.jpeg")
plt.show()

--------------------------------------------------------------------------------
/09. Template Matching/README.md:
--------------------------------------------------------------------------------
## Template Matching

Template Matching is a method for searching and finding the location of a template image in a larger image.
OpenCV comes with a function cv2.matchTemplate() for this purpose. It simply slides the template image over the input
image (as in 2D convolution) and compares the template and patch of input image under the template image.

--------------------------------------------------------------------------------
/09. Template Matching/portMatching.py:
--------------------------------------------------------------------------------
"""
Template Matching is a method for searching and finding the location of a template image in a larger image.
OpenCV comes with a function cv2.matchTemplate() for this purpose. It simply slides the template image over the input
image (as in 2D convolution) and compares the template and patch of input image under the template image.
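The result is a map of match scores over every template position; thresholding it with np.where(res >= threshold), as done below, gives the top-left corner of each matching region.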
5 | """ 6 | import cv2 7 | import numpy as np 8 | 9 | source_color = cv2.imread(r"./Media/pieboard-templatematching.jpg") 10 | #keeping source_color in colored to show the cordinate in original picture instead to grayscale 11 | source_gray = cv2.cvtColor(source_color, cv2.COLOR_BGR2GRAY) 12 | template_image = cv2.imread(r"./Media/port-templatematching.jpg", 0) 13 | w,h = template_image.shape[::-1] 14 | 15 | res = cv2.matchTemplate(source_gray, template_image, cv2.TM_CCOEFF_NORMED) 16 | threshold = 0.8 17 | loc = np.where(res >= threshold) 18 | 19 | print(*loc) 20 | for port in zip(*loc[::-1]): 21 | cv2.rectangle(source_color, port,(port[0] + w, port[1] + h), (0, 255, 255), 2) 22 | 23 | cv2.imshow("Port Detected", source_color) 24 | k = cv2.waitKey(0) & 0xFF 25 | 26 | if k == 27: 27 | cv2.destroyAllWindows() 28 | elif k == ord("s"): 29 | cv2.imwrite('./Media/port-detected.jpeg', source_color) 30 | cv2.destroyAllWindows() 31 | -------------------------------------------------------------------------------- /10. Invisible_Cloak/invisiblecloak.py: -------------------------------------------------------------------------------- 1 | 2 | import cv2 3 | import time 4 | import numpy as np 5 | 6 | ## Preparation for writing the ouput video 7 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 8 | out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640, 480)) 9 | 10 | ##reading from the webcam 11 | cap = cv2.VideoCapture(0) 12 | 13 | ## Allow the system to sleep for 3 seconds before the webcam starts 14 | time.sleep(3) 15 | count = 0 16 | background = 0 17 | 18 | ## Capture the background in range of 60 19 | for i in range(60): 20 | ret, background = cap.read() 21 | background = np.flip(background, axis=1) 22 | 23 | ## Read every frame from the webcam, until the camera is open 24 | while (cap.isOpened()): 25 | ret, img = cap.read() 26 | if not ret: 27 | break 28 | count += 1 29 | img = np.flip(img, axis=1) 30 | 31 | ## Convert the color space from BGR to HSV 32 | hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) 33 | 34 | ## Generat masks to detect red color 35 | lower_red = np.array([0, 120, 50]) 36 | upper_red = np.array([10, 255,255]) 37 | mask1 = cv2.inRange(hsv, lower_red, upper_red) 38 | 39 | lower_red = np.array([170, 120, 70]) 40 | upper_red = np.array([180, 255, 255]) 41 | mask2 = cv2.inRange(hsv, lower_red, upper_red) 42 | 43 | mask1 = mask1 + mask2 44 | 45 | ## Open and Dilate the mask image 46 | mask1 = cv2.morphologyEx(mask1, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8)) 47 | mask1 = cv2.morphologyEx(mask1, cv2.MORPH_DILATE, np.ones((3, 3), np.uint8)) 48 | 49 | ## Create an inverted mask to segment out the red color from the frame 50 | mask2 = cv2.bitwise_not(mask1) 51 | 52 | ## Segment the red color part out of the frame using bitwise and with the inverted mask 53 | res1 = cv2.bitwise_and(img, img, mask=mask2) 54 | 55 | ## Create image showing static background frame pixels only for the masked region 56 | res2 = cv2.bitwise_and(background, background, mask=mask1) 57 | 58 | ## Generating the final output and writing 59 | finalOutput = cv2.addWeighted(res1, 1, res2, 1, 0) 60 | out.write(finalOutput) 61 | cv2.imshow("magic", finalOutput) 62 | cv2.waitKey(1) 63 | 64 | 65 | cap.release() 66 | out.release() 67 | cv2.destroyAllWindows() 68 | 69 | #------------------------ 70 | #colors code 71 | 72 | #skin color 73 | #lower_red = np.array([0, 0, 70]) 74 | #upper_red = np.array([100, 255,255]) 75 | # mask1 = cv2.inRange(hsv, lower_red, upper_red) 76 | 77 | # lower_red = np.array([170, 120, 70]) 78 | # upper_red = 
np.array([180, 255, 255]) 79 | 80 | #----------------------- -------------------------------------------------------------------------------- /11. Optical Flow/opticalFlow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | cap = cv2.VideoCapture('motion.avi') 5 | 6 | ret, frame = cap.read() 7 | gs_im0 = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 8 | points_prev = cv2.goodFeaturesToTrack(gs_im0, 100, 0.03, 9.0, False) 9 | 10 | while(cap.isOpened()): 11 | ret, frame = cap.read() 12 | 13 | gs_im1 = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 14 | # Call tracker. 15 | points, st, err = cv2.calcOpticalFlowPyrLK(gs_im0, gs_im1, points_prev, None, (3,3)) 16 | 17 | for i,p in enumerate(points): 18 | a,b = p.ravel() 19 | frame = cv2.circle(frame,(a,b),3,(255,255,255),-1) 20 | 21 | cv2.imshow('frame',frame) 22 | points_prev = points 23 | gs_im0 = gs_im1 24 | if cv2.waitKey(1) & 0xFF == ord('q'): 25 | break 26 | 27 | cap.release() 28 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /12. Blob Detection/blobDetection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/12. Blob Detection/blobDetection.py -------------------------------------------------------------------------------- /13. contouring/README.md: -------------------------------------------------------------------------------- 1 | # Shape Contouring 2 | 3 | Contouring is like drawing an outline along the boundary of an object. In OpenCV this will be achieved with 2 processes: 4 | **Thresholding** and **Contouring**. 5 | 6 | * Thresholding allows us to filter the object 7 | * Contouring lets us outline/mark the boundary of the object 8 | 9 | ## Result 10 | **Original** ![](https://github.com/Pranjalmishra30/rep.1/blob/master/Contouring/Data/Shapes.png) **Contoured** ![](https://github.com/Pranjalmishra30/openCV-Rep/blob/master/Mini-Projects/ShapeContouring/Shape_Detected.png) 11 | 12 | ## Refrences 13 | 1. [Thresholding](https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_thresholding/py_thresholding.html) 14 | 2. [Contours](https://docs.opencv.org/trunk/d4/d73/tutorial_py_contours_begin.html) 15 | -------------------------------------------------------------------------------- /13. 
contouring/contouring.py: -------------------------------------------------------------------------------- 1 | """ Image contoutring on thumbs images using binary thresholding and findCountours() """ 2 | 3 | import cv2 4 | import matplotlib.pyplot as plt 5 | 6 | # read the image 7 | image = cv2.imread('./Media/thumbs_up_down.jpg') 8 | # convert to RGB 9 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 10 | # convert to grayscale 11 | gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) 12 | # create a binary thresholded image 13 | _, binary = cv2.threshold(gray, 225, 255, cv2.THRESH_BINARY_INV) 14 | 15 | # find the contours from the thresholded image 16 | contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 17 | # draw all contours 18 | image = cv2.drawContours(image, contours, -1, (0, 255, 0), 2) 19 | # show the image with the drawn contours 20 | plt.imshow(image) 21 | #plt.imsave('./Media/thumbs_up_down_countour.jpg', image) 22 | plt.show() 23 | 24 | -------------------------------------------------------------------------------- /13. contouring/liveContourDetection.py: -------------------------------------------------------------------------------- 1 | """ Live cam contouring using thresholding and findContours() """ 2 | 3 | import cv2 4 | 5 | cap = cv2.VideoCapture(0) 6 | 7 | while True: 8 | cam, frame = cap.read() 9 | 10 | if cam is True: 11 | # convert frame into grayscale 12 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 13 | 14 | # create a binary threshold 15 | _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV) 16 | 17 | contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, 18 | cv2.CHAIN_APPROX_SIMPLE) 19 | 20 | image = cv2.drawContours(image=frame, contours= contours, 21 | contourIdx=-1, color=(128, 0, 0), thickness=2) 22 | 23 | cv2.imshow("live-contour-detector", image) 24 | 25 | if cv2.waitKey(1) == ord('q'): 26 | break 27 | 28 | cap.release() 29 | cv2.destroyAllWindows() 30 | -------------------------------------------------------------------------------- /13. contouring/shapeDetection.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | img = cv2.imread('./Media/Shapes.png') 4 | gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 5 | blur = cv2.GaussianBlur(gray,(11,11),0) #The values need to be >1 and odd 6 | 7 | # creating binary thresholding 8 | ret, th = cv2.threshold(blur,220,255,cv2.THRESH_BINARY_INV) # Inverse Binary thresholding technique 9 | 10 | # finding and drawing contour 11 | (cnts,_) = cv2.findContours(th.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) 12 | cv2.drawContours(img,cnts,-1,(0,0,0),2) 13 | 14 | cv2.imshow('image',img) 15 | cv2.imwrite("./Media/Shape_Detected.png",img) # Save the contoured image 16 | cv2.waitKey(0) 17 | cv2.destroyAllWindows() 18 | -------------------------------------------------------------------------------- /14. 
ImageOperations/colvolutionaFeature.py: -------------------------------------------------------------------------------- 1 | """ Convolutional Operations on Image using numpy """ 2 | 3 | import numpy as np 4 | from PIL import Image 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | class ConvolutionalOperations: 9 | def __init__(self, image_path): 10 | self.image_path = image_path 11 | 12 | def _color_to_gray(self): 13 | gray_img = np.array(Image.open(self.image_path).convert('L')) 14 | return gray_img 15 | 16 | 17 | 18 | if __name__ == '__main__': 19 | image_path = './Media/apple.jpeg' 20 | obj = ConvolutionalOperations(image_path) 21 | output = obj._color_to_gray() 22 | plt.imshow(output) 23 | plt.show() -------------------------------------------------------------------------------- /14. ImageOperations/filterVGG16.py: -------------------------------------------------------------------------------- 1 | """ Image filters using Keras and tensorflow """ 2 | import numpy as np 3 | from tensorflow.keras.applications.vgg16 import VGG16 4 | from tensorflow.keras.applications.vgg16 import preprocess_input 5 | from tensorflow.keras.preprocessing.image import load_img 6 | from tensorflow.keras.preprocessing.image import img_to_array 7 | from tensorflow.keras.models import Model 8 | from matplotlib import pyplot as plt 9 | 10 | model = VGG16() 11 | model = Model(input=model.input, outputs=model.layers[1].output) 12 | model.summary() 13 | 14 | img = load_img("./Media/face-001.jpg") 15 | img = img_to_array(img) 16 | img = np.expand_dims(img, asix=0) 17 | img = preprocess_input(img) 18 | feature_map = model.predict(img) 19 | 20 | square = 8 21 | i = 1 22 | for _ in range(square): 23 | for _ in range(square): 24 | ax = plt.subplot(square, square, i) 25 | ax.set_xticks([]) 26 | ax.set_yticks([]) 27 | plt.imshow(feature_map[0, :, :, i-1], cmap='gray') 28 | i += 1 29 | plt.show() -------------------------------------------------------------------------------- /14. ImageOperations/imageEnocdingDecoding.py: -------------------------------------------------------------------------------- 1 | """ Image Encoding and decoding using base64 """ 2 | 3 | import base64 4 | 5 | image = open('./Media/apple.jpeg', 'rb') 6 | image_read = image.read() 7 | 8 | image_64_encode = base64.encodebytes(image_read) 9 | print(image_64_encode) 10 | 11 | image_64_decode = base64.decodebytes(image_64_encode) 12 | image_result = open('./Media/apple.png', 'wb') 13 | image_result.write(image_64_decode) -------------------------------------------------------------------------------- /14. 
ImageOperations/simpleImageOperations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | class ImageOperation: 7 | """ Simple image operations using numpy arrays """ 8 | def __init__(self, img_array: np.ndarray): 9 | self.img_array = img_array 10 | 11 | def color_to_gray(self): 12 | img = self.img_array 13 | gray = np.dot(img[...,:3], [0.299, 0.587, 0.144]) 14 | return gray 15 | 16 | def color_inversion(self): 17 | inv_img = 255 - self.img_array 18 | return inv_img 19 | 20 | def color_reduction(self): 21 | 22 | im_32 = self.img_array // 32 * 32 23 | im_128 = self.img_array // 128 * 128 24 | 25 | im_red = np.concatenate((self.img_array, im_32, im_128), axis=1) 26 | return im_red 27 | 28 | def gamma_correction(self): 29 | 30 | img = self.img_array 31 | img1 = 255.0 * (self.img_array / 255.0)**(1/2.2) 32 | img2 = 255.0 * (self.img_array / 255.0)**2.2 33 | 34 | return np.concatenate((img, img1, img2), axis=1) 35 | 36 | def slice_n_paste(self): 37 | 38 | src = np.resize(self.img_array, (128, 128)) 39 | dst = np.resize(self.img_array, (256, 256)) // 4 40 | 41 | dst_copy = dst.copy() 42 | 43 | dst_copy[110:200, 110:200] = src[10:100, 10:100] 44 | return dst_copy 45 | 46 | def image_binarization(self): 47 | 48 | img = self.img_array 49 | gray = np.dot(img[...,:3], [0.299, 0.587, 0.144]) 50 | thresh = 128 51 | max_val = 255.0 52 | 53 | im_bin = (gray > thresh) * max_val 54 | 55 | return im_bin 56 | 57 | 58 | if __name__ == '__main__': 59 | image_path = './Media/apple.jpeg' 60 | img_array = np.array(Image.open(image_path)) 61 | obj = ImageOperation(img_array) 62 | output = obj.gamma_correction() 63 | plt.imshow(output) 64 | plt.show() -------------------------------------------------------------------------------- /15. VirtualPen/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/15. VirtualPen/README.md -------------------------------------------------------------------------------- /16. EyeBall Tracking/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/16. EyeBall Tracking/README.md -------------------------------------------------------------------------------- /17. 
Color Trackbar/colorTrackbar.py: -------------------------------------------------------------------------------- 1 | """ Color Tracker GUI """ 2 | 3 | import numpy as np 4 | import cv2 5 | 6 | def nothing(x): 7 | pass 8 | 9 | # Create a black image, a window 10 | img = np.zeros((300, 512, 3), np.uint8) 11 | cv2.namedWindow('Color Tracker') 12 | 13 | # create trackbars for color change 14 | cv2.createTrackbar('R','image',0,255,nothing) 15 | cv2.createTrackbar('G','image',0,255,nothing) 16 | cv2.createTrackbar('B','image',0,255,nothing) 17 | 18 | # create switch for ON/OFF functionality 19 | switch = '0 : OFF \n1 : ON' 20 | cv2.createTrackbar(switch, 'image',0,1,nothing) 21 | 22 | while(1): 23 | cv2.imshow('image',img) 24 | k = cv2.waitKey(1) & 0xFF 25 | if k == 27: 26 | break 27 | 28 | # get current positions of four trackbars 29 | r = cv2.getTrackbarPos('R','image') 30 | g = cv2.getTrackbarPos('G','image') 31 | b = cv2.getTrackbarPos('B','image') 32 | s = cv2.getTrackbarPos(switch,'image') 33 | 34 | if s == 0: 35 | img[:] = 0 36 | else: 37 | img[:] = [b,g,r] 38 | 39 | cv2.destroyAllWindows() 40 | -------------------------------------------------------------------------------- /18. SIFT Feature Extraction/README.md: -------------------------------------------------------------------------------- 1 | **SIFT** stands for `Scale Invariant Feature Transform`, it is a feature extraction method (among others, such as `HOG feature extraction`) where image content is transformed into local feature coordinates that are invariant to translation, scale and other image transformations. 2 | 3 | Below are the advantages of SIFT: 4 | 5 | - Locality: Features are local; robust to occlusion and clutter. 6 | - Distinctiveness: Individual features extracted can be matched to a large dataset of objects. 7 | - Quantity: Using SIFT, we can extract many features from small objects. 8 | - Efficiency: SIFT is close to real-time performance. 9 | 10 | [SIFT original paper](https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf) -------------------------------------------------------------------------------- /18. SIFT Feature Extraction/basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | img = cv2.imread('./Media/apple.jpeg') 5 | gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 6 | 7 | sift = cv2.SIFT_create() 8 | 9 | keypoints, descrptors = sift.detectAndCompute(img, None) 10 | sift_image = cv2.drawKeypoints(gray_img, keypoints, img) 11 | 12 | cv2.imshow('image', sift_image) 13 | 14 | k = cv2.waitKey(0) & 0xff 15 | 16 | if k == 27: 17 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /18. 
SIFT Feature Extraction/compareFeatures.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | # loading and resizing the image 5 | img1 = cv2.imread('./Media/book.png') 6 | img2 = cv2.imread('./Media/book_on_table.jpeg') 7 | 8 | img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY) 9 | img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY) 10 | 11 | sift = cv2.SIFT_create() 12 | 13 | keypoints1, descrptors1 = sift.detectAndCompute(img1, None) 14 | keypoints2, descrptors2 = sift.detectAndCompute(img2, None) 15 | 16 | bf = cv2.BFMatcher(cv2.NORM_L1, crossCheck=True) 17 | 18 | matches = bf.match(descrptors1, descrptors2) 19 | matches = sorted(matches, key= lambda x: x.distance) 20 | 21 | matched_img = cv2.drawMatches(img1, keypoints1, img2, keypoints2, matches[:20], img2, flags=2) 22 | 23 | # showing the output 24 | cv2.imshow('image', cv2.resize(matched_img, (800, 600))) 25 | 26 | k = cv2.waitKey(0) & 0xff 27 | 28 | if k == 27: 29 | cv2.destroyAllWindows() 30 | -------------------------------------------------------------------------------- /19. Hog Feature Extraction/README.md: -------------------------------------------------------------------------------- 1 | # HOG(Histogram Oriented Gradients) Feature Extraction 2 | -------------------------------------------------------------------------------- /19. Hog Feature Extraction/featureExtractor.py: -------------------------------------------------------------------------------- 1 | ''' Feature extraction using hog and skimage ''' 2 | 3 | from skimage.io import imread 4 | from skimage.transform import resize 5 | from skimage.feature import hog 6 | from matplotlib import pyplot as plt 7 | 8 | img = imread("./Media/coins.jpg") 9 | 10 | # applying the hog algorithm 11 | fd, hog_image = hog(img, 12 | orientations=9, 13 | pixels_per_cell=(8, 8), 14 | cells_per_block=(2,2), 15 | visualize=True, 16 | multichannel=True) 17 | 18 | plt.axis("off") 19 | plt.imshow(hog_image, cmap='gray') 20 | plt.savefig("./Media/coins_hog.png") 21 | plt.show() 22 | -------------------------------------------------------------------------------- /20. 
-------------------------------------------------------------------------------- /20. Image Segmentation/KmeansImageSegmentation.py: -------------------------------------------------------------------------------- 1 | """ Image segmentation using K-Means """ 2 | 3 | import cv2 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | 7 | image = cv2.imread('./Media/nature.png') 8 | 9 | # reshape the image to a 2D array of pixels with 3 color values each 10 | pixels_values = image.reshape((-1, 3)) 11 | 12 | # converting to float32, as required by cv2.kmeans 13 | pixels_values = np.float32(pixels_values) 14 | 15 | # stop after 100 iterations or when cluster centers move less than epsilon (0.2) 16 | criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2) 17 | 18 | # number of clusters 19 | K = 3 20 | 21 | _, labels, centers = cv2.kmeans(pixels_values, 22 | K=K, 23 | bestLabels=None, 24 | criteria=criteria, 25 | attempts=10, 26 | flags=cv2.KMEANS_RANDOM_CENTERS) 27 | 28 | # converting to 8 bit values 29 | centers = np.uint8(centers) 30 | 31 | # flatten the labels array 32 | labels = labels.flatten() 33 | 34 | # convert all pixels to the color of their centroids 35 | segmented_image = centers[labels] 36 | 37 | segmented_image = segmented_image.reshape(image.shape) 38 | 39 | # show the original and segmented output 40 | cv2.imshow('Original', image) 41 | cv2.imshow('Segmented', segmented_image) 42 | 43 | # saving the output 44 | cv2.imwrite('./Media/nature_output.png', segmented_image) 45 | k = cv2.waitKey(0) & 0xFF 46 | 47 | if k == 27: 48 | cv2.destroyAllWindows() 49 | -------------------------------------------------------------------------------- /20. Image Segmentation/README.md: -------------------------------------------------------------------------------- 1 | **Image segmentation** is the process of partitioning an image into multiple regions (or segments). 2 | The goal is to change the representation of the image into something that is easier to analyze and more meaningful. 3 | 4 | K-Means clustering is an unsupervised machine learning algorithm that aims to partition N observations into K clusters in which each observation belongs to the cluster with the nearest mean. A cluster refers to a collection of data points aggregated together because of certain similarities. For image segmentation, the clusters are different image colors.
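A handy extension of the K-Means approach described above is to mask out a single cluster, e.g. to black out one dominant color region. A sketch under the same setup as `KmeansImageSegmentation.py`; the cluster index `2` is an arbitrary choice for illustration:

```python
import cv2
import numpy as np

image = cv2.imread('./Media/nature.png')
pixel_values = np.float32(image.reshape((-1, 3)))

criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
_, labels, centers = cv2.kmeans(pixel_values, 3, None, criteria, 10,
                                cv2.KMEANS_RANDOM_CENTERS)
labels = labels.flatten()

# copy the original pixels, then black out everything in one cluster
masked_image = image.reshape((-1, 3)).copy()
masked_image[labels == 2] = [0, 0, 0]
masked_image = masked_image.reshape(image.shape)

cv2.imshow('cluster 2 removed', masked_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
```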
-------------------------------------------------------------------------------- /20. Image Segmentation/waterShedAlgorithm.py: -------------------------------------------------------------------------------- 1 | ''' Watershed algorithm in OpenCV ''' 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | img = cv2.imread('./Media/coins.jpg') 7 | gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 8 | 9 | ret, thresh = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) 10 | 11 | # noise removal 12 | kernel = np.ones((3, 3), np.uint8) 13 | opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2) 14 | 15 | # sure background area 16 | sure_bg = cv2.dilate(opening, kernel) 17 | 18 | # finding sure foreground area 19 | dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5) 20 | ret, sure_fg = cv2.threshold(dist_transform, 0.7*dist_transform.max(), 255, 0) 21 | 22 | # finding unknown region 23 | 24 | sure_fg = np.uint8(sure_fg) 25 | unknown = cv2.subtract(sure_bg, sure_fg) 26 | 27 | # marker labelling 28 | ret, markers = cv2.connectedComponents(sure_fg) 29 | 30 | # add one to all labels so that sure background is not 0, but 1 31 | markers = markers + 1 32 | 33 | # mark the unknown region with zero 34 | markers[unknown==255] = 0 35 | 36 | markers = cv2.watershed(img, markers) 37 | img[markers == -1] = [255, 0, 0] 38 | 39 | cv2.imshow('output', img) 40 | 41 | key = cv2.waitKey(0) & 0xFF 42 | if key == 27: 43 | cv2.destroyAllWindows() 44 | -------------------------------------------------------------------------------- /21. Facial Recognition/FaceRec.py: -------------------------------------------------------------------------------- 1 | import os 2 | import face_recognition 3 | import cv2 4 | import numpy as np 5 | 6 | video_capture = cv2.VideoCapture(0) 7 | 8 | # Create arrays of known face encodings and their names 9 | known_face_encodings = [] 10 | known_face_names = [] 11 | 12 | root_dir = os.path.dirname(os.path.abspath(__file__)) 13 | image_dir = os.path.join(root_dir, "images") 14 | 15 | # creating encodings for faces from the images folder 16 | for file in os.listdir(image_dir): 17 | if file.endswith(("jpeg", "jpg")): 18 | input_face_name = file.split('.')[0] 19 | input_face = face_recognition.load_image_file(os.path.join(image_dir, file)) 20 | input_face_encoding = face_recognition.face_encodings(input_face)[0] 21 | 22 | # appending face names and face encodings 23 | known_face_names.append(input_face_name) 24 | known_face_encodings.append(input_face_encoding) 25 | 26 | # Initialize some variables 27 | face_locations = [] 28 | face_encodings = [] 29 | face_names = [] 30 | process_this_frame = True 31 | 32 | while True: 33 | # Grab a single frame of video 34 | ret, frame = video_capture.read() 35 | 36 | # Resize frame of video to 1/4 size for faster face recognition processing 37 | small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25) 38 | 39 | # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses) 40 | rgb_small_frame = small_frame[:, :, ::-1] 41 | 42 | # Only process every other frame of video to save time 43 | if process_this_frame: 44 | # Find all the faces and face encodings in the current frame of video 45 | face_locations = face_recognition.face_locations(rgb_small_frame) 46 | face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations) 47 | 48 | face_names = [] 49 | for face_encoding in face_encodings: 50 | # See if the face is a match for the known face(s) 51 | matches = face_recognition.compare_faces(known_face_encodings, face_encoding) 52 | 
name = "Unknown" 53 | 54 | # # If a match was found in known_face_encodings, just use the first one. 55 | # if True in matches: 56 | # first_match_index = matches.index(True) 57 | # name = known_face_names[first_match_index] 58 | 59 | # Or instead, use the known face with the smallest distance to the new face 60 | face_distances = face_recognition.face_distance(known_face_encodings, face_encoding) 61 | best_match_index = np.argmin(face_distances) 62 | if matches[best_match_index]: 63 | name = known_face_names[best_match_index] 64 | 65 | face_names.append(name) 66 | process_this_frame = not process_this_frame 67 | 68 | 69 | # Display the results 70 | for (top, right, bottom, left), name in zip(face_locations, face_names): 71 | # Scale back up face locations since the frame we detected in was scaled to 1/4 size 72 | top *= 4 73 | right *= 4 74 | bottom *= 4 75 | left *= 4 76 | 77 | # Draw a box around the face 78 | cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2) 79 | 80 | # Draw a label with a name below the face 81 | cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED) 82 | font = cv2.FONT_HERSHEY_DUPLEX 83 | cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1) 84 | 85 | # Display the resulting image 86 | cv2.imshow('Video', frame) 87 | 88 | # Hit 'q' on the keyboard to quit! 89 | if cv2.waitKey(1) & 0xFF == ord('q'): 90 | break 91 | 92 | # Release handle to the webcam 93 | video_capture.release() 94 | cv2.destroyAllWindows() 95 | -------------------------------------------------------------------------------- /21. Facial Recognition/README.md: -------------------------------------------------------------------------------- 1 | ## Usgaes 2 | 3 | 1. Add Your Image in `Images` folder. exmaple >> name.jpeg 4 | 5 | 2. Install the requirements file 6 | 7 | ```bash 8 | python -m pip install -r requirements.txt 9 | ``` 10 | 11 | 3. Run the Script 12 | 13 | ```bash 14 | python FaceRec.py 15 | ``` 16 | 17 | NOTE:- incase installtation stuck on dlib 18 | 19 | ```bash 20 | python -m pip install dlib -vvv 21 | ``` 22 | -------------------------------------------------------------------------------- /21. Facial Recognition/images/obama.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/21. Facial Recognition/images/obama.jpg -------------------------------------------------------------------------------- /22. Optical Character Recognition/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/22. Optical Character Recognition/README.md -------------------------------------------------------------------------------- /23. PixelLib Segmentation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/23. PixelLib Segmentation/README.md -------------------------------------------------------------------------------- /23. 
PixelLib Segmentation/instanceSegmentationExample.py: -------------------------------------------------------------------------------- 1 | """ Instance segmentation using PixelLib and a Mask R-CNN model pretrained on the COCO dataset """ 2 | 3 | import pixellib 4 | from PIL import Image 5 | from pixellib.instance import instance_segmentation 6 | 7 | model_path = "./assets/mask_rcnn_coco.h5" 8 | image_path = "./Media/road.jpg" 9 | image_output = './Media/road_segmentation.jpg' 10 | 11 | # creating the segmentation instance 12 | segment_image = instance_segmentation() 13 | segment_image.load_model(model_path) 14 | 15 | # applying instance segmentation 16 | segment_image.segmentImage(image_path, show_bboxes=True, output_image_name=image_output) 17 | 18 | # showing the output 19 | img = Image.open(image_output) 20 | img.show() 21 | -------------------------------------------------------------------------------- /24. Road Lane Detection/README.md: -------------------------------------------------------------------------------- 1 | # Road Lane Detection 2 | -------------------------------------------------------------------------------- /24. Road Lane Detection/laneDetection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/24. Road Lane Detection/laneDetection.py -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Computer Vision Essentials 2 | 3 | - [Contributing to Computer Vision Essentials](#contributing-to-computer-vision-essentials) 4 | - [Steps to contribute](#steps-to-contribute) 5 | - [Making a PR](#making-a-pr) 6 | - [Additional Notes](#additional-notes) 7 | - [Issue suggestions/Bug reporting](#issue-suggestionsbug-reporting) 8 | - [License](#license) 9 | - [References](#references) 10 | 11 | We love your input! We want to make contributing to this project as easy and transparent as possible, whether it's: 12 | 13 | - Reporting a bug 14 | - Discussing the current state of the code 15 | - Submitting a fix 16 | - Proposing new features 17 | - Becoming a maintainer 18 | 19 | ## Steps to contribute 20 | 21 | - Comment on the issue you want to work on. Make sure it's not assigned to someone else. 22 | 23 | - If you think an algorithm is missing, create an issue. 24 | 25 | ### Making a PR 26 | 27 | - Make sure you have been assigned the issue to which you are making a PR. 28 | - If you make a PR before being assigned, it will be labeled `invalid` and closed without merging. 29 | 30 | - Fork the repo and clone it on your machine. 31 | - Add an upstream link to the main branch in your cloned repo 32 | 33 | ```bash 34 | git remote add upstream https://github.com/codePerfectPlus/ComputerVision-Essentials 35 | ``` 36 | 37 | - Keep your cloned repo up to date by pulling from upstream (this will also avoid merge conflicts while committing new changes) 38 | 39 | ```bash 40 | git pull upstream master 41 | ``` 42 | 43 | - Create your feature branch 44 | 45 | ```bash 46 | git checkout -b <your-branch-name> 47 | ``` 48 | 49 | - Commit all the changes 50 | 51 | ```bash 52 | git commit -am "Meaningful commit message" 53 | ``` 54 | 55 | - Push the changes for review 56 | 57 | ```bash 58 | git push origin <your-branch-name> 59 | ``` 60 | 61 | - Create a PR to our repo on GitHub. 
62 | 63 | ### Additional Notes 64 | 65 | - Code should be properly commented to ensure its readability. 66 | - If you've added code that should be tested, add tests as comments. 67 | - Make sure your code is properly formatted. 68 | - Issue that pull request! 69 | 70 | ## Issue suggestions/Bug reporting 71 | 72 | When you are creating an issue, make sure it's not already present. Furthermore, provide a proper description of the changes. If you are suggesting any code improvements, provide thorough details about the improvements. 73 | 74 | **Great issue suggestions** tend to have: 75 | 76 | - A quick summary of the changes. 77 | - In case of a bug, steps to reproduce it. 78 | - Be specific! 79 | - Give sample code if you can. 80 | - What you expected would happen. 81 | - What actually happens. 82 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work). 83 | 84 | ## License 85 | 86 | By contributing, you agree that your contributions will be licensed under the project's [MIT License](/LICENSE). 87 | 88 | ## References 89 | 90 | This document was adapted from the open-source contribution guidelines for [Facebook's Draft](https://github.com/facebook/draft-js/blob/a9316a723f9e918afde44dea68b5f9f39b7d9b00/CONTRIBUTING.md) 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Deepak Raj 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Media/Man_United.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/Man_United.jpeg -------------------------------------------------------------------------------- /Media/Shape_Detected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/Shape_Detected.png -------------------------------------------------------------------------------- /Media/Shapes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/Shapes.png -------------------------------------------------------------------------------- /Media/apple.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/apple.jpeg -------------------------------------------------------------------------------- /Media/bnw.jfif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/bnw.jfif -------------------------------------------------------------------------------- /Media/book.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/book.png -------------------------------------------------------------------------------- /Media/book_on_table.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/book_on_table.jpeg -------------------------------------------------------------------------------- /Media/coins.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/coins.jpg -------------------------------------------------------------------------------- /Media/coins_hog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/coins_hog.png -------------------------------------------------------------------------------- /Media/corner_detection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/corner_detection.jpg -------------------------------------------------------------------------------- /Media/cropped image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/cropped image1.png 
-------------------------------------------------------------------------------- /Media/edge-detection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/edge-detection.jpg -------------------------------------------------------------------------------- /Media/face-001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/face-001.jpg -------------------------------------------------------------------------------- /Media/face-detected-dnn.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/face-detected-dnn.jpeg -------------------------------------------------------------------------------- /Media/face-detected.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/face-detected.jpeg -------------------------------------------------------------------------------- /Media/nature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/nature.png -------------------------------------------------------------------------------- /Media/nature_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/nature_output.png -------------------------------------------------------------------------------- /Media/opencv-logo-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/opencv-logo-white.png -------------------------------------------------------------------------------- /Media/pieboard-templatematching.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/pieboard-templatematching.jpg -------------------------------------------------------------------------------- /Media/port-detected.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/port-detected.jpeg -------------------------------------------------------------------------------- /Media/port-templatematching.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/port-templatematching.jpg -------------------------------------------------------------------------------- /Media/road.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/road.jpg -------------------------------------------------------------------------------- /Media/road_segmentation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/road_segmentation.jpg -------------------------------------------------------------------------------- /Media/sample.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/sample.jpeg -------------------------------------------------------------------------------- /Media/sample2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/sample2.jpeg -------------------------------------------------------------------------------- /Media/thumbs_up_down.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/thumbs_up_down.jpg -------------------------------------------------------------------------------- /Media/thumbs_up_down_countour.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/thumbs_up_down_countour.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Computer Vision Essentials 2 | 3 | - [Computer Vision Essentials](#computer-vision-essentials) 4 | - [Introduction](#introduction) 5 | - [Used Libraries/Packages](#used-librariespackages) 6 | - [How To Run](#how-to-run) 7 | - [Usage](#usage) 8 | - [Support](#support) 9 | - [Roadmap](#roadmap) 10 | - [Contributing](#contributing) 11 | - [Authors and acknowledgment](#authors-and-acknowledgment) 12 | - [License](#license) 13 | - [Citation](#citation) 14 | - [Author](#author) 15 | - [Extra Downloads](#extra-downloads) 16 | 17 | ## Introduction 18 | 19 | According to [wikipedia](https://en.wikipedia.org/wiki/Computer_vision "computer_vision-Wikipedia") - 20 | 21 | Computer vision is an interdisciplinary scientific field that deals with how computers can gain high-level understanding from digital images or videos. From the perspective of engineering, it seeks to understand and automate tasks that the human visual system can do. 22 | 23 | Computer vision tasks include methods for acquiring, processing, analyzing and understanding digital images, and extraction of high-dimensional data from the real world in order to produce numerical or symbolic information, e.g. in the forms of decisions. 24 | 25 | [Read More ...](https://en.wikipedia.org/wiki/Computer_vision "computer_vision-Wikipedia") 26 | 27 | ## Used Libraries/Packages 28 | 29 | - **OpenCV** - OpenCV (Open Source Computer Vision Library) is an open source computer vision and machine learning software library. 
30 | - **PixelLib** - PixelLib is a library created for performing image and video segmentation using a few lines of code. 31 | - **CVLib** - A simple, high-level, easy-to-use open source Computer Vision library for Python. 32 | - **Dlib** - Dlib is a general purpose cross-platform software library written in the programming language C++. 33 | - **PIL/Pillow** - Python Imaging Library is a free and open-source additional library for the Python programming language that adds support for opening, manipulating, and saving many different image file formats. 34 | - **Keras** - Keras is the most used deep learning framework among top-5 winning teams on Kaggle. 35 | - **Tensorflow** - TensorFlow is a free and open-source software library for machine learning. 36 | - **Pytesseract** - Python-tesseract is an optical character recognition (OCR) tool for Python. That is, it will recognize and “read” the text embedded in images. 37 | - **scikit-image** - scikit-image is an open-source image processing library for the Python programming language. It includes algorithms for segmentation, geometric transformations, color space manipulation, analysis, filtering, morphology, feature detection, and more. 38 | - **Matplotlib** - Matplotlib is a cross-platform, data visualization and graphical plotting library for Python and its numerical extension NumPy. 39 | 40 | ## How To Run 41 | 42 | - Install Python 3.6+ 43 | 44 | Create a virtual environment with `pipenv`. 45 | 46 | ```bash 47 | python -m pip install pipenv 48 | pipenv install -r requirements.txt 49 | pipenv shell 50 | ``` 51 | 52 | NOTE: check the [guide](https://www.tensorflow.org/install) for TensorFlow installation on your CPU/GPU. For tensorflow-gpu, install CUDA 11.0 and the necessary libraries. 53 | 54 | Large models and files are hosted on Google Drive. **To download them, run [utils.py](utils.py)** 55 | 56 | ```bash 57 | python utils.py 58 | ``` 59 | 60 | ## Usage 61 | 62 | Computer vision allows a computer to perform many of the same kinds of visual tasks as humans. There are two main tasks, defined below: 63 | 64 | - Object Classification - In object classification, we train a model on a dataset of particular objects, and the model classifies new objects as belonging to one or more of the training categories. 65 | - Object Identification - In object identification, the model identifies a particular instance of an object - for example, parsing two faces in an image and tagging one as Virat Kohli and the other as Rohit Sharma. 66 | 67 | 68 | 69 | ## Support 70 | 71 | contributors 72 | 73 | ## Roadmap 74 | 75 | 76 | ## Contributing 77 | 78 | Before submitting a bug report, please do the following: 79 | 80 | Perform basic troubleshooting steps: 81 | 82 | - Make sure you are on the latest version. If you are not on the most recent version, your problem may have been solved already! Upgrading is always the best first step. 83 | - Try older versions. If you are already on the latest release, try rolling back a few minor versions (e.g. if on 1.7, try 1.5 or 1.6) and see if the problem goes away. This will help the devs narrow down when the problem first arose in the commit log. 84 | - Try switching up dependency versions. If the software in question has dependencies (other libraries, etc.) try upgrading/downgrading those as well. 
85 | 86 | ## Authors and acknowledgment 87 | 88 | - [Deepak Raj](https://github.com/codePerfectPlus) 89 | - [Pranjalmishra30](https://github.com/Pranjalmishra30) 90 | - [GloriousMusketeer](https://github.com/GloriousMusketeer) 91 | - [bislara](https://github.com/bislara) 92 | - [its-harshil](https://github.com/its-harshil) 93 | - [farhan0syakir](https://github.com/farhan0syakir) 94 | - [harshit-saraswat](https://github.com/harshit-saraswat) 95 | - [...](https://github.com/codePerfectPlus/OpenCv-tutorial/graphs/contributors) 96 | 97 | ## License 98 | 99 | Open source project, released under the MIT License. 100 | 101 | ## Citation 102 | 103 | ``` 104 | Stéfan van der Walt, Johannes L. Schönberger, Juan Nunez-Iglesias, François Boulogne, Joshua D. Warner, Neil Yager, Emmanuelle Gouillart, Tony Yu and the scikit-image contributors. scikit-image: Image processing in Python. PeerJ 2:e453 (2014) https://doi.org/10.7717/peerj.453 105 | 106 | Coelho, L.P. 2013. Mahotas: Open source software for scriptable computer vision. Journal of Open Research Software 1(1):e3, DOI: http://dx.doi.org/10.5334/jors.ac 107 | ``` 108 | 109 | ## Author 110 | 111 | - Project : Computer Vision Essentials 112 | - Language : Python 113 | - Github : 114 | - Website : 115 | 116 | ## Extra Downloads 117 | 118 | 1. FaceDetection Caffe Models -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-architect -------------------------------------------------------------------------------- /assets/deploy.prototxt.txt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 300 6 | dim: 300 7 | } 8 | 9 | layer { 10 | name: "data_bn" 11 | type: "BatchNorm" 12 | bottom: "data" 13 | top: "data_bn" 14 | param { 15 | lr_mult: 0.0 16 | } 17 | param { 18 | lr_mult: 0.0 19 | } 20 | param { 21 | lr_mult: 0.0 22 | } 23 | } 24 | layer { 25 | name: "data_scale" 26 | type: "Scale" 27 | bottom: "data_bn" 28 | top: "data_bn" 29 | param { 30 | lr_mult: 1.0 31 | decay_mult: 1.0 32 | } 33 | param { 34 | lr_mult: 2.0 35 | decay_mult: 1.0 36 | } 37 | scale_param { 38 | bias_term: true 39 | } 40 | } 41 | layer { 42 | name: "conv1_h" 43 | type: "Convolution" 44 | bottom: "data_bn" 45 | top: "conv1_h" 46 | param { 47 | lr_mult: 1.0 48 | decay_mult: 1.0 49 | } 50 | param { 51 | lr_mult: 2.0 52 | decay_mult: 1.0 53 | } 54 | convolution_param { 55 | num_output: 32 56 | pad: 3 57 | kernel_size: 7 58 | stride: 2 59 | weight_filler { 60 | type: "msra" 61 | variance_norm: FAN_OUT 62 | } 63 | bias_filler { 64 | type: "constant" 65 | value: 0.0 66 | } 67 | } 68 | } 69 | layer { 70 | name: "conv1_bn_h" 71 | type: "BatchNorm" 72 | bottom: "conv1_h" 73 | top: "conv1_h" 74 | param { 75 | lr_mult: 0.0 76 | } 77 | param { 78 | lr_mult: 0.0 79 | } 80 | param { 81 | lr_mult: 0.0 82 | } 83 | } 84 | layer { 85 | name: "conv1_scale_h" 86 | type: "Scale" 87 | bottom: "conv1_h" 88 | top: "conv1_h" 89 | param { 90 | lr_mult: 1.0 91 | decay_mult: 1.0 92 | } 93 | param { 94 | lr_mult: 2.0 95 | decay_mult: 1.0 96 | } 97 | scale_param { 98 | bias_term: true 99 | } 100 | } 101 | layer { 102 | name: "conv1_relu" 103 | type: "ReLU" 104 | bottom: "conv1_h" 105 | top: "conv1_h" 106 | } 107 | layer { 108 | name: "conv1_pool" 109 | type: "Pooling" 110 | bottom: "conv1_h" 111 | top: "conv1_pool" 112 | pooling_param { 113 | kernel_size: 3 114 | stride: 2 115 | 
116 | } 117 | layer { 118 | name: "layer_64_1_conv1_h" 119 | type: "Convolution" 120 | bottom: "conv1_pool" 121 | top: "layer_64_1_conv1_h" 122 | param { 123 | lr_mult: 1.0 124 | decay_mult: 1.0 125 | } 126 | convolution_param { 127 | num_output: 32 128 | bias_term: false 129 | pad: 1 130 | kernel_size: 3 131 | stride: 1 132 | weight_filler { 133 | type: "msra" 134 | } 135 | bias_filler { 136 | type: "constant" 137 | value: 0.0 138 | } 139 | } 140 | } 141 | layer { 142 | name: "layer_64_1_bn2_h" 143 | type: "BatchNorm" 144 | bottom: "layer_64_1_conv1_h" 145 | top: "layer_64_1_conv1_h" 146 | param { 147 | lr_mult: 0.0 148 | } 149 | param { 150 | lr_mult: 0.0 151 | } 152 | param { 153 | lr_mult: 0.0 154 | } 155 | } 156 | layer { 157 | name: "layer_64_1_scale2_h" 158 | type: "Scale" 159 | bottom: "layer_64_1_conv1_h" 160 | top: "layer_64_1_conv1_h" 161 | param { 162 | lr_mult: 1.0 163 | decay_mult: 1.0 164 | } 165 | param { 166 | lr_mult: 2.0 167 | decay_mult: 1.0 168 | } 169 | scale_param { 170 | bias_term: true 171 | } 172 | } 173 | layer { 174 | name: "layer_64_1_relu2" 175 | type: "ReLU" 176 | bottom: "layer_64_1_conv1_h" 177 | top: "layer_64_1_conv1_h" 178 | } 179 | layer { 180 | name: "layer_64_1_conv2_h" 181 | type: "Convolution" 182 | bottom: "layer_64_1_conv1_h" 183 | top: "layer_64_1_conv2_h" 184 | param { 185 | lr_mult: 1.0 186 | decay_mult: 1.0 187 | } 188 | convolution_param { 189 | num_output: 32 190 | bias_term: false 191 | pad: 1 192 | kernel_size: 3 193 | stride: 1 194 | weight_filler { 195 | type: "msra" 196 | } 197 | bias_filler { 198 | type: "constant" 199 | value: 0.0 200 | } 201 | } 202 | } 203 | layer { 204 | name: "layer_64_1_sum" 205 | type: "Eltwise" 206 | bottom: "layer_64_1_conv2_h" 207 | bottom: "conv1_pool" 208 | top: "layer_64_1_sum" 209 | } 210 | layer { 211 | name: "layer_128_1_bn1_h" 212 | type: "BatchNorm" 213 | bottom: "layer_64_1_sum" 214 | top: "layer_128_1_bn1_h" 215 | param { 216 | lr_mult: 0.0 217 | } 218 | param { 219 | lr_mult: 0.0 220 | } 221 | param { 222 | lr_mult: 0.0 223 | } 224 | } 225 | layer { 226 | name: "layer_128_1_scale1_h" 227 | type: "Scale" 228 | bottom: "layer_128_1_bn1_h" 229 | top: "layer_128_1_bn1_h" 230 | param { 231 | lr_mult: 1.0 232 | decay_mult: 1.0 233 | } 234 | param { 235 | lr_mult: 2.0 236 | decay_mult: 1.0 237 | } 238 | scale_param { 239 | bias_term: true 240 | } 241 | } 242 | layer { 243 | name: "layer_128_1_relu1" 244 | type: "ReLU" 245 | bottom: "layer_128_1_bn1_h" 246 | top: "layer_128_1_bn1_h" 247 | } 248 | layer { 249 | name: "layer_128_1_conv1_h" 250 | type: "Convolution" 251 | bottom: "layer_128_1_bn1_h" 252 | top: "layer_128_1_conv1_h" 253 | param { 254 | lr_mult: 1.0 255 | decay_mult: 1.0 256 | } 257 | convolution_param { 258 | num_output: 128 259 | bias_term: false 260 | pad: 1 261 | kernel_size: 3 262 | stride: 2 263 | weight_filler { 264 | type: "msra" 265 | } 266 | bias_filler { 267 | type: "constant" 268 | value: 0.0 269 | } 270 | } 271 | } 272 | layer { 273 | name: "layer_128_1_bn2" 274 | type: "BatchNorm" 275 | bottom: "layer_128_1_conv1_h" 276 | top: "layer_128_1_conv1_h" 277 | param { 278 | lr_mult: 0.0 279 | } 280 | param { 281 | lr_mult: 0.0 282 | } 283 | param { 284 | lr_mult: 0.0 285 | } 286 | } 287 | layer { 288 | name: "layer_128_1_scale2" 289 | type: "Scale" 290 | bottom: "layer_128_1_conv1_h" 291 | top: "layer_128_1_conv1_h" 292 | param { 293 | lr_mult: 1.0 294 | decay_mult: 1.0 295 | } 296 | param { 297 | lr_mult: 2.0 298 | decay_mult: 1.0 299 | } 300 | scale_param { 301 | bias_term: true 302 | 
} 303 | } 304 | layer { 305 | name: "layer_128_1_relu2" 306 | type: "ReLU" 307 | bottom: "layer_128_1_conv1_h" 308 | top: "layer_128_1_conv1_h" 309 | } 310 | layer { 311 | name: "layer_128_1_conv2" 312 | type: "Convolution" 313 | bottom: "layer_128_1_conv1_h" 314 | top: "layer_128_1_conv2" 315 | param { 316 | lr_mult: 1.0 317 | decay_mult: 1.0 318 | } 319 | convolution_param { 320 | num_output: 128 321 | bias_term: false 322 | pad: 1 323 | kernel_size: 3 324 | stride: 1 325 | weight_filler { 326 | type: "msra" 327 | } 328 | bias_filler { 329 | type: "constant" 330 | value: 0.0 331 | } 332 | } 333 | } 334 | layer { 335 | name: "layer_128_1_conv_expand_h" 336 | type: "Convolution" 337 | bottom: "layer_128_1_bn1_h" 338 | top: "layer_128_1_conv_expand_h" 339 | param { 340 | lr_mult: 1.0 341 | decay_mult: 1.0 342 | } 343 | convolution_param { 344 | num_output: 128 345 | bias_term: false 346 | pad: 0 347 | kernel_size: 1 348 | stride: 2 349 | weight_filler { 350 | type: "msra" 351 | } 352 | bias_filler { 353 | type: "constant" 354 | value: 0.0 355 | } 356 | } 357 | } 358 | layer { 359 | name: "layer_128_1_sum" 360 | type: "Eltwise" 361 | bottom: "layer_128_1_conv2" 362 | bottom: "layer_128_1_conv_expand_h" 363 | top: "layer_128_1_sum" 364 | } 365 | layer { 366 | name: "layer_256_1_bn1" 367 | type: "BatchNorm" 368 | bottom: "layer_128_1_sum" 369 | top: "layer_256_1_bn1" 370 | param { 371 | lr_mult: 0.0 372 | } 373 | param { 374 | lr_mult: 0.0 375 | } 376 | param { 377 | lr_mult: 0.0 378 | } 379 | } 380 | layer { 381 | name: "layer_256_1_scale1" 382 | type: "Scale" 383 | bottom: "layer_256_1_bn1" 384 | top: "layer_256_1_bn1" 385 | param { 386 | lr_mult: 1.0 387 | decay_mult: 1.0 388 | } 389 | param { 390 | lr_mult: 2.0 391 | decay_mult: 1.0 392 | } 393 | scale_param { 394 | bias_term: true 395 | } 396 | } 397 | layer { 398 | name: "layer_256_1_relu1" 399 | type: "ReLU" 400 | bottom: "layer_256_1_bn1" 401 | top: "layer_256_1_bn1" 402 | } 403 | layer { 404 | name: "layer_256_1_conv1" 405 | type: "Convolution" 406 | bottom: "layer_256_1_bn1" 407 | top: "layer_256_1_conv1" 408 | param { 409 | lr_mult: 1.0 410 | decay_mult: 1.0 411 | } 412 | convolution_param { 413 | num_output: 256 414 | bias_term: false 415 | pad: 1 416 | kernel_size: 3 417 | stride: 2 418 | weight_filler { 419 | type: "msra" 420 | } 421 | bias_filler { 422 | type: "constant" 423 | value: 0.0 424 | } 425 | } 426 | } 427 | layer { 428 | name: "layer_256_1_bn2" 429 | type: "BatchNorm" 430 | bottom: "layer_256_1_conv1" 431 | top: "layer_256_1_conv1" 432 | param { 433 | lr_mult: 0.0 434 | } 435 | param { 436 | lr_mult: 0.0 437 | } 438 | param { 439 | lr_mult: 0.0 440 | } 441 | } 442 | layer { 443 | name: "layer_256_1_scale2" 444 | type: "Scale" 445 | bottom: "layer_256_1_conv1" 446 | top: "layer_256_1_conv1" 447 | param { 448 | lr_mult: 1.0 449 | decay_mult: 1.0 450 | } 451 | param { 452 | lr_mult: 2.0 453 | decay_mult: 1.0 454 | } 455 | scale_param { 456 | bias_term: true 457 | } 458 | } 459 | layer { 460 | name: "layer_256_1_relu2" 461 | type: "ReLU" 462 | bottom: "layer_256_1_conv1" 463 | top: "layer_256_1_conv1" 464 | } 465 | layer { 466 | name: "layer_256_1_conv2" 467 | type: "Convolution" 468 | bottom: "layer_256_1_conv1" 469 | top: "layer_256_1_conv2" 470 | param { 471 | lr_mult: 1.0 472 | decay_mult: 1.0 473 | } 474 | convolution_param { 475 | num_output: 256 476 | bias_term: false 477 | pad: 1 478 | kernel_size: 3 479 | stride: 1 480 | weight_filler { 481 | type: "msra" 482 | } 483 | bias_filler { 484 | type: "constant" 485 | 
value: 0.0 486 | } 487 | } 488 | } 489 | layer { 490 | name: "layer_256_1_conv_expand" 491 | type: "Convolution" 492 | bottom: "layer_256_1_bn1" 493 | top: "layer_256_1_conv_expand" 494 | param { 495 | lr_mult: 1.0 496 | decay_mult: 1.0 497 | } 498 | convolution_param { 499 | num_output: 256 500 | bias_term: false 501 | pad: 0 502 | kernel_size: 1 503 | stride: 2 504 | weight_filler { 505 | type: "msra" 506 | } 507 | bias_filler { 508 | type: "constant" 509 | value: 0.0 510 | } 511 | } 512 | } 513 | layer { 514 | name: "layer_256_1_sum" 515 | type: "Eltwise" 516 | bottom: "layer_256_1_conv2" 517 | bottom: "layer_256_1_conv_expand" 518 | top: "layer_256_1_sum" 519 | } 520 | layer { 521 | name: "layer_512_1_bn1" 522 | type: "BatchNorm" 523 | bottom: "layer_256_1_sum" 524 | top: "layer_512_1_bn1" 525 | param { 526 | lr_mult: 0.0 527 | } 528 | param { 529 | lr_mult: 0.0 530 | } 531 | param { 532 | lr_mult: 0.0 533 | } 534 | } 535 | layer { 536 | name: "layer_512_1_scale1" 537 | type: "Scale" 538 | bottom: "layer_512_1_bn1" 539 | top: "layer_512_1_bn1" 540 | param { 541 | lr_mult: 1.0 542 | decay_mult: 1.0 543 | } 544 | param { 545 | lr_mult: 2.0 546 | decay_mult: 1.0 547 | } 548 | scale_param { 549 | bias_term: true 550 | } 551 | } 552 | layer { 553 | name: "layer_512_1_relu1" 554 | type: "ReLU" 555 | bottom: "layer_512_1_bn1" 556 | top: "layer_512_1_bn1" 557 | } 558 | layer { 559 | name: "layer_512_1_conv1_h" 560 | type: "Convolution" 561 | bottom: "layer_512_1_bn1" 562 | top: "layer_512_1_conv1_h" 563 | param { 564 | lr_mult: 1.0 565 | decay_mult: 1.0 566 | } 567 | convolution_param { 568 | num_output: 128 569 | bias_term: false 570 | pad: 1 571 | kernel_size: 3 572 | stride: 1 # 2 573 | weight_filler { 574 | type: "msra" 575 | } 576 | bias_filler { 577 | type: "constant" 578 | value: 0.0 579 | } 580 | } 581 | } 582 | layer { 583 | name: "layer_512_1_bn2_h" 584 | type: "BatchNorm" 585 | bottom: "layer_512_1_conv1_h" 586 | top: "layer_512_1_conv1_h" 587 | param { 588 | lr_mult: 0.0 589 | } 590 | param { 591 | lr_mult: 0.0 592 | } 593 | param { 594 | lr_mult: 0.0 595 | } 596 | } 597 | layer { 598 | name: "layer_512_1_scale2_h" 599 | type: "Scale" 600 | bottom: "layer_512_1_conv1_h" 601 | top: "layer_512_1_conv1_h" 602 | param { 603 | lr_mult: 1.0 604 | decay_mult: 1.0 605 | } 606 | param { 607 | lr_mult: 2.0 608 | decay_mult: 1.0 609 | } 610 | scale_param { 611 | bias_term: true 612 | } 613 | } 614 | layer { 615 | name: "layer_512_1_relu2" 616 | type: "ReLU" 617 | bottom: "layer_512_1_conv1_h" 618 | top: "layer_512_1_conv1_h" 619 | } 620 | layer { 621 | name: "layer_512_1_conv2_h" 622 | type: "Convolution" 623 | bottom: "layer_512_1_conv1_h" 624 | top: "layer_512_1_conv2_h" 625 | param { 626 | lr_mult: 1.0 627 | decay_mult: 1.0 628 | } 629 | convolution_param { 630 | num_output: 256 631 | bias_term: false 632 | pad: 2 # 1 633 | kernel_size: 3 634 | stride: 1 635 | dilation: 2 636 | weight_filler { 637 | type: "msra" 638 | } 639 | bias_filler { 640 | type: "constant" 641 | value: 0.0 642 | } 643 | } 644 | } 645 | layer { 646 | name: "layer_512_1_conv_expand_h" 647 | type: "Convolution" 648 | bottom: "layer_512_1_bn1" 649 | top: "layer_512_1_conv_expand_h" 650 | param { 651 | lr_mult: 1.0 652 | decay_mult: 1.0 653 | } 654 | convolution_param { 655 | num_output: 256 656 | bias_term: false 657 | pad: 0 658 | kernel_size: 1 659 | stride: 1 # 2 660 | weight_filler { 661 | type: "msra" 662 | } 663 | bias_filler { 664 | type: "constant" 665 | value: 0.0 666 | } 667 | } 668 | } 669 | layer { 670 | 
name: "layer_512_1_sum" 671 | type: "Eltwise" 672 | bottom: "layer_512_1_conv2_h" 673 | bottom: "layer_512_1_conv_expand_h" 674 | top: "layer_512_1_sum" 675 | } 676 | layer { 677 | name: "last_bn_h" 678 | type: "BatchNorm" 679 | bottom: "layer_512_1_sum" 680 | top: "layer_512_1_sum" 681 | param { 682 | lr_mult: 0.0 683 | } 684 | param { 685 | lr_mult: 0.0 686 | } 687 | param { 688 | lr_mult: 0.0 689 | } 690 | } 691 | layer { 692 | name: "last_scale_h" 693 | type: "Scale" 694 | bottom: "layer_512_1_sum" 695 | top: "layer_512_1_sum" 696 | param { 697 | lr_mult: 1.0 698 | decay_mult: 1.0 699 | } 700 | param { 701 | lr_mult: 2.0 702 | decay_mult: 1.0 703 | } 704 | scale_param { 705 | bias_term: true 706 | } 707 | } 708 | layer { 709 | name: "last_relu" 710 | type: "ReLU" 711 | bottom: "layer_512_1_sum" 712 | top: "fc7" 713 | } 714 | 715 | layer { 716 | name: "conv6_1_h" 717 | type: "Convolution" 718 | bottom: "fc7" 719 | top: "conv6_1_h" 720 | param { 721 | lr_mult: 1 722 | decay_mult: 1 723 | } 724 | param { 725 | lr_mult: 2 726 | decay_mult: 0 727 | } 728 | convolution_param { 729 | num_output: 128 730 | pad: 0 731 | kernel_size: 1 732 | stride: 1 733 | weight_filler { 734 | type: "xavier" 735 | } 736 | bias_filler { 737 | type: "constant" 738 | value: 0 739 | } 740 | } 741 | } 742 | layer { 743 | name: "conv6_1_relu" 744 | type: "ReLU" 745 | bottom: "conv6_1_h" 746 | top: "conv6_1_h" 747 | } 748 | layer { 749 | name: "conv6_2_h" 750 | type: "Convolution" 751 | bottom: "conv6_1_h" 752 | top: "conv6_2_h" 753 | param { 754 | lr_mult: 1 755 | decay_mult: 1 756 | } 757 | param { 758 | lr_mult: 2 759 | decay_mult: 0 760 | } 761 | convolution_param { 762 | num_output: 256 763 | pad: 1 764 | kernel_size: 3 765 | stride: 2 766 | weight_filler { 767 | type: "xavier" 768 | } 769 | bias_filler { 770 | type: "constant" 771 | value: 0 772 | } 773 | } 774 | } 775 | layer { 776 | name: "conv6_2_relu" 777 | type: "ReLU" 778 | bottom: "conv6_2_h" 779 | top: "conv6_2_h" 780 | } 781 | layer { 782 | name: "conv7_1_h" 783 | type: "Convolution" 784 | bottom: "conv6_2_h" 785 | top: "conv7_1_h" 786 | param { 787 | lr_mult: 1 788 | decay_mult: 1 789 | } 790 | param { 791 | lr_mult: 2 792 | decay_mult: 0 793 | } 794 | convolution_param { 795 | num_output: 64 796 | pad: 0 797 | kernel_size: 1 798 | stride: 1 799 | weight_filler { 800 | type: "xavier" 801 | } 802 | bias_filler { 803 | type: "constant" 804 | value: 0 805 | } 806 | } 807 | } 808 | layer { 809 | name: "conv7_1_relu" 810 | type: "ReLU" 811 | bottom: "conv7_1_h" 812 | top: "conv7_1_h" 813 | } 814 | layer { 815 | name: "conv7_2_h" 816 | type: "Convolution" 817 | bottom: "conv7_1_h" 818 | top: "conv7_2_h" 819 | param { 820 | lr_mult: 1 821 | decay_mult: 1 822 | } 823 | param { 824 | lr_mult: 2 825 | decay_mult: 0 826 | } 827 | convolution_param { 828 | num_output: 128 829 | pad: 1 830 | kernel_size: 3 831 | stride: 2 832 | weight_filler { 833 | type: "xavier" 834 | } 835 | bias_filler { 836 | type: "constant" 837 | value: 0 838 | } 839 | } 840 | } 841 | layer { 842 | name: "conv7_2_relu" 843 | type: "ReLU" 844 | bottom: "conv7_2_h" 845 | top: "conv7_2_h" 846 | } 847 | layer { 848 | name: "conv8_1_h" 849 | type: "Convolution" 850 | bottom: "conv7_2_h" 851 | top: "conv8_1_h" 852 | param { 853 | lr_mult: 1 854 | decay_mult: 1 855 | } 856 | param { 857 | lr_mult: 2 858 | decay_mult: 0 859 | } 860 | convolution_param { 861 | num_output: 64 862 | pad: 0 863 | kernel_size: 1 864 | stride: 1 865 | weight_filler { 866 | type: "xavier" 867 | } 868 | bias_filler { 869 | 
type: "constant" 870 | value: 0 871 | } 872 | } 873 | } 874 | layer { 875 | name: "conv8_1_relu" 876 | type: "ReLU" 877 | bottom: "conv8_1_h" 878 | top: "conv8_1_h" 879 | } 880 | layer { 881 | name: "conv8_2_h" 882 | type: "Convolution" 883 | bottom: "conv8_1_h" 884 | top: "conv8_2_h" 885 | param { 886 | lr_mult: 1 887 | decay_mult: 1 888 | } 889 | param { 890 | lr_mult: 2 891 | decay_mult: 0 892 | } 893 | convolution_param { 894 | num_output: 128 895 | pad: 1 896 | kernel_size: 3 897 | stride: 1 898 | weight_filler { 899 | type: "xavier" 900 | } 901 | bias_filler { 902 | type: "constant" 903 | value: 0 904 | } 905 | } 906 | } 907 | layer { 908 | name: "conv8_2_relu" 909 | type: "ReLU" 910 | bottom: "conv8_2_h" 911 | top: "conv8_2_h" 912 | } 913 | layer { 914 | name: "conv9_1_h" 915 | type: "Convolution" 916 | bottom: "conv8_2_h" 917 | top: "conv9_1_h" 918 | param { 919 | lr_mult: 1 920 | decay_mult: 1 921 | } 922 | param { 923 | lr_mult: 2 924 | decay_mult: 0 925 | } 926 | convolution_param { 927 | num_output: 64 928 | pad: 0 929 | kernel_size: 1 930 | stride: 1 931 | weight_filler { 932 | type: "xavier" 933 | } 934 | bias_filler { 935 | type: "constant" 936 | value: 0 937 | } 938 | } 939 | } 940 | layer { 941 | name: "conv9_1_relu" 942 | type: "ReLU" 943 | bottom: "conv9_1_h" 944 | top: "conv9_1_h" 945 | } 946 | layer { 947 | name: "conv9_2_h" 948 | type: "Convolution" 949 | bottom: "conv9_1_h" 950 | top: "conv9_2_h" 951 | param { 952 | lr_mult: 1 953 | decay_mult: 1 954 | } 955 | param { 956 | lr_mult: 2 957 | decay_mult: 0 958 | } 959 | convolution_param { 960 | num_output: 128 961 | pad: 1 962 | kernel_size: 3 963 | stride: 1 964 | weight_filler { 965 | type: "xavier" 966 | } 967 | bias_filler { 968 | type: "constant" 969 | value: 0 970 | } 971 | } 972 | } 973 | layer { 974 | name: "conv9_2_relu" 975 | type: "ReLU" 976 | bottom: "conv9_2_h" 977 | top: "conv9_2_h" 978 | } 979 | layer { 980 | name: "conv4_3_norm" 981 | type: "Normalize" 982 | bottom: "layer_256_1_bn1" 983 | top: "conv4_3_norm" 984 | norm_param { 985 | across_spatial: false 986 | scale_filler { 987 | type: "constant" 988 | value: 20 989 | } 990 | channel_shared: false 991 | } 992 | } 993 | layer { 994 | name: "conv4_3_norm_mbox_loc" 995 | type: "Convolution" 996 | bottom: "conv4_3_norm" 997 | top: "conv4_3_norm_mbox_loc" 998 | param { 999 | lr_mult: 1 1000 | decay_mult: 1 1001 | } 1002 | param { 1003 | lr_mult: 2 1004 | decay_mult: 0 1005 | } 1006 | convolution_param { 1007 | num_output: 16 1008 | pad: 1 1009 | kernel_size: 3 1010 | stride: 1 1011 | weight_filler { 1012 | type: "xavier" 1013 | } 1014 | bias_filler { 1015 | type: "constant" 1016 | value: 0 1017 | } 1018 | } 1019 | } 1020 | layer { 1021 | name: "conv4_3_norm_mbox_loc_perm" 1022 | type: "Permute" 1023 | bottom: "conv4_3_norm_mbox_loc" 1024 | top: "conv4_3_norm_mbox_loc_perm" 1025 | permute_param { 1026 | order: 0 1027 | order: 2 1028 | order: 3 1029 | order: 1 1030 | } 1031 | } 1032 | layer { 1033 | name: "conv4_3_norm_mbox_loc_flat" 1034 | type: "Flatten" 1035 | bottom: "conv4_3_norm_mbox_loc_perm" 1036 | top: "conv4_3_norm_mbox_loc_flat" 1037 | flatten_param { 1038 | axis: 1 1039 | } 1040 | } 1041 | layer { 1042 | name: "conv4_3_norm_mbox_conf" 1043 | type: "Convolution" 1044 | bottom: "conv4_3_norm" 1045 | top: "conv4_3_norm_mbox_conf" 1046 | param { 1047 | lr_mult: 1 1048 | decay_mult: 1 1049 | } 1050 | param { 1051 | lr_mult: 2 1052 | decay_mult: 0 1053 | } 1054 | convolution_param { 1055 | num_output: 8 # 84 1056 | pad: 1 1057 | kernel_size: 3 1058 | 
stride: 1 1059 | weight_filler { 1060 | type: "xavier" 1061 | } 1062 | bias_filler { 1063 | type: "constant" 1064 | value: 0 1065 | } 1066 | } 1067 | } 1068 | layer { 1069 | name: "conv4_3_norm_mbox_conf_perm" 1070 | type: "Permute" 1071 | bottom: "conv4_3_norm_mbox_conf" 1072 | top: "conv4_3_norm_mbox_conf_perm" 1073 | permute_param { 1074 | order: 0 1075 | order: 2 1076 | order: 3 1077 | order: 1 1078 | } 1079 | } 1080 | layer { 1081 | name: "conv4_3_norm_mbox_conf_flat" 1082 | type: "Flatten" 1083 | bottom: "conv4_3_norm_mbox_conf_perm" 1084 | top: "conv4_3_norm_mbox_conf_flat" 1085 | flatten_param { 1086 | axis: 1 1087 | } 1088 | } 1089 | layer { 1090 | name: "conv4_3_norm_mbox_priorbox" 1091 | type: "PriorBox" 1092 | bottom: "conv4_3_norm" 1093 | bottom: "data" 1094 | top: "conv4_3_norm_mbox_priorbox" 1095 | prior_box_param { 1096 | min_size: 30.0 1097 | max_size: 60.0 1098 | aspect_ratio: 2 1099 | flip: true 1100 | clip: false 1101 | variance: 0.1 1102 | variance: 0.1 1103 | variance: 0.2 1104 | variance: 0.2 1105 | step: 8 1106 | offset: 0.5 1107 | } 1108 | } 1109 | layer { 1110 | name: "fc7_mbox_loc" 1111 | type: "Convolution" 1112 | bottom: "fc7" 1113 | top: "fc7_mbox_loc" 1114 | param { 1115 | lr_mult: 1 1116 | decay_mult: 1 1117 | } 1118 | param { 1119 | lr_mult: 2 1120 | decay_mult: 0 1121 | } 1122 | convolution_param { 1123 | num_output: 24 1124 | pad: 1 1125 | kernel_size: 3 1126 | stride: 1 1127 | weight_filler { 1128 | type: "xavier" 1129 | } 1130 | bias_filler { 1131 | type: "constant" 1132 | value: 0 1133 | } 1134 | } 1135 | } 1136 | layer { 1137 | name: "fc7_mbox_loc_perm" 1138 | type: "Permute" 1139 | bottom: "fc7_mbox_loc" 1140 | top: "fc7_mbox_loc_perm" 1141 | permute_param { 1142 | order: 0 1143 | order: 2 1144 | order: 3 1145 | order: 1 1146 | } 1147 | } 1148 | layer { 1149 | name: "fc7_mbox_loc_flat" 1150 | type: "Flatten" 1151 | bottom: "fc7_mbox_loc_perm" 1152 | top: "fc7_mbox_loc_flat" 1153 | flatten_param { 1154 | axis: 1 1155 | } 1156 | } 1157 | layer { 1158 | name: "fc7_mbox_conf" 1159 | type: "Convolution" 1160 | bottom: "fc7" 1161 | top: "fc7_mbox_conf" 1162 | param { 1163 | lr_mult: 1 1164 | decay_mult: 1 1165 | } 1166 | param { 1167 | lr_mult: 2 1168 | decay_mult: 0 1169 | } 1170 | convolution_param { 1171 | num_output: 12 # 126 1172 | pad: 1 1173 | kernel_size: 3 1174 | stride: 1 1175 | weight_filler { 1176 | type: "xavier" 1177 | } 1178 | bias_filler { 1179 | type: "constant" 1180 | value: 0 1181 | } 1182 | } 1183 | } 1184 | layer { 1185 | name: "fc7_mbox_conf_perm" 1186 | type: "Permute" 1187 | bottom: "fc7_mbox_conf" 1188 | top: "fc7_mbox_conf_perm" 1189 | permute_param { 1190 | order: 0 1191 | order: 2 1192 | order: 3 1193 | order: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "fc7_mbox_conf_flat" 1198 | type: "Flatten" 1199 | bottom: "fc7_mbox_conf_perm" 1200 | top: "fc7_mbox_conf_flat" 1201 | flatten_param { 1202 | axis: 1 1203 | } 1204 | } 1205 | layer { 1206 | name: "fc7_mbox_priorbox" 1207 | type: "PriorBox" 1208 | bottom: "fc7" 1209 | bottom: "data" 1210 | top: "fc7_mbox_priorbox" 1211 | prior_box_param { 1212 | min_size: 60.0 1213 | max_size: 111.0 1214 | aspect_ratio: 2 1215 | aspect_ratio: 3 1216 | flip: true 1217 | clip: false 1218 | variance: 0.1 1219 | variance: 0.1 1220 | variance: 0.2 1221 | variance: 0.2 1222 | step: 16 1223 | offset: 0.5 1224 | } 1225 | } 1226 | layer { 1227 | name: "conv6_2_mbox_loc" 1228 | type: "Convolution" 1229 | bottom: "conv6_2_h" 1230 | top: "conv6_2_mbox_loc" 1231 | param { 1232 | lr_mult: 1 1233 | 
decay_mult: 1 1234 | } 1235 | param { 1236 | lr_mult: 2 1237 | decay_mult: 0 1238 | } 1239 | convolution_param { 1240 | num_output: 24 1241 | pad: 1 1242 | kernel_size: 3 1243 | stride: 1 1244 | weight_filler { 1245 | type: "xavier" 1246 | } 1247 | bias_filler { 1248 | type: "constant" 1249 | value: 0 1250 | } 1251 | } 1252 | } 1253 | layer { 1254 | name: "conv6_2_mbox_loc_perm" 1255 | type: "Permute" 1256 | bottom: "conv6_2_mbox_loc" 1257 | top: "conv6_2_mbox_loc_perm" 1258 | permute_param { 1259 | order: 0 1260 | order: 2 1261 | order: 3 1262 | order: 1 1263 | } 1264 | } 1265 | layer { 1266 | name: "conv6_2_mbox_loc_flat" 1267 | type: "Flatten" 1268 | bottom: "conv6_2_mbox_loc_perm" 1269 | top: "conv6_2_mbox_loc_flat" 1270 | flatten_param { 1271 | axis: 1 1272 | } 1273 | } 1274 | layer { 1275 | name: "conv6_2_mbox_conf" 1276 | type: "Convolution" 1277 | bottom: "conv6_2_h" 1278 | top: "conv6_2_mbox_conf" 1279 | param { 1280 | lr_mult: 1 1281 | decay_mult: 1 1282 | } 1283 | param { 1284 | lr_mult: 2 1285 | decay_mult: 0 1286 | } 1287 | convolution_param { 1288 | num_output: 12 # 126 1289 | pad: 1 1290 | kernel_size: 3 1291 | stride: 1 1292 | weight_filler { 1293 | type: "xavier" 1294 | } 1295 | bias_filler { 1296 | type: "constant" 1297 | value: 0 1298 | } 1299 | } 1300 | } 1301 | layer { 1302 | name: "conv6_2_mbox_conf_perm" 1303 | type: "Permute" 1304 | bottom: "conv6_2_mbox_conf" 1305 | top: "conv6_2_mbox_conf_perm" 1306 | permute_param { 1307 | order: 0 1308 | order: 2 1309 | order: 3 1310 | order: 1 1311 | } 1312 | } 1313 | layer { 1314 | name: "conv6_2_mbox_conf_flat" 1315 | type: "Flatten" 1316 | bottom: "conv6_2_mbox_conf_perm" 1317 | top: "conv6_2_mbox_conf_flat" 1318 | flatten_param { 1319 | axis: 1 1320 | } 1321 | } 1322 | layer { 1323 | name: "conv6_2_mbox_priorbox" 1324 | type: "PriorBox" 1325 | bottom: "conv6_2_h" 1326 | bottom: "data" 1327 | top: "conv6_2_mbox_priorbox" 1328 | prior_box_param { 1329 | min_size: 111.0 1330 | max_size: 162.0 1331 | aspect_ratio: 2 1332 | aspect_ratio: 3 1333 | flip: true 1334 | clip: false 1335 | variance: 0.1 1336 | variance: 0.1 1337 | variance: 0.2 1338 | variance: 0.2 1339 | step: 32 1340 | offset: 0.5 1341 | } 1342 | } 1343 | layer { 1344 | name: "conv7_2_mbox_loc" 1345 | type: "Convolution" 1346 | bottom: "conv7_2_h" 1347 | top: "conv7_2_mbox_loc" 1348 | param { 1349 | lr_mult: 1 1350 | decay_mult: 1 1351 | } 1352 | param { 1353 | lr_mult: 2 1354 | decay_mult: 0 1355 | } 1356 | convolution_param { 1357 | num_output: 24 1358 | pad: 1 1359 | kernel_size: 3 1360 | stride: 1 1361 | weight_filler { 1362 | type: "xavier" 1363 | } 1364 | bias_filler { 1365 | type: "constant" 1366 | value: 0 1367 | } 1368 | } 1369 | } 1370 | layer { 1371 | name: "conv7_2_mbox_loc_perm" 1372 | type: "Permute" 1373 | bottom: "conv7_2_mbox_loc" 1374 | top: "conv7_2_mbox_loc_perm" 1375 | permute_param { 1376 | order: 0 1377 | order: 2 1378 | order: 3 1379 | order: 1 1380 | } 1381 | } 1382 | layer { 1383 | name: "conv7_2_mbox_loc_flat" 1384 | type: "Flatten" 1385 | bottom: "conv7_2_mbox_loc_perm" 1386 | top: "conv7_2_mbox_loc_flat" 1387 | flatten_param { 1388 | axis: 1 1389 | } 1390 | } 1391 | layer { 1392 | name: "conv7_2_mbox_conf" 1393 | type: "Convolution" 1394 | bottom: "conv7_2_h" 1395 | top: "conv7_2_mbox_conf" 1396 | param { 1397 | lr_mult: 1 1398 | decay_mult: 1 1399 | } 1400 | param { 1401 | lr_mult: 2 1402 | decay_mult: 0 1403 | } 1404 | convolution_param { 1405 | num_output: 12 # 126 1406 | pad: 1 1407 | kernel_size: 3 1408 | stride: 1 1409 | 
weight_filler { 1410 | type: "xavier" 1411 | } 1412 | bias_filler { 1413 | type: "constant" 1414 | value: 0 1415 | } 1416 | } 1417 | } 1418 | layer { 1419 | name: "conv7_2_mbox_conf_perm" 1420 | type: "Permute" 1421 | bottom: "conv7_2_mbox_conf" 1422 | top: "conv7_2_mbox_conf_perm" 1423 | permute_param { 1424 | order: 0 1425 | order: 2 1426 | order: 3 1427 | order: 1 1428 | } 1429 | } 1430 | layer { 1431 | name: "conv7_2_mbox_conf_flat" 1432 | type: "Flatten" 1433 | bottom: "conv7_2_mbox_conf_perm" 1434 | top: "conv7_2_mbox_conf_flat" 1435 | flatten_param { 1436 | axis: 1 1437 | } 1438 | } 1439 | layer { 1440 | name: "conv7_2_mbox_priorbox" 1441 | type: "PriorBox" 1442 | bottom: "conv7_2_h" 1443 | bottom: "data" 1444 | top: "conv7_2_mbox_priorbox" 1445 | prior_box_param { 1446 | min_size: 162.0 1447 | max_size: 213.0 1448 | aspect_ratio: 2 1449 | aspect_ratio: 3 1450 | flip: true 1451 | clip: false 1452 | variance: 0.1 1453 | variance: 0.1 1454 | variance: 0.2 1455 | variance: 0.2 1456 | step: 64 1457 | offset: 0.5 1458 | } 1459 | } 1460 | layer { 1461 | name: "conv8_2_mbox_loc" 1462 | type: "Convolution" 1463 | bottom: "conv8_2_h" 1464 | top: "conv8_2_mbox_loc" 1465 | param { 1466 | lr_mult: 1 1467 | decay_mult: 1 1468 | } 1469 | param { 1470 | lr_mult: 2 1471 | decay_mult: 0 1472 | } 1473 | convolution_param { 1474 | num_output: 16 1475 | pad: 1 1476 | kernel_size: 3 1477 | stride: 1 1478 | weight_filler { 1479 | type: "xavier" 1480 | } 1481 | bias_filler { 1482 | type: "constant" 1483 | value: 0 1484 | } 1485 | } 1486 | } 1487 | layer { 1488 | name: "conv8_2_mbox_loc_perm" 1489 | type: "Permute" 1490 | bottom: "conv8_2_mbox_loc" 1491 | top: "conv8_2_mbox_loc_perm" 1492 | permute_param { 1493 | order: 0 1494 | order: 2 1495 | order: 3 1496 | order: 1 1497 | } 1498 | } 1499 | layer { 1500 | name: "conv8_2_mbox_loc_flat" 1501 | type: "Flatten" 1502 | bottom: "conv8_2_mbox_loc_perm" 1503 | top: "conv8_2_mbox_loc_flat" 1504 | flatten_param { 1505 | axis: 1 1506 | } 1507 | } 1508 | layer { 1509 | name: "conv8_2_mbox_conf" 1510 | type: "Convolution" 1511 | bottom: "conv8_2_h" 1512 | top: "conv8_2_mbox_conf" 1513 | param { 1514 | lr_mult: 1 1515 | decay_mult: 1 1516 | } 1517 | param { 1518 | lr_mult: 2 1519 | decay_mult: 0 1520 | } 1521 | convolution_param { 1522 | num_output: 8 # 84 1523 | pad: 1 1524 | kernel_size: 3 1525 | stride: 1 1526 | weight_filler { 1527 | type: "xavier" 1528 | } 1529 | bias_filler { 1530 | type: "constant" 1531 | value: 0 1532 | } 1533 | } 1534 | } 1535 | layer { 1536 | name: "conv8_2_mbox_conf_perm" 1537 | type: "Permute" 1538 | bottom: "conv8_2_mbox_conf" 1539 | top: "conv8_2_mbox_conf_perm" 1540 | permute_param { 1541 | order: 0 1542 | order: 2 1543 | order: 3 1544 | order: 1 1545 | } 1546 | } 1547 | layer { 1548 | name: "conv8_2_mbox_conf_flat" 1549 | type: "Flatten" 1550 | bottom: "conv8_2_mbox_conf_perm" 1551 | top: "conv8_2_mbox_conf_flat" 1552 | flatten_param { 1553 | axis: 1 1554 | } 1555 | } 1556 | layer { 1557 | name: "conv8_2_mbox_priorbox" 1558 | type: "PriorBox" 1559 | bottom: "conv8_2_h" 1560 | bottom: "data" 1561 | top: "conv8_2_mbox_priorbox" 1562 | prior_box_param { 1563 | min_size: 213.0 1564 | max_size: 264.0 1565 | aspect_ratio: 2 1566 | flip: true 1567 | clip: false 1568 | variance: 0.1 1569 | variance: 0.1 1570 | variance: 0.2 1571 | variance: 0.2 1572 | step: 100 1573 | offset: 0.5 1574 | } 1575 | } 1576 | layer { 1577 | name: "conv9_2_mbox_loc" 1578 | type: "Convolution" 1579 | bottom: "conv9_2_h" 1580 | top: "conv9_2_mbox_loc" 1581 | param { 
1582 | lr_mult: 1 1583 | decay_mult: 1 1584 | } 1585 | param { 1586 | lr_mult: 2 1587 | decay_mult: 0 1588 | } 1589 | convolution_param { 1590 | num_output: 16 1591 | pad: 1 1592 | kernel_size: 3 1593 | stride: 1 1594 | weight_filler { 1595 | type: "xavier" 1596 | } 1597 | bias_filler { 1598 | type: "constant" 1599 | value: 0 1600 | } 1601 | } 1602 | } 1603 | layer { 1604 | name: "conv9_2_mbox_loc_perm" 1605 | type: "Permute" 1606 | bottom: "conv9_2_mbox_loc" 1607 | top: "conv9_2_mbox_loc_perm" 1608 | permute_param { 1609 | order: 0 1610 | order: 2 1611 | order: 3 1612 | order: 1 1613 | } 1614 | } 1615 | layer { 1616 | name: "conv9_2_mbox_loc_flat" 1617 | type: "Flatten" 1618 | bottom: "conv9_2_mbox_loc_perm" 1619 | top: "conv9_2_mbox_loc_flat" 1620 | flatten_param { 1621 | axis: 1 1622 | } 1623 | } 1624 | layer { 1625 | name: "conv9_2_mbox_conf" 1626 | type: "Convolution" 1627 | bottom: "conv9_2_h" 1628 | top: "conv9_2_mbox_conf" 1629 | param { 1630 | lr_mult: 1 1631 | decay_mult: 1 1632 | } 1633 | param { 1634 | lr_mult: 2 1635 | decay_mult: 0 1636 | } 1637 | convolution_param { 1638 | num_output: 8 # 84 1639 | pad: 1 1640 | kernel_size: 3 1641 | stride: 1 1642 | weight_filler { 1643 | type: "xavier" 1644 | } 1645 | bias_filler { 1646 | type: "constant" 1647 | value: 0 1648 | } 1649 | } 1650 | } 1651 | layer { 1652 | name: "conv9_2_mbox_conf_perm" 1653 | type: "Permute" 1654 | bottom: "conv9_2_mbox_conf" 1655 | top: "conv9_2_mbox_conf_perm" 1656 | permute_param { 1657 | order: 0 1658 | order: 2 1659 | order: 3 1660 | order: 1 1661 | } 1662 | } 1663 | layer { 1664 | name: "conv9_2_mbox_conf_flat" 1665 | type: "Flatten" 1666 | bottom: "conv9_2_mbox_conf_perm" 1667 | top: "conv9_2_mbox_conf_flat" 1668 | flatten_param { 1669 | axis: 1 1670 | } 1671 | } 1672 | layer { 1673 | name: "conv9_2_mbox_priorbox" 1674 | type: "PriorBox" 1675 | bottom: "conv9_2_h" 1676 | bottom: "data" 1677 | top: "conv9_2_mbox_priorbox" 1678 | prior_box_param { 1679 | min_size: 264.0 1680 | max_size: 315.0 1681 | aspect_ratio: 2 1682 | flip: true 1683 | clip: false 1684 | variance: 0.1 1685 | variance: 0.1 1686 | variance: 0.2 1687 | variance: 0.2 1688 | step: 300 1689 | offset: 0.5 1690 | } 1691 | } 1692 | layer { 1693 | name: "mbox_loc" 1694 | type: "Concat" 1695 | bottom: "conv4_3_norm_mbox_loc_flat" 1696 | bottom: "fc7_mbox_loc_flat" 1697 | bottom: "conv6_2_mbox_loc_flat" 1698 | bottom: "conv7_2_mbox_loc_flat" 1699 | bottom: "conv8_2_mbox_loc_flat" 1700 | bottom: "conv9_2_mbox_loc_flat" 1701 | top: "mbox_loc" 1702 | concat_param { 1703 | axis: 1 1704 | } 1705 | } 1706 | layer { 1707 | name: "mbox_conf" 1708 | type: "Concat" 1709 | bottom: "conv4_3_norm_mbox_conf_flat" 1710 | bottom: "fc7_mbox_conf_flat" 1711 | bottom: "conv6_2_mbox_conf_flat" 1712 | bottom: "conv7_2_mbox_conf_flat" 1713 | bottom: "conv8_2_mbox_conf_flat" 1714 | bottom: "conv9_2_mbox_conf_flat" 1715 | top: "mbox_conf" 1716 | concat_param { 1717 | axis: 1 1718 | } 1719 | } 1720 | layer { 1721 | name: "mbox_priorbox" 1722 | type: "Concat" 1723 | bottom: "conv4_3_norm_mbox_priorbox" 1724 | bottom: "fc7_mbox_priorbox" 1725 | bottom: "conv6_2_mbox_priorbox" 1726 | bottom: "conv7_2_mbox_priorbox" 1727 | bottom: "conv8_2_mbox_priorbox" 1728 | bottom: "conv9_2_mbox_priorbox" 1729 | top: "mbox_priorbox" 1730 | concat_param { 1731 | axis: 2 1732 | } 1733 | } 1734 | 1735 | layer { 1736 | name: "mbox_conf_reshape" 1737 | type: "Reshape" 1738 | bottom: "mbox_conf" 1739 | top: "mbox_conf_reshape" 1740 | reshape_param { 1741 | shape { 1742 | dim: 0 1743 | dim: -1 
1744 | dim: 2 1745 | } 1746 | } 1747 | } 1748 | layer { 1749 | name: "mbox_conf_softmax" 1750 | type: "Softmax" 1751 | bottom: "mbox_conf_reshape" 1752 | top: "mbox_conf_softmax" 1753 | softmax_param { 1754 | axis: 2 1755 | } 1756 | } 1757 | layer { 1758 | name: "mbox_conf_flatten" 1759 | type: "Flatten" 1760 | bottom: "mbox_conf_softmax" 1761 | top: "mbox_conf_flatten" 1762 | flatten_param { 1763 | axis: 1 1764 | } 1765 | } 1766 | 1767 | layer { 1768 | name: "detection_out" 1769 | type: "DetectionOutput" 1770 | bottom: "mbox_loc" 1771 | bottom: "mbox_conf_flatten" 1772 | bottom: "mbox_priorbox" 1773 | top: "detection_out" 1774 | include { 1775 | phase: TEST 1776 | } 1777 | detection_output_param { 1778 | num_classes: 2 1779 | share_location: true 1780 | background_label_id: 0 1781 | nms_param { 1782 | nms_threshold: 0.45 1783 | top_k: 400 1784 | } 1785 | code_type: CENTER_SIZE 1786 | keep_top_k: 200 1787 | confidence_threshold: 0.01 1788 | } 1789 | } 1790 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click==7.1.2 2 | cvlib==0.2.5 3 | dlib==19.22.0 4 | face-recognition==1.3.0 5 | googledrivedownloader==0.4 6 | h5py==2.10.0 7 | keras==2.4.3 8 | mahotas==1.4.11 9 | matplotlib==3.3.2 10 | numpy==1.19.2 11 | opencv-python==4.5.1.48 12 | Pillow==8.3.2 13 | pixellib==0.6.1 14 | PySide2==5.15.2 15 | pytesseract==0.3.7 16 | requests==2.24.0 17 | scikit-image==0.18.1 18 | scipy==1.6.3 19 | tensorflow-gpu==2.5.1 -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from google_drive_downloader import GoogleDriveDownloader as gdd 2 | 3 | # Google Drive file IDs mapped to the local paths the examples expect. 4 | download_dict = { 5 | "16gAKScYAW0bZkyRgcLF71x28du_mLY8-": "assets/res10_300x300_ssd_iter_140000.caffemodel", 6 | "1jUIwxXjxz8oC7I2Ta9vtiozsB4i95043": "Media/people-walking.mp4", 7 | "1Q7qfr11olEFguRRkKRnC1Yah3ZnJCUnM": "assets/mask_rcnn_coco.h5" 8 | } 9 | 10 | # Fetch each asset; unzip=True is ignored for files that are not zip archives. 11 | for file_id, dest_path in download_dict.items(): 12 | gdd.download_file_from_google_drive(file_id=file_id, 13 | dest_path=dest_path, 14 | unzip=True) 15 | --------------------------------------------------------------------------------
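A note on the repeated Permute (order: 0, 2, 3, 1) followed by Flatten (axis: 1) pairs in the deploy.prototxt.txt above: each pair reorders a head's NCHW output to NHWC before flattening, so the predictions for each feature-map cell become contiguous and the later Concat layers (axis 1 for mbox_loc/mbox_conf, axis 2 for mbox_priorbox) line up element-for-element with the order in which the PriorBox layers emit default boxes. A minimal NumPy sketch of the equivalent index arithmetic; the 19x19 spatial size is an assumption for illustration (fc7 at stride 16 on a 300x300 input):

import numpy as np

n, c, h, w = 1, 24, 19, 19              # e.g. fc7_mbox_loc: 6 priors * 4 coords
loc = np.random.randn(n, c, h, w).astype(np.float32)

loc_perm = loc.transpose(0, 2, 3, 1)    # Permute order: 0, 2, 3, 1  (NCHW -> NHWC)
loc_flat = loc_perm.reshape(n, -1)      # Flatten axis: 1
assert loc_flat.shape == (n, h * w * c)

# After the permute, box k at cell (i, j) occupies a contiguous run of
# 4 values, matching the priorbox ordering that Concat relies on:
i, j, k = 4, 7, 2
start = (i * w + j) * c + k * 4
assert np.allclose(loc_flat[0, start:start + 4], loc[0, k * 4:(k + 1) * 4, i, j])

Without the permute, a plain flatten of NCHW would interleave all cells of one channel before the next channel, and the concatenation across scales would no longer correspond box-for-box to mbox_priorbox.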
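The num_output values of the loc/conf heads follow the usual SSD prior-box count: one default box for min_size, one for the geometric mean of min_size and max_size, and two per listed aspect_ratio when flip: true. Each box then needs 4 localization values, and 2 confidence scores for this 2-class (background/face) model. A quick sanity-check sketch reproducing the values visible in the layers above:

def num_priors(n_aspect_ratios, flip=True):
    # 1 box for min_size, 1 for sqrt(min_size * max_size),
    # plus one per aspect_ratio entry (doubled by flip: true).
    return 1 + 1 + n_aspect_ratios * (2 if flip else 1)

heads = {"fc7": 2, "conv6_2": 2, "conv7_2": 2, "conv8_2": 1, "conv9_2": 1}
for name, n_ar in heads.items():
    p = num_priors(n_ar)
    print(f"{name}: {p} priors/cell -> loc num_output = {4 * p}, "
          f"conf num_output = {2 * p}")
# fc7/conv6_2/conv7_2: 6 priors -> 24 / 12; conv8_2/conv9_2: 4 priors -> 16 / 8

The `# 126` and `# 84` comments left beside the conf heads match the same arithmetic for the original 21-class VOC SSD this definition was adapted from (6 * 21 = 126, 4 * 21 = 84).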
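Finally, how detection_out is consumed downstream: with box decoding (code_type: CENTER_SIZE) and NMS handled inside the DetectionOutput layer, OpenCV's dnn module returns a (1, 1, N, 7) blob of [batch_id, class_id, confidence, x1, y1, x2, y2] rows with corners normalized to [0, 1]. The sketch below is a simplified stand-in, not a copy of the repo's faceDetectionDNN.py; it assumes utils.py has already downloaded the caffemodel into assets/ and borrows Media/face-001.jpg from this repo as input:

import cv2
import numpy as np

# The prototxt above plus the weights fetched by utils.py.
net = cv2.dnn.readNetFromCaffe(
    "assets/deploy.prototxt.txt",
    "assets/res10_300x300_ssd_iter_140000.caffemodel")

image = cv2.imread("Media/face-001.jpg")
h, w = image.shape[:2]

# The data layer expects a 300x300 BGR blob; (104, 177, 123) is the
# per-channel mean commonly used with this model.
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                             (300, 300), (104.0, 177.0, 123.0))
net.setInput(blob)
detections = net.forward()  # shape (1, 1, N, 7) from detection_out

for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if confidence < 0.5:  # stricter than the 0.01 kept in the prototxt
        continue
    # Scale the normalized corners back to the original image size.
    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
    x1, y1, x2, y2 = box.astype(int)
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.imwrite("face-detected-dnn.jpeg", image)

The permissive confidence_threshold: 0.01 in the prototxt deliberately leaves filtering to the application, which is why a higher cutoff is applied here.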