├── .github
│   ├── issues_labeler.yml
│   └── workflows
│       ├── greetings.yml
│       └── issues_labeler.yml
├── .gitignore
├── 01. Basic operations
│   ├── drawingOnImage.py
│   ├── gettingStarted.py
│   ├── gettingStartedWithVideos.py
│   ├── mouseEvents.py
│   ├── savingVideosFromCamera.py
│   ├── usingMatplotlib.py
│   └── usingSkimage.py
├── 02. Core operations
│   ├── README.md
│   ├── arithmaticOperations.py
│   ├── basicImageMerge.py
│   ├── binaryThresholding.py
│   ├── cropping.py
│   └── resizing.py
├── 03. Edge Detection
│   ├── CannyEdgeRealTime.py
│   ├── README.md
│   └── cannyEdgeDetection.py
├── 04. Image Filter
│   ├── SkImageFilter.py
│   ├── bilateralFilter.py
│   └── colorFiltering.py
├── 05. Corner Detection
│   └── cornerDetection.py
├── 06. Background subtraction
│   ├── liveBackgroundSubtraction.py
│   ├── mogBackgroundDetection.py
│   └── runningAverage.py
├── 07. Face Detection
│   ├── README.md
│   ├── blurTheFace.py
│   ├── faceDetectionDNN.py
│   ├── faceDetectionHaarCascade.py
│   ├── realTimeFaceDetection.py
│   ├── realTimeFaceDetectionDNN.py
│   └── smileDetection.py
├── 08. Object Detection
│   ├── README.md
│   └── ojectDetectionCVLIB.py
├── 09. Template Matching
│   ├── README.md
│   └── portMatching.py
├── 10. Invisible_Cloak
│   └── invisiblecloak.py
├── 11. Optical Flow
│   └── opticalFlow.py
├── 12. Blob Detection
│   └── blobDetection.py
├── 13. contouring
│   ├── README.md
│   ├── contouring.py
│   ├── liveContourDetection.py
│   └── shapeDetection.py
├── 14. ImageOperations
│   ├── colvolutionaFeature.py
│   ├── filterVGG16.py
│   ├── imageEnocdingDecoding.py
│   └── simpleImageOperations.py
├── 15. VirtualPen
│   └── README.md
├── 16. EyeBall Tracking
│   └── README.md
├── 17. Color Trackbar
│   └── colorTrackbar.py
├── 18. SIFT Feature Extraction
│   ├── README.md
│   ├── basic.py
│   └── compareFeatures.py
├── 19. Hog Feature Extraction
│   ├── README.md
│   └── featureExtractor.py
├── 20. Image Segmentation
│   ├── KmeansImageSegmentation.py
│   ├── README.md
│   └── waterShedAlgorithm.py
├── 21. Facial Recognition
│   ├── FaceRec.py
│   ├── README.md
│   └── images
│       └── obama.jpg
├── 22. Optical Character Recognition
│   └── README.md
├── 23. PixelLib Segmentation
│   ├── README.md
│   └── instanceSegmentationExample.py
├── 24. Road Lane Detection
│   ├── README.md
│   └── laneDetection.py
├── CONTRIBUTING.md
├── LICENSE
├── Media
│   ├── Man_United.jpeg
│   ├── Shape_Detected.png
│   ├── Shapes.png
│   ├── apple.jpeg
│   ├── bnw.jfif
│   ├── book.png
│   ├── book_on_table.jpeg
│   ├── coins.jpg
│   ├── coins_hog.png
│   ├── corner_detection.jpg
│   ├── cropped image1.png
│   ├── edge-detection.jpg
│   ├── face-001.jpg
│   ├── face-detected-dnn.jpeg
│   ├── face-detected.jpeg
│   ├── nature.png
│   ├── nature_output.png
│   ├── opencv-logo-white.png
│   ├── pieboard-templatematching.jpg
│   ├── port-detected.jpeg
│   ├── port-templatematching.jpg
│   ├── road.jpg
│   ├── road_segmentation.jpg
│   ├── sample.jpeg
│   ├── sample2.jpeg
│   ├── thumbs_up_down.jpg
│   └── thumbs_up_down_countour.jpg
├── README.md
├── _config.yml
├── assets
│   ├── deploy.prototxt.txt
│   ├── haarcascade_eye.xml
│   ├── haarcascade_frontalface_default.xml
│   └── haarcascade_smile.xml
├── requirements.txt
└── utils.py

--------------------------------------------------------------------------------
/.github/issues_labeler.yml:
--------------------------------------------------------------------------------
APIs:
  - "(api|APIs|apis|API|APIS)"
BASH:
  - "(Bash|bash|BASH)"
javascript:
  - "(JavaScript|JAVASCRIPT|Javascript|javascript|JS|js|Js)"
python:
  - "(Python|PYTHON|python)"
enhancement:
  - "(enhancement|Enhancement|enhance)"
difficulty-easy:
  - "(Easy|easy)"
difficulty-medium:
  - "(Medium|medium)"
difficulty-hard:
  - "(Hard|hard)"
git:
  - "(Git|git|GIT)"
github-actions:
  - "(GitHub actions|GIT actions|github actions)"
hacktoberfest:
  - "(hacktoberfest|Hacktoberfest|Hacktober fest|Hacktoberfest2020)"
bug:
  - "(bug|Bug|BUG)"
up-for-grab:
  - "(up for grab)"
urgent:
  - "(urgent|URGENT|Urgent)"
documentation:
  - "(documentation|Documentation)"
no-Code:
  - "(No Code)"

--------------------------------------------------------------------------------
/.github/workflows/greetings.yml:
--------------------------------------------------------------------------------
name: Greetings

on: [pull_request_target, issues]

jobs:
  greeting:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/first-interaction@v1
        with:
          repo-token: "${{ secrets.GITHUB_TOKEN }}"
          issue-message: 'Hey @${{ github.actor }}, congratulations 🎉 on creating your first issue! Please wait for admin approval; once the issue is assigned, you can start working on it. Do give a star ⭐ if you like this project.'
          pr-message: 'Congratulations 🎉 @${{ github.actor }} on making your first PR! An admin will review the changes soon and merge them. 😊 Do give a star ⭐ if you like this project.'

--------------------------------------------------------------------------------
/.github/workflows/issues_labeler.yml:
--------------------------------------------------------------------------------
name: "Issue Labeler"
on:
  issues:
    types: [opened, edited]

jobs:
  triage:
    name: Automate Issue
    runs-on: ubuntu-latest
    steps:
      - uses: github/issue-labeler@v2.0
        with:
          repo-token: "${{ secrets.GITHUB_TOKEN }}"
          configuration-path: .github/issues_labeler.yml
          enable-versioned-regex: 0

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# others
test.py
*.h5
*.caffemodel
*.mp4
inference_model/

--------------------------------------------------------------------------------
/01. Basic operations/drawingOnImage.py:
--------------------------------------------------------------------------------
""" Drawing functions in OpenCV """

import cv2

img = cv2.imread("./Media/apple.jpeg")
if img is not None:
    """ Draw a line from (10, 10) to (180, 100) """
    img = cv2.line(img,
                   pt1=(10, 10),
                   pt2=(180, 100),
                   color=(255, 255, 255),
                   thickness=2)

    img = cv2.arrowedLine(img,
                          pt1=(20, 20),
                          pt2=(300, 300),
                          color=(0, 255, 255),
                          thickness=2)

    img = cv2.rectangle(img,
                        pt1=(250, 0),
                        pt2=(450, 250),
                        color=(0, 255, 0),
                        thickness=2)

    img = cv2.circle(img,
                     center=(100, 100),
                     radius=50,
                     color=(255, 0, 255),
                     thickness=-1)

    cv2.imshow("output", img)
else:
    print('file not found.')

cv2.waitKey(0)
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/01. Basic operations/gettingStarted.py:
--------------------------------------------------------------------------------
''' Getting started with OpenCV

Reading and saving an image with OpenCV using waitKey
'''
import cv2

# load a color image in grayscale
img = cv2.imread('./Media/sample.jpeg', 0)

cv2.imshow('image', img)
k = cv2.waitKey(0) & 0xFF

# esc key to exit
if k == 27:
    cv2.destroyAllWindows()

# s key to save and exit
elif k == ord('s'):
    cv2.imwrite("./Media/sample2.jpeg", img)
    cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/01. Basic operations/gettingStartedWithVideos.py:
--------------------------------------------------------------------------------
''' Getting started with Videos in OpenCV '''
import cv2

cap = cv2.VideoCapture(0)

while True:
    # capture frame by frame
    check, frame = cap.read()
    if not check:  # stop instead of spinning if the camera returns no frame
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # display the result
    cv2.imshow('frame', gray)

    # break the while loop with 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# release the cap
cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/01. Basic operations/mouseEvents.py:
--------------------------------------------------------------------------------
''' Mouse Events in OpenCV '''

import cv2
import numpy as np

# mouse callback function
def draw_circle(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDBLCLK:
        cv2.circle(img, (x, y), 100, (255, 0, 0), -1)

# create a black image and a window, and bind the function to the window
img = np.zeros((512, 512, 3), np.uint8)
cv2.namedWindow('image')
cv2.setMouseCallback('image', draw_circle)

while True:
    cv2.imshow('image', img)
    if cv2.waitKey(20) & 0xFF == 27:
        break
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/01. Basic operations/savingVideosFromCamera.py:
--------------------------------------------------------------------------------
''' Saving videos from Camera '''
import cv2

cap = cv2.VideoCapture(0)

# define codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter("./Media/output.avi", fourcc, 20.0, (640, 480))

while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        frame = cv2.flip(frame, 0)

        # write the flipped frame
        out.write(frame)

        cv2.imshow('video', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break

# release video capture
cap.release()
out.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/01. Basic operations/usingMatplotlib.py:
--------------------------------------------------------------------------------
''' Showing output using matplotlib '''

import cv2
from matplotlib import pyplot as plt

img = cv2.imread('./Media/sample.jpeg', 0)

plt.imshow(img, cmap="gray")
plt.title('Sample Image')
plt.xticks([])
plt.yticks([])
plt.show()

--------------------------------------------------------------------------------
/01. Basic operations/usingSkimage.py:
--------------------------------------------------------------------------------
import cv2
from skimage import io

# reading the image
img = cv2.imread('./Media/sample.jpeg')

# changing image from BGR to RGB for correct output
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# showing output
io.imshow(img)
io.show()

--------------------------------------------------------------------------------
/02. Core operations/README.md:
--------------------------------------------------------------------------------
# Image Cropper
This code allows you to crop images dynamically and save them. Mouse Events in OpenCV are used to achieve this, as the sketch below shows.
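A condensed sketch of the mechanism (names here are illustrative; the full implementation lives in `cropping.py` in this folder):

```python
import cv2

points = []  # corners of the selected region

def on_mouse(event, x, y, flags, param):
    # record the corner where the button is pressed and the one where it is released
    if event == cv2.EVENT_LBUTTONDOWN:
        points[:] = [(x, y)]
    elif event == cv2.EVENT_LBUTTONUP:
        points.append((x, y))

cv2.namedWindow("frame")
cv2.setMouseCallback("frame", on_mouse)  # OpenCV now calls on_mouse for every mouse event in "frame"
```

Once both corners are recorded, the crop itself is plain NumPy slicing: `roi = img[y1:y2, x1:x2]`.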

### Executing the code
1. Run the .py file by running the command ``` python3 cropping.py``` in the terminal or cmd.
2. Use the **Left** mouse button to drag out a rectangular region of the image you want to crop. **Release** the button once you are done.
3. The selected rectangle is shown on the image. You can press **r** to reset your selection.
4. Press **c** to crop the image. A new window opens up.
   a) Press **s** to save the cropped image.
   b) Press **r** to reset and return to the original image.
5. Repeat from step 2 to crop more images.

### Demo
![](https://github.com/Pranjalmishra30/OpenCV-Rep/blob/master/Mini-Projects/Cropping_Images/Data/crop-DEMO.gif)

### References
1. Mouse events [tutorial](https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_gui/py_mouse_handling/py_mouse_handling.html)
2. Pyimagesearch [tutorial](https://www.pyimagesearch.com/2015/03/09/capturing-mouse-click-events-with-python-and-opencv/)

--------------------------------------------------------------------------------
/02. Core operations/arithmaticOperations.py:
--------------------------------------------------------------------------------
""" Adding two images using addWeighted() """
import cv2

img1 = cv2.imread('./Media/sample.jpeg')
img2 = cv2.imread('./Media/opencv-logo-white.png')

# resizing the images so the arithmetic operations line up pixel to pixel
img1 = cv2.resize(img1, (500, 500))
img2 = cv2.resize(img2, (500, 500))

# blend: 0.7 * img1 + 0.3 * img2 + 0
output_img = cv2.addWeighted(img1, 0.7, img2, 0.3, 0)

cv2.imshow("final image", output_img)
if cv2.waitKey(0) == ord('q'):
    cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/02. Core operations/basicImageMerge.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2
from matplotlib import pyplot as plt

BLUE = [255, 0, 0]

img1 = cv2.imread("./Media/opencv-logo-white.png")

replicate = cv2.copyMakeBorder(img1, 20, 20, 20, 20, cv2.BORDER_REPLICATE)
reflect = cv2.copyMakeBorder(img1, 20, 20, 20, 20, cv2.BORDER_REFLECT)
reflect101 = cv2.copyMakeBorder(img1, 20, 20, 20, 20, cv2.BORDER_REFLECT_101)
wrap = cv2.copyMakeBorder(img1, 20, 20, 20, 20, cv2.BORDER_WRAP)
constant = cv2.copyMakeBorder(img1, 20, 20, 20, 20, cv2.BORDER_CONSTANT, value=BLUE)

plt.subplot(321), plt.imshow(img1, 'gray'), plt.title('original')
plt.subplot(322), plt.imshow(replicate, 'gray'), plt.title('replicate')
plt.subplot(323), plt.imshow(reflect, 'gray'), plt.title('reflect')
plt.subplot(324), plt.imshow(reflect101, 'gray'), plt.title('reflect101')
plt.subplot(325), plt.imshow(wrap, 'gray'), plt.title('wrap')
plt.subplot(326), plt.imshow(constant, 'gray'), plt.title('constant')

plt.show()

--------------------------------------------------------------------------------
/02. Core operations/binaryThresholding.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('./Media/bnw.jfif')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# denoise before thresholding
gray = cv2.medianBlur(gray, 5)

ret, th1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

plt.imshow(th1, 'gray')
plt.title('Binary Thresholding')
plt.xticks([])
plt.yticks([])
plt.show()

--------------------------------------------------------------------------------
/02. Core operations/cropping.py:
--------------------------------------------------------------------------------
""" Image cropping using opencv """

import cv2
refpt = []  # List of reference points

def select_roi(event, x, y, flags, param):
    global refpt  # Global references

    if event == cv2.EVENT_LBUTTONDOWN:  # When the left mouse button is clicked
        refpt = [(x, y)]

    elif event == cv2.EVENT_LBUTTONUP:  # When the left mouse button is released
        refpt.append((x, y))  # recording the last coordinates
        cv2.rectangle(img_main, refpt[0], refpt[1], (0, 255, 0), 2)
        cv2.imshow("frame", img_main)
        print("Selection Successful")

img = cv2.imread("./Media/Man_United.jpeg")
img_main = cv2.resize(img, (400, 400))  # Resizing image

clone = img_main.copy()   # To reset the image after cropping
clone2 = img_main.copy()  # To crop a section out without affecting the original image

cv2.namedWindow("frame")
cv2.setMouseCallback("frame", select_roi)

i = 1  # Numbering for saving images

while True:
    cv2.imshow("frame", img_main)
    var = cv2.waitKey(0)

    '''
    Instructions
    - Select a region, then press c to crop that portion.
    - Press r to reset your selection.
    - In the crop mode, press s to save your cropped image or press r to reset the selection.
    - Press q to exit the program.
    '''

    if var == ord('c'):  # Crop selected images

        if len(refpt) == 2:
            roi = clone2[refpt[0][1]:refpt[1][1], refpt[0][0]:refpt[1][0]]  # [y1:y2, x1:x2]
            cv2.namedWindow("Crop")
            cv2.imshow("Crop", roi)
            print("Cropped")

            var2 = cv2.waitKey(0)

            if var2 == ord('s'):  # Saving cropped image
                cv2.imwrite("./Media/cropped image{}.png".format(i), roi)  # Name of the saved image
                i = i + 1
                print("image saved\n")
                cv2.destroyWindow("Crop")
                img_main = clone.copy()

            elif var2 == ord('r'):  # Reset
                cv2.destroyWindow("Crop")
                print("Reset\n")
                img_main = clone.copy()

    elif var == ord('r'):  # Reset
        print("Reset\n")
        img_main = clone.copy()

    elif var == ord('q'):  # Exit the loop
        print("Exiting ...")
        break

cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/02. Core operations/resizing.py:
--------------------------------------------------------------------------------
""" Image Resizing

- cv2.INTER_AREA: This is used when we need to shrink an image.
- cv2.INTER_CUBIC: This is slower but produces higher-quality results.
- cv2.INTER_LINEAR: This is primarily used when zooming is required. This is the default interpolation technique in OpenCV.
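
For example (a usage sketch): resized = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
halves each dimension; the fx/fy scale factors are an alternative to passing an explicit (width, height).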

https://www.geeksforgeeks.org/image-resizing-using-opencv-python/
"""

import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('./Media/Man_United.jpeg')

if img is not None:
    interpolation_area = cv2.resize(img,
                                    (480, 480),
                                    interpolation=cv2.INTER_AREA)

    interpolation_nearest = cv2.resize(img,
                                       (480, 480),
                                       interpolation=cv2.INTER_NEAREST)

    bigger = cv2.resize(img,
                        (2048, 2048))
    titles = ['Original', 'INTER_AREA', "INTER_NEAREST", "bigger"]
    images = [img, interpolation_area, interpolation_nearest, bigger]
    count = 4

    for i in range(count):
        plt.subplot(2, 2, i+1)
        plt.title(titles[i])
        plt.imshow(images[i])
    plt.show()
else:
    print("Check the file path again.")

--------------------------------------------------------------------------------
/03. Edge Detection/CannyEdgeRealTime.py:
--------------------------------------------------------------------------------
""" Real time edge detection """
import cv2
import numpy as np

cap = cv2.VideoCapture(0)

while True:

    check, frame = cap.read()

    if check:
        # convert to grayscale, then apply the canny edge transformation
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, threshold1=30, threshold2=100)

        # showing the output frame
        cv2.imshow('Original', frame)
        cv2.imshow('Edges', edges)

    if cv2.waitKey(5) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/03. Edge Detection/README.md:
--------------------------------------------------------------------------------
## Edge Detection

**Canny Edge Detection** - The Canny edge detector is an edge detection operator that uses a multi-stage algorithm to detect a wide range of edges in images. It was developed by John F. Canny in 1986. Canny also produced a computational theory of edge detection explaining why the technique works.

--------------------------------------------------------------------------------
/03. Edge Detection/cannyEdgeDetection.py:
--------------------------------------------------------------------------------
'''
Edge detection with the Canny algorithm
edges = cv2.Canny(img, threshold1, threshold2, apertureSize, L2gradient)

Canny() takes the image array (typically grayscale), not a file path.
L2gradient: Its default value is False. If True, Canny() uses a more computationally expensive equation to detect edges,
which provides more accuracy at the cost of resources.
'''
import cv2

img = cv2.imread(r'./Media/face-001.jpg')
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# applying canny edge transformations
edges = cv2.Canny(gray_img, threshold1=30, threshold2=100)

# showing the output frame
cv2.imshow("Edge Detected Image", edges)

k = cv2.waitKey(0) & 0xFF

if k == 27:
    cv2.destroyAllWindows()
elif k == ord('s'):
    cv2.imwrite("./Media/edge-detection.jpg", edges)
    cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/04. Image Filter/SkImageFilter.py:
--------------------------------------------------------------------------------
from skimage import data, io, filters

image = data.coins()
# ... or any other NumPy array!
edges = filters.sobel(image)
io.imshow(edges)
io.show()

--------------------------------------------------------------------------------
/04. Image Filter/bilateralFilter.py:
--------------------------------------------------------------------------------
'''
OpenCV provides the bilateralFilter() function to apply the bilateral filter on the image. The bilateral filter can reduce unwanted noise very
well while keeping edges sharp. The syntax of the function is given below:

    cv2.bilateralFilter(src, d, sigmaColor, sigmaSpace) -> dst

src - It denotes the source image. It can be an 8-bit or floating-point, 1-channel image.
dst - It denotes the destination image of the same size. Its type will be the same as the src image.
d - It denotes the diameter of the pixel neighborhood (integer type) that is used during filtering. If its value is negative, then it is computed from sigmaSpace.
sigmaColor - It denotes the filter sigma in the color space.
sigmaSpace - It denotes the filter sigma in the coordinate space.
'''

import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('./Media/sample.jpeg', 1)

blur = cv2.bilateralFilter(img, 9, 75, 75)

plt.subplot(121), plt.imshow(img), plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122), plt.imshow(blur), plt.title('Bilateral Filter')
plt.xticks([]), plt.yticks([])
plt.show()

--------------------------------------------------------------------------------
/04. Image Filter/colorFiltering.py:
--------------------------------------------------------------------------------
''' color filtering in live videos using color thresholding '''

import cv2
import numpy as np

cap = cv2.VideoCapture(0)

while True:
    cam, frame = cap.read()
    if cam is True:
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        lower_blue = np.array([101, 50, 38])
        upper_blue = np.array([110, 255, 255])

        lower_red = np.array([160, 20, 70])
        upper_red = np.array([190, 255, 255])

        lower_green = np.array([36, 25, 25])
        upper_green = np.array([86, 255, 255])

        mask_blue = cv2.inRange(hsv, lower_blue, upper_blue)
        res_blue = cv2.bitwise_and(frame, frame, mask=mask_blue)

        mask_red = cv2.inRange(hsv, lower_red, upper_red)
        res_red = cv2.bitwise_and(frame, frame, mask=mask_red)

        mask_green = cv2.inRange(hsv, lower_green, upper_green)
        res_green = cv2.bitwise_and(frame, frame, mask=mask_green)

        cv2.imshow('frame', frame)
        cv2.imshow('Blue', res_blue)
        cv2.imshow('Red', res_red)
        cv2.imshow('green', res_green)

    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/05. Corner Detection/cornerDetection.py:
--------------------------------------------------------------------------------
'''
Corner detection in Python using goodFeaturesToTrack()
'''
import cv2
import numpy as np

img = cv2.imread('./Media/corner_detection.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = np.float32(gray)

corners = cv2.goodFeaturesToTrack(gray, 100, 0.01, 10)
corners = corners.astype(int)  # np.int0 is deprecated in recent NumPy

for corner in corners:
    x, y = corner.ravel()
    cv2.circle(img, (x, y), 3, 255, -1)

cv2.imshow('Corner', img)

cv2.waitKey(0) & 0xFF
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/06. Background subtraction/liveBackgroundSubtraction.py:
--------------------------------------------------------------------------------
""" Live Background subtraction using opencv """

import numpy as np
import cv2

cap = cv2.VideoCapture(0)
fgbg = cv2.createBackgroundSubtractorKNN()

while True:
    ret, frame = cap.read()
    if ret:
        fgmask = fgbg.apply(frame)

        cv2.imshow('frame', fgmask)

    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/06. Background subtraction/mogBackgroundDetection.py:
--------------------------------------------------------------------------------
""" Background subtraction using opencv """

import numpy as np
import cv2

cap = cv2.VideoCapture('./Media/people-walking.mp4')
fgbg = cv2.createBackgroundSubtractorMOG2()

while True:
    ret, frame = cap.read()
    if not ret:  # stop when the video ends
        break

    fgmask = fgbg.apply(frame)

    cv2.imshow('frame', fgmask)

    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/06. Background subtraction/runningAverage.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np

cap = cv2.VideoCapture(0)

# read the frame from the camera
_, frame = cap.read()

# converting data type to float32
averageValue = np.float32(frame)

while True:
    # read the frame from camera
    _, frame = cap.read()

    # accumulateWeighted is used to update the running weights
    cv2.accumulateWeighted(frame, averageValue, 0.02)

    resultingFrame = cv2.convertScaleAbs(averageValue)

    cv2.imshow('Original Window', frame)
    cv2.imshow('averageValue', resultingFrame)

    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/07. Face Detection/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/07. Face Detection/README.md

--------------------------------------------------------------------------------
/07. Face Detection/blurTheFace.py:
--------------------------------------------------------------------------------
''' Real time face blurring using webcam '''

import cv2
import numpy as np
import time

# https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt
prototxt_path = "./assets/deploy.prototxt.txt"
# https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel
model_path = "./assets/res10_300x300_ssd_iter_140000.caffemodel"

# load Caffe model
model = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

cap = cv2.VideoCapture(0)

while True:
    start = time.time()
    _, image = cap.read()
    # get width and height of the image
    h, w = image.shape[:2]
    kernel_width = (w // 7) | 1
    kernel_height = (h // 7) | 1
    # preprocess the image: resize and perform mean subtraction
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))
    # set the image into the input of the neural network
    model.setInput(blob)
    # perform inference and get the result
    output = np.squeeze(model.forward())
    for i in range(0, output.shape[0]):
        # get the confidence
        confidence = output[i, 2]
        # if confidence is above 40%, then blur the bounding box (face)
        if confidence > 0.4:
            # get the surrounding box coordinates and upscale them to the original image
            box = output[i, 3:7] * np.array([w, h, w, h])
            # convert to integers (np.int was removed from recent NumPy)
            start_x, start_y, end_x, end_y = box.astype(int)
            # get the face image
            face = image[start_y: end_y, start_x: end_x]
            # apply gaussian blur to this face
            face = cv2.GaussianBlur(face, (kernel_width, kernel_height), 0)
            # put the blurred face into the original image
            image[start_y: end_y, start_x: end_x] = face
    cv2.imshow("image", image)
    if cv2.waitKey(1) == ord("q"):
        break
    time_elapsed = time.time() - start
    fps = 1 / time_elapsed
    print("FPS:", fps)

cv2.destroyAllWindows()
cap.release()

--------------------------------------------------------------------------------
/07. Face Detection/faceDetectionDNN.py:
--------------------------------------------------------------------------------
""" Face detection using deep learning and opencv """
import numpy as np
import cv2

# Global Declarations
# https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt
prototxt_path = "./assets/deploy.prototxt.txt"
# https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel
model_path = "./assets/res10_300x300_ssd_iter_140000.caffemodel"

confThresh = 0.8
net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

def detectFace(imgPath):
    img = cv2.imread(imgPath)

    (h, w) = img.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(img, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))

    net.setInput(blob)
    detections = net.forward()
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence < confThresh:
            continue

        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.rectangle(img, (startX, startY), (endX, endY), (0, 0, 255), 2)

    cv2.imshow("Output", img)
    key = cv2.waitKey(0)
    if key == 27:
        cv2.destroyAllWindows()
    elif key == ord('s'):
        cv2.imwrite('./Media/face-detected-dnn.jpeg', img)
        cv2.destroyAllWindows()


path_img = './Media/face-001.jpg'
detectFace(path_img)

--------------------------------------------------------------------------------
/07. Face Detection/faceDetectionHaarCascade.py:
--------------------------------------------------------------------------------
""" Face detection using haarcascade_frontalface and eye classifier """

import cv2

# Path
face_cascade = cv2.CascadeClassifier('./assets/haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('./assets/haarcascade_eye.xml')


def detectedFace(img):
    img = cv2.imread(img)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(gray, 1.4, 5)

    for face in faces:
        x, y, width, height = face
        # draw a rectangle for detection
        cv2.rectangle(
            img,
            (x, y),
            (x + width, y + height),
            (0, 0, 255),
            1,
        )
        roi_gray = gray[y:y+height, x:x+width]
        roi_color = img[y:y+height, x:x+width]

        eyes = eye_cascade.detectMultiScale(roi_gray)
        for (ex, ey, ew, eh) in eyes:
            cv2.rectangle(
                roi_color,
                (ex, ey),
                (ex+ew, ey+eh),
                (0, 255, 0),
                2,
            )

    cv2.imshow('Face Detection', img)
    k = cv2.waitKey(0) & 0xFF

    if k == 27:
        cv2.destroyAllWindows()
    elif k == ord('s'):
        cv2.imwrite('./Media/face-detected.jpeg', img)
        cv2.destroyAllWindows()


path_img = './Media/face-001.jpg'
detectedFace(path_img)

--------------------------------------------------------------------------------
/07. Face Detection/realTimeFaceDetection.py:
--------------------------------------------------------------------------------
""" Real time face detection using haarcascade classifier """

import numpy as np
import cv2

face_cascade = cv2.CascadeClassifier('./assets/haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('./assets/haarcascade_eye.xml')

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = frame[y:y+h, x:x+w]

        eyes = eye_cascade.detectMultiScale(roi_gray)
        for (ex, ey, ew, eh) in eyes:
            cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)

    cv2.imshow('face and eyes detection', frame)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/07. Face Detection/realTimeFaceDetectionDNN.py:
--------------------------------------------------------------------------------
""" Real time face detection using deep learning """

import numpy as np
import cv2

# https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt
prototxt_path = "./assets/deploy.prototxt.txt"
# https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel
model_path = "./assets/res10_300x300_ssd_iter_140000.caffemodel"

confThresh = 0.5
net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

cam = cv2.VideoCapture(0)

while True:
    ret, frame = cam.read()
    frame = cv2.flip(frame, 1)
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))

    net.setInput(blob)
    detections = net.forward()
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence < confThresh:
            continue

        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 0, 255), 2)

    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1)
    if key == 27:
        break

cam.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/07. Face Detection/smileDetection.py:
--------------------------------------------------------------------------------
""" Real time smile detection using haarcascade classifiers """

import numpy as np
import cv2

face_cascade = cv2.CascadeClassifier('./assets/haarcascade_frontalface_default.xml')
smile_cascade = cv2.CascadeClassifier('./assets/haarcascade_smile.xml')

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        roi_gray = gray[y: y + h, x: x + w]
        roi_color = frame[y: y + h, x: x + w]

        smiles = smile_cascade.detectMultiScale(roi_gray, 1.3, 5)
        for (sx, sy, sw, sh) in smiles:
            cv2.rectangle(roi_color, (sx, sy), (sx+sw, sy+sh), (0, 0, 255), 2)

    cv2.imshow('Smile Detection', frame)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/08. Object Detection/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/08. Object Detection/README.md

--------------------------------------------------------------------------------
/08. Object Detection/ojectDetectionCVLIB.py:
--------------------------------------------------------------------------------
""" Common object detection using cvlib and YOLOv3 """

import cv2
import matplotlib.pyplot as plt
import cvlib as cv
from cvlib.object_detection import draw_bbox

img = cv2.imread('./Media/apple.jpeg')

# detect on the BGR image itself; the detector expects a 3-channel image
bbox, label, conf = cv.detect_common_objects(img)

output_image = draw_bbox(img, bbox, label, conf)

# convert BGR -> RGB so matplotlib shows the colors correctly
output_image = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)

plt.imshow(output_image)
plt.savefig("./Media/apple-detected.jpeg")
plt.show()

--------------------------------------------------------------------------------
/09. Template Matching/README.md:
--------------------------------------------------------------------------------
## Template Matching

Template Matching is a method for searching and finding the location of a template image in a larger image.
OpenCV comes with a function cv2.matchTemplate() for this purpose. It simply slides the template image over the input
image (as in 2D convolution) and compares the template and patch of input image under the template image.

--------------------------------------------------------------------------------
/09. Template Matching/portMatching.py:
--------------------------------------------------------------------------------
"""
Template Matching is a method for searching and finding the location of a template image in a larger image.
OpenCV comes with a function cv2.matchTemplate() for this purpose. It simply slides the template image over the input
image (as in 2D convolution) and compares the template and patch of input image under the template image.
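The result is a map of match scores over every template position; thresholding it with np.where(res >= threshold), as done below, gives the top-left corner of each matching region.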
5 | """ 6 | import cv2 7 | import numpy as np 8 | 9 | source_color = cv2.imread(r"./Media/pieboard-templatematching.jpg") 10 | #keeping source_color in colored to show the cordinate in original picture instead to grayscale 11 | source_gray = cv2.cvtColor(source_color, cv2.COLOR_BGR2GRAY) 12 | template_image = cv2.imread(r"./Media/port-templatematching.jpg", 0) 13 | w,h = template_image.shape[::-1] 14 | 15 | res = cv2.matchTemplate(source_gray, template_image, cv2.TM_CCOEFF_NORMED) 16 | threshold = 0.8 17 | loc = np.where(res >= threshold) 18 | 19 | print(*loc) 20 | for port in zip(*loc[::-1]): 21 | cv2.rectangle(source_color, port,(port[0] + w, port[1] + h), (0, 255, 255), 2) 22 | 23 | cv2.imshow("Port Detected", source_color) 24 | k = cv2.waitKey(0) & 0xFF 25 | 26 | if k == 27: 27 | cv2.destroyAllWindows() 28 | elif k == ord("s"): 29 | cv2.imwrite('./Media/port-detected.jpeg', source_color) 30 | cv2.destroyAllWindows() 31 | -------------------------------------------------------------------------------- /10. Invisible_Cloak/invisiblecloak.py: -------------------------------------------------------------------------------- 1 | 2 | import cv2 3 | import time 4 | import numpy as np 5 | 6 | ## Preparation for writing the ouput video 7 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 8 | out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640, 480)) 9 | 10 | ##reading from the webcam 11 | cap = cv2.VideoCapture(0) 12 | 13 | ## Allow the system to sleep for 3 seconds before the webcam starts 14 | time.sleep(3) 15 | count = 0 16 | background = 0 17 | 18 | ## Capture the background in range of 60 19 | for i in range(60): 20 | ret, background = cap.read() 21 | background = np.flip(background, axis=1) 22 | 23 | ## Read every frame from the webcam, until the camera is open 24 | while (cap.isOpened()): 25 | ret, img = cap.read() 26 | if not ret: 27 | break 28 | count += 1 29 | img = np.flip(img, axis=1) 30 | 31 | ## Convert the color space from BGR to HSV 32 | hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) 33 | 34 | ## Generat masks to detect red color 35 | lower_red = np.array([0, 120, 50]) 36 | upper_red = np.array([10, 255,255]) 37 | mask1 = cv2.inRange(hsv, lower_red, upper_red) 38 | 39 | lower_red = np.array([170, 120, 70]) 40 | upper_red = np.array([180, 255, 255]) 41 | mask2 = cv2.inRange(hsv, lower_red, upper_red) 42 | 43 | mask1 = mask1 + mask2 44 | 45 | ## Open and Dilate the mask image 46 | mask1 = cv2.morphologyEx(mask1, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8)) 47 | mask1 = cv2.morphologyEx(mask1, cv2.MORPH_DILATE, np.ones((3, 3), np.uint8)) 48 | 49 | ## Create an inverted mask to segment out the red color from the frame 50 | mask2 = cv2.bitwise_not(mask1) 51 | 52 | ## Segment the red color part out of the frame using bitwise and with the inverted mask 53 | res1 = cv2.bitwise_and(img, img, mask=mask2) 54 | 55 | ## Create image showing static background frame pixels only for the masked region 56 | res2 = cv2.bitwise_and(background, background, mask=mask1) 57 | 58 | ## Generating the final output and writing 59 | finalOutput = cv2.addWeighted(res1, 1, res2, 1, 0) 60 | out.write(finalOutput) 61 | cv2.imshow("magic", finalOutput) 62 | cv2.waitKey(1) 63 | 64 | 65 | cap.release() 66 | out.release() 67 | cv2.destroyAllWindows() 68 | 69 | #------------------------ 70 | #colors code 71 | 72 | #skin color 73 | #lower_red = np.array([0, 0, 70]) 74 | #upper_red = np.array([100, 255,255]) 75 | # mask1 = cv2.inRange(hsv, lower_red, upper_red) 76 | 77 | # lower_red = np.array([170, 120, 70]) 78 | # upper_red = 
np.array([180, 255, 255]) 79 | 80 | #----------------------- -------------------------------------------------------------------------------- /11. Optical Flow/opticalFlow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | cap = cv2.VideoCapture('motion.avi') 5 | 6 | ret, frame = cap.read() 7 | gs_im0 = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 8 | points_prev = cv2.goodFeaturesToTrack(gs_im0, 100, 0.03, 9.0, False) 9 | 10 | while(cap.isOpened()): 11 | ret, frame = cap.read() 12 | 13 | gs_im1 = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 14 | # Call tracker. 15 | points, st, err = cv2.calcOpticalFlowPyrLK(gs_im0, gs_im1, points_prev, None, (3,3)) 16 | 17 | for i,p in enumerate(points): 18 | a,b = p.ravel() 19 | frame = cv2.circle(frame,(a,b),3,(255,255,255),-1) 20 | 21 | cv2.imshow('frame',frame) 22 | points_prev = points 23 | gs_im0 = gs_im1 24 | if cv2.waitKey(1) & 0xFF == ord('q'): 25 | break 26 | 27 | cap.release() 28 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /12. Blob Detection/blobDetection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/12. Blob Detection/blobDetection.py -------------------------------------------------------------------------------- /13. contouring/README.md: -------------------------------------------------------------------------------- 1 | # Shape Contouring 2 | 3 | Contouring is like drawing an outline along the boundary of an object. In OpenCV this will be achieved with 2 processes: 4 | **Thresholding** and **Contouring**. 5 | 6 | * Thresholding allows us to filter the object 7 | * Contouring lets us outline/mark the boundary of the object 8 | 9 | ## Result 10 | **Original** ![](https://github.com/Pranjalmishra30/rep.1/blob/master/Contouring/Data/Shapes.png) **Contoured** ![](https://github.com/Pranjalmishra30/openCV-Rep/blob/master/Mini-Projects/ShapeContouring/Shape_Detected.png) 11 | 12 | ## Refrences 13 | 1. [Thresholding](https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_thresholding/py_thresholding.html) 14 | 2. [Contours](https://docs.opencv.org/trunk/d4/d73/tutorial_py_contours_begin.html) 15 | -------------------------------------------------------------------------------- /13. 
contouring/contouring.py: -------------------------------------------------------------------------------- 1 | """ Image contoutring on thumbs images using binary thresholding and findCountours() """ 2 | 3 | import cv2 4 | import matplotlib.pyplot as plt 5 | 6 | # read the image 7 | image = cv2.imread('./Media/thumbs_up_down.jpg') 8 | # convert to RGB 9 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 10 | # convert to grayscale 11 | gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) 12 | # create a binary thresholded image 13 | _, binary = cv2.threshold(gray, 225, 255, cv2.THRESH_BINARY_INV) 14 | 15 | # find the contours from the thresholded image 16 | contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 17 | # draw all contours 18 | image = cv2.drawContours(image, contours, -1, (0, 255, 0), 2) 19 | # show the image with the drawn contours 20 | plt.imshow(image) 21 | #plt.imsave('./Media/thumbs_up_down_countour.jpg', image) 22 | plt.show() 23 | 24 | -------------------------------------------------------------------------------- /13. contouring/liveContourDetection.py: -------------------------------------------------------------------------------- 1 | """ Live cam contouring using thresholding and findContours() """ 2 | 3 | import cv2 4 | 5 | cap = cv2.VideoCapture(0) 6 | 7 | while True: 8 | cam, frame = cap.read() 9 | 10 | if cam is True: 11 | # convert frame into grayscale 12 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 13 | 14 | # create a binary threshold 15 | _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV) 16 | 17 | contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, 18 | cv2.CHAIN_APPROX_SIMPLE) 19 | 20 | image = cv2.drawContours(image=frame, contours= contours, 21 | contourIdx=-1, color=(128, 0, 0), thickness=2) 22 | 23 | cv2.imshow("live-contour-detector", image) 24 | 25 | if cv2.waitKey(1) == ord('q'): 26 | break 27 | 28 | cap.release() 29 | cv2.destroyAllWindows() 30 | -------------------------------------------------------------------------------- /13. contouring/shapeDetection.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | img = cv2.imread('./Media/Shapes.png') 4 | gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 5 | blur = cv2.GaussianBlur(gray,(11,11),0) #The values need to be >1 and odd 6 | 7 | # creating binary thresholding 8 | ret, th = cv2.threshold(blur,220,255,cv2.THRESH_BINARY_INV) # Inverse Binary thresholding technique 9 | 10 | # finding and drawing contour 11 | (cnts,_) = cv2.findContours(th.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) 12 | cv2.drawContours(img,cnts,-1,(0,0,0),2) 13 | 14 | cv2.imshow('image',img) 15 | cv2.imwrite("./Media/Shape_Detected.png",img) # Save the contoured image 16 | cv2.waitKey(0) 17 | cv2.destroyAllWindows() 18 | -------------------------------------------------------------------------------- /14. 
ImageOperations/colvolutionaFeature.py: -------------------------------------------------------------------------------- 1 | """ Convolutional Operations on Image using numpy """ 2 | 3 | import numpy as np 4 | from PIL import Image 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | class ConvolutionalOperations: 9 | def __init__(self, image_path): 10 | self.image_path = image_path 11 | 12 | def _color_to_gray(self): 13 | gray_img = np.array(Image.open(self.image_path).convert('L')) 14 | return gray_img 15 | 16 | 17 | 18 | if __name__ == '__main__': 19 | image_path = './Media/apple.jpeg' 20 | obj = ConvolutionalOperations(image_path) 21 | output = obj._color_to_gray() 22 | plt.imshow(output) 23 | plt.show() -------------------------------------------------------------------------------- /14. ImageOperations/filterVGG16.py: -------------------------------------------------------------------------------- 1 | """ Image filters using Keras and tensorflow """ 2 | import numpy as np 3 | from tensorflow.keras.applications.vgg16 import VGG16 4 | from tensorflow.keras.applications.vgg16 import preprocess_input 5 | from tensorflow.keras.preprocessing.image import load_img 6 | from tensorflow.keras.preprocessing.image import img_to_array 7 | from tensorflow.keras.models import Model 8 | from matplotlib import pyplot as plt 9 | 10 | model = VGG16() 11 | model = Model(input=model.input, outputs=model.layers[1].output) 12 | model.summary() 13 | 14 | img = load_img("./Media/face-001.jpg") 15 | img = img_to_array(img) 16 | img = np.expand_dims(img, asix=0) 17 | img = preprocess_input(img) 18 | feature_map = model.predict(img) 19 | 20 | square = 8 21 | i = 1 22 | for _ in range(square): 23 | for _ in range(square): 24 | ax = plt.subplot(square, square, i) 25 | ax.set_xticks([]) 26 | ax.set_yticks([]) 27 | plt.imshow(feature_map[0, :, :, i-1], cmap='gray') 28 | i += 1 29 | plt.show() -------------------------------------------------------------------------------- /14. ImageOperations/imageEnocdingDecoding.py: -------------------------------------------------------------------------------- 1 | """ Image Encoding and decoding using base64 """ 2 | 3 | import base64 4 | 5 | image = open('./Media/apple.jpeg', 'rb') 6 | image_read = image.read() 7 | 8 | image_64_encode = base64.encodebytes(image_read) 9 | print(image_64_encode) 10 | 11 | image_64_decode = base64.decodebytes(image_64_encode) 12 | image_result = open('./Media/apple.png', 'wb') 13 | image_result.write(image_64_decode) -------------------------------------------------------------------------------- /14. 
ImageOperations/simpleImageOperations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | class ImageOperation: 7 | """ Simple image operations using numpy arrays """ 8 | def __init__(self, img_array: np.ndarray): 9 | self.img_array = img_array 10 | 11 | def color_to_gray(self): 12 | img = self.img_array 13 | gray = np.dot(img[...,:3], [0.299, 0.587, 0.144]) 14 | return gray 15 | 16 | def color_inversion(self): 17 | inv_img = 255 - self.img_array 18 | return inv_img 19 | 20 | def color_reduction(self): 21 | 22 | im_32 = self.img_array // 32 * 32 23 | im_128 = self.img_array // 128 * 128 24 | 25 | im_red = np.concatenate((self.img_array, im_32, im_128), axis=1) 26 | return im_red 27 | 28 | def gamma_correction(self): 29 | 30 | img = self.img_array 31 | img1 = 255.0 * (self.img_array / 255.0)**(1/2.2) 32 | img2 = 255.0 * (self.img_array / 255.0)**2.2 33 | 34 | return np.concatenate((img, img1, img2), axis=1) 35 | 36 | def slice_n_paste(self): 37 | 38 | src = np.resize(self.img_array, (128, 128)) 39 | dst = np.resize(self.img_array, (256, 256)) // 4 40 | 41 | dst_copy = dst.copy() 42 | 43 | dst_copy[110:200, 110:200] = src[10:100, 10:100] 44 | return dst_copy 45 | 46 | def image_binarization(self): 47 | 48 | img = self.img_array 49 | gray = np.dot(img[...,:3], [0.299, 0.587, 0.144]) 50 | thresh = 128 51 | max_val = 255.0 52 | 53 | im_bin = (gray > thresh) * max_val 54 | 55 | return im_bin 56 | 57 | 58 | if __name__ == '__main__': 59 | image_path = './Media/apple.jpeg' 60 | img_array = np.array(Image.open(image_path)) 61 | obj = ImageOperation(img_array) 62 | output = obj.gamma_correction() 63 | plt.imshow(output) 64 | plt.show() -------------------------------------------------------------------------------- /15. VirtualPen/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/15. VirtualPen/README.md -------------------------------------------------------------------------------- /16. EyeBall Tracking/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/16. EyeBall Tracking/README.md -------------------------------------------------------------------------------- /17. 
Color Trackbar/colorTrackbar.py: -------------------------------------------------------------------------------- 1 | """ Color Tracker GUI """ 2 | 3 | import numpy as np 4 | import cv2 5 | 6 | def nothing(x): 7 | pass 8 | 9 | # Create a black image, a window 10 | img = np.zeros((300, 512, 3), np.uint8) 11 | cv2.namedWindow('Color Tracker') 12 | 13 | # create trackbars for color change 14 | cv2.createTrackbar('R','image',0,255,nothing) 15 | cv2.createTrackbar('G','image',0,255,nothing) 16 | cv2.createTrackbar('B','image',0,255,nothing) 17 | 18 | # create switch for ON/OFF functionality 19 | switch = '0 : OFF \n1 : ON' 20 | cv2.createTrackbar(switch, 'image',0,1,nothing) 21 | 22 | while(1): 23 | cv2.imshow('image',img) 24 | k = cv2.waitKey(1) & 0xFF 25 | if k == 27: 26 | break 27 | 28 | # get current positions of four trackbars 29 | r = cv2.getTrackbarPos('R','image') 30 | g = cv2.getTrackbarPos('G','image') 31 | b = cv2.getTrackbarPos('B','image') 32 | s = cv2.getTrackbarPos(switch,'image') 33 | 34 | if s == 0: 35 | img[:] = 0 36 | else: 37 | img[:] = [b,g,r] 38 | 39 | cv2.destroyAllWindows() 40 | -------------------------------------------------------------------------------- /18. SIFT Feature Extraction/README.md: -------------------------------------------------------------------------------- 1 | **SIFT** stands for `Scale Invariant Feature Transform`, it is a feature extraction method (among others, such as `HOG feature extraction`) where image content is transformed into local feature coordinates that are invariant to translation, scale and other image transformations. 2 | 3 | Below are the advantages of SIFT: 4 | 5 | - Locality: Features are local; robust to occlusion and clutter. 6 | - Distinctiveness: Individual features extracted can be matched to a large dataset of objects. 7 | - Quantity: Using SIFT, we can extract many features from small objects. 8 | - Efficiency: SIFT is close to real-time performance. 9 | 10 | [SIFT original paper](https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf) -------------------------------------------------------------------------------- /18. SIFT Feature Extraction/basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | img = cv2.imread('./Media/apple.jpeg') 5 | gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 6 | 7 | sift = cv2.SIFT_create() 8 | 9 | keypoints, descrptors = sift.detectAndCompute(img, None) 10 | sift_image = cv2.drawKeypoints(gray_img, keypoints, img) 11 | 12 | cv2.imshow('image', sift_image) 13 | 14 | k = cv2.waitKey(0) & 0xff 15 | 16 | if k == 27: 17 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /18. 
SIFT Feature Extraction/compareFeatures.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | # loading and resizing the image 5 | img1 = cv2.imread('./Media/book.png') 6 | img2 = cv2.imread('./Media/book_on_table.jpeg') 7 | 8 | img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY) 9 | img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY) 10 | 11 | sift = cv2.SIFT_create() 12 | 13 | keypoints1, descrptors1 = sift.detectAndCompute(img1, None) 14 | keypoints2, descrptors2 = sift.detectAndCompute(img2, None) 15 | 16 | bf = cv2.BFMatcher(cv2.NORM_L1, crossCheck=True) 17 | 18 | matches = bf.match(descrptors1, descrptors2) 19 | matches = sorted(matches, key= lambda x: x.distance) 20 | 21 | matched_img = cv2.drawMatches(img1, keypoints1, img2, keypoints2, matches[:20], img2, flags=2) 22 | 23 | # showing the output 24 | cv2.imshow('image', cv2.resize(matched_img, (800, 600))) 25 | 26 | k = cv2.waitKey(0) & 0xff 27 | 28 | if k == 27: 29 | cv2.destroyAllWindows() 30 | -------------------------------------------------------------------------------- /19. Hog Feature Extraction/README.md: -------------------------------------------------------------------------------- 1 | # HOG(Histogram Oriented Gradients) Feature Extraction 2 | -------------------------------------------------------------------------------- /19. Hog Feature Extraction/featureExtractor.py: -------------------------------------------------------------------------------- 1 | ''' Feature extraction using hog and skimage ''' 2 | 3 | from skimage.io import imread 4 | from skimage.transform import resize 5 | from skimage.feature import hog 6 | from matplotlib import pyplot as plt 7 | 8 | img = imread("./Media/coins.jpg") 9 | 10 | # applying the hog algorithm 11 | fd, hog_image = hog(img, 12 | orientations=9, 13 | pixels_per_cell=(8, 8), 14 | cells_per_block=(2,2), 15 | visualize=True, 16 | multichannel=True) 17 | 18 | plt.axis("off") 19 | plt.imshow(hog_image, cmap='gray') 20 | plt.savefig("./Media/coins_hog.png") 21 | plt.show() 22 | -------------------------------------------------------------------------------- /20. 
-------------------------------------------------------------------------------- /20. Image Segmentation/KmeansImageSegmentation.py: -------------------------------------------------------------------------------- 1 | """ Image segmentation using K-Means """ 2 | 3 | import cv2 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | 7 | image = cv2.imread('./Media/nature.png') 8 | 9 | # reshape the image to a 2D array of pixels with 3 color values each 10 | pixels_values = image.reshape((-1, 3)) 11 | 12 | # converting to float32, as required by cv2.kmeans 13 | pixels_values = np.float32(pixels_values) 14 | 15 | # stop after 100 iterations or when cluster centers move less than epsilon (0.2) 16 | criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2) 17 | 18 | # number of clusters 19 | K = 3 20 | 21 | _, labels, centers = cv2.kmeans(pixels_values, 22 | K=K, 23 | bestLabels=None, 24 | criteria=criteria, 25 | attempts=10, 26 | flags=cv2.KMEANS_RANDOM_CENTERS) 27 | 28 | # converting to 8 bit values 29 | centers = np.uint8(centers) 30 | 31 | # flatten the labels array 32 | labels = labels.flatten() 33 | 34 | # convert all pixels to the color of their centroids 35 | segmented_image = centers[labels] 36 | 37 | segmented_image = segmented_image.reshape(image.shape) 38 | 39 | # show the original and segmented output 40 | cv2.imshow('Original', image) 41 | cv2.imshow('Segmented', segmented_image) 42 | 43 | # saving the output 44 | cv2.imwrite('./Media/nature_output.png', segmented_image) 45 | k = cv2.waitKey(0) & 0xFF 46 | 47 | if k == 27: 48 | cv2.destroyAllWindows() 49 | -------------------------------------------------------------------------------- /20. Image Segmentation/README.md: -------------------------------------------------------------------------------- 1 | **Image segmentation** is the process of partitioning an image into multiple regions (or segments). 2 | The goal is to change the representation of the image into something that is easier to analyze and more meaningful. 3 | 4 | K-Means clustering is an unsupervised machine learning algorithm that aims to partition N observations into K clusters in which each observation belongs to the cluster with the nearest mean. A cluster refers to a collection of data points aggregated together because of certain similarities. For image segmentation, the clusters are different image colors.
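A handy extension of the K-Means approach described above is to mask out a single cluster, e.g. to black out one dominant color region. A sketch under the same setup as `KmeansImageSegmentation.py`; the cluster index `2` is an arbitrary choice for illustration:

```python
import cv2
import numpy as np

image = cv2.imread('./Media/nature.png')
pixel_values = np.float32(image.reshape((-1, 3)))

criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
_, labels, centers = cv2.kmeans(pixel_values, 3, None, criteria, 10,
                                cv2.KMEANS_RANDOM_CENTERS)
labels = labels.flatten()

# copy the original pixels, then black out everything in one cluster
masked_image = image.reshape((-1, 3)).copy()
masked_image[labels == 2] = [0, 0, 0]
masked_image = masked_image.reshape(image.shape)

cv2.imshow('cluster 2 removed', masked_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
```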
-------------------------------------------------------------------------------- /20. Image Segmentation/waterShedAlgorithm.py: -------------------------------------------------------------------------------- 1 | ''' Watershed algorithm in OpenCV ''' 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | img = cv2.imread('./Media/coins.jpg') 7 | gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 8 | 9 | ret, thresh = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) 10 | 11 | # noise removal 12 | kernel = np.ones((3, 3), np.uint8) 13 | opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2) 14 | 15 | # sure background area 16 | sure_bg = cv2.dilate(opening, kernel) 17 | 18 | # finding sure foreground area 19 | dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5) 20 | ret, sure_fg = cv2.threshold(dist_transform, 0.7*dist_transform.max(), 255, 0) 21 | 22 | # finding unknown region 23 | 24 | sure_fg = np.uint8(sure_fg) 25 | unknown = cv2.subtract(sure_bg, sure_fg) 26 | 27 | # marker labelling 28 | ret, markers = cv2.connectedComponents(sure_fg) 29 | 30 | # add one to all labels so that sure background is not 0, but 1 31 | markers = markers + 1 32 | 33 | # mark the unknown region with zero 34 | markers[unknown==255] = 0 35 | 36 | markers = cv2.watershed(img, markers) 37 | img[markers == -1] = [255, 0, 0] 38 | 39 | cv2.imshow('output', img) 40 | 41 | key = cv2.waitKey(0) & 0xFF 42 | if key == 27: 43 | cv2.destroyAllWindows() 44 | -------------------------------------------------------------------------------- /21. Facial Recognition/FaceRec.py: -------------------------------------------------------------------------------- 1 | import os 2 | import face_recognition 3 | import cv2 4 | import numpy as np 5 | 6 | video_capture = cv2.VideoCapture(0) 7 | 8 | # Create arrays of known face encodings and their names 9 | known_face_encodings = [] 10 | known_face_names = [] 11 | 12 | root_dir = os.path.dirname(os.path.abspath(__file__)) 13 | image_dir = os.path.join(root_dir, "images") 14 | 15 | # creating encodings for faces from the images folder 16 | for file in os.listdir(image_dir): 17 | if file.endswith(("jpeg", "jpg")): 18 | input_face_name = file.split('.')[0] 19 | input_face = face_recognition.load_image_file(os.path.join(image_dir, file)) 20 | input_face_encoding = face_recognition.face_encodings(input_face)[0] 21 | 22 | # appending face names and face encodings 23 | known_face_names.append(input_face_name) 24 | known_face_encodings.append(input_face_encoding) 25 | 26 | # Initialize some variables 27 | face_locations = [] 28 | face_encodings = [] 29 | face_names = [] 30 | process_this_frame = True 31 | 32 | while True: 33 | # Grab a single frame of video 34 | ret, frame = video_capture.read() 35 | 36 | # Resize frame of video to 1/4 size for faster face recognition processing 37 | small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25) 38 | 39 | # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses) 40 | rgb_small_frame = small_frame[:, :, ::-1] 41 | 42 | # Only process every other frame of video to save time 43 | if process_this_frame: 44 | # Find all the faces and face encodings in the current frame of video 45 | face_locations = face_recognition.face_locations(rgb_small_frame) 46 | face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations) 47 | 48 | face_names = [] 49 | for face_encoding in face_encodings: 50 | # See if the face is a match for the known face(s) 51 | matches = face_recognition.compare_faces(known_face_encodings, face_encoding) 52 | 
name = "Unknown" 53 | 54 | # # If a match was found in known_face_encodings, just use the first one. 55 | # if True in matches: 56 | # first_match_index = matches.index(True) 57 | # name = known_face_names[first_match_index] 58 | 59 | # Or instead, use the known face with the smallest distance to the new face 60 | face_distances = face_recognition.face_distance(known_face_encodings, face_encoding) 61 | best_match_index = np.argmin(face_distances) 62 | if matches[best_match_index]: 63 | name = known_face_names[best_match_index] 64 | 65 | face_names.append(name) 66 | process_this_frame = not process_this_frame 67 | 68 | 69 | # Display the results 70 | for (top, right, bottom, left), name in zip(face_locations, face_names): 71 | # Scale back up face locations since the frame we detected in was scaled to 1/4 size 72 | top *= 4 73 | right *= 4 74 | bottom *= 4 75 | left *= 4 76 | 77 | # Draw a box around the face 78 | cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2) 79 | 80 | # Draw a label with a name below the face 81 | cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED) 82 | font = cv2.FONT_HERSHEY_DUPLEX 83 | cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1) 84 | 85 | # Display the resulting image 86 | cv2.imshow('Video', frame) 87 | 88 | # Hit 'q' on the keyboard to quit! 89 | if cv2.waitKey(1) & 0xFF == ord('q'): 90 | break 91 | 92 | # Release handle to the webcam 93 | video_capture.release() 94 | cv2.destroyAllWindows() 95 | -------------------------------------------------------------------------------- /21. Facial Recognition/README.md: -------------------------------------------------------------------------------- 1 | ## Usgaes 2 | 3 | 1. Add Your Image in `Images` folder. exmaple >> name.jpeg 4 | 5 | 2. Install the requirements file 6 | 7 | ```bash 8 | python -m pip install -r requirements.txt 9 | ``` 10 | 11 | 3. Run the Script 12 | 13 | ```bash 14 | python FaceRec.py 15 | ``` 16 | 17 | NOTE:- incase installtation stuck on dlib 18 | 19 | ```bash 20 | python -m pip install dlib -vvv 21 | ``` 22 | -------------------------------------------------------------------------------- /21. Facial Recognition/images/obama.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/21. Facial Recognition/images/obama.jpg -------------------------------------------------------------------------------- /22. Optical Character Recognition/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/22. Optical Character Recognition/README.md -------------------------------------------------------------------------------- /23. PixelLib Segmentation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/23. PixelLib Segmentation/README.md -------------------------------------------------------------------------------- /23. 
PixelLib Segmentation/instanceSegmentationExample.py: -------------------------------------------------------------------------------- 1 | """ Instance segmentation using PixelLib and a Mask R-CNN model pretrained on the COCO dataset """ 2 | 3 | import pixellib 4 | from PIL import Image 5 | from pixellib.instance import instance_segmentation 6 | 7 | model_path = "./assets/mask_rcnn_coco.h5" 8 | image_path = "./Media/road.jpg" 9 | image_output = './Media/road_segmentation.jpg' 10 | 11 | # creating the segmentation instance 12 | segment_image = instance_segmentation() 13 | segment_image.load_model(model_path) 14 | 15 | # applying instance segmentation 16 | segment_image.segmentImage(image_path, show_bboxes=True, output_image_name=image_output) 17 | 18 | # showing the output 19 | img = Image.open(image_output) 20 | img.show() 21 | -------------------------------------------------------------------------------- /24. Road Lane Detection/README.md: -------------------------------------------------------------------------------- 1 | # Road Lane Detection 2 | -------------------------------------------------------------------------------- /24. Road Lane Detection/laneDetection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/24. Road Lane Detection/laneDetection.py -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Computer Vision Essentials 2 | 3 | - [Contributing to Computer Vision Essentials](#contributing-to-computer-vision-essentials) 4 | - [Steps to contribute](#steps-to-contribute) 5 | - [Making a PR](#making-a-pr) 6 | - [Additional Notes](#additional-notes) 7 | - [Issue suggestions/Bug reporting](#issue-suggestionsbug-reporting) 8 | - [License](#license) 9 | - [References](#references) 10 | 11 | We love your input! We want to make contributing to this project as easy and transparent as possible, whether it's: 12 | 13 | - Reporting a bug 14 | - Discussing the current state of the code 15 | - Submitting a fix 16 | - Proposing new features 17 | - Becoming a maintainer 18 | 19 | ## Steps to contribute 20 | 21 | - Comment on the issue you want to work on. Make sure it's not assigned to someone else. 22 | 23 | - If you think an algorithm is missing, create an issue. 24 | 25 | ### Making a PR 26 | 27 | - Make sure you have been assigned the issue to which you are making a PR. 28 | - If you make a PR before being assigned, it will be labeled `invalid` and closed without merging. 29 | 30 | - Fork the repo and clone it on your machine. 31 | - Add an upstream link to the main branch in your cloned repo 32 | 33 | ```bash 34 | git remote add upstream https://github.com/codePerfectPlus/ComputerVision-Essentials 35 | ``` 36 | 37 | - Keep your cloned repo up to date by pulling from upstream (this will also avoid merge conflicts while committing new changes) 38 | 39 | ```bash 40 | git pull upstream master 41 | ``` 42 | 43 | - Create your feature branch 44 | 45 | ```bash 46 | git checkout -b <your-branch-name> 47 | ``` 48 | 49 | - Commit all the changes 50 | 51 | ```bash 52 | git commit -am "Meaningful commit message" 53 | ``` 54 | 55 | - Push the changes for review 56 | 57 | ```bash 58 | git push origin <your-branch-name> 59 | ``` 60 | 61 | - Create a PR to our repo on GitHub. 
62 | 63 | ### Additional Notes 64 | 65 | - Code should be properly commented to ensure its readability. 66 | - If you've added code that should be tested, add tests as comments. 67 | - Make sure your code is properly formatted. 68 | - Issue that pull request! 69 | 70 | ## Issue suggestions/Bug reporting 71 | 72 | When you are creating an issue, make sure it's not already present. Furthermore, provide a proper description of the changes. If you are suggesting any code improvements, provide thorough details about the improvements. 73 | 74 | **Great issue suggestions** tend to have: 75 | 76 | - A quick summary of the changes. 77 | - In case of a bug, steps to reproduce it. 78 | - Be specific! 79 | - Give sample code if you can. 80 | - What you expected would happen. 81 | - What actually happens. 82 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work). 83 | 84 | ## License 85 | 86 | By contributing, you agree that your contributions will be licensed under the project's [MIT License](/LICENSE). 87 | 88 | ## References 89 | 90 | This document was adapted from the open-source contribution guidelines for [Facebook's Draft](https://github.com/facebook/draft-js/blob/a9316a723f9e918afde44dea68b5f9f39b7d9b00/CONTRIBUTING.md) 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Deepak Raj 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Media/Man_United.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/Man_United.jpeg -------------------------------------------------------------------------------- /Media/Shape_Detected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/Shape_Detected.png -------------------------------------------------------------------------------- /Media/Shapes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/Shapes.png -------------------------------------------------------------------------------- /Media/apple.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/apple.jpeg -------------------------------------------------------------------------------- /Media/bnw.jfif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/bnw.jfif -------------------------------------------------------------------------------- /Media/book.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/book.png -------------------------------------------------------------------------------- /Media/book_on_table.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/book_on_table.jpeg -------------------------------------------------------------------------------- /Media/coins.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/coins.jpg -------------------------------------------------------------------------------- /Media/coins_hog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/coins_hog.png -------------------------------------------------------------------------------- /Media/corner_detection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/corner_detection.jpg -------------------------------------------------------------------------------- /Media/cropped image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/cropped image1.png 
-------------------------------------------------------------------------------- /Media/edge-detection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/edge-detection.jpg -------------------------------------------------------------------------------- /Media/face-001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/face-001.jpg -------------------------------------------------------------------------------- /Media/face-detected-dnn.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/face-detected-dnn.jpeg -------------------------------------------------------------------------------- /Media/face-detected.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/face-detected.jpeg -------------------------------------------------------------------------------- /Media/nature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/nature.png -------------------------------------------------------------------------------- /Media/nature_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/nature_output.png -------------------------------------------------------------------------------- /Media/opencv-logo-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/opencv-logo-white.png -------------------------------------------------------------------------------- /Media/pieboard-templatematching.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/pieboard-templatematching.jpg -------------------------------------------------------------------------------- /Media/port-detected.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/port-detected.jpeg -------------------------------------------------------------------------------- /Media/port-templatematching.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/port-templatematching.jpg -------------------------------------------------------------------------------- /Media/road.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/road.jpg -------------------------------------------------------------------------------- /Media/road_segmentation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/road_segmentation.jpg -------------------------------------------------------------------------------- /Media/sample.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/sample.jpeg -------------------------------------------------------------------------------- /Media/sample2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/sample2.jpeg -------------------------------------------------------------------------------- /Media/thumbs_up_down.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/thumbs_up_down.jpg -------------------------------------------------------------------------------- /Media/thumbs_up_down_countour.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeperfectplus/ComputerVision-Essentials/604eba7ba36381da6dee58146764adae9198bdfd/Media/thumbs_up_down_countour.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Computer Vision Essentials 2 | 3 | - [Computer Vision Essentials](#computer-vision-essentials) 4 | - [Introduction](#introduction) 5 | - [Used Libraries/Packages](#used-librariespackages) 6 | - [How To Run](#how-to-run) 7 | - [Usage](#usage) 8 | - [Support](#support) 9 | - [Roadmap](#roadmap) 10 | - [Contributing](#contributing) 11 | - [Authors and acknowledgment](#authors-and-acknowledgment) 12 | - [License](#license) 13 | - [Citation](#citation) 14 | - [Author](#author) 15 | - [Extra Downloads](#extra-downloads) 16 | 17 | ## Introduction 18 | 19 | According to [wikipedia](https://en.wikipedia.org/wiki/Computer_vision "computer_vision-Wikipedia") - 20 | 21 | Computer vision is an interdisciplinary scientific field that deals with how computers can gain high-level understanding from digital images or videos. From the perspective of engineering, it seeks to understand and automate tasks that the human visual system can do. 22 | 23 | Computer vision tasks include methods for acquiring, processing, analyzing and understanding digital images, and extraction of high-dimensional data from the real world in order to produce numerical or symbolic information, e.g. in the forms of decisions. 24 | 25 | [Read More ...](https://en.wikipedia.org/wiki/Computer_vision "computer_vision-Wikipedia") 26 | 27 | ## Used Libraries/Packages 28 | 29 | - **OpenCV** - OpenCV (Open Source Computer Vision Library) is an open source computer vision and machine learning software library. 
30 | - **PixelLib** - PixelLib is a library created for performing image and video segmentation using a few lines of code. 31 | - **CVLib** - A simple, high-level, easy-to-use open source Computer Vision library for Python. 32 | - **Dlib** - Dlib is a general purpose cross-platform software library written in the programming language C++. 33 | - **PIL/Pillow** - Python Imaging Library is a free and open-source additional library for the Python programming language that adds support for opening, manipulating, and saving many different image file formats. 34 | - **Keras** - Keras is the most used deep learning framework among top-5 winning teams on Kaggle. 35 | - **Tensorflow** - TensorFlow is a free and open-source software library for machine learning. 36 | - **Pytesseract** - Python-tesseract is an optical character recognition (OCR) tool for Python. That is, it will recognize and “read” the text embedded in images. 37 | - **scikit-image** - scikit-image is an open-source image processing library for the Python programming language. It includes algorithms for segmentation, geometric transformations, color space manipulation, analysis, filtering, morphology, feature detection, and more. 38 | - **Matplotlib** - Matplotlib is a cross-platform, data visualization and graphical plotting library for Python and its numerical extension NumPy. 39 | 40 | ## How To Run 41 | 42 | - Install Python 3.6+ 43 | 44 | Create a virtual environment with `pipenv`. 45 | 46 | ```bash 47 | python -m pip install pipenv 48 | pipenv install -r requirements.txt 49 | pipenv shell 50 | ``` 51 | 52 | NOTE: check the [guide](https://www.tensorflow.org/install) for TensorFlow installation on your CPU/GPU. For tensorflow-gpu, install CUDA 11.0 and the necessary libraries. 53 | 54 | Large models and files are hosted on Google Drive. **To download them, run [utils.py](utils.py)** 55 | 56 | ```bash 57 | python utils.py 58 | ``` 59 | 60 | ## Usage 61 | 62 | Computer vision allows a computer to perform many of the same kinds of visual tasks as humans. There are two main tasks, defined below: 63 | 64 | - Object Classification - In object classification, we train a model on a dataset of particular objects, and the model classifies new objects as belonging to one or more of the training categories. 65 | - Object Identification - In object identification, the model identifies a particular instance of an object - for example, parsing two faces in an image and tagging one as Virat Kohli and the other as Rohit Sharma. 66 | 67 | 68 | 69 | ## Support 70 | 71 | contributors 72 | 73 | ## Roadmap 74 | 75 | 76 | ## Contributing 77 | 78 | Before submitting a bug report, please do the following: 79 | 80 | Perform basic troubleshooting steps: 81 | 82 | - Make sure you are on the latest version. If you are not on the most recent version, your problem may have been solved already! Upgrading is always the best first step. 83 | - Try older versions. If you are already on the latest release, try rolling back a few minor versions (e.g. if on 1.7, try 1.5 or 1.6) and see if the problem goes away. This will help the devs narrow down when the problem first arose in the commit log. 84 | - Try switching up dependency versions. If the software in question has dependencies (other libraries, etc.) try upgrading/downgrading those as well. 
85 | 86 | ## Authors and acknowledgment 87 | 88 | - [Deepak Raj](https://github.com/codePerfectPlus) 89 | - [Pranjalmishra30](https://github.com/Pranjalmishra30) 90 | - [GloriousMusketeer](https://github.com/GloriousMusketeer) 91 | - [bislara](https://github.com/bislara) 92 | - [its-harshil](https://github.com/its-harshil) 93 | - [farhan0syakir](https://github.com/farhan0syakir) 94 | - [harshit-saraswat](https://github.com/harshit-saraswat) 95 | - [...](https://github.com/codePerfectPlus/OpenCv-tutorial/graphs/contributors) 96 | 97 | ## License 98 | 99 | Open source project, released under the MIT License. 100 | 101 | ## Citation 102 | 103 | ``` 104 | Stéfan van der Walt, Johannes L. Schönberger, Juan Nunez-Iglesias, François Boulogne, Joshua D. Warner, Neil Yager, Emmanuelle Gouillart, Tony Yu and the scikit-image contributors. scikit-image: Image processing in Python. PeerJ 2:e453 (2014) https://doi.org/10.7717/peerj.453 105 | 106 | Coelho, L.P. 2013. Mahotas: Open source software for scriptable computer vision. Journal of Open Research Software 1(1):e3, DOI: http://dx.doi.org/10.5334/jors.ac 107 | ``` 108 | 109 | ## Author 110 | 111 | - Project : Computer Vision Essentials 112 | - Language : Python 113 | - Github : 114 | - Website : 115 | 116 | ## Extra Downloads 117 | 118 | 1. FaceDetection Caffe Models -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-architect -------------------------------------------------------------------------------- /assets/deploy.prototxt.txt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 300 6 | dim: 300 7 | } 8 | 9 | layer { 10 | name: "data_bn" 11 | type: "BatchNorm" 12 | bottom: "data" 13 | top: "data_bn" 14 | param { 15 | lr_mult: 0.0 16 | } 17 | param { 18 | lr_mult: 0.0 19 | } 20 | param { 21 | lr_mult: 0.0 22 | } 23 | } 24 | layer { 25 | name: "data_scale" 26 | type: "Scale" 27 | bottom: "data_bn" 28 | top: "data_bn" 29 | param { 30 | lr_mult: 1.0 31 | decay_mult: 1.0 32 | } 33 | param { 34 | lr_mult: 2.0 35 | decay_mult: 1.0 36 | } 37 | scale_param { 38 | bias_term: true 39 | } 40 | } 41 | layer { 42 | name: "conv1_h" 43 | type: "Convolution" 44 | bottom: "data_bn" 45 | top: "conv1_h" 46 | param { 47 | lr_mult: 1.0 48 | decay_mult: 1.0 49 | } 50 | param { 51 | lr_mult: 2.0 52 | decay_mult: 1.0 53 | } 54 | convolution_param { 55 | num_output: 32 56 | pad: 3 57 | kernel_size: 7 58 | stride: 2 59 | weight_filler { 60 | type: "msra" 61 | variance_norm: FAN_OUT 62 | } 63 | bias_filler { 64 | type: "constant" 65 | value: 0.0 66 | } 67 | } 68 | } 69 | layer { 70 | name: "conv1_bn_h" 71 | type: "BatchNorm" 72 | bottom: "conv1_h" 73 | top: "conv1_h" 74 | param { 75 | lr_mult: 0.0 76 | } 77 | param { 78 | lr_mult: 0.0 79 | } 80 | param { 81 | lr_mult: 0.0 82 | } 83 | } 84 | layer { 85 | name: "conv1_scale_h" 86 | type: "Scale" 87 | bottom: "conv1_h" 88 | top: "conv1_h" 89 | param { 90 | lr_mult: 1.0 91 | decay_mult: 1.0 92 | } 93 | param { 94 | lr_mult: 2.0 95 | decay_mult: 1.0 96 | } 97 | scale_param { 98 | bias_term: true 99 | } 100 | } 101 | layer { 102 | name: "conv1_relu" 103 | type: "ReLU" 104 | bottom: "conv1_h" 105 | top: "conv1_h" 106 | } 107 | layer { 108 | name: "conv1_pool" 109 | type: "Pooling" 110 | bottom: "conv1_h" 111 | top: "conv1_pool" 112 | pooling_param { 113 | kernel_size: 3 114 | stride: 2 115 | 
116 | } 117 | layer { 118 | name: "layer_64_1_conv1_h" 119 | type: "Convolution" 120 | bottom: "conv1_pool" 121 | top: "layer_64_1_conv1_h" 122 | param { 123 | lr_mult: 1.0 124 | decay_mult: 1.0 125 | } 126 | convolution_param { 127 | num_output: 32 128 | bias_term: false 129 | pad: 1 130 | kernel_size: 3 131 | stride: 1 132 | weight_filler { 133 | type: "msra" 134 | } 135 | bias_filler { 136 | type: "constant" 137 | value: 0.0 138 | } 139 | } 140 | } 141 | layer { 142 | name: "layer_64_1_bn2_h" 143 | type: "BatchNorm" 144 | bottom: "layer_64_1_conv1_h" 145 | top: "layer_64_1_conv1_h" 146 | param { 147 | lr_mult: 0.0 148 | } 149 | param { 150 | lr_mult: 0.0 151 | } 152 | param { 153 | lr_mult: 0.0 154 | } 155 | } 156 | layer { 157 | name: "layer_64_1_scale2_h" 158 | type: "Scale" 159 | bottom: "layer_64_1_conv1_h" 160 | top: "layer_64_1_conv1_h" 161 | param { 162 | lr_mult: 1.0 163 | decay_mult: 1.0 164 | } 165 | param { 166 | lr_mult: 2.0 167 | decay_mult: 1.0 168 | } 169 | scale_param { 170 | bias_term: true 171 | } 172 | } 173 | layer { 174 | name: "layer_64_1_relu2" 175 | type: "ReLU" 176 | bottom: "layer_64_1_conv1_h" 177 | top: "layer_64_1_conv1_h" 178 | } 179 | layer { 180 | name: "layer_64_1_conv2_h" 181 | type: "Convolution" 182 | bottom: "layer_64_1_conv1_h" 183 | top: "layer_64_1_conv2_h" 184 | param { 185 | lr_mult: 1.0 186 | decay_mult: 1.0 187 | } 188 | convolution_param { 189 | num_output: 32 190 | bias_term: false 191 | pad: 1 192 | kernel_size: 3 193 | stride: 1 194 | weight_filler { 195 | type: "msra" 196 | } 197 | bias_filler { 198 | type: "constant" 199 | value: 0.0 200 | } 201 | } 202 | } 203 | layer { 204 | name: "layer_64_1_sum" 205 | type: "Eltwise" 206 | bottom: "layer_64_1_conv2_h" 207 | bottom: "conv1_pool" 208 | top: "layer_64_1_sum" 209 | } 210 | layer { 211 | name: "layer_128_1_bn1_h" 212 | type: "BatchNorm" 213 | bottom: "layer_64_1_sum" 214 | top: "layer_128_1_bn1_h" 215 | param { 216 | lr_mult: 0.0 217 | } 218 | param { 219 | lr_mult: 0.0 220 | } 221 | param { 222 | lr_mult: 0.0 223 | } 224 | } 225 | layer { 226 | name: "layer_128_1_scale1_h" 227 | type: "Scale" 228 | bottom: "layer_128_1_bn1_h" 229 | top: "layer_128_1_bn1_h" 230 | param { 231 | lr_mult: 1.0 232 | decay_mult: 1.0 233 | } 234 | param { 235 | lr_mult: 2.0 236 | decay_mult: 1.0 237 | } 238 | scale_param { 239 | bias_term: true 240 | } 241 | } 242 | layer { 243 | name: "layer_128_1_relu1" 244 | type: "ReLU" 245 | bottom: "layer_128_1_bn1_h" 246 | top: "layer_128_1_bn1_h" 247 | } 248 | layer { 249 | name: "layer_128_1_conv1_h" 250 | type: "Convolution" 251 | bottom: "layer_128_1_bn1_h" 252 | top: "layer_128_1_conv1_h" 253 | param { 254 | lr_mult: 1.0 255 | decay_mult: 1.0 256 | } 257 | convolution_param { 258 | num_output: 128 259 | bias_term: false 260 | pad: 1 261 | kernel_size: 3 262 | stride: 2 263 | weight_filler { 264 | type: "msra" 265 | } 266 | bias_filler { 267 | type: "constant" 268 | value: 0.0 269 | } 270 | } 271 | } 272 | layer { 273 | name: "layer_128_1_bn2" 274 | type: "BatchNorm" 275 | bottom: "layer_128_1_conv1_h" 276 | top: "layer_128_1_conv1_h" 277 | param { 278 | lr_mult: 0.0 279 | } 280 | param { 281 | lr_mult: 0.0 282 | } 283 | param { 284 | lr_mult: 0.0 285 | } 286 | } 287 | layer { 288 | name: "layer_128_1_scale2" 289 | type: "Scale" 290 | bottom: "layer_128_1_conv1_h" 291 | top: "layer_128_1_conv1_h" 292 | param { 293 | lr_mult: 1.0 294 | decay_mult: 1.0 295 | } 296 | param { 297 | lr_mult: 2.0 298 | decay_mult: 1.0 299 | } 300 | scale_param { 301 | bias_term: true 302 | 
} 303 | } 304 | layer { 305 | name: "layer_128_1_relu2" 306 | type: "ReLU" 307 | bottom: "layer_128_1_conv1_h" 308 | top: "layer_128_1_conv1_h" 309 | } 310 | layer { 311 | name: "layer_128_1_conv2" 312 | type: "Convolution" 313 | bottom: "layer_128_1_conv1_h" 314 | top: "layer_128_1_conv2" 315 | param { 316 | lr_mult: 1.0 317 | decay_mult: 1.0 318 | } 319 | convolution_param { 320 | num_output: 128 321 | bias_term: false 322 | pad: 1 323 | kernel_size: 3 324 | stride: 1 325 | weight_filler { 326 | type: "msra" 327 | } 328 | bias_filler { 329 | type: "constant" 330 | value: 0.0 331 | } 332 | } 333 | } 334 | layer { 335 | name: "layer_128_1_conv_expand_h" 336 | type: "Convolution" 337 | bottom: "layer_128_1_bn1_h" 338 | top: "layer_128_1_conv_expand_h" 339 | param { 340 | lr_mult: 1.0 341 | decay_mult: 1.0 342 | } 343 | convolution_param { 344 | num_output: 128 345 | bias_term: false 346 | pad: 0 347 | kernel_size: 1 348 | stride: 2 349 | weight_filler { 350 | type: "msra" 351 | } 352 | bias_filler { 353 | type: "constant" 354 | value: 0.0 355 | } 356 | } 357 | } 358 | layer { 359 | name: "layer_128_1_sum" 360 | type: "Eltwise" 361 | bottom: "layer_128_1_conv2" 362 | bottom: "layer_128_1_conv_expand_h" 363 | top: "layer_128_1_sum" 364 | } 365 | layer { 366 | name: "layer_256_1_bn1" 367 | type: "BatchNorm" 368 | bottom: "layer_128_1_sum" 369 | top: "layer_256_1_bn1" 370 | param { 371 | lr_mult: 0.0 372 | } 373 | param { 374 | lr_mult: 0.0 375 | } 376 | param { 377 | lr_mult: 0.0 378 | } 379 | } 380 | layer { 381 | name: "layer_256_1_scale1" 382 | type: "Scale" 383 | bottom: "layer_256_1_bn1" 384 | top: "layer_256_1_bn1" 385 | param { 386 | lr_mult: 1.0 387 | decay_mult: 1.0 388 | } 389 | param { 390 | lr_mult: 2.0 391 | decay_mult: 1.0 392 | } 393 | scale_param { 394 | bias_term: true 395 | } 396 | } 397 | layer { 398 | name: "layer_256_1_relu1" 399 | type: "ReLU" 400 | bottom: "layer_256_1_bn1" 401 | top: "layer_256_1_bn1" 402 | } 403 | layer { 404 | name: "layer_256_1_conv1" 405 | type: "Convolution" 406 | bottom: "layer_256_1_bn1" 407 | top: "layer_256_1_conv1" 408 | param { 409 | lr_mult: 1.0 410 | decay_mult: 1.0 411 | } 412 | convolution_param { 413 | num_output: 256 414 | bias_term: false 415 | pad: 1 416 | kernel_size: 3 417 | stride: 2 418 | weight_filler { 419 | type: "msra" 420 | } 421 | bias_filler { 422 | type: "constant" 423 | value: 0.0 424 | } 425 | } 426 | } 427 | layer { 428 | name: "layer_256_1_bn2" 429 | type: "BatchNorm" 430 | bottom: "layer_256_1_conv1" 431 | top: "layer_256_1_conv1" 432 | param { 433 | lr_mult: 0.0 434 | } 435 | param { 436 | lr_mult: 0.0 437 | } 438 | param { 439 | lr_mult: 0.0 440 | } 441 | } 442 | layer { 443 | name: "layer_256_1_scale2" 444 | type: "Scale" 445 | bottom: "layer_256_1_conv1" 446 | top: "layer_256_1_conv1" 447 | param { 448 | lr_mult: 1.0 449 | decay_mult: 1.0 450 | } 451 | param { 452 | lr_mult: 2.0 453 | decay_mult: 1.0 454 | } 455 | scale_param { 456 | bias_term: true 457 | } 458 | } 459 | layer { 460 | name: "layer_256_1_relu2" 461 | type: "ReLU" 462 | bottom: "layer_256_1_conv1" 463 | top: "layer_256_1_conv1" 464 | } 465 | layer { 466 | name: "layer_256_1_conv2" 467 | type: "Convolution" 468 | bottom: "layer_256_1_conv1" 469 | top: "layer_256_1_conv2" 470 | param { 471 | lr_mult: 1.0 472 | decay_mult: 1.0 473 | } 474 | convolution_param { 475 | num_output: 256 476 | bias_term: false 477 | pad: 1 478 | kernel_size: 3 479 | stride: 1 480 | weight_filler { 481 | type: "msra" 482 | } 483 | bias_filler { 484 | type: "constant" 485 | 
value: 0.0 486 | } 487 | } 488 | } 489 | layer { 490 | name: "layer_256_1_conv_expand" 491 | type: "Convolution" 492 | bottom: "layer_256_1_bn1" 493 | top: "layer_256_1_conv_expand" 494 | param { 495 | lr_mult: 1.0 496 | decay_mult: 1.0 497 | } 498 | convolution_param { 499 | num_output: 256 500 | bias_term: false 501 | pad: 0 502 | kernel_size: 1 503 | stride: 2 504 | weight_filler { 505 | type: "msra" 506 | } 507 | bias_filler { 508 | type: "constant" 509 | value: 0.0 510 | } 511 | } 512 | } 513 | layer { 514 | name: "layer_256_1_sum" 515 | type: "Eltwise" 516 | bottom: "layer_256_1_conv2" 517 | bottom: "layer_256_1_conv_expand" 518 | top: "layer_256_1_sum" 519 | } 520 | layer { 521 | name: "layer_512_1_bn1" 522 | type: "BatchNorm" 523 | bottom: "layer_256_1_sum" 524 | top: "layer_512_1_bn1" 525 | param { 526 | lr_mult: 0.0 527 | } 528 | param { 529 | lr_mult: 0.0 530 | } 531 | param { 532 | lr_mult: 0.0 533 | } 534 | } 535 | layer { 536 | name: "layer_512_1_scale1" 537 | type: "Scale" 538 | bottom: "layer_512_1_bn1" 539 | top: "layer_512_1_bn1" 540 | param { 541 | lr_mult: 1.0 542 | decay_mult: 1.0 543 | } 544 | param { 545 | lr_mult: 2.0 546 | decay_mult: 1.0 547 | } 548 | scale_param { 549 | bias_term: true 550 | } 551 | } 552 | layer { 553 | name: "layer_512_1_relu1" 554 | type: "ReLU" 555 | bottom: "layer_512_1_bn1" 556 | top: "layer_512_1_bn1" 557 | } 558 | layer { 559 | name: "layer_512_1_conv1_h" 560 | type: "Convolution" 561 | bottom: "layer_512_1_bn1" 562 | top: "layer_512_1_conv1_h" 563 | param { 564 | lr_mult: 1.0 565 | decay_mult: 1.0 566 | } 567 | convolution_param { 568 | num_output: 128 569 | bias_term: false 570 | pad: 1 571 | kernel_size: 3 572 | stride: 1 # 2 573 | weight_filler { 574 | type: "msra" 575 | } 576 | bias_filler { 577 | type: "constant" 578 | value: 0.0 579 | } 580 | } 581 | } 582 | layer { 583 | name: "layer_512_1_bn2_h" 584 | type: "BatchNorm" 585 | bottom: "layer_512_1_conv1_h" 586 | top: "layer_512_1_conv1_h" 587 | param { 588 | lr_mult: 0.0 589 | } 590 | param { 591 | lr_mult: 0.0 592 | } 593 | param { 594 | lr_mult: 0.0 595 | } 596 | } 597 | layer { 598 | name: "layer_512_1_scale2_h" 599 | type: "Scale" 600 | bottom: "layer_512_1_conv1_h" 601 | top: "layer_512_1_conv1_h" 602 | param { 603 | lr_mult: 1.0 604 | decay_mult: 1.0 605 | } 606 | param { 607 | lr_mult: 2.0 608 | decay_mult: 1.0 609 | } 610 | scale_param { 611 | bias_term: true 612 | } 613 | } 614 | layer { 615 | name: "layer_512_1_relu2" 616 | type: "ReLU" 617 | bottom: "layer_512_1_conv1_h" 618 | top: "layer_512_1_conv1_h" 619 | } 620 | layer { 621 | name: "layer_512_1_conv2_h" 622 | type: "Convolution" 623 | bottom: "layer_512_1_conv1_h" 624 | top: "layer_512_1_conv2_h" 625 | param { 626 | lr_mult: 1.0 627 | decay_mult: 1.0 628 | } 629 | convolution_param { 630 | num_output: 256 631 | bias_term: false 632 | pad: 2 # 1 633 | kernel_size: 3 634 | stride: 1 635 | dilation: 2 636 | weight_filler { 637 | type: "msra" 638 | } 639 | bias_filler { 640 | type: "constant" 641 | value: 0.0 642 | } 643 | } 644 | } 645 | layer { 646 | name: "layer_512_1_conv_expand_h" 647 | type: "Convolution" 648 | bottom: "layer_512_1_bn1" 649 | top: "layer_512_1_conv_expand_h" 650 | param { 651 | lr_mult: 1.0 652 | decay_mult: 1.0 653 | } 654 | convolution_param { 655 | num_output: 256 656 | bias_term: false 657 | pad: 0 658 | kernel_size: 1 659 | stride: 1 # 2 660 | weight_filler { 661 | type: "msra" 662 | } 663 | bias_filler { 664 | type: "constant" 665 | value: 0.0 666 | } 667 | } 668 | } 669 | layer { 670 | 
name: "layer_512_1_sum" 671 | type: "Eltwise" 672 | bottom: "layer_512_1_conv2_h" 673 | bottom: "layer_512_1_conv_expand_h" 674 | top: "layer_512_1_sum" 675 | } 676 | layer { 677 | name: "last_bn_h" 678 | type: "BatchNorm" 679 | bottom: "layer_512_1_sum" 680 | top: "layer_512_1_sum" 681 | param { 682 | lr_mult: 0.0 683 | } 684 | param { 685 | lr_mult: 0.0 686 | } 687 | param { 688 | lr_mult: 0.0 689 | } 690 | } 691 | layer { 692 | name: "last_scale_h" 693 | type: "Scale" 694 | bottom: "layer_512_1_sum" 695 | top: "layer_512_1_sum" 696 | param { 697 | lr_mult: 1.0 698 | decay_mult: 1.0 699 | } 700 | param { 701 | lr_mult: 2.0 702 | decay_mult: 1.0 703 | } 704 | scale_param { 705 | bias_term: true 706 | } 707 | } 708 | layer { 709 | name: "last_relu" 710 | type: "ReLU" 711 | bottom: "layer_512_1_sum" 712 | top: "fc7" 713 | } 714 | 715 | layer { 716 | name: "conv6_1_h" 717 | type: "Convolution" 718 | bottom: "fc7" 719 | top: "conv6_1_h" 720 | param { 721 | lr_mult: 1 722 | decay_mult: 1 723 | } 724 | param { 725 | lr_mult: 2 726 | decay_mult: 0 727 | } 728 | convolution_param { 729 | num_output: 128 730 | pad: 0 731 | kernel_size: 1 732 | stride: 1 733 | weight_filler { 734 | type: "xavier" 735 | } 736 | bias_filler { 737 | type: "constant" 738 | value: 0 739 | } 740 | } 741 | } 742 | layer { 743 | name: "conv6_1_relu" 744 | type: "ReLU" 745 | bottom: "conv6_1_h" 746 | top: "conv6_1_h" 747 | } 748 | layer { 749 | name: "conv6_2_h" 750 | type: "Convolution" 751 | bottom: "conv6_1_h" 752 | top: "conv6_2_h" 753 | param { 754 | lr_mult: 1 755 | decay_mult: 1 756 | } 757 | param { 758 | lr_mult: 2 759 | decay_mult: 0 760 | } 761 | convolution_param { 762 | num_output: 256 763 | pad: 1 764 | kernel_size: 3 765 | stride: 2 766 | weight_filler { 767 | type: "xavier" 768 | } 769 | bias_filler { 770 | type: "constant" 771 | value: 0 772 | } 773 | } 774 | } 775 | layer { 776 | name: "conv6_2_relu" 777 | type: "ReLU" 778 | bottom: "conv6_2_h" 779 | top: "conv6_2_h" 780 | } 781 | layer { 782 | name: "conv7_1_h" 783 | type: "Convolution" 784 | bottom: "conv6_2_h" 785 | top: "conv7_1_h" 786 | param { 787 | lr_mult: 1 788 | decay_mult: 1 789 | } 790 | param { 791 | lr_mult: 2 792 | decay_mult: 0 793 | } 794 | convolution_param { 795 | num_output: 64 796 | pad: 0 797 | kernel_size: 1 798 | stride: 1 799 | weight_filler { 800 | type: "xavier" 801 | } 802 | bias_filler { 803 | type: "constant" 804 | value: 0 805 | } 806 | } 807 | } 808 | layer { 809 | name: "conv7_1_relu" 810 | type: "ReLU" 811 | bottom: "conv7_1_h" 812 | top: "conv7_1_h" 813 | } 814 | layer { 815 | name: "conv7_2_h" 816 | type: "Convolution" 817 | bottom: "conv7_1_h" 818 | top: "conv7_2_h" 819 | param { 820 | lr_mult: 1 821 | decay_mult: 1 822 | } 823 | param { 824 | lr_mult: 2 825 | decay_mult: 0 826 | } 827 | convolution_param { 828 | num_output: 128 829 | pad: 1 830 | kernel_size: 3 831 | stride: 2 832 | weight_filler { 833 | type: "xavier" 834 | } 835 | bias_filler { 836 | type: "constant" 837 | value: 0 838 | } 839 | } 840 | } 841 | layer { 842 | name: "conv7_2_relu" 843 | type: "ReLU" 844 | bottom: "conv7_2_h" 845 | top: "conv7_2_h" 846 | } 847 | layer { 848 | name: "conv8_1_h" 849 | type: "Convolution" 850 | bottom: "conv7_2_h" 851 | top: "conv8_1_h" 852 | param { 853 | lr_mult: 1 854 | decay_mult: 1 855 | } 856 | param { 857 | lr_mult: 2 858 | decay_mult: 0 859 | } 860 | convolution_param { 861 | num_output: 64 862 | pad: 0 863 | kernel_size: 1 864 | stride: 1 865 | weight_filler { 866 | type: "xavier" 867 | } 868 | bias_filler { 869 | 
type: "constant" 870 | value: 0 871 | } 872 | } 873 | } 874 | layer { 875 | name: "conv8_1_relu" 876 | type: "ReLU" 877 | bottom: "conv8_1_h" 878 | top: "conv8_1_h" 879 | } 880 | layer { 881 | name: "conv8_2_h" 882 | type: "Convolution" 883 | bottom: "conv8_1_h" 884 | top: "conv8_2_h" 885 | param { 886 | lr_mult: 1 887 | decay_mult: 1 888 | } 889 | param { 890 | lr_mult: 2 891 | decay_mult: 0 892 | } 893 | convolution_param { 894 | num_output: 128 895 | pad: 1 896 | kernel_size: 3 897 | stride: 1 898 | weight_filler { 899 | type: "xavier" 900 | } 901 | bias_filler { 902 | type: "constant" 903 | value: 0 904 | } 905 | } 906 | } 907 | layer { 908 | name: "conv8_2_relu" 909 | type: "ReLU" 910 | bottom: "conv8_2_h" 911 | top: "conv8_2_h" 912 | } 913 | layer { 914 | name: "conv9_1_h" 915 | type: "Convolution" 916 | bottom: "conv8_2_h" 917 | top: "conv9_1_h" 918 | param { 919 | lr_mult: 1 920 | decay_mult: 1 921 | } 922 | param { 923 | lr_mult: 2 924 | decay_mult: 0 925 | } 926 | convolution_param { 927 | num_output: 64 928 | pad: 0 929 | kernel_size: 1 930 | stride: 1 931 | weight_filler { 932 | type: "xavier" 933 | } 934 | bias_filler { 935 | type: "constant" 936 | value: 0 937 | } 938 | } 939 | } 940 | layer { 941 | name: "conv9_1_relu" 942 | type: "ReLU" 943 | bottom: "conv9_1_h" 944 | top: "conv9_1_h" 945 | } 946 | layer { 947 | name: "conv9_2_h" 948 | type: "Convolution" 949 | bottom: "conv9_1_h" 950 | top: "conv9_2_h" 951 | param { 952 | lr_mult: 1 953 | decay_mult: 1 954 | } 955 | param { 956 | lr_mult: 2 957 | decay_mult: 0 958 | } 959 | convolution_param { 960 | num_output: 128 961 | pad: 1 962 | kernel_size: 3 963 | stride: 1 964 | weight_filler { 965 | type: "xavier" 966 | } 967 | bias_filler { 968 | type: "constant" 969 | value: 0 970 | } 971 | } 972 | } 973 | layer { 974 | name: "conv9_2_relu" 975 | type: "ReLU" 976 | bottom: "conv9_2_h" 977 | top: "conv9_2_h" 978 | } 979 | layer { 980 | name: "conv4_3_norm" 981 | type: "Normalize" 982 | bottom: "layer_256_1_bn1" 983 | top: "conv4_3_norm" 984 | norm_param { 985 | across_spatial: false 986 | scale_filler { 987 | type: "constant" 988 | value: 20 989 | } 990 | channel_shared: false 991 | } 992 | } 993 | layer { 994 | name: "conv4_3_norm_mbox_loc" 995 | type: "Convolution" 996 | bottom: "conv4_3_norm" 997 | top: "conv4_3_norm_mbox_loc" 998 | param { 999 | lr_mult: 1 1000 | decay_mult: 1 1001 | } 1002 | param { 1003 | lr_mult: 2 1004 | decay_mult: 0 1005 | } 1006 | convolution_param { 1007 | num_output: 16 1008 | pad: 1 1009 | kernel_size: 3 1010 | stride: 1 1011 | weight_filler { 1012 | type: "xavier" 1013 | } 1014 | bias_filler { 1015 | type: "constant" 1016 | value: 0 1017 | } 1018 | } 1019 | } 1020 | layer { 1021 | name: "conv4_3_norm_mbox_loc_perm" 1022 | type: "Permute" 1023 | bottom: "conv4_3_norm_mbox_loc" 1024 | top: "conv4_3_norm_mbox_loc_perm" 1025 | permute_param { 1026 | order: 0 1027 | order: 2 1028 | order: 3 1029 | order: 1 1030 | } 1031 | } 1032 | layer { 1033 | name: "conv4_3_norm_mbox_loc_flat" 1034 | type: "Flatten" 1035 | bottom: "conv4_3_norm_mbox_loc_perm" 1036 | top: "conv4_3_norm_mbox_loc_flat" 1037 | flatten_param { 1038 | axis: 1 1039 | } 1040 | } 1041 | layer { 1042 | name: "conv4_3_norm_mbox_conf" 1043 | type: "Convolution" 1044 | bottom: "conv4_3_norm" 1045 | top: "conv4_3_norm_mbox_conf" 1046 | param { 1047 | lr_mult: 1 1048 | decay_mult: 1 1049 | } 1050 | param { 1051 | lr_mult: 2 1052 | decay_mult: 0 1053 | } 1054 | convolution_param { 1055 | num_output: 8 # 84 1056 | pad: 1 1057 | kernel_size: 3 1058 | 
stride: 1 1059 | weight_filler { 1060 | type: "xavier" 1061 | } 1062 | bias_filler { 1063 | type: "constant" 1064 | value: 0 1065 | } 1066 | } 1067 | } 1068 | layer { 1069 | name: "conv4_3_norm_mbox_conf_perm" 1070 | type: "Permute" 1071 | bottom: "conv4_3_norm_mbox_conf" 1072 | top: "conv4_3_norm_mbox_conf_perm" 1073 | permute_param { 1074 | order: 0 1075 | order: 2 1076 | order: 3 1077 | order: 1 1078 | } 1079 | } 1080 | layer { 1081 | name: "conv4_3_norm_mbox_conf_flat" 1082 | type: "Flatten" 1083 | bottom: "conv4_3_norm_mbox_conf_perm" 1084 | top: "conv4_3_norm_mbox_conf_flat" 1085 | flatten_param { 1086 | axis: 1 1087 | } 1088 | } 1089 | layer { 1090 | name: "conv4_3_norm_mbox_priorbox" 1091 | type: "PriorBox" 1092 | bottom: "conv4_3_norm" 1093 | bottom: "data" 1094 | top: "conv4_3_norm_mbox_priorbox" 1095 | prior_box_param { 1096 | min_size: 30.0 1097 | max_size: 60.0 1098 | aspect_ratio: 2 1099 | flip: true 1100 | clip: false 1101 | variance: 0.1 1102 | variance: 0.1 1103 | variance: 0.2 1104 | variance: 0.2 1105 | step: 8 1106 | offset: 0.5 1107 | } 1108 | } 1109 | layer { 1110 | name: "fc7_mbox_loc" 1111 | type: "Convolution" 1112 | bottom: "fc7" 1113 | top: "fc7_mbox_loc" 1114 | param { 1115 | lr_mult: 1 1116 | decay_mult: 1 1117 | } 1118 | param { 1119 | lr_mult: 2 1120 | decay_mult: 0 1121 | } 1122 | convolution_param { 1123 | num_output: 24 1124 | pad: 1 1125 | kernel_size: 3 1126 | stride: 1 1127 | weight_filler { 1128 | type: "xavier" 1129 | } 1130 | bias_filler { 1131 | type: "constant" 1132 | value: 0 1133 | } 1134 | } 1135 | } 1136 | layer { 1137 | name: "fc7_mbox_loc_perm" 1138 | type: "Permute" 1139 | bottom: "fc7_mbox_loc" 1140 | top: "fc7_mbox_loc_perm" 1141 | permute_param { 1142 | order: 0 1143 | order: 2 1144 | order: 3 1145 | order: 1 1146 | } 1147 | } 1148 | layer { 1149 | name: "fc7_mbox_loc_flat" 1150 | type: "Flatten" 1151 | bottom: "fc7_mbox_loc_perm" 1152 | top: "fc7_mbox_loc_flat" 1153 | flatten_param { 1154 | axis: 1 1155 | } 1156 | } 1157 | layer { 1158 | name: "fc7_mbox_conf" 1159 | type: "Convolution" 1160 | bottom: "fc7" 1161 | top: "fc7_mbox_conf" 1162 | param { 1163 | lr_mult: 1 1164 | decay_mult: 1 1165 | } 1166 | param { 1167 | lr_mult: 2 1168 | decay_mult: 0 1169 | } 1170 | convolution_param { 1171 | num_output: 12 # 126 1172 | pad: 1 1173 | kernel_size: 3 1174 | stride: 1 1175 | weight_filler { 1176 | type: "xavier" 1177 | } 1178 | bias_filler { 1179 | type: "constant" 1180 | value: 0 1181 | } 1182 | } 1183 | } 1184 | layer { 1185 | name: "fc7_mbox_conf_perm" 1186 | type: "Permute" 1187 | bottom: "fc7_mbox_conf" 1188 | top: "fc7_mbox_conf_perm" 1189 | permute_param { 1190 | order: 0 1191 | order: 2 1192 | order: 3 1193 | order: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "fc7_mbox_conf_flat" 1198 | type: "Flatten" 1199 | bottom: "fc7_mbox_conf_perm" 1200 | top: "fc7_mbox_conf_flat" 1201 | flatten_param { 1202 | axis: 1 1203 | } 1204 | } 1205 | layer { 1206 | name: "fc7_mbox_priorbox" 1207 | type: "PriorBox" 1208 | bottom: "fc7" 1209 | bottom: "data" 1210 | top: "fc7_mbox_priorbox" 1211 | prior_box_param { 1212 | min_size: 60.0 1213 | max_size: 111.0 1214 | aspect_ratio: 2 1215 | aspect_ratio: 3 1216 | flip: true 1217 | clip: false 1218 | variance: 0.1 1219 | variance: 0.1 1220 | variance: 0.2 1221 | variance: 0.2 1222 | step: 16 1223 | offset: 0.5 1224 | } 1225 | } 1226 | layer { 1227 | name: "conv6_2_mbox_loc" 1228 | type: "Convolution" 1229 | bottom: "conv6_2_h" 1230 | top: "conv6_2_mbox_loc" 1231 | param { 1232 | lr_mult: 1 1233 | 
decay_mult: 1 1234 | } 1235 | param { 1236 | lr_mult: 2 1237 | decay_mult: 0 1238 | } 1239 | convolution_param { 1240 | num_output: 24 1241 | pad: 1 1242 | kernel_size: 3 1243 | stride: 1 1244 | weight_filler { 1245 | type: "xavier" 1246 | } 1247 | bias_filler { 1248 | type: "constant" 1249 | value: 0 1250 | } 1251 | } 1252 | } 1253 | layer { 1254 | name: "conv6_2_mbox_loc_perm" 1255 | type: "Permute" 1256 | bottom: "conv6_2_mbox_loc" 1257 | top: "conv6_2_mbox_loc_perm" 1258 | permute_param { 1259 | order: 0 1260 | order: 2 1261 | order: 3 1262 | order: 1 1263 | } 1264 | } 1265 | layer { 1266 | name: "conv6_2_mbox_loc_flat" 1267 | type: "Flatten" 1268 | bottom: "conv6_2_mbox_loc_perm" 1269 | top: "conv6_2_mbox_loc_flat" 1270 | flatten_param { 1271 | axis: 1 1272 | } 1273 | } 1274 | layer { 1275 | name: "conv6_2_mbox_conf" 1276 | type: "Convolution" 1277 | bottom: "conv6_2_h" 1278 | top: "conv6_2_mbox_conf" 1279 | param { 1280 | lr_mult: 1 1281 | decay_mult: 1 1282 | } 1283 | param { 1284 | lr_mult: 2 1285 | decay_mult: 0 1286 | } 1287 | convolution_param { 1288 | num_output: 12 # 126 1289 | pad: 1 1290 | kernel_size: 3 1291 | stride: 1 1292 | weight_filler { 1293 | type: "xavier" 1294 | } 1295 | bias_filler { 1296 | type: "constant" 1297 | value: 0 1298 | } 1299 | } 1300 | } 1301 | layer { 1302 | name: "conv6_2_mbox_conf_perm" 1303 | type: "Permute" 1304 | bottom: "conv6_2_mbox_conf" 1305 | top: "conv6_2_mbox_conf_perm" 1306 | permute_param { 1307 | order: 0 1308 | order: 2 1309 | order: 3 1310 | order: 1 1311 | } 1312 | } 1313 | layer { 1314 | name: "conv6_2_mbox_conf_flat" 1315 | type: "Flatten" 1316 | bottom: "conv6_2_mbox_conf_perm" 1317 | top: "conv6_2_mbox_conf_flat" 1318 | flatten_param { 1319 | axis: 1 1320 | } 1321 | } 1322 | layer { 1323 | name: "conv6_2_mbox_priorbox" 1324 | type: "PriorBox" 1325 | bottom: "conv6_2_h" 1326 | bottom: "data" 1327 | top: "conv6_2_mbox_priorbox" 1328 | prior_box_param { 1329 | min_size: 111.0 1330 | max_size: 162.0 1331 | aspect_ratio: 2 1332 | aspect_ratio: 3 1333 | flip: true 1334 | clip: false 1335 | variance: 0.1 1336 | variance: 0.1 1337 | variance: 0.2 1338 | variance: 0.2 1339 | step: 32 1340 | offset: 0.5 1341 | } 1342 | } 1343 | layer { 1344 | name: "conv7_2_mbox_loc" 1345 | type: "Convolution" 1346 | bottom: "conv7_2_h" 1347 | top: "conv7_2_mbox_loc" 1348 | param { 1349 | lr_mult: 1 1350 | decay_mult: 1 1351 | } 1352 | param { 1353 | lr_mult: 2 1354 | decay_mult: 0 1355 | } 1356 | convolution_param { 1357 | num_output: 24 1358 | pad: 1 1359 | kernel_size: 3 1360 | stride: 1 1361 | weight_filler { 1362 | type: "xavier" 1363 | } 1364 | bias_filler { 1365 | type: "constant" 1366 | value: 0 1367 | } 1368 | } 1369 | } 1370 | layer { 1371 | name: "conv7_2_mbox_loc_perm" 1372 | type: "Permute" 1373 | bottom: "conv7_2_mbox_loc" 1374 | top: "conv7_2_mbox_loc_perm" 1375 | permute_param { 1376 | order: 0 1377 | order: 2 1378 | order: 3 1379 | order: 1 1380 | } 1381 | } 1382 | layer { 1383 | name: "conv7_2_mbox_loc_flat" 1384 | type: "Flatten" 1385 | bottom: "conv7_2_mbox_loc_perm" 1386 | top: "conv7_2_mbox_loc_flat" 1387 | flatten_param { 1388 | axis: 1 1389 | } 1390 | } 1391 | layer { 1392 | name: "conv7_2_mbox_conf" 1393 | type: "Convolution" 1394 | bottom: "conv7_2_h" 1395 | top: "conv7_2_mbox_conf" 1396 | param { 1397 | lr_mult: 1 1398 | decay_mult: 1 1399 | } 1400 | param { 1401 | lr_mult: 2 1402 | decay_mult: 0 1403 | } 1404 | convolution_param { 1405 | num_output: 12 # 126 1406 | pad: 1 1407 | kernel_size: 3 1408 | stride: 1 1409 | 
weight_filler { 1410 | type: "xavier" 1411 | } 1412 | bias_filler { 1413 | type: "constant" 1414 | value: 0 1415 | } 1416 | } 1417 | } 1418 | layer { 1419 | name: "conv7_2_mbox_conf_perm" 1420 | type: "Permute" 1421 | bottom: "conv7_2_mbox_conf" 1422 | top: "conv7_2_mbox_conf_perm" 1423 | permute_param { 1424 | order: 0 1425 | order: 2 1426 | order: 3 1427 | order: 1 1428 | } 1429 | } 1430 | layer { 1431 | name: "conv7_2_mbox_conf_flat" 1432 | type: "Flatten" 1433 | bottom: "conv7_2_mbox_conf_perm" 1434 | top: "conv7_2_mbox_conf_flat" 1435 | flatten_param { 1436 | axis: 1 1437 | } 1438 | } 1439 | layer { 1440 | name: "conv7_2_mbox_priorbox" 1441 | type: "PriorBox" 1442 | bottom: "conv7_2_h" 1443 | bottom: "data" 1444 | top: "conv7_2_mbox_priorbox" 1445 | prior_box_param { 1446 | min_size: 162.0 1447 | max_size: 213.0 1448 | aspect_ratio: 2 1449 | aspect_ratio: 3 1450 | flip: true 1451 | clip: false 1452 | variance: 0.1 1453 | variance: 0.1 1454 | variance: 0.2 1455 | variance: 0.2 1456 | step: 64 1457 | offset: 0.5 1458 | } 1459 | } 1460 | layer { 1461 | name: "conv8_2_mbox_loc" 1462 | type: "Convolution" 1463 | bottom: "conv8_2_h" 1464 | top: "conv8_2_mbox_loc" 1465 | param { 1466 | lr_mult: 1 1467 | decay_mult: 1 1468 | } 1469 | param { 1470 | lr_mult: 2 1471 | decay_mult: 0 1472 | } 1473 | convolution_param { 1474 | num_output: 16 1475 | pad: 1 1476 | kernel_size: 3 1477 | stride: 1 1478 | weight_filler { 1479 | type: "xavier" 1480 | } 1481 | bias_filler { 1482 | type: "constant" 1483 | value: 0 1484 | } 1485 | } 1486 | } 1487 | layer { 1488 | name: "conv8_2_mbox_loc_perm" 1489 | type: "Permute" 1490 | bottom: "conv8_2_mbox_loc" 1491 | top: "conv8_2_mbox_loc_perm" 1492 | permute_param { 1493 | order: 0 1494 | order: 2 1495 | order: 3 1496 | order: 1 1497 | } 1498 | } 1499 | layer { 1500 | name: "conv8_2_mbox_loc_flat" 1501 | type: "Flatten" 1502 | bottom: "conv8_2_mbox_loc_perm" 1503 | top: "conv8_2_mbox_loc_flat" 1504 | flatten_param { 1505 | axis: 1 1506 | } 1507 | } 1508 | layer { 1509 | name: "conv8_2_mbox_conf" 1510 | type: "Convolution" 1511 | bottom: "conv8_2_h" 1512 | top: "conv8_2_mbox_conf" 1513 | param { 1514 | lr_mult: 1 1515 | decay_mult: 1 1516 | } 1517 | param { 1518 | lr_mult: 2 1519 | decay_mult: 0 1520 | } 1521 | convolution_param { 1522 | num_output: 8 # 84 1523 | pad: 1 1524 | kernel_size: 3 1525 | stride: 1 1526 | weight_filler { 1527 | type: "xavier" 1528 | } 1529 | bias_filler { 1530 | type: "constant" 1531 | value: 0 1532 | } 1533 | } 1534 | } 1535 | layer { 1536 | name: "conv8_2_mbox_conf_perm" 1537 | type: "Permute" 1538 | bottom: "conv8_2_mbox_conf" 1539 | top: "conv8_2_mbox_conf_perm" 1540 | permute_param { 1541 | order: 0 1542 | order: 2 1543 | order: 3 1544 | order: 1 1545 | } 1546 | } 1547 | layer { 1548 | name: "conv8_2_mbox_conf_flat" 1549 | type: "Flatten" 1550 | bottom: "conv8_2_mbox_conf_perm" 1551 | top: "conv8_2_mbox_conf_flat" 1552 | flatten_param { 1553 | axis: 1 1554 | } 1555 | } 1556 | layer { 1557 | name: "conv8_2_mbox_priorbox" 1558 | type: "PriorBox" 1559 | bottom: "conv8_2_h" 1560 | bottom: "data" 1561 | top: "conv8_2_mbox_priorbox" 1562 | prior_box_param { 1563 | min_size: 213.0 1564 | max_size: 264.0 1565 | aspect_ratio: 2 1566 | flip: true 1567 | clip: false 1568 | variance: 0.1 1569 | variance: 0.1 1570 | variance: 0.2 1571 | variance: 0.2 1572 | step: 100 1573 | offset: 0.5 1574 | } 1575 | } 1576 | layer { 1577 | name: "conv9_2_mbox_loc" 1578 | type: "Convolution" 1579 | bottom: "conv9_2_h" 1580 | top: "conv9_2_mbox_loc" 1581 | param { 
1582 | lr_mult: 1 1583 | decay_mult: 1 1584 | } 1585 | param { 1586 | lr_mult: 2 1587 | decay_mult: 0 1588 | } 1589 | convolution_param { 1590 | num_output: 16 1591 | pad: 1 1592 | kernel_size: 3 1593 | stride: 1 1594 | weight_filler { 1595 | type: "xavier" 1596 | } 1597 | bias_filler { 1598 | type: "constant" 1599 | value: 0 1600 | } 1601 | } 1602 | } 1603 | layer { 1604 | name: "conv9_2_mbox_loc_perm" 1605 | type: "Permute" 1606 | bottom: "conv9_2_mbox_loc" 1607 | top: "conv9_2_mbox_loc_perm" 1608 | permute_param { 1609 | order: 0 1610 | order: 2 1611 | order: 3 1612 | order: 1 1613 | } 1614 | } 1615 | layer { 1616 | name: "conv9_2_mbox_loc_flat" 1617 | type: "Flatten" 1618 | bottom: "conv9_2_mbox_loc_perm" 1619 | top: "conv9_2_mbox_loc_flat" 1620 | flatten_param { 1621 | axis: 1 1622 | } 1623 | } 1624 | layer { 1625 | name: "conv9_2_mbox_conf" 1626 | type: "Convolution" 1627 | bottom: "conv9_2_h" 1628 | top: "conv9_2_mbox_conf" 1629 | param { 1630 | lr_mult: 1 1631 | decay_mult: 1 1632 | } 1633 | param { 1634 | lr_mult: 2 1635 | decay_mult: 0 1636 | } 1637 | convolution_param { 1638 | num_output: 8 # 84 1639 | pad: 1 1640 | kernel_size: 3 1641 | stride: 1 1642 | weight_filler { 1643 | type: "xavier" 1644 | } 1645 | bias_filler { 1646 | type: "constant" 1647 | value: 0 1648 | } 1649 | } 1650 | } 1651 | layer { 1652 | name: "conv9_2_mbox_conf_perm" 1653 | type: "Permute" 1654 | bottom: "conv9_2_mbox_conf" 1655 | top: "conv9_2_mbox_conf_perm" 1656 | permute_param { 1657 | order: 0 1658 | order: 2 1659 | order: 3 1660 | order: 1 1661 | } 1662 | } 1663 | layer { 1664 | name: "conv9_2_mbox_conf_flat" 1665 | type: "Flatten" 1666 | bottom: "conv9_2_mbox_conf_perm" 1667 | top: "conv9_2_mbox_conf_flat" 1668 | flatten_param { 1669 | axis: 1 1670 | } 1671 | } 1672 | layer { 1673 | name: "conv9_2_mbox_priorbox" 1674 | type: "PriorBox" 1675 | bottom: "conv9_2_h" 1676 | bottom: "data" 1677 | top: "conv9_2_mbox_priorbox" 1678 | prior_box_param { 1679 | min_size: 264.0 1680 | max_size: 315.0 1681 | aspect_ratio: 2 1682 | flip: true 1683 | clip: false 1684 | variance: 0.1 1685 | variance: 0.1 1686 | variance: 0.2 1687 | variance: 0.2 1688 | step: 300 1689 | offset: 0.5 1690 | } 1691 | } 1692 | layer { 1693 | name: "mbox_loc" 1694 | type: "Concat" 1695 | bottom: "conv4_3_norm_mbox_loc_flat" 1696 | bottom: "fc7_mbox_loc_flat" 1697 | bottom: "conv6_2_mbox_loc_flat" 1698 | bottom: "conv7_2_mbox_loc_flat" 1699 | bottom: "conv8_2_mbox_loc_flat" 1700 | bottom: "conv9_2_mbox_loc_flat" 1701 | top: "mbox_loc" 1702 | concat_param { 1703 | axis: 1 1704 | } 1705 | } 1706 | layer { 1707 | name: "mbox_conf" 1708 | type: "Concat" 1709 | bottom: "conv4_3_norm_mbox_conf_flat" 1710 | bottom: "fc7_mbox_conf_flat" 1711 | bottom: "conv6_2_mbox_conf_flat" 1712 | bottom: "conv7_2_mbox_conf_flat" 1713 | bottom: "conv8_2_mbox_conf_flat" 1714 | bottom: "conv9_2_mbox_conf_flat" 1715 | top: "mbox_conf" 1716 | concat_param { 1717 | axis: 1 1718 | } 1719 | } 1720 | layer { 1721 | name: "mbox_priorbox" 1722 | type: "Concat" 1723 | bottom: "conv4_3_norm_mbox_priorbox" 1724 | bottom: "fc7_mbox_priorbox" 1725 | bottom: "conv6_2_mbox_priorbox" 1726 | bottom: "conv7_2_mbox_priorbox" 1727 | bottom: "conv8_2_mbox_priorbox" 1728 | bottom: "conv9_2_mbox_priorbox" 1729 | top: "mbox_priorbox" 1730 | concat_param { 1731 | axis: 2 1732 | } 1733 | } 1734 | 1735 | layer { 1736 | name: "mbox_conf_reshape" 1737 | type: "Reshape" 1738 | bottom: "mbox_conf" 1739 | top: "mbox_conf_reshape" 1740 | reshape_param { 1741 | shape { 1742 | dim: 0 1743 | dim: -1 
1744 | dim: 2 1745 | } 1746 | } 1747 | } 1748 | layer { 1749 | name: "mbox_conf_softmax" 1750 | type: "Softmax" 1751 | bottom: "mbox_conf_reshape" 1752 | top: "mbox_conf_softmax" 1753 | softmax_param { 1754 | axis: 2 1755 | } 1756 | } 1757 | layer { 1758 | name: "mbox_conf_flatten" 1759 | type: "Flatten" 1760 | bottom: "mbox_conf_softmax" 1761 | top: "mbox_conf_flatten" 1762 | flatten_param { 1763 | axis: 1 1764 | } 1765 | } 1766 | 1767 | layer { 1768 | name: "detection_out" 1769 | type: "DetectionOutput" 1770 | bottom: "mbox_loc" 1771 | bottom: "mbox_conf_flatten" 1772 | bottom: "mbox_priorbox" 1773 | top: "detection_out" 1774 | include { 1775 | phase: TEST 1776 | } 1777 | detection_output_param { 1778 | num_classes: 2 1779 | share_location: true 1780 | background_label_id: 0 1781 | nms_param { 1782 | nms_threshold: 0.45 1783 | top_k: 400 1784 | } 1785 | code_type: CENTER_SIZE 1786 | keep_top_k: 200 1787 | confidence_threshold: 0.01 1788 | } 1789 | } 1790 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click==7.1.2 2 | cvlib==0.2.5 3 | dlib==19.22.0 4 | face-recognition==1.3.0 5 | googledrivedownloader==0.4 6 | h5py==2.10.0 7 | keras==2.4.3 8 | mahotas==1.4.11 9 | matplotlib==3.3.2 10 | numpy==1.19.2 11 | opencv-python==4.5.1.48 12 | Pillow==8.3.2 13 | pixellib==0.6.1 14 | PySide2==5.15.2 15 | pytesseract==0.3.7 16 | requests==2.24.0 17 | scikit-image==0.18.1 18 | scipy==1.6.3 19 | tensorflow-gpu==2.5.1 -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from google_drive_downloader import GoogleDriveDownloader as gdd 2 | 3 | # Google Drive file IDs mapped to the local paths the examples expect. 4 | download_dict = { 5 | "16gAKScYAW0bZkyRgcLF71x28du_mLY8-": "assets/res10_300x300_ssd_iter_140000.caffemodel", 6 | "1jUIwxXjxz8oC7I2Ta9vtiozsB4i95043": "Media/people-walking.mp4", 7 | "1Q7qfr11olEFguRRkKRnC1Yah3ZnJCUnM": "assets/mask_rcnn_coco.h5" 8 | } 9 | 10 | # Fetch each asset; unzip=True is ignored for files that are not zip archives. 11 | for file_id, dest_path in download_dict.items(): 12 | gdd.download_file_from_google_drive(file_id=file_id, 13 | dest_path=dest_path, 14 | unzip=True) 15 | --------------------------------------------------------------------------------
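A note on the repeated Permute (order: 0, 2, 3, 1) followed by Flatten (axis: 1) pairs in the deploy.prototxt.txt above: each pair reorders a head's NCHW output to NHWC before flattening, so the predictions for each feature-map cell become contiguous and the later Concat layers (axis 1 for mbox_loc/mbox_conf, axis 2 for mbox_priorbox) line up element-for-element with the order in which the PriorBox layers emit default boxes. A minimal NumPy sketch of the equivalent index arithmetic; the 19x19 spatial size is an assumption for illustration (fc7 at stride 16 on a 300x300 input):

import numpy as np

n, c, h, w = 1, 24, 19, 19              # e.g. fc7_mbox_loc: 6 priors * 4 coords
loc = np.random.randn(n, c, h, w).astype(np.float32)

loc_perm = loc.transpose(0, 2, 3, 1)    # Permute order: 0, 2, 3, 1  (NCHW -> NHWC)
loc_flat = loc_perm.reshape(n, -1)      # Flatten axis: 1
assert loc_flat.shape == (n, h * w * c)

# After the permute, box k at cell (i, j) occupies a contiguous run of
# 4 values, matching the priorbox ordering that Concat relies on:
i, j, k = 4, 7, 2
start = (i * w + j) * c + k * 4
assert np.allclose(loc_flat[0, start:start + 4], loc[0, k * 4:(k + 1) * 4, i, j])

Without the permute, a plain flatten of NCHW would interleave all cells of one channel before the next channel, and the concatenation across scales would no longer correspond box-for-box to mbox_priorbox.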
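The num_output values of the loc/conf heads follow the usual SSD prior-box count: one default box for min_size, one for the geometric mean of min_size and max_size, and two per listed aspect_ratio when flip: true. Each box then needs 4 localization values, and 2 confidence scores for this 2-class (background/face) model. A quick sanity-check sketch reproducing the values visible in the layers above:

def num_priors(n_aspect_ratios, flip=True):
    # 1 box for min_size, 1 for sqrt(min_size * max_size),
    # plus one per aspect_ratio entry (doubled by flip: true).
    return 1 + 1 + n_aspect_ratios * (2 if flip else 1)

heads = {"fc7": 2, "conv6_2": 2, "conv7_2": 2, "conv8_2": 1, "conv9_2": 1}
for name, n_ar in heads.items():
    p = num_priors(n_ar)
    print(f"{name}: {p} priors/cell -> loc num_output = {4 * p}, "
          f"conf num_output = {2 * p}")
# fc7/conv6_2/conv7_2: 6 priors -> 24 / 12; conv8_2/conv9_2: 4 priors -> 16 / 8

The `# 126` and `# 84` comments left beside the conf heads match the same arithmetic for the original 21-class VOC SSD this definition was adapted from (6 * 21 = 126, 4 * 21 = 84).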
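Finally, how detection_out is consumed downstream: with box decoding (code_type: CENTER_SIZE) and NMS handled inside the DetectionOutput layer, OpenCV's dnn module returns a (1, 1, N, 7) blob of [batch_id, class_id, confidence, x1, y1, x2, y2] rows with corners normalized to [0, 1]. The sketch below is a simplified stand-in, not a copy of the repo's faceDetectionDNN.py; it assumes utils.py has already downloaded the caffemodel into assets/ and borrows Media/face-001.jpg from this repo as input:

import cv2
import numpy as np

# The prototxt above plus the weights fetched by utils.py.
net = cv2.dnn.readNetFromCaffe(
    "assets/deploy.prototxt.txt",
    "assets/res10_300x300_ssd_iter_140000.caffemodel")

image = cv2.imread("Media/face-001.jpg")
h, w = image.shape[:2]

# The data layer expects a 300x300 BGR blob; (104, 177, 123) is the
# per-channel mean commonly used with this model.
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                             (300, 300), (104.0, 177.0, 123.0))
net.setInput(blob)
detections = net.forward()  # shape (1, 1, N, 7) from detection_out

for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if confidence < 0.5:  # stricter than the 0.01 kept in the prototxt
        continue
    # Scale the normalized corners back to the original image size.
    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
    x1, y1, x2, y2 = box.astype(int)
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.imwrite("face-detected-dnn.jpeg", image)

The permissive confidence_threshold: 0.01 in the prototxt deliberately leaves filtering to the application, which is why a higher cutoff is applied here.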