├── EAST_Text_Detection
│   ├── frozen_east_text_detection.pb
│   ├── images
│   │   ├── car_wash.png
│   │   ├── computer-vision-768x340.jpg
│   │   ├── lebron_james.jpg
│   │   ├── road-sign-1.jpg
│   │   ├── road-sign-2-768x347.jpg
│   │   ├── road-sign-3-300x112.jpg
│   │   └── sign.jpg
│   ├── text_detection.py
│   └── text_detection_video.py
├── README.md
├── Using_Tesseract_OCR
│   ├── images
│   │   ├── example_blur.png
│   │   ├── example_thresh.png
│   │   └── receipt.png
│   └── pytesseract_test.py
├── bubble_sheet_scanner
│   ├── bubble_sheet_scanner.ipynb
│   ├── images
│   │   ├── bubble_test.png
│   │   ├── edged.png
│   │   ├── omr_mask.gif
│   │   ├── omr_multiple_bubbles.jpg
│   │   └── omr_no_bubbles.jpg
│   └── origin.jpg
├── canny_edge_detection
│   ├── 69914.png
│   ├── canny_edge_detection.ipynb
│   └── img11.jpg
├── distance_to_camera
│   ├── 2ft.jpg
│   ├── distance_to_camera.py
│   ├── distance_to_camera_edged.jpg
│   └── images
│       ├── 3ft.jpg
│       └── 4ft.jpg
├── document_scanner
│   ├── a4paper.jpg
│   ├── document_scanner.ipynb
│   ├── receipt.png
│   └── transform.py
└── perspective_transform
    ├── birdseye.jpg
    ├── original.png
    ├── perspective_transform.ipynb
    └── transform.py

/EAST_Text_Detection/frozen_east_text_detection.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/EAST_Text_Detection/frozen_east_text_detection.pb
--------------------------------------------------------------------------------
/EAST_Text_Detection/images/car_wash.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/EAST_Text_Detection/images/car_wash.png
--------------------------------------------------------------------------------
/EAST_Text_Detection/images/computer-vision-768x340.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/EAST_Text_Detection/images/computer-vision-768x340.jpg
--------------------------------------------------------------------------------
/EAST_Text_Detection/images/lebron_james.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/EAST_Text_Detection/images/lebron_james.jpg
--------------------------------------------------------------------------------
/EAST_Text_Detection/images/road-sign-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/EAST_Text_Detection/images/road-sign-1.jpg
--------------------------------------------------------------------------------
/EAST_Text_Detection/images/road-sign-2-768x347.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/EAST_Text_Detection/images/road-sign-2-768x347.jpg
--------------------------------------------------------------------------------
/EAST_Text_Detection/images/road-sign-3-300x112.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/EAST_Text_Detection/images/road-sign-3-300x112.jpg
--------------------------------------------------------------------------------
/EAST_Text_Detection/images/sign.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/EAST_Text_Detection/images/sign.jpg
--------------------------------------------------------------------------------
/EAST_Text_Detection/text_detection.py:
--------------------------------------------------------------------------------
from imutils.object_detection import non_max_suppression
from PIL import Image
import numpy as np
import pytesseract
import time
import cv2
import os


def decode_predictions(scores, geometry):
    """
    The EAST text detector returns two arrays:
    scores: the probability that a region contains text.
    geometry: the bounding-box geometry of the text region.
    """
    # The minimum probability of a detected text region
    min_confidence = 0.5

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    numRows, numCols = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the
        # geometrical data used to derive potential bounding box
        # coordinates that surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability,
            # ignore it
            if scoresData[x] < min_confidence:
                continue

            # compute the offset factor as our resulting feature
            # maps will be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and
            # then compute the sine and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height
            # of the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates
            # for the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score
            # to our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # return a tuple of the bounding boxes and associated confidences
    return (rects, confidences)


def text_recognition():
    east_model = "frozen_east_text_detection.pb"
    img_path = "images/road-sign-2-768x347.jpg"

    # set the new width and height and then determine the ratio in change for
    # both the width and height; both of them must be multiples of 32
    newW, newH = 320, 320

    # The (optional) amount of padding to add to each ROI border.
    # You can try 0.05 for 5% or 0.10 for 10% (and so on) if the OCR result is incorrect
    padding = 0.0

    # in order to apply Tesseract v4 to OCR text we must supply
    # (1) a language, (2) an OEM flag of 1, indicating that we
    # wish to use the LSTM neural net model for OCR, and finally
    # (3) a PSM value, in this case 7, which implies that we are
    # treating the ROI as a single line of text
    config = ("-l eng --oem 1 --psm 7")  # use chi_sim for simplified Chinese

    image = cv2.imread(img_path)
    orig = image.copy()
    origH, origW = image.shape[:2]

    # calculate ratios that will be used to scale bounding box coordinates
    rW = origW / float(newW)
    rH = origH / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model: the first is the output probabilities
    # and the second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(east_model)

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets; (123.68, 116.78, 103.94)
    # are the per-channel means subtracted from the input
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
        (123.68, 116.78, 103.94), swapRB=True, crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()

    # show timing information on text prediction
    print("[INFO] text detection cost {:.6f} seconds".format(end - start))

    # decode the predictions, then apply non-maxima suppression to
    # suppress weak, overlapping bounding boxes
    (rects, confidences) = decode_predictions(scores, geometry)
    # NMS effectively keeps the most likely text regions, eliminating other overlapping regions
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # initialize the list of results to contain our OCR bounding boxes and text
    results = []

    # the bounding boxes tell us where the text regions are; the next step is to recognize the text.
    # loop over the bounding boxes and process the results, preparing the stage for actual text recognition
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # in order to obtain a better OCR of the text we can potentially
        # add a bit of padding surrounding the bounding box -- here we
        # are computing the deltas in both the x and y directions
        dX = int((endX - startX) * padding)
        dY = int((endY - startY) * padding)

        # apply padding to each side of the bounding box, respectively
        startX = max(0, startX - dX)
        startY = max(0, startY - dY)
        endX = min(origW, endX + (dX * 2))
        endY = min(origH, endY + (dY * 2))

        # extract the actual padded ROI
        roi = orig[startY:endY, startX:endX]

        # use Tesseract v4 to recognize a text ROI in an image
        text = pytesseract.image_to_string(roi, config=config)

        # add the bounding box coordinates and actual text string to the results list
        results.append(((startX, startY, endX, endY), text))

    # sort the bounding boxes from top to bottom based on their y-coordinate
    results = sorted(results, key=lambda r: r[0][1])

    output = orig.copy()
    # loop over the results
    for ((startX, startY, endX, endY), text) in results:
        # display the text OCR'd by Tesseract
        print("OCR TEXT")
        print("========")
        print("{}\n".format(text))

        # strip out non-ASCII text so we can draw the text on the image using OpenCV
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
        # draw the text and a bounding box surrounding the text region of the input image
        cv2.rectangle(output, (startX, startY), (endX, endY), (0, 0, 255), 2)
        cv2.putText(output, text, (startX, startY - 20),
            cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)

    # show the output image
    cv2.imshow("Text Detection", output)
    cv2.waitKey(0)


if __name__ == "__main__":
    text_recognition()
--------------------------------------------------------------------------------
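A side note on the `non_max_suppression` call in text_detection.py: imutils implements the classic greedy scheme, which keeps the highest-scoring box, drops every remaining box that overlaps it too much, and repeats. A minimal sketch of that idea (illustrative only, not the imutils implementation; the IoU criterion and the 0.3 threshold here are assumptions):

import numpy as np

def nms_sketch(boxes, scores, iou_thresh=0.3):
    # boxes: (N, 4) float array of (startX, startY, endX, endY); scores: (N,)
    order = np.argsort(scores)[::-1]  # highest confidence first
    keep = []
    while len(order) > 0:
        i = order[0]
        keep.append(i)
        # intersection of the kept box with every remaining candidate
        xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        areas = (boxes[order[1:], 2] - boxes[order[1:], 0]) * \
                (boxes[order[1:], 3] - boxes[order[1:], 1])
        iou = inter / (area_i + areas - inter)
        # keep only candidates that do not overlap the kept box too much
        order = order[1:][iou < iou_thresh]
    return boxes[keep]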
/EAST_Text_Detection/text_detection_video.py:
--------------------------------------------------------------------------------
# import the necessary packages
from imutils.video import VideoStream
from imutils.video import FPS
from imutils.object_detection import non_max_suppression
import numpy as np
import argparse
import imutils
import time
import cv2

def decode_predictions(scores, geometry):
    """
    The EAST text detector returns two arrays:
    scores: the probability that a region contains text.
    geometry: the bounding-box geometry of the text region.
    """
    # The minimum probability of a detected text region
    min_confidence = 0.5

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    numRows, numCols = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the
        # geometrical data used to derive potential bounding box
        # coordinates that surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability,
            # ignore it
            if scoresData[x] < min_confidence:
                continue

            # compute the offset factor as our resulting feature
            # maps will be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and
            # then compute the sine and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height
            # of the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates
            # for the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score
            # to our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # return a tuple of the bounding boxes and associated confidences
    return (rects, confidences)


if __name__ == "__main__":
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-v", "--video", type=str,
        help="path to optional input video file")
    args = vars(ap.parse_args())

    # initialize the original frame dimensions, new frame dimensions,
    # and ratio between the dimensions
    (newW, newH) = (320, 320)
    (W, H) = (None, None)
    (rW, rH) = (None, None)

    east_model = "frozen_east_text_detection.pb"

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(east_model)

    # if a video path was not supplied, grab the reference to the web cam
    if not args.get("video", False):
        print("[INFO] starting video stream...")
        vs = VideoStream(src=0).start()
        time.sleep(1.0)

    # otherwise, grab a reference to the video file
    else:
        vs = cv2.VideoCapture(args["video"])

    # start the FPS throughput estimator
    fps = FPS().start()

    # loop over frames from the video stream
    while True:
        # grab the current frame, then handle if we are using a
        # VideoStream or VideoCapture object
        frame = vs.read()
        frame = frame[1] if args.get("video", False) else frame

        # check to see if we have reached the end of the stream
        if frame is None:
            break

        # resize the frame, maintaining the aspect ratio
        frame = imutils.resize(frame, width=1000)
        orig = frame.copy()

        # if our frame dimensions are None, we still need to compute the
        # ratio of old frame dimensions to new frame dimensions
        if W is None or H is None:
            (H, W) = frame.shape[:2]
            rW = W / float(newW)
            rH = H / float(newH)

        # resize the frame, this time ignoring aspect ratio
        frame = cv2.resize(frame, (newW, newH))

        # construct a blob from the frame and then perform a forward pass
        # of the model to obtain the two output layer sets
        blob = cv2.dnn.blobFromImage(frame, 1.0, (newW, newH),
            (123.68, 116.78, 103.94), swapRB=True, crop=False)
        net.setInput(blob)
        (scores, geometry) = net.forward(layerNames)

        # decode the predictions, then apply non-maxima suppression to
        # suppress weak, overlapping bounding boxes
        (rects, confidences) = decode_predictions(scores, geometry)
        boxes = non_max_suppression(np.array(rects), probs=confidences)

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:
            # scale the bounding box coordinates based on the respective
            # ratios
            startX = int(startX * rW)
            startY = int(startY * rH)
            endX = int(endX * rW)
            endY = int(endY * rH)

            # draw the bounding box on the frame
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

        # update the FPS counter
        fps.update()

        # show the output frame
        cv2.imshow("Text Detection", orig)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

    # stop the timer and display FPS information
    fps.stop()
    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

    # if we are using a webcam, release the pointer
    if not args.get("video", False):
        vs.stop()

    # otherwise, release the file pointer
    else:
        vs.release()

    # close all windows
    cv2.destroyAllWindows()
--------------------------------------------------------------------------------
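For reference, both EAST scripts are meant to be run from inside EAST_Text_Detection/ so the relative model and image paths resolve. Typical invocations might look like the following (the .mp4 filename is only an illustration):

python text_detection.py
python text_detection_video.py                 # detection only, from the webcam
python text_detection_video.py --video demo.mp4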
/README.md:
--------------------------------------------------------------------------------
[bubble_sheet_scanner](https://github.com/zxdefying/OpenCV_project/blob/master/bubble_sheet_scanner/bubble_sheet_scanner.ipynb)

[canny_edge_detection](https://github.com/zxdefying/OpenCV_project/tree/master/canny_edge_detection)

[document_scanner](https://github.com/zxdefying/OpenCV_project/tree/master/document_scanner)

[perspective_transform](https://github.com/zxdefying/OpenCV_project/tree/master/perspective_transform)

--------------------------------------------------------------------------------
/Using_Tesseract_OCR/images/example_blur.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/Using_Tesseract_OCR/images/example_blur.png
--------------------------------------------------------------------------------
/Using_Tesseract_OCR/images/example_thresh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/Using_Tesseract_OCR/images/example_thresh.png
--------------------------------------------------------------------------------
/Using_Tesseract_OCR/images/receipt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/Using_Tesseract_OCR/images/receipt.png
--------------------------------------------------------------------------------
/Using_Tesseract_OCR/pytesseract_test.py:
--------------------------------------------------------------------------------
from PIL import Image
import pytesseract
import cv2
import os


img_path = "images/example_thresh.png"
preprocess = "thresh"

# img_path = "images/receipt.png"
# preprocess = "blur"

image = cv2.imread(img_path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# apply thresholding to segment the foreground from the background;
# this thresholding method works well for reading dark text overlaid on gray shapes
if preprocess == "thresh":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

# median blurring to remove salt and pepper noise
elif preprocess == "blur":
    gray = cv2.medianBlur(gray, 3)

# save the grayscale image as a temporary file so we can apply OCR to it
filename = os.path.join("images", "postprocess.png")
cv2.imwrite(filename, gray)

# load the image as a PIL/Pillow image, apply OCR, then delete the temporary file;
# this converts the contents of the image into a string
text = pytesseract.image_to_string(Image.open(filename))
os.remove(filename)
print(text)

# show the output images
cv2.imshow("Image", image)
cv2.imshow("Output", gray)
cv2.waitKey(0)
--------------------------------------------------------------------------------
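One note on the temporary-file round trip above: recent pytesseract releases also accept NumPy arrays directly (converting them to PIL images internally), so the imwrite/remove dance can be skipped. A minimal sketch, assuming such a version is installed:

import cv2
import pytesseract

gray = cv2.cvtColor(cv2.imread("images/example_thresh.png"), cv2.COLOR_BGR2GRAY)
# no temp file needed; the array is handed to Tesseract as-is
print(pytesseract.image_to_string(gray))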
/bubble_sheet_scanner/images/bubble_test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/bubble_sheet_scanner/images/bubble_test.png
--------------------------------------------------------------------------------
/bubble_sheet_scanner/images/edged.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/bubble_sheet_scanner/images/edged.png
--------------------------------------------------------------------------------
/bubble_sheet_scanner/images/omr_mask.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/bubble_sheet_scanner/images/omr_mask.gif
--------------------------------------------------------------------------------
/bubble_sheet_scanner/images/omr_multiple_bubbles.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/bubble_sheet_scanner/images/omr_multiple_bubbles.jpg
--------------------------------------------------------------------------------
/bubble_sheet_scanner/images/omr_no_bubbles.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/bubble_sheet_scanner/images/omr_no_bubbles.jpg
--------------------------------------------------------------------------------
/bubble_sheet_scanner/origin.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/bubble_sheet_scanner/origin.jpg
--------------------------------------------------------------------------------
/canny_edge_detection/69914.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/canny_edge_detection/69914.png
--------------------------------------------------------------------------------
/canny_edge_detection/canny_edge_detection.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://docs.opencv.org/3.1.0/da/d22/tutorial_py_canny.html\n",
    "\n",
    "https://www.meccanismocomplesso.org/en/opencv-python-canny-edge-detection/\n",
    "\n",
    "https://pythonprogramming.net/template-matching-python-opencv-tutorial/?completed=/canny-edge-detection-gradients-python-opencv-tutorial/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Image gradients and edge detection: image gradients can be used to measure directional intensity. \n",
    "We can convert these gradients into pure edges, or use [Canny Edge detection](https://docs.opencv.org/master/dd/d1a/group__imgproc__feature.html#ga04723e007ed888ddf11d9ba04e2232de)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "import numpy as np\n",
    "\n",
    "frame = cv2.imread('img11.jpg',1)\n",
    "frame = cv2.resize(frame, dsize=None, fx=0.3, fy=0.3)\n",
    "hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)\n",
    "\n",
    "lower_red = np.array([30,150,50])\n",
    "upper_red = np.array([255,255,180])\n",
    "\n",
    "mask = cv2.inRange(hsv, lower_red, upper_red)\n",
    "res = cv2.bitwise_and(frame,frame, mask= mask)\n",
    "\n",
    "kernel = np.ones((5,5),np.uint8)\n",
    "erosion = cv2.erode(mask,kernel,iterations=1)\n",
    "dilation = cv2.dilate(mask,kernel,iterations=1)\n",
    "\n",
    "opening = cv2.morphologyEx(mask,cv2.MORPH_OPEN,kernel)\n",
    "closing = cv2.morphologyEx(mask,cv2.MORPH_CLOSE,kernel)\n",
    "\n",
    "laplacian = cv2.Laplacian(frame,cv2.CV_64F)\n",
    "sobelx = cv2.Sobel(frame,cv2.CV_64F,1,0,ksize=5)\n",
    "sobely = cv2.Sobel(frame,cv2.CV_64F,0,1,ksize=5)\n",
    "\n",
    "while True:\n",
    "    cv2.imshow('Original',frame)\n",
    "    # cv2.imshow('Mask',res)\n",
    "    # cv2.imshow('erosion',erosion)\n",
    "    # cv2.imshow('dilation',dilation)\n",
    "    # cv2.imshow('opening',opening)\n",
    "    # cv2.imshow('closing',closing)\n",
    "    cv2.imshow('laplacian',laplacian)\n",
    "    cv2.imshow('sobelx',sobelx)\n",
    "    cv2.imshow('sobely',sobely)\n",
    "\n",
    "    k = cv2.waitKey(0)\n",
    "    if k == ord('q'):\n",
    "        break\n",
    "\n",
    "cv2.destroyAllWindows()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Canny edge detection: 1. convert to grayscale, 2. Gaussian blur (remove noise), 3. call the Canny method\n",
    "\n",
    "import cv2\n",
    "import numpy as np\n",
    "\n",
    "frame = cv2.imread('img11.jpg',1)\n",
    "frame = cv2.resize(frame, dsize=None, fx=0.3, fy=0.3)\n",
    "gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n",
    "\n",
    "# use Gaussian blur with a 3x3 kernel to remove high-frequency noise\n",
    "imgG = cv2.GaussianBlur(gray,(3,3),0)\n",
    "# find image edges: the convolved value at each location is compared against the thresholds\n",
    "edges = cv2.Canny(imgG,20,100) # img, lower, upper\n",
    "\n",
    "while True:\n",
    "    cv2.imshow('Original',frame)\n",
    "    cv2.imshow('Canny',edges)\n",
    "\n",
    "    k = cv2.waitKey(0)\n",
    "    if k == ord('q'):\n",
    "        break\n",
    "\n",
    "cv2.destroyAllWindows()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Zero-parameter, automatic Canny edge detection\n",
    "\n",
    "It's clear that the automatic, zero-parameter version of Canny edge detection obtains the best results with the least effort.\n",
    "\n",
    "[zero-parameter-automatic-canny-edge-detection-with-python-and-opencv](https://www.pyimagesearch.com/2015/04/06/zero-parameter-automatic-canny-edge-detection-with-python-and-opencv/), auto_canny.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### The Canny edge detector\n",
    "\n",
    "The Canny edge detector, still widely used today, is one of the default edge detectors in image processing.\n",
    "\n",
    "The Canny edge detection algorithm can be broken down into 5 steps:\n",
    "\n",
    "1. Smooth the image using a Gaussian filter to remove high frequency noise.\n",
    "2. Compute the gradient intensity representations of the image.\n",
    "3. Apply non-maximum suppression to remove “false” responses to edge detection.\n",
    "4. Apply thresholding using a lower and upper boundary on the gradient values.\n",
    "5. Track edges using hysteresis by suppressing weak edges that are not connected to strong edges.\n",
    "\n",
    "The OpenCV implementation of the Canny edge detector: `cv2.Canny(image, lower, upper)`.\n",
    "\n",
    "The lower and upper arguments are integer thresholds, so the problem becomes determining optimal lower and upper threshold values.\n",
    "\n",
    "This matters when processing multiple images with different contents captured under varying lighting conditions.\n",
    "\n",
    "The trick below relies on basic statistics to remove the manual tuning of the Canny thresholds. It saves time on parameter tuning and still yields a nice Canny edge map."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import argparse  # parse command-line arguments\n",
    "import glob  # grab image paths from disk\n",
    "import cv2\n",
    "\n",
    "# image is a single-channel image\n",
    "# sigma adjusts the percentage thresholds derived from the simple median statistic\n",
    "def auto_canny(image, sigma=0.33):\n",
    "    # compute the median of the single channel pixel intensities\n",
    "    v = np.median(image)\n",
    "\n",
    "    # construct two thresholds using the median\n",
    "    lower = int(max(0, (1.0 - sigma) * v))\n",
    "    upper = int(min(255, (1.0 + sigma) * v))\n",
    "    # edge detection: gradients below lower are discarded, gradients above upper are kept,\n",
    "    # and values in between are resolved automatically by hysteresis\n",
    "    edged = cv2.Canny(image, lower, upper)\n",
    "    print(\"lower:\", lower, \"upper:\", upper)\n",
    "\n",
    "    # return the edged image\n",
    "    return edged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lower: 53 upper: 106\n"
     ]
    }
   ],
   "source": [
    "# load the image, convert it to grayscale, and blur it slightly\n",
    "image = cv2.imread('img11.jpg')\n",
    "image = cv2.resize(image, dsize=None, fx=0.3, fy=0.3)\n",
    "gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
    "blurred = cv2.GaussianBlur(gray, (3, 3), 0)\n",
    "\n",
    "# apply Canny edge detection\n",
    "wide = cv2.Canny(blurred, 10, 200)  # a wide threshold\n",
    "tight = cv2.Canny(blurred, 225, 250)  # a tight threshold\n",
    "auto = auto_canny(blurred)  # automatically determined thresholds\n",
    "\n",
    "# show the images\n",
    "while True:\n",
    "    cv2.imshow(\"Original\", image)\n",
    "    cv2.imshow(\"Edges\", np.hstack([wide, tight, auto]))\n",
    "    k = cv2.waitKey(0)\n",
    "    if k == ord('q'):\n",
    "        break\n",
    "\n",
    "cv2.destroyAllWindows()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------
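As a quick sanity check on the auto_canny run above, the printed thresholds correspond to a median pixel intensity of roughly v = 80:

v = 80                                       # hypothetical median gray level
lower, upper = int(0.67 * v), int(1.33 * v)  # -> 53, 106, matching the output above

Brighter images push both thresholds up and darker images pull them down, which is exactly the adaptivity the manual pairs (10, 200) and (225, 250) lack.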
/canny_edge_detection/img11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/canny_edge_detection/img11.jpg
--------------------------------------------------------------------------------
/distance_to_camera/2ft.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/distance_to_camera/2ft.jpg
--------------------------------------------------------------------------------
/distance_to_camera/distance_to_camera.py:
--------------------------------------------------------------------------------
from imutils import paths
import numpy as np
import imutils
import cv2

# initialize the known distance from the camera to the object, which
# in this case is 24 inches
KNOWN_DISTANCE = 24.0

# initialize the known object width; in this case the piece of
# paper is 11 inches wide
KNOWN_WIDTH = 11.0

def get_focalLength():
    # load the first image that contains an object that is KNOWN TO BE 2 feet
    # from our camera, then find the paper marker in the image, and initialize
    # the focal length
    image = cv2.imread("./2ft.jpg")
    marker = find_marker(image)
    focalLength = (marker[1][0] * KNOWN_DISTANCE) / KNOWN_WIDTH

    return focalLength


def find_marker(image):
    # convert the image to grayscale, blur it, and detect edges
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 35, 125)

    # the contour of the paper is not closed, so apply a close operation (dilate and erode)
    kernel = np.ones((3, 3), np.uint8)
    close = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel)

    # find the contours in the edged image and keep the largest one;
    # we'll assume that this is our piece of paper in the image
    cnts = cv2.findContours(close.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    c = max(cnts, key = cv2.contourArea)

    # compute the bounding box of the paper region and return it
    return cv2.minAreaRect(c)


def distance_to_camera(knownWidth, focalLength, perWidth):
    # compute and return the distance from the marker to the camera
    return (knownWidth * focalLength) / perWidth


if __name__ == "__main__":

    focalLength = get_focalLength()

    # loop over the images
    for imagePath in sorted(paths.list_images("images")):
        # load the image, find the marker in the image, then compute the
        # distance to the marker from the camera
        image = cv2.imread(imagePath)
        marker = find_marker(image)
        inches = distance_to_camera(KNOWN_WIDTH, focalLength, marker[1][0])

        # draw a bounding box around the marker and display it
        # (note: np.int0 is an alias of np.intp that was removed in NumPy 2.0)
        box = cv2.cv.BoxPoints(marker) if imutils.is_cv2() else cv2.boxPoints(marker)
        box = np.int0(box)
        cv2.drawContours(image, [box], -1, (0, 255, 0), 2)
        cv2.putText(image, "%.2fft" % (inches / 12),
            (image.shape[1] - 200, image.shape[0] - 20), cv2.FONT_HERSHEY_SIMPLEX,
            2.0, (0, 255, 0), 3)
        cv2.imshow(imagePath.split('/')[-1], image)

        if cv2.waitKey(0) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
--------------------------------------------------------------------------------
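The calibration in get_focalLength is plain triangle similarity: F = (P * D) / W, where P is the marker's apparent width in pixels, D the known distance, and W the known width; distance then follows as D' = (W * F) / P'. With illustrative numbers (not measured from these images): an 11 in sheet appearing 248 px wide at the known 24 in distance gives F = (248 * 24) / 11 ≈ 541, and if the sheet later appears 124 px wide, D' = (11 * 541) / 124 ≈ 48 in, i.e. about 4 ft.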
/distance_to_camera/distance_to_camera_edged.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/distance_to_camera/distance_to_camera_edged.jpg
--------------------------------------------------------------------------------
/distance_to_camera/images/3ft.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/distance_to_camera/images/3ft.jpg
--------------------------------------------------------------------------------
/distance_to_camera/images/4ft.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/distance_to_camera/images/4ft.jpg
--------------------------------------------------------------------------------
/document_scanner/a4paper.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/document_scanner/a4paper.jpg
--------------------------------------------------------------------------------
/document_scanner/receipt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/document_scanner/receipt.png
--------------------------------------------------------------------------------
/document_scanner/transform.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np


def order_points(pts):
    # initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    rect = np.zeros((4, 2), dtype = "float32")

    # the top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis = 1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # now, compute the difference between the points; the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis = 1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # return the ordered coordinates
    return rect


def four_point_transform(image, pts):
    # obtain a consistent order of the points and unpack them
    # individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # compute the width of the new image, which will be the
    # maximum distance between the bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "bird's-eye view"
    # (i.e. top-down view) of the image, again specifying points
    # in the top-left, top-right, bottom-right, and bottom-left
    # order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype = "float32")

    # compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # return the warped image
    return warped
--------------------------------------------------------------------------------
/perspective_transform/birdseye.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/perspective_transform/birdseye.jpg
--------------------------------------------------------------------------------
/perspective_transform/original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demuxin/OpenCV_project/1a87a32deeceb55f933a33c5fe7380d6b51613a0/perspective_transform/original.png
--------------------------------------------------------------------------------
/perspective_transform/transform.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np


def order_points(pts):
    # initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    rect = np.zeros((4, 2), dtype = "float32")

    # the top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis = 1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # now, compute the difference between the points; the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis = 1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # return the ordered coordinates
    return rect


def four_point_transform(image, pts):
    # obtain a consistent order of the points and unpack them
    # individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # compute the width of the new image, which will be the
    # maximum distance between the bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "bird's-eye view"
    # (i.e. top-down view) of the image, again specifying points
    # in the top-left, top-right, bottom-right, and bottom-left
    # order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype = "float32")

    # compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # return the warped image
    return warped
--------------------------------------------------------------------------------
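A quick usage sketch for four_point_transform (the corner coordinates below are hypothetical; in the document scanner they would come from the detected document contour):

import cv2
import numpy as np
from transform import four_point_transform

image = cv2.imread("original.png")
# four corners of the region to rectify, in any order (illustrative values)
pts = np.array([(73, 239), (356, 117), (475, 265), (187, 443)], dtype="float32")

# order_points sorts the corners, so the input order does not matter
warped = four_point_transform(image, pts)
cv2.imshow("Warped", warped)
cv2.waitKey(0)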