├── img ├── dog.jpg ├── A_abcd.JPG ├── iphone.jpg ├── starrynight.jpg └── GraderPage-Answered.jpg ├── detection ├── yolov3.weights ├── coco.names └── yolov3.cfg ├── Video1.cs ├── README.md ├── Video4.cs ├── Video3.cs ├── Video2.cs ├── Video5.cs └── Video6.cs /img/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Programming-With-Chris/OpenCVTutorials/HEAD/img/dog.jpg -------------------------------------------------------------------------------- /img/A_abcd.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Programming-With-Chris/OpenCVTutorials/HEAD/img/A_abcd.JPG -------------------------------------------------------------------------------- /img/iphone.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Programming-With-Chris/OpenCVTutorials/HEAD/img/iphone.jpg -------------------------------------------------------------------------------- /img/starrynight.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Programming-With-Chris/OpenCVTutorials/HEAD/img/starrynight.jpg -------------------------------------------------------------------------------- /img/GraderPage-Answered.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Programming-With-Chris/OpenCVTutorials/HEAD/img/GraderPage-Answered.jpg -------------------------------------------------------------------------------- /detection/yolov3.weights: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:523e4e69e1d015393a1b0a441cef1d9c7659e3eb2d7e15f793f060a21b32f297 3 | size 248007048 4 | -------------------------------------------------------------------------------- /Video1.cs: 
-------------------------------------------------------------------------------- 1 | using System; 2 | using Emgu.CV; 3 | 4 | 5 | namespace myNamespace 6 | { 7 | class Video1 8 | { 9 | /*static void Main(string[] args) 10 | { 11 | Mat pic = new Mat(); 12 | 13 | pic = CvInvoke.Imread("./img/starrynight.jpg"); 14 | 15 | Mat gaussianBlur = new Mat(); 16 | 17 | CvInvoke.GaussianBlur(pic, gaussianBlur, new System.Drawing.Size(3, 3), 7.0); 18 | 19 | CvInvoke.Imshow("starry night", pic); 20 | CvInvoke.Imshow("blurry night", gaussianBlur); 21 | 22 | 23 | CvInvoke.WaitKey(); 24 | }*/ 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /detection/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
OpenCVTutorials 2 | 3 | 4 | Video 1 link - [Youtube](https://youtu.be/YTBAjP-0Fto) - Covers the Mat structure, reading and displaying an image, and how to do a basic Gaussian Blur on an image. 5 | 6 | Video 2 link - [Youtube](https://youtu.be/LARuIN9dask) - Covers Image Manipulation, resizing and rotation, changing the colors of an image, and applying Convolution kernels to an image to produce a desired effect. 7 | 8 | Video 3 link - [Youtube](https://youtu.be/wuQsW-LZ3kw) - Covers Edge Detection and Contours, Sobel, Canny, and then using contours to fill an image. 9 | 10 | Video 4 link - [Youtube](https://youtu.be/AAV3a_ngSUc) - Covers Template Matching in Emgu and OpenCV, and then shows an example of using that to visually grade a student's multiple choice test. 11 | 12 | Video 5 link - [Youtube](https://www.youtube.com/watch?v=JjQgemNtEZA) - Covers Video, how to capture from a webcam, and then how to manipulate frames to change the video stream shown. 13 | 14 | Video 6 link - [Youtube](https://youtu.be/v7_g1Zoapkg) - Covers Object Detection with Haar Cascade Classifiers and the YOLO Object Detection algorithm. 
15 | -------------------------------------------------------------------------------- /Video4.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using Emgu.CV; 3 | using Emgu.CV.Structure; 4 | 5 | namespace ourNamespace 6 | { 7 | class Program 8 | { 9 | /*static void Main(string[] args) 10 | { 11 | Mat answeredPic = CvInvoke.Imread("./img/GraderPage-Answered.jpg"); 12 | Mat aWasAnswered = CvInvoke.Imread("./img/A_abcd.jpg"); 13 | 14 | CvInvoke.Resize(answeredPic, answeredPic, new System.Drawing.Size(0,0), .7d, .7d); 15 | CvInvoke.Resize(aWasAnswered, aWasAnswered, new System.Drawing.Size(0,0), .7d, .7d); 16 | 17 | Mat templateOutput = new Mat(); 18 | 19 | CvInvoke.MatchTemplate(answeredPic, aWasAnswered, templateOutput, Emgu.CV.CvEnum.TemplateMatchingType.CcoeffNormed); 20 | 21 | double minVal = 0.0d; 22 | double maxVal = 0.0d; 23 | System.Drawing.Point minLoc = new System.Drawing.Point(); 24 | System.Drawing.Point maxLoc = new System.Drawing.Point(); 25 | 26 | CvInvoke.MinMaxLoc(templateOutput, ref minVal, ref maxVal, ref minLoc, ref maxLoc); 27 | 28 | CvInvoke.Threshold(templateOutput, templateOutput, 0.85, 1, Emgu.CV.CvEnum.ThresholdType.ToZero); 29 | 30 | var matches = templateOutput.ToImage(); 31 | 32 | for (int i = 0; i < matches.Rows; i++) 33 | { 34 | for (int j = 0; j < matches.Cols; j++) 35 | { 36 | if (matches[i, j].Intensity > .8) 37 | { 38 | System.Drawing.Point loc = new System.Drawing.Point(j, i); 39 | System.Drawing.Rectangle box = new System.Drawing.Rectangle(loc, aWasAnswered.Size); 40 | 41 | CvInvoke.Rectangle(answeredPic, box, new Emgu.CV.Structure.MCvScalar(0, 255, 0), 2); 42 | } 43 | } 44 | } 45 | 46 | CvInvoke.Imshow("templates detected", answeredPic); 47 | CvInvoke.Imshow("templateOutput", templateOutput); 48 | 49 | CvInvoke.WaitKey(); 50 | 51 | }*/ 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /Video3.cs: 
-------------------------------------------------------------------------------- 1 | using System; 2 | using Emgu.CV; 3 | using Emgu.CV.Structure; 4 | using Emgu.CV.Util; 5 | 6 | namespace myNamespace 7 | { 8 | class Program 9 | { 10 | /* static void Main(string[] args) 11 | { 12 | Mat pic = CvInvoke.Imread("./img/dog.jpg"); 13 | 14 | Mat gaussianBlur = new Mat(); 15 | Mat sobelX = new Mat(); 16 | Mat sobelY = new Mat(); 17 | Mat sobelXY = new Mat(); 18 | 19 | pic.CopyTo(sobelX); 20 | pic.CopyTo(sobelY); 21 | pic.CopyTo(sobelXY); 22 | 23 | CvInvoke.GaussianBlur(pic, gaussianBlur, new System.Drawing.Size(3,3), 5.0); 24 | 25 | CvInvoke.Sobel(gaussianBlur, sobelX, Emgu.CV.CvEnum.DepthType.Default, 1, 0, 5); 26 | CvInvoke.Sobel(gaussianBlur, sobelY, Emgu.CV.CvEnum.DepthType.Default, 0, 1, 5); 27 | CvInvoke.Sobel(gaussianBlur, sobelXY, Emgu.CV.CvEnum.DepthType.Default, 1, 1, 5); 28 | 29 | //CvInvoke.Imshow("sobelX", sobelX); 30 | //CvInvoke.Imshow("sobelY", sobelY); 31 | //CvInvoke.Imshow("sobelXY", sobelXY); 32 | 33 | //CvInvoke.WaitKey(); 34 | 35 | 36 | Mat cannyPic = new Mat(); 37 | 38 | var average = pic.ToImage().GetAverage(); 39 | 40 | var lowerthreshold = Math.Max(0, (1.0 - 0.33) * average.Intensity); 41 | var upperthreshold = Math.Max(255, (1.0 + 0.33) * average.Intensity); 42 | 43 | 44 | 45 | CvInvoke.Canny(gaussianBlur, cannyPic, lowerthreshold, upperthreshold, 3); 46 | 47 | //CvInvoke.Imshow("canny", cannyPic); 48 | 49 | //CvInvoke.WaitKey(); 50 | 51 | 52 | Mat iphone = CvInvoke.Imread("./img/iphone.jpg"); 53 | 54 | VectorOfVectorOfPoint contours = new VectorOfVectorOfPoint(); 55 | 56 | Mat thresholdPic = new Mat(); 57 | Mat hierarchy = new Mat(); 58 | 59 | Image grayPhone = iphone.ToImage(); 60 | 61 | CvInvoke.Threshold(grayPhone, thresholdPic, 210, 255, Emgu.CV.CvEnum.ThresholdType.Binary); 62 | 63 | CvInvoke.Imshow("threshold", thresholdPic); 64 | 65 | CvInvoke.FindContours(thresholdPic, contours, hierarchy, Emgu.CV.CvEnum.RetrType.Tree, 
Emgu.CV.CvEnum.ChainApproxMethod.ChainApproxNone); 66 | //CvInvoke.DrawContours(iphone, contours, -1, new MCvScalar(0,255,0), 2); 67 | 68 | CvInvoke.FillPoly(iphone, contours, new MCvScalar(255, 100, 100)); 69 | 70 | CvInvoke.Imshow("iphone", iphone); 71 | 72 | CvInvoke.WaitKey(); 73 | 74 | }*/ 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /Video2.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using Emgu.CV; 3 | using Emgu.CV.CvEnum; 4 | using Emgu.CV.Structure; 5 | 6 | 7 | namespace MyNamespace 8 | { 9 | class Program 10 | { 11 | /*static void Main(string[] args) 12 | { 13 | Mat pic = CvInvoke.Imread("./img/starrynight.jpg"); 14 | Mat resizedPic = new Mat(); 15 | 16 | int height = pic.Rows; 17 | int width = pic.Cols; 18 | 19 | Console.WriteLine($"starry night is : {height} x {width}"); 20 | 21 | CvInvoke.Resize(pic, resizedPic, new System.Drawing.Size(400, 500)); 22 | 23 | CvInvoke.Imshow("starry night", pic); 24 | CvInvoke.Imshow("resized night", resizedPic); 25 | 26 | CvInvoke.WaitKey(); 27 | 28 | 29 | double angleFourtyFive = 45d; 30 | 31 | System.Drawing.PointF center = new System.Drawing.PointF((width - 1) / 2.0f, (height - 1) / 2.0f); 32 | Mat rotationMatrix = new Mat(); 33 | 34 | CvInvoke.GetRotationMatrix2D(center, angleFourtyFive, 1.0, rotationMatrix); 35 | Mat rotatedPic = new Mat(); 36 | 37 | CvInvoke.WarpAffine(pic, rotatedPic, rotationMatrix, new System.Drawing.Size(width, height)); 38 | 39 | CvInvoke.Imshow("rotated night", rotatedPic); 40 | 41 | CvInvoke.WaitKey(); 42 | 43 | 44 | Image convertPic = pic.ToImage(); 45 | 46 | var image = convertPic.InRange(new Bgr(75, 0, 0), new Bgr(255, 125, 125)); 47 | 48 | for (int i = 0; i < image.Rows; i++) 49 | { 50 | for (int j = 0; j < image.Cols; j++) 51 | { 52 | var num = image[i, j]; 53 | 54 | if (num.Intensity > 0) 55 | { 56 | convertPic[i, j] = new Bgr(convertPic[i,j].MCvScalar.V0 - 50, 
convertPic[i,j].MCvScalar.V1 - 50, convertPic[i,j].MCvScalar.V2 + 100); 57 | } 58 | } 59 | 60 | } 61 | 62 | Mat changedPic = convertPic.Mat; 63 | 64 | CvInvoke.Imshow("starry night", pic); 65 | CvInvoke.Imshow("color-shifted night", changedPic); 66 | 67 | CvInvoke.WaitKey(); 68 | 69 | 70 | float[,] kernelArray = new float[3, 3] { 71 | { -1, -1, -1}, 72 | { -1, 8, -1}, 73 | { -1, -1, -1} 74 | }; 75 | 76 | ConvolutionKernelF kernel = new ConvolutionKernelF(kernelArray); 77 | 78 | Mat filteredPic = new Mat(); 79 | 80 | pic.CopyTo(filteredPic); 81 | 82 | CvInvoke.Filter2D(pic, filteredPic, kernel, new System.Drawing.Point(0, 0)); 83 | 84 | CvInvoke.Imshow("convoluted night", filteredPic); 85 | CvInvoke.WaitKey(); 86 | 87 | }*/ 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /Video5.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using Emgu.CV; 3 | using Emgu.CV.Structure; 4 | 5 | namespace ourNamespace 6 | { 7 | class Video5 8 | { 9 | /*static void Main(string[] args) 10 | { 11 | var vc = new VideoCapture(0, VideoCapture.API.DShow); 12 | 13 | Mat frame = new(); 14 | bool pause = false; 15 | 16 | Mat myface = new(); 17 | Mat templateOutput = new(); 18 | Mat frameGray = new(); 19 | 20 | myface = CvInvoke.Imread("./img/myface.jpg"); 21 | CvInvoke.CvtColor(myface, myface, Emgu.CV.CvEnum.ColorConversion.Bgr2Gray); 22 | 23 | while(!pause) 24 | { 25 | vc.Read(frame); 26 | 27 | /*CvInvoke.CvtColor(frame, frameGray, Emgu.CV.CvEnum.ColorConversion.Bgr2Gray); 28 | 29 | CvInvoke.MatchTemplate(frameGray, myface, templateOutput, Emgu.CV.CvEnum.TemplateMatchingType.CcoeffNormed); 30 | 31 | CvInvoke.Threshold(templateOutput, templateOutput, 0.85, 1, Emgu.CV.CvEnum.ThresholdType.ToZero); 32 | 33 | var matches = templateOutput.ToImage(); 34 | 35 | for (int i = 0; i < matches.Rows; i++) 36 | { 37 | for (int j = 0; j < matches.Cols; j++) 38 | { 39 | if (matches[i, j].Intensity > 
.8) { 40 | 41 | System.Drawing.Point loc = new System.Drawing.Point(j, i); 42 | 43 | System.Drawing.Rectangle box = new System.Drawing.Rectangle(loc, myface.Size); 44 | 45 | CvInvoke.Rectangle(frame, box, new Emgu.CV.Structure.MCvScalar(0, 255, 0), 2); 46 | } 47 | } 48 | } 49 | 50 | Image convertFrame = frame.ToImage(); 51 | var image = convertFrame.InRange(new Bgr(75, 0, 0), new Bgr(255, 190, 190)); 52 | 53 | for (int i = 0; i < image.Rows; i++) 54 | { 55 | for (int j = 0; j < image.Cols; j++) 56 | { 57 | var intensity = image[i, j]; 58 | 59 | if (intensity.Intensity > 0) 60 | { 61 | convertFrame[i, j] = new Bgr(convertFrame[i,j].MCvScalar.V0 - 50, convertFrame[i,j].MCvScalar.V1 - 50, convertFrame[i,j].MCvScalar.V2 + 100); 62 | } 63 | } 64 | } 65 | 66 | 67 | CvInvoke.Imshow("video", convertFrame); 68 | 69 | int keypressed = CvInvoke.WaitKey(1); 70 | if (keypressed == 27) 71 | pause = true; 72 | 73 | } 74 | 75 | 76 | }*/ 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /Video6.cs: --------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.IO;
using Emgu.CV;
using Emgu.CV.Structure;
using Emgu.CV.Util;
using Emgu.CV.Dnn;

namespace ourNamespace;

/// <summary>
/// Video 6 sample: runs a Darknet YOLOv3 network on webcam frames and draws the
/// detections that survive non-maximum suppression. A Haar-cascade face detector
/// variant is kept as commented-out reference code at the end of <c>Main</c>.
/// </summary>
public class Video6
{
    /// <summary>Minimum class score for a row to be kept, and the score threshold passed to NMS.</summary>
    private const float ConfidenceThreshold = 0.8f;

    /// <summary>Overlap threshold passed to <c>DnnInvoke.NMSBoxes</c>.</summary>
    private const float NmsThreshold = 0.8f;

    /// <summary>Factor each captured frame is shrunk by before inference.</summary>
    private const double DownscaleFactor = 0.4;

    /// <summary>Factor the annotated frame is enlarged by before being displayed.</summary>
    private const double DisplayScaleFactor = 4;

    /// <summary>Column index of the first per-class score in a network output row.</summary>
    private const int FirstClassScoreColumn = 5;

    /// <summary>Key code returned by <c>WaitKey</c> that ends the loop (27 = Esc).</summary>
    private const int EscapeKey = 27;

    /// <summary>
    /// Loads the network and class names from <c>./detection</c>, then processes webcam
    /// frames until Esc is pressed or the camera stops delivering frames.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // Native-backed objects are disposed deterministically instead of waiting for finalizers.
        using var net = DnnInvoke.ReadNetFromDarknet("./detection/yolov3.cfg", "./detection/yolov3.weights");
        string[] classLabels = File.ReadAllLines("./detection/coco.names");

        net.SetPreferableBackend(Emgu.CV.Dnn.Backend.OpenCV);
        net.SetPreferableTarget(Emgu.CV.Dnn.Target.Cpu);

        // The output layer names do not change between frames, so query them once.
        string[] outputLayerNames = net.UnconnectedOutLayersNames;

        using var vc = new VideoCapture(0, VideoCapture.API.DShow);

        using Mat frame = new();
        using VectorOfMat output = new();

        // Managed lists replace the per-frame VectorOfRect/VectorOfFloat/VectorOfInt
        // allocations; NMSBoxes takes plain arrays anyway.
        var boxes = new List<System.Drawing.Rectangle>();
        var scores = new List<float>();
        var classIds = new List<int>();

        while (true)
        {
            // Stop cleanly when no frame is delivered (camera missing or unplugged)
            // rather than passing an empty Mat to Resize.
            if (!vc.Read(frame) || frame.IsEmpty)
                break;

            CvInvoke.Resize(frame, frame, new System.Drawing.Size(0, 0), DownscaleFactor, DownscaleFactor);

            boxes.Clear();
            scores.Clear();
            classIds.Clear();

            // NOTE(review): no explicit blob size is given, so the network runs at the
            // downscaled frame size — confirm this suits every camera resolution in use.
            using (var input = DnnInvoke.BlobFromImage(frame, 1 / 255.0, swapRB: true))
            {
                net.SetInput(input);
                net.Forward(output, outputLayerNames);
            }

            for (int i = 0; i < output.Size; i++)
            {
                using Mat layer = output[i];
                var data = (float[,])layer.GetData();

                int rowCount = data.GetLength(0);
                int columnCount = data.GetLength(1);

                for (int r = 0; r < rowCount; r++)
                {
                    // Columns 0-3 hold the normalized centre x/y and width/height; column 4
                    // is skipped (presumably the objectness value — confirm); class scores
                    // start at column 5. Keep the first class with the highest score.
                    int classId = -1;
                    float confidence = float.NegativeInfinity;

                    for (int c = FirstClassScoreColumn; c < columnCount; c++)
                    {
                        if (data[r, c] > confidence)
                        {
                            confidence = data[r, c];
                            classId = c - FirstClassScoreColumn;
                        }
                    }

                    if (classId < 0 || confidence <= ConfidenceThreshold)
                        continue;

                    var centerX = (int)(data[r, 0] * frame.Width);
                    var centerY = (int)(data[r, 1] * frame.Height);
                    var boxWidth = (int)(data[r, 2] * frame.Width);
                    var boxHeight = (int)(data[r, 3] * frame.Height);

                    // Convert centre/size to the top-left corner Rectangle expects.
                    var x = centerX - (boxWidth / 2);
                    var y = centerY - (boxHeight / 2);

                    boxes.Add(new System.Drawing.Rectangle(x, y, boxWidth, boxHeight));
                    classIds.Add(classId);
                    scores.Add(confidence);
                }
            }

            if (boxes.Count > 0)
            {
                int[] bestIndex = DnnInvoke.NMSBoxes(boxes.ToArray(), scores.ToArray(), ConfidenceThreshold, NmsThreshold);

                foreach (int index in bestIndex)
                {
                    var box = boxes[index];
                    int labelId = classIds[index];

                    // Fall back to the numeric id if coco.names has fewer lines than the model has classes.
                    string label = labelId < classLabels.Length ? classLabels[labelId] : labelId.ToString();

                    // Keep the label on-screen for boxes that touch the top edge.
                    int labelY = Math.Max(box.Y - 20, 12);

                    CvInvoke.Rectangle(frame, box, new MCvScalar(0, 255, 0), 2);
                    CvInvoke.PutText(frame, label, new System.Drawing.Point(box.X, labelY),
                        Emgu.CV.CvEnum.FontFace.HersheyPlain, 1.0, new MCvScalar(0, 0, 255), 2);
                }
            }

            // The frame is overwritten by the next Read, so it can be annotated and scaled in place.
            CvInvoke.Resize(frame, frame, new System.Drawing.Size(0, 0), DisplayScaleFactor, DisplayScaleFactor);
            CvInvoke.Imshow("output", frame);

            if (CvInvoke.WaitKey(1) == EscapeKey)
                break;
        }

        // Alternative from the same video: Haar-cascade face detection.
        /*var faceCascade = new CascadeClassifier("./detection/haarcascade_frontalface_default.xml");
        var vc = new VideoCapture(0, Emgu.CV.VideoCapture.API.DShow);

        Mat frame = new();
        Mat frameGray = new();

        while(true)
        {
            vc.Read(frame);

            CvInvoke.CvtColor(frame, frameGray, Emgu.CV.CvEnum.ColorConversion.Bgr2Gray);

            var faces = faceCascade.DetectMultiScale(frameGray, 1.3, 5);

            if (faces is not null && faces.Length > 0)
                CvInvoke.Rectangle(frame, faces[0], new MCvScalar(0, 255, 0), 2);

            CvInvoke.Imshow("face detection", frame);

            if (CvInvoke.WaitKey(1) == 27)
            {
                break;
            }

        }
        */
    }
}

-------------------------------------------------------------------------------- /detection/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | 
size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | 
batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | 
batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | 
batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 
562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | 
activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | 790 | --------------------------------------------------------------------------------