├── img
    ├── dog.jpg
    ├── A_abcd.JPG
    ├── iphone.jpg
    ├── starrynight.jpg
    └── GraderPage-Answered.jpg
├── detection
    ├── yolov3.weights
    ├── coco.names
    └── yolov3.cfg
├── Video1.cs
├── README.md
├── Video4.cs
├── Video3.cs
├── Video2.cs
├── Video5.cs
└── Video6.cs


/img/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Programming-With-Chris/OpenCVTutorials/HEAD/img/dog.jpg


--------------------------------------------------------------------------------
/img/A_abcd.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Programming-With-Chris/OpenCVTutorials/HEAD/img/A_abcd.JPG


--------------------------------------------------------------------------------
/img/iphone.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Programming-With-Chris/OpenCVTutorials/HEAD/img/iphone.jpg


--------------------------------------------------------------------------------
/img/starrynight.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Programming-With-Chris/OpenCVTutorials/HEAD/img/starrynight.jpg


--------------------------------------------------------------------------------
/img/GraderPage-Answered.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Programming-With-Chris/OpenCVTutorials/HEAD/img/GraderPage-Answered.jpg


--------------------------------------------------------------------------------
/detection/yolov3.weights:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:523e4e69e1d015393a1b0a441cef1d9c7659e3eb2d7e15f793f060a21b32f297
3 | size 248007048
4 | 


--------------------------------------------------------------------------------
/Video1.cs:
--------------------------------------------------------------------------------
 1 | using System;
 2 | using Emgu.CV; 
 3 | 
 4 | 
 5 | namespace myNamespace 
 6 | {
 7 |   class Video1 // Video 1 demo: load an image, blur it and show both versions (Main is commented out)
 8 |   {
 9 |     /*static void Main(string[] args)
10 |     {
11 |         Mat pic = new Mat(); 
12 |         // Replace the empty Mat with the image read from disk.
13 |         pic = CvInvoke.Imread("./img/starrynight.jpg"); 
14 | 
15 |         Mat gaussianBlur = new Mat(); 
16 |         // Blur pic into gaussianBlur using a 3x3 kernel; 7.0 is the sigma argument.
17 |         CvInvoke.GaussianBlur(pic, gaussianBlur, new System.Drawing.Size(3, 3), 7.0); 
18 |         // Show the original and the blurred image in two separate windows.
19 |         CvInvoke.Imshow("starry night", pic); 
20 |         CvInvoke.Imshow("blurry night", gaussianBlur); 
21 | 
22 |         // Wait for a key press so the windows stay open.
23 |         CvInvoke.WaitKey(); 
24 |     }*/
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/detection/coco.names:
--------------------------------------------------------------------------------
 1 | person
 2 | bicycle
 3 | car
 4 | motorbike
 5 | aeroplane
 6 | bus
 7 | train
 8 | truck
 9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # OpenCVTutorials
 2 | 
 3 | 
 4 | Video 1 link - [Youtube](https://youtu.be/YTBAjP-0Fto) - Covers the Mat structure, reading and displaying an image, and how to do a basic Gaussian Blur on an image. 
 5 | 
 6 | Video 2 link - [Youtube](https://youtu.be/LARuIN9dask) - Covers Image Manipulation, resizing and rotation, changing the colors of an image, and applying Convolution kernels to an image to produce a desired effect. 
 7 | 
 8 | Video 3 link - [Youtube](https://youtu.be/wuQsW-LZ3kw) - Covers Edge Detection and Contours, Sobel, Canny, and then using contours to fill an image. 
 9 | 
10 | Video 4 link - [Youtube](https://youtu.be/AAV3a_ngSUc) - Covers Template Matching in Emgu and OpenCV, and then shows an example of using that to visually grade a student's multiple choice test. 
11 | 
12 | Video 5 link - [Youtube](https://www.youtube.com/watch?v=JjQgemNtEZA) - Covers Video, how to capture from a webcam, and then how to manipulate frames to change the video stream shown.
13 | 
14 | Video 6 link - [Youtube](https://youtu.be/v7_g1Zoapkg) - Covers Object Detection with Haar Cascade Classifiers and the YOLO Object Detection algorithm. 
15 | 


--------------------------------------------------------------------------------
/Video4.cs:
--------------------------------------------------------------------------------
 1 | using System; 
 2 | using Emgu.CV;
 3 | using Emgu.CV.Structure; 
 4 | 
 5 | namespace ourNamespace 
 6 | {
 7 |   class Program // Video 4 demo: template matching used to grade a multiple-choice sheet (Main is commented out)
 8 |   {
 9 |     /*static void Main(string[] args)
10 |     {
11 |         Mat answeredPic = CvInvoke.Imread("./img/GraderPage-Answered.jpg");
12 |         // The extension is upper-case to match img/A_abcd.JPG exactly, so the template also loads on case-sensitive file systems.
13 |         Mat aWasAnswered = CvInvoke.Imread("./img/A_abcd.JPG");
14 |         // Scale the page and the template by the same factor (0.7) so they stay comparable.
15 |         CvInvoke.Resize(answeredPic, answeredPic, new System.Drawing.Size(0,0), .7d, .7d);
16 |         CvInvoke.Resize(aWasAnswered, aWasAnswered, new System.Drawing.Size(0,0), .7d, .7d);
17 | 
18 |         Mat templateOutput = new Mat();
19 |         CvInvoke.MatchTemplate(answeredPic, aWasAnswered, templateOutput, Emgu.CV.CvEnum.TemplateMatchingType.CcoeffNormed);
20 | 
21 |         double minVal = 0.0d;
22 |         double maxVal = 0.0d;
23 |         System.Drawing.Point minLoc = new System.Drawing.Point();
24 |         System.Drawing.Point maxLoc = new System.Drawing.Point();
25 |         // Extremes of the match map and their locations; they are not used further below.
26 |         CvInvoke.MinMaxLoc(templateOutput, ref minVal, ref maxVal, ref minLoc, ref maxLoc);
27 |         // ToZero with threshold 0.85: weaker match scores are cleared before the scan below.
28 |         CvInvoke.Threshold(templateOutput, templateOutput, 0.85, 1, Emgu.CV.CvEnum.ThresholdType.ToZero);
29 | 
30 |         var matches = templateOutput.ToImage<Gray, byte>();
31 |         // Draw a template-sized box (colour 0, 255, 0) at every location whose intensity is above .8.
32 |         for (int i = 0; i < matches.Rows; i++)
33 |         {
34 |             for (int j = 0; j < matches.Cols; j++)
35 |             {
36 |               if (matches[i, j].Intensity > .8)
37 |               {
38 |                 System.Drawing.Point loc = new System.Drawing.Point(j, i);
39 |                 System.Drawing.Rectangle box = new System.Drawing.Rectangle(loc, aWasAnswered.Size);
40 | 
41 |                 CvInvoke.Rectangle(answeredPic, box, new Emgu.CV.Structure.MCvScalar(0, 255, 0), 2);
42 |               }
43 |             }
44 |         }
45 |         // Show the annotated page next to the thresholded match map.
46 |         CvInvoke.Imshow("templates detected", answeredPic);
47 |         CvInvoke.Imshow("templateOutput", templateOutput);
48 | 
49 |         CvInvoke.WaitKey();
50 | 
51 |     }*/
52 |   }
53 | }
54 | 


--------------------------------------------------------------------------------
/Video3.cs:
--------------------------------------------------------------------------------
 1 | using System;
 2 | using Emgu.CV; 
 3 | using Emgu.CV.Structure; 
 4 | using Emgu.CV.Util; 
 5 | 
 6 | namespace myNamespace 
 7 | {
 8 |   class Program // Video 3 demo: Sobel and Canny edge detection, then contour filling (Main is commented out)
 9 |   {
10 |    /* static void Main(string[] args)
11 |     {
12 |       Mat pic = CvInvoke.Imread("./img/dog.jpg");
13 | 
14 |       Mat gaussianBlur = new Mat();
15 |       Mat sobelX = new Mat();
16 |       Mat sobelY = new Mat();
17 |       Mat sobelXY = new Mat();
18 | 
19 |       pic.CopyTo(sobelX);
20 |       pic.CopyTo(sobelY);
21 |       pic.CopyTo(sobelXY);
22 |       // Blur first, then take Sobel derivatives in x, in y and in both directions (kernel size 5).
23 |       CvInvoke.GaussianBlur(pic, gaussianBlur, new System.Drawing.Size(3,3), 5.0);
24 | 
25 |       CvInvoke.Sobel(gaussianBlur, sobelX, Emgu.CV.CvEnum.DepthType.Default, 1, 0, 5);
26 |       CvInvoke.Sobel(gaussianBlur, sobelY, Emgu.CV.CvEnum.DepthType.Default, 0, 1, 5);
27 |       CvInvoke.Sobel(gaussianBlur, sobelXY, Emgu.CV.CvEnum.DepthType.Default, 1, 1, 5);
28 | 
29 |       //CvInvoke.Imshow("sobelX", sobelX);
30 |       //CvInvoke.Imshow("sobelY", sobelY);
31 |       //CvInvoke.Imshow("sobelXY", sobelXY);
32 | 
33 |       //CvInvoke.WaitKey();
34 | 
35 |       // Canny: both thresholds are derived from the average grey level of pic, 33% below and above it.
36 |       Mat cannyPic = new Mat();
37 | 
38 |       var average = pic.ToImage<Gray, byte>().GetAverage();
39 |       // Clamp to 0..255. The upper bound needs Min: Max(255, ...) can never return less than 255.
40 |       var lowerthreshold = Math.Max(0, (1.0 - 0.33) * average.Intensity);
41 |       var upperthreshold = Math.Min(255, (1.0 + 0.33) * average.Intensity);
42 | 
43 | 
44 | 
45 |       CvInvoke.Canny(gaussianBlur, cannyPic, lowerthreshold, upperthreshold, 3);
46 | 
47 |       //CvInvoke.Imshow("canny", cannyPic);
48 | 
49 |       //CvInvoke.WaitKey();
50 | 
51 |       // Contours: binarise the phone image, find its contours and fill them.
52 |       Mat iphone = CvInvoke.Imread("./img/iphone.jpg");
53 | 
54 |       VectorOfVectorOfPoint contours = new VectorOfVectorOfPoint();
55 | 
56 |       Mat thresholdPic = new Mat();
57 |       Mat hierarchy = new Mat();
58 | 
59 |       Image<Gray, byte> grayPhone = iphone.ToImage<Gray, byte>();
60 | 
61 |       CvInvoke.Threshold(grayPhone, thresholdPic, 210, 255, Emgu.CV.CvEnum.ThresholdType.Binary);
62 | 
63 |       CvInvoke.Imshow("threshold", thresholdPic);
64 | 
65 |       CvInvoke.FindContours(thresholdPic, contours, hierarchy, Emgu.CV.CvEnum.RetrType.Tree, Emgu.CV.CvEnum.ChainApproxMethod.ChainApproxNone);
66 |       //CvInvoke.DrawContours(iphone, contours, -1, new MCvScalar(0,255,0), 2);
67 | 
68 |       CvInvoke.FillPoly(iphone, contours, new MCvScalar(255, 100, 100));
69 | 
70 |       CvInvoke.Imshow("iphone", iphone);
71 | 
72 |       CvInvoke.WaitKey();
73 | 
74 |     }*/
75 |   }
76 | }
77 | 


--------------------------------------------------------------------------------
/Video2.cs:
--------------------------------------------------------------------------------
 1 | using System;
 2 | using Emgu.CV; 
 3 | using Emgu.CV.CvEnum;
 4 | using Emgu.CV.Structure; 
 5 | 
 6 | 
 7 | namespace MyNamespace 
 8 | {
 9 |   class Program // Video 2 demo: resize, rotate, recolour and convolve an image (Main is commented out)
10 |   {
11 |     /*static void Main(string[] args)
12 |     {
13 |         Mat pic = CvInvoke.Imread("./img/starrynight.jpg"); 
14 |         Mat resizedPic = new Mat(); 
15 | 
16 |         int height = pic.Rows;
17 |         int width = pic.Cols; 
18 |         // Printed with the row count (height) first, then the column count (width).
19 |         Console.WriteLine($"starry night is : {height} x {width}"); 
20 |         // Part 1: resize into resizedPic with a fixed Size(400, 500) and show both images.
21 |         CvInvoke.Resize(pic, resizedPic, new System.Drawing.Size(400, 500)); 
22 | 
23 |         CvInvoke.Imshow("starry night", pic); 
24 |         CvInvoke.Imshow("resized night", resizedPic); 
25 | 
26 |         CvInvoke.WaitKey(); 
27 | 
28 |         // Part 2: rotate by 45 degrees (scale 1.0) around the image centre, keeping the original canvas size.
29 |         double angleFourtyFive = 45d; 
30 | 
31 |         System.Drawing.PointF center = new System.Drawing.PointF((width - 1) / 2.0f, (height - 1) / 2.0f); 
32 |         Mat rotationMatrix = new Mat(); 
33 | 
34 |         CvInvoke.GetRotationMatrix2D(center, angleFourtyFive, 1.0, rotationMatrix); 
35 |         Mat rotatedPic = new Mat(); 
36 | 
37 |         CvInvoke.WarpAffine(pic, rotatedPic, rotationMatrix, new System.Drawing.Size(width, height)); 
38 | 
39 |         CvInvoke.Imshow("rotated night", rotatedPic); 
40 | 
41 |         CvInvoke.WaitKey(); 
42 | 
43 |         // Part 3: recolour every pixel selected by an InRange test on the BGR image.
44 |         Image<Bgr, byte> convertPic = pic.ToImage<Bgr, byte>(); 
45 |         // image holds the InRange result; pixels where it is non-zero are recoloured below.
46 |         var image = convertPic.InRange(new Bgr(75, 0, 0), new Bgr(255, 125, 125));
47 | 
48 |         for (int i = 0; i < image.Rows; i++) 
49 |         {
50 |             for (int j = 0; j < image.Cols; j++) 
51 |             {
52 |                 var num = image[i, j]; 
53 |                 // Selected pixel: lower the first two channels by 50 and raise the third by 100.
54 |                 if (num.Intensity > 0) 
55 |                 {
56 |                     convertPic[i, j] = new Bgr(convertPic[i,j].MCvScalar.V0 - 50, convertPic[i,j].MCvScalar.V1 - 50, convertPic[i,j].MCvScalar.V2 + 100); 
57 |                 }
58 |             }
59 | 
60 |         } 
61 | 
62 |         Mat changedPic = convertPic.Mat; 
63 | 
64 |         CvInvoke.Imshow("starry night", pic); 
65 |         CvInvoke.Imshow("color-shifted night", changedPic); 
66 | 
67 |         CvInvoke.WaitKey(); 
68 | 
69 |         // Part 4: convolve with a 3x3 kernel that has 8 in the centre and -1 everywhere else.
70 |         float[,] kernelArray = new float[3, 3] {
71 |             {  -1, -1,  -1},
72 |             { -1,  8, -1},
73 |             {  -1, -1,  -1}
74 |         };
75 | 
76 |         ConvolutionKernelF kernel = new ConvolutionKernelF(kernelArray); 
77 | 
78 |         Mat filteredPic = new Mat(); 
79 | 
80 |         pic.CopyTo(filteredPic);
81 |         // NOTE(review): the anchor passed here is (0, 0), presumably the kernel's top-left cell rather than its centre - confirm this is intended.
82 |         CvInvoke.Filter2D(pic, filteredPic, kernel, new System.Drawing.Point(0, 0)); 
83 | 
84 |         CvInvoke.Imshow("convoluted night", filteredPic); 
85 |         CvInvoke.WaitKey(); 
86 | 
87 |     }*/
88 |   }
89 | }
90 | 


--------------------------------------------------------------------------------
/Video5.cs:
--------------------------------------------------------------------------------
 1 | using System;
 2 | using Emgu.CV; 
 3 | using Emgu.CV.Structure; 
 4 | 
 5 | namespace ourNamespace 
 6 | {
 7 |   class Video5 // Video 5 demo: webcam capture with per-frame colour shifting (Main is commented out)
 8 |   {
 9 |     /*static void Main(string[] args)
10 |     {
11 |         var vc = new VideoCapture(0, VideoCapture.API.DShow); 
12 | 
13 |         Mat frame = new(); 
14 |         bool pause = false; 
15 | 
16 |         Mat myface = new(); 
17 |         Mat templateOutput = new(); 
18 |         Mat frameGray = new(); 
19 |         // NOTE(review): img/myface.jpg is not among the image files listed for this repository.
20 |         myface = CvInvoke.Imread("./img/myface.jpg"); 
21 |         CvInvoke.CvtColor(myface, myface, Emgu.CV.CvEnum.ColorConversion.Bgr2Gray);
22 |         // Loop until WaitKey returns 27, which sets pause and ends the loop.
23 |         while(!pause) 
24 |         {
25 |             vc.Read(frame); 
26 |             // NOTE(review): the comment opened on the next line has no close marker of its own before the end of Main. C# block comments do not nest, so decide where it should end before re-enabling Main. The result of vc.Read above is also not checked.
27 |             /*CvInvoke.CvtColor(frame, frameGray, Emgu.CV.CvEnum.ColorConversion.Bgr2Gray);
28 | 
29 |             CvInvoke.MatchTemplate(frameGray, myface, templateOutput, Emgu.CV.CvEnum.TemplateMatchingType.CcoeffNormed); 
30 | 
31 |             CvInvoke.Threshold(templateOutput, templateOutput, 0.85, 1, Emgu.CV.CvEnum.ThresholdType.ToZero); 
32 | 
33 |             var matches = templateOutput.ToImage<Gray, byte>(); 
34 |             // Draw a template-sized box at every location whose intensity is above .8.
35 |              for (int i = 0; i < matches.Rows; i++) 
36 |             {
37 |                 for (int j = 0; j < matches.Cols; j++) 
38 |                 { 
39 |                     if (matches[i, j].Intensity > .8) {
40 |                         
41 |                         System.Drawing.Point loc = new System.Drawing.Point(j, i); 
42 | 
43 |                         System.Drawing.Rectangle box = new System.Drawing.Rectangle(loc, myface.Size);  
44 | 
45 |                         CvInvoke.Rectangle(frame, box, new Emgu.CV.Structure.MCvScalar(0, 255, 0), 2); 
46 |                     }
47 |                 }
48 |             }
49 |             // Colour shift: recolour every pixel of the frame selected by the InRange test.
50 |             Image<Bgr, byte> convertFrame = frame.ToImage<Bgr, byte>(); 
51 |             var image = convertFrame.InRange(new Bgr(75, 0, 0), new Bgr(255, 190, 190)); 
52 | 
53 |             for (int i = 0; i < image.Rows; i++)
54 |             {
55 |                 for (int j = 0; j < image.Cols; j++)
56 |                 {
57 |                     var intensity = image[i, j]; 
58 |                     // Selected pixel: lower the first two channels by 50 and raise the third by 100.
59 |                     if (intensity.Intensity > 0) 
60 |                     {
61 |                         convertFrame[i, j] = new Bgr(convertFrame[i,j].MCvScalar.V0 - 50, convertFrame[i,j].MCvScalar.V1 - 50, convertFrame[i,j].MCvScalar.V2 + 100); 
62 |                     }
63 |                 }
64 |             }
65 |             
66 | 
67 |             CvInvoke.Imshow("video", convertFrame); 
68 |             // Poll the keyboard with a 1 argument to WaitKey; key code 27 stops the loop.
69 |             int keypressed = CvInvoke.WaitKey(1); 
70 |             if (keypressed == 27)
71 |                 pause = true; 
72 | 
73 |         }
74 | 
75 | 
76 |     }*/
77 |   }
78 | }
79 | 


--------------------------------------------------------------------------------
/Video6.cs:
--------------------------------------------------------------------------------
  1 | using Emgu.CV; 
  2 | using Emgu.CV.Structure; 
  3 | using Emgu.CV.Util; 
  4 | using Emgu.CV.Dnn; 
  5 | 
  6 | namespace ourNamespace;
  7 | /// <summary>
  8 | /// Video 6 demo: runs the YOLOv3 network stored under ./detection on webcam frames and
  9 | /// shows the labelled detections in a window until the key with code 27 (Escape) is pressed.
 10 | /// </summary>
 11 | public class Video6
 12 | {
 13 |   // Minimum class score for a candidate box; also passed to NMSBoxes as its score threshold.
 14 |   private const float ScoreThreshold = .8f;
 15 | 
 16 |   // Overlap threshold passed to NMSBoxes.
 17 |   private const float NmsThreshold = .8f;
 18 | 
 19 |   // Column of the first class score in a network output row. Columns 0-3 are multiplied by
 20 |   // the frame size below to get the box centre x, centre y, width and height in pixels.
 21 |   private const int FirstClassColumn = 5;
 22 | 
 23 |   // Frames are shrunk by this factor before they are fed to the network.
 24 |   private const double InputScale = .4;
 25 | 
 26 |   // The annotated frame is enlarged by this factor for display (.4 * 4 = 1.6 times the captured size).
 27 |   private const double DisplayScale = 4;
 28 | 
 29 |   // Key code that ends the capture loop (27 is Escape).
 30 |   private const int ExitKey = 27;
 31 | 
 32 |   /// <summary>
 33 |   /// Loads the network and the class labels, then detects objects frame by frame
 34 |   /// until the camera stops delivering frames or the exit key is pressed.
 35 |   /// </summary>
 36 |   /// <param name="args">Command-line arguments; not used.</param>
 37 |   static void Main(string[] args)
 38 |   {
 39 |     using var net = DnnInvoke.ReadNetFromDarknet("./detection/yolov3.cfg", "./detection/yolov3.weights");
 40 |     var classLabels = File.ReadAllLines("./detection/coco.names");
 41 | 
 42 |     net.SetPreferableBackend(Emgu.CV.Dnn.Backend.OpenCV);
 43 |     net.SetPreferableTarget(Emgu.CV.Dnn.Target.Cpu);
 44 | 
 45 |     using var vc = new VideoCapture(0, VideoCapture.API.DShow);
 46 | 
 47 |     using Mat frame = new();
 48 |     using VectorOfMat output = new();
 49 | 
 50 |     while (true)
 51 |     {
 52 |       // Stop cleanly when no frame is available instead of handing an empty Mat to Resize.
 53 |       if (!vc.Read(frame) || frame.IsEmpty)
 54 |       {
 55 |         break;
 56 |       }
 57 | 
 58 |       CvInvoke.Resize(frame, frame, new System.Drawing.Size(0, 0), InputScale, InputScale);
 59 | 
 60 |       // Fresh per-frame collections; the using declarations release them at the end of each iteration.
 61 |       using VectorOfRect boxes = new();
 62 |       using VectorOfFloat scores = new();
 63 |       using VectorOfInt indices = new();
 64 | 
 65 |       // The image and the blob are only needed until the forward pass has run.
 66 |       using (var image = frame.ToImage<Bgr, byte>())
 67 |       using (var input = DnnInvoke.BlobFromImage(image, 1 / 255.0, swapRB: true))
 68 |       {
 69 |         net.SetInput(input);
 70 |         net.Forward(output, net.UnconnectedOutLayersNames);
 71 |       }
 72 | 
 73 |       for (int i = 0; i < output.Size; i++)
 74 |       {
 75 |         var mat = output[i];
 76 |         var data = (float[,]) mat.GetData();
 77 |         int columnCount = data.GetLength(1);
 78 | 
 79 |         for (int j = 0; j < data.GetLength(0); j++)
 80 |         {
 81 |           // Find the best-scoring class of this row without allocating; the first maximum wins on ties.
 82 |           int classId = -1;
 83 |           float confidence = float.NegativeInfinity;
 84 | 
 85 |           for (int column = FirstClassColumn; column < columnCount; column++)
 86 |           {
 87 |             if (data[j, column] > confidence)
 88 |             {
 89 |               confidence = data[j, column];
 90 |               classId = column - FirstClassColumn;
 91 |             }
 92 |           }
 93 | 
 94 |           // Skip rows without class columns and rows whose best score is not above the threshold.
 95 |           if (classId < 0 || confidence <= ScoreThreshold)
 96 |           {
 97 |             continue;
 98 |           }
 99 | 
100 |           var centerX = (int) (data[j, 0] * frame.Width);
101 |           var centerY = (int) (data[j, 1] * frame.Height);
102 |           var boxWidth = (int) (data[j, 2] * frame.Width);
103 |           var boxHeight = (int) (data[j, 3] * frame.Height);
104 | 
105 |           // Rectangle is built from the top-left corner, so move back from the centre by half the size.
106 |           var x = centerX - (boxWidth / 2);
107 |           var y = centerY - (boxHeight / 2);
108 | 
109 |           boxes.Push(new[] { new System.Drawing.Rectangle(x, y, boxWidth, boxHeight) });
110 |           indices.Push(new[] { classId });
111 |           scores.Push(new[] { confidence });
112 |         }
113 |       }
114 | 
115 |       var bestIndex = DnnInvoke.NMSBoxes(boxes.ToArray(), scores.ToArray(), ScoreThreshold, NmsThreshold);
116 | 
117 |       using var frameOut = frame.ToImage<Bgr, byte>();
118 | 
119 |       foreach (int index in bestIndex)
120 |       {
121 |         var box = boxes[index];
122 |         int labelIndex = indices[index];
123 |         // Fall back to the numeric class id if the names file has fewer lines than the network has classes.
124 |         string label = labelIndex < classLabels.Length ? classLabels[labelIndex] : $"{labelIndex}";
125 | 
126 |         CvInvoke.Rectangle(frameOut, box, new MCvScalar(0, 255, 0), 2);
127 |         CvInvoke.PutText(frameOut, label, new System.Drawing.Point(box.X, box.Y - 20),
128 |           Emgu.CV.CvEnum.FontFace.HersheyPlain, 1.0, new MCvScalar(0, 0, 255), 2);
129 |       }
130 | 
131 |       CvInvoke.Resize(frameOut, frameOut, new System.Drawing.Size(0, 0), DisplayScale, DisplayScale);
132 |       CvInvoke.Imshow("output", frameOut);
133 | 
134 |       if (CvInvoke.WaitKey(1) == ExitKey)
135 |       {
136 |         break;
137 |       }
138 |     }
139 | 
140 |     // Alternative detector kept for reference: Haar cascade face detection. It loads
141 |     // ./detection/haarcascade_frontalface_default.xml, which is not among the files listed under detection/.
142 |     /*var faceCascade = new CascadeClassifier("./detection/haarcascade_frontalface_default.xml"); 
143 |     var vc = new VideoCapture(0, Emgu.CV.VideoCapture.API.DShow); 
144 | 
145 |     Mat frame = new(); 
146 |     Mat frameGray = new(); 
147 | 
148 |     while(true)
149 |     {
150 |         vc.Read(frame); 
151 | 
152 |         CvInvoke.CvtColor(frame, frameGray, Emgu.CV.CvEnum.ColorConversion.Bgr2Gray); 
153 | 
154 |         var faces = faceCascade.DetectMultiScale(frameGray, 1.3, 5); 
155 | 
156 |         if (faces is not null && faces.Length > 0) 
157 |           CvInvoke.Rectangle(frame, faces[0], new MCvScalar(0, 255, 0), 2);
158 | 
159 |         CvInvoke.Imshow("face detection", frame); 
160 | 
161 |         if (CvInvoke.WaitKey(1) == 27)
162 |         {
163 |           break;
164 |         }
165 | 
166 |     }
167 |     */
168 |   }
169 | }
170 | 
171 | 


--------------------------------------------------------------------------------
/detection/yolov3.cfg:
--------------------------------------------------------------------------------
  1 | [net]
  2 | # Testing
  3 | # batch=1
  4 | # subdivisions=1
  5 | # Training
  6 | batch=64
  7 | subdivisions=16
  8 | width=608
  9 | height=608
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 500200
 21 | policy=steps
 22 | steps=400000,450000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=32
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | # Downsample
 34 | 
 35 | [convolutional]
 36 | batch_normalize=1
 37 | filters=64
 38 | size=3
 39 | stride=2
 40 | pad=1
 41 | activation=leaky
 42 | 
 43 | [convolutional]
 44 | batch_normalize=1
 45 | filters=32
 46 | size=1
 47 | stride=1
 48 | pad=1
 49 | activation=leaky
 50 | 
 51 | [convolutional]
 52 | batch_normalize=1
 53 | filters=64
 54 | size=3
 55 | stride=1
 56 | pad=1
 57 | activation=leaky
 58 | 
 59 | [shortcut]
 60 | from=-3
 61 | activation=linear
 62 | 
 63 | # Downsample
 64 | 
 65 | [convolutional]
 66 | batch_normalize=1
 67 | filters=128
 68 | size=3
 69 | stride=2
 70 | pad=1
 71 | activation=leaky
 72 | 
 73 | [convolutional]
 74 | batch_normalize=1
 75 | filters=64
 76 | size=1
 77 | stride=1
 78 | pad=1
 79 | activation=leaky
 80 | 
 81 | [convolutional]
 82 | batch_normalize=1
 83 | filters=128
 84 | size=3
 85 | stride=1
 86 | pad=1
 87 | activation=leaky
 88 | 
 89 | [shortcut]
 90 | from=-3
 91 | activation=linear
 92 | 
 93 | [convolutional]
 94 | batch_normalize=1
 95 | filters=64
 96 | size=1
 97 | stride=1
 98 | pad=1
 99 | activation=leaky
100 | 
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 | 
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 | 
113 | # Downsample
114 | 
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 | 
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 | 
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 | 
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 | 
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 | 
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 | 
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 | 
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 | 
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 | 
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 | 
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 | 
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 | 
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 | 
203 | 
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 | 
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 | 
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 | 
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 | 
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 | 
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 | 
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 | 
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 | 
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 | 
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 | 
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 | 
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 | 
284 | # Downsample
285 | 
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 | 
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 | 
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 | 
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 | 
314 | 
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 | 
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 | 
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 | 
335 | 
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 | 
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 | 
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 | 
356 | 
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 | 
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 | 
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 | 
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 | 
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 | 
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 | 
397 | 
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 | 
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 | 
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 | 
418 | 
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 | 
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 | 
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 | 
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 | 
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 | 
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 | 
459 | # Downsample
460 | 
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 | 
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 | 
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 | 
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 | 
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 | 
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 | 
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 | 
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 | 
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 | 
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 | 
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 | 
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 | 
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 | 
549 | ######################
550 | 
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 | 
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 | 
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 | 
575 | [convolutional]
576 | batch_normalize=1
577 | size=3
578 | stride=1
579 | pad=1
580 | filters=1024
581 | activation=leaky
582 | 
583 | [convolutional]
584 | batch_normalize=1
585 | filters=512
586 | size=1
587 | stride=1
588 | pad=1
589 | activation=leaky
590 | 
591 | [convolutional]
592 | batch_normalize=1
593 | size=3
594 | stride=1
595 | pad=1
596 | filters=1024
597 | activation=leaky
598 | 
599 | [convolutional]
600 | size=1
601 | stride=1
602 | pad=1
603 | filters=255
604 | activation=linear
605 | 
606 | 
607 | [yolo]
608 | mask = 6,7,8
609 | anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
610 | classes=80
611 | num=9
612 | jitter=.3
613 | ignore_thresh = .7
614 | truth_thresh = 1
615 | random=1
616 | 
617 | 
618 | [route]
619 | layers = -4
620 | 
621 | [convolutional]
622 | batch_normalize=1
623 | filters=256
624 | size=1
625 | stride=1
626 | pad=1
627 | activation=leaky
628 | 
629 | [upsample]
630 | stride=2
631 | 
632 | [route]
633 | layers = -1, 61
634 | 
635 | 
636 | 
637 | [convolutional]
638 | batch_normalize=1
639 | filters=256
640 | size=1
641 | stride=1
642 | pad=1
643 | activation=leaky
644 | 
645 | [convolutional]
646 | batch_normalize=1
647 | size=3
648 | stride=1
649 | pad=1
650 | filters=512
651 | activation=leaky
652 | 
653 | [convolutional]
654 | batch_normalize=1
655 | filters=256
656 | size=1
657 | stride=1
658 | pad=1
659 | activation=leaky
660 | 
661 | [convolutional]
662 | batch_normalize=1
663 | size=3
664 | stride=1
665 | pad=1
666 | filters=512
667 | activation=leaky
668 | 
669 | [convolutional]
670 | batch_normalize=1
671 | filters=256
672 | size=1
673 | stride=1
674 | pad=1
675 | activation=leaky
676 | 
677 | [convolutional]
678 | batch_normalize=1
679 | size=3
680 | stride=1
681 | pad=1
682 | filters=512
683 | activation=leaky
684 | 
685 | [convolutional]
686 | size=1
687 | stride=1
688 | pad=1
689 | filters=255
690 | activation=linear
691 | 
692 | 
693 | [yolo]
694 | mask = 3,4,5
695 | anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
696 | classes=80
697 | num=9
698 | jitter=.3
699 | ignore_thresh = .7
700 | truth_thresh = 1
701 | random=1
702 | 
703 | 
704 | 
705 | [route]
706 | layers = -4
707 | 
708 | [convolutional]
709 | batch_normalize=1
710 | filters=128
711 | size=1
712 | stride=1
713 | pad=1
714 | activation=leaky
715 | 
716 | [upsample]
717 | stride=2
718 | 
719 | [route]
720 | layers = -1, 36
721 | 
722 | 
723 | 
724 | [convolutional]
725 | batch_normalize=1
726 | filters=128
727 | size=1
728 | stride=1
729 | pad=1
730 | activation=leaky
731 | 
732 | [convolutional]
733 | batch_normalize=1
734 | size=3
735 | stride=1
736 | pad=1
737 | filters=256
738 | activation=leaky
739 | 
740 | [convolutional]
741 | batch_normalize=1
742 | filters=128
743 | size=1
744 | stride=1
745 | pad=1
746 | activation=leaky
747 | 
748 | [convolutional]
749 | batch_normalize=1
750 | size=3
751 | stride=1
752 | pad=1
753 | filters=256
754 | activation=leaky
755 | 
756 | [convolutional]
757 | batch_normalize=1
758 | filters=128
759 | size=1
760 | stride=1
761 | pad=1
762 | activation=leaky
763 | 
764 | [convolutional]
765 | batch_normalize=1
766 | size=3
767 | stride=1
768 | pad=1
769 | filters=256
770 | activation=leaky
771 | 
772 | [convolutional]
773 | size=1
774 | stride=1
775 | pad=1
776 | filters=255
777 | activation=linear
778 | 
779 | 
780 | [yolo]
781 | mask = 0,1,2
782 | anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
783 | classes=80
784 | num=9
785 | jitter=.3
786 | ignore_thresh = .7
787 | truth_thresh = 1
788 | random=1
789 | 
790 | 


--------------------------------------------------------------------------------