├── COCO and Pascal VOC.md ├── LICENSE ├── OneStage ├── ssd │ ├── README.md │ └── ssd_img │ │ ├── SSD-architecture.png │ │ ├── SSD-box-scales.png │ │ ├── SSD-framework.png │ │ └── proof.png └── yolo │ ├── README.md │ ├── Train-a-YOLOv4-model │ ├── README.md │ ├── cfg │ │ ├── 9k.labels │ │ ├── 9k.names │ │ ├── 9k.tree │ │ ├── Gaussian_yolov3_BDD.cfg │ │ ├── alexnet.cfg │ │ ├── cd53paspp-gamma.cfg │ │ ├── cifar.cfg │ │ ├── cifar.test.cfg │ │ ├── coco.data │ │ ├── coco.names │ │ ├── coco9k.map │ │ ├── combine9k.data │ │ ├── crnn.train.cfg │ │ ├── csdarknet53-omega.cfg │ │ ├── csresnext50-panet-spp-original-optimal.cfg │ │ ├── csresnext50-panet-spp.cfg │ │ ├── darknet.cfg │ │ ├── darknet19.cfg │ │ ├── darknet19_448.cfg │ │ ├── darknet53.cfg │ │ ├── darknet53_448_xnor.cfg │ │ ├── densenet201.cfg │ │ ├── efficientnet-lite3.cfg │ │ ├── efficientnet_b0.cfg │ │ ├── enet-coco.cfg │ │ ├── extraction.cfg │ │ ├── extraction.conv.cfg │ │ ├── extraction22k.cfg │ │ ├── go.test.cfg │ │ ├── gru.cfg │ │ ├── imagenet.labels.list │ │ ├── imagenet.shortnames.list │ │ ├── imagenet1k.data │ │ ├── imagenet22k.dataset │ │ ├── imagenet9k.hierarchy.dataset │ │ ├── inet9k.map │ │ ├── jnet-conv.cfg │ │ ├── lstm.train.cfg │ │ ├── openimages.data │ │ ├── resnet101.cfg │ │ ├── resnet152.cfg │ │ ├── resnet152_trident.cfg │ │ ├── resnet50.cfg │ │ ├── resnext152-32x4d.cfg │ │ ├── rnn.cfg │ │ ├── rnn.train.cfg │ │ ├── strided.cfg │ │ ├── t1.test.cfg │ │ ├── tiny-yolo-voc.cfg │ │ ├── tiny-yolo.cfg │ │ ├── tiny-yolo_xnor.cfg │ │ ├── tiny.cfg │ │ ├── vgg-16.cfg │ │ ├── vgg-conv.cfg │ │ ├── voc.data │ │ ├── writing.cfg │ │ ├── yolo-voc.2.0.cfg │ │ ├── yolo-voc.cfg │ │ ├── yolo.2.0.cfg │ │ ├── yolo.cfg │ │ ├── yolo9000.cfg │ │ ├── yolov1 │ │ │ ├── tiny-coco.cfg │ │ │ ├── tiny-yolo.cfg │ │ │ ├── xyolo.test.cfg │ │ │ ├── yolo-coco.cfg │ │ │ ├── yolo-small.cfg │ │ │ ├── yolo.cfg │ │ │ ├── yolo.train.cfg │ │ │ └── yolo2.cfg │ │ ├── yolov2-tiny-voc.cfg │ │ ├── yolov2-tiny.cfg │ │ ├── yolov2-voc.cfg │ │ ├── yolov2.cfg │ │ ├── yolov3-openimages.cfg │ │ ├── yolov3-spp.cfg │ │ ├── yolov3-tiny-prn.cfg │ │ ├── yolov3-tiny.cfg │ │ ├── yolov3-tiny_3l.cfg │ │ ├── yolov3-tiny_obj.cfg │ │ ├── yolov3-tiny_occlusion_track.cfg │ │ ├── yolov3-tiny_xnor.cfg │ │ ├── yolov3-voc.cfg │ │ ├── yolov3-voc.yolov3-giou-40.cfg │ │ ├── yolov3.cfg │ │ ├── yolov3.coco-giou-12.cfg │ │ ├── yolov3_5l.cfg │ │ ├── yolov4-custom.cfg │ │ └── yolov4.cfg │ ├── imgs │ │ ├── chart_yolov4-custom.png │ │ └── yolov4.png │ ├── requirements.txt │ ├── tools │ │ ├── img2train.py │ │ ├── name.py │ │ └── voc_label.py │ └── yolov4-custom.cfg │ ├── coco2voc.md │ ├── coco2voc.py │ ├── convert2Yolo │ ├── Format.py │ ├── README.md │ ├── example.py │ ├── example │ │ ├── kitti │ │ │ ├── images │ │ │ │ └── 000021.jpg │ │ │ ├── labels │ │ │ │ └── 000021.txt │ │ │ └── names.txt │ │ └── voc │ │ │ ├── JPEG │ │ │ └── 000001.jpg │ │ │ ├── label │ │ │ └── 000001.xml │ │ │ └── names.txt │ ├── images │ │ ├── voc_image.png │ │ └── voc_xml.png │ ├── label_visualization.py │ ├── msgLogInfo.py │ └── requirements.txt │ ├── deep_sort_yolov3 │ ├── LICENSE │ ├── README.md │ ├── convert.py │ ├── deep_sort │ │ ├── __init__.py │ │ ├── detection.py │ │ ├── iou_matching.py │ │ ├── kalman_filter.py │ │ ├── linear_assignment.py │ │ ├── nn_matching.py │ │ ├── preprocessing.py │ │ ├── track.py │ │ └── tracker.py │ ├── detection.txt │ ├── main.py │ ├── model_data │ │ ├── coco_classes.txt │ │ ├── market1501.pb │ │ ├── mars-small128.pb │ │ ├── mars.pb │ │ ├── obj.txt │ │ ├── voc_classes.txt │ │ ├── yolo3_object.names │ 
│ ├── yolo_anchors.txt │ │ └── yolov3.cfg │ ├── requirements.txt │ ├── tools │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ └── generate_detections.cpython-35.pyc │ │ ├── freeze_model.py │ │ └── generate_detections.py │ ├── yolo.py │ └── yolo3 │ │ ├── model.py │ │ └── utils.py │ ├── deep_sort_yolov4 │ ├── README.md │ ├── convert.py │ ├── deep_sort │ │ ├── __init__.py │ │ ├── detection.py │ │ ├── detection_yolo.py │ │ ├── iou_matching.py │ │ ├── kalman_filter.py │ │ ├── linear_assignment.py │ │ ├── nn_matching.py │ │ ├── preprocessing.py │ │ ├── track.py │ │ └── tracker.py │ ├── main.py │ ├── model_data │ │ ├── coco_classes.txt │ │ ├── market1501.pb │ │ ├── mars-small128.pb │ │ ├── mars.pb │ │ ├── obj.txt │ │ └── yolo_anchors.txt │ ├── output │ │ ├── README.md │ │ └── comparison.png │ ├── requirements.txt │ ├── social_distance.py │ ├── test_video │ │ └── README.md │ ├── tools │ │ ├── frame2video.py │ │ ├── freeze_model.py │ │ ├── generate_detections.py │ │ └── video2frame.py │ ├── yolo.py │ └── yolo4 │ │ ├── model.py │ │ └── utils.py │ ├── main.py │ ├── requirements.txt │ ├── tools │ ├── frame2video.py │ ├── freeze_model.py │ ├── generate_detections.py │ └── video2frame.py │ ├── w_name2txt.py │ ├── yolo.py │ ├── yolo4 │ ├── model.py │ └── utils.py │ ├── yolo_img │ ├── 1*DhuOI39lNp6ZrG63h-ioBQ.png │ ├── Results on MS COCO.png │ ├── Results on PASCAL VOC 2012 test set.png │ ├── Screenshot from 2019-05-18 16-55-25.png │ ├── TownCentreXVID_output_ss.gif │ ├── model2.png │ ├── output_49.gif │ ├── output_car_143.gif │ ├── output_person_315_1120_s.gif │ ├── yolo-network-architecture.png │ ├── yolo-responsible-predictor.png │ ├── yolo.png │ ├── yologo_1.png │ ├── yolov1.png │ ├── yolov1_lossfunc.png │ ├── yolov1network.png │ └── yolov2.png │ ├── yolov3 │ ├── Annotations │ │ ├── README.md │ │ └── t1_video_00001_00001.xml │ ├── JPEGImages │ │ ├── README.md │ │ └── t1_video_00001_00001.jpg │ ├── README.md │ ├── backup │ │ └── README.md │ ├── cfg │ │ ├── example.cfg │ │ ├── yolo3_object.data │ │ ├── yolov3-cai-tiny.cfg │ │ ├── yolov3-tiny-action.cfg │ │ ├── yolov3-voc-object.cfg │ │ ├── yolov3-voc.cfg │ │ ├── yolov3.cfg │ │ └── yolov3_action.cfg │ ├── img2train.py │ ├── labels │ │ ├── README.md │ │ └── t1_video_00001_00001.txt │ ├── object_train.txt │ ├── object_val.txt │ ├── test_img │ │ └── predictions.jpg │ ├── train.txt │ ├── val.txt │ ├── voc_label.py │ └── yolo3_object.names │ └── yolov3_sort │ ├── README.md │ ├── main.py │ ├── sort.py │ └── yolo-obj │ ├── coco.names │ ├── yolo3_object.names │ ├── yolov3.cfg │ └── yolov3_1.cfg ├── README.md ├── Tensorflow detection model zoo.md ├── Two-stage vs One-stage Detectors.md ├── TwoStage └── R-CNN │ ├── README.md │ ├── annotation.jpg │ └── convert2json.py ├── img ├── 8.1.2.png ├── COCO object detection dataset.jpeg ├── F1.png ├── Object-Detection-Deep-Learning.jpg ├── PASCAL VOC 2007 and 2012 data FPS.png ├── PASCAL VOC 2007 and 2012 data.png ├── ap.png ├── coco.png ├── coco_yolo.png ├── dataset.png ├── deep_learning_object_detection_history.PNG ├── fig1 .png ├── fig1-1.jpeg ├── fig1-2.jpeg ├── fig2.png ├── fig3.png ├── fig4.png ├── objectdetection.gif ├── one_stage.png ├── two_stage.png ├── voc.png ├── voc_yolo.png └── yolo_vs_rcnn.png └── mAP&IoU.md /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Bobby Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /OneStage/ssd/README.md: -------------------------------------------------------------------------------- 1 | # SDD: Single Shot MultiBox Detector 2 | *The Single Shot Detector (SSD; Liu et al, 2016) is one of the first attempts at using convolutional neural network’s pyramidal feature hierarchy for efficient detection of objects of various sizes.* 3 | 4 | ## Image Pyramid 5 | 6 | SSD uses the VGG-16 model pre-trained on ImageNet as its base model for extracting useful image features. On top of VGG16, SSD adds several conv feature layers of decreasing sizes. They can be seen as a pyramid representation of images at different scales. Intuitively large fine-grained feature maps at earlier levels are good at capturing small objects and small coarse-grained feature maps can detect large objects well. In SSD, the detection happens in every pyramidal layer, targeting at objects of various sizes. 7 | 8 | 9 | 10 | ## Workflow 11 | Unlike YOLO, SSD does not split the image into grids of arbitrary size but predicts offset of predefined anchor boxes (this is called “default boxes” in the paper) for every location of the feature map. Each box has a fixed size and position relative to its corresponding cell. All the anchor boxes tile the whole feature map in a convolutional manner. 12 | 13 | Feature maps at different levels have different receptive field sizes. The anchor boxes on different levels are rescaled so that one feature map is only responsible for objects at one particular scale. For example, in Fig. 5 the dog can only be detected in the 4x4 feature map (higher level) while the cat is just captured by the 8x8 feature map (lower level). 14 | 15 | 16 | 17 | *The SSD framework. (a) The training data contains images and ground truth boxes for every object. (b) In a fine-grained feature maps (8 x 8), the anchor boxes of different aspect ratios correspond to smaller area of the raw input. (c) In a coarse-grained feature map (4 x 4), the anchor boxes cover larger area of the raw input. (Image source: original paper)* 18 | 19 | The width, height and the center location of an anchor box are all normalized to be (0, 1). At a location (i,j) of the ℓ-th feature layer of size m×n, i=1,…,n,j=1,…,m, we have a unique linear scale proportional to the layer level and 5 different box aspect ratios (width-to-height ratios), in addition to a special scale (why we need this? the paper didn’t explain. maybe just a heuristic trick) when the aspect ratio is 1. 
This gives us 6 anchor boxes in total per feature cell. 20 | 21 | 22 | 23 | 24 | 25 | *An example of how the anchor box size is scaled up with the layer index ℓ for L=6,smin=0.2,smax=0.9. Only the boxes of aspect ratio r=1 are illustrated.* 26 | 27 | At every location, the model outputs 4 offsets and c class probabilities by applying a 3×3×p conv filter (where p is the number of channels in the feature map) for every one of k anchor boxes. Therefore, given a feature map of size m×n, we need kmn(c+4) prediction filters. 28 | -------------------------------------------------------------------------------- /OneStage/ssd/ssd_img/SSD-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/ssd/ssd_img/SSD-architecture.png -------------------------------------------------------------------------------- /OneStage/ssd/ssd_img/SSD-box-scales.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/ssd/ssd_img/SSD-box-scales.png -------------------------------------------------------------------------------- /OneStage/ssd/ssd_img/SSD-framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/ssd/ssd_img/SSD-framework.png -------------------------------------------------------------------------------- /OneStage/ssd/ssd_img/proof.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/ssd/ssd_img/proof.png -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/alexnet.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=227 5 | width=227 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | max_crop=256 10 | 11 | learning_rate=0.01 12 | policy=poly 13 | power=4 14 | max_batches=800000 15 | 16 | angle=7 17 | hue = .1 18 | saturation=.75 19 | exposure=.75 20 | aspect=.75 21 | 22 | [convolutional] 23 | filters=96 24 | size=11 25 | stride=4 26 | pad=0 27 | activation=relu 28 | 29 | [maxpool] 30 | size=3 31 | stride=2 32 | padding=0 33 | 34 | [convolutional] 35 | filters=256 36 | size=5 37 | stride=1 38 | pad=1 39 | activation=relu 40 | 41 | [maxpool] 42 | size=3 43 | stride=2 44 | padding=0 45 | 46 | [convolutional] 47 | filters=384 48 | size=3 49 | stride=1 50 | pad=1 51 | activation=relu 52 | 53 | [convolutional] 54 | filters=384 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=relu 59 | 60 | [convolutional] 61 | filters=256 62 | size=3 63 | stride=1 64 | pad=1 65 | activation=relu 66 | 67 | [maxpool] 68 | size=3 69 | stride=2 70 | padding=0 71 | 72 | [connected] 73 | output=4096 74 | activation=relu 75 | 76 | [dropout] 77 | probability=.5 78 | 79 | [connected] 80 | output=4096 81 | activation=relu 82 | 83 | [dropout] 84 | probability=.5 85 | 86 | [connected] 87 | output=1000 88 | activation=linear 89 | 90 | [softmax] 91 | groups=1 92 | 93 | [cost] 94 | type=sse 95 | 96 | 
-------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/cifar.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=32 5 | width=32 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.4 11 | policy=poly 12 | power=4 13 | max_batches = 50000 14 | 15 | [crop] 16 | crop_width=28 17 | crop_height=28 18 | flip=1 19 | angle=0 20 | saturation = 1 21 | exposure = 1 22 | noadjust=1 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=128 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=128 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=128 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [maxpool] 49 | size=2 50 | stride=2 51 | 52 | [dropout] 53 | probability=.5 54 | 55 | [convolutional] 56 | batch_normalize=1 57 | filters=256 58 | size=3 59 | stride=1 60 | pad=1 61 | activation=leaky 62 | 63 | [convolutional] 64 | batch_normalize=1 65 | filters=256 66 | size=3 67 | stride=1 68 | pad=1 69 | activation=leaky 70 | 71 | [convolutional] 72 | batch_normalize=1 73 | filters=256 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [maxpool] 80 | size=2 81 | stride=2 82 | 83 | [dropout] 84 | probability=.5 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [convolutional] 95 | batch_normalize=1 96 | filters=512 97 | size=3 98 | stride=1 99 | pad=1 100 | activation=leaky 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | filters=512 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=leaky 109 | 110 | [dropout] 111 | probability=.5 112 | 113 | [convolutional] 114 | filters=10 115 | size=1 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [avgpool] 121 | 122 | [softmax] 123 | groups=1 124 | 125 | [cost] 126 | 127 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/cifar.test.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=32 5 | width=32 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.4 11 | policy=poly 12 | power=4 13 | max_batches = 50000 14 | 15 | 16 | [convolutional] 17 | batch_normalize=1 18 | filters=128 19 | size=3 20 | stride=1 21 | pad=1 22 | activation=leaky 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=128 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=128 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [dropout] 45 | probability=.5 46 | 47 | [convolutional] 48 | batch_normalize=1 49 | filters=256 50 | size=3 51 | stride=1 52 | pad=1 53 | activation=leaky 54 | 55 | [convolutional] 56 | batch_normalize=1 57 | filters=256 58 | size=3 59 | stride=1 60 | pad=1 61 | activation=leaky 62 | 63 | [convolutional] 64 | batch_normalize=1 65 | filters=256 66 | size=3 67 | stride=1 68 | pad=1 69 | activation=leaky 70 | 71 | [maxpool] 72 | size=2 73 | stride=2 74 | 75 | [dropout] 76 | probability=.5 77 | 78 | [convolutional] 79 | batch_normalize=1 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | 
activation=leaky 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [convolutional] 95 | batch_normalize=1 96 | filters=512 97 | size=3 98 | stride=1 99 | pad=1 100 | activation=leaky 101 | 102 | [dropout] 103 | probability=.5 104 | 105 | [convolutional] 106 | filters=10 107 | size=1 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | [avgpool] 113 | 114 | [softmax] 115 | groups=1 116 | temperature=3 117 | 118 | [cost] 119 | 120 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/coco.data: -------------------------------------------------------------------------------- 1 | classes= 80 2 | train = /home/pjreddie/data/coco/trainvalno5k.txt 3 | valid = coco_testdev 4 | #valid = data/coco_val_5k.list 5 | names = data/coco.names 6 | backup = /home/pjreddie/backup/ 7 | eval=coco 8 | 9 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/coco9k.map: -------------------------------------------------------------------------------- 1 | 5177 2 | 3768 3 | 3802 4 | 3800 5 | 4107 6 | 4072 7 | 4071 8 | 3797 9 | 4097 10 | 2645 11 | 5150 12 | 2644 13 | 3257 14 | 2523 15 | 6527 16 | 6866 17 | 6912 18 | 7342 19 | 7255 20 | 7271 21 | 7217 22 | 6858 23 | 7343 24 | 7233 25 | 3704 26 | 4374 27 | 3641 28 | 5001 29 | 3899 30 | 2999 31 | 2631 32 | 5141 33 | 2015 34 | 1133 35 | 1935 36 | 1930 37 | 5144 38 | 5143 39 | 2371 40 | 3916 41 | 3745 42 | 3640 43 | 4749 44 | 4736 45 | 4735 46 | 3678 47 | 58 48 | 42 49 | 771 50 | 81 51 | 152 52 | 141 53 | 786 54 | 700 55 | 218 56 | 791 57 | 2518 58 | 2521 59 | 3637 60 | 2458 61 | 2505 62 | 2519 63 | 3499 64 | 2837 65 | 3503 66 | 2597 67 | 3430 68 | 2080 69 | 5103 70 | 5111 71 | 5102 72 | 3013 73 | 5096 74 | 1102 75 | 3218 76 | 4010 77 | 2266 78 | 1127 79 | 5122 80 | 2360 81 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/combine9k.data: -------------------------------------------------------------------------------- 1 | classes= 9418 2 | #train = /home/pjreddie/data/coco/trainvalno5k.txt 3 | train = data/combine9k.train.list 4 | valid = 
/home/pjreddie/data/imagenet/det.val.files 5 | labels = data/9k.labels 6 | names = data/9k.names 7 | backup = backup/ 8 | map = data/inet9k.map 9 | eval = imagenet 10 | results = results 11 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/crnn.train.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | subdivisions=8 3 | inputs=256 4 | batch = 128 5 | momentum=0.9 6 | decay=0.001 7 | max_batches = 2000 8 | time_steps=576 9 | learning_rate=0.1 10 | policy=steps 11 | steps=1000,1500 12 | scales=.1,.1 13 | 14 | try_fix_nan=1 15 | 16 | [connected] 17 | output=256 18 | activation=leaky 19 | 20 | [crnn] 21 | batch_normalize=1 22 | size=1 23 | pad=0 24 | output = 1024 25 | hidden=1024 26 | activation=leaky 27 | 28 | [crnn] 29 | batch_normalize=1 30 | size=1 31 | pad=0 32 | output = 1024 33 | hidden=1024 34 | activation=leaky 35 | 36 | [crnn] 37 | batch_normalize=1 38 | size=1 39 | pad=0 40 | output = 1024 41 | hidden=1024 42 | activation=leaky 43 | 44 | [connected] 45 | output=256 46 | activation=leaky 47 | 48 | [softmax] 49 | 50 | [cost] 51 | type=sse 52 | 53 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/darknet.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | max_crop=320 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | [convolutional] 17 | batch_normalize=1 18 | filters=16 19 | size=3 20 | stride=1 21 | pad=1 22 | activation=leaky 23 | 24 | [maxpool] 25 | size=2 26 | stride=2 27 | 28 | [convolutional] 29 | batch_normalize=1 30 | filters=32 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=leaky 35 | 36 | [maxpool] 37 | size=2 38 | stride=2 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=64 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [maxpool] 49 | size=2 50 | stride=2 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=128 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [maxpool] 61 | size=2 62 | stride=2 63 | 64 | [convolutional] 65 | batch_normalize=1 66 | filters=256 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [maxpool] 73 | size=2 74 | stride=2 75 | 76 | [convolutional] 77 | batch_normalize=1 78 | filters=512 79 | size=3 80 | stride=1 81 | pad=1 82 | activation=leaky 83 | 84 | [maxpool] 85 | size=2 86 | stride=2 87 | padding=1 88 | 89 | [convolutional] 90 | batch_normalize=1 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1000 99 | size=1 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [avgpool] 105 | 106 | [softmax] 107 | groups=1 108 | 109 | [cost] 110 | type=sse 111 | 112 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/darknet19.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | max_crop=448 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | [convolutional] 17 | batch_normalize=1 18 | filters=32 19 | size=3 20 | stride=1 21 | pad=1 22 | 
activation=leaky 23 | 24 | [maxpool] 25 | size=2 26 | stride=2 27 | 28 | [convolutional] 29 | batch_normalize=1 30 | filters=64 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=leaky 35 | 36 | [maxpool] 37 | size=2 38 | stride=2 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=128 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=64 51 | size=1 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [convolutional] 77 | batch_normalize=1 78 | filters=128 79 | size=1 80 | stride=1 81 | pad=1 82 | activation=leaky 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=256 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [maxpool] 93 | size=2 94 | stride=2 95 | 96 | [convolutional] 97 | batch_normalize=1 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=256 107 | size=1 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | [convolutional] 113 | batch_normalize=1 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | batch_normalize=1 122 | filters=256 123 | size=1 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [convolutional] 129 | batch_normalize=1 130 | filters=512 131 | size=3 132 | stride=1 133 | pad=1 134 | activation=leaky 135 | 136 | [maxpool] 137 | size=2 138 | stride=2 139 | 140 | [convolutional] 141 | batch_normalize=1 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [convolutional] 149 | batch_normalize=1 150 | filters=512 151 | size=1 152 | stride=1 153 | pad=1 154 | activation=leaky 155 | 156 | [convolutional] 157 | batch_normalize=1 158 | filters=1024 159 | size=3 160 | stride=1 161 | pad=1 162 | activation=leaky 163 | 164 | [convolutional] 165 | batch_normalize=1 166 | filters=512 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [convolutional] 181 | filters=1000 182 | size=1 183 | stride=1 184 | pad=1 185 | activation=linear 186 | 187 | [avgpool] 188 | 189 | [softmax] 190 | groups=1 191 | 192 | [cost] 193 | type=sse 194 | 195 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/darknet19_448.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | #batch=128 3 | #subdivisions=4 4 | batch=1 5 | subdivisions=1 6 | height=448 7 | width=448 8 | max_crop=512 9 | channels=3 10 | momentum=0.9 11 | decay=0.0005 12 | 13 | learning_rate=0.001 14 | policy=poly 15 | power=4 16 | max_batches=100000 17 | 18 | angle=7 19 | hue = .1 20 | saturation=.75 21 | exposure=.75 22 | aspect=.75 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=32 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [maxpool] 33 | size=2 34 | stride=2 35 | 36 | [convolutional] 37 | batch_normalize=1 38 | filters=64 39 | size=3 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [maxpool] 45 | 
size=2 46 | stride=2 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=128 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=64 59 | size=1 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [convolutional] 65 | batch_normalize=1 66 | filters=128 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [maxpool] 73 | size=2 74 | stride=2 75 | 76 | [convolutional] 77 | batch_normalize=1 78 | filters=256 79 | size=3 80 | stride=1 81 | pad=1 82 | activation=leaky 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=128 87 | size=1 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=256 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | [maxpool] 101 | size=2 102 | stride=2 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=512 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | [convolutional] 113 | batch_normalize=1 114 | filters=256 115 | size=1 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | batch_normalize=1 122 | filters=512 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [convolutional] 129 | batch_normalize=1 130 | filters=256 131 | size=1 132 | stride=1 133 | pad=1 134 | activation=leaky 135 | 136 | [convolutional] 137 | batch_normalize=1 138 | filters=512 139 | size=3 140 | stride=1 141 | pad=1 142 | activation=leaky 143 | 144 | [maxpool] 145 | size=2 146 | stride=2 147 | 148 | [convolutional] 149 | batch_normalize=1 150 | filters=1024 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=leaky 155 | 156 | [convolutional] 157 | batch_normalize=1 158 | filters=512 159 | size=1 160 | stride=1 161 | pad=1 162 | activation=leaky 163 | 164 | [convolutional] 165 | batch_normalize=1 166 | filters=1024 167 | size=3 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=512 175 | size=1 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [convolutional] 181 | batch_normalize=1 182 | filters=1024 183 | size=3 184 | stride=1 185 | pad=1 186 | activation=leaky 187 | 188 | [convolutional] 189 | filters=1000 190 | size=1 191 | stride=1 192 | pad=1 193 | activation=linear 194 | 195 | [avgpool] 196 | 197 | [softmax] 198 | groups=1 199 | 200 | [cost] 201 | type=sse 202 | 203 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/extraction.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | max_crop=320 7 | channels=3 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | [convolutional] 17 | batch_normalize=1 18 | filters=64 19 | size=7 20 | stride=2 21 | pad=1 22 | activation=leaky 23 | 24 | [maxpool] 25 | size=2 26 | stride=2 27 | 28 | [convolutional] 29 | batch_normalize=1 30 | filters=192 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=leaky 35 | 36 | [maxpool] 37 | size=2 38 | stride=2 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=128 43 | size=1 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=256 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | 
filters=256 59 | size=1 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [convolutional] 65 | batch_normalize=1 66 | filters=512 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [maxpool] 73 | size=2 74 | stride=2 75 | 76 | [convolutional] 77 | batch_normalize=1 78 | filters=256 79 | size=1 80 | stride=1 81 | pad=1 82 | activation=leaky 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=256 95 | size=1 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=512 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=256 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=512 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [convolutional] 141 | batch_normalize=1 142 | filters=512 143 | size=1 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [convolutional] 149 | batch_normalize=1 150 | filters=1024 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=leaky 155 | 156 | [maxpool] 157 | size=2 158 | stride=2 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=512 163 | size=1 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=1024 171 | size=3 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=512 179 | size=1 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | [convolutional] 185 | batch_normalize=1 186 | filters=1024 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | [convolutional] 193 | filters=1000 194 | size=1 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [avgpool] 200 | 201 | [softmax] 202 | groups=1 203 | 204 | [cost] 205 | type=sse 206 | 207 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/extraction.conv.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=256 5 | width=256 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.5 11 | policy=poly 12 | power=6 13 | max_batches=500000 14 | 15 | [convolutional] 16 | filters=64 17 | size=7 18 | stride=2 19 | pad=1 20 | activation=leaky 21 | 22 | [maxpool] 23 | size=2 24 | stride=2 25 | 26 | [convolutional] 27 | filters=192 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | filters=128 39 | size=1 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [convolutional] 45 | filters=256 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | filters=256 53 | size=1 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [convolutional] 59 | filters=512 60 | size=3 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [maxpool] 66 | size=2 67 | stride=2 68 | 69 | [convolutional] 70 | 
filters=256 71 | size=1 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [convolutional] 77 | filters=512 78 | size=3 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [convolutional] 84 | filters=256 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | filters=512 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=256 99 | size=1 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | filters=256 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | filters=512 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | filters=512 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | filters=1024 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [maxpool] 140 | size=2 141 | stride=2 142 | 143 | [convolutional] 144 | filters=512 145 | size=1 146 | stride=1 147 | pad=1 148 | activation=leaky 149 | 150 | [convolutional] 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | filters=512 159 | size=1 160 | stride=1 161 | pad=1 162 | activation=leaky 163 | 164 | [convolutional] 165 | filters=1024 166 | size=3 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [avgpool] 172 | 173 | [connected] 174 | output=1000 175 | activation=leaky 176 | 177 | [softmax] 178 | groups=1 179 | 180 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/extraction22k.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | max_crop=320 7 | channels=3 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | learning_rate=0.01 12 | max_batches = 0 13 | policy=steps 14 | steps=444000,590000,970000 15 | scales=.5,.2,.1 16 | 17 | #policy=sigmoid 18 | #gamma=.00008 19 | #step=100000 20 | #max_batches=200000 21 | 22 | [convolutional] 23 | batch_normalize=1 24 | filters=64 25 | size=7 26 | stride=2 27 | pad=1 28 | activation=leaky 29 | 30 | [maxpool] 31 | size=2 32 | stride=2 33 | 34 | [convolutional] 35 | batch_normalize=1 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | batch_normalize=1 48 | filters=128 49 | size=1 50 | stride=1 51 | pad=1 52 | activation=leaky 53 | 54 | [convolutional] 55 | batch_normalize=1 56 | filters=256 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=256 65 | size=1 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [convolutional] 71 | batch_normalize=1 72 | filters=512 73 | size=3 74 | stride=1 75 | pad=1 76 | activation=leaky 77 | 78 | [maxpool] 79 | size=2 80 | stride=2 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=256 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | batch_normalize=1 92 | filters=512 93 | size=3 94 | stride=1 95 | pad=1 96 | activation=leaky 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=256 101 | size=1 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | batch_normalize=1 108 | 
filters=512 109 | size=3 110 | stride=1 111 | pad=1 112 | activation=leaky 113 | 114 | [convolutional] 115 | batch_normalize=1 116 | filters=256 117 | size=1 118 | stride=1 119 | pad=1 120 | activation=leaky 121 | 122 | [convolutional] 123 | batch_normalize=1 124 | filters=512 125 | size=3 126 | stride=1 127 | pad=1 128 | activation=leaky 129 | 130 | [convolutional] 131 | batch_normalize=1 132 | filters=256 133 | size=1 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | [convolutional] 139 | batch_normalize=1 140 | filters=512 141 | size=3 142 | stride=1 143 | pad=1 144 | activation=leaky 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=512 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [convolutional] 155 | batch_normalize=1 156 | filters=1024 157 | size=3 158 | stride=1 159 | pad=1 160 | activation=leaky 161 | 162 | [maxpool] 163 | size=2 164 | stride=2 165 | 166 | [convolutional] 167 | batch_normalize=1 168 | filters=1024 169 | size=1 170 | stride=1 171 | pad=1 172 | activation=leaky 173 | 174 | [convolutional] 175 | batch_normalize=1 176 | filters=2048 177 | size=3 178 | stride=1 179 | pad=1 180 | activation=leaky 181 | 182 | [convolutional] 183 | batch_normalize=1 184 | filters=1024 185 | size=1 186 | stride=1 187 | pad=1 188 | activation=leaky 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=2048 193 | size=3 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [avgpool] 199 | 200 | [connected] 201 | output=21842 202 | activation=leaky 203 | 204 | [softmax] 205 | groups=1 206 | 207 | [cost] 208 | type=sse 209 | 210 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/go.test.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=19 5 | width=19 6 | channels=1 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.1 11 | policy=poly 12 | power=4 13 | max_batches=400000 14 | 15 | [convolutional] 16 | filters=192 17 | size=3 18 | stride=1 19 | pad=1 20 | activation=relu 21 | batch_normalize=1 22 | 23 | [convolutional] 24 | filters=192 25 | size=3 26 | stride=1 27 | pad=1 28 | activation=relu 29 | batch_normalize=1 30 | 31 | [convolutional] 32 | filters=192 33 | size=3 34 | stride=1 35 | pad=1 36 | activation=relu 37 | batch_normalize=1 38 | 39 | [convolutional] 40 | filters=192 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=relu 45 | batch_normalize=1 46 | 47 | [convolutional] 48 | filters=192 49 | size=3 50 | stride=1 51 | pad=1 52 | activation=relu 53 | batch_normalize=1 54 | 55 | [convolutional] 56 | filters=192 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=relu 61 | batch_normalize=1 62 | 63 | [convolutional] 64 | filters=192 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=relu 69 | batch_normalize=1 70 | 71 | [convolutional] 72 | filters=192 73 | size=3 74 | stride=1 75 | pad=1 76 | activation=relu 77 | batch_normalize=1 78 | 79 | [convolutional] 80 | filters=192 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=relu 85 | batch_normalize=1 86 | 87 | [convolutional] 88 | filters=192 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=relu 93 | batch_normalize=1 94 | 95 | [convolutional] 96 | filters=192 97 | size=3 98 | stride=1 99 | pad=1 100 | activation=relu 101 | batch_normalize=1 102 | 103 | [convolutional] 104 | filters=192 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=relu 109 | batch_normalize=1 110 | 111 | 
[convolutional] 112 | filters=192 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=relu 117 | batch_normalize=1 118 | 119 | 120 | [convolutional] 121 | filters=1 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=linear 126 | 127 | [softmax] 128 | 129 | [cost] 130 | type=sse 131 | 132 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/gru.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | subdivisions=1 3 | inputs=256 4 | batch = 1 5 | momentum=0.9 6 | decay=0.001 7 | time_steps=1 8 | learning_rate=0.5 9 | 10 | policy=poly 11 | power=4 12 | max_batches=2000 13 | 14 | [gru] 15 | batch_normalize=1 16 | output = 1024 17 | 18 | [gru] 19 | batch_normalize=1 20 | output = 1024 21 | 22 | [gru] 23 | batch_normalize=1 24 | output = 1024 25 | 26 | [connected] 27 | output=256 28 | activation=linear 29 | 30 | [softmax] 31 | 32 | [cost] 33 | type=sse 34 | 35 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/imagenet1k.data: -------------------------------------------------------------------------------- 1 | classes=1000 2 | train = /data/imagenet/imagenet1k.train.list 3 | valid = /data/imagenet/imagenet1k.valid.list 4 | backup = /home/pjreddie/backup/ 5 | labels = data/imagenet.labels.list 6 | names = data/imagenet.shortnames.list 7 | top=5 8 | 9 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/imagenet22k.dataset: -------------------------------------------------------------------------------- 1 | classes=21842 2 | train = /data/imagenet/imagenet22k.train.list 3 | valid = /data/imagenet/imagenet22k.valid.list 4 | backup = /home/pjreddie/backup/ 5 | labels = data/imagenet.labels.list 6 | names = data/imagenet.shortnames.list 7 | top = 5 8 | 9 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/imagenet9k.hierarchy.dataset: -------------------------------------------------------------------------------- 1 | classes=9418 2 | train = data/9k.train.list 3 | valid = /data/imagenet/imagenet1k.valid.list 4 | leaves = data/imagenet1k.labels 5 | backup = /home/pjreddie/backup/ 6 | labels = data/9k.labels 7 | names = data/9k.names 8 | top=5 9 | 10 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/inet9k.map: -------------------------------------------------------------------------------- 1 | 2687 2 | 4107 3 | 8407 4 | 7254 5 | 42 6 | 6797 7 | 127 8 | 2268 9 | 2442 10 | 3704 11 | 260 12 | 1970 13 | 58 14 | 4443 15 | 2661 16 | 2043 17 | 2039 18 | 4858 19 | 4007 20 | 6858 21 | 8408 22 | 166 23 | 2523 24 | 3768 25 | 4347 26 | 6527 27 | 2446 28 | 5005 29 | 3274 30 | 3678 31 | 4918 32 | 709 33 | 4072 34 | 8428 35 | 7223 36 | 2251 37 | 3802 38 | 3848 39 | 7271 40 | 2677 41 | 8267 42 | 2849 43 | 2518 44 | 2738 45 | 3746 46 | 5105 47 | 3430 48 | 3503 49 | 2249 50 | 1841 51 | 2032 52 | 2358 53 | 122 54 | 3984 55 | 4865 56 | 3246 57 | 5095 58 | 6912 59 | 6878 60 | 8467 61 | 2741 62 | 1973 63 | 3057 64 | 7217 65 | 1872 66 | 44 67 | 2452 68 | 3637 69 | 2704 70 | 6917 71 | 2715 72 | 6734 73 | 2325 74 | 6864 75 | 6677 76 | 2035 77 | 1949 78 | 338 79 | 2664 80 | 5122 81 | 1844 82 | 784 83 | 2223 84 | 7188 85 | 2719 86 | 2670 87 | 4830 88 | 158 89 | 4818 90 | 7228 91 | 1965 92 | 7342 93 | 786 94 | 2095 95 | 
8281 96 | 8258 97 | 7406 98 | 3915 99 | 8382 100 | 2437 101 | 2837 102 | 82 103 | 6871 104 | 1876 105 | 7447 106 | 8285 107 | 5007 108 | 2740 109 | 3463 110 | 5103 111 | 3755 112 | 4910 113 | 6809 114 | 3800 115 | 118 116 | 3396 117 | 3092 118 | 2709 119 | 81 120 | 7105 121 | 4036 122 | 2366 123 | 1846 124 | 5177 125 | 2684 126 | 64 127 | 2041 128 | 3919 129 | 700 130 | 3724 131 | 1742 132 | 39 133 | 807 134 | 7184 135 | 2256 136 | 235 137 | 2778 138 | 2996 139 | 2030 140 | 3714 141 | 7167 142 | 2369 143 | 6705 144 | 6861 145 | 5096 146 | 2597 147 | 2166 148 | 2036 149 | 3228 150 | 3747 151 | 2711 152 | 8300 153 | 2226 154 | 7153 155 | 7255 156 | 2631 157 | 7109 158 | 8242 159 | 7445 160 | 3776 161 | 3803 162 | 3690 163 | 2025 164 | 2521 165 | 2316 166 | 7190 167 | 8249 168 | 3352 169 | 2639 170 | 2887 171 | 100 172 | 4219 173 | 3344 174 | 5008 175 | 7224 176 | 3351 177 | 2434 178 | 2074 179 | 2034 180 | 8304 181 | 5004 182 | 6868 183 | 5102 184 | 2645 185 | 4071 186 | 2716 187 | 2717 188 | 7420 189 | 3499 190 | 3763 191 | 5084 192 | 2676 193 | 2046 194 | 5107 195 | 5097 196 | 3944 197 | 4097 198 | 7132 199 | 3956 200 | 7343 201 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/jnet-conv.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=10 5 | width=10 6 | channels=3 7 | learning_rate=0.01 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | [convolutional] 12 | filters=32 13 | size=3 14 | stride=1 15 | pad=1 16 | activation=leaky 17 | 18 | [convolutional] 19 | filters=32 20 | size=3 21 | stride=1 22 | pad=1 23 | activation=leaky 24 | 25 | [maxpool] 26 | stride=2 27 | size=2 28 | 29 | [convolutional] 30 | filters=64 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=leaky 35 | 36 | [convolutional] 37 | filters=64 38 | size=3 39 | stride=1 40 | pad=1 41 | activation=leaky 42 | 43 | [maxpool] 44 | stride=2 45 | size=2 46 | 47 | [convolutional] 48 | filters=128 49 | size=3 50 | stride=1 51 | pad=1 52 | activation=leaky 53 | 54 | [convolutional] 55 | filters=128 56 | size=3 57 | stride=1 58 | pad=1 59 | activation=leaky 60 | 61 | [maxpool] 62 | stride=2 63 | size=2 64 | 65 | [convolutional] 66 | filters=256 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [convolutional] 73 | filters=256 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [maxpool] 80 | stride=2 81 | size=2 82 | 83 | [convolutional] 84 | filters=512 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | filters=512 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [maxpool] 98 | stride=2 99 | size=2 100 | 101 | [convolutional] 102 | filters=1024 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | filters=1024 110 | size=3 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [maxpool] 116 | size=2 117 | stride=2 118 | 119 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/lstm.train.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | subdivisions=8 3 | inputs=256 4 | batch = 128 5 | momentum=0.9 6 | decay=0.001 7 | max_batches = 2000 8 | time_steps=576 9 | learning_rate=0.5 10 | policy=steps 11 | burn_in=10 12 | steps=1000,1500 13 | scales=.1,.1 14 | 15 | [lstm] 16 | batch_normalize=1 17 | output = 1024 18 | 19 | 
[lstm] 20 | batch_normalize=1 21 | output = 1024 22 | 23 | [lstm] 24 | batch_normalize=1 25 | output = 1024 26 | 27 | [connected] 28 | output=256 29 | activation=leaky 30 | 31 | [softmax] 32 | 33 | [cost] 34 | type=sse 35 | 36 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/openimages.data: -------------------------------------------------------------------------------- 1 | classes= 601 2 | train = /home/pjreddie/data/openimsv4/openimages.train.list 3 | #valid = coco_testdev 4 | valid = data/coco_val_5k.list 5 | names = data/openimages.names 6 | backup = /home/pjreddie/backup/ 7 | eval=coco 8 | 9 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/rnn.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | subdivisions=1 3 | inputs=256 4 | batch = 1 5 | momentum=0.9 6 | decay=0.001 7 | max_batches = 2000 8 | time_steps=1 9 | learning_rate=0.1 10 | policy=steps 11 | steps=1000,1500 12 | scales=.1,.1 13 | 14 | [rnn] 15 | batch_normalize=1 16 | output = 1024 17 | hidden=1024 18 | activation=leaky 19 | 20 | [rnn] 21 | batch_normalize=1 22 | output = 1024 23 | hidden=1024 24 | activation=leaky 25 | 26 | [rnn] 27 | batch_normalize=1 28 | output = 1024 29 | hidden=1024 30 | activation=leaky 31 | 32 | [connected] 33 | output=256 34 | activation=leaky 35 | 36 | [softmax] 37 | 38 | [cost] 39 | type=sse 40 | 41 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/rnn.train.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | subdivisions=8 3 | inputs=256 4 | batch = 128 5 | momentum=0.9 6 | decay=0.001 7 | max_batches = 2000 8 | time_steps=576 9 | learning_rate=0.1 10 | policy=steps 11 | steps=1000,1500 12 | scales=.1,.1 13 | 14 | [rnn] 15 | batch_normalize=1 16 | output = 1024 17 | hidden=1024 18 | activation=leaky 19 | 20 | [rnn] 21 | batch_normalize=1 22 | output = 1024 23 | hidden=1024 24 | activation=leaky 25 | 26 | [rnn] 27 | batch_normalize=1 28 | output = 1024 29 | hidden=1024 30 | activation=leaky 31 | 32 | [connected] 33 | output=256 34 | activation=leaky 35 | 36 | [softmax] 37 | 38 | [cost] 39 | type=sse 40 | 41 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/strided.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=4 4 | height=256 5 | width=256 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.01 11 | policy=steps 12 | scales=.1,.1,.1 13 | steps=200000,300000,400000 14 | max_batches=800000 15 | 16 | 17 | [crop] 18 | crop_height=224 19 | crop_width=224 20 | flip=1 21 | angle=0 22 | saturation=1 23 | exposure=1 24 | shift=.2 25 | 26 | [convolutional] 27 | filters=64 28 | size=7 29 | stride=2 30 | pad=1 31 | activation=ramp 32 | 33 | [convolutional] 34 | filters=192 35 | size=3 36 | stride=2 37 | pad=1 38 | activation=ramp 39 | 40 | [convolutional] 41 | filters=128 42 | size=1 43 | stride=1 44 | pad=1 45 | activation=ramp 46 | 47 | [convolutional] 48 | filters=256 49 | size=3 50 | stride=2 51 | pad=1 52 | activation=ramp 53 | 54 | [convolutional] 55 | filters=128 56 | size=1 57 | stride=1 58 | pad=1 59 | activation=ramp 60 | 61 | [convolutional] 62 | filters=256 63 | size=3 64 | stride=1 65 | pad=1 66 | 
activation=ramp 67 | 68 | [convolutional] 69 | filters=128 70 | size=1 71 | stride=1 72 | pad=1 73 | activation=ramp 74 | 75 | [convolutional] 76 | filters=512 77 | size=3 78 | stride=2 79 | pad=1 80 | activation=ramp 81 | 82 | [convolutional] 83 | filters=256 84 | size=1 85 | stride=1 86 | pad=1 87 | activation=ramp 88 | 89 | [convolutional] 90 | filters=512 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=ramp 95 | 96 | [convolutional] 97 | filters=256 98 | size=1 99 | stride=1 100 | pad=1 101 | activation=ramp 102 | 103 | [convolutional] 104 | filters=512 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=ramp 109 | 110 | [convolutional] 111 | filters=256 112 | size=1 113 | stride=1 114 | pad=1 115 | activation=ramp 116 | 117 | [convolutional] 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=ramp 123 | 124 | [convolutional] 125 | filters=256 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=ramp 130 | 131 | [convolutional] 132 | filters=512 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=ramp 137 | 138 | [convolutional] 139 | filters=256 140 | size=1 141 | stride=1 142 | pad=1 143 | activation=ramp 144 | 145 | [convolutional] 146 | filters=1024 147 | size=3 148 | stride=2 149 | pad=1 150 | activation=ramp 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=ramp 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=ramp 165 | 166 | [maxpool] 167 | size=3 168 | stride=2 169 | 170 | [connected] 171 | output=4096 172 | activation=ramp 173 | 174 | [dropout] 175 | probability=0.5 176 | 177 | [connected] 178 | output=1000 179 | activation=ramp 180 | 181 | [softmax] 182 | 183 | [cost] 184 | type=sse 185 | 186 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/t1.test.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=224 5 | width=224 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0005 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [convolutional] 17 | filters=16 18 | size=3 19 | stride=1 20 | pad=1 21 | activation=leaky 22 | 23 | [maxpool] 24 | size=2 25 | stride=2 26 | 27 | [convolutional] 28 | filters=32 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | filters=128 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [maxpool] 57 | size=2 58 | stride=2 59 | 60 | [convolutional] 61 | filters=256 62 | size=3 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [maxpool] 68 | size=2 69 | stride=2 70 | 71 | [convolutional] 72 | filters=512 73 | size=3 74 | stride=1 75 | pad=1 76 | activation=leaky 77 | 78 | [convolutional] 79 | filters=1024 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=1024 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=3 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [connected] 100 | output= 1470 101 | activation=linear 102 | 103 | [detection] 104 | classes=20 105 | coords=4 106 | rescore=1 107 | side=7 
108 | num=2 109 | softmax=0 110 | sqrt=1 111 | jitter=.2 112 | 113 | object_scale=1 114 | noobject_scale=.5 115 | class_scale=1 116 | coord_scale=5 117 | 118 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/tiny-yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 120000 16 | policy=steps 17 | steps=-1,100,80000,100000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | 
batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=425 115 | activation=linear 116 | 117 | [region] 118 | anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741 119 | bias_match=1 120 | classes=80 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/tiny-yolo_xnor.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | #xnor=1 22 | batch_normalize=1 23 | filters=16 24 | size=3 25 | stride=1 26 | pad=1 27 | activation=leaky 28 | 29 | [maxpool] 30 | size=2 31 | stride=2 32 | 33 | [convolutional] 34 | xnor=1 35 | bin_output=1 36 | batch_normalize=1 37 | filters=32 38 | size=3 39 | stride=1 40 | pad=1 41 | activation=leaky 42 | 43 | [maxpool] 44 | size=2 45 | stride=2 46 | 47 | [convolutional] 48 | xnor=1 49 | bin_output=1 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | xnor=1 63 | bin_output=1 64 | batch_normalize=1 65 | filters=128 66 | size=3 67 | stride=1 68 | pad=1 69 | activation=leaky 70 | 71 | [maxpool] 72 | size=2 73 | stride=2 74 | 75 | [convolutional] 76 | xnor=1 77 | bin_output=1 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [maxpool] 86 | size=2 87 | stride=2 88 | 89 | [convolutional] 90 | xnor=1 91 | bin_output=1 92 | batch_normalize=1 93 | filters=512 94 | size=3 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [maxpool] 100 | size=2 101 | stride=1 102 | 103 | [convolutional] 104 | xnor=1 105 | bin_output=1 106 | batch_normalize=1 107 | filters=1024 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | ########### 114 | 115 | [convolutional] 116 | xnor=1 117 | batch_normalize=1 118 | size=3 119 | stride=1 120 | pad=1 121 | filters=1024 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=425 129 | activation=linear 130 | 131 | [region] 132 | anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741 
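# note: for a [region] head, the 1x1 conv just above must have filters = num*(classes + coords + 1); here 5*(80 + 4 + 1) = 425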
133 | bias_match=1 134 | classes=80 135 | coords=4 136 | num=5 137 | softmax=1 138 | jitter=.2 139 | rescore=1 140 | 141 | object_scale=5 142 | noobject_scale=1 143 | class_scale=1 144 | coord_scale=1 145 | 146 | absolute=1 147 | thresh = .6 148 | random=1 149 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | max_crop=320 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | angle=7 17 | hue=.1 18 | saturation=.75 19 | exposure=.75 20 | aspect=.75 21 | 22 | [convolutional] 23 | batch_normalize=1 24 | filters=16 25 | size=3 26 | stride=1 27 | pad=1 28 | activation=leaky 29 | 30 | [maxpool] 31 | size=2 32 | stride=2 33 | 34 | [convolutional] 35 | batch_normalize=1 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | batch_normalize=1 48 | filters=16 49 | size=1 50 | stride=1 51 | pad=1 52 | activation=leaky 53 | 54 | [convolutional] 55 | batch_normalize=1 56 | filters=128 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=16 65 | size=1 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [convolutional] 71 | batch_normalize=1 72 | filters=128 73 | size=3 74 | stride=1 75 | pad=1 76 | activation=leaky 77 | 78 | [maxpool] 79 | size=2 80 | stride=2 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=32 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | batch_normalize=1 92 | filters=256 93 | size=3 94 | stride=1 95 | pad=1 96 | activation=leaky 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=32 101 | size=1 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | batch_normalize=1 108 | filters=256 109 | size=3 110 | stride=1 111 | pad=1 112 | activation=leaky 113 | 114 | [maxpool] 115 | size=2 116 | stride=2 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=64 121 | size=1 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [convolutional] 127 | batch_normalize=1 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | batch_normalize=1 136 | filters=64 137 | size=1 138 | stride=1 139 | pad=1 140 | activation=leaky 141 | 142 | [convolutional] 143 | batch_normalize=1 144 | filters=512 145 | size=3 146 | stride=1 147 | pad=1 148 | activation=leaky 149 | 150 | [convolutional] 151 | batch_normalize=1 152 | filters=128 153 | size=1 154 | stride=1 155 | pad=1 156 | activation=leaky 157 | 158 | [convolutional] 159 | filters=1000 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=linear 164 | 165 | [avgpool] 166 | 167 | [softmax] 168 | groups=1 169 | 170 | [cost] 171 | type=sse 172 | 173 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/vgg-16.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=4 4 | height=256 5 | width=256 6 | channels=3 7 | learning_rate=0.00001 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | [crop] 12 | crop_height=224 13 | crop_width=224 14 | flip=1 15 | exposure=1 16 
| saturation=1 17 | angle=0 18 | 19 | [convolutional] 20 | filters=64 21 | size=3 22 | stride=1 23 | pad=1 24 | activation=relu 25 | 26 | [convolutional] 27 | filters=64 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=relu 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | filters=128 39 | size=3 40 | stride=1 41 | pad=1 42 | activation=relu 43 | 44 | [convolutional] 45 | filters=128 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=relu 50 | 51 | [maxpool] 52 | size=2 53 | stride=2 54 | 55 | [convolutional] 56 | filters=256 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=relu 61 | 62 | [convolutional] 63 | filters=256 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=relu 68 | 69 | [convolutional] 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=relu 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | filters=512 82 | size=3 83 | stride=1 84 | pad=1 85 | activation=relu 86 | 87 | [convolutional] 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=relu 93 | 94 | [convolutional] 95 | filters=512 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=relu 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | filters=512 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=relu 111 | 112 | [convolutional] 113 | filters=512 114 | size=3 115 | stride=1 116 | pad=1 117 | activation=relu 118 | 119 | [convolutional] 120 | filters=512 121 | size=3 122 | stride=1 123 | pad=1 124 | activation=relu 125 | 126 | [maxpool] 127 | size=2 128 | stride=2 129 | 130 | [connected] 131 | output=4096 132 | activation=relu 133 | 134 | [dropout] 135 | probability=.5 136 | 137 | [connected] 138 | output=4096 139 | activation=relu 140 | 141 | [dropout] 142 | probability=.5 143 | 144 | [connected] 145 | output=1000 146 | activation=linear 147 | 148 | [softmax] 149 | groups=1 150 | 151 | [cost] 152 | type=sse 153 | 154 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/vgg-conv.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | width=224 5 | height=224 6 | channels=3 7 | learning_rate=0.00001 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | [convolutional] 12 | filters=64 13 | size=3 14 | stride=1 15 | pad=1 16 | activation=relu 17 | 18 | [convolutional] 19 | filters=64 20 | size=3 21 | stride=1 22 | pad=1 23 | activation=relu 24 | 25 | [maxpool] 26 | size=2 27 | stride=2 28 | 29 | [convolutional] 30 | filters=128 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=relu 35 | 36 | [convolutional] 37 | filters=128 38 | size=3 39 | stride=1 40 | pad=1 41 | activation=relu 42 | 43 | [maxpool] 44 | size=2 45 | stride=2 46 | 47 | [convolutional] 48 | filters=256 49 | size=3 50 | stride=1 51 | pad=1 52 | activation=relu 53 | 54 | [convolutional] 55 | filters=256 56 | size=3 57 | stride=1 58 | pad=1 59 | activation=relu 60 | 61 | [convolutional] 62 | filters=256 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=relu 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | filters=512 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=relu 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=relu 85 | 86 | [convolutional] 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=relu 92 | 93 | [maxpool] 94 | size=2 95 | stride=2 96 | 97 | [convolutional] 98 | 
filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=relu 103 | 104 | [convolutional] 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=relu 110 | 111 | [convolutional] 112 | filters=512 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=relu 117 | 118 | [maxpool] 119 | size=2 120 | stride=2 121 | 122 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/voc.data: -------------------------------------------------------------------------------- 1 | classes= 20 2 | train = /home/pjreddie/data/voc/train.txt 3 | valid = /home/pjreddie/data/voc/2007_test.txt 4 | names = data/voc.names 5 | backup = /home/pjreddie/backup/ 6 | 7 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/writing.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=2 4 | height=256 5 | width=256 6 | channels=3 7 | learning_rate=0.00000001 8 | momentum=0.9 9 | decay=0.0005 10 | seen=0 11 | 12 | [convolutional] 13 | filters=32 14 | size=3 15 | stride=1 16 | pad=1 17 | activation=leaky 18 | 19 | [convolutional] 20 | filters=32 21 | size=3 22 | stride=1 23 | pad=1 24 | activation=leaky 25 | 26 | [convolutional] 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [convolutional] 34 | filters=1 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=logistic 39 | 40 | [cost] 41 | 42 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolo-voc.2.0.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | height=416 5 | width=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.0001 15 | max_batches = 45000 16 | policy=steps 17 | steps=100,25000,35000 18 | scales=10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=32 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=64 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=128 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=64 55 | size=1 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | batch_normalize=1 74 | filters=256 75 | size=3 76 | stride=1 77 | pad=1 78 | activation=leaky 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=128 83 | size=1 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=256 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [maxpool] 97 | size=2 98 | stride=2 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=512 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | 
[convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=256 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=512 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [maxpool] 141 | size=2 142 | stride=2 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=1024 147 | size=3 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=512 155 | size=1 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=1024 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=512 171 | size=1 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=1024 179 | size=3 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | 185 | ####### 186 | 187 | [convolutional] 188 | batch_normalize=1 189 | size=3 190 | stride=1 191 | pad=1 192 | filters=1024 193 | activation=leaky 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [route] 204 | layers=-9 205 | 206 | [reorg] 207 | stride=2 208 | 209 | [route] 210 | layers=-1,-3 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | size=3 215 | stride=1 216 | pad=1 217 | filters=1024 218 | activation=leaky 219 | 220 | [convolutional] 221 | size=1 222 | stride=1 223 | pad=1 224 | filters=125 225 | activation=linear 226 | 227 | [region] 228 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 229 | bias_match=1 230 | classes=20 231 | coords=4 232 | num=5 233 | softmax=1 234 | jitter=.2 235 | rescore=1 236 | 237 | object_scale=5 238 | noobject_scale=1 239 | class_scale=1 240 | coord_scale=1 241 | 242 | absolute=1 243 | thresh = .6 244 | random=0 245 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=40000,60000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | 
stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolo.2.0.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 120000 16 | policy=steps 17 | steps=-1,100,80000,100000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=32 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 
27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=64 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=128 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=64 55 | size=1 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | batch_normalize=1 74 | filters=256 75 | size=3 76 | stride=1 77 | pad=1 78 | activation=leaky 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=128 83 | size=1 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=256 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [maxpool] 97 | size=2 98 | stride=2 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=512 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=256 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=512 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [maxpool] 141 | size=2 142 | stride=2 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=1024 147 | size=3 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=512 155 | size=1 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=1024 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=512 171 | size=1 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=1024 179 | size=3 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | 185 | ####### 186 | 187 | [convolutional] 188 | batch_normalize=1 189 | size=3 190 | stride=1 191 | pad=1 192 | filters=1024 193 | activation=leaky 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [route] 204 | layers=-9 205 | 206 | [reorg] 207 | stride=2 208 | 209 | [route] 210 | layers=-1,-3 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | size=3 215 | stride=1 216 | pad=1 217 | filters=1024 218 | activation=leaky 219 | 220 | [convolutional] 221 | size=1 222 | stride=1 223 | pad=1 224 | filters=425 225 | activation=linear 226 | 227 | [region] 228 | anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741 229 | bias_match=1 230 | classes=80 231 | coords=4 232 | num=5 233 | softmax=1 234 | jitter=.2 235 | rescore=1 236 | 237 | object_scale=5 238 | noobject_scale=1 239 | class_scale=1 240 | coord_scale=1 241 | 242 | absolute=1 243 | thresh = .6 244 | random=0 245 | -------------------------------------------------------------------------------- 
/OneStage/yolo/Train-a-YOLOv4-model/cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 
| 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolo9000.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | batch=1 9 | subdivisions=1 10 | height=544 11 | width=544 12 | channels=3 13 | momentum=0.9 14 | decay=0.0005 15 | 16 | learning_rate=0.001 17 | burn_in=1000 18 | max_batches = 500200 19 | policy=steps 20 | steps=400000,450000 21 | scales=.1,.1 22 | 23 | hue=.1 24 | saturation=.75 25 | exposure=.75 26 | 27 | [convolutional] 28 | batch_normalize=1 29 | filters=32 30 | size=3 31 | stride=1 32 | pad=1 33 | activation=leaky 34 | 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | [convolutional] 40 | batch_normalize=1 41 | filters=64 42 | size=3 43 | stride=1 44 | pad=1 45 | activation=leaky 46 | 47 | [maxpool] 48 | size=2 49 | stride=2 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=128 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=64 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=128 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | batch_normalize=1 81 | filters=256 82 | size=3 83 | stride=1 84 | pad=1 85 | activation=leaky 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=128 90 | size=1 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [convolutional] 96 | batch_normalize=1 97 | filters=256 98 | size=3 99 | stride=1 100 | pad=1 101 | activation=leaky 102 | 103 | [maxpool] 104 | size=2 105 | stride=2 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=512 110 | size=3 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=1 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=512 126 | size=3 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=1 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [convolutional] 140 | batch_normalize=1 141 | filters=512 142 | size=3 143 | stride=1 144 | pad=1 145 | activation=leaky 146 | 147 | [maxpool] 148 | size=2 149 | stride=2 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=1024 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=512 162 | size=1 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | 
batch_normalize=1 169 | filters=1024 170 | size=3 171 | stride=1 172 | pad=1 173 | activation=leaky 174 | 175 | [convolutional] 176 | batch_normalize=1 177 | filters=512 178 | size=1 179 | stride=1 180 | pad=1 181 | activation=leaky 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=1024 186 | size=3 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | filters=28269 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=linear 197 | 198 | [region] 199 | anchors = 0.77871, 1.14074, 3.00525, 4.31277, 9.22725, 9.61974 200 | bias_match=1 201 | classes=9418 202 | coords=4 203 | num=3 204 | softmax=1 205 | jitter=.2 206 | rescore=1 207 | 208 | object_scale=5 209 | noobject_scale=1 210 | class_scale=1 211 | coord_scale=1 212 | 213 | thresh = .6 214 | absolute=1 215 | random=1 216 | 217 | tree=data/9k.tree 218 | map = data/coco9k.map 219 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov1/tiny-coco.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | hue = .1 11 | saturation=.75 12 | exposure=.75 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,100000,150000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 200000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [connected] 109 | output= 4655 110 | activation=linear 111 | 112 | [detection] 113 | classes=80 114 | coords=4 115 | rescore=1 116 | side=7 117 | num=3 118 | softmax=0 119 | sqrt=1 120 | jitter=.2 121 | 122 | object_scale=1 123 | noobject_scale=.5 124 | class_scale=1 125 | coord_scale=5 126 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov1/tiny-yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | saturation=.75 11 | exposure=.75 12 | hue = .1 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,20000,30000 17 | scales=2.5,2,2,2,.1,.1 18 | 
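# note: with policy=steps, the learning rate is multiplied by each entry in scales as training passes the matching entry in steps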
max_batches = 40000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [connected] 109 | output= 1470 110 | activation=linear 111 | 112 | [detection] 113 | classes=20 114 | coords=4 115 | rescore=1 116 | side=7 117 | num=2 118 | softmax=0 119 | sqrt=1 120 | jitter=.2 121 | 122 | object_scale=1 123 | noobject_scale=.5 124 | class_scale=1 125 | coord_scale=5 126 | 127 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov1/xyolo.test.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [convolutional] 17 | batch_normalize=1 18 | filters=16 19 | size=3 20 | stride=1 21 | pad=1 22 | activation=leaky 23 | 24 | [maxpool] 25 | size=2 26 | stride=2 27 | 28 | [batchnorm] 29 | 30 | [convolutional] 31 | xnor = 1 32 | batch_normalize=1 33 | filters=32 34 | size=3 35 | stride=1 36 | pad=1 37 | activation=leaky 38 | 39 | [maxpool] 40 | size=2 41 | stride=2 42 | 43 | [batchnorm] 44 | 45 | [convolutional] 46 | xnor = 1 47 | batch_normalize=1 48 | filters=64 49 | size=3 50 | stride=1 51 | pad=1 52 | activation=leaky 53 | 54 | [maxpool] 55 | size=2 56 | stride=2 57 | 58 | [batchnorm] 59 | 60 | [convolutional] 61 | xnor = 1 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [batchnorm] 74 | 75 | [convolutional] 76 | xnor = 1 77 | batch_normalize=1 78 | filters=256 79 | size=3 80 | stride=1 81 | pad=1 82 | activation=leaky 83 | 84 | [maxpool] 85 | size=2 86 | stride=2 87 | 88 | [batchnorm] 89 | 90 | [convolutional] 91 | xnor = 1 92 | batch_normalize=1 93 | filters=512 94 | size=3 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [maxpool] 100 | size=2 101 | stride=2 102 | 103 | [batchnorm] 104 | 105 | [convolutional] 106 | xnor = 1 107 | batch_normalize=1 108 | filters=1024 109 | size=3 110 | stride=1 111 | pad=1 112 | activation=leaky 113 | 114 | [batchnorm] 115 | 116 | [convolutional] 117 | xnor = 1 118 | 
batch_normalize=1 119 | filters=256 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=leaky 124 | 125 | [connected] 126 | output= 1470 127 | activation=linear 128 | 129 | [detection] 130 | classes=20 131 | coords=4 132 | rescore=1 133 | side=7 134 | num=2 135 | softmax=0 136 | sqrt=1 137 | jitter=.2 138 | 139 | object_scale=1 140 | noobject_scale=.5 141 | class_scale=1 142 | coord_scale=5 143 | 144 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov1/yolo-coco.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=4 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | hue = .1 11 | saturation=.75 12 | exposure=.75 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,100000,150000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 200000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=64 23 | size=7 24 | stride=2 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=192 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=128 47 | size=1 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=256 63 | size=1 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=512 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=256 83 | size=1 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=512 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [convolutional] 97 | batch_normalize=1 98 | filters=256 99 | size=1 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=512 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | [convolutional] 113 | batch_normalize=1 114 | filters=256 115 | size=1 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | batch_normalize=1 122 | filters=512 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [convolutional] 129 | batch_normalize=1 130 | filters=256 131 | size=1 132 | stride=1 133 | pad=1 134 | activation=leaky 135 | 136 | [convolutional] 137 | batch_normalize=1 138 | filters=512 139 | size=3 140 | stride=1 141 | pad=1 142 | activation=leaky 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=512 147 | size=1 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=1024 155 | size=3 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [maxpool] 161 | size=2 162 | stride=2 163 | 164 | [convolutional] 165 | batch_normalize=1 166 | filters=512 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [convolutional] 181 | batch_normalize=1 182 | filters=512 183 | size=1 
184 | stride=1 185 | pad=1 186 | activation=leaky 187 | 188 | [convolutional] 189 | batch_normalize=1 190 | filters=1024 191 | size=3 192 | stride=1 193 | pad=1 194 | activation=leaky 195 | 196 | ####### 197 | 198 | [convolutional] 199 | batch_normalize=1 200 | size=3 201 | stride=1 202 | pad=1 203 | filters=1024 204 | activation=leaky 205 | 206 | [convolutional] 207 | batch_normalize=1 208 | size=3 209 | stride=2 210 | pad=1 211 | filters=1024 212 | activation=leaky 213 | 214 | [convolutional] 215 | batch_normalize=1 216 | size=3 217 | stride=1 218 | pad=1 219 | filters=1024 220 | activation=leaky 221 | 222 | [convolutional] 223 | batch_normalize=1 224 | size=3 225 | stride=1 226 | pad=1 227 | filters=1024 228 | activation=leaky 229 | 230 | [local] 231 | size=3 232 | stride=1 233 | pad=1 234 | filters=256 235 | activation=leaky 236 | 237 | [connected] 238 | output= 4655 239 | activation=linear 240 | 241 | [detection] 242 | classes=80 243 | coords=4 244 | rescore=1 245 | side=7 246 | num=3 247 | softmax=0 248 | sqrt=1 249 | jitter=.2 250 | 251 | object_scale=1 252 | noobject_scale=.5 253 | class_scale=1 254 | coord_scale=5 255 | 256 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov1/yolo-small.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 
| activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=512 212 | activation=leaky 213 | 214 | [connected] 215 | output=4096 216 | activation=leaky 217 | 218 | [dropout] 219 | probability=.5 220 | 221 | [connected] 222 | output= 1470 223 | activation=linear 224 | 225 | [detection] 226 | classes=20 227 | coords=4 228 | rescore=1 229 | side=7 230 | num=2 231 | softmax=0 232 | sqrt=1 233 | jitter=.2 234 | 235 | object_scale=1 236 | noobject_scale=.5 237 | class_scale=1 238 | coord_scale=5 239 | 240 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov1/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | saturation=1.5 10 | exposure=1.5 11 | hue=.1 12 | 13 | learning_rate=0.0005 14 | policy=steps 15 | steps=200,400,600,20000,30000 16 | scales=2.5,2,2,.1,.1 17 | max_batches = 40000 18 | 19 | [convolutional] 20 | batch_normalize=1 21 | filters=64 22 | size=7 23 | stride=2 24 | pad=1 25 | activation=leaky 26 | 27 | [maxpool] 28 | size=2 29 | stride=2 30 | 31 | [convolutional] 32 | batch_normalize=1 33 | filters=192 34 | size=3 35 | stride=1 36 | pad=1 37 | activation=leaky 38 | 39 | [maxpool] 40 | size=2 41 | stride=2 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=128 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=256 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=512 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | batch_normalize=1 81 | filters=256 82 | size=1 83 | stride=1 84 | pad=1 85 | activation=leaky 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=512 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [convolutional] 96 | batch_normalize=1 97 | filters=256 98 | size=1 99 | stride=1 100 | pad=1 101 | activation=leaky 102 | 103 | [convolutional] 104 | batch_normalize=1 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | [convolutional] 120 | batch_normalize=1 
121 | filters=512 122 | size=3 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=256 130 | size=1 131 | stride=1 132 | pad=1 133 | activation=leaky 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=512 138 | size=3 139 | stride=1 140 | pad=1 141 | activation=leaky 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=512 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=1024 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [maxpool] 160 | size=2 161 | stride=2 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=512 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=1024 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [convolutional] 180 | batch_normalize=1 181 | filters=512 182 | size=1 183 | stride=1 184 | pad=1 185 | activation=leaky 186 | 187 | [convolutional] 188 | batch_normalize=1 189 | filters=1024 190 | size=3 191 | stride=1 192 | pad=1 193 | activation=leaky 194 | 195 | ####### 196 | 197 | [convolutional] 198 | batch_normalize=1 199 | size=3 200 | stride=1 201 | pad=1 202 | filters=1024 203 | activation=leaky 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | size=3 208 | stride=2 209 | pad=1 210 | filters=1024 211 | activation=leaky 212 | 213 | [convolutional] 214 | batch_normalize=1 215 | size=3 216 | stride=1 217 | pad=1 218 | filters=1024 219 | activation=leaky 220 | 221 | [convolutional] 222 | batch_normalize=1 223 | size=3 224 | stride=1 225 | pad=1 226 | filters=1024 227 | activation=leaky 228 | 229 | [local] 230 | size=3 231 | stride=1 232 | pad=1 233 | filters=256 234 | activation=leaky 235 | 236 | [dropout] 237 | probability=.5 238 | 239 | [connected] 240 | output= 1715 241 | activation=linear 242 | 243 | [detection] 244 | classes=20 245 | coords=4 246 | rescore=1 247 | side=7 248 | num=3 249 | softmax=0 250 | sqrt=1 251 | jitter=.2 252 | 253 | object_scale=1 254 | noobject_scale=.5 255 | class_scale=1 256 | coord_scale=5 257 | 258 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov1/yolo.train.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=4 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | saturation=1.5 10 | exposure=1.5 11 | hue=.1 12 | 13 | learning_rate=0.0005 14 | policy=steps 15 | steps=200,400,600,20000,30000 16 | scales=2.5,2,2,.1,.1 17 | max_batches = 40000 18 | 19 | [convolutional] 20 | batch_normalize=1 21 | filters=64 22 | size=7 23 | stride=2 24 | pad=1 25 | activation=leaky 26 | 27 | [maxpool] 28 | size=2 29 | stride=2 30 | 31 | [convolutional] 32 | batch_normalize=1 33 | filters=192 34 | size=3 35 | stride=1 36 | pad=1 37 | activation=leaky 38 | 39 | [maxpool] 40 | size=2 41 | stride=2 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=128 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=256 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=512 70 | size=3 71 | stride=1 72 | 
pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | batch_normalize=1 81 | filters=256 82 | size=1 83 | stride=1 84 | pad=1 85 | activation=leaky 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=512 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [convolutional] 96 | batch_normalize=1 97 | filters=256 98 | size=1 99 | stride=1 100 | pad=1 101 | activation=leaky 102 | 103 | [convolutional] 104 | batch_normalize=1 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | [convolutional] 120 | batch_normalize=1 121 | filters=512 122 | size=3 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=256 130 | size=1 131 | stride=1 132 | pad=1 133 | activation=leaky 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=512 138 | size=3 139 | stride=1 140 | pad=1 141 | activation=leaky 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=512 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=1024 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [maxpool] 160 | size=2 161 | stride=2 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=512 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=1024 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [convolutional] 180 | batch_normalize=1 181 | filters=512 182 | size=1 183 | stride=1 184 | pad=1 185 | activation=leaky 186 | 187 | [convolutional] 188 | batch_normalize=1 189 | filters=1024 190 | size=3 191 | stride=1 192 | pad=1 193 | activation=leaky 194 | 195 | ####### 196 | 197 | [convolutional] 198 | batch_normalize=1 199 | size=3 200 | stride=1 201 | pad=1 202 | filters=1024 203 | activation=leaky 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | size=3 208 | stride=2 209 | pad=1 210 | filters=1024 211 | activation=leaky 212 | 213 | [convolutional] 214 | batch_normalize=1 215 | size=3 216 | stride=1 217 | pad=1 218 | filters=1024 219 | activation=leaky 220 | 221 | [convolutional] 222 | batch_normalize=1 223 | size=3 224 | stride=1 225 | pad=1 226 | filters=1024 227 | activation=leaky 228 | 229 | [local] 230 | size=3 231 | stride=1 232 | pad=1 233 | filters=256 234 | activation=leaky 235 | 236 | [dropout] 237 | probability=.5 238 | 239 | [connected] 240 | output= 1715 241 | activation=linear 242 | 243 | [detection] 244 | classes=20 245 | coords=4 246 | rescore=1 247 | side=7 248 | num=3 249 | softmax=0 250 | sqrt=1 251 | jitter=.2 252 | 253 | object_scale=1 254 | noobject_scale=.5 255 | class_scale=1 256 | coord_scale=5 257 | 258 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov1/yolo2.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0005 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [convolutional] 17 | batch_normalize=1 18 | filters=64 19 | size=7 20 | stride=2 21 | pad=1 
22 | activation=leaky 23 | 24 | [maxpool] 25 | size=2 26 | stride=2 27 | 28 | [convolutional] 29 | batch_normalize=1 30 | filters=192 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=leaky 35 | 36 | [maxpool] 37 | size=2 38 | stride=2 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=128 43 | size=1 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=256 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=256 59 | size=1 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [convolutional] 65 | batch_normalize=1 66 | filters=512 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [maxpool] 73 | size=2 74 | stride=2 75 | 76 | [convolutional] 77 | batch_normalize=1 78 | filters=256 79 | size=1 80 | stride=1 81 | pad=1 82 | activation=leaky 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=256 95 | size=1 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=512 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=256 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=512 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [convolutional] 141 | batch_normalize=1 142 | filters=512 143 | size=1 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [convolutional] 149 | batch_normalize=1 150 | filters=1024 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=leaky 155 | 156 | [maxpool] 157 | size=2 158 | stride=2 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=512 163 | size=1 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=1024 171 | size=3 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=512 179 | size=1 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | [convolutional] 185 | batch_normalize=1 186 | filters=1024 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | ####### 193 | 194 | [convolutional] 195 | batch_normalize=1 196 | size=3 197 | stride=1 198 | pad=1 199 | filters=1024 200 | activation=leaky 201 | 202 | [convolutional] 203 | batch_normalize=1 204 | size=3 205 | stride=2 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [convolutional] 211 | batch_normalize=1 212 | size=3 213 | stride=1 214 | pad=1 215 | filters=1024 216 | activation=leaky 217 | 218 | [convolutional] 219 | batch_normalize=1 220 | size=3 221 | stride=1 222 | pad=1 223 | filters=1024 224 | activation=leaky 225 | 226 | [local] 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=256 231 | activation=leaky 232 | 233 | [connected] 234 | output= 1715 235 | activation=linear 236 | 237 | [detection] 238 | classes=20 239 | coords=4 240 | rescore=1 241 | side=7 242 | num=3 243 | softmax=0 244 | sqrt=1 245 | jitter=.2 246 | 247 | 
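# note: the loss weights below follow the YOLOv1 paper, with coord_scale=5 (lambda_coord) and noobject_scale=.5 (lambda_noobj)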
object_scale=1 248 | noobject_scale=.5 249 | class_scale=1 250 | coord_scale=5 251 | 252 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov2-tiny-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | max_batches = 40200 20 | policy=steps 21 | steps=-1,100,20000,30000 22 | scales=.1,10,.1,.1 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=16 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [maxpool] 33 | size=2 34 | stride=2 35 | 36 | [convolutional] 37 | batch_normalize=1 38 | filters=32 39 | size=3 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [maxpool] 45 | size=2 46 | stride=2 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=64 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [maxpool] 57 | size=2 58 | stride=2 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | batch_normalize=1 74 | filters=256 75 | size=3 76 | stride=1 77 | pad=1 78 | activation=leaky 79 | 80 | [maxpool] 81 | size=2 82 | stride=2 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [maxpool] 93 | size=2 94 | stride=1 95 | 96 | [convolutional] 97 | batch_normalize=1 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | ########### 105 | 106 | [convolutional] 107 | batch_normalize=1 108 | size=3 109 | stride=1 110 | pad=1 111 | filters=1024 112 | activation=leaky 113 | 114 | [convolutional] 115 | size=1 116 | stride=1 117 | pad=1 118 | filters=125 119 | activation=linear 120 | 121 | [region] 122 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 123 | bias_match=1 124 | classes=20 125 | coords=4 126 | num=5 127 | softmax=1 128 | jitter=.2 129 | rescore=1 130 | 131 | object_scale=5 132 | noobject_scale=1 133 | class_scale=1 134 | coord_scale=1 135 | 136 | absolute=1 137 | thresh = .6 138 | random=1 139 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov2-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | 
[maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | size=3 110 | stride=1 111 | pad=1 112 | filters=512 113 | activation=leaky 114 | 115 | [convolutional] 116 | size=1 117 | stride=1 118 | pad=1 119 | filters=425 120 | activation=linear 121 | 122 | [region] 123 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 124 | bias_match=1 125 | classes=80 126 | coords=4 127 | num=5 128 | softmax=1 129 | jitter=.2 130 | rescore=0 131 | 132 | object_scale=5 133 | noobject_scale=1 134 | class_scale=1 135 | coord_scale=1 136 | 137 | absolute=1 138 | thresh = .6 139 | random=1 140 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov2-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=40000,60000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 
130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov2.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | 
filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov3-tiny-prn.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | 
[convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=512 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [shortcut] 106 | activation=leaky 107 | from=-3 108 | 109 | ########### 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | [convolutional] 120 | batch_normalize=1 121 | filters=256 122 | size=3 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [shortcut] 128 | activation=leaky 129 | from=-2 130 | 131 | [convolutional] 132 | size=1 133 | stride=1 134 | pad=1 135 | filters=255 136 | activation=linear 137 | 138 | 139 | 140 | [yolo] 141 | mask = 3,4,5 142 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 143 | classes=80 144 | num=6 145 | jitter=.3 146 | ignore_thresh = .7 147 | truth_thresh = 1 148 | random=1 149 | 150 | [route] 151 | layers = -4 152 | 153 | [convolutional] 154 | batch_normalize=1 155 | filters=128 156 | size=1 157 | stride=1 158 | pad=1 159 | activation=leaky 160 | 161 | [upsample] 162 | stride=2 163 | 164 | [shortcut] 165 | activation=leaky 166 | from=8 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=128 171 | size=3 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [shortcut] 177 | activation=leaky 178 | from=-3 179 | 180 | [shortcut] 181 | activation=leaky 182 | from=8 183 | 184 | [convolutional] 185 | size=1 186 | stride=1 187 | pad=1 188 | filters=255 189 | activation=linear 190 | 191 | [yolo] 192 | mask = 1,2,3 193 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 194 | classes=80 195 | num=6 196 | jitter=.3 197 | ignore_thresh = .7 198 | truth_thresh = 1 199 | random=1 200 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 
| filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov3-tiny_3l.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | batch_normalize=1 40 | filters=32 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=leaky 45 | 46 | [maxpool] 47 | size=2 48 | stride=2 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [maxpool] 59 | size=2 60 | stride=2 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 
73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [maxpool] 95 | size=2 96 | stride=1 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=1024 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | ########### 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=21 129 | activation=linear 130 | 131 | 132 | 133 | [yolo] 134 | mask = 6,7,8 135 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 136 | classes=2 137 | num=9 138 | jitter=.3 139 | ignore_thresh = .7 140 | truth_thresh = 1 141 | random=1 142 | 143 | [route] 144 | layers = -4 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [upsample] 155 | stride=2 156 | 157 | [route] 158 | layers = -1, 8 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=256 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | size=1 170 | stride=1 171 | pad=1 172 | filters=21 173 | activation=linear 174 | 175 | [yolo] 176 | mask = 3,4,5 177 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 178 | classes=2 179 | num=9 180 | jitter=.3 181 | ignore_thresh = .7 182 | truth_thresh = 1 183 | random=1 184 | 185 | 186 | 187 | [route] 188 | layers = -3 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=128 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [upsample] 199 | stride=2 200 | 201 | [route] 202 | layers = -1, 6 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=3 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=21 217 | activation=linear 218 | 219 | [yolo] 220 | mask = 0,1,2 221 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 222 | classes=2 223 | num=9 224 | jitter=.3 225 | ignore_thresh = .7 226 | truth_thresh = 1 227 | random=1 -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov3-tiny_obj.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 
51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov3-tiny_occlusion_track.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=8 7 | subdivisions=4 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | track=1 19 | time_steps=20 20 | augment_speed=3 21 | 22 | learning_rate=0.001 23 | burn_in=1000 24 | max_batches = 10000 25 | policy=steps 26 | steps=9000,9500 27 | scales=.1,.1 28 | 29 | [convolutional] 30 | batch_normalize=1 31 | filters=16 32 | size=3 33 | stride=1 34 | pad=1 35 | activation=leaky 36 | 37 | [maxpool] 38 | size=2 39 | stride=2 40 | 41 | [convolutional] 42 | batch_normalize=1 43 | filters=32 44 | size=3 45 | stride=1 46 | pad=1 47 | activation=leaky 48 | 49 | [maxpool] 50 | size=2 51 | stride=2 52 | 53 | [convolutional] 54 | batch_normalize=1 55 | filters=64 56 | size=3 57 | stride=1 58 | pad=1 59 | activation=leaky 60 | 61 | [maxpool] 62 | size=2 63 | stride=2 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | 
activation=leaky 84 | 85 | [maxpool] 86 | size=2 87 | stride=2 88 | 89 | [convolutional] 90 | batch_normalize=1 91 | filters=512 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [maxpool] 98 | size=2 99 | stride=1 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=1024 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | ########### 110 | 111 | 112 | [crnn] 113 | batch_normalize=1 114 | size=3 115 | pad=1 116 | output=512 117 | hidden=256 118 | activation=leaky 119 | 120 | #[shortcut] 121 | #from=-2 122 | #activation=linear 123 | 124 | ########### 125 | 126 | [convolutional] 127 | batch_normalize=1 128 | filters=256 129 | size=1 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | batch_normalize=1 136 | filters=512 137 | size=3 138 | stride=1 139 | pad=1 140 | activation=leaky 141 | 142 | [convolutional] 143 | batch_normalize=1 144 | filters=512 145 | size=3 146 | stride=1 147 | pad=1 148 | activation=leaky 149 | 150 | [convolutional] 151 | size=1 152 | stride=1 153 | pad=1 154 | filters=18 155 | activation=linear 156 | 157 | 158 | 159 | [yolo] 160 | mask = 3,4,5 161 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 162 | classes=1 163 | num=6 164 | jitter=.3 165 | ignore_thresh = .7 166 | truth_thresh = 1 167 | random=0 168 | 169 | [route] 170 | layers = -4 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=128 175 | size=1 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [upsample] 181 | stride=2 182 | 183 | [route] 184 | layers = -1, 8 185 | 186 | [crnn] 187 | batch_normalize=1 188 | size=3 189 | pad=1 190 | output=256 191 | hidden=128 192 | activation=leaky 193 | 194 | [convolutional] 195 | batch_normalize=1 196 | filters=256 197 | size=3 198 | stride=1 199 | pad=1 200 | activation=leaky 201 | 202 | 203 | [convolutional] 204 | size=1 205 | stride=1 206 | pad=1 207 | filters=18 208 | activation=linear 209 | 210 | [yolo] 211 | mask = 0,1,2 212 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 213 | classes=1 214 | num=6 215 | jitter=.3 216 | ignore_thresh = .7 217 | truth_thresh = 1 218 | random=0 219 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/cfg/yolov3-tiny_xnor.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | xnor=1 39 | bin_output=1 40 | batch_normalize=1 41 | filters=32 42 | size=3 43 | stride=1 44 | pad=1 45 | activation=leaky 46 | 47 | [maxpool] 48 | size=2 49 | stride=2 50 | 51 | [convolutional] 52 | xnor=1 53 | bin_output=1 54 | batch_normalize=1 55 | filters=64 56 | size=3 57 | stride=1 58 | pad=1 59 | activation=leaky 60 | 61 | [maxpool] 62 | size=2 63 | stride=2 64 | 65 | [convolutional] 66 | xnor=1 67 | bin_output=1 68 | batch_normalize=1 69 | filters=128 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 
78 | 79 | [convolutional] 80 | xnor=1 81 | batch_normalize=1 82 | filters=256 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | xnor=1 94 | bin_output=1 95 | batch_normalize=1 96 | filters=512 97 | size=3 98 | stride=1 99 | pad=1 100 | activation=leaky 101 | 102 | [maxpool] 103 | size=2 104 | stride=1 105 | 106 | [convolutional] 107 | xnor=1 108 | bin_output=1 109 | batch_normalize=1 110 | filters=1024 111 | size=3 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | ########### 117 | 118 | [convolutional] 119 | xnor=1 120 | batch_normalize=1 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=512 130 | size=3 131 | stride=1 132 | pad=1 133 | activation=leaky 134 | 135 | [convolutional] 136 | size=1 137 | stride=1 138 | pad=1 139 | filters=255 140 | activation=linear 141 | 142 | 143 | 144 | [yolo] 145 | mask = 3,4,5 146 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 147 | classes=80 148 | num=6 149 | jitter=.3 150 | ignore_thresh = .7 151 | truth_thresh = 1 152 | random=1 153 | 154 | [route] 155 | layers = -4 156 | 157 | [convolutional] 158 | xnor=1 159 | batch_normalize=1 160 | filters=128 161 | size=1 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [upsample] 167 | stride=2 168 | 169 | [route] 170 | layers = -1, 8 171 | 172 | [convolutional] 173 | xnor=1 174 | batch_normalize=1 175 | filters=256 176 | size=3 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | 182 | [convolutional] 183 | size=1 184 | stride=1 185 | pad=1 186 | filters=255 187 | activation=linear 188 | 189 | [yolo] 190 | mask = 0,1,2 191 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 192 | classes=80 193 | num=6 194 | jitter=.3 195 | ignore_thresh = .7 196 | truth_thresh = 1 197 | random=1 198 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/imgs/chart_yolov4-custom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/Train-a-YOLOv4-model/imgs/chart_yolov4-custom.png -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/imgs/yolov4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/Train-a-YOLOv4-model/imgs/yolov4.png -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.18.4 2 | matplotlib 3 | tensorflow 4 | tensorboard 5 | terminaltables 6 | pillow 7 | tqdm 8 | pickle 9 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/tools/img2train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author : Andy Liu 3 | # Last modified: 2018-8-15 4 | 5 | # This tool is used to create VOC-like txt file by reading image folder 6 | # input: python create_txt_list.py "/home/andy/Data/img" 7 | # output: 8 | # ./train.txt 9 | # ./val.txt 10 | 11 | 
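# Example invocation (the script is saved as img2train.py in this repo, while the
# header above still shows the older name create_txt_list.py):
#   python img2train.py /home/andy/Data/img
# This writes ./train.txt with a random ~80% of the readable images and ./val.txt
# with the remaining ~20%; images OpenCV cannot read are skipped.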
import argparse 12 | import os,sys 13 | import random 14 | from os import listdir, getcwd 15 | from os.path import join 16 | import cv2 17 | from tqdm import tqdm 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('srcdir', help='file directory', type=str) 23 | 24 | args = parser.parse_args() 25 | return args 26 | 27 | def makelist(srcdir): 28 | srcdir = os.path.abspath(srcdir) 29 | if srcdir[-1] == "/": 30 | srcdir = srcdir[:-1] 31 | 32 | train_path_txt = "./train.txt" 33 | val_path_txt = "./val.txt" 34 | train_file=open(train_path_txt,'w+') # 'w+' overwrites, 'a' appends 35 | val_file=open(val_path_txt,'w+') 36 | 37 | filelist = os.listdir(srcdir) 38 | trainset = random.sample(filelist, int(len(filelist)*0.8)) 39 | 40 | for file in tqdm(filelist): 41 | file_name,file_extend=os.path.splitext(file) 42 | 43 | img_path = srcdir + "/" + file 44 | img = cv2.imread(img_path) 45 | if img is None: 46 | print("%s can't be read!" % file) 47 | continue 48 | 49 | if file in trainset: 50 | train_file.write(srcdir+"/"+file+'\n') 51 | else: 52 | val_file.write(srcdir+"/"+file+'\n') 53 | 54 | train_file.close() 55 | val_file.close() 56 | 57 | print("Path of train text = ",os.path.abspath(train_path_txt)) 58 | print("Path of valid text = ",os.path.abspath(val_path_txt)) 59 | 60 | if __name__ == '__main__': 61 | args = parse_args() 62 | srcdir = args.srcdir 63 | 64 | if not os.path.exists(srcdir): 65 | print("Error!!! %s does not exist, please check the parameter" % srcdir) 66 | sys.exit(0) 67 | 68 | makelist(srcdir) 69 | print("Done!") 70 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/tools/name.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import os 3 | names = os.listdir('/home/cai/workspace/fly_piggy/JPEGImages') # path to the image folder 4 | i=0 5 | train_val = open('/home/cai/workspace/fly_piggy/train.txt','w') # path to the output txt file 6 | for name in names: 7 | index = name.rfind('.') 8 | name = name[:index] 9 | train_val.write(name+'\n') 10 | i=i+1 11 | print(i) 12 | -------------------------------------------------------------------------------- /OneStage/yolo/Train-a-YOLOv4-model/tools/voc_label.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import xml.etree.ElementTree as ET 3 | import pickle 4 | import os 5 | from os import listdir, getcwd 6 | from os.path import join 7 | 8 | # My project has 4 classes; change your class names here 9 | classes = ["holothurian","echinus","scallop","starfish"] 10 | def convert(size, box): 11 | dw = 1./size[0] 12 | dh = 1./size[1] 13 | x = (box[0] + box[1])/2.0 14 | y = (box[2] + box[3])/2.0 15 | w = box[1] - box[0] 16 | h = box[3] - box[2] 17 | x = x*dw 18 | w = w*dw 19 | y = y*dh 20 | h = h*dh 21 | return (x,y,w,h) 22 | 23 | def convert_annotation(image_id): 24 | # Change this to the path of the .xml annotation folder 25 | in_file = open('/home/cai/workspace/fly_piggy/Annotations/%s.xml'%(image_id)) 26 | # Path where each image's .txt label file is written; change the path 27 | out_file = open('/home/cai/workspace/fly_piggy/labels/%s.txt'%(image_id),'w') 28 | tree=ET.parse(in_file) 29 | root = tree.getroot() 30 | size = root.find('size') 31 | w = int(size.find('width').text) 32 | h = int(size.find('height').text)# 33 | 34 | for obj in root.iter('object'): 35 | cls = obj.find('name').text 36 | if cls not in classes : 37 | continue 38 | cls_id = classes.index(cls) 39 | xmlbox = obj.find('bndbox') 40 | b = 
(float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) 41 | bb = convert((w,h), b) 42 | out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 43 | # Path of the train.txt file; change the path 44 | image_ids_train = open('/home/cai/workspace/fly_piggy/train.txt').read().strip().split() 45 | # Path of the val.txt file; change the path 46 | image_ids_val = open('/home/cai/workspace/fly_piggy/val.txt').read().strip().split() 47 | 48 | list_file_train = open('object_train.txt', 'w') 49 | list_file_val = open('object_val.txt', 'w') 50 | for image_id in image_ids_train: 51 | # Change this to the path of the folder containing the sample images 52 | list_file_train.write('/home/cai/workspace/fly_piggy/JPEGImages/%s.jpg\n'%(image_id)) 53 | convert_annotation(image_id) 54 | list_file_train.close() 55 | for image_id in image_ids_val: 56 | # Change this to the path of the folder containing the sample images 57 | list_file_val.write('/home/cai/workspace/fly_piggy/JPEGImages/%s.jpg\n'%(image_id)) 58 | convert_annotation(image_id) 59 | list_file_val.close() 60 | -------------------------------------------------------------------------------- /OneStage/yolo/coco2voc.md: -------------------------------------------------------------------------------- 1 | # coco2voc 2 | *Extract the classes and annotations you need from the COCO dataset and convert them into a dataset that YOLO can use* 3 | ### 1. Download coco2017 data 4 | *First download the [2017 Train images] dataset and the [2017 annotations], and put them into the coco directory* 5 | 6 | #### [2017 Train images](http://images.cocodataset.org/zips/train2017.zip) [118K/18GB] 7 | 8 | #### [2017 annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip) [241MB] 9 | 10 | [2017 Val images](http://images.cocodataset.org/zips/val2017.zip) [5K/1GB] 11 | 12 | [2017 Test images](http://images.cocodataset.org/zips/test2017.zip) [41K/6GB] 13 | 14 | cd ~/Desktop 15 | mkdir coco 16 | cd coco 17 | unzip train2017.zip -d ~/Desktop/coco && unzip annotations_trainval2017.zip -d ~/Desktop/coco 18 | mkdir -p result/Annotations result/images 19 | 20 | 21 | 22 | ### 2. Install cython 23 | *Install cython and download the cocoapi* 24 | 25 | pip3 install cython 26 | git clone https://github.com/cocodataset/cocoapi.git 27 | cd cocoapi/PythonAPI 28 | make 29 | 30 | ### 3. Change the classes_names and path 31 | *Put [coco2voc.py](https://github.com/yehengchen/ObjectDetection/blob/master/OneStage/yolo/coco2voc.py) into the PythonAPI/ directory, adjust the paths referenced in coco2voc.py, and run it there* 32 | 33 | classes_names = ['person', 'fire_extinguisher', 'fireplug', 'car', 'bicycle','motorcycle'] 34 | 35 | *Change these to the class names you need from the COCO dataset* 36 | 37 | python3 coco2voc.py 38 | #result/Annotations will hold the .xml files, result/images will hold the .jpg files 39 | 
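*For reference, coco2voc.py is run from PythonAPI/ because it presumably relies on the pycocotools API built above. The snippet below is only a minimal sketch of that class lookup, not the conversion script itself; the annotation path is an example, and only names that really exist as COCO categories will match ('fire_extinguisher' and 'fireplug' are custom classes, not COCO ones):*

    from pycocotools.coco import COCO

    # example annotation file; adjust to wherever annotations_trainval2017.zip was unzipped
    coco = COCO('annotations/instances_train2017.json')

    # map the wanted class names to COCO category ids
    cat_ids = coco.getCatIds(catNms=['person', 'car', 'bicycle', 'motorcycle'])

    # getImgIds(catIds=[...]) returns images containing *all* listed categories,
    # so take the union over single categories instead
    img_ids = set()
    for cat_id in cat_ids:
        img_ids.update(coco.getImgIds(catIds=[cat_id]))

    print('%d categories, %d candidate images' % (len(cat_ids), len(img_ids)))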
-------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/example/kitti/images/000021.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/convert2Yolo/example/kitti/images/000021.jpg -------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/example/kitti/labels/000021.txt: -------------------------------------------------------------------------------- 1 | Skigate 0.0 0 0.0 686 172 746 312 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2 | Skier 0.0 0 0.0 438 146 489 214 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3 | Person 0.0 0 0.0 353 126 380 192 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4 | -------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/example/kitti/names.txt: -------------------------------------------------------------------------------- 1 | Skigate 2 | Skier 3 | Person 4 | -------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/example/voc/JPEG/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/convert2Yolo/example/voc/JPEG/000001.jpg -------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/example/voc/label/000001.xml: -------------------------------------------------------------------------------- 1 | <annotation> 2 | <folder>voc2012</folder> 3 | <filename>000001</filename> 4 | <path>/media/martin/SsaRu/Aidentify/deeplearning/Yolo_darknet/darknet/voc/person_img/voc2007/000001.jpg</path> 5 | <source> 6 | <database>Unknown</database> 7 | </source> 8 | <size> 9 | <width>353</width> 10 | <height>500</height> 11 | <depth>3</depth> 12 | </size> 13 | <segmented>0</segmented> 14 | <object> 15 | <name>M_50s</name> 16 | <pose>Unspecified</pose> 17 | <truncated>0</truncated> 18 | <difficult>0</difficult> 19 | <bndbox> 20 | <xmin>129</xmin> 21 | <ymin>31</ymin> 22 | <xmax>298</xmax> 23 | <ymax>227</ymax> 24 | </bndbox> 25 | </object> 26 | </annotation> -------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/example/voc/names.txt: -------------------------------------------------------------------------------- 1 | M_50s 2 | -------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/images/voc_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/convert2Yolo/images/voc_image.png -------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/images/voc_xml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/convert2Yolo/images/voc_xml.png -------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/label_visualization.py: 
-------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | import os 4 | import argparse 5 | import time 6 | import pprint 7 | 8 | from PIL import Image, ImageDraw 9 | import matplotlib.pyplot as plt 10 | import json 11 | 12 | from Format import VOC, COCO, UDACITY, KITTI, YOLO 13 | 14 | parser = argparse.ArgumentParser(description='Evaluate label Converting.') 15 | parser.add_argument('--datasets', type=str, help='type of datasets') 16 | parser.add_argument('--img_path', type=str, help='directory of image folder') 17 | parser.add_argument('--label_path', type=str, help='directory of label folder') 18 | parser.add_argument('--img_type', type=str, help='type of image', default='.jpg') 19 | parser.add_argument('--cls_list_file', type=str, help='directory of *.names file', default="./") 20 | 21 | 22 | args = parser.parse_args() 23 | 24 | def main(): 25 | pp = pprint.PrettyPrinter(indent=4) 26 | 27 | img_path = args.img_path 28 | label_path = args.label_path 29 | img_type = args.img_type 30 | datasets = args.datasets 31 | cls_list = args.cls_list_file 32 | 33 | result = None 34 | data = None 35 | 36 | if datasets == "COCO": 37 | coco = COCO() 38 | result, data = coco.parse(label_path) 39 | elif datasets == "VOC": 40 | voc = VOC() 41 | result, data = voc.parse(label_path) 42 | elif datasets == "UDACITY": 43 | udacity = UDACITY() 44 | result, data = udacity.parse(label_path, img_path) 45 | elif datasets == "KITTI": 46 | kitti = KITTI() 47 | result, data = kitti.parse(label_path, img_path, img_type=img_type) 48 | elif datasets == "YOLO": 49 | yolo =YOLO(os.path.abspath(cls_list)) 50 | result, data = yolo.parse(label_path, img_path, img_type=img_type) 51 | 52 | if result is True: 53 | for key in data: 54 | 55 | filepath = "".join([img_path, key, img_type]) 56 | 57 | im = Image.open(filepath) 58 | 59 | draw = ImageDraw.Draw(im) 60 | print("data['{}']: ".format(key), end="") 61 | pp.pprint(data[key]) 62 | print("num_object : {}".format(data[key]["objects"]["num_obj"])) 63 | for idx in range(0, int(data[key]["objects"]["num_obj"])): 64 | print("idx {}, name : {}, bndbox :{}".format(idx, data[key]["objects"][str(idx)]["name"], data[key]["objects"][str(idx)]["bndbox"])) 65 | 66 | x0 = data[key]["objects"][str(idx)]["bndbox"]["xmin"] 67 | y0 = data[key]["objects"][str(idx)]["bndbox"]["ymin"] 68 | x1 = data[key]["objects"][str(idx)]["bndbox"]["xmax"] 69 | y1 = data[key]["objects"][str(idx)]["bndbox"]["ymax"] 70 | 71 | draw.rectangle(((x0,y0), (x1,y1)), outline='#00ff88') 72 | draw.text((x0,y0), data[key]["objects"][str(idx)]["name"]) 73 | 74 | del draw 75 | print("===============================================================================================\n\n") 76 | plt.imshow(im) 77 | plt.show() 78 | plt.clf() 79 | im.close() 80 | 81 | else: 82 | print("return value : {}, msg : {}, args: {}".format(result, data, args)) 83 | 84 | if __name__ == '__main__': 85 | main() 86 | -------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/msgLogInfo.py: -------------------------------------------------------------------------------- 1 | class color: 2 | BOLD = '\033[1m' 3 | END = '\033[0m' 4 | DEFAULT = '\033[0;37;40m' 5 | RED = '\033[91m' -------------------------------------------------------------------------------- /OneStage/yolo/convert2Yolo/requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow 2 | cycler==0.10.0 3 | kiwisolver==1.0.1 4 | 
matplotlib==2.2.2 5 | numpy==1.14.3 6 | pyparsing==2.2.0 7 | python-dateutil==2.7.2 8 | pytz==2018.4 9 | six==1.11.0 10 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/deep_sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | def to_tlbr(self): 35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 36 | `(top left, bottom right)`. 37 | """ 38 | ret = self.tlwh.copy() 39 | ret[2:] += ret[:2] 40 | return ret 41 | 42 | def to_xyah(self): 43 | """Convert bounding box to format `(center x, center y, aspect ratio, 44 | height)`, where the aspect ratio is `width / height`. 45 | """ 46 | ret = self.tlwh.copy() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/deep_sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 
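Examples
--------
A small worked check of the formula (intersection 25 over union 100 + 100 - 25 = 175):

>>> bbox = np.array([0., 0., 10., 10.])
>>> candidates = np.array([[5., 5., 10., 10.]])
>>> float(iou(bbox, candidates)[0])
0.14285714285714285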
24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/deep_sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/model_data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/model_data/market1501.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/deep_sort_yolov3/model_data/market1501.pb -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/model_data/mars-small128.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/deep_sort_yolov3/model_data/mars-small128.pb -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/model_data/mars.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/deep_sort_yolov3/model_data/mars.pb -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/model_data/obj.txt: 
-------------------------------------------------------------------------------- 1 | person 2 | fire_extinguisher 3 | fireplug 4 | car 5 | bicycle 6 | motorcycle 7 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/model_data/voc_classes.txt: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/model_data/yolo3_object.names: -------------------------------------------------------------------------------- 1 | person 2 | fire_extinguisher 3 | fireplug 4 | car 5 | bicycle 6 | motorcycle 7 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/model_data/yolo_anchors.txt: -------------------------------------------------------------------------------- 1 | 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 2 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/requirements.txt: -------------------------------------------------------------------------------- 1 | Keras==2.3.1 2 | tensorflow-gpu==1.15.2 3 | numpy==1.15.0 4 | opencv-python==3.4.4.19 5 | scikit-learn==0.21.2 6 | scipy==1.1.0 7 | Pillow 8 | torch==1.3.0 9 | torchvision==0.4.1 10 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/tools/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/deep_sort_yolov3/tools/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/tools/__pycache__/generate_detections.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/deep_sort_yolov3/tools/__pycache__/generate_detections.cpython-35.pyc -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov3/yolo3/utils.py: -------------------------------------------------------------------------------- 1 | """Miscellaneous utility functions.""" 2 | 3 | from functools import reduce 4 | 5 | from PIL import Image 6 | 7 | def compose(*funcs): 8 | """Compose arbitrarily many functions, evaluated left to right. 
9 | 10 | Reference: https://mathieularose.com/function-composition-in-python/ 11 | """ 12 | # return lambda x: reduce(lambda v, f: f(v), funcs, x) 13 | if funcs: 14 | return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs) 15 | else: 16 | raise ValueError('Composition of empty sequence not supported.') 17 | 18 | def letterbox_image(image, size): 19 | '''resize image with unchanged aspect ratio using padding''' 20 | image_w, image_h = image.size 21 | w, h = size 22 | new_w = int(image_w * min(w*1.0/image_w, h*1.0/image_h)) 23 | new_h = int(image_h * min(w*1.0/image_w, h*1.0/image_h)) 24 | resized_image = image.resize((new_w,new_h), Image.BICUBIC) 25 | 26 | boxed_image = Image.new('RGB', size, (128,128,128)) 27 | boxed_image.paste(resized_image, ((w-new_w)//2,(h-new_h)//2)) 28 | return boxed_image 29 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/deep_sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | def to_tlbr(self): 35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 36 | `(top left, bottom right)`. 37 | """ 38 | ret = self.tlwh.copy() 39 | ret[2:] += ret[:2] 40 | return ret 41 | 42 | def to_xyah(self): 43 | """Convert bounding box to format `(center x, center y, aspect ratio, 44 | height)`, where the aspect ratio is `width / height`. 45 | """ 46 | ret = self.tlwh.copy() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/deep_sort/detection_yolo.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection_YOLO(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | Parameters 9 | ---------- 10 | tlwh : array_like 11 | Bounding box in format `(x, y, w, h)`. 12 | confidence : float 13 | Detector confidence score. 14 | feature : array_like 15 | A feature vector that describes the object contained in this image. 16 | Attributesutils 17 | ---------- 18 | tlwh : ndarray 19 | Bounding box in format `(top left x, top left y, width, height)`. 20 | confidence : ndarray 21 | Detector confidence score. 
22 | feature : ndarray | NoneType 23 | A feature vector that describes the object contained in this image. 24 | """ 25 | 26 | def __init__(self, tlwh, confidence, cls): 27 | self.tlwh = np.asarray(tlwh, dtype=np.float) 28 | self.confidence = float(confidence) 29 | self.cls = cls 30 | 31 | def to_tlbr(self): 32 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 33 | `(top left, bottom right)`. 34 | """ 35 | ret = self.tlwh.copy() 36 | ret[2:] += ret[:2] 37 | return ret 38 | 39 | def to_xyah(self): 40 | """Convert bounding box to format `(center x, center y, aspect ratio, 41 | height)`, where the aspect ratio is `width / height`. 42 | """ 43 | ret = self.tlwh.copy() 44 | ret[:2] += ret[2:] / 2 45 | ret[2] /= ret[3] 46 | return ret 47 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/deep_sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 
65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/deep_sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/model_data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/model_data/market1501.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/deep_sort_yolov4/model_data/market1501.pb -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/model_data/mars-small128.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/deep_sort_yolov4/model_data/mars-small128.pb -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/model_data/mars.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/deep_sort_yolov4/model_data/mars.pb -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/model_data/obj.txt: 
-------------------------------------------------------------------------------- 1 | person 2 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/model_data/yolo_anchors.txt: -------------------------------------------------------------------------------- 1 | 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 2 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/output/README.md: -------------------------------------------------------------------------------- 1 | Output_video 2 | 3 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/output/comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/deep_sort_yolov4/output/comparison.png -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/requirements.txt: -------------------------------------------------------------------------------- 1 | Keras==2.3.1 2 | tensorflow-gpu==1.13.1 3 | numpy==1.15.0 4 | opencv-python==4.2.0 5 | scikit-learn==0.21.2 6 | scipy==1.4.1 7 | Pillow 8 | torch==1.3.0 9 | torchvision==0.4.1 10 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/test_video/README.md: -------------------------------------------------------------------------------- 1 | Intput_video 2 | 3 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/tools/frame2video.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import moviepy.editor as mp 4 | 5 | video = "00046" 6 | #input_imgs 7 | im_dir = '/home/cai/Desktop/yolo_dataset/t1_video/t1_video_'+video+'/' 8 | #im_dir = 9 | #output_video 10 | video_dir = '/home/cai/Desktop/yolo_dataset/t1_video/test_video/det_t1_video_'+video+'_test_q.avi' 11 | #fps 12 | fps = 50 13 | #num_of_imgs 14 | num = 310 15 | #img_size 16 | img_size = (1920,1080) 17 | fourcc = cv2.VideoWriter_fourcc('M','J','P','G') 18 | #opencv3 19 | 20 | videoWriter = cv2.VideoWriter(video_dir, fourcc, fps, img_size) 21 | for i in range(0,num): 22 | #im_name = os.path.join(im_dir,'frame-' + str(i) + '.png') 23 | im_name = os.path.join(im_dir,'t1_video_'+video+'_' + "%05d" % i + '.jpg') 24 | frame = cv2.imread(im_name) 25 | #frame = cv2.resize(frame, (480, 320)) 26 | #frame = cv2.resize(frame,(520,320), interpolation=cv2.INTER_CUBIC) 27 | videoWriter.write(frame) 28 | print (im_name) 29 | videoWriter.release() 30 | print('finish') 31 | -------------------------------------------------------------------------------- /OneStage/yolo/deep_sort_yolov4/tools/video2frame.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | image_folder = './mask_face' 4 | video_name = './cut_test.m4v' 5 | 6 | vc = cv2.VideoCapture(video_name) 7 | c = 1 8 | if vc.isOpened(): 9 | rval,frame=vc.read() 10 | else: 11 | rval=False 12 | while rval: 13 | rval,frame=vc.read() 14 | cv2.imwrite('./mask_face/IMG_'+str(c)+'.jpg',frame) 15 | c=c+1 16 | cv2.waitKey(1) 17 | vc.release() 18 | -------------------------------------------------------------------------------- 
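A note on the `video2frame.py` tool dumped just above: its loop writes the frame returned by `vc.read()` before re-checking the success flag of that read, so the final failed read hands `cv2.imwrite` a `None` frame. Below is a minimal, more defensive sketch of the same frame-extraction loop; the video path and output folder are placeholders, not files shipped with this repo:

```python
import os
import cv2

video_name = './cut_test.m4v'   # placeholder input video
out_dir = './mask_face'         # placeholder output folder
os.makedirs(out_dir, exist_ok=True)

vc = cv2.VideoCapture(video_name)
count = 1
while vc.isOpened():
    rval, frame = vc.read()
    if not rval:                # stop before writing a None frame
        break
    cv2.imwrite(os.path.join(out_dir, 'IMG_' + str(count) + '.jpg'), frame)
    count += 1
vc.release()
```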
/OneStage/yolo/requirements.txt: -------------------------------------------------------------------------------- 1 | Keras==2.3.1 2 | tensorflow-gpu==1.13.1 3 | numpy==1.15.0 4 | opencv-python==4.2.0 5 | scikit-learn==0.21.2 6 | scipy==1.4.1 7 | Pillow 8 | torch==1.3.0 9 | torchvision==0.4.1 10 | -------------------------------------------------------------------------------- /OneStage/yolo/tools/frame2video.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import moviepy.editor as mp 4 | 5 | video = "00046" 6 | #input_imgs 7 | im_dir = '/home/cai/Desktop/yolo_dataset/t1_video/t1_video_'+video+'/' 8 | #im_dir = 9 | #output_video 10 | video_dir = '/home/cai/Desktop/yolo_dataset/t1_video/test_video/det_t1_video_'+video+'_test_q.avi' 11 | #fps 12 | fps = 50 13 | #num_of_imgs 14 | num = 310 15 | #img_size 16 | img_size = (1920,1080) 17 | fourcc = cv2.VideoWriter_fourcc('M','J','P','G') 18 | #opencv3 19 | 20 | videoWriter = cv2.VideoWriter(video_dir, fourcc, fps, img_size) 21 | for i in range(0,num): 22 | #im_name = os.path.join(im_dir,'frame-' + str(i) + '.png') 23 | im_name = os.path.join(im_dir,'t1_video_'+video+'_' + "%05d" % i + '.jpg') 24 | frame = cv2.imread(im_name) 25 | #frame = cv2.resize(frame, (480, 320)) 26 | #frame = cv2.resize(frame,(520,320), interpolation=cv2.INTER_CUBIC) 27 | videoWriter.write(frame) 28 | print (im_name) 29 | videoWriter.release() 30 | print('finish') 31 | -------------------------------------------------------------------------------- /OneStage/yolo/tools/video2frame.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | image_folder = './mask_face' 4 | video_name = './cut_test.m4v' 5 | 6 | vc = cv2.VideoCapture(video_name) 7 | c = 1 8 | if vc.isOpened(): 9 | rval,frame=vc.read() 10 | else: 11 | rval=False 12 | while rval: 13 | rval,frame=vc.read() 14 | cv2.imwrite('./mask_face/IMG_'+str(c)+'.jpg',frame) 15 | c=c+1 16 | cv2.waitKey(1) 17 | vc.release() 18 | -------------------------------------------------------------------------------- /OneStage/yolo/w_name2txt.py: -------------------------------------------------------------------------------- 1 | import os 2 | names = os.listdir('/home/cai/Desktop/object_detection/datasets/coco/result/images/') #图片路径 3 | i=0 4 | train_val = open('/home/cai/Desktop/yolo_dataset/data/train.txt','w') #txt文件路径 5 | for name in names: 6 | index = name.rfind('.') 7 | name = name[:index] 8 | train_val.write(name+'\n') 9 | i=i+1 10 | print(i) 11 | -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/1*DhuOI39lNp6ZrG63h-ioBQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/1*DhuOI39lNp6ZrG63h-ioBQ.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/Results on MS COCO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/Results on MS COCO.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/Results on PASCAL VOC 2012 test set.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/Results on PASCAL VOC 2012 test set.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/Screenshot from 2019-05-18 16-55-25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/Screenshot from 2019-05-18 16-55-25.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/TownCentreXVID_output_ss.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/TownCentreXVID_output_ss.gif -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/model2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/model2.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/output_49.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/output_49.gif -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/output_car_143.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/output_car_143.gif -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/output_person_315_1120_s.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/output_person_315_1120_s.gif -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/yolo-network-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/yolo-network-architecture.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/yolo-responsible-predictor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/yolo-responsible-predictor.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/yolo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/yolo.png 
-------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/yologo_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/yologo_1.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/yolov1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/yolov1.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/yolov1_lossfunc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/yolov1_lossfunc.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/yolov1network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/yolov1network.png -------------------------------------------------------------------------------- /OneStage/yolo/yolo_img/yolov2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolo_img/yolov2.png -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/Annotations/README.md: -------------------------------------------------------------------------------- 1 | # Put all .xml file in here 2 | *用于存放与图片对应的XML文件* 3 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/Annotations/t1_video_00001_00001.xml: -------------------------------------------------------------------------------- 1 | 2 | JPEGImages 3 | RGB_Record_Data_05-42-08_0.rgb-69.jpg 4 | /home/cai/Desktop/dataset/car_action_detect/JPEGImages/RGB_Record_Data_05-42-08_0.rgb-69.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1280 10 | 720 11 | 3 12 | 13 | 0 14 | 15 | person 16 | Unspecified 17 | 1 18 | 0 19 | 20 | 809 21 | 73 22 | 1262 23 | 720 24 | 25 | 26 | 27 | person 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 46 33 | 102 34 | 367 35 | 567 36 | 37 | 38 | 39 | person 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 441 45 | 194 46 | 665 47 | 465 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/JPEGImages/README.md: -------------------------------------------------------------------------------- 1 | # Store all img in here 2 | *用于存放所有的图片,格式为.jpg* 3 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/JPEGImages/t1_video_00001_00001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolov3/JPEGImages/t1_video_00001_00001.jpg -------------------------------------------------------------------------------- 
/OneStage/yolo/yolov3/backup/README.md: -------------------------------------------------------------------------------- 1 | # weights 2 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/cfg/yolo3_object.data: -------------------------------------------------------------------------------- 1 | classes= 6 2 | train = /home/cai/Desktop/yolo_dataset/objectdetection/object_train.txt 3 | valid = /home/cai/Desktop/yolo_dataset/objectdetection/object_val.txt 4 | names = /home/cai/Desktop/yolo_dataset/objectdetection/yolo3_object.names 5 | backup = /home/cai/Desktop/yolo_dataset/objectdetection/backup/ 6 | 7 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/cfg/yolov3-cai-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | #batch=64 7 | #subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 20000 21 | policy=steps 22 | steps=18000,19000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | batch_normalize=1 40 | filters=32 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=leaky 45 | 46 | [maxpool] 47 | size=2 48 | stride=2 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [maxpool] 59 | size=2 60 | stride=2 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [maxpool] 95 | size=2 96 | stride=1 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=1024 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | ########### 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=24 129 | activation=linear 130 | 131 | 132 | 133 | [yolo] 134 | mask = 6,7,8 135 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 136 | classes=3 137 | num=9 138 | jitter=.3 139 | ignore_thresh = .7 140 | truth_thresh = 1 141 | random=1 142 | 143 | [route] 144 | layers = -4 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [upsample] 155 | stride=2 156 | 157 | [route] 158 | layers = -1, 8 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=256 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | size=1 170 | stride=1 171 | pad=1 172 | filters=24 173 | activation=linear 174 | 
175 | [yolo] 176 | mask = 3,4,5 177 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 178 | classes=3 179 | num=9 180 | jitter=.3 181 | ignore_thresh = .7 182 | truth_thresh = 1 183 | random=1 184 | 185 | 186 | 187 | [route] 188 | layers = -3 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=128 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [upsample] 199 | stride=2 200 | 201 | [route] 202 | layers = -1, 6 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=3 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=24 217 | activation=linear 218 | 219 | [yolo] 220 | mask = 0,1,2 221 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 222 | classes=3 223 | num=9 224 | jitter=.3 225 | ignore_thresh = .7 226 | truth_thresh = 1 227 | random=1 228 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/cfg/yolov3-tiny-action.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=24 7 | subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=24 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=3 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 
157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=24 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=3 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/img2train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author : Andy Liu 3 | # Last modified: 2018-8-15 4 | 5 | # This tool is used to create VOC-like txt file by reading image folder 6 | # input: python create_txt_list.py "/home/andy/Data/img" 7 | # output: 8 | # ./train.txt 9 | # ./val.txt 10 | 11 | import argparse 12 | import os,sys 13 | import random 14 | from os import listdir, getcwd 15 | from os.path import join 16 | import cv2 17 | from tqdm import tqdm 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('srcdir', help='file directory', type=str) 23 | 24 | args = parser.parse_args() 25 | return args 26 | 27 | def makelist(srcdir): 28 | srcdir = os.path.abspath(srcdir) 29 | if srcdir[-1] == "/": 30 | srcdir = srcdir[:-1] 31 | 32 | train_path_txt = "./train.txt" 33 | val_path_txt = "./val.txt" 34 | train_file=open(train_path_txt,'w+') # 'w+' rewrite, 'a' add 35 | val_file=open(val_path_txt,'w+') 36 | 37 | filelist = os.listdir(srcdir) 38 | trainset = random.sample(filelist, int(len(filelist)*0.8)) 39 | 40 | for file in tqdm(filelist): 41 | file_name,file_extend=os.path.splitext(file) 42 | 43 | img_path = srcdir + "/" + file 44 | img = cv2.imread(img_path) 45 | if img is None: 46 | print("%s can't read!"%file) 47 | continue 48 | 49 | if file in trainset: 50 | train_file.write(srcdir+"/"+file+'\n') 51 | else: 52 | val_file.write(srcdir+"/"+file+'\n') 53 | 54 | train_file.close() 55 | val_file.close() 56 | 57 | print("Path of train text = ",os.path.abspath(train_path_txt)) 58 | print("Path of valid text = ",os.path.abspath(val_path_txt)) 59 | 60 | if __name__ == '__main__': 61 | args = parse_args() 62 | srcdir = args.srcdir 63 | 64 | if not os.path.exists(srcdir): 65 | print("Error !!! 
%s is not exists, please check the parameter"%srcdir) 66 | sys.exit(0) 67 | 68 | makelist(srcdir) 69 | print("Done!") 70 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/labels/README.md: -------------------------------------------------------------------------------- 1 | # Put all .txt file in here 2 | *存放每张图片对应的txt文件 3 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/labels/t1_video_00001_00001.txt: -------------------------------------------------------------------------------- 1 | 0 0.739844 0.517361 0.067188 0.190278 2 | 0 0.658203 0.509028 0.052344 0.212500 3 | 0 0.517578 0.433333 0.044531 0.127778 4 | 0 0.419141 0.446528 0.041406 0.090278 5 | 0 0.457031 0.438194 0.046875 0.126389 6 | 3 0.746484 0.145139 0.113281 0.223611 7 | 3 0.561719 0.967361 0.328125 0.065278 8 | 3 0.033984 0.939583 0.067969 0.120833 9 | 3 0.372656 0.697222 0.582812 0.444444 10 | 4 0.728125 0.565972 0.056250 0.154167 11 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/test_img/predictions.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/OneStage/yolo/yolov3/test_img/predictions.jpg -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/voc_label.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | import pickle 3 | import os 4 | from os import listdir, getcwd 5 | from os.path import join 6 | 7 | #我的项目中有6个类别,类别名称在这里修改 change your classes in here 8 | classes = ["people","fire_extinguisher","fireplug","car","bicycle","motorcycle"] 9 | def convert(size, box): 10 | dw = 1./size[0] 11 | dh = 1./size[1] 12 | x = (box[0] + box[1])/2.0 13 | y = (box[2] + box[3])/2.0 14 | w = box[1] - box[0] 15 | h = box[3] - box[2] 16 | x = x*dw 17 | w = w*dw 18 | y = y*dh 19 | h = h*dh 20 | return (x,y,w,h) 21 | 22 | def convert_annotation(image_id): 23 | #这里改为.xml文件夹的路径 change the pth 24 | in_file = open('/home/cai/Desktop/dataset/data/Annotations/%s.xml'%(image_id)) 25 | #这里是生成每张图片对应的.txt文件的路径 change the pth 26 | out_file = open('/home/cai/Desktop/yolo_dataset/objectdetection/labels/%s.txt'%(image_id),'w') 27 | tree=ET.parse(in_file) 28 | root = tree.getroot() 29 | size = root.find('size') 30 | w = int(size.find('width').text) 31 | h = int(size.find('height').text)# 32 | 33 | for obj in root.iter('object'): 34 | cls = obj.find('name').text 35 | if cls not in classes : 36 | continue 37 | cls_id = classes.index(cls) 38 | xmlbox = obj.find('bndbox') 39 | b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) 40 | bb = convert((w,h), b) 41 | out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 42 | #这里是train.txt文件的路径 change the pth 43 | image_ids_train = open('/home/cai/Desktop/yolo_dataset/objectdetection/train.txt').read().strip().split() 44 | #这里是val.txt文件的路径 change the pth 45 | image_ids_val = open('/home/cai/Desktop/yolo_dataset/objectdetection/val.txt').read().strip().split() 46 | 47 | list_file_train = open('object_train.txt', 'w') 48 | list_file_val = open('object_val.txt', 'w') 49 | for image_id in image_ids_train: 50 | #这里改为样本图片所在文件夹的路径 change the pth 51 | 
list_file_train.write('/home/cai/Desktop/yolo_dataset/objectdetection/JPEGImages/%s.jpg\n'%(image_id)) 52 | #convert_annotation(image_id) 53 | list_file_train.close() 54 | for image_id in image_ids_val: 55 | # Change this to the path of the folder containing the sample images 56 | list_file_val.write('/home/cai/Desktop/yolo_dataset/objectdetection/JPEGImages/%s.jpg\n'%(image_id)) 57 | #convert_annotation(image_id) 58 | list_file_val.close() 59 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3/yolo3_object.names: -------------------------------------------------------------------------------- 1 | person 2 | fire_extinguisher 3 | fireplug 4 | car 5 | bicycle 6 | motorcycle 7 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3_sort/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv3 + SORT - Person Counter 2 | 3 | *This project detects and tracks people in a video stream and counts those crossing a defined line.* 4 | 5 | ![sort.gif](https://github.com/yehengchen/ObjectDetection/blob/master/OneStage/yolo/yolo_img/sort_1.gif) 6 | 7 | ## Requirements 8 | 9 | * Python 3.5 10 | * OpenCV 11 | * Numpy 12 | 13 | It uses: 14 | 15 | * __[YOLOv3](https://github.com/yehengchen/ObjectDetection/tree/master/OneStage/yolo/yolov3)__ to detect objects on each of the video frames. - Train a YOLOv3 model on your own data 16 | 17 | * __[SORT](https://github.com/abewley/sort)__ to track those objects over different frames. 18 | 19 | Once the objects are detected and tracked over different frames, a simple mathematical calculation is applied to count the intersections between the objects' previous and current frame positions and a defined line. 20 | 21 | 22 | ## Quick Start 23 | 24 | 1. Download the code to your computer. 25 | 26 | 2. Download __[[yolov3.weights]](https://pjreddie.com/media/files/yolov3.weights)__ and place it in `yolov3_sort/yolo-obj/` 27 | 28 | 3. [yolov3_sort/main.py] Change the paths __labelsPath / weightsPath / configPath__. - Update the paths in main.py 29 | 30 | 4. 
Run the YOLOv3 counter: 31 | ``` 32 | $ python3 main.py --input input/test.mp4 --output output/test.avi --yolo yolo-obj 33 | ``` 34 | 35 | ## Citation 36 | 37 | ### YOLOv3 : 38 | 39 | @article{yolov3, 40 | title={YOLOv3: An Incremental Improvement}, 41 | author={Redmon, Joseph and Farhadi, Ali}, 42 | journal = {arXiv}, 43 | year={2018} 44 | } 45 | 46 | ### SORT : 47 | 48 | @inproceedings{Bewley2016_sort, 49 | author={Bewley, Alex and Ge, Zongyuan and Ott, Lionel and Ramos, Fabio and Upcroft, Ben}, 50 | booktitle={2016 IEEE International Conference on Image Processing (ICIP)}, 51 | title={Simple online and realtime tracking}, 52 | year={2016}, 53 | pages={3464-3468}, 54 | keywords={Benchmark testing;Complexity theory;Detectors;Kalman filters;Target tracking;Visualization;Computer Vision;Data Association;Detection;Multiple Object Tracking}, 55 | doi={10.1109/ICIP.2016.7533003} 56 | } 57 | 58 | ## Reference 59 | #### Github@ [guillelopez](https://github.com/guillelopez/python-traffic-counter-with-yolo-and-sort) 60 | 61 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3_sort/yolo-obj/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /OneStage/yolo/yolov3_sort/yolo-obj/yolo3_object.names: -------------------------------------------------------------------------------- 1 | people 2 | fire_extinguisher 3 | fireplug 4 | car 5 | bicycle 6 | motorcycle 7 | -------------------------------------------------------------------------------- /Two-stage vs One-stage Detectors.md: -------------------------------------------------------------------------------- 1 | # Two-stage vs One-stage Detectors 2 | ### Comparison of Two-stage and One-stage - [[YouTube]](https://www.youtube.com/watch?v=V4P_ptn2FF4) 3 | 4 | ### Two-stage Detectors 5 | *First locate the objects (Region Proposals) -> then recognize them (Object Recognition)* 6 | 7 | -__Models in the R-CNN family are all region-based - [[R-CNN]]()__ 8 | * First, the model proposes a set of regions of interest by selective search or a region proposal network. The proposed regions are sparse, as the potential bounding box candidates can be infinite. 9 | * Then a classifier only processes the region candidates (see the schematic sketch below).
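To make the two-stage flow concrete, here is a toy, runnable sketch (NumPy only). The `propose_regions` and `classify_region` functions are hypothetical random stand-ins for selective search / a region proposal network and for the second-stage classifier head; they are not code from this repository, only an illustration of the control flow.

```python
import numpy as np

def propose_regions(image, num_proposals=100):
    # Stage 1 stand-in for selective search / a region proposal network:
    # returns sparse candidate boxes as (x, y, w, h) rows.
    h, w = image.shape[:2]
    xy = np.random.rand(num_proposals, 2) * [0.8 * w, 0.8 * h]
    wh = np.random.rand(num_proposals, 2) * [0.2 * w, 0.2 * h]
    return np.hstack([xy, wh])

def classify_region(image, box):
    # Stage 2 stand-in for the per-region classifier head:
    # returns (class_id, confidence) for one candidate box.
    return np.random.randint(0, 80), float(np.random.rand())

def two_stage_detect(image):
    # Two-stage flow: propose sparse candidate regions first,
    # then run the classifier only on those candidates.
    return [(box, *classify_region(image, box)) for box in propose_regions(image)]

detections = two_stage_detect(np.zeros((416, 416, 3), dtype=np.uint8))
print(len(detections), "region candidates scored")
```

A one-stage detector, described next, collapses both steps into a single dense prediction over the whole image.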
10 | 11 | 12 | ![](https://github.com/yehengchen/ObjectDetection/blob/master/img/two_stage.png) 13 | 14 | *The other approach skips the region proposal stage and runs detection directly over a dense sampling of possible locations. This is how a one-stage object detection algorithm works. It is faster and simpler, but can potentially drag down performance a bit.* 15 | 16 | ### One-stage Detectors 17 | *Locate and recognize objects in a single pass - Detecting objects in images using a single deep neural network* 18 | 19 | -__YOLO (You only look once): YOLOv1, YOLOv2, YOLOv3, Tiny YOLO - [[YOLO]](https://github.com/yehengchen/ObjectDetection/blob/master/OneStage/yolo/yolo.md)__ 20 | 21 | -__Single Shot Detector (SSD) - [[SSD]]()__ 22 | 23 | * A single convolutional network predicts the bounding boxes and the class probabilities for these boxes. 24 | 25 | ![](https://github.com/yehengchen/ObjectDetection/blob/master/img/one_stage.png) 26 | 27 | ## Two-stage vs One-stage Detectors 28 | ![](https://github.com/yehengchen/ObjectDetection/blob/master/img/yolo_vs_rcnn.png) 29 | 30 | ## Results on COCO 31 | *For the last couple of years, many results have been measured exclusively on the COCO object detection dataset. COCO is _harder_ for object detection, and detectors usually achieve much lower mAP on it. Here is a comparison of some key detectors.* 32 | ![](https://github.com/yehengchen/ObjectDetection/blob/master/img/COCO%20object%20detection%20dataset.jpeg) 33 | 34 | ## Results on PASCAL VOC 35 | *For the results presented below, the models are trained on both PASCAL VOC 2007 and 2012 data. The mAP is measured on the PASCAL VOC 2012 test set. For SSD, the chart shows results for 300 × 300 and 512 × 512 input images. For YOLO, it shows results for 288 × 288, 416 × 416 and 544 × 544 images. Higher-resolution images for the same model give better mAP but are slower to process.* 36 | 37 | ![](https://github.com/yehengchen/ObjectDetection/blob/master/img/PASCAL%20VOC%202007%20and%202012%20data.png) 38 | 39 | *Input image resolution and the feature extractor impact speed. Below are the highest and lowest FPS reported by the corresponding papers. Still, the results below can be highly biased, in particular because they are measured at different mAP.* 40 | 41 | ![](https://github.com/yehengchen/ObjectDetection/blob/master/img/PASCAL%20VOC%202007%20and%202012%20data%20FPS.png) 42 | 43 | __Comparison of the COCO and Pascal VOC datasets__ -> [[Click Here]](https://github.com/yehengchen/ObjectDetection/blob/master/COCO%20and%20Pascal%20VOC.md) 44 | -------------------------------------------------------------------------------- /TwoStage/R-CNN/README.md: -------------------------------------------------------------------------------- 1 | # Mask R-CNN 2 | 3 | ## [labelme](https://github.com/wkentaro/labelme) - Image Polygonal Annotation with Python 4 | 5 |
6 | 7 |
8 | 9 | ### Requirements 10 | 11 | - Ubuntu / macOS / Windows 12 | - Python2 / Python3 13 | - [PyQt4 / PyQt5](http://www.riverbankcomputing.co.uk/software/pyqt/intro) / [PySide2](https://wiki.qt.io/PySide2_GettingStarted) 14 | 15 | 16 | #### Anaconda 17 | 18 | You need install [Anaconda](https://www.continuum.io/downloads), then run below: 19 | 20 | ``` 21 | # python2 22 | conda create --name=labelme python=2.7 23 | source activate labelme 24 | # conda install -c conda-forge pyside2 25 | conda install pyqt 26 | pip install labelme 27 | # if you'd like to use the latest version. run below: 28 | # pip install git+https://github.com/wkentaro/labelme.git 29 | 30 | # python3 31 | conda create --name=labelme python=3.6 32 | source activate labelme 33 | # conda install -c conda-forge pyside2 34 | # conda install pyqt 35 | # pip install pyqt5 # pyqt5 can be installed via pip on python3 36 | pip install labelme 37 | # or you can install everything by conda command 38 | # conda install labelme -c conda-forge 39 | ``` 40 | 41 | #### Ubuntu 42 | 43 | ```bash 44 | # Ubuntu 14.04 / Ubuntu 16.04 45 | # Python2 46 | # sudo apt-get install python-qt4 # PyQt4 47 | sudo apt-get install python-pyqt5 # PyQt5 48 | sudo pip install labelme 49 | # Python3 50 | sudo apt-get install python3-pyqt5 # PyQt5 51 | sudo pip3 install labelme 52 | ``` 53 | 54 | ### Windows 55 | 56 | Firstly, follow instruction in [Anaconda](#anaconda). 57 | 58 | ```bash 59 | # Pillow 5 causes dll load error on Windows. 60 | # https://github.com/wkentaro/labelme/pull/174 61 | conda install pillow=4.0.0 62 | ``` 63 | -------------------------------------------------------------------------------- /TwoStage/R-CNN/annotation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/TwoStage/R-CNN/annotation.jpg -------------------------------------------------------------------------------- /TwoStage/R-CNN/convert2json.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | s1="/home/cai/Desktop/TableCapturer/json/" 4 | s2=".json" 5 | 6 | for i in range(99): 7 | s3 = str(i).zfill(6) 8 | os.system("labelme_json_to_dataset"+" "+ s1 + s3 + s2) 9 | i+=1 10 | -------------------------------------------------------------------------------- /img/8.1.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/8.1.2.png -------------------------------------------------------------------------------- /img/COCO object detection dataset.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/COCO object detection dataset.jpeg -------------------------------------------------------------------------------- /img/F1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/F1.png -------------------------------------------------------------------------------- /img/Object-Detection-Deep-Learning.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/Object-Detection-Deep-Learning.jpg -------------------------------------------------------------------------------- /img/PASCAL VOC 2007 and 2012 data FPS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/PASCAL VOC 2007 and 2012 data FPS.png -------------------------------------------------------------------------------- /img/PASCAL VOC 2007 and 2012 data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/PASCAL VOC 2007 and 2012 data.png -------------------------------------------------------------------------------- /img/ap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/ap.png -------------------------------------------------------------------------------- /img/coco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/coco.png -------------------------------------------------------------------------------- /img/coco_yolo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/coco_yolo.png -------------------------------------------------------------------------------- /img/dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/dataset.png -------------------------------------------------------------------------------- /img/deep_learning_object_detection_history.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/deep_learning_object_detection_history.PNG -------------------------------------------------------------------------------- /img/fig1 .png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/fig1 .png -------------------------------------------------------------------------------- /img/fig1-1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/fig1-1.jpeg -------------------------------------------------------------------------------- /img/fig1-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/fig1-2.jpeg -------------------------------------------------------------------------------- /img/fig2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/fig2.png -------------------------------------------------------------------------------- /img/fig3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/fig3.png -------------------------------------------------------------------------------- /img/fig4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/fig4.png -------------------------------------------------------------------------------- /img/objectdetection.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/objectdetection.gif -------------------------------------------------------------------------------- /img/one_stage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/one_stage.png -------------------------------------------------------------------------------- /img/two_stage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/two_stage.png -------------------------------------------------------------------------------- /img/voc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/voc.png -------------------------------------------------------------------------------- /img/voc_yolo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/voc_yolo.png -------------------------------------------------------------------------------- /img/yolo_vs_rcnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yehengchen/Object-Detection-and-Tracking/a7d750efc3ea47a5aee88a464e37016dff016d06/img/yolo_vs_rcnn.png --------------------------------------------------------------------------------