├── utils ├── init.py ├── parse_config.py ├── torch_utils.py ├── google_utils.py ├── gcp.sh └── adabound.py ├── plot_results.py ├── data ├── coco_1img.txt ├── samples │ ├── bus.jpg │ └── zidane.jpg ├── coco.data ├── coco_1k5k.data ├── coco_16img.data ├── coco_1cls.data ├── coco_1img.data ├── coco_32img.data ├── coco_64img.data ├── coco_500val.data ├── coco_1000img.data ├── coco_1000val.data ├── coco_1cls.txt ├── get_coco_dataset_gdrive.sh ├── coco_16img.txt ├── coco.names ├── coco_paper.names ├── get_coco_dataset.sh ├── coco_32img.txt └── coco_64img.txt ├── convert_pt_weights.py ├── requirements.txt ├── .github └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── weights └── download_yolov3_weights.sh ├── cfg ├── yolov3-tiny.cfg ├── yolov3-1cls.cfg ├── yolov3.cfg ├── yolov3s-3a320.cfg ├── yolov3-spp-1cls.cfg ├── yolov3-spp.cfg ├── yolov3s-9a320.cfg ├── yolov3s-18a320.cfg ├── yolov3s-30a320.cfg └── yolov3-spp-pan-scale.cfg ├── .gitignore ├── detect.py ├── test.py ├── README.md └── prune.py /utils/init.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plot_results.py: -------------------------------------------------------------------------------- 1 | from utils import utils 2 | utils.plot_results() 3 | -------------------------------------------------------------------------------- /data/coco_1img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/val2014/COCO_val2014_000000581886.jpg 2 | -------------------------------------------------------------------------------- /data/samples/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erikguo/yolov3/HEAD/data/samples/bus.jpg -------------------------------------------------------------------------------- /data/samples/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erikguo/yolov3/HEAD/data/samples/zidane.jpg -------------------------------------------------------------------------------- /convert_pt_weights.py: -------------------------------------------------------------------------------- 1 | from models import *; 2 | import sys 3 | 4 | print(sys.argv) 5 | convert(sys.argv[1], sys.argv[2]) 6 | -------------------------------------------------------------------------------- /data/coco.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/trainvalno5k.txt 3 | valid=../coco/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1k5k.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_16img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_16img.txt 3 | valid=./data/coco_16img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1cls.data: 
-------------------------------------------------------------------------------- 1 | classes=1 2 | train=./data/coco_1cls.txt 3 | valid=./data/coco_1cls.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1img.txt 3 | valid=./data/coco_1img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_32img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_32img.txt 3 | valid=./data/coco_32img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_64img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_64img.txt 3 | valid=./data/coco_64img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_500val.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_500img.txt 3 | valid=./data/coco_500val.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1000img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/coco_1000img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1000val.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/coco_1000val.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1cls.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/val2014/COCO_val2014_000000013992.jpg 2 | ../coco/images/val2014/COCO_val2014_000000047226.jpg 3 | ../coco/images/val2014/COCO_val2014_000000050324.jpg 4 | ../coco/images/val2014/COCO_val2014_000000121497.jpg 5 | ../coco/images/val2014/COCO_val2014_000000001464.jpg 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip3 install -U -r requirements.txt 2 | numpy 3 | opencv-python 4 | torch >= 1.1.0 5 | matplotlib 6 | pycocotools 7 | tqdm 8 | tb-nightly 9 | future 10 | Pillow 11 | 12 | # Equivalent conda commands ---------------------------------------------------- 13 | # conda update -n base -c defaults conda 14 | # conda install -y -c anaconda future numpy opencv matplotlib tqdm pillow 15 | # conda install -y -c conda-forge scikit-image tensorboard pycocotools 16 | # conda install -y -c spyder-ide spyder-line-profiler 17 | # conda install pytorch torchvision -c pytorch 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /data/get_coco_dataset_gdrive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859 3 | 4 | # Zip coco folder 5 | # zip -r coco.zip coco 6 | # tar -czvf coco.tar.gz coco 7 | 8 | # Set fileid and filename 9 | filename="coco.zip" 10 | fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO" # coco.zip 11 | 12 | # Download from Google Drive, accepting presented query 13 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 14 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 15 | rm ./cookie 16 | 17 | # Unzip 18 | unzip -q ${filename} # for coco.zip 19 | # tar -xzf ${filename} # for coco.tar.gz 20 | -------------------------------------------------------------------------------- /weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not exist and cd into it 4 | mkdir -p weights && cd weights 5 | 6 | # copy darknet weight files, continue '-c' if partially downloaded 7 | wget -c https://pjreddie.com/media/files/yolov3.weights 8 | wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 9 | wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 
22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Version [e.g. 22] 29 | 30 | **Smartphone (please complete the following information):** 31 | - Device: [e.g. iPhoneXS] 32 | - OS: [e.g. iOS8.1] 33 | - Version [e.g. 22] 34 | 35 | **Additional context** 36 | Add any other context about the problem here. 37 | -------------------------------------------------------------------------------- /data/coco_16img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | -------------------------------------------------------------------------------- /data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /data/coco_paper.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | street sign 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | hat 27 | backpack 28 | umbrella 29 | shoe 30 | eye glasses 31 | handbag 32 | tie 33 | suitcase 34 | frisbee 35 | skis 36 | snowboard 37 | sports ball 38 | kite 39 | baseball bat 40 | baseball glove 41 | skateboard 42 | surfboard 43 | tennis 
racket 44 | bottle 45 | plate 46 | wine glass 47 | cup 48 | fork 49 | knife 50 | spoon 51 | bowl 52 | banana 53 | apple 54 | sandwich 55 | orange 56 | broccoli 57 | carrot 58 | hot dog 59 | pizza 60 | donut 61 | cake 62 | chair 63 | couch 64 | potted plant 65 | bed 66 | mirror 67 | dining table 68 | window 69 | desk 70 | toilet 71 | door 72 | tv 73 | laptop 74 | mouse 75 | remote 76 | keyboard 77 | cell phone 78 | microwave 79 | oven 80 | toaster 81 | sink 82 | refrigerator 83 | blender 84 | book 85 | clock 86 | vase 87 | scissors 88 | teddy bear 89 | hair drier 90 | toothbrush 91 | hair brush -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def parse_model_cfg(path): 5 | # Parses the yolo-v3 layer configuration file and returns module definitions 6 | file = open(path, 'r') 7 | lines = file.read().split('\n') 8 | lines = [x for x in lines if x and not x.startswith('#')] 9 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 10 | mdefs = [] # module definitions 11 | for line in lines: 12 | if line.startswith('['): # This marks the start of a new block 13 | mdefs.append({}) 14 | mdefs[-1]['type'] = line[1:-1].rstrip() 15 | if mdefs[-1]['type'] == 'convolutional': 16 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 17 | else: 18 | key, val = line.split("=") 19 | key = key.rstrip() 20 | 21 | if 'anchors' in key: 22 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 23 | else: 24 | mdefs[-1][key] = val.strip() 25 | 26 | return mdefs 27 | 28 | 29 | def parse_data_cfg(path): 30 | # Parses the data configuration file 31 | options = dict() 32 | with open(path, 'r') as fp: 33 | lines = fp.readlines() 34 | 35 | for line in lines: 36 | line = line.strip() 37 | if line == '' or line.startswith('#'): 38 | continue 39 | key, val = line.split('=') 40 | options[key.strip()] = val.strip() 41 | 42 | return options 43 | -------------------------------------------------------------------------------- /data/get_coco_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh 3 | 4 | # Clone COCO API 5 | git clone https://github.com/pdollar/coco && cd coco 6 | 7 | # Download Images 8 | mkdir images && cd images 9 | wget -c https://pjreddie.com/media/files/train2014.zip 10 | wget -c https://pjreddie.com/media/files/val2014.zip 11 | 12 | # Unzip 13 | unzip -q train2014.zip 14 | unzip -q val2014.zip 15 | 16 | # (optional) Delete zip files 17 | rm -rf *.zip 18 | 19 | cd .. 
20 | 21 | # Download COCO Metadata 22 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 23 | wget -c https://pjreddie.com/media/files/coco/5k.part 24 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 25 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 26 | tar xzf labels.tgz 27 | unzip -q instances_train-val2014.zip 28 | 29 | # Set Up Image Lists 30 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 31 | paste <(awk "{print \"$PWD\"}" trainvalno5k.txt 32 | 33 | # get xview training data 34 | # wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ' 35 | # tar -xvzf train_images.tgz 36 | # sudo rm -rf train_images/._* 37 | # lastly convert each .tif to a .bmp for faster loading in cv2 38 | 39 | # ./coco/images/train2014/COCO_train2014_000000167126.jpg # corrupted image 40 | -------------------------------------------------------------------------------- /data/coco_32img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg 18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg 19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg 20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg 21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg 22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg 23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg 24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg 25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg 26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg 27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg 28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg 29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg 30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg 31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg 32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg 33 | -------------------------------------------------------------------------------- /utils/torch_utils.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def init_seeds(seed=0): 5 | torch.cuda.empty_cache() 6 | torch.manual_seed(seed) 7 | torch.cuda.manual_seed(seed) 8 | torch.cuda.manual_seed_all(seed) 9 | # torch.backends.cudnn.deterministic = True # https://pytorch.org/docs/stable/notes/randomness.html 10 | 11 | 12 | def select_device(force_cpu=False, apex=False): 13 | # apex if mixed precision training https://github.com/NVIDIA/apex 14 | cuda = False if force_cpu else torch.cuda.is_available() 15 | device = torch.device('cuda:0' if cuda else 'cpu') 16 | 17 | if not cuda: 18 | print('Using CPU') 19 | if cuda: 20 | torch.backends.cudnn.benchmark = True # set False for reproducible results 21 | c = 1024 ** 2 # bytes to MB 22 | ng = torch.cuda.device_count() 23 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 24 | cuda_str = 'Using CUDA ' + ('Apex ' if apex else '') 25 | for i in range(0, ng): 26 | if i == 1: 27 | # torch.cuda.set_device(0) # OPTIONAL: Set GPU ID 28 | cuda_str = ' ' * len(cuda_str) 29 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 30 | (cuda_str, i, x[i].name, x[i].total_memory / c)) 31 | 32 | print('') # skip a line 33 | return device 34 | 35 | 36 | def fuse_conv_and_bn(conv, bn): 37 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 38 | with torch.no_grad(): 39 | # init 40 | fusedconv = torch.nn.Conv2d(conv.in_channels, 41 | conv.out_channels, 42 | kernel_size=conv.kernel_size, 43 | stride=conv.stride, 44 | padding=conv.padding, 45 | bias=True) 46 | 47 | # prepare filters 48 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 49 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 50 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 51 | 52 | # prepare spatial bias 53 | if conv.bias is not None: 54 | b_conv = conv.bias 55 | else: 56 | b_conv = torch.zeros(conv.weight.size(0)) 57 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 58 | fusedconv.bias.copy_(b_conv + b_bn) 59 | 60 | return fusedconv 61 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... 
' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt small file download 22 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id) 23 | os.system(s) 24 | 25 | # Attempt large file download 26 | if not os.path.exists(name): # file size > 40MB 27 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 28 | "curl -Lb ./cookie \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 29 | id, name), 30 | 'rm ./cookie'] 31 | [os.system(x) for x in s] # run commands 32 | 33 | # Unzip if archive 34 | if name.endswith('.zip'): 35 | print('unzipping... ', end='') 36 | os.system('unzip -q %s' % name) # unzip 37 | os.remove(name) # remove zip to free space 38 | 39 | print('Done (%.1fs)' % (time.time() - t)) 40 | 41 | 42 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 43 | # Uploads a file to a bucket 44 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 45 | 46 | storage_client = storage.Client() 47 | bucket = storage_client.get_bucket(bucket_name) 48 | blob = bucket.blob(destination_blob_name) 49 | 50 | blob.upload_from_filename(source_file_name) 51 | 52 | print('File {} uploaded to {}.'.format( 53 | source_file_name, 54 | destination_blob_name)) 55 | 56 | 57 | def download_blob(bucket_name, source_blob_name, destination_file_name): 58 | # Uploads a blob from a bucket 59 | storage_client = storage.Client() 60 | bucket = storage_client.get_bucket(bucket_name) 61 | blob = bucket.blob(source_blob_name) 62 | 63 | blob.download_to_filename(destination_file_name) 64 | 65 | print('Blob {} downloaded to {}.'.format( 66 | source_blob_name, 67 | destination_file_name)) 68 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | 
activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /data/coco_64img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg 18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg 19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg 20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg 21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg 22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg 23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg 24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg 25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg 26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg 27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg 28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg 29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg 30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg 31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg 32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg 33 | 
../coco/images/train2014/COCO_train2014_000000000263.jpg 34 | ../coco/images/train2014/COCO_train2014_000000000307.jpg 35 | ../coco/images/train2014/COCO_train2014_000000000308.jpg 36 | ../coco/images/train2014/COCO_train2014_000000000309.jpg 37 | ../coco/images/train2014/COCO_train2014_000000000312.jpg 38 | ../coco/images/train2014/COCO_train2014_000000000315.jpg 39 | ../coco/images/train2014/COCO_train2014_000000000321.jpg 40 | ../coco/images/train2014/COCO_train2014_000000000322.jpg 41 | ../coco/images/train2014/COCO_train2014_000000000326.jpg 42 | ../coco/images/train2014/COCO_train2014_000000000332.jpg 43 | ../coco/images/train2014/COCO_train2014_000000000349.jpg 44 | ../coco/images/train2014/COCO_train2014_000000000368.jpg 45 | ../coco/images/train2014/COCO_train2014_000000000370.jpg 46 | ../coco/images/train2014/COCO_train2014_000000000382.jpg 47 | ../coco/images/train2014/COCO_train2014_000000000384.jpg 48 | ../coco/images/train2014/COCO_train2014_000000000389.jpg 49 | ../coco/images/train2014/COCO_train2014_000000000394.jpg 50 | ../coco/images/train2014/COCO_train2014_000000000404.jpg 51 | ../coco/images/train2014/COCO_train2014_000000000419.jpg 52 | ../coco/images/train2014/COCO_train2014_000000000431.jpg 53 | ../coco/images/train2014/COCO_train2014_000000000436.jpg 54 | ../coco/images/train2014/COCO_train2014_000000000438.jpg 55 | ../coco/images/train2014/COCO_train2014_000000000443.jpg 56 | ../coco/images/train2014/COCO_train2014_000000000446.jpg 57 | ../coco/images/train2014/COCO_train2014_000000000450.jpg 58 | ../coco/images/train2014/COCO_train2014_000000000471.jpg 59 | ../coco/images/train2014/COCO_train2014_000000000490.jpg 60 | ../coco/images/train2014/COCO_train2014_000000000491.jpg 61 | ../coco/images/train2014/COCO_train2014_000000000510.jpg 62 | ../coco/images/train2014/COCO_train2014_000000000514.jpg 63 | ../coco/images/train2014/COCO_train2014_000000000529.jpg 64 | ../coco/images/train2014/COCO_train2014_000000000531.jpg 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Repo-specific GitIgnore ---------------------------------------------------------------------------------------------- 2 | *.jpg 3 | *.png 4 | *.bmp 5 | *.tif 6 | *.heic 7 | *.JPG 8 | *.PNG 9 | *.TIF 10 | *.HEIC 11 | *.mp4 12 | *.mov 13 | *.MOV 14 | *.avi 15 | *.data 16 | *.json 17 | 18 | *.cfg 19 | !cfg/yolov3*.cfg 20 | 21 | runs/* 22 | data/* 23 | !data/samples/zidane.jpg 24 | !data/samples/bus.jpg 25 | !data/coco.names 26 | !data/coco_paper.names 27 | !data/coco.data 28 | !data/coco_*.data 29 | !data/coco_*.txt 30 | !data/coco_*.txt 31 | !data/trainvalno5k.shapes 32 | !data/5k.shapes 33 | !data/5k.txt 34 | !data/*.sh 35 | 36 | pycocotools/* 37 | results*.txt 38 | gcp_test*.sh 39 | 40 | # MATLAB GitIgnore ----------------------------------------------------------------------------------------------------- 41 | *.m~ 42 | *.mat 43 | !targets*.mat 44 | 45 | # Neural Network weights ----------------------------------------------------------------------------------------------- 46 | *.weights 47 | *.pt 48 | *.onnx 49 | *.mlmodel 50 | darknet53.conv.74 51 | yolov3-tiny.conv.15 52 | 53 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 54 | # Byte-compiled / optimized / DLL files 55 | __pycache__/ 56 | *.py[cod] 57 | *$py.class 58 | 59 | # C extensions 60 | *.so 61 | 62 | # Distribution / packaging 63 
| .Python 64 | env/ 65 | build/ 66 | develop-eggs/ 67 | dist/ 68 | downloads/ 69 | eggs/ 70 | .eggs/ 71 | lib/ 72 | lib64/ 73 | parts/ 74 | sdist/ 75 | var/ 76 | wheels/ 77 | *.egg-info/ 78 | .installed.cfg 79 | *.egg 80 | 81 | # PyInstaller 82 | # Usually these files are written by a python script from a template 83 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 84 | *.manifest 85 | *.spec 86 | 87 | # Installer logs 88 | pip-log.txt 89 | pip-delete-this-directory.txt 90 | 91 | # Unit test / coverage reports 92 | htmlcov/ 93 | .tox/ 94 | .coverage 95 | .coverage.* 96 | .cache 97 | nosetests.xml 98 | coverage.xml 99 | *.cover 100 | .hypothesis/ 101 | 102 | # Translations 103 | *.mo 104 | *.pot 105 | 106 | # Django stuff: 107 | *.log 108 | local_settings.py 109 | 110 | # Flask stuff: 111 | instance/ 112 | .webassets-cache 113 | 114 | # Scrapy stuff: 115 | .scrapy 116 | 117 | # Sphinx documentation 118 | docs/_build/ 119 | 120 | # PyBuilder 121 | target/ 122 | 123 | # Jupyter Notebook 124 | .ipynb_checkpoints 125 | 126 | # pyenv 127 | .python-version 128 | 129 | # celery beat schedule file 130 | celerybeat-schedule 131 | 132 | # SageMath parsed files 133 | *.sage.py 134 | 135 | # dotenv 136 | .env 137 | 138 | # virtualenv 139 | .venv 140 | venv/ 141 | ENV/ 142 | 143 | # Spyder project settings 144 | .spyderproject 145 | .spyproject 146 | 147 | # Rope project settings 148 | .ropeproject 149 | 150 | # mkdocs documentation 151 | /site 152 | 153 | # mypy 154 | .mypy_cache/ 155 | 156 | 157 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 158 | 159 | # General 160 | .DS_Store 161 | .AppleDouble 162 | .LSOverride 163 | 164 | # Icon must end with two \r 165 | Icon 166 | Icon? 
167 | 168 | # Thumbnails 169 | ._* 170 | 171 | # Files that might appear in the root of a volume 172 | .DocumentRevisions-V100 173 | .fseventsd 174 | .Spotlight-V100 175 | .TemporaryItems 176 | .Trashes 177 | .VolumeIcon.icns 178 | .com.apple.timemachine.donotpresent 179 | 180 | # Directories potentially created on remote AFP share 181 | .AppleDB 182 | .AppleDesktop 183 | Network Trash Folder 184 | Temporary Items 185 | .apdisk 186 | 187 | 188 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 189 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 190 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 191 | 192 | # User-specific stuff: 193 | .idea/* 194 | .idea/**/workspace.xml 195 | .idea/**/tasks.xml 196 | .idea/dictionaries 197 | .html # Bokeh Plots 198 | .pg # TensorFlow Frozen Graphs 199 | .avi # videos 200 | 201 | # Sensitive or high-churn files: 202 | .idea/**/dataSources/ 203 | .idea/**/dataSources.ids 204 | .idea/**/dataSources.local.xml 205 | .idea/**/sqlDataSources.xml 206 | .idea/**/dynamic.xml 207 | .idea/**/uiDesigner.xml 208 | 209 | # Gradle: 210 | .idea/**/gradle.xml 211 | .idea/**/libraries 212 | 213 | # CMake 214 | cmake-build-debug/ 215 | cmake-build-release/ 216 | 217 | # Mongo Explorer plugin: 218 | .idea/**/mongoSettings.xml 219 | 220 | ## File-based project format: 221 | *.iws 222 | 223 | ## Plugin-specific files: 224 | 225 | # IntelliJ 226 | out/ 227 | 228 | # mpeltonen/sbt-idea plugin 229 | .idea_modules/ 230 | 231 | # JIRA plugin 232 | atlassian-ide-plugin.xml 233 | 234 | # Cursive Clojure plugin 235 | .idea/replstate.xml 236 | 237 | # Crashlytics plugin (for Android Studio and IntelliJ) 238 | com_crashlytics_export_strings.xml 239 | crashlytics.properties 240 | crashlytics-build.properties 241 | fabric.properties 242 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | from sys import platform 4 | 5 | from models import * 6 | from utils.datasets import * 7 | from utils.utils import * 8 | 9 | 10 | def detect(cfg, 11 | data, 12 | weights, 13 | images='data/samples', # input folder 14 | output='output', # output folder 15 | fourcc='mp4v', # video codec 16 | img_size=416, 17 | conf_thres=0.5, 18 | nms_thres=0.5, 19 | save_txt=False, 20 | save_images=True): 21 | # Initialize 22 | device = torch_utils.select_device(force_cpu=ONNX_EXPORT) 23 | torch.backends.cudnn.benchmark = False # set False for reproducible results 24 | if os.path.exists(output): 25 | shutil.rmtree(output) # delete output folder 26 | os.makedirs(output) # make new output folder 27 | 28 | # Initialize model 29 | if ONNX_EXPORT: 30 | s = (320, 192) # (320, 192) or (416, 256) or (608, 352) onnx model image size (height, width) 31 | model = Darknet(cfg, s) 32 | else: 33 | model = Darknet(cfg, img_size) 34 | 35 | # Load weights 36 | if weights.endswith('.pt'): # pytorch format 37 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 38 | else: # darknet format 39 | _ = load_darknet_weights(model, weights) 40 | 41 | # Fuse Conv2d + BatchNorm2d layers 42 | # model.fuse() 43 | 44 | # Eval mode 45 | model.to(device).eval() 46 | 47 | # Export mode 48 | if ONNX_EXPORT: 49 | img = torch.zeros((1, 3, s[0], s[1])) 50 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=True) 51 | return 52 | 53 | # Half 
precision 54 | opt.half = opt.half and device.type != 'cpu' # half precision only supported on CUDA 55 | if opt.half: 56 | model.half() 57 | 58 | # Set Dataloader 59 | vid_path, vid_writer = None, None 60 | if opt.webcam: 61 | save_images = False 62 | dataloader = LoadWebcam(img_size=img_size, half=opt.half) 63 | else: 64 | dataloader = LoadImages(images, img_size=img_size, half=opt.half) 65 | 66 | # Get classes and colors 67 | classes = load_classes(parse_data_cfg(data)['names']) 68 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] 69 | 70 | # Run inference 71 | t0 = time.time() 72 | for i, (path, img, im0, vid_cap) in enumerate(dataloader): 73 | t = time.time() 74 | save_path = str(Path(output) / Path(path).name) 75 | 76 | # Get detections 77 | img = torch.from_numpy(img).unsqueeze(0).to(device) 78 | pred, _ = model(img) 79 | det = non_max_suppression(pred.float(), conf_thres, nms_thres)[0] 80 | 81 | if det is not None and len(det) > 0: 82 | # Rescale boxes from 416 to true image size 83 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 84 | 85 | # Print results to screen 86 | print('%gx%g ' % img.shape[2:], end='') # print image size 87 | for c in det[:, -1].unique(): 88 | n = (det[:, -1] == c).sum() 89 | print('%g %ss' % (n, classes[int(c)]), end=', ') 90 | 91 | # Draw bounding boxes and labels of detections 92 | for *xyxy, conf, cls_conf, cls in det: 93 | if save_txt: # Write to file 94 | with open(save_path + '.txt', 'a') as file: 95 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 96 | 97 | # Add bbox to the image 98 | label = '%s %.2f' % (classes[int(cls)], conf) 99 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 100 | 101 | print('Done. (%.3fs)' % (time.time() - t)) 102 | 103 | if opt.webcam: # Show live webcam 104 | cv2.imshow(weights, im0) 105 | 106 | if save_images: # Save image with detections 107 | if dataloader.mode == 'images': 108 | cv2.imwrite(save_path, im0) 109 | else: 110 | if vid_path != save_path: # new video 111 | vid_path = save_path 112 | if isinstance(vid_writer, cv2.VideoWriter): 113 | vid_writer.release() # release previous video writer 114 | 115 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 116 | width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 117 | height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 118 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (width, height)) 119 | vid_writer.write(im0) 120 | 121 | if save_images: 122 | print('Results saved to %s' % os.getcwd() + os.sep + output) 123 | if platform == 'darwin': # macos 124 | os.system('open ' + output + ' ' + save_path) 125 | 126 | print('Done. 
(%.3fs)' % (time.time() - t0)) 127 | 128 | 129 | if __name__ == '__main__': 130 | parser = argparse.ArgumentParser() 131 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 132 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 133 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 134 | parser.add_argument('--images', type=str, default='data/samples', help='path to images') 135 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 136 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 137 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 138 | parser.add_argument('--fourcc', type=str, default='mp4v', help='fourcc output video codec (verify ffmpeg support)') 139 | parser.add_argument('--output', type=str, default='output', help='specifies the output path for images and videos') 140 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 141 | parser.add_argument('--webcam', action='store_true', help='use webcam') 142 | opt = parser.parse_args() 143 | print(opt) 144 | 145 | with torch.no_grad(): 146 | detect(opt.cfg, 147 | opt.data, 148 | opt.weights, 149 | images=opt.images, 150 | img_size=opt.img_size, 151 | conf_thres=opt.conf_thres, 152 | nms_thres=opt.nms_thres, 153 | fourcc=opt.fourcc, 154 | output=opt.output) 155 | -------------------------------------------------------------------------------- /utils/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf sample_data yolov3 darknet apex coco cocoapi knife knifec 5 | git clone https://github.com/ultralytics/yolov3 6 | git clone https://github.com/AlexeyAB/darknet && cd darknet && make GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=1 && wget -c https://pjreddie.com/media/files/darknet53.conv.74 && cd .. 7 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex 8 | #git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | python3 -c " 11 | from yolov3.utils.google_utils import gdrive_download 12 | gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip') 13 | gdrive_download('1GrFcTIIsKzOafZltUOS75RSahPrj2KyT','knife.zip') 14 | gdrive_download('19sLJEGHlIAIFHcEftq4aLCw_tkWZmhD1','knifec.zip')" 15 | sudo shutdown 16 | 17 | # Re-clone 18 | rm -rf yolov3 # Warning: remove existing 19 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master 20 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch 21 | 22 | # Train 23 | python3 train.py 24 | 25 | # Resume 26 | python3 train.py --resume 27 | 28 | # Detect 29 | python3 detect.py 30 | 31 | # Test 32 | python3 test.py --save-json 33 | 34 | # Evolve 35 | for i in {0..500} 36 | do 37 | python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4 38 | done 39 | 40 | # Git pull 41 | git pull https://github.com/ultralytics/yolov3 # master 42 | git pull https://github.com/ultralytics/yolov3 test # branch 43 | 44 | # Test Darknet training 45 | python3 test.py --weights ../darknet/backup/yolov3.backup 46 | 47 | # Copy last.pt TO bucket 48 | gsutil cp yolov3/weights/last1gpu.pt gs://ultralytics 49 | 50 | # Copy last.pt FROM bucket 51 | gsutil cp gs://ultralytics/last.pt yolov3/weights/last.pt 52 | wget https://storage.googleapis.com/ultralytics/yolov3/last_v1_0.pt -O weights/last_v1_0.pt 53 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 54 | 55 | # Reproduce tutorials 56 | rm results*.txt # WARNING: removes existing results 57 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results0r_1img.txt 58 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results0r_10img.txt 59 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results0r_100img.txt 60 | # python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 61 | python3 -c "from utils import utils; utils.plot_results()" 62 | # gsutil cp results*.txt gs://ultralytics 63 | gsutil cp results.png gs://ultralytics 64 | sudo shutdown 65 | 66 | # Reproduce mAP 67 | python3 test.py --save-json --img-size 608 68 | python3 test.py --save-json --img-size 416 69 | python3 test.py --save-json --img-size 320 70 | sudo shutdown 71 | 72 | # Benchmark script 73 | git clone https://github.com/ultralytics/yolov3 # clone our repo 74 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex # install nvidia apex 75 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" # download coco dataset (20GB) 76 | cd yolov3 && clear && python3 train.py --epochs 1 # run benchmark (~30 min) 77 | 78 | # Unit tests 79 | python3 detect.py # detect 2 persons, 1 tie 80 | python3 test.py --data data/coco_32img.data # test mAP = 0.8 81 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 5 epochs 82 | python3 train.py --data data/coco_1cls.data --epochs 5 --nosave # train 5 epochs 83 | python3 train.py --data data/coco_1img.data --epochs 5 --nosave # train 5 epochs 84 | 85 | # AlexyAB Darknet 86 | gsutil cp -r gs://sm6/supermarket2 . 
# dataset from bucket 87 | rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && wget -c https://pjreddie.com/media/files/darknet53.conv.74 # sudo apt install libopencv-dev && make 88 | ./darknet detector calc_anchors data/coco_img64.data -num_of_clusters 9 -width 320 -height 320 # kmeans anchor calculation 89 | ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp 90 | ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco 91 | 92 | ./darknet detector train data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp 93 | gsutil cp -r backup/*5000.weights gs://sm6/weights 94 | sudo shutdown 95 | 96 | 97 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny 98 | ./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume 99 | python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics 100 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test 101 | gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket 102 | 103 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test 104 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test 105 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test 106 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test 107 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test 108 | 109 | python3 train.py --img-size 320 --epochs 27 --batch-size 64 --accumulate 1 --nosave --notest && python3 test.py --weights weights/last.pt --img-size 320 --save-json && sudo shutdown 110 | 111 | # Debug/Development 112 | python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou 113 | python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320 114 | 115 | gsutil cp evolve.txt gs://ultralytics 116 | sudo shutdown 117 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | from torch.utils.data import DataLoader 5 | 6 | from models import * 7 | from utils.datasets import * 8 | from utils.utils import * 9 | 10 | 11 | def test(cfg, 12 | data, 13 | weights=None, 14 | batch_size=16, 15 | img_size=416, 16 | iou_thres=0.5, 17 | conf_thres=0.001, 18 | 
nms_thres=0.5, 19 | save_json=False, 20 | model=None): 21 | # Initialize/load model and set device 22 | if model is None: 23 | device = torch_utils.select_device() 24 | verbose = True 25 | 26 | # Initialize model 27 | model = Darknet(cfg, img_size).to(device) 28 | 29 | # Load weights 30 | if weights.endswith('.pt'): # pytorch format 31 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 32 | else: # darknet format 33 | _ = load_darknet_weights(model, weights) 34 | 35 | if torch.cuda.device_count() > 1: 36 | model = nn.DataParallel(model) 37 | else: 38 | device = next(model.parameters()).device # get model device 39 | verbose = False 40 | 41 | # Configure run 42 | data = parse_data_cfg(data) 43 | nc = int(data['classes']) # number of classes 44 | test_path = data['valid'] # path to test images 45 | names = load_classes(data['names']) # class names 46 | 47 | # Dataloader 48 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size) 49 | dataloader = DataLoader(dataset, 50 | batch_size=batch_size, 51 | num_workers=min(os.cpu_count(), batch_size), 52 | pin_memory=True, 53 | collate_fn=dataset.collate_fn) 54 | 55 | seen = 0 56 | model.eval() 57 | coco91class = coco80_to_coco91_class() 58 | s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1') 59 | p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0. 60 | loss = torch.zeros(3) 61 | jdict, stats, ap, ap_class = [], [], [], [] 62 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): 63 | targets = targets.to(device) 64 | imgs = imgs.to(device) 65 | _, _, height, width = imgs.shape # batch size, channels, height, width 66 | 67 | # Plot images with bounding boxes 68 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'): 69 | plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg') 70 | 71 | # Run model 72 | inf_out, train_out = model(imgs) # inference and training outputs 73 | 74 | # Compute loss 75 | if hasattr(model, 'hyp'): # if model has loss hyperparameters 76 | loss += compute_loss(train_out, targets, model)[1][:3].cpu() # GIoU, obj, cls 77 | 78 | # Run NMS 79 | output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) 80 | 81 | # Statistics per image 82 | for si, pred in enumerate(output): 83 | labels = targets[targets[:, 0] == si, 1:] 84 | nl = len(labels) 85 | tcls = labels[:, 0].tolist() if nl else [] # target class 86 | seen += 1 87 | 88 | if pred is None: 89 | if nl: 90 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls)) 91 | continue 92 | 93 | # Append to text file 94 | # with open('test.txt', 'a') as file: 95 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred] 96 | 97 | # Append to pycocotools JSON dictionary 98 | if save_json: 99 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
100 | image_id = int(Path(paths[si]).stem.split('_')[-1]) 101 | box = pred[:, :4].clone() # xyxy 102 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape 103 | box = xyxy2xywh(box) # xywh 104 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner 105 | for di, d in enumerate(pred): 106 | jdict.append({'image_id': image_id, 107 | 'category_id': coco91class[int(d[6])], 108 | 'bbox': [floatn(x, 3) for x in box[di]], 109 | 'score': floatn(d[4], 5)}) 110 | 111 | # Clip boxes to image bounds 112 | clip_coords(pred, (height, width)) 113 | 114 | # Assign all predictions as incorrect 115 | correct = [0] * len(pred) 116 | if nl: 117 | detected = [] 118 | tcls_tensor = labels[:, 0] 119 | 120 | # target boxes 121 | tbox = xywh2xyxy(labels[:, 1:5]) 122 | tbox[:, [0, 2]] *= width 123 | tbox[:, [1, 3]] *= height 124 | 125 | # Search for correct predictions 126 | for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred): 127 | 128 | # Break if all targets already located in image 129 | if len(detected) == nl: 130 | break 131 | 132 | # Continue if predicted class not among image classes 133 | if pcls.item() not in tcls: 134 | continue 135 | 136 | # Best iou, index between pred and targets 137 | m = (pcls == tcls_tensor).nonzero().view(-1) 138 | iou, bi = bbox_iou(pbox, tbox[m]).max(0) 139 | 140 | # If iou > threshold and class is correct mark as correct 141 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]: 142 | correct[i] = 1 143 | detected.append(m[bi]) 144 | 145 | # Append statistics (correct, conf, pcls, tcls) 146 | stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls)) 147 | 148 | # Compute statistics 149 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to numpy 150 | if len(stats): 151 | p, r, ap, f1, ap_class = ap_per_class(*stats) 152 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() 153 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 154 | else: 155 | nt = torch.zeros(1) 156 | 157 | # Print results 158 | pf = '%20s' + '%10.3g' * 6 # print format 159 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1)) 160 | 161 | # Print results per class 162 | if verbose and nc > 1 and len(stats): 163 | for i, c in enumerate(ap_class): 164 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) 165 | 166 | # Save JSON 167 | if save_json and map and len(jdict): 168 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files] 169 | with open('results.json', 'w') as file: 170 | json.dump(jdict, file) 171 | 172 | from pycocotools.coco import COCO 173 | from pycocotools.cocoeval import COCOeval 174 | 175 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb 176 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api 177 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api 178 | 179 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 180 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images 181 | cocoEval.evaluate() 182 | cocoEval.accumulate() 183 | cocoEval.summarize() 184 | map = cocoEval.stats[1] # update mAP to pycocotools mAP 185 | 186 | # Return results 187 | maps = np.zeros(nc) + map 188 | for i, c in enumerate(ap_class): 189 | maps[c] = ap[i] 190 | return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps 191 | 192 | 193 | if __name__ == '__main__': 194 | parser = argparse.ArgumentParser(prog='test.py') 195 | parser.add_argument('--batch-size', type=int, 
default=16, help='size of each image batch') 196 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 197 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 198 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 199 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected') 200 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') 201 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 202 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 203 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 204 | opt = parser.parse_args() 205 | print(opt) 206 | 207 | with torch.no_grad(): 208 | test(opt.cfg, 209 | opt.data, 210 | opt.weights, 211 | opt.batch_size, 212 | opt.img_size, 213 | opt.iou_thres, 214 | opt.conf_thres, 215 | opt.nms_thres, 216 | opt.save_json) 217 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | I have combined SLIMYOLO into this repo. 2 | 3 | You can use train.py to train with sparsity regularization via the argument "--sparsity 0.0001", and then use prune.py to prune the resulting model.
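A minimal sketch of that two-step workflow is shown below. It passes only the `--sparsity` argument mentioned above and assumes the default options of both scripts; anything beyond that is an assumption, so check each script's argparse options before relying on it.

```bash
# 1) Sparsity training: the --sparsity value scales the sparsity penalty
#    applied during training so that unimportant channels can be identified.
python3 train.py --sparsity 0.0001

# 2) Channel pruning on the sparsity-trained model
#    (see prune.py for its available options).
python3 prune.py
```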
25 | 26 | # Introduction 27 | 28 | This directory contains PyTorch YOLOv3 software developed by Ultralytics LLC, and **is freely available for redistribution under the GPL-3.0 license**. For more information please visit https://www.ultralytics.com. 29 | 30 | # Description 31 | 32 | The https://github.com/ultralytics/yolov3 repo contains inference and training code for YOLOv3 in PyTorch. The code works on Linux, MacOS and Windows. Training is done on the COCO dataset by default: https://cocodataset.org/#home. **Credit to Joseph Redmon for YOLO:** https://pjreddie.com/darknet/yolo/. 33 | 34 | # Requirements 35 | 36 | Python 3.7 or later with the following `pip3 install -U -r requirements.txt` packages: 37 | 38 | - `numpy` 39 | - `torch >= 1.1.0` 40 | - `opencv-python` 41 | - `tqdm` 42 | 43 | # Tutorials 44 | 45 | * [GCP Quickstart](https://github.com/ultralytics/yolov3/wiki/GCP-Quickstart) 46 | * [Transfer Learning](https://github.com/ultralytics/yolov3/wiki/Example:-Transfer-Learning) 47 | * [Train Single Image](https://github.com/ultralytics/yolov3/wiki/Example:-Train-Single-Image) 48 | * [Train Single Class](https://github.com/ultralytics/yolov3/wiki/Example:-Train-Single-Class) 49 | * [Train Custom Data](https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data) 50 | 51 | # Jupyter Notebook 52 | 53 | Our Jupyter [notebook](https://colab.research.google.com/github/ultralytics/yolov3/blob/master/examples.ipynb) provides quick training, inference and testing examples. 54 | 55 | # Training 56 | 57 | **Start Training:** `python3 train.py` to begin training after downloading COCO data with `data/get_coco_dataset.sh`. Each epoch trains on 117,263 images from the train and validate COCO sets, and tests on 5000 images from the COCO validate set. 58 | 59 | **Resume Training:** `python3 train.py --resume` to resume training from `weights/last.pt`. 60 | 61 | **Plot Training:** `from utils import utils; utils.plot_results()` plots training results from `coco_16img.data`, `coco_64img.data`, 2 example datasets available in the `data/` folder, which train and test on the first 16 and 64 images of the COCO2014-trainval dataset. 62 | ![image](https://user-images.githubusercontent.com/26833433/63258271-fe9d5300-c27b-11e9-9a15-95038daf4438.png) 63 | 64 | ## Image Augmentation 65 | 66 | `datasets.py` applies random OpenCV-powered (https://opencv.org/) augmentation to the input images in accordance with the following specifications. Augmentation is applied **only** during training, not during inference. Bounding boxes are automatically tracked and updated with the images. 416 x 416 examples pictured below. 
67 | 68 | Augmentation | Description 69 | --- | --- 70 | Translation | +/- 10% (vertical and horizontal) 71 | Rotation | +/- 5 degrees 72 | Shear | +/- 2 degrees (vertical and horizontal) 73 | Scale | +/- 10% 74 | Reflection | 50% probability (horizontal-only) 75 | H**S**V Saturation | +/- 50% 76 | HS**V** Intensity | +/- 50% 77 | 78 | 79 | 80 | ## Speed 81 | 82 | https://cloud.google.com/deep-learning-vm/ 83 | **Machine type:** n1-standard-8 (8 vCPUs, 30 GB memory) 84 | **CPU platform:** Intel Skylake 85 | **GPUs:** K80 ($0.20/hr), T4 ($0.35/hr), V100 ($0.83/hr) CUDA with [Nvidia Apex](https://github.com/NVIDIA/apex) FP16/32 86 | **HDD:** 100 GB SSD 87 | **Dataset:** COCO train 2014 (117,263 images) 88 | 89 | GPUs | `batch_size` | images/sec | epoch time | epoch cost 90 | --- |---| --- | --- | --- 91 | K80 | 64 (32x2) | 11 | 175 min | $0.58 92 | T4 | 64 (32x2) | 40 | 49 min | $0.29 93 | T4 x2 | 64 (64x1) | 61 | 32 min | $0.36 94 | V100 | 64 (32x2) | 115 | 17 min | $0.24 95 | V100 x2 | 64 (64x1) | 150 | 13 min | $0.36 96 | 2080Ti | 64 (32x2) | 69 | 28 min | - 97 | 98 | 99 | # Inference 100 | 101 | `detect.py` runs inference on all images **and videos** in the `data/samples` folder: 102 | 103 | **YOLOv3:** `python3 detect.py --cfg cfg/yolov3.cfg --weights weights/yolov3.weights` 104 | 105 | 106 | **YOLOv3-tiny:** `python3 detect.py --cfg cfg/yolov3-tiny.cfg --weights weights/yolov3-tiny.weights` 107 | 108 | 109 | **YOLOv3-SPP:** `python3 detect.py --cfg cfg/yolov3-spp.cfg --weights weights/yolov3-spp.weights` 110 | 111 | 112 | ## Webcam 113 | 114 | `python3 detect.py --webcam` shows a live webcam feed. 115 | 116 | # Pretrained Weights 117 | 118 | - Darknet `*.weights` format: https://pjreddie.com/media/files/yolov3.weights 119 | - PyTorch `*.pt` format: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 120 | 121 | ## Darknet Conversion 122 | 123 | ```bash 124 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 125 | 126 | # convert darknet cfg/weights to pytorch model 127 | python3 -c "from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights')" 128 | Success: converted 'weights/yolov3-spp.weights' to 'converted.pt' 129 | 130 | # convert cfg/pytorch model to darknet weights 131 | python3 -c "from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.pt')" 132 | Success: converted 'weights/yolov3-spp.pt' to 'converted.weights' 133 | ``` 134 | 135 | # mAP 136 | 137 | - `test.py --weights weights/yolov3.weights` tests official YOLOv3 weights. 138 | - `test.py --weights weights/last.pt` tests most recent checkpoint. 139 | - `test.py --weights weights/best.pt` tests best checkpoint. 140 | - Compare to darknet published results https://arxiv.org/abs/1804.02767. 141 | 142 | [ultralytics/yolov3](https://github.com/ultralytics/yolov3) mAP@0.5 ([darknet](https://arxiv.org/abs/1804.02767)-reported mAP@0.5) 143 | 144 | | 320 | 416 | 608 145 | --- | --- | --- | --- 146 | `YOLOv3` | 51.8 (51.5) | 55.4 (55.3) | 58.2 (57.9) 147 | `YOLOv3-SPP` | 52.4 | 56.5 | 60.7 (60.6) 148 | `YOLOv3-tiny` | 29.0 | 32.9 (33.1) | 35.5 149 | 150 | ``` bash 151 | # install pycocotools 152 | git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 153 | cd yolov3 154 | 155 | python3 test.py --save-json --img-size 608 156 | Namespace(batch_size=16, cfg='cfg/yolov3-spp.cfg', conf_thres=0.001, data='data/coco.data', img_size=608, iou_thres=0.5, nms_thres=0.5, save_json=True, weights='weights/yolov3-spp.weights') 157 | Using CUDA device0 _CudaDeviceProperties(name='Tesla T4', total_memory=15079MB) 158 | Class Images Targets P R mAP F1: 100% 313/313 [07:40<00:00, 2.34s/it] 159 | all 5e+03 3.58e+04 0.117 0.788 0.595 0.199 160 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.367 <--- 161 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.607 <--- 162 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.387 163 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.208 164 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.392 165 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.487 166 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.297 167 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.465 168 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.495 169 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.332 170 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.518 171 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.621 172 | 173 | python3 test.py --save-json --img-size 416 174 | Namespace(batch_size=16, cfg='cfg/yolov3-spp.cfg', conf_thres=0.001, data='data/coco.data', img_size=416, iou_thres=0.5, nms_thres=0.5, save_json=True, weights='weights/yolov3-spp.weights') 175 | Using CUDA device0 _CudaDeviceProperties(name='Tesla T4', total_memory=15079MB) 176 | Class Images Targets P R mAP F1: 100% 313/313 [07:01<00:00, 1.41s/it] 177 | all 5e+03 3.58e+04 0.105 0.746 0.554 0.18 178 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.336 <--- 179 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.565 <--- 180 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.350 181 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.151 182 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.361 183 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.494 184 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.281 185 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.433 186 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.459 187 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.256 188 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.495 189 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.622 190 | ``` 191 | 192 | # Citation 193 | 194 | [![DOI](https://zenodo.org/badge/146165888.svg)](https://zenodo.org/badge/latestdoi/146165888) 195 | 196 | # Contact 197 | 198 | Issues should be raised directly in the repository. For additional questions or comments please email Glenn Jocher at glenn.jocher@ultralytics.com or visit us at https://contact.ultralytics.com. 
199 | -------------------------------------------------------------------------------- /utils/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | 7 | class AdaBound(Optimizer): 8 | """Implements AdaBound algorithm. 9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 10 | Arguments: 11 | params (iterable): iterable of parameters to optimize or dicts defining 12 | parameter groups 13 | lr (float, optional): Adam learning rate (default: 1e-3) 14 | betas (Tuple[float, float], optional): coefficients used for computing 15 | running averages of gradient and its square (default: (0.9, 0.999)) 16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 18 | eps (float, optional): term added to the denominator to improve 19 | numerical stability (default: 1e-8) 20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 22 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 23 | https://openreview.net/forum?id=Bkg3g2R9FX 24 | """ 25 | 26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 27 | eps=1e-8, weight_decay=0, amsbound=False): 28 | if not 0.0 <= lr: 29 | raise ValueError("Invalid learning rate: {}".format(lr)) 30 | if not 0.0 <= eps: 31 | raise ValueError("Invalid epsilon value: {}".format(eps)) 32 | if not 0.0 <= betas[0] < 1.0: 33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 36 | if not 0.0 <= final_lr: 37 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 38 | if not 0.0 <= gamma < 1.0: 39 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 41 | weight_decay=weight_decay, amsbound=amsbound) 42 | super(AdaBound, self).__init__(params, defaults) 43 | 44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 45 | 46 | def __setstate__(self, state): 47 | super(AdaBound, self).__setstate__(state) 48 | for group in self.param_groups: 49 | group.setdefault('amsbound', False) 50 | 51 | def step(self, closure=None): 52 | """Performs a single optimization step. 53 | Arguments: 54 | closure (callable, optional): A closure that reevaluates the model 55 | and returns the loss. 56 | """ 57 | loss = None 58 | if closure is not None: 59 | loss = closure() 60 | 61 | for group, base_lr in zip(self.param_groups, self.base_lrs): 62 | for p in group['params']: 63 | if p.grad is None: 64 | continue 65 | grad = p.grad.data 66 | if grad.is_sparse: 67 | raise RuntimeError( 68 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 69 | amsbound = group['amsbound'] 70 | 71 | state = self.state[p] 72 | 73 | # State initialization 74 | if len(state) == 0: 75 | state['step'] = 0 76 | # Exponential moving average of gradient values 77 | state['exp_avg'] = torch.zeros_like(p.data) 78 | # Exponential moving average of squared gradient values 79 | state['exp_avg_sq'] = torch.zeros_like(p.data) 80 | if amsbound: 81 | # Maintains max of all exp. moving avg. of sq. grad. 
values 82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 83 | 84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 85 | if amsbound: 86 | max_exp_avg_sq = state['max_exp_avg_sq'] 87 | beta1, beta2 = group['betas'] 88 | 89 | state['step'] += 1 90 | 91 | if group['weight_decay'] != 0: 92 | grad = grad.add(group['weight_decay'], p.data) 93 | 94 | # Decay the first and second moment running average coefficient 95 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 97 | if amsbound: 98 | # Maintains the maximum of all 2nd moment running avg. till now 99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 100 | # Use the max. for normalizing running avg. of gradient 101 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 102 | else: 103 | denom = exp_avg_sq.sqrt().add_(group['eps']) 104 | 105 | bias_correction1 = 1 - beta1 ** state['step'] 106 | bias_correction2 = 1 - beta2 ** state['step'] 107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 108 | 109 | # Applies bounds on actual learning rate 110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 111 | final_lr = group['final_lr'] * group['lr'] / base_lr 112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 114 | step_size = torch.full_like(denom, step_size) 115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 116 | 117 | p.data.add_(-step_size) 118 | 119 | return loss 120 | 121 | 122 | class AdaBoundW(Optimizer): 123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 125 | Arguments: 126 | params (iterable): iterable of parameters to optimize or dicts defining 127 | parameter groups 128 | lr (float, optional): Adam learning rate (default: 1e-3) 129 | betas (Tuple[float, float], optional): coefficients used for computing 130 | running averages of gradient and its square (default: (0.9, 0.999)) 131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 133 | eps (float, optional): term added to the denominator to improve 134 | numerical stability (default: 1e-8) 135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 137 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 138 | https://openreview.net/forum?id=Bkg3g2R9FX 139 | """ 140 | 141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 142 | eps=1e-8, weight_decay=0, amsbound=False): 143 | if not 0.0 <= lr: 144 | raise ValueError("Invalid learning rate: {}".format(lr)) 145 | if not 0.0 <= eps: 146 | raise ValueError("Invalid epsilon value: {}".format(eps)) 147 | if not 0.0 <= betas[0] < 1.0: 148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 149 | if not 0.0 <= betas[1] < 1.0: 150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 151 | if not 0.0 <= final_lr: 152 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 153 | if not 0.0 <= gamma < 1.0: 154 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 156 | weight_decay=weight_decay, amsbound=amsbound) 157 | super(AdaBoundW, self).__init__(params, defaults) 158 | 159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 160 | 161 | def __setstate__(self, state): 162 | super(AdaBoundW, self).__setstate__(state) 163 | for group in self.param_groups: 164 | group.setdefault('amsbound', False) 165 | 166 | def step(self, closure=None): 167 | """Performs a single optimization step. 168 | Arguments: 169 | closure (callable, optional): A closure that reevaluates the model 170 | and returns the loss. 171 | """ 172 | loss = None 173 | if closure is not None: 174 | loss = closure() 175 | 176 | for group, base_lr in zip(self.param_groups, self.base_lrs): 177 | for p in group['params']: 178 | if p.grad is None: 179 | continue 180 | grad = p.grad.data 181 | if grad.is_sparse: 182 | raise RuntimeError( 183 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 184 | amsbound = group['amsbound'] 185 | 186 | state = self.state[p] 187 | 188 | # State initialization 189 | if len(state) == 0: 190 | state['step'] = 0 191 | # Exponential moving average of gradient values 192 | state['exp_avg'] = torch.zeros_like(p.data) 193 | # Exponential moving average of squared gradient values 194 | state['exp_avg_sq'] = torch.zeros_like(p.data) 195 | if amsbound: 196 | # Maintains max of all exp. moving avg. of sq. grad. values 197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 198 | 199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 200 | if amsbound: 201 | max_exp_avg_sq = state['max_exp_avg_sq'] 202 | beta1, beta2 = group['betas'] 203 | 204 | state['step'] += 1 205 | 206 | # Decay the first and second moment running average coefficient 207 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 209 | if amsbound: 210 | # Maintains the maximum of all 2nd moment running avg. till now 211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 212 | # Use the max. for normalizing running avg. 
of gradient 213 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 214 | else: 215 | denom = exp_avg_sq.sqrt().add_(group['eps']) 216 | 217 | bias_correction1 = 1 - beta1 ** state['step'] 218 | bias_correction2 = 1 - beta2 ** state['step'] 219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 220 | 221 | # Applies bounds on actual learning rate 222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 223 | final_lr = group['final_lr'] * group['lr'] / base_lr 224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 226 | step_size = torch.full_like(denom, step_size) 227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 228 | 229 | if group['weight_decay'] != 0: 230 | decayed_weights = torch.mul(p.data, group['weight_decay']) 231 | p.data.add_(-step_size) 232 | p.data.sub_(decayed_weights) 233 | else: 234 | p.data.add_(-step_size) 235 | 236 | return loss 237 | -------------------------------------------------------------------------------- /cfg/yolov3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 
149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | 
size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | 
size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=18 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=1 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=18 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=1 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=18 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=1 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | 
[net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | 
[convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | 
pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 
| [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /cfg/yolov3s-3a320.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 
| pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 
| stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | 
batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=85 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 2 642 | anchors = 16,30, 62,45, 156,198 643 | classes=80 644 | num=3 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=85 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 1 728 | anchors = 16,30, 62,45, 156,198 729 | classes=80 730 | num=3 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 
744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=85 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0 815 | anchors = 16,30, 62,45, 156,198 816 | classes=80 817 | num=3 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3-spp-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=100 20 | max_batches = 5000 21 | policy=steps 22 | steps=4000,4500 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | 
stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 
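The [convolutional] / [convolutional] / [shortcut] triplets repeated above are Darknet residual units: a 1x1 bottleneck, a 3x3 expansion back to the original width, then a shortcut from three layers back (from=-3) with a linear activation, i.e. a plain element-wise addition. A minimal PyTorch sketch of one such unit, assuming leaky-ReLU slope 0.1 and batch norm as declared in the cfg; the class and helper names are illustrative, not this repository's modules:

import torch.nn as nn

def conv_bn_leaky(in_ch, out_ch, k):
    # one [convolutional] block with batch_normalize=1, pad=1 (same padding), activation=leaky
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size=k, stride=1, padding=k // 2, bias=False),
        nn.BatchNorm2d(out_ch),
        nn.LeakyReLU(0.1, inplace=True),
    )

class ResidualUnit(nn.Module):
    # e.g. a 256-channel input, filters=128 (1x1) then filters=256 (3x3),
    # closed by [shortcut] from=-3 with activation=linear
    def __init__(self, channels, hidden):
        super().__init__()
        self.reduce = conv_bn_leaky(channels, hidden, k=1)
        self.expand = conv_bn_leaky(hidden, channels, k=3)

    def forward(self, x):
        return x + self.expand(self.reduce(x))   # linear shortcut: plain addition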
| 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 
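The ### SPP ### block that follows pools the 512-channel feature map with 5x5, 9x9 and 13x13 max-pools at stride 1 and then routes the three pooled maps back together with the un-pooled input (layers=-1,-3,-5,-6), so the next 1x1 convolution sees four times the channels. A minimal PyTorch sketch of that block, assuming same-padding pooling; the module name is illustrative:

import torch
import torch.nn as nn

class SPPBlock(nn.Module):
    # [maxpool] size=5 / 9 / 13 with stride=1, concatenated with the un-pooled input
    def __init__(self, kernel_sizes=(5, 9, 13)):
        super().__init__()
        self.pools = nn.ModuleList(
            nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) for k in kernel_sizes
        )

    def forward(self, x):
        pooled = [pool(x) for pool in self.pools]       # 5x5, 9x9, 13x13 outputs
        # [route] layers=-1,-3,-5,-6 stacks: 13x13 pool, 9x9 pool, 5x5 pool, input
        return torch.cat(pooled[::-1] + [x], dim=1)

# For a 512-channel input the concatenated output has 4 * 512 = 2048 channels,
# which the following [convolutional] with filters=512, size=1 reduces back down.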
574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=18 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=1 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=18 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=1 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 
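The 1x1 detection convolution just below (filters=18) and its counterparts in the other cfgs in this directory all obey the same rule: the layer feeding a [yolo] block needs anchors_per_head * (classes + 5) output filters, covering tx, ty, tw, th, an objectness score and one score per class for every anchor listed in that head's mask. A short, hedged sanity check in plain Python (the function name is illustrative):

def yolo_head_filters(num_classes, anchors_per_head):
    # each anchor predicts 4 box offsets + objectness + one confidence per class
    return anchors_per_head * (num_classes + 5)

assert yolo_head_filters(1, 3) == 18      # yolov3-spp-1cls.cfg heads (classes=1, 3 anchors each)
assert yolo_head_filters(80, 1) == 85     # the 3-anchor cfg above: one anchor per head
assert yolo_head_filters(80, 3) == 255    # yolov3-spp.cfg, yolov3s-9a320.cfg
assert yolo_head_filters(80, 6) == 510    # yolov3s-18a320.cfg
assert yolo_head_filters(80, 10) == 850   # yolov3s-30a320.cfg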
797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=18 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=1 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3-spp.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | 
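All of these cfg files use the same flat Darknet syntax: bracketed section headers such as [net], [convolutional] or [yolo], followed by key=value lines, with # starting a comment. A minimal, hedged parser sketch (plain Python, not this repository's loader) that turns such a file into a list of dicts:

def parse_darknet_cfg(path):
    """Parse a Darknet .cfg file into a list of {'type': ..., key: value} dicts."""
    sections = []
    with open(path) as f:
        for raw in f:
            line = raw.split('#', 1)[0].strip()        # drop comments and whitespace
            if not line:
                continue
            if line.startswith('[') and line.endswith(']'):
                sections.append({'type': line[1:-1].strip()})
            else:
                key, _, value = line.partition('=')
                sections[-1][key.strip()] = value.strip()
    return sections

# e.g. sections[0]['type'] == 'net', sections[0]['width'] == '608'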
[convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | 
pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | 
batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=80 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3s-9a320.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | 
batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | 
stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 
449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 9,11, 25,27, 33,63, 71,43, 62,120, 135,86, 123,199, 257,100, 264,223 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | 
batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 9,11, 25,27, 33,63, 71,43, 62,120, 135,86, 123,199, 257,100, 264,223 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 9,11, 25,27, 33,63, 71,43, 62,120, 135,86, 123,199, 257,100, 264,223 816 | classes=80 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3s-18a320.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | 
batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | 
batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | 
pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=510 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 12,13,14,15,16,17 642 | anchors = 7,8, 11,20, 27,15, 20,36, 50,29, 28,60, 61,61, 99,39, 43,99, 98,91, 66,148, 180,68, 139,135, 104,210, 285,92, 205,173, 186,274, 302,212 643 | classes=80 644 | num=18 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | 
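Each [yolo] layer in this 18-anchor variant picks its own slice of the shared anchors list through mask: the coarsest head above (mask = 12,13,14,15,16,17) gets the six largest boxes, while the two finer heads that follow take indices 6-11 and 0-5. A short snippet showing how the mask indices map onto (width, height) anchor pairs; the string parsing is illustrative:

anchors = "7,8, 11,20, 27,15, 20,36, 50,29, 28,60, 61,61, 99,39, 43,99, " \
          "98,91, 66,148, 180,68, 139,135, 104,210, 285,92, 205,173, 186,274, 302,212"
mask = [12, 13, 14, 15, 16, 17]                    # first [yolo] head of yolov3s-18a320.cfg

values = [int(v) for v in anchors.replace(' ', '').split(',')]
pairs = list(zip(values[0::2], values[1::2]))      # 18 (w, h) anchor boxes
head_anchors = [pairs[i] for i in mask]
print(head_anchors)   # [(139, 135), (104, 210), (285, 92), (205, 173), (186, 274), (302, 212)]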
[convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=510 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 6,7,8,9,10,11 728 | anchors = 7,8, 11,20, 27,15, 20,36, 50,29, 28,60, 61,61, 99,39, 43,99, 98,91, 66,148, 180,68, 139,135, 104,210, 285,92, 205,173, 186,274, 302,212 729 | classes=80 730 | num=18 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=510 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2,3,4,5 815 | anchors = 7,8, 11,20, 27,15, 20,36, 50,29, 28,60, 61,61, 99,39, 43,99, 98,91, 66,148, 180,68, 139,135, 104,210, 285,92, 205,173, 186,274, 302,212 816 | classes=80 817 | num=18 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3s-30a320.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | 
stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | 
filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | 
[shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=850 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 20,21,22,23,24,25,26,27,28,29 642 | anchors = 6,7, 9,18, 17,10, 21,22, 14,33, 36,15, 22,51, 34,34, 59,24, 32,74, 51,49, 90,38, 41,105, 67,72, 144,48, 54,148, 106,79, 81,109, 211,63, 107,147, 81,200, 149,112, 297,73, 152,187, 214,135, 121,264, 220,206, 299,153, 211,291, 309,230 643 | classes=80 644 | num=30 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=850 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 10,11,12,13,14,15,16,17,18,19 728 | anchors = 6,7, 9,18, 17,10, 21,22, 14,33, 36,15, 22,51, 34,34, 59,24, 32,74, 51,49, 90,38, 41,105, 
67,72, 144,48, 54,148, 106,79, 81,109, 211,63, 107,147, 81,200, 149,112, 297,73, 152,187, 214,135, 121,264, 220,206, 299,153, 211,291, 309,230 729 | classes=80 730 | num=30 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=850 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2,3,4,5,6,7,8,9 815 | anchors = 6,7, 9,18, 17,10, 21,22, 14,33, 36,15, 22,51, 34,34, 59,24, 32,74, 51,49, 90,38, 41,105, 67,72, 144,48, 54,148, 106,79, 81,109, 211,63, 107,147, 81,200, 149,112, 297,73, 152,187, 214,135, 121,264, 220,206, 299,153, 211,291, 309,230 816 | classes=80 817 | num=30 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3-spp-pan-scale.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=32 8 | width=544 9 | height=544 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | 19 | learning_rate=0.001 20 | burn_in=1000 21 | max_batches = 10000 22 | 23 | policy=steps 24 | steps=8000,9000 25 | scales=.1,.1 26 | 27 | #policy=sgdr 28 | #sgdr_cycle=1000 29 | #sgdr_mult=2 30 | #steps=4000,6000,8000,9000 31 | #scales=1, 1, 0.1, 0.1 32 | 33 | [convolutional] 34 | batch_normalize=1 35 | filters=32 36 | size=3 37 | stride=1 38 | pad=1 39 | activation=leaky 40 | 41 | # Downsample 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=64 46 | size=3 47 | stride=2 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=32 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=64 62 | size=3 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [shortcut] 68 | from=-3 69 | activation=linear 70 | 71 | # Downsample 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=128 76 | size=3 77 | stride=2 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=64 84 | size=1 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [convolutional] 90 | batch_normalize=1 91 | filters=128 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | 
[shortcut] 98 | from=-3 99 | activation=linear 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=64 104 | size=1 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [convolutional] 110 | batch_normalize=1 111 | filters=128 112 | size=3 113 | stride=1 114 | pad=1 115 | activation=leaky 116 | 117 | [shortcut] 118 | from=-3 119 | activation=linear 120 | 121 | # Downsample 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=256 126 | size=3 127 | stride=2 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=128 134 | size=1 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [convolutional] 140 | batch_normalize=1 141 | filters=256 142 | size=3 143 | stride=1 144 | pad=1 145 | activation=leaky 146 | 147 | [shortcut] 148 | from=-3 149 | activation=linear 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=128 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [shortcut] 168 | from=-3 169 | activation=linear 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=128 174 | size=1 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [convolutional] 180 | batch_normalize=1 181 | filters=256 182 | size=3 183 | stride=1 184 | pad=1 185 | activation=leaky 186 | 187 | [shortcut] 188 | from=-3 189 | activation=linear 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=128 194 | size=1 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [convolutional] 200 | batch_normalize=1 201 | filters=256 202 | size=3 203 | stride=1 204 | pad=1 205 | activation=leaky 206 | 207 | [shortcut] 208 | from=-3 209 | activation=linear 210 | 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=128 215 | size=1 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [convolutional] 221 | batch_normalize=1 222 | filters=256 223 | size=3 224 | stride=1 225 | pad=1 226 | activation=leaky 227 | 228 | [shortcut] 229 | from=-3 230 | activation=linear 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=128 235 | size=1 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [convolutional] 241 | batch_normalize=1 242 | filters=256 243 | size=3 244 | stride=1 245 | pad=1 246 | activation=leaky 247 | 248 | [shortcut] 249 | from=-3 250 | activation=linear 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=128 255 | size=1 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [convolutional] 261 | batch_normalize=1 262 | filters=256 263 | size=3 264 | stride=1 265 | pad=1 266 | activation=leaky 267 | 268 | [shortcut] 269 | from=-3 270 | activation=linear 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=128 275 | size=1 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [convolutional] 281 | batch_normalize=1 282 | filters=256 283 | size=3 284 | stride=1 285 | pad=1 286 | activation=leaky 287 | 288 | [shortcut] 289 | from=-3 290 | activation=linear 291 | 292 | # Downsample 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=512 297 | size=3 298 | stride=2 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=256 305 | size=1 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [convolutional] 311 | batch_normalize=1 312 | filters=512 313 | size=3 314 | stride=1 315 | pad=1 
316 | activation=leaky 317 | 318 | [shortcut] 319 | from=-3 320 | activation=linear 321 | 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=256 326 | size=1 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [convolutional] 332 | batch_normalize=1 333 | filters=512 334 | size=3 335 | stride=1 336 | pad=1 337 | activation=leaky 338 | 339 | [shortcut] 340 | from=-3 341 | activation=linear 342 | 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=256 347 | size=1 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [convolutional] 353 | batch_normalize=1 354 | filters=512 355 | size=3 356 | stride=1 357 | pad=1 358 | activation=leaky 359 | 360 | [shortcut] 361 | from=-3 362 | activation=linear 363 | 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=256 368 | size=1 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [convolutional] 374 | batch_normalize=1 375 | filters=512 376 | size=3 377 | stride=1 378 | pad=1 379 | activation=leaky 380 | 381 | [shortcut] 382 | from=-3 383 | activation=linear 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=256 388 | size=1 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [convolutional] 394 | batch_normalize=1 395 | filters=512 396 | size=3 397 | stride=1 398 | pad=1 399 | activation=leaky 400 | 401 | [shortcut] 402 | from=-3 403 | activation=linear 404 | 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=256 409 | size=1 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [convolutional] 415 | batch_normalize=1 416 | filters=512 417 | size=3 418 | stride=1 419 | pad=1 420 | activation=leaky 421 | 422 | [shortcut] 423 | from=-3 424 | activation=linear 425 | 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=256 430 | size=1 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [convolutional] 436 | batch_normalize=1 437 | filters=512 438 | size=3 439 | stride=1 440 | pad=1 441 | activation=leaky 442 | 443 | [shortcut] 444 | from=-3 445 | activation=linear 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=256 450 | size=1 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [convolutional] 456 | batch_normalize=1 457 | filters=512 458 | size=3 459 | stride=1 460 | pad=1 461 | activation=leaky 462 | 463 | [shortcut] 464 | from=-3 465 | activation=linear 466 | 467 | # Downsample 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=1024 472 | size=3 473 | stride=2 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=512 480 | size=1 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [convolutional] 486 | batch_normalize=1 487 | filters=1024 488 | size=3 489 | stride=1 490 | pad=1 491 | activation=leaky 492 | 493 | [shortcut] 494 | from=-3 495 | activation=linear 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=512 500 | size=1 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [convolutional] 506 | batch_normalize=1 507 | filters=1024 508 | size=3 509 | stride=1 510 | pad=1 511 | activation=leaky 512 | 513 | [shortcut] 514 | from=-3 515 | activation=linear 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=512 520 | size=1 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [convolutional] 526 | batch_normalize=1 527 | filters=1024 528 | size=3 529 | stride=1 530 | pad=1 531 | activation=leaky 532 | 533 | [shortcut] 534 | from=-3 535 | activation=linear 536 | 
537 | [convolutional] 538 | batch_normalize=1 539 | filters=512 540 | size=1 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [convolutional] 546 | batch_normalize=1 547 | filters=1024 548 | size=3 549 | stride=1 550 | pad=1 551 | activation=leaky 552 | 553 | [shortcut] 554 | from=-3 555 | activation=linear 556 | 557 | ###################### 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | filters=512 562 | size=1 563 | stride=1 564 | pad=1 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | size=3 570 | stride=1 571 | pad=1 572 | filters=1024 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | filters=512 578 | size=1 579 | stride=1 580 | pad=1 581 | activation=leaky 582 | 583 | ### SPP ### 584 | [maxpool] 585 | stride=1 586 | size=5 587 | 588 | [route] 589 | layers=-2 590 | 591 | [maxpool] 592 | stride=1 593 | size=9 594 | 595 | [route] 596 | layers=-4 597 | 598 | [maxpool] 599 | stride=1 600 | size=13 601 | 602 | [route] 603 | layers=-1,-3,-5,-6 604 | 605 | ### End SPP ### 606 | 607 | [convolutional] 608 | batch_normalize=1 609 | filters=512 610 | size=1 611 | stride=1 612 | pad=1 613 | activation=leaky 614 | 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | size=3 619 | stride=1 620 | pad=1 621 | filters=1024 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | filters=512 627 | size=1 628 | stride=1 629 | pad=1 630 | activation=leaky 631 | 632 | 633 | 634 | ########### to [yolo-3] 635 | 636 | 637 | 638 | [route] 639 | layers = -4 640 | 641 | [convolutional] 642 | batch_normalize=1 643 | filters=256 644 | size=1 645 | stride=1 646 | pad=1 647 | activation=leaky 648 | 649 | [upsample] 650 | stride=2 651 | 652 | [route] 653 | layers = -1, 61 654 | 655 | 656 | 657 | [convolutional] 658 | batch_normalize=1 659 | filters=256 660 | size=1 661 | stride=1 662 | pad=1 663 | activation=leaky 664 | 665 | [convolutional] 666 | batch_normalize=1 667 | size=3 668 | stride=1 669 | pad=1 670 | filters=512 671 | activation=leaky 672 | 673 | [convolutional] 674 | batch_normalize=1 675 | filters=256 676 | size=1 677 | stride=1 678 | pad=1 679 | activation=leaky 680 | 681 | [convolutional] 682 | batch_normalize=1 683 | size=3 684 | stride=1 685 | pad=1 686 | filters=512 687 | activation=leaky 688 | 689 | [convolutional] 690 | batch_normalize=1 691 | filters=256 692 | size=1 693 | stride=1 694 | pad=1 695 | activation=leaky 696 | 697 | 698 | ########### to [yolo-2] 699 | 700 | 701 | 702 | 703 | [route] 704 | layers = -4 705 | 706 | [convolutional] 707 | batch_normalize=1 708 | filters=128 709 | size=1 710 | stride=1 711 | pad=1 712 | activation=leaky 713 | 714 | [upsample] 715 | stride=2 716 | 717 | [route] 718 | layers = -1, 36 719 | 720 | 721 | 722 | [convolutional] 723 | batch_normalize=1 724 | filters=128 725 | size=1 726 | stride=1 727 | pad=1 728 | activation=leaky 729 | 730 | [convolutional] 731 | batch_normalize=1 732 | size=3 733 | stride=1 734 | pad=1 735 | filters=256 736 | activation=leaky 737 | 738 | [convolutional] 739 | batch_normalize=1 740 | filters=128 741 | size=1 742 | stride=1 743 | pad=1 744 | activation=leaky 745 | 746 | [convolutional] 747 | batch_normalize=1 748 | size=3 749 | stride=1 750 | pad=1 751 | filters=256 752 | activation=leaky 753 | 754 | [convolutional] 755 | batch_normalize=1 756 | filters=128 757 | size=1 758 | stride=1 759 | pad=1 760 | activation=leaky 761 | 762 | 763 | 764 | ########### to [yolo-1] 765 | 766 | 767 | ########### features of different 
layers 768 | 769 | 770 | [route] 771 | layers=1 772 | 773 | [reorg3d] 774 | stride=2 775 | 776 | [route] 777 | layers=5,-1 778 | 779 | [reorg3d] 780 | stride=2 781 | 782 | [route] 783 | layers=12,-1 784 | 785 | [reorg3d] 786 | stride=2 787 | 788 | [route] 789 | layers=37,-1 790 | 791 | [reorg3d] 792 | stride=2 793 | 794 | [route] 795 | layers=62,-1 796 | 797 | 798 | 799 | ########### [yolo-1] 800 | 801 | [convolutional] 802 | batch_normalize=1 803 | filters=128 804 | size=1 805 | stride=1 806 | pad=1 807 | activation=leaky 808 | 809 | [upsample] 810 | stride=4 811 | 812 | [route] 813 | layers = -1,-12 814 | 815 | 816 | [convolutional] 817 | batch_normalize=1 818 | size=3 819 | stride=1 820 | pad=1 821 | filters=256 822 | activation=leaky 823 | 824 | [convolutional] 825 | size=1 826 | stride=1 827 | pad=1 828 | filters=340 829 | activation=linear 830 | 831 | 832 | [yolo] 833 | mask = 0,1,2,3 834 | anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 64,64, 59,119, 116,90, 156,198, 373,326 835 | classes=80 836 | num=12 837 | jitter=.3 838 | ignore_thresh = .7 839 | truth_thresh = 1 840 | scale_x_y = 1.05 841 | random=0 842 | 843 | 844 | 845 | 846 | ########### [yolo-2] 847 | 848 | 849 | [route] 850 | layers = -7 851 | 852 | [convolutional] 853 | batch_normalize=1 854 | filters=256 855 | size=1 856 | stride=1 857 | pad=1 858 | activation=leaky 859 | 860 | [upsample] 861 | stride=2 862 | 863 | [route] 864 | layers = -1,-28 865 | 866 | 867 | [convolutional] 868 | batch_normalize=1 869 | size=3 870 | stride=1 871 | pad=1 872 | filters=512 873 | activation=leaky 874 | 875 | [convolutional] 876 | size=1 877 | stride=1 878 | pad=1 879 | filters=340 880 | activation=linear 881 | 882 | 883 | [yolo] 884 | mask = 4,5,6,7 885 | anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 64,64, 59,119, 116,90, 156,198, 373,326 886 | classes=80 887 | num=12 888 | jitter=.3 889 | ignore_thresh = .7 890 | truth_thresh = 1 891 | scale_x_y = 1.1 892 | random=0 893 | 894 | 895 | 896 | ########### [yolo-3] 897 | 898 | [route] 899 | layers = -14 900 | 901 | [convolutional] 902 | batch_normalize=1 903 | filters=512 904 | size=1 905 | stride=1 906 | pad=1 907 | activation=leaky 908 | 909 | [route] 910 | layers = -1,-43 911 | 912 | [convolutional] 913 | batch_normalize=1 914 | size=3 915 | stride=1 916 | pad=1 917 | filters=1024 918 | activation=leaky 919 | 920 | 921 | [convolutional] 922 | size=1 923 | stride=1 924 | pad=1 925 | filters=340 926 | activation=linear 927 | 928 | 929 | [yolo] 930 | mask = 8,9,10,11 931 | anchors = 8,8, 10,13, 16,30, 33,23, 32,32, 30,61, 62,45, 59,119, 80,80, 116,90, 156,198, 373,326 932 | classes=80 933 | num=12 934 | jitter=.3 935 | ignore_thresh = .7 936 | truth_thresh = 1 937 | scale_x_y = 1.2 938 | random=0 939 | -------------------------------------------------------------------------------- /prune.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | """ 4 | Pengyi Zhang 5 | 201906 6 | """ 7 | import cv2 8 | 9 | import argparse 10 | import json 11 | import os 12 | 13 | import numpy 14 | 15 | import torch 16 | import torch.nn as nn 17 | 18 | from torch.utils.data import DataLoader 19 | 20 | from models import * 21 | from utils.datasets import * 22 | from utils.utils import * 23 | from utils.parse_config import * 24 | 25 | """ Slim Principle 26 | (1) Use global threshold to control pruning ratio 27 | (2) Use local threshold to keep at least 10% unpruned 28 | """ 29 | 30 | def route_conv(layer_index, module_defs): 31 | """ 
find the convolutional layers connected by route layer 32 | """ 33 | module_def = module_defs[layer_index] 34 | mtype = module_def['type'] 35 | 36 | before_conv_id = [] 37 | if mtype in ['convolutional', 'shortcut', 'upsample', 'maxpool', 'reorg3d']: 38 | if module_defs[layer_index-1]['type'] == 'convolutional': 39 | return [layer_index-1] 40 | before_conv_id += route_conv(layer_index-1, module_defs) 41 | 42 | elif mtype == "route": 43 | layer_is = [int(x)+layer_index if int(x) < 0 else int(x) for x in module_defs[layer_index]['layers'].split(',')] 44 | for layer_i in layer_is: 45 | if module_defs[layer_i]['type'] == 'convolutional': 46 | before_conv_id += [layer_i] 47 | else: 48 | before_conv_id += route_conv(layer_i, module_defs) 49 | 50 | return before_conv_id 51 | 52 | 53 | def write_model_cfg(old_path, new_path, new_module_defs): 54 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 55 | lines = [] 56 | with open(old_path, 'r') as fp: 57 | old_lines = fp.readlines() 58 | for _line in old_lines: 59 | if "[convolutional]" in _line: 60 | break 61 | lines.append(_line) 62 | 63 | for i, module_def in enumerate(new_module_defs): 64 | 65 | mtype = module_def['type'] 66 | lines.append("[{}]\n".format(mtype)) 67 | print("layer:", i, mtype) 68 | if mtype == "convolutional": 69 | bn = 0 70 | filters = module_def['filters'] 71 | bn = int(module_def['batch_normalize']) 72 | if bn: 73 | lines.append("batch_normalize={}\n".format(bn)) 74 | filters = torch.sum(module_def['mask']).cpu().numpy().astype('int') 75 | lines.append("filters={}\n".format(filters)) 76 | lines.append("size={}\n".format(module_def['size'])) 77 | lines.append("stride={}\n".format(module_def['stride'])) 78 | lines.append("pad={}\n".format(module_def['pad'])) 79 | lines.append("activation={}\n\n".format(module_def['activation'])) 80 | elif mtype == "shortcut": 81 | lines.append("from={}\n".format(module_def['from'])) 82 | lines.append("activation={}\n\n".format(module_def['activation'])) 83 | elif mtype == 'route': 84 | lines.append("layers={}\n\n".format(module_def['layers'])) 85 | elif mtype == 'reorg3d': 86 | lines.append("stride={}\n\n".format(module_def['stride'])) 87 | 88 | elif mtype == 'upsample': 89 | lines.append("stride={}\n\n".format(module_def['stride'])) 90 | elif mtype == 'maxpool': 91 | lines.append("stride={}\n".format(module_def['stride'])) 92 | lines.append("size={}\n\n".format(module_def['size'])) 93 | elif mtype == 'yolo': 94 | lines.append("mask = {}\n".format(module_def['mask'])) 95 | an_rows, an_cols = module_def['anchors'].shape 96 | tmp_str = '' 97 | for an_i in range(an_rows): 98 | tmp_str += '%d,%d, ' % (module_def['anchors'][an_i, 0], module_def['anchors'][an_i, 1]) 99 | print('===============', tmp_str[0:-2]) 100 | lines.append("anchors = {}\n".format(tmp_str[0:-2])) 101 | lines.append("classes = {}\n".format(module_def['classes'])) 102 | lines.append("num = {}\n".format(module_def['num'])) 103 | lines.append("jitter = {}\n".format(module_def['jitter'])) 104 | lines.append("ignore_thresh = {}\n".format(module_def['ignore_thresh'])) 105 | lines.append("truth_thresh = {}\n".format(module_def['truth_thresh'])) 106 | lines.append("random = {}\n\n".format(module_def['random'])) 107 | 108 | with open(new_path, "w") as f: 109 | f.writelines(lines) 110 | 111 | 112 | 113 | def test( 114 | cfg, 115 | weights=None, 116 | img_size=406, 117 | save=None, 118 | overall_ratio=0.5, 119 | perlayer_ratio=0.1 120 | ): 121 | 122 | """prune yolov3 and generate cfg, weights 123 | """ 124 | if 
save != None: 125 | if not os.path.exists(save): 126 | os.makedirs(save) 127 | device = torch_utils.select_device() 128 | # Initialize model 129 | model = Darknet(cfg, img_size).to(device) 130 | 131 | # Load weights 132 | if weights.endswith('.pt'): # pytorch format 133 | _state_dict = torch.load(weights, map_location=device)['model'] 134 | model.load_state_dict(_state_dict) 135 | else: # darknet format 136 | _ = load_darknet_weights(model, weights) 137 | 138 | ## output a new cfg file 139 | total = 0 140 | for m in model.modules(): 141 | if isinstance(m, nn.BatchNorm2d): 142 | total += m.weight.data.shape[0] # channels numbers 143 | 144 | bn = torch.zeros(total) 145 | index = 0 146 | 147 | for m in model.modules(): 148 | if isinstance(m, nn.BatchNorm2d): 149 | size = m.weight.data.shape[0] 150 | bn[index:(index+size)] = m.weight.data.abs().clone() 151 | index += size 152 | 153 | sorted_bn, sorted_index = torch.sort(bn) 154 | thresh_index = int(total*overall_ratio) 155 | thresh = sorted_bn[thresh_index].cuda() 156 | 157 | print("--"*30) 158 | print() 159 | #print(list(model.modules())) 160 | # 161 | proned_module_defs = model.module_defs 162 | for i, (module_def, module) in enumerate(zip(model.module_defs, model.module_list)): 163 | print("layer:", i) 164 | mtype = module_def['type'] 165 | if mtype == 'convolutional': 166 | bn = int(module_def['batch_normalize']) 167 | if bn: 168 | m = getattr(module, 'BatchNorm2d') # batch_norm layer 169 | weight_copy = m.weight.data.abs().clone() 170 | channels = weight_copy.shape[0] # 171 | min_channel_num = int(channels * perlayer_ratio) if int(channels * perlayer_ratio) > 0 else 1 172 | mask = weight_copy.gt(thresh).float().cuda() 173 | 174 | if int(torch.sum(mask)) < min_channel_num: 175 | _, sorted_index_weights = torch.sort(weight_copy,descending=True) 176 | mask[sorted_index_weights[:min_channel_num]]=1. 177 | 178 | proned_module_defs[i]['mask'] = mask.clone() 179 | 180 | print('layer index: {:d} \t total channel: {:d} \t remaining channel: {:d}'. 
181 | format(i, mask.shape[0], int(torch.sum(mask)))) 182 | 183 | print("layer:", mtype) 184 | 185 | elif mtype in ['upsample', 'maxpool', 'reorg3d']: 186 | print("layer:", mtype) 187 | 188 | elif mtype == 'route': 189 | print("layer:", mtype) 190 | # 191 | 192 | elif mtype == 'shortcut': 193 | layer_i = int(module_def['from'])+i 194 | print("from layer ", layer_i) 195 | print("layer:", mtype) 196 | proned_module_defs[i]['is_access'] = False 197 | 198 | 199 | elif mtype == 'yolo': 200 | print("layer:", mtype) 201 | 202 | 203 | layer_number = len(proned_module_defs) 204 | for i in range(layer_number-1, -1, -1): 205 | mtype = proned_module_defs[i]['type'] 206 | if mtype == 'shortcut': 207 | if proned_module_defs[i]['is_access']: 208 | continue 209 | 210 | Merge_masks = [] 211 | layer_i = i 212 | while mtype == 'shortcut': 213 | proned_module_defs[layer_i]['is_access'] = True 214 | 215 | if proned_module_defs[layer_i-1]['type'] == 'convolutional': 216 | bn = int(proned_module_defs[layer_i-1]['batch_normalize']) 217 | if bn: 218 | Merge_masks.append(proned_module_defs[layer_i-1]["mask"].unsqueeze(0)) 219 | 220 | layer_i = int(proned_module_defs[layer_i]['from'])+layer_i 221 | mtype = proned_module_defs[layer_i]['type'] 222 | 223 | if mtype == 'convolutional': 224 | bn = int(proned_module_defs[layer_i]['batch_normalize']) 225 | if bn: 226 | Merge_masks.append(proned_module_defs[layer_i]["mask"].unsqueeze(0)) 227 | 228 | 229 | if len(Merge_masks) > 1: 230 | Merge_masks = torch.cat(Merge_masks, 0) 231 | merge_mask = (torch.sum(Merge_masks, dim=0) > 0).float().cuda() 232 | else: 233 | merge_mask = Merge_masks[0].float().cuda() 234 | 235 | layer_i = i 236 | mtype = 'shortcut' 237 | while mtype == 'shortcut': 238 | 239 | if proned_module_defs[layer_i-1]['type'] == 'convolutional': 240 | bn = int(proned_module_defs[layer_i-1]['batch_normalize']) 241 | if bn: 242 | proned_module_defs[layer_i-1]["mask"] = merge_mask 243 | 244 | layer_i = int(proned_module_defs[layer_i]['from'])+layer_i 245 | mtype = proned_module_defs[layer_i]['type'] 246 | 247 | if mtype == 'convolutional': 248 | bn = int(proned_module_defs[layer_i]['batch_normalize']) 249 | if bn: 250 | proned_module_defs[layer_i]["mask"] = merge_mask 251 | 252 | 253 | 254 | for i, (module_def, module) in enumerate(zip(model.module_defs, model.module_list)): 255 | print("layer:", i) 256 | mtype = module_def['type'] 257 | if mtype == 'convolutional': 258 | bn = int(module_def['batch_normalize']) 259 | if bn: 260 | 261 | layer_i_1 = i - 1 262 | proned_module_defs[i]['mask_before'] = None 263 | 264 | mask_before = [] 265 | conv_indexs = [] 266 | if i > 0: 267 | conv_indexs = route_conv(i, proned_module_defs) 268 | for conv_index in conv_indexs: 269 | mask_before += proned_module_defs[conv_index]["mask"].clone().cpu().numpy().tolist() 270 | proned_module_defs[i]['mask_before'] = torch.tensor(mask_before).float().cuda() 271 | 272 | 273 | 274 | 275 | output_cfg_path = os.path.join(save, "prune.cfg") 276 | write_model_cfg(cfg, output_cfg_path, proned_module_defs) 277 | 278 | pruned_model = Darknet(output_cfg_path, img_size).to(device) 279 | print(list(pruned_model.modules())) 280 | for i, (module_def, old_module, new_module) in enumerate(zip(proned_module_defs, model.module_list, pruned_model.module_list)): 281 | mtype = module_def['type'] 282 | print("layer: ",i, mtype) 283 | if mtype == 'convolutional': # 284 | bn = int(module_def['batch_normalize']) 285 | if bn: 286 | new_norm = getattr(new_module, 'BatchNorm2d') # batch_norm layer 287 | old_norm = 
getattr(old_module, 'BatchNorm2d') # batch_norm layer 288 | 289 | new_conv = getattr(new_module, 'Conv2d') # conv layer 290 | old_conv = getattr(old_module, 'Conv2d') # conv layer 291 | 292 | 293 | idx1 = np.squeeze(np.argwhere(np.asarray(module_def['mask'].cpu().numpy()))) 294 | if i > 0: 295 | idx2 = np.squeeze(np.argwhere(np.asarray(module_def['mask_before'].cpu().numpy()))) 296 | new_conv.weight.data = old_conv.weight.data[idx1.tolist()][:, idx2.tolist(), :, :].clone() 297 | 298 | print("idx1: ", len(idx1), ", idx2: ", len(idx2)) 299 | else: 300 | new_conv.weight.data = old_conv.weight.data[idx1.tolist()].clone() 301 | 302 | new_norm.weight.data = old_norm.weight.data[idx1.tolist()].clone() 303 | new_norm.bias.data = old_norm.bias.data[idx1.tolist()].clone() 304 | new_norm.running_mean = old_norm.running_mean[idx1.tolist()].clone() 305 | new_norm.running_var = old_norm.running_var[idx1.tolist()].clone() 306 | 307 | 308 | print('layer index: ', i, 'idx1: ', idx1) 309 | else: 310 | 311 | new_conv = getattr(new_module, 'Conv2d') # batch_norm layer 312 | old_conv = getattr(old_module, 'Conv2d') # batch_norm layer 313 | idx2 = np.squeeze(np.argwhere(np.asarray(proned_module_defs[i-1]['mask'].cpu().numpy()))) 314 | new_conv.weight.data = old_conv.weight.data[:,idx2.tolist(),:,:].clone() 315 | new_conv.bias.data = old_conv.bias.data.clone() 316 | print('layer index: ', i, "entire copy") 317 | 318 | print('--'*30) 319 | print('prune done!') 320 | print('pruned ratio %.3f'%overall_ratio) 321 | prune_weights_path = os.path.join(save, "prune.pt") 322 | _pruned_state_dict = pruned_model.state_dict() 323 | torch.save(_pruned_state_dict, prune_weights_path) 324 | 325 | print("Done!") 326 | 327 | 328 | 329 | # test 330 | pruned_model.eval() 331 | img_path = "test.jpg" 332 | 333 | org_img = cv2.imread(img_path) # BGR 334 | # img, ratiow, ratioh, padw, padh = letterbox(org_img, new_shape=[img_size,img_size], mode='rect') 335 | img = org_img 336 | 337 | # Normalize 338 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 339 | img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 340 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 341 | 342 | imgs = torch.from_numpy(img).unsqueeze(0).to(device) 343 | _, _, height, width = imgs.shape # batch size, channels, height, width 344 | 345 | # Run model 346 | inf_out, train_out = pruned_model(imgs) # inference and training outputs 347 | # Run NMS 348 | output = non_max_suppression(inf_out, conf_thres=0.005, nms_thres=0.5) 349 | # Statistics per image 350 | for si, pred in enumerate(output): 351 | if pred is None: 352 | continue 353 | if True: 354 | box = pred[:, :4].clone() # xyxy 355 | scale_coords(imgs[si].shape[1:], box, org_img.shape[:2]) # to original shape 356 | for di, d in enumerate(pred): 357 | category_id = int(d[6]) 358 | left, top, right, bot = [float(x) for x in box[di]] 359 | confidence = float(d[4]) 360 | 361 | cv2.rectangle(org_img, (int(left), int(top)), (int(right), int(bot)), 362 | (255, 0, 0), 2) 363 | cv2.putText(org_img, str(category_id) + ":" + str('%.1f' % (float(confidence) * 100)) + "%", (int(left), int(top) - 8), 364 | cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1) 365 | cv2.imshow("result", org_img) 366 | cv2.waitKey(-1) 367 | cv2.imwrite('result_{}'.format(img_path), org_img) 368 | 369 | 370 | # convert pt to weights: 371 | prune_c_weights_path = os.path.join(save, "prune.weights") 372 | save_weights(pruned_model, prune_c_weights_path) 373 | 374 | 375 | if __name__ == '__main__': 376 | 
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 377 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 378 | parser = argparse.ArgumentParser(description='PyTorch Slimming Yolov3 prune') 379 | parser.add_argument('--cfg', type=str, default='VisDrone2019/yolov3-spp3.cfg', help='cfg file path') 380 | parser.add_argument('--weights', type=str, default='yolov3-spp3_final.weights', help='path to weights file') 381 | parser.add_argument('--img_size', type=int, default=608, help='inference size (pixels)') 382 | parser.add_argument('--save', default='prune', type=str, metavar='PATH', help='path to save pruned model (default: none)') 383 | parser.add_argument('--overall_ratio', type=float, default=0.5, help='scale sparse rate (default: 0.5)') 384 | parser.add_argument('--perlayer_ratio', type=float, default=0.1, help='minimal scale sparse rate (default: 0.1) to prevent disconnect') 385 | 386 | opt = parser.parse_args() 387 | opt.save += "_{}_{}".format(opt.overall_ratio, opt.perlayer_ratio) 388 | 389 | print(opt) 390 | 391 | with torch.no_grad(): 392 | test( 393 | opt.cfg, 394 | opt.weights, 395 | opt.img_size, 396 | opt.save, 397 | opt.overall_ratio, 398 | opt.perlayer_ratio, 399 | ) 400 | --------------------------------------------------------------------------------