├── .gitignore
├── .gitmodules
├── Dockerfile
├── LICENSE
├── Makefile
├── Poster.pdf
├── README.md
├── build_docker_container.sh
├── convert_coco_yolo.py
├── preprocess_flir_dataset.sh
├── run_all_iters.sh
├── run_docker_container.sh
├── start_map_calc.sh
├── start_training.sh
├── thermal.data
├── thermal.names
├── yolov3-spp-custom.cfg
├── yolov3-spp.cfg
├── yolov3-thermal.cfg
└── yolov3_5l.cfg


/.gitignore:
--------------------------------------------------------------------------------
  1 | *.txt
  2 | *.ipynb
  3 | # Byte-compiled / optimized / DLL files
  4 | __pycache__/
  5 | *.py[cod]
  6 | *$py.class
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | .hypothesis/
 50 | .pytest_cache/
 51 | 
 52 | # Translations
 53 | *.mo
 54 | *.pot
 55 | 
 56 | # Django stuff:
 57 | *.log
 58 | local_settings.py
 59 | db.sqlite3
 60 | 
 61 | # Flask stuff:
 62 | instance/
 63 | .webassets-cache
 64 | 
 65 | # Scrapy stuff:
 66 | .scrapy
 67 | 
 68 | # Sphinx documentation
 69 | docs/_build/
 70 | 
 71 | # PyBuilder
 72 | target/
 73 | 
 74 | # Jupyter Notebook
 75 | .ipynb_checkpoints
 76 | 
 77 | # pyenv
 78 | .python-version
 79 | 
 80 | # celery beat schedule file
 81 | celerybeat-schedule
 82 | 
 83 | # SageMath parsed files
 84 | *.sage.py
 85 | 
 86 | # Environments
 87 | .env
 88 | .venv
 89 | env/
 90 | venv/
 91 | ENV/
 92 | env.bak/
 93 | venv.bak/
 94 | 
 95 | # Spyder project settings
 96 | .spyderproject
 97 | .spyproject
 98 | 
 99 | # Rope project settings
100 | .ropeproject
101 | 
102 | # mkdocs documentation
103 | /site
104 | 
105 | # mypy
106 | .mypy_cache/
107 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "darknet"]
2 | 	path = darknet
3 | 	url = https://github.com/AlexeyAB/darknet.git
4 | 	ignore = dirty
5 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Build image for darknet: CUDA 10.0 development base (Ubuntu 18.04) plus cuDNN 7.
 2 | FROM nvidia/cuda:10.0-devel-ubuntu18.04
 3 | LABEL maintainer="NVIDIA CORPORATION <cudatools@nvidia.com>"
 4 | 
 5 | ENV CUDNN_VERSION=7.5.0.56
 6 | LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}"
 7 | 
 8 | # Build tools, python for the annotation converter, and the pinned cuDNN
 9 | # runtime/dev packages. libcudnn7 is held so later upgrades cannot replace it.
10 | RUN apt-get update && apt-get install -y --no-install-recommends \
11 |         build-essential \
12 |         cmake \
13 |         git \
14 |         wget \
15 |         sudo \
16 |         python \
17 |         libcudnn7=$CUDNN_VERSION-1+cuda10.0 \
18 |         libcudnn7-dev=$CUDNN_VERSION-1+cuda10.0 && \
19 |     apt-mark hold libcudnn7 && \
20 |     rm -rf /var/lib/apt/lists/*
21 | # Locations used by the clone/build step below. Each variable gets its own ENV
22 | # instruction because it references the one defined just before it.
23 | ENV HOME=/home
24 | ENV REPOSITORY_PATH=$HOME/object-detection
25 | ENV DARKNET_PATH=$REPOSITORY_PATH/darknet
26 | # Clone the repository, fetch the darknet submodule, replace darknet's Makefile
27 | # with the one from this repository, then build darknet.
28 | RUN cd $HOME && git clone https://github.com/enesozi/object-detection $REPOSITORY_PATH &&  \
29 |     cd $REPOSITORY_PATH && git submodule update --init && cp Makefile $DARKNET_PATH    &&  \
30 |     cd $DARKNET_PATH  && make


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Enes Özipek
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | GPU=1
  2 | CUDNN=1
  3 | CUDNN_HALF=1
  4 | OPENCV=0
  5 | AVX=1
  6 | OPENMP=1
  7 | LIBSO=1
  8 | ZED_CAMERA=0
  9 | 
 10 | # Set GPU=1 and CUDNN=1 to speed up on GPU.
 11 | # Set CUDNN_HALF=1 for a further ~3x speedup (mixed precision on Tensor Cores); requires a Volta, Xavier, Turing or newer GPU.
 12 | # Set AVX=1 and OPENMP=1 to speed up on CPU (if an error occurs, set AVX=0).
 13 | 
 14 | DEBUG=0
 15 | 
 16 | ARCH= -gencode arch=compute_30,code=sm_30 \
 17 |       -gencode arch=compute_35,code=sm_35 \
 18 |       -gencode arch=compute_50,code=[sm_50,compute_50] \
 19 |       -gencode arch=compute_52,code=[sm_52,compute_52] \
 20 | 	  -gencode arch=compute_61,code=[sm_61,compute_61] \
 21 | 	  -gencode arch=compute_75,code=[sm_75,compute_75]
 22 | 
 23 | OS := $(shell uname)
 24 | 
 25 | # Tesla V100
 26 | # ARCH= -gencode arch=compute_70,code=[sm_70,compute_70]
 27 | 
 28 | # GeForce RTX 2080 Ti, RTX 2080, RTX 2070, Quadro RTX 8000, Quadro RTX 6000, Quadro RTX 5000, Tesla T4, XNOR Tensor Cores
 29 | # ARCH= -gencode arch=compute_75,code=[sm_75,compute_75]
 30 | 
 31 | # Jetson XAVIER
 32 | # ARCH= -gencode arch=compute_72,code=[sm_72,compute_72]
 33 | 
 34 | # GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4
 35 | # ARCH= -gencode arch=compute_61,code=sm_61 -gencode arch=compute_61,code=compute_61
 36 | 
 37 | # GP100/Tesla P100 - DGX-1
 38 | # ARCH= -gencode arch=compute_60,code=sm_60
 39 | 
 40 | # For Jetson TX1, Tegra X1, DRIVE CX, DRIVE PX - uncomment:
 41 | # ARCH= -gencode arch=compute_53,code=[sm_53,compute_53]
 42 | 
 43 | # For Jetson Tx2 or Drive-PX2 uncomment:
 44 | # ARCH= -gencode arch=compute_62,code=[sm_62,compute_62]
 45 | 
 46 | 
 47 | VPATH=./src/
 48 | EXEC=darknet
 49 | OBJDIR=./obj/
 50 | 
 51 | ifeq ($(LIBSO), 1)
 52 | LIBNAMESO=libdarknet.so
 53 | APPNAMESO=uselib
 54 | endif
 55 | 
 56 | CC=gcc
 57 | CPP=g++
 58 | NVCC=nvcc
 59 | OPTS=-Ofast
 60 | LDFLAGS= -lm -pthread
 61 | COMMON= -Iinclude/ -I3rdparty/stb/include
 62 | CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC
 63 | 
 64 | ifeq ($(DEBUG), 1)
 65 | #OPTS= -O0 -g
 66 | #OPTS= -Og -g
 67 | COMMON+= -DDEBUG
 68 | CFLAGS+= -DDEBUG
 69 | else
 70 | ifeq ($(AVX), 1)
 71 | CFLAGS+= -ffp-contract=fast -mavx -mavx2 -msse3 -msse4.1 -msse4.2 -msse4a
 72 | endif
 73 | endif
 74 | 
 75 | CFLAGS+=$(OPTS)
 76 | 
 77 | ifeq ($(OPENCV), 1)
 78 | COMMON+= -DOPENCV
 79 | CFLAGS+= -DOPENCV
 80 | LDFLAGS+= `pkg-config --libs opencv`
 81 | COMMON+= `pkg-config --cflags opencv`
 82 | endif
 83 | 
 84 | ifeq ($(OPENMP), 1)
 85 | CFLAGS+= -fopenmp
 86 | LDFLAGS+= -lgomp
 87 | endif
 88 | 
 89 | ifeq ($(GPU), 1)
 90 | COMMON+= -DGPU -I/usr/local/cuda/include/
 91 | CFLAGS+= -DGPU
 92 | ifeq ($(OS),Darwin) #MAC
 93 | LDFLAGS+= -L/usr/local/cuda/lib -lcuda -lcudart -lcublas -lcurand
 94 | else
 95 | LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
 96 | endif
 97 | endif
 98 | 
 99 | ifeq ($(CUDNN), 1)
100 | COMMON+= -DCUDNN
101 | ifeq ($(OS),Darwin) #MAC
102 | CFLAGS+= -DCUDNN -I/usr/local/cuda/include
103 | LDFLAGS+= -L/usr/local/cuda/lib -lcudnn
104 | else
105 | CFLAGS+= -DCUDNN -I/usr/local/cudnn/include
106 | LDFLAGS+= -L/usr/local/cudnn/lib64 -lcudnn
107 | endif
108 | endif
109 | 
110 | ifeq ($(CUDNN_HALF), 1)
111 | COMMON+= -DCUDNN_HALF
112 | CFLAGS+= -DCUDNN_HALF
113 | ARCH+= -gencode arch=compute_70,code=[sm_70,compute_70]
114 | endif
115 | 
116 | ifeq ($(ZED_CAMERA), 1)
117 | CFLAGS+= -DZED_STEREO -I/usr/local/zed/include
118 | LDFLAGS+= -L/usr/local/zed/lib -lsl_core -lsl_input -lsl_zed
119 | #-lstdc++ -D_GLIBCXX_USE_CXX11_ABI=0 
120 | endif
121 | 
122 | OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o lstm_layer.o
123 | ifeq ($(GPU), 1) 
124 | LDFLAGS+= -lstdc++ 
125 | OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
126 | endif
127 | 
128 | OBJS = $(addprefix $(OBJDIR), $(OBJ))
129 | DEPS = $(wildcard src/*.h) Makefile include/darknet.h
130 | 
131 | all: obj backup results setchmod $(EXEC) $(LIBNAMESO) $(APPNAMESO)
132 | 
133 | ifeq ($(LIBSO), 1)
134 | CFLAGS+= -fPIC
135 | 
136 | $(LIBNAMESO): $(OBJS) include/yolo_v2_class.hpp src/yolo_v2_class.cpp
137 | 	$(CPP) -shared -std=c++11 -fvisibility=hidden -DLIB_EXPORTS $(COMMON) $(CFLAGS) $(OBJS) src/yolo_v2_class.cpp -o $@ $(LDFLAGS)
138 | 
139 | $(APPNAMESO): $(LIBNAMESO) include/yolo_v2_class.hpp src/yolo_console_dll.cpp
140 | 	$(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO)
141 | endif
142 | 
143 | $(EXEC): $(OBJS)
144 | 	$(CPP) -std=c++11 $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS)
145 | 
146 | $(OBJDIR)%.o: %.c $(DEPS)
147 | 	$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
148 | 
149 | $(OBJDIR)%.o: %.cpp $(DEPS)
150 | 	$(CPP) -std=c++11 $(COMMON) $(CFLAGS) -c $< -o $@
151 | 
152 | $(OBJDIR)%.o: %.cu $(DEPS)
153 | 	$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
154 | 
155 | # Output directories required by the build and by training/evaluation runs.
156 | obj:
157 | 	mkdir -p obj
158 | backup:
159 | 	mkdir -p backup
160 | results:
161 | 	mkdir -p results
162 | # Make the helper shell scripts in this directory executable.
163 | setchmod:
164 | 	chmod +x *.sh
165 | 
166 | # These targets never create a file of their own name; declaring them phony
167 | # keeps a stray file called "all", "clean" or "setchmod" from masking them.
168 | .PHONY: all clean setchmod
169 | 
170 | clean:
171 | 	rm -rf $(OBJS) $(EXEC) $(LIBNAMESO) $(APPNAMESO)
172 | 


--------------------------------------------------------------------------------
/Poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/enesozi/object-detection/fc60f9a5a8ce261f6beace0dc387bb6feee859f2/Poster.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Object Detection
 2 | Object detection on thermal images
 3 | 
 4 | ### Steps to follow:
 5 | * **./build_docker_container.sh** (builds the nvidia-docker image `thermal:darknet`)
 6 | * **./run_docker_container.sh** (runs the built image as a container named "darknet_thermal" with the dataset mounted)
 7 | * Make sure that your GPU architecture is included in the [Makefile](https://github.com/enesozi/object-detection/blob/master/Makefile#L16).
 8 |   * If it is not, add your GPU architecture and run **make clean** followed by **make** in the darknet directory.
 9 | * **./preprocess_flir_dataset.sh** (Make sure that image directories are consistent with yours.)
10 | * Detach from the container with "**Ctrl+P** followed by **Ctrl+Q**". This leaves the container running.
11 | * Start training in detached mode by using the following command:
12 |   * **nvidia-docker exec -d darknet_thermal bash -c "cd /home/object-detection/ ; ./preprocess_flir_dataset.sh ; ./start_training.sh"**
13 |   * The **start_training.sh** script uses GPU id 3 by default; adjust it to match the GPUs available on your machine.
14 | 
15 | #### PyCoco Results for IoU=0.50, area=all, maxDets=100
16 |    Average Precision  (AP) @[ IoU=0.50:0.50 | area=   all | maxDets=100 ] = **0.714**  
17 |    Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = -1.000  
18 |    Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = -1.000  
19 |    Average Precision  (AP) @[ IoU=0.50:0.50 | area= small | maxDets=100 ] = 0.576  
20 |    Average Precision  (AP) @[ IoU=0.50:0.50 | area=medium | maxDets=100 ] = 0.819  
21 |    Average Precision  (AP) @[ IoU=0.50:0.50 | area= large | maxDets=100 ] = 0.906  
22 |    Average Recall     (AR) @[ IoU=0.50:0.50 | area=   all | maxDets=  1 ] = 0.348  
23 |    Average Recall     (AR) @[ IoU=0.50:0.50 | area=   all | maxDets= 10 ] = 0.781  
24 |    Average Recall     (AR) @[ IoU=0.50:0.50 | area=   all | maxDets=100 ] = **0.787**  
25 |    Average Recall     (AR) @[ IoU=0.50:0.50 | area= small | maxDets=100 ] = 0.719  
26 |    Average Recall     (AR) @[ IoU=0.50:0.50 | area=medium | maxDets=100 ] = 0.834  
27 |    Average Recall     (AR) @[ IoU=0.50:0.50 | area= large | maxDets=100 ] = 0.918  
28 | 
29 | Baseline result: mAP IoU(0.5) of 0.587
30 | 
31 | You can download the dataset from [here](https://mega.nz/#!j9l32aAJ!wB4pk6H_12AaCRZT5flmNKcBcpCDdfleTaMi4WA8_-0)
32 | 
33 | You can find the [blog post](https://medium.com/swlh/object-detection-on-thermal-images-4f3410a89db4) published on Medium.
34 | 
35 | Pretrained weights: [thermal](https://mega.nz/#!vk9HDICC!qK13x8bjF1zY2aIJalR6BIZ1yfQye_r1NLcTxUJGNEs)
36 | 


--------------------------------------------------------------------------------
/build_docker_container.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Build the image tagged thermal:darknet from the Dockerfile in the current
3 | # directory, ignoring any cached layers.
4 | nvidia-docker build --no-cache -t thermal:darknet .


--------------------------------------------------------------------------------
/convert_coco_yolo.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | # best model until now
 3 | import argparse
 4 | import glob
 5 | import os
 6 | import sys
 7 | import json
 8 | 
 9 | if __name__ == '__main__':
10 |     parser = argparse.ArgumentParser()
11 |     parser.add_argument(
12 |         "path", help='Directory of json files containing annotations')
13 |     parser.add_argument(
14 |         "output_path", help='Output directory for image.txt files')
15 |     parser.add_argument("--debug", action="store_true")
16 |     args = parser.parse_args()
17 |     json_files = sorted(glob.glob(os.path.join(args.path, '*.json')))
18 |     if args.debug:
19 |         total_count = 0
20 |         cats = {0: 0, 1: 0, 2: 0}
21 |         bike_images = set()
22 |     for json_file in json_files:
23 |         with open(json_file) as f:
24 |             data = json.load(f)
25 |             image = data['image']
26 |             annotations = data['annotation']
27 |             file_name = image['file_name']
28 |             width = float(image['width'])
29 |             height = float(image['height'])
30 |             converted_results = []
31 |             for ann in annotations:
32 |                 cat_id = int(ann['category_id'])
33 |                 if cat_id <= 3:
34 |                     left, top, bbox_width, bbox_height = map(
35 |                         float, ann['bbox'])
36 | 
37 |                     # Yolo classes are starting from zero index
38 |                     cat_id -= 1
39 |                     if args.debug:
40 |                         cats[cat_id] += 1
41 |                         total_count += 1
42 |                         if cat_id == 1:
43 |                             bike_images.add(file_name)
44 |                     x_center, y_center = (
45 |                         left + bbox_width / 2, top + bbox_height / 2)
46 |                     # darknet expects relative values wrt image width&height
47 |                     x_rel, y_rel = (x_center / width, y_center / height)
48 |                     w_rel, h_rel = (bbox_width / width, bbox_height / height)
49 |                     converted_results.append(
50 |                         (cat_id, x_rel, y_rel, w_rel, h_rel))
51 |             if not args.debug:
52 |                 with open(os.path.join(args.output_path, file_name + '.txt'), 'w+') as fp:
53 |                     fp.write('\n'.join('%d %.6f %.6f %.6f %.6f' %
54 |                                        res for res in converted_results))
55 |     if args.debug:
56 |         print({cat: cats[cat] for cat in cats})
57 |         print(total_count)
58 |         with open('bikes.txt', 'a+') as f:
59 |             f.write('\n'.join("data/thermal/%s.jpeg"%b_img for b_img in bike_images))
60 |             f.write('\n')
61 | 


--------------------------------------------------------------------------------
/preprocess_flir_dataset.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Prepare the FLIR ADAS dataset for darknet training. Run from the repository
 3 | # root: every output path below is built from $PWD.
 4 | train_images="$HOME/Downloads/FLIR_ADAS/training/PreviewData"
 5 | validation_images="$HOME/Downloads/FLIR_ADAS/validation/PreviewData"
 6 | video_images="$HOME/Downloads/FLIR_ADAS/video/PreviewData"
 7 | train_anns="$HOME/Downloads/FLIR_ADAS/training/Annotations"
 8 | validation_anns="$HOME/Downloads/FLIR_ADAS/validation/Annotations"
 9 | video_anns="$HOME/Downloads/FLIR_ADAS/video/Annotations"
10 | train_file="thermal_train.txt"
11 | valid_file="thermal_validation.txt"
12 | cfg_file="yolov3-spp-custom.cfg"
13 | data_file="thermal.data"
14 | name_file="thermal.names"
15 | x64_dir="$PWD/darknet/build/darknet/x64"
16 | image_dir="$x64_dir/data/thermal"
17 | 
18 | # Rebuild both image lists from scratch on every run.
19 | rm -f "$train_file" "$valid_file"
20 | 
21 | for value in {1..8862}
22 | do
23 | printf "data/thermal/FLIR_%05d.jpeg\n" "$value" >> "$train_file"
24 | done
25 | for value in {1..4224}
26 | do
27 | printf "data/thermal/FLIR_video_%05d.jpeg\n" "$value" >> "$train_file"
28 | done
29 | for value in {8863..10228}
30 | do
31 | printf "data/thermal/FLIR_%05d.jpeg\n" "$value" >> "$valid_file"
32 | done
33 | 
34 | # Copy images to the correct directory
35 | rm -rf "$image_dir"
36 | mkdir -p "$image_dir"
37 | cp "$train_images/"* "$image_dir" 2>/dev/null
38 | cp "$validation_images/"* "$image_dir" 2>/dev/null
39 | cp "$video_images/"* "$image_dir" 2>/dev/null
40 | 
41 | # Convert anns from standard COCO format to darknet format
42 | python convert_coco_yolo.py "$train_anns" "$image_dir"
43 | python convert_coco_yolo.py "$validation_anns" "$image_dir"
44 | python convert_coco_yolo.py "$video_anns" "$image_dir"
45 | 
46 | # Quick fix for imbalanced dataset
47 | # Both files are only ever appended to below, so leftovers from an interrupted
48 | # run would otherwise be duplicated into the training list.
49 | rm -f "bikes.txt" "bikes_new.txt"
50 | 
51 | python convert_coco_yolo.py "$train_anns" . --debug
52 | python convert_coco_yolo.py "$validation_anns" . --debug
53 | python convert_coco_yolo.py "$video_anns" . --debug
54 | 
55 | # `sed p` prints every input line twice, so the five passes below add ten
56 | # copies of each listed image to bikes_new.txt.
57 | for iter in {1..5}
58 | do
59 | 	sed p "bikes.txt" >> "bikes_new.txt"
60 | done
61 | 
62 | mv "bikes_new.txt" "bikes.txt"
63 | cat "bikes.txt" >> "$train_file"
64 | rm "bikes.txt"
65 | 
66 | # Shuffle train dataset
67 | shuf "$train_file" > "train_file_shuffled.txt"
68 | mv "train_file_shuffled.txt" "$train_file"
69 | 
70 | # Copy necessary files to the correct directories
71 | cp "$cfg_file"   "$x64_dir/"
72 | cp "$train_file" "$x64_dir/data/"
73 | cp "$valid_file" "$x64_dir/data/"
74 | cp "$data_file"  "$x64_dir/data/"
75 | cp "$name_file"  "$x64_dir/data/"
76 | cp "run_all_iters.sh" "$x64_dir/"
77 | 
78 | # Download pretrained weight
79 | wget https://pjreddie.com/media/files/darknet53.conv.74 -O "$x64_dir/darknet53.conv.74"


--------------------------------------------------------------------------------
/run_all_iters.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run `darknet detector map` on the checkpoints saved every 1000 iterations,
3 | # from 17000 down to 1000, appending all output to val_res.txt.
4 | # The {start..end..step} range is bash brace expansion, hence the bash shebang.
5 | # Paths are relative, so this must be run from the directory that holds
6 | # yolov3-spp-custom.cfg, data/ and backup/.
7 | for iter in {17000..1000..1000}
8 | do
9 | 	../../../darknet detector map data/thermal.data yolov3-spp-custom.cfg "backup/yolov3-spp-custom_${iter}.weights" -gpus 3 >> val_res.txt
10 | done
11 | 


--------------------------------------------------------------------------------
/run_docker_container.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Start an interactive container named darknet_thermal from the thermal:darknet
3 | # image with the NVIDIA runtime, mounting the host dataset directory at
4 | # /home/Downloads inside the container.
5 | docker run --runtime=nvidia -it --name darknet_thermal -v ~/Downloads/FLIR_ADAS_12_11_18:/home/Downloads thermal:darknet


--------------------------------------------------------------------------------
/start_map_calc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run the per-checkpoint mAP evaluation from inside the darknet x64 directory.
3 | # Must be started from the repository root; abort if the directory is missing
4 | # instead of running the evaluation from the wrong place.
5 | cd "$PWD/darknet/build/darknet/x64/" || exit 1
6 | ./run_all_iters.sh


--------------------------------------------------------------------------------
/start_training.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Start darknet training from inside the darknet x64 directory, appending the
3 | # output to /home/tra_results.txt. Must be started from the repository root.
4 | # The GPU list passed to -gpus defaults to 3 and can be overridden through the
5 | # GPUS environment variable, e.g. `GPUS=0 ./start_training.sh`.
6 | cd "$PWD/darknet/build/darknet/x64/" || exit 1
7 | ../../../darknet detector train data/thermal.data yolov3-spp-custom.cfg darknet53.conv.74 -gpus "${GPUS:-3}" -dont_show  -map >> /home/tra_results.txt
8 | 


--------------------------------------------------------------------------------
/thermal.data:
--------------------------------------------------------------------------------
1 | classes = 3
2 | train   = data/thermal_train.txt
3 | valid   = data/thermal_validation.txt
4 | names   = data/thermal.names
5 | backup  = backup/
6 | 


--------------------------------------------------------------------------------
/thermal.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | 


--------------------------------------------------------------------------------
/yolov3-spp-custom.cfg:
--------------------------------------------------------------------------------
  1 | [net]
  2 | # Testing
  3 | #batch=1
  4 | #subdivisions=1
  5 | # Training
  6 | batch=64
  7 | subdivisions=32
  8 | width=608
  9 | height=608
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 500200
 21 | policy=steps
 22 | steps=400000,450000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=32
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | # Downsample
 34 | 
 35 | [convolutional]
 36 | batch_normalize=1
 37 | filters=64
 38 | size=3
 39 | stride=2
 40 | pad=1
 41 | activation=leaky
 42 | 
 43 | [convolutional]
 44 | batch_normalize=1
 45 | filters=32
 46 | size=1
 47 | stride=1
 48 | pad=1
 49 | activation=leaky
 50 | 
 51 | [convolutional]
 52 | batch_normalize=1
 53 | filters=64
 54 | size=3
 55 | stride=1
 56 | pad=1
 57 | activation=leaky
 58 | 
 59 | [shortcut]
 60 | from=-3
 61 | activation=linear
 62 | 
 63 | # Downsample
 64 | 
 65 | [convolutional]
 66 | batch_normalize=1
 67 | filters=128
 68 | size=3
 69 | stride=2
 70 | pad=1
 71 | activation=leaky
 72 | 
 73 | [convolutional]
 74 | batch_normalize=1
 75 | filters=64
 76 | size=1
 77 | stride=1
 78 | pad=1
 79 | activation=leaky
 80 | 
 81 | [convolutional]
 82 | batch_normalize=1
 83 | filters=128
 84 | size=3
 85 | stride=1
 86 | pad=1
 87 | activation=leaky
 88 | 
 89 | [shortcut]
 90 | from=-3
 91 | activation=linear
 92 | 
 93 | [convolutional]
 94 | batch_normalize=1
 95 | filters=64
 96 | size=1
 97 | stride=1
 98 | pad=1
 99 | activation=leaky
100 | 
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 | 
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 | 
113 | # Downsample
114 | 
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 | 
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 | 
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 | 
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 | 
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 | 
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 | 
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 | 
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 | 
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 | 
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 | 
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 | 
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 | 
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 | 
203 | 
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 | 
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 | 
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 | 
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 | 
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 | 
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 | 
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 | 
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 | 
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 | 
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 | 
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 | 
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 | 
284 | # Downsample
285 | 
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 | 
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 | 
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 | 
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 | 
314 | 
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 | 
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 | 
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 | 
335 | 
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 | 
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 | 
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 | 
356 | 
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 | 
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 | 
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 | 
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 | 
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 | 
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 | 
397 | 
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 | 
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 | 
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 | 
418 | 
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 | 
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 | 
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 | 
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 | 
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 | 
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 | 
459 | # Downsample
460 | 
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 | 
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 | 
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 | 
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 | 
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 | 
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 | 
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 | 
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 | 
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 | 
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 | 
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 | 
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 | 
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 | 
549 | ######################
550 | 
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 | 
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 | 
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 | 
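575 | ### SPP: three stride-1 max-pools (size 5, 9, 13) over the previous conv output; the last [route] concatenates all three with that output ###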
576 | [maxpool]
577 | stride=1
578 | size=5
579 | 
580 | [route]
581 | layers=-2
582 | 
583 | [maxpool]
584 | stride=1
585 | size=9
586 | 
587 | [route]
588 | layers=-4
589 | 
590 | [maxpool]
591 | stride=1
592 | size=13
593 | 
594 | [route]
595 | layers=-1,-3,-5,-6
596 | 
597 | ### End SPP ###
598 | 
599 | [convolutional]
600 | batch_normalize=1
601 | filters=512
602 | size=1
603 | stride=1
604 | pad=1
605 | activation=leaky
606 | 
607 | 
608 | [convolutional]
609 | batch_normalize=1
610 | size=3
611 | stride=1
612 | pad=1
613 | filters=1024
614 | activation=leaky
615 | 
616 | [convolutional]
617 | batch_normalize=1
618 | filters=512
619 | size=1
620 | stride=1
621 | pad=1
622 | activation=leaky
623 | 
624 | [convolutional]
625 | batch_normalize=1
626 | size=3
627 | stride=1
628 | pad=1
629 | filters=1024
630 | activation=leaky
631 | 
632 | [convolutional]
633 | size=1
634 | stride=1
635 | pad=1
636 | filters=24
637 | activation=linear
638 | 
639 | 
640 | [yolo]
641 | mask = 6,7,8
642 | anchors = 8,20, 17,24, 14,52, 25,82, 34,37, 53,145, 61,60, 110,103, 165,220
643 | classes=3
644 | num=9
645 | jitter=.3
646 | ignore_thresh = .7
647 | truth_thresh = 1
648 | random=1
649 | 
650 | 
651 | [route]
652 | layers = -4
653 | 
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 | 
662 | [upsample]
663 | stride=2
664 | 
665 | [route]
666 | layers = -1, 61
667 | 
668 | 
669 | 
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 | 
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 | 
686 | [convolutional]
687 | batch_normalize=1
688 | filters=256
689 | size=1
690 | stride=1
691 | pad=1
692 | activation=leaky
693 | 
694 | [convolutional]
695 | batch_normalize=1
696 | size=3
697 | stride=1
698 | pad=1
699 | filters=512
700 | activation=leaky
701 | 
702 | [convolutional]
703 | batch_normalize=1
704 | filters=256
705 | size=1
706 | stride=1
707 | pad=1
708 | activation=leaky
709 | 
710 | [convolutional]
711 | batch_normalize=1
712 | size=3
713 | stride=1
714 | pad=1
715 | filters=512
716 | activation=leaky
717 | 
718 | [convolutional]
719 | size=1
720 | stride=1
721 | pad=1
722 | filters=16
723 | activation=linear
724 | 
725 | 
726 | [yolo]
727 | mask = 4,5
728 | anchors = 8,20, 17,24, 14,52, 25,82, 34,37, 53,145, 61,60, 110,103, 165,220
729 | classes=3
730 | num=9
731 | jitter=.3
732 | ignore_thresh = .7
733 | truth_thresh = 1
734 | random=1
735 | 
736 | 
737 | 
738 | [route]
739 | layers = -4
740 | 
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 | 
749 | [upsample]
750 | stride=2
751 | 
752 | [route]
753 | layers = -1, 36
754 | 
755 | 
756 | 
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 | 
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 | 
773 | [convolutional]
774 | batch_normalize=1
775 | filters=128
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 | 
781 | [convolutional]
782 | batch_normalize=1
783 | size=3
784 | stride=1
785 | pad=1
786 | filters=256
787 | activation=leaky
788 | 
789 | [convolutional]
790 | batch_normalize=1
791 | filters=128
792 | size=1
793 | stride=1
794 | pad=1
795 | activation=leaky
796 | 
797 | [convolutional]
798 | batch_normalize=1
799 | size=3
800 | stride=1
801 | pad=1
802 | filters=256
803 | activation=leaky
804 | 
805 | [convolutional]
806 | size=1
807 | stride=1
808 | pad=1
809 | filters=32
810 | activation=linear
811 | 
812 | 
813 | [yolo]
814 | mask = 0,1,2,3
815 | anchors =  8,20, 17,24, 14,52, 25,82, 34,37, 53,145, 61,60, 110,103, 165,220
816 | classes=3
817 | num=9
818 | jitter=.3
819 | ignore_thresh = .7
820 | truth_thresh = 1
821 | random=1
822 | 
823 | 


--------------------------------------------------------------------------------
/yolov3-spp.cfg:
--------------------------------------------------------------------------------
  1 | [net]
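  2 | # Testing: for inference, uncomment the next two lines and comment out batch/subdivisions under Training
  3 | #batch=1
  4 | #subdivisions=1
  5 | # Training: batch images per weight update, processed in `subdivisions` chunks (batch/subdivisions images at a time)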
  6 | batch=64
  7 | subdivisions=16
  8 | width=608
  9 | height=608
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 500200
 21 | policy=steps
 22 | steps=400000,450000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=32
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | # Downsample
 34 | 
 35 | [convolutional]
 36 | batch_normalize=1
 37 | filters=64
 38 | size=3
 39 | stride=2
 40 | pad=1
 41 | activation=leaky
 42 | 
 43 | [convolutional]
 44 | batch_normalize=1
 45 | filters=32
 46 | size=1
 47 | stride=1
 48 | pad=1
 49 | activation=leaky
 50 | 
 51 | [convolutional]
 52 | batch_normalize=1
 53 | filters=64
 54 | size=3
 55 | stride=1
 56 | pad=1
 57 | activation=leaky
 58 | 
 59 | [shortcut]
 60 | from=-3
 61 | activation=linear
 62 | 
 63 | # Downsample
 64 | 
 65 | [convolutional]
 66 | batch_normalize=1
 67 | filters=128
 68 | size=3
 69 | stride=2
 70 | pad=1
 71 | activation=leaky
 72 | 
 73 | [convolutional]
 74 | batch_normalize=1
 75 | filters=64
 76 | size=1
 77 | stride=1
 78 | pad=1
 79 | activation=leaky
 80 | 
 81 | [convolutional]
 82 | batch_normalize=1
 83 | filters=128
 84 | size=3
 85 | stride=1
 86 | pad=1
 87 | activation=leaky
 88 | 
 89 | [shortcut]
 90 | from=-3
 91 | activation=linear
 92 | 
 93 | [convolutional]
 94 | batch_normalize=1
 95 | filters=64
 96 | size=1
 97 | stride=1
 98 | pad=1
 99 | activation=leaky
100 | 
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 | 
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 | 
113 | # Downsample
114 | 
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 | 
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 | 
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 | 
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 | 
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 | 
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 | 
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 | 
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 | 
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 | 
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 | 
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 | 
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 | 
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 | 
203 | 
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 | 
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 | 
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 | 
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 | 
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 | 
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 | 
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 | 
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 | 
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 | 
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 | 
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 | 
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 | 
284 | # Downsample
285 | 
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 | 
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 | 
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 | 
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 | 
314 | 
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 | 
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 | 
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 | 
335 | 
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 | 
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 | 
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 | 
356 | 
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 | 
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 | 
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 | 
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 | 
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 | 
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 | 
397 | 
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 | 
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 | 
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 | 
418 | 
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 | 
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 | 
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 | 
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 | 
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 | 
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 | 
459 | # Downsample
460 | 
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 | 
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 | 
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 | 
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 | 
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 | 
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 | 
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 | 
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 | 
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 | 
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 | 
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 | 
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 | 
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 | 
549 | ###################### End of the residual feature extractor; detection layers follow
550 | 
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 | 
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 | 
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 | 
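575 | ### SPP: three stride-1 max-pools (size 5, 9, 13) over the previous conv output; the last [route] concatenates all three with that output ###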
576 | [maxpool]
577 | stride=1
578 | size=5
579 | 
580 | [route]
581 | layers=-2
582 | 
583 | [maxpool]
584 | stride=1
585 | size=9
586 | 
587 | [route]
588 | layers=-4
589 | 
590 | [maxpool]
591 | stride=1
592 | size=13
593 | 
594 | [route]
595 | layers=-1,-3,-5,-6
596 | 
597 | ### End SPP ###
598 | 
599 | [convolutional]
600 | batch_normalize=1
601 | filters=512
602 | size=1
603 | stride=1
604 | pad=1
605 | activation=leaky
606 | 
607 | 
608 | [convolutional]
609 | batch_normalize=1
610 | size=3
611 | stride=1
612 | pad=1
613 | filters=1024
614 | activation=leaky
615 | 
616 | [convolutional]
617 | batch_normalize=1
618 | filters=512
619 | size=1
620 | stride=1
621 | pad=1
622 | activation=leaky
623 | 
624 | [convolutional]
625 | batch_normalize=1
626 | size=3
627 | stride=1
628 | pad=1
629 | filters=1024
630 | activation=leaky
631 | 
632 | [convolutional]
633 | size=1
634 | stride=1
635 | pad=1
636 | filters=18
637 | activation=linear
638 | 
639 | 
640 | [yolo]
641 | mask = 7,8
642 | anchors = 8,20,  17,22,  13,47,  33,35,  24,80,  58,56,  51,152, 102,93, 158,195
643 | classes=4
644 | num=9
645 | jitter=.3
646 | ignore_thresh = .7
647 | truth_thresh = 1
648 | random=1
649 | 
650 | 
651 | [route]
652 | layers = -4
653 | 
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 | 
662 | [upsample]
663 | stride=2
664 | 
665 | [route]
666 | layers = -1, 61
667 | 
668 | 
669 | 
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 | 
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 | 
686 | [convolutional]
687 | batch_normalize=1
688 | filters=256
689 | size=1
690 | stride=1
691 | pad=1
692 | activation=leaky
693 | 
694 | [convolutional]
695 | batch_normalize=1
696 | size=3
697 | stride=1
698 | pad=1
699 | filters=512
700 | activation=leaky
701 | 
702 | [convolutional]
703 | batch_normalize=1
704 | filters=256
705 | size=1
706 | stride=1
707 | pad=1
708 | activation=leaky
709 | 
710 | [convolutional]
711 | batch_normalize=1
712 | size=3
713 | stride=1
714 | pad=1
715 | filters=512
716 | activation=leaky
717 | 
718 | [convolutional]
719 | size=1
720 | stride=1
721 | pad=1
722 | filters=27
723 | activation=linear
724 | 
725 | 
726 | [yolo]
727 | mask = 3,5,6
728 | anchors =   8,20,  17,22,  13,47,  33,35,  24,80,  58,56,  51,152, 102,93, 158,195
729 | classes=4
730 | num=9
731 | jitter=.3
732 | ignore_thresh = .7
733 | truth_thresh = 1
734 | random=1
735 | 
736 | 
737 | 
738 | [route]
739 | layers = -4
740 | 
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 | 
749 | [upsample]
750 | stride=2
751 | 
752 | [route]
753 | layers = -1, 36
754 | 
755 | 
756 | 
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 | 
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 | 
773 | [convolutional]
774 | batch_normalize=1
775 | filters=128
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 | 
781 | [convolutional]
782 | batch_normalize=1
783 | size=3
784 | stride=1
785 | pad=1
786 | filters=256
787 | activation=leaky
788 | 
789 | [convolutional]
790 | batch_normalize=1
791 | filters=128
792 | size=1
793 | stride=1
794 | pad=1
795 | activation=leaky
796 | 
797 | [convolutional]
798 | batch_normalize=1
799 | size=3
800 | stride=1
801 | pad=1
802 | filters=256
803 | activation=leaky
804 | 
805 | [convolutional]
806 | size=1
807 | stride=1
808 | pad=1
809 | filters=36
810 | activation=linear
811 | 
812 | 
813 | [yolo]
814 | mask = 0,1,2,4
815 | anchors =   8,20,  17,22,  13,47,  33,35,  24,80,  58,56,  51,152, 102,93, 158,195
816 | classes=4
817 | num=9
818 | jitter=.3
819 | ignore_thresh = .7
820 | truth_thresh = 1
821 | random=1
822 | 
823 | 


--------------------------------------------------------------------------------
/yolov3-thermal.cfg:
--------------------------------------------------------------------------------
  1 | [net]
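  2 | # Testing: for inference, uncomment the next two lines and comment out batch/subdivisions under Training
  3 | #batch=1
  4 | #subdivisions=1
  5 | # Training: batch images per weight update, processed in `subdivisions` chunks (batch/subdivisions images at a time)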
  6 | try_fix_nan=1
  7 | batch=64
  8 | subdivisions=64
  9 | width=608
 10 | height=608
 11 | channels=1
 12 | momentum=0.9
 13 | decay=0.0005
 14 | angle=0
 15 | saturation = 1.5
 16 | exposure = 1.5
 17 | hue=.1
 18 | adam=0
 19 | learning_rate=0.0003
 20 | burn_in=1000
 21 | max_batches = 10000
 22 | policy=steps
 23 | steps=8000,9000
 24 | scales=.1,.1
 25 | 
 26 | [convolutional]
 27 | batch_normalize=1
 28 | filters=32
 29 | size=3
 30 | stride=1
 31 | pad=1
 32 | activation=leaky
 33 | 
 34 | # Downsample
 35 | 
 36 | [convolutional]
 37 | batch_normalize=1
 38 | filters=64
 39 | size=3
 40 | stride=2
 41 | pad=1
 42 | activation=leaky
 43 | 
 44 | [convolutional]
 45 | batch_normalize=1
 46 | filters=32
 47 | size=1
 48 | stride=1
 49 | pad=1
 50 | activation=leaky
 51 | 
 52 | [convolutional]
 53 | batch_normalize=1
 54 | filters=64
 55 | size=3
 56 | stride=1
 57 | pad=1
 58 | activation=leaky
 59 | 
 60 | [shortcut]
 61 | from=-3
 62 | activation=linear
 63 | 
 64 | # Downsample
 65 | 
 66 | [convolutional]
 67 | batch_normalize=1
 68 | filters=128
 69 | size=3
 70 | stride=2
 71 | pad=1
 72 | activation=leaky
 73 | 
 74 | [convolutional]
 75 | batch_normalize=1
 76 | filters=64
 77 | size=1
 78 | stride=1
 79 | pad=1
 80 | activation=leaky
 81 | 
 82 | [convolutional]
 83 | batch_normalize=1
 84 | filters=128
 85 | size=3
 86 | stride=1
 87 | pad=1
 88 | activation=leaky
 89 | 
 90 | [shortcut]
 91 | from=-3
 92 | activation=linear
 93 | 
 94 | [convolutional]
 95 | batch_normalize=1
 96 | filters=64
 97 | size=1
 98 | stride=1
 99 | pad=1
100 | activation=leaky
101 | 
102 | [convolutional]
103 | batch_normalize=1
104 | filters=128
105 | size=3
106 | stride=1
107 | pad=1
108 | activation=leaky
109 | 
110 | [shortcut]
111 | from=-3
112 | activation=linear
113 | 
114 | # Downsample
115 | 
116 | [convolutional]
117 | batch_normalize=1
118 | filters=256
119 | size=3
120 | stride=2
121 | pad=1
122 | activation=leaky
123 | 
124 | [convolutional]
125 | batch_normalize=1
126 | filters=128
127 | size=1
128 | stride=1
129 | pad=1
130 | activation=leaky
131 | 
132 | [convolutional]
133 | batch_normalize=1
134 | filters=256
135 | size=3
136 | stride=1
137 | pad=1
138 | activation=leaky
139 | 
140 | [shortcut]
141 | from=-3
142 | activation=linear
143 | 
144 | [convolutional]
145 | batch_normalize=1
146 | filters=128
147 | size=1
148 | stride=1
149 | pad=1
150 | activation=leaky
151 | 
152 | [convolutional]
153 | batch_normalize=1
154 | filters=256
155 | size=3
156 | stride=1
157 | pad=1
158 | activation=leaky
159 | 
160 | [shortcut]
161 | from=-3
162 | activation=linear
163 | 
164 | [convolutional]
165 | batch_normalize=1
166 | filters=128
167 | size=1
168 | stride=1
169 | pad=1
170 | activation=leaky
171 | 
172 | [convolutional]
173 | batch_normalize=1
174 | filters=256
175 | size=3
176 | stride=1
177 | pad=1
178 | activation=leaky
179 | 
180 | [shortcut]
181 | from=-3
182 | activation=linear
183 | 
184 | [convolutional]
185 | batch_normalize=1
186 | filters=128
187 | size=1
188 | stride=1
189 | pad=1
190 | activation=leaky
191 | 
192 | [convolutional]
193 | batch_normalize=1
194 | filters=256
195 | size=3
196 | stride=1
197 | pad=1
198 | activation=leaky
199 | 
200 | [shortcut]
201 | from=-3
202 | activation=linear
203 | 
204 | 
205 | [convolutional]
206 | batch_normalize=1
207 | filters=128
208 | size=1
209 | stride=1
210 | pad=1
211 | activation=leaky
212 | 
213 | [convolutional]
214 | batch_normalize=1
215 | filters=256
216 | size=3
217 | stride=1
218 | pad=1
219 | activation=leaky
220 | 
221 | [shortcut]
222 | from=-3
223 | activation=linear
224 | 
225 | [convolutional]
226 | batch_normalize=1
227 | filters=128
228 | size=1
229 | stride=1
230 | pad=1
231 | activation=leaky
232 | 
233 | [convolutional]
234 | batch_normalize=1
235 | filters=256
236 | size=3
237 | stride=1
238 | pad=1
239 | activation=leaky
240 | 
241 | [shortcut]
242 | from=-3
243 | activation=linear
244 | 
245 | [convolutional]
246 | batch_normalize=1
247 | filters=128
248 | size=1
249 | stride=1
250 | pad=1
251 | activation=leaky
252 | 
253 | [convolutional]
254 | batch_normalize=1
255 | filters=256
256 | size=3
257 | stride=1
258 | pad=1
259 | activation=leaky
260 | 
261 | [shortcut]
262 | from=-3
263 | activation=linear
264 | 
265 | [convolutional]
266 | batch_normalize=1
267 | filters=128
268 | size=1
269 | stride=1
270 | pad=1
271 | activation=leaky
272 | 
273 | [convolutional]
274 | batch_normalize=1
275 | filters=256
276 | size=3
277 | stride=1
278 | pad=1
279 | activation=leaky
280 | 
281 | [shortcut]
282 | from=-3
283 | activation=linear
284 | 
285 | # Downsample
286 | 
287 | [convolutional]
288 | batch_normalize=1
289 | filters=512
290 | size=3
291 | stride=2
292 | pad=1
293 | activation=leaky
294 | 
295 | [convolutional]
296 | batch_normalize=1
297 | filters=256
298 | size=1
299 | stride=1
300 | pad=1
301 | activation=leaky
302 | 
303 | [convolutional]
304 | batch_normalize=1
305 | filters=512
306 | size=3
307 | stride=1
308 | pad=1
309 | activation=leaky
310 | 
311 | [shortcut]
312 | from=-3
313 | activation=linear
314 | 
315 | 
316 | [convolutional]
317 | batch_normalize=1
318 | filters=256
319 | size=1
320 | stride=1
321 | pad=1
322 | activation=leaky
323 | 
324 | [convolutional]
325 | batch_normalize=1
326 | filters=512
327 | size=3
328 | stride=1
329 | pad=1
330 | activation=leaky
331 | 
332 | [shortcut]
333 | from=-3
334 | activation=linear
335 | 
336 | 
337 | [convolutional]
338 | batch_normalize=1
339 | filters=256
340 | size=1
341 | stride=1
342 | pad=1
343 | activation=leaky
344 | 
345 | [convolutional]
346 | batch_normalize=1
347 | filters=512
348 | size=3
349 | stride=1
350 | pad=1
351 | activation=leaky
352 | 
353 | [shortcut]
354 | from=-3
355 | activation=linear
356 | 
357 | 
358 | [convolutional]
359 | batch_normalize=1
360 | filters=256
361 | size=1
362 | stride=1
363 | pad=1
364 | activation=leaky
365 | 
366 | [convolutional]
367 | batch_normalize=1
368 | filters=512
369 | size=3
370 | stride=1
371 | pad=1
372 | activation=leaky
373 | 
374 | [shortcut]
375 | from=-3
376 | activation=linear
377 | 
378 | [convolutional]
379 | batch_normalize=1
380 | filters=256
381 | size=1
382 | stride=1
383 | pad=1
384 | activation=leaky
385 | 
386 | [convolutional]
387 | batch_normalize=1
388 | filters=512
389 | size=3
390 | stride=1
391 | pad=1
392 | activation=leaky
393 | 
394 | [shortcut]
395 | from=-3
396 | activation=linear
397 | 
398 | 
399 | [convolutional]
400 | batch_normalize=1
401 | filters=256
402 | size=1
403 | stride=1
404 | pad=1
405 | activation=leaky
406 | 
407 | [convolutional]
408 | batch_normalize=1
409 | filters=512
410 | size=3
411 | stride=1
412 | pad=1
413 | activation=leaky
414 | 
415 | [shortcut]
416 | from=-3
417 | activation=linear
418 | 
419 | 
420 | [convolutional]
421 | batch_normalize=1
422 | filters=256
423 | size=1
424 | stride=1
425 | pad=1
426 | activation=leaky
427 | 
428 | [convolutional]
429 | batch_normalize=1
430 | filters=512
431 | size=3
432 | stride=1
433 | pad=1
434 | activation=leaky
435 | 
436 | [shortcut]
437 | from=-3
438 | activation=linear
439 | 
440 | [convolutional]
441 | batch_normalize=1
442 | filters=256
443 | size=1
444 | stride=1
445 | pad=1
446 | activation=leaky
447 | 
448 | [convolutional]
449 | batch_normalize=1
450 | filters=512
451 | size=3
452 | stride=1
453 | pad=1
454 | activation=leaky
455 | 
456 | [shortcut]
457 | from=-3
458 | activation=linear
459 | 
460 | # Downsample
461 | 
462 | [convolutional]
463 | batch_normalize=1
464 | filters=1024
465 | size=3
466 | stride=2
467 | pad=1
468 | activation=leaky
469 | 
470 | [convolutional]
471 | batch_normalize=1
472 | filters=512
473 | size=1
474 | stride=1
475 | pad=1
476 | activation=leaky
477 | 
478 | [convolutional]
479 | batch_normalize=1
480 | filters=1024
481 | size=3
482 | stride=1
483 | pad=1
484 | activation=leaky
485 | 
486 | [shortcut]
487 | from=-3
488 | activation=linear
489 | 
490 | [convolutional]
491 | batch_normalize=1
492 | filters=512
493 | size=1
494 | stride=1
495 | pad=1
496 | activation=leaky
497 | 
498 | [convolutional]
499 | batch_normalize=1
500 | filters=1024
501 | size=3
502 | stride=1
503 | pad=1
504 | activation=leaky
505 | 
506 | [shortcut]
507 | from=-3
508 | activation=linear
509 | 
510 | [convolutional]
511 | batch_normalize=1
512 | filters=512
513 | size=1
514 | stride=1
515 | pad=1
516 | activation=leaky
517 | 
518 | [convolutional]
519 | batch_normalize=1
520 | filters=1024
521 | size=3
522 | stride=1
523 | pad=1
524 | activation=leaky
525 | 
526 | [shortcut]
527 | from=-3
528 | activation=linear
529 | 
530 | [convolutional]
531 | batch_normalize=1
532 | filters=512
533 | size=1
534 | stride=1
535 | pad=1
536 | activation=leaky
537 | 
538 | [convolutional]
539 | batch_normalize=1
540 | filters=1024
541 | size=3
542 | stride=1
543 | pad=1
544 | activation=leaky
545 | 
546 | [shortcut]
547 | from=-3
548 | activation=linear
549 | 
550 | ###################### End of the residual feature extractor; detection layers follow
551 | 
552 | [convolutional]
553 | batch_normalize=1
554 | filters=512
555 | size=1
556 | stride=1
557 | pad=1
558 | activation=leaky
559 | 
560 | [convolutional]
561 | batch_normalize=1
562 | size=3
563 | stride=1
564 | pad=1
565 | filters=1024
566 | activation=leaky
567 | 
568 | [convolutional]
569 | batch_normalize=1
570 | filters=512
571 | size=1
572 | stride=1
573 | pad=1
574 | activation=leaky
575 | 
576 | [convolutional]
577 | batch_normalize=1
578 | size=3
579 | stride=1
580 | pad=1
581 | filters=1024
582 | activation=leaky
583 | 
584 | [convolutional]
585 | batch_normalize=1
586 | filters=512
587 | size=1
588 | stride=1
589 | pad=1
590 | activation=leaky
591 | 
592 | [convolutional]
593 | batch_normalize=1
594 | size=3
595 | stride=1
596 | pad=1
597 | filters=1024
598 | activation=leaky
599 | 
600 | [convolutional]
601 | size=1
602 | stride=1
603 | pad=1
604 | filters=27
605 | activation=linear
606 | 
607 | 
608 | [yolo]
609 | mask = 5,7,8
610 | anchors = 13,21, 16,47, 28,29, 47,45, 27,84, 73,71, 54,161, 117,107, 165,200
611 | classes=4
612 | num=9
613 | jitter=.3
614 | ignore_thresh = .5
615 | truth_thresh = 1
616 | random=1
617 | 
618 | 
619 | [route]
620 | layers = -4
621 | 
622 | [convolutional]
623 | batch_normalize=1
624 | filters=256
625 | size=1
626 | stride=1
627 | pad=1
628 | activation=leaky
629 | 
630 | [upsample]
631 | stride=2
632 | 
633 | [route]
634 | layers = -1, 61
635 | 
636 | 
637 | 
638 | [convolutional]
639 | batch_normalize=1
640 | filters=256
641 | size=1
642 | stride=1
643 | pad=1
644 | activation=leaky
645 | 
646 | [convolutional]
647 | batch_normalize=1
648 | size=3
649 | stride=1
650 | pad=1
651 | filters=512
652 | activation=leaky
653 | 
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 | 
662 | [convolutional]
663 | batch_normalize=1
664 | size=3
665 | stride=1
666 | pad=1
667 | filters=512
668 | activation=leaky
669 | 
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 | 
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 | 
686 | [convolutional]
687 | size=1
688 | stride=1
689 | pad=1
690 | filters=18
691 | activation=linear
692 | 
693 | 
694 | [yolo]
695 | mask = 3,6
696 | anchors = 13,21, 16,47, 28,29, 47,45, 27,84, 73,71, 54,161, 117,107, 165,200
697 | classes=4
698 | num=9
699 | jitter=.3
700 | ignore_thresh = .5
701 | truth_thresh = 1
702 | random=1
703 | 
704 | 
705 | 
706 | [route]
707 | layers = -4
708 | 
709 | [convolutional]
710 | batch_normalize=1
711 | filters=128
712 | size=1
713 | stride=1
714 | pad=1
715 | activation=leaky
716 | 
717 | [upsample]
718 | stride=2
719 | 
720 | [route]
721 | layers = -1, 36
722 | 
723 | 
724 | 
725 | [convolutional]
726 | batch_normalize=1
727 | filters=128
728 | size=1
729 | stride=1
730 | pad=1
731 | activation=leaky
732 | 
733 | [convolutional]
734 | batch_normalize=1
735 | size=3
736 | stride=1
737 | pad=1
738 | filters=256
739 | activation=leaky
740 | 
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 | 
749 | [convolutional]
750 | batch_normalize=1
751 | size=3
752 | stride=1
753 | pad=1
754 | filters=256
755 | activation=leaky
756 | 
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 | 
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 | 
773 | [convolutional]
774 | size=1
775 | stride=1
776 | pad=1
777 | filters=36
778 | activation=linear
779 | 
780 | 
781 | [yolo]
782 | mask = 0,1,2,4
783 | anchors = 13,21, 16,47, 28,29, 47,45, 27,84, 73,71, 54,161, 117,107, 165,200
784 | classes=4
785 | num=9
786 | jitter=.3
787 | ignore_thresh = .5
788 | truth_thresh = 1
789 | random=1
790 | max=200
791 | 


--------------------------------------------------------------------------------
/yolov3_5l.cfg:
--------------------------------------------------------------------------------
  1 | [net]
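  2 | # Testing: for inference, uncomment the next two lines and comment out batch/subdivisions under Training
  3 | #batch=1
  4 | #subdivisions=1
  5 | # Training: batch images per weight update, processed in `subdivisions` chunks (batch/subdivisions images at a time)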
  6 | batch=64
  7 | subdivisions=32
  8 | width=608
  9 | height=608
 10 | channels=1
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 500200
 21 | policy=steps
 22 | steps=400000,450000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=32
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | # Downsample
 34 | 
 35 | [convolutional]
 36 | batch_normalize=1
 37 | filters=64
 38 | size=3
 39 | stride=2
 40 | pad=1
 41 | activation=leaky
 42 | 
 43 | [convolutional]
 44 | batch_normalize=1
 45 | filters=32
 46 | size=1
 47 | stride=1
 48 | pad=1
 49 | activation=leaky
 50 | 
 51 | [convolutional]
 52 | batch_normalize=1
 53 | filters=64
 54 | size=3
 55 | stride=1
 56 | pad=1
 57 | activation=leaky
 58 | 
 59 | [shortcut]
 60 | from=-3
 61 | activation=linear
 62 | 
 63 | # Downsample: stride-2 convolution (total stride 4 relative to the input)
 64 | 
 65 | [convolutional]
 66 | batch_normalize=1
 67 | filters=128
 68 | size=3
 69 | stride=2
 70 | pad=1
 71 | activation=leaky
 72 | 
 73 | [convolutional]
 74 | batch_normalize=1
 75 | filters=64
 76 | size=1
 77 | stride=1
 78 | pad=1
 79 | activation=leaky
 80 | 
 81 | [convolutional]
 82 | batch_normalize=1
 83 | filters=128
 84 | size=3
 85 | stride=1
 86 | pad=1
 87 | activation=leaky
 88 | 
 89 | [shortcut]
 90 | from=-3
 91 | activation=linear
 92 | 
 93 | [convolutional]
 94 | batch_normalize=1
 95 | filters=64
 96 | size=1
 97 | stride=1
 98 | pad=1
 99 | activation=leaky
100 | 
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 | 
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 | 
113 | # Downsample: stride-2 convolution (total stride 8 relative to the input)
114 | 
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 | 
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 | 
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 | 
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 | 
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 | 
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 | 
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 | 
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 | 
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 | 
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 | 
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 | 
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 | 
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 | 
203 | 
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 | 
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 | 
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 | 
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 | 
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 | 
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 | 
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 | 
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 | 
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 | 
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 | 
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 | 
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 | 
284 | # Downsample: stride-2 convolution (total stride 16 relative to the input)
285 | 
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 | 
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 | 
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 | 
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 | 
314 | 
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 | 
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 | 
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 | 
335 | 
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 | 
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 | 
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 | 
356 | 
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 | 
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 | 
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 | 
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 | 
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 | 
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 | 
397 | 
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 | 
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 | 
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 | 
418 | 
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 | 
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 | 
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 | 
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 | 
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 | 
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 | 
459 | # Downsample: stride-2 convolution (total stride 32 relative to the input)
460 | 
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 | 
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 | 
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 | 
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 | 
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 | 
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 | 
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 | 
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 | 
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 | 
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 | 
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 | 
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 | 
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 | 
549 | ###################### Feature extractor ends here; detection heads follow
550 | 
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 | 
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 | 
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 | 
575 | [convolutional]
576 | batch_normalize=1
577 | size=3
578 | stride=1
579 | pad=1
580 | filters=1024
581 | activation=leaky
582 | 
583 | [convolutional]
584 | batch_normalize=1
585 | filters=512
586 | size=1
587 | stride=1
588 | pad=1
589 | activation=leaky
590 | 
591 | [convolutional]
592 | batch_normalize=1
593 | size=3
594 | stride=1
595 | pad=1
596 | filters=1024
597 | activation=leaky
598 | 
599 | [convolutional]
600 | size=1
601 | stride=1
602 | pad=1
603 | filters=48
604 | activation=linear
605 | 
606 | 
607 | [yolo]
608 | mask = 9,10,11,12,13,14
609 | anchors = 7,18, 11,36, 13,68, 15,20, 19,45, 27,28, 24,77, 41,43, 38,107, 63,61, 98,91, 63,162, 154,141, 107,277, 273,253
610 | classes=3
611 | num=15
612 | jitter=.3
613 | ignore_thresh = .7
614 | truth_thresh = 1
615 | random=1
616 | 
617 | 
618 | [route]
619 | layers = -4
620 | 
621 | [convolutional]
622 | batch_normalize=1
623 | filters=256
624 | size=1
625 | stride=1
626 | pad=1
627 | activation=leaky
628 | 
629 | [upsample]
630 | stride=2
631 | 
632 | [route]
633 | layers = -1, 61
634 | 
635 | 
636 | 
637 | [convolutional]
638 | batch_normalize=1
639 | filters=256
640 | size=1
641 | stride=1
642 | pad=1
643 | activation=leaky
644 | 
645 | [convolutional]
646 | batch_normalize=1
647 | size=3
648 | stride=1
649 | pad=1
650 | filters=512
651 | activation=leaky
652 | 
653 | [convolutional]
654 | batch_normalize=1
655 | filters=256
656 | size=1
657 | stride=1
658 | pad=1
659 | activation=leaky
660 | 
661 | [convolutional]
662 | batch_normalize=1
663 | size=3
664 | stride=1
665 | pad=1
666 | filters=512
667 | activation=leaky
668 | 
669 | [convolutional]
670 | batch_normalize=1
671 | filters=256
672 | size=1
673 | stride=1
674 | pad=1
675 | activation=leaky
676 | 
677 | [convolutional]
678 | batch_normalize=1
679 | size=3
680 | stride=1
681 | pad=1
682 | filters=512
683 | activation=leaky
684 | 
685 | [convolutional]
686 | size=1
687 | stride=1
688 | pad=1
689 | filters=16
690 | activation=linear
691 | 
692 | 
693 | [yolo]
694 | mask = 7,8
695 | anchors = 7,18, 11,36, 13,68, 15,20, 19,45, 27,28, 24,77, 41,43, 38,107, 63,61, 98,91, 63,162, 154,141, 107,277, 273,253
696 | classes=3
697 | num=15
698 | jitter=.3
699 | ignore_thresh = .7
700 | truth_thresh = 1
701 | random=1
702 | 
703 | 
704 | 
705 | [route]
706 | layers = -4
707 | 
708 | [convolutional]
709 | batch_normalize=1
710 | filters=128
711 | size=1
712 | stride=1
713 | pad=1
714 | activation=leaky
715 | 
716 | [upsample]
717 | stride=2
718 | 
719 | [route]
720 | layers = -1, 36
721 | 
722 | 
723 | 
724 | [convolutional]
725 | batch_normalize=1
726 | filters=128
727 | size=1
728 | stride=1
729 | pad=1
730 | activation=leaky
731 | 
732 | [convolutional]
733 | batch_normalize=1
734 | size=3
735 | stride=1
736 | pad=1
737 | filters=256
738 | activation=leaky
739 | 
740 | [convolutional]
741 | batch_normalize=1
742 | filters=128
743 | size=1
744 | stride=1
745 | pad=1
746 | activation=leaky
747 | 
748 | [convolutional]
749 | batch_normalize=1
750 | size=3
751 | stride=1
752 | pad=1
753 | filters=256
754 | activation=leaky
755 | 
756 | [convolutional]
757 | batch_normalize=1
758 | filters=128
759 | size=1
760 | stride=1
761 | pad=1
762 | activation=leaky
763 | 
764 | [convolutional]
765 | batch_normalize=1
766 | size=3
767 | stride=1
768 | pad=1
769 | filters=256
770 | activation=leaky
771 | 
772 | [convolutional]
773 | size=1
774 | stride=1
775 | pad=1
776 | filters=24
777 | activation=linear
778 | 
779 | 
780 | [yolo]
781 | mask = 4,5,6
782 | anchors = 7,18, 11,36, 13,68, 15,20, 19,45, 27,28, 24,77, 41,43, 38,107, 63,61, 98,91, 63,162, 154,141, 107,277, 273,253
783 | classes=3
784 | num=15
785 | jitter=.3
786 | ignore_thresh = .7
787 | truth_thresh = 1
788 | random=1
789 | 
790 | 
791 | 
792 | ############### Two additional higher-resolution detection heads follow (total stride 4, then 2)
793 | 
794 | 
795 | [route]
796 | layers = -4
797 | 
798 | [convolutional]
799 | batch_normalize=1
800 | filters=128
801 | size=1
802 | stride=1
803 | pad=1
804 | activation=leaky
805 | 
806 | [upsample]
807 | stride=2
808 | 
809 | [route]
810 | layers = -1, 11
811 | 
812 | 
813 | 
814 | [convolutional]
815 | batch_normalize=1
816 | filters=64
817 | size=1
818 | stride=1
819 | pad=1
820 | activation=leaky
821 | 
822 | [convolutional]
823 | batch_normalize=1
824 | size=3
825 | stride=1
826 | pad=1
827 | filters=128
828 | activation=leaky
829 | 
830 | [convolutional]
831 | batch_normalize=1
832 | filters=64
833 | size=1
834 | stride=1
835 | pad=1
836 | activation=leaky
837 | 
838 | [convolutional]
839 | batch_normalize=1
840 | size=3
841 | stride=1
842 | pad=1
843 | filters=128
844 | activation=leaky
845 | 
846 | [convolutional]
847 | batch_normalize=1
848 | filters=64
849 | size=1
850 | stride=1
851 | pad=1
852 | activation=leaky
853 | 
854 | [convolutional]
855 | batch_normalize=1
856 | size=3
857 | stride=1
858 | pad=1
859 | filters=128
860 | activation=leaky
861 | 
862 | [convolutional]
863 | size=1
864 | stride=1
865 | pad=1
866 | filters=16
867 | activation=linear
868 | 
869 | 
870 | [yolo]
871 | mask = 2,3
872 | anchors = 7,18, 11,36, 13,68, 15,20, 19,45, 27,28, 24,77, 41,43, 38,107, 63,61, 98,91, 63,162, 154,141, 107,277, 273,253
873 | classes=3
874 | num=15
875 | jitter=.3
876 | ignore_thresh = .7
877 | truth_thresh = 1
878 | random=1
879 | 
880 | 
881 | 
882 | 
883 | 
884 | [route]
885 | layers = -4
886 | 
887 | [convolutional]
888 | batch_normalize=1
889 | filters=128
890 | size=1
891 | stride=1
892 | pad=1
893 | activation=leaky
894 | 
895 | [upsample]
896 | stride=2
897 | 
898 | [route]
899 | layers = -1, 4
900 | 
901 | 
902 | 
903 | [convolutional]
904 | batch_normalize=1
905 | filters=32
906 | size=1
907 | stride=1
908 | pad=1
909 | activation=leaky
910 | 
911 | [convolutional]
912 | batch_normalize=1
913 | size=3
914 | stride=1
915 | pad=1
916 | filters=64
917 | activation=leaky
918 | 
919 | [convolutional]
920 | batch_normalize=1
921 | filters=32
922 | size=1
923 | stride=1
924 | pad=1
925 | activation=leaky
926 | 
927 | [convolutional]
928 | batch_normalize=1
929 | size=3
930 | stride=1
931 | pad=1
932 | filters=64
933 | activation=leaky
934 | 
935 | [convolutional]
936 | batch_normalize=1
937 | filters=32
938 | size=1
939 | stride=1
940 | pad=1
941 | activation=leaky
942 | 
943 | [convolutional]
944 | batch_normalize=1
945 | size=3
946 | stride=1
947 | pad=1
948 | filters=64
949 | activation=leaky
950 | 
951 | [convolutional]
952 | size=1
953 | stride=1
954 | pad=1
955 | filters=16
956 | activation=linear
957 | 
958 | 
959 | [yolo]
960 | mask = 0,1
961 | anchors = 7,18, 11,36, 13,68, 15,20, 19,45, 27,28, 24,77, 41,43, 38,107, 63,61, 98,91, 63,162, 154,141, 107,277, 273,253
962 | classes=3
963 | num=15
964 | jitter=.3
965 | ignore_thresh = .7
966 | truth_thresh = 1
967 | random=1


--------------------------------------------------------------------------------