├── darkflow ├── __init__.py ├── dark │ ├── __init__.py │ ├── darkop.py │ ├── layer.py │ ├── darknet.py │ ├── connected.py │ └── convolution.py ├── net │ ├── __init__.py │ ├── mnist │ │ └── run.py │ ├── vanilla │ │ ├── __init__.py │ │ └── train.py │ ├── yolov2 │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── data.py │ │ └── train.py │ ├── ops │ │ ├── __init__.py │ │ ├── simple.py │ │ ├── baseop.py │ │ └── convolution.py │ ├── yolo │ │ ├── __init__.py │ │ ├── train.py │ │ ├── predict.py │ │ ├── data.py │ │ └── misc.py │ ├── framework.py │ ├── flow.py │ ├── help.py │ └── build.py ├── utils │ ├── __init__.py │ ├── im_transform.py │ ├── box.py │ ├── pascal_voc_clean_xml.py │ ├── loader.py │ └── process.py ├── cython_utils │ ├── __init__.py │ ├── nms.pxd │ ├── cy_yolo_findboxes.pyx │ ├── cy_yolo2_findboxes.pyx │ └── nms.pyx ├── version.py ├── cli.py └── defaults.py ├── .coveragerc ├── labels.txt ├── preview.png ├── cfg ├── v1 │ ├── tiny.profile │ ├── tiny-old.profile │ ├── yolo-tiny.cfg │ ├── yolo-2c.cfg │ ├── yolo-tiny4c.cfg │ ├── yolo-tiny-extract.cfg │ ├── yolo-tiny-extract_.cfg │ ├── yolo-full.cfg │ ├── yolo-4c.cfg │ └── yolo-small.cfg ├── coco.names ├── v1.1 │ ├── tiny-coco.cfg │ ├── tiny-yolov1.cfg │ ├── person-bottle.cfg │ ├── tiny-yolo-4c.cfg │ ├── yolo-coco.cfg │ └── yolov1.cfg ├── tiny-yolo-4c.cfg ├── tiny-yolo-voc.cfg ├── tiny-yolo.cfg ├── extraction.conv.cfg ├── extraction.cfg ├── yolo-voc.cfg └── yolo.cfg ├── sample_img ├── sample_dog.jpg ├── sample_eagle.jpg ├── sample_horses.jpg ├── sample_office.jpg ├── sample_person.jpg ├── sample_scream.jpg ├── sample_computer.jpg └── sample_giraffe.jpg ├── flow ├── test ├── requirements-testing.txt └── test_darkflow.py ├── .travis.yml ├── .gitignore ├── setup.py └── README.md /darkflow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darkflow/dark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darkflow/net/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darkflow/net/mnist/run.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darkflow/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /darkflow/cython_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = test/* -------------------------------------------------------------------------------- /labels.txt: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat -------------------------------------------------------------------------------- /preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/preview.png 
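The tree above is the package layout: darkflow/cli.py parses command-line flags via darkflow/defaults.py and hands them to TFNet from darkflow/net/build.py, while the framework-specific pieces live under darkflow/net/. A minimal sketch of driving those same entry points from Python instead of the flow script — the cfg/weights paths are illustrative assumptions:

from darkflow.defaults import argHandler
from darkflow.net.build import TFNet

# build the same FLAGS object the flow script builds from sys.argv
FLAGS = argHandler()
FLAGS.setDefaults()
FLAGS.parseArgs(['flow', '--model', 'cfg/tiny-yolo-voc.cfg',
                 '--load', 'bin/tiny-yolo-voc.weights',
                 '--imgdir', 'sample_img/'])  # illustrative paths

tfnet = TFNet(FLAGS)
tfnet.predict()  # annotated copies are written to sample_img/out/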
-------------------------------------------------------------------------------- /darkflow/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0' 2 | """Current version of darkflow.""" -------------------------------------------------------------------------------- /cfg/v1/tiny.profile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/cfg/v1/tiny.profile -------------------------------------------------------------------------------- /cfg/v1/tiny-old.profile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/cfg/v1/tiny-old.profile -------------------------------------------------------------------------------- /sample_img/sample_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/sample_img/sample_dog.jpg -------------------------------------------------------------------------------- /sample_img/sample_eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/sample_img/sample_eagle.jpg -------------------------------------------------------------------------------- /sample_img/sample_horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/sample_img/sample_horses.jpg -------------------------------------------------------------------------------- /sample_img/sample_office.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/sample_img/sample_office.jpg -------------------------------------------------------------------------------- /sample_img/sample_person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/sample_img/sample_person.jpg -------------------------------------------------------------------------------- /sample_img/sample_scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/sample_img/sample_scream.jpg -------------------------------------------------------------------------------- /sample_img/sample_computer.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/sample_img/sample_computer.jpg -------------------------------------------------------------------------------- /sample_img/sample_giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Todo/darkflow/master/sample_img/sample_giraffe.jpg -------------------------------------------------------------------------------- /flow: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | import sys 4 | from darkflow.cli import cliHandler 5 | 6 | cliHandler(sys.argv) 7 | 8 | -------------------------------------------------------------------------------- /test/requirements-testing.txt: -------------------------------------------------------------------------------- 1 | tensorflow 2 | pytest 3 | requests 4 | opencv-python 5 | numpy 6 | Cython 7 | codecov 8 | pytest-cov -------------------------------------------------------------------------------- /darkflow/net/vanilla/__init__.py: -------------------------------------------------------------------------------- 1 | from . import train 2 | 3 | def constructor(self, meta, FLAGS): 4 | self.meta, self.FLAGS = meta, FLAGS -------------------------------------------------------------------------------- /darkflow/net/yolov2/__init__.py: -------------------------------------------------------------------------------- 1 | from . import train 2 | from . import predict 3 | from . import data 4 | from ..yolo import misc 5 | import numpy as np 6 | -------------------------------------------------------------------------------- /darkflow/cython_utils/nms.pxd: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | ctypedef np.float_t DTYPE_t 5 | from libc.math cimport exp 6 | from utils.box import BoundBox 7 | 8 | 9 | cdef NMS(float[:, ::1] , float[:, ::1] ) 10 | 11 | 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: false 3 | 4 | language: python 5 | python: 6 | - "3.6" 7 | 8 | cache: 9 | directories: 10 | - bin #cache .weights files 11 | 12 | # command to install dependencies 13 | install: 14 | - pip install -r test/requirements-testing.txt 15 | - pip install -e . 
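The nms.pxd header above declares the Cython entry point NMS(final_probs, final_bbox) that the findboxes modules cimport; the implementation itself lives in nms.pyx. A self-contained, pure-Python sketch of the greedy per-class suppression it performs — the 0.4 IoU cutoff and the [x, y, w, h] row layout are assumptions for illustration:

import numpy as np

def iou(b1, b2):
    # center/extent boxes, same overlap logic as darkflow/utils/box.py
    def overlap(x1, w1, x2, w2):
        return min(x1 + w1 / 2., x2 + w2 / 2.) - max(x1 - w1 / 2., x2 - w2 / 2.)
    w = overlap(b1[0], b1[2], b2[0], b2[2])
    h = overlap(b1[1], b1[3], b2[1], b2[3])
    if w < 0 or h < 0: return 0.
    return w * h / (b1[2] * b1[3] + b2[2] * b2[3] - w * h)

def nms_sketch(final_probs, final_bbox, iou_cut=0.4):
    picked = []
    for c in range(final_probs.shape[1]):
        order = np.argsort(final_probs[:, c])[::-1]
        kept = []
        for i in order:
            if final_probs[i, c] == 0.:
                break  # sorted descending, so the rest are zero too
            if all(iou(final_bbox[i], final_bbox[j]) < iou_cut for j in kept):
                kept.append(i)
        picked.extend(kept)
    return picked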
16 | 17 | # command to run tests 18 | script: pytest -x --cov=./ 19 | 20 | #Upload code coverage statistics 21 | after_success: 22 | - codecov -------------------------------------------------------------------------------- /darkflow/net/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple import * 2 | from .convolution import * 3 | from .baseop import HEADER, LINE 4 | 5 | op_types = { 6 | 'convolutional': convolutional, 7 | 'conv-select': conv_select, 8 | 'connected': connected, 9 | 'maxpool': maxpool, 10 | 'leaky': leaky, 11 | 'dropout': dropout, 12 | 'flatten': flatten, 13 | 'avgpool': avgpool, 14 | 'softmax': softmax, 15 | 'identity': identity, 16 | 'crop': crop, 17 | 'local': local, 18 | 'select': select, 19 | 'route': route, 20 | 'reorg': reorg, 21 | 'conv-extract': conv_extract, 22 | 'extract': extract 23 | } 24 | 25 | def op_create(*args): 26 | layer_type = list(args)[0].type 27 | return op_types[layer_type](*args) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Python bytecode 3 | *.pyc 4 | 5 | # Weight files 6 | bin/ 7 | 8 | # Sample image data 9 | sample_img/*.jpg 10 | !sample_img/sample_*.jpg 11 | sample_img/out/* 12 | 13 | # Annotated test results 14 | results/ 15 | 16 | # Intermediate training data 17 | backup/ 18 | tfnet/yolo/parse-history.txt 19 | tfnet/yolo/*.parsed 20 | *.txt 21 | !requirements*.txt 22 | *.pb 23 | /profile 24 | /test.py 25 | 26 | # Built cython files 27 | darkflow/cython_utils/*.pyd 28 | darkflow/cython_utils/*.c 29 | 30 | #egg-info 31 | darkflow.egg-info/ 32 | 33 | #Other build stuff 34 | build/ 35 | 36 | #TensorBoard logs 37 | summary/ 38 | 39 | #Built graphs 40 | built_graph/ 41 | 42 | #pytest cache 43 | .cache/ 44 | -------------------------------------------------------------------------------- /darkflow/utils/im_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | def imcv2_recolor(im, a = .1): 5 | t = [np.random.uniform()] 6 | t += [np.random.uniform()] 7 | t += [np.random.uniform()] 8 | t = np.array(t) * 2. - 1. 9 | 10 | # random amplify each channel 11 | im = im * (1 + t * a) 12 | mx = 255. * (1 + a) 13 | up = np.random.uniform() * 2 - 1 14 | im = np.power(im/mx, 1. + up * .5) 15 | return np.array(im * 255., np.uint8) 16 | 17 | def imcv2_affine_trans(im): 18 | # Scale and translate 19 | h, w, c = im.shape 20 | scale = np.random.uniform() / 10. + 1. 21 | max_offx = (scale-1.) * w 22 | max_offy = (scale-1.) 
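# Usage sketch for the two augmentation helpers in im_transform.py around
# this point: imcv2_recolor jitters per-channel gain and gamma, while
# imcv2_affine_trans scales, crops and randomly flips. The image path is
# an assumption.
import cv2
from darkflow.utils.im_transform import imcv2_recolor, imcv2_affine_trans

im = cv2.imread('sample_img/sample_dog.jpg')
recolored = imcv2_recolor(im, a=.1)
im2, (w, h, c), (scale, (offx, offy), flip) = imcv2_affine_trans(im)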
* h 23 | offx = int(np.random.uniform() * max_offx) 24 | offy = int(np.random.uniform() * max_offy) 25 | 26 | im = cv2.resize(im, (0,0), fx = scale, fy = scale) 27 | im = im[offy : (offy + h), offx : (offx + w)] 28 | flip = np.random.binomial(1, .5) 29 | if flip: im = cv2.flip(im, 1) 30 | return im, [w, h, c], [scale, [offx, offy], flip] -------------------------------------------------------------------------------- /cfg/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /darkflow/cli.py: -------------------------------------------------------------------------------- 1 | from .defaults import argHandler #Import the default arguments 2 | import os 3 | from darkflow.net.build import TFNet 4 | 5 | def cliHandler(args): 6 | FLAGS = argHandler() 7 | FLAGS.setDefaults() 8 | FLAGS.parseArgs(args) 9 | 10 | # make sure all necessary dirs exist 11 | def _get_dir(dirs): 12 | for d in dirs: 13 | this = os.path.abspath(os.path.join(os.path.curdir, d)) 14 | if not os.path.exists(this): os.makedirs(this) 15 | _get_dir([FLAGS.imgdir, FLAGS.binary, FLAGS.backup, 16 | os.path.join(FLAGS.imgdir,'out'), FLAGS.summary]) 17 | 18 | # fix FLAGS.load to appropriate type 19 | try: FLAGS.load = int(FLAGS.load) 20 | except: pass 21 | 22 | tfnet = TFNet(FLAGS) 23 | 24 | if FLAGS.demo: 25 | tfnet.camera() 26 | exit('Demo stopped, exit.') 27 | 28 | if FLAGS.train: 29 | print('Enter training ...'); tfnet.train() 30 | if not FLAGS.savepb: 31 | exit('Training finished, exit.') 32 | 33 | if FLAGS.savepb: 34 | print('Rebuild a constant version ...') 35 | tfnet.savepb(); exit('Done') 36 | 37 | tfnet.predict() 38 | -------------------------------------------------------------------------------- /darkflow/net/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | from . import train 2 | from . import predict 3 | from . import data 4 | from . 
import misc 5 | import numpy as np 6 | 7 | 8 | """ YOLO framework __init__ equivalent""" 9 | 10 | def constructor(self, meta, FLAGS): 11 | 12 | def _to_color(indx, base): 13 | """ return (b, r, g) tuple""" 14 | base2 = base * base 15 | b = 2 - indx // base2 16 | r = 2 - (indx % base2) // base 17 | g = 2 - (indx % base2) % base 18 | return (b * 127, r * 127, g * 127) 19 | if 'labels' not in meta: 20 | misc.labels(meta, FLAGS) #We're not loading from a .pb so we do need to load the labels 21 | assert len(meta['labels']) == meta['classes'], ( 22 | 'labels.txt and {} indicate' + ' ' 23 | 'inconsistent class numbers' 24 | ).format(meta['model']) 25 | 26 | # assign a color for each label 27 | colors = list() 28 | base = int(np.ceil(pow(meta['classes'], 1./3))) 29 | for x in range(len(meta['labels'])): 30 | colors += [_to_color(x, base)] 31 | meta['colors'] = colors 32 | self.fetch = list() 33 | self.meta, self.FLAGS = meta, FLAGS 34 | 35 | # over-ride the threshold in meta if FLAGS has it. 36 | if FLAGS.threshold > 0.0: 37 | self.meta['thresh'] = FLAGS.threshold -------------------------------------------------------------------------------- /darkflow/utils/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class BoundBox: 4 | def __init__(self, classes): 5 | self.x, self.y = float(), float() 6 | self.w, self.h = float(), float() 7 | self.c = float() 8 | self.class_num = classes 9 | self.probs = np.zeros((classes,)) 10 | 11 | def overlap(x1,w1,x2,w2): 12 | l1 = x1 - w1 / 2. 13 | l2 = x2 - w2 / 2. 14 | left = max(l1, l2) 15 | r1 = x1 + w1 / 2. 16 | r2 = x2 + w2 / 2. 17 | right = min(r1, r2) 18 | return right - left 19 | 20 | def box_intersection(a, b): 21 | w = overlap(a.x, a.w, b.x, b.w) 22 | h = overlap(a.y, a.h, b.y, b.h) 23 | if w < 0 or h < 0: return 0 24 | area = w * h 25 | return area 26 | 27 | def box_union(a, b): 28 | i = box_intersection(a, b) 29 | u = a.w * a.h + b.w * b.h - i 30 | return u 31 | 32 | def box_iou(a, b): 33 | return box_intersection(a, b) / box_union(a, b) 34 | 35 | def prob_compare(box): 36 | return box.probs[box.class_num] 37 | 38 | def prob_compare2(boxa, boxb): 39 | if (boxa.pi < boxb.pi): 40 | return 1 41 | elif(boxa.pi == boxb.pi): 42 | return 0 43 | else: 44 | return -1 -------------------------------------------------------------------------------- /darkflow/net/vanilla/train.py: -------------------------------------------------------------------------------- 1 | _LOSS_TYPE = ['sse','l2', 'smooth', 2 | 'sparse', 'l1', 'softmax', 3 | 'svm', 'fisher'] 4 | import tensorflow as tf 5 | def loss(self, net_out): 6 | m = self.meta 7 | loss_type = self.meta['type'] 8 | assert loss_type in _LOSS_TYPE, \ 9 | 'Loss type {} not implemented'.format(loss_type) 10 | 11 | out = net_out 12 | out_shape = out.get_shape() 13 | out_dtype = out.dtype.base_dtype 14 | _truth = tf.placeholder(out_dtype, out_shape) 15 | 16 | self.placeholders = dict({ 17 | 'truth': _truth 18 | }) 19 | 20 | diff = _truth - out 21 | if loss_type in ['sse','l2']: 22 | loss = tf.nn.l2_loss(diff) 23 | 24 | elif loss_type == 'smooth': 25 | small = tf.cast(tf.abs(diff) < 1., tf.float32) 26 | large = 1. - small 27 | l1_loss = tf.reduce_sum(tf.abs(tf.multiply(diff, large))) 28 | l2_loss = tf.nn.l2_loss(tf.multiply(diff, small)) 29 | loss = l1_loss + l2_loss 30 | 31 | elif loss_type in ['sparse', 'l1']: 32 | loss = tf.reduce_sum(tf.abs(diff)) 33 | 34 | elif loss_type == 'softmax': 35 | loss = tf.nn.softmax_cross_entropy_with_logits(logits = out, labels = _truth) 36 | loss = tf.reduce_mean(loss) 37 | 38 | elif loss_type == 'svm': 39 | assert 'train_size' in m, \ 40 | 'Must specify train_size for the svm loss' 41 | size = m['train_size'] 42 | self.nu = tf.Variable(tf.ones([size, int(out_shape[-1])])) # class count taken from the output width -------------------------------------------------------------------------------- /darkflow/dark/darkop.py: -------------------------------------------------------------------------------- 1 | from .layer import Layer 2 | from .convolution import * 3 | from .connected import * 4 | 5 | class avgpool_layer(Layer): 6 | pass 7 | 8 | class crop_layer(Layer): 9 | pass 10 | 11 | class maxpool_layer(Layer): 12 | def setup(self, ksize, stride, pad): 13 | self.stride = stride 14 | self.ksize = ksize 15 | self.pad = pad 16 | 17 | class softmax_layer(Layer): 18 | def setup(self, groups): 19 | self.groups = groups 20 | 21 | class dropout_layer(Layer): 22 | def setup(self, p): 23 | self.h['pdrop'] = dict({ 24 | 'feed': p, # for training 25 | 'dfault': 1.0, # for testing 26 | 'shape': () 27 | }) 28 | 29 | class route_layer(Layer): 30 | def setup(self, routes): 31 | self.routes = routes 32 | 33 | class reorg_layer(Layer): 34 | def setup(self, stride): 35 | self.stride = stride 36 | 37 | """ 38 | Darkop Factory 39 | """ 40 | 41 | darkops = { 42 | 'dropout': dropout_layer, 43 | 'connected': connected_layer, 44 | 'maxpool': maxpool_layer, 45 | 'convolutional': convolutional_layer, 46 | 'avgpool': avgpool_layer, 47 | 'softmax': softmax_layer, 48 | 'crop': crop_layer, 49 | 'local': local_layer, 50 | 'select': select_layer, 51 | 'route': route_layer, 52 | 'reorg': reorg_layer, 53 | 'conv-select': conv_select_layer, 54 | 'conv-extract': conv_extract_layer, 55 | 'extract': extract_layer 56 | } 57 | 58 | def create_darkop(ltype, num, *args): 59 | op_class = darkops.get(ltype, Layer) 60 | return op_class(ltype, num, *args) -------------------------------------------------------------------------------- /darkflow/net/framework.py: -------------------------------------------------------------------------------- 1 | from . import yolo 2 | from . import yolov2 3 | from .
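# Worked example for the box utilities in darkflow/utils/box.py above: two
# unit squares whose centers sit half a width apart intersect in a 0.5 x 1.0
# strip, so IoU = 0.5 / (1 + 1 - 0.5) = 1/3.
from darkflow.utils.box import BoundBox, box_iou

a = BoundBox(1); a.x, a.y, a.w, a.h = 0.0, 0.0, 1.0, 1.0
b = BoundBox(1); b.x, b.y, b.w, b.h = 0.5, 0.0, 1.0, 1.0
assert abs(box_iou(a, b) - 1. / 3) < 1e-9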
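# Sketch of the darkop factory above: create_darkop dispatches on the layer
# type string, falling back to the bare Layer class, and everything after
# (type, number) is forwarded to the subclass's setup(). Values here are
# illustrative.
from darkflow.dark.darkop import create_darkop

pool = create_darkop('maxpool', 3, 2, 2, 0)  # ksize=2, stride=2, pad=0
assert pool.signature == ['maxpool', 3, 2, 2, 0]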
import vanilla 4 | from os import sep 5 | 6 | class framework(object): 7 | constructor = vanilla.constructor 8 | loss = vanilla.train.loss 9 | 10 | def __init__(self, meta, FLAGS): 11 | model = meta['model'].split(sep)[-1] 12 | model = '.'.join(model.split('.')[:-1]) 13 | meta['name'] = model 14 | 15 | self.constructor(meta, FLAGS) 16 | 17 | def is_inp(self, file_name): 18 | return True 19 | 20 | class YOLO(framework): 21 | constructor = yolo.constructor 22 | parse = yolo.data.parse 23 | shuffle = yolo.data.shuffle 24 | preprocess = yolo.predict.preprocess 25 | postprocess = yolo.predict.postprocess 26 | loss = yolo.train.loss 27 | is_inp = yolo.misc.is_inp 28 | profile = yolo.misc.profile 29 | _batch = yolo.data._batch 30 | resize_input = yolo.predict.resize_input 31 | findboxes = yolo.predict.findboxes 32 | process_box = yolo.predict.process_box 33 | 34 | class YOLOv2(framework): 35 | constructor = yolo.constructor 36 | parse = yolo.data.parse 37 | shuffle = yolov2.data.shuffle 38 | preprocess = yolo.predict.preprocess 39 | loss = yolov2.train.loss 40 | is_inp = yolo.misc.is_inp 41 | postprocess = yolov2.predict.postprocess 42 | _batch = yolov2.data._batch 43 | resize_input = yolo.predict.resize_input 44 | findboxes = yolov2.predict.findboxes 45 | process_box = yolo.predict.process_box 46 | 47 | """ 48 | framework factory 49 | """ 50 | 51 | types = { 52 | '[detection]': YOLO, 53 | '[region]': YOLOv2 54 | } 55 | 56 | def create_framework(meta, FLAGS): 57 | net_type = meta['type'] 58 | this = types.get(net_type, framework) 59 | return this(meta, FLAGS) -------------------------------------------------------------------------------- /cfg/v1.1/tiny-coco.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | hue = .1 11 | saturation=.75 12 | exposure=.75 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,100000,150000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 200000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [connected] 109 | output= 4655 110 | activation=linear 111 | 112 | [detection] 113 | classes=80 114 | coords=4 115 | rescore=1 116 | side=7 117 | num=3 118 | softmax=0 119 | 
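framework.py above maps the cfg's output-layer section to a framework class: [detection] (the YOLOv1-style heads in the v1/v1.1 cfgs) selects YOLO, [region] selects YOLOv2, and any other type falls back to the vanilla framework. A sketch of the dispatch alone, without building a network:

from darkflow.net.framework import types, framework

assert types['[detection]'].__name__ == 'YOLO'
assert types['[region]'].__name__ == 'YOLOv2'
cls = types.get('[connected]', framework)  # unknown head -> plain framework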
sqrt=1 120 | jitter=.2 121 | 122 | object_scale=1 123 | noobject_scale=.5 124 | class_scale=1 125 | coord_scale=5 126 | -------------------------------------------------------------------------------- /cfg/v1.1/tiny-yolov1.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | saturation=.75 11 | exposure=.75 12 | hue = .1 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,20000,30000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 40000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [connected] 109 | output= 1470 110 | activation=linear 111 | 112 | [detection] 113 | classes=20 114 | coords=4 115 | rescore=1 116 | side=7 117 | num=2 118 | softmax=0 119 | sqrt=1 120 | jitter=.2 121 | 122 | object_scale=1 123 | noobject_scale=.5 124 | class_scale=1 125 | coord_scale=5 126 | 127 | -------------------------------------------------------------------------------- /cfg/v1.1/person-bottle.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | saturation=.75 11 | exposure=.75 12 | hue = .1 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,20000,30000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 40000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 
77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [select] 109 | old_output=1470 110 | keep=4,14/20 111 | bins=49 112 | output=588 113 | activation=linear 114 | 115 | [detection] 116 | classes=2 117 | coords=4 118 | rescore=1 119 | side=7 120 | num=2 121 | softmax=0 122 | sqrt=1 123 | jitter=.2 124 | 125 | object_scale=1 126 | noobject_scale=.5 127 | class_scale=1 128 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1.1/tiny-yolo-4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | saturation=.75 11 | exposure=.75 12 | hue = .1 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,20000,30000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 40000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [select] 109 | old_output=1470 110 | keep=8,14,15,19/20 111 | bins=49 112 | output=686 113 | activation=linear 114 | 115 | [detection] 116 | classes=4 117 | coords=4 118 | rescore=1 119 | side=7 120 | num=2 121 | softmax=0 122 | sqrt=1 123 | jitter=.2 124 | 125 | object_scale=1 126 | noobject_scale=.5 127 | class_scale=1 128 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=16 
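The [select] surgery in person-bottle.cfg and tiny-yolo-4c.cfg above reuses a pretrained 20-class YOLOv1 head for a subset of labels: keep=4,14/20 keeps class indices 4 and 14 out of 20 (bottle and person in the VOC label ordering, matching the cfg's name), and output shrinks to match. A YOLOv1 [detection] head emits side² · (classes + num·(coords+1)) values, which reproduces every output= figure in these cfgs:

def yolov1_output(side, classes, num, coords=4):
    # per cell: class scores, plus num boxes each with coords + 1 confidence
    return side * side * (classes + num * (coords + 1))

assert yolov1_output(7, 20, 2) == 1470  # old_output, the full VOC head
assert yolov1_output(7, 2, 2) == 588    # person-bottle.cfg after [select]
assert yolov1_output(7, 4, 2) == 686    # tiny-yolo-4c.cfg after [select]
assert yolov1_output(7, 80, 3) == 4655  # cfg/v1.1/tiny-coco.cfg [connected]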
26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 48 | size=3 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [maxpool] 54 | size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=1024 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [connected] 112 | output=256 113 | activation=linear 114 | 115 | [connected] 116 | output=4096 117 | activation=leaky 118 | 119 | [dropout] 120 | probability=.5 121 | 122 | [connected] 123 | output= 1470 124 | activation=linear 125 | 126 | [detection] 127 | classes=20 128 | coords=4 129 | rescore=1 130 | side=7 131 | num=2 132 | softmax=0 133 | sqrt=1 134 | jitter=.2 135 | object_scale=1 136 | noobject_scale=.5 137 | class_scale=1 138 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/tiny-yolo-4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40100 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=50 115 | activation=linear 116 | 117 | 
[region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=5 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh=.6 134 | random=1 135 | -------------------------------------------------------------------------------- /cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40100 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .5 134 | random=1 135 | -------------------------------------------------------------------------------- /cfg/v1/yolo-2c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=16 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 48 | size=3 49 | stride=1 50 | pad=1 51 | 
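For the YOLOv2 cfgs above, the convolutional layer feeding [region] has to emit num · (classes + coords + 1) filters per grid cell — one box transform, objectness score and class distribution per anchor — which is where the otherwise odd-looking filters= values come from:

def region_filters(num, classes, coords=4):
    # per anchor: box coords + objectness + class scores
    return num * (classes + coords + 1)

assert region_filters(5, 5) == 50    # cfg/tiny-yolo-4c.cfg
assert region_filters(5, 20) == 125  # cfg/tiny-yolo-voc.cfg
assert region_filters(5, 80) == 425  # cfg/tiny-yolo.cfg, below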
activation=leaky 52 | 53 | [maxpool] 54 | size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=1024 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [connected] 112 | output=256 113 | activation=linear 114 | 115 | [connected] 116 | output=4096 117 | activation=leaky 118 | 119 | [dropout] 120 | probability=.5 121 | 122 | [select] 123 | old_output=1470 124 | keep=14,19/20 125 | bins=49 126 | output=588 127 | activation=linear 128 | 129 | [detection] 130 | classes=2 131 | coords=4 132 | rescore=1 133 | side=7 134 | num=2 135 | softmax=0 136 | sqrt=1 137 | jitter=.2 138 | object_scale=1 139 | noobject_scale=.5 140 | class_scale=1 141 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=16 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 48 | size=3 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [maxpool] 54 | size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=1024 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [connected] 112 | output=256 113 | activation=linear 114 | 115 | [connected] 116 | output=4096 117 | activation=leaky 118 | 119 | [dropout] 120 | probability=.5 121 | 122 | [select] 123 | old_output=1470 124 | keep=8,14,15,19/20 125 | bins=49 126 | output=686 127 | activation=linear 128 | 129 | [detection] 130 | classes=4 131 | coords=4 132 | rescore=1 133 | side=7 134 | num=2 135 | softmax=0 136 | sqrt=1 137 | jitter=.2 138 
| object_scale=1 139 | noobject_scale=.5 140 | class_scale=1 141 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/tiny-yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 120000 16 | policy=steps 17 | steps=-1,100,80000,100000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=425 115 | activation=linear 116 | 117 | [region] 118 | anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741 119 | bias_match=1 120 | classes=80 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /darkflow/cython_utils/cy_yolo_findboxes.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | ctypedef np.float_t DTYPE_t 5 | from libc.math cimport exp 6 | from darkflow.utils.box import BoundBox 7 | from nms cimport NMS 8 | 9 | 10 | 11 | @cython.cdivision(True) 12 | @cython.boundscheck(False) # turn off bounds-checking for entire function 13 | @cython.wraparound(False) # turn off negative index wrapping for entire function 14 | def yolo_box_constructor(meta,np.ndarray[float] net_out, float threshold): 15 | 16 | cdef: 17 | float sqrt 18 | int C,B,S 19 | int SS,prob_size,conf_size 20 | int grid, b 21 | int class_loop 22 | 23 | 24 | sqrt = meta['sqrt'] + 1 25 | C, B, S = meta['classes'], meta['num'], meta['side'] 26 | boxes = [] 27 | SS = S * S # number of grid cells 28 | prob_size = SS * C # class probabilities 29 | conf_size = SS * B # confidences for each grid cell 30 | 31 | cdef: 32 | float [:,::1] probs = np.ascontiguousarray(net_out[0 : prob_size]).reshape([SS,C]) 33 | 
float [:,::1] confs = np.ascontiguousarray(net_out[prob_size : (prob_size + conf_size)]).reshape([SS,B]) 34 | float [: , : ,::1] coords = np.ascontiguousarray(net_out[(prob_size + conf_size) : ]).reshape([SS, B, 4]) 35 | float [:,:,::1] final_probs = np.zeros([SS,B,C],dtype=np.float32) 36 | 37 | 38 | for grid in range(SS): 39 | for b in range(B): 40 | coords[grid, b, 0] = (coords[grid, b, 0] + grid % S) / S 41 | coords[grid, b, 1] = (coords[grid, b, 1] + grid // S) / S 42 | coords[grid, b, 2] = coords[grid, b, 2] ** sqrt 43 | coords[grid, b, 3] = coords[grid, b, 3] ** sqrt 44 | for class_loop in range(C): 45 | probs[grid, class_loop] = probs[grid, class_loop] * confs[grid, b] 46 | #print("PROBS",probs[grid,class_loop]) 47 | if(probs[grid,class_loop] > threshold ): 48 | final_probs[grid, b, class_loop] = probs[grid, class_loop] 49 | 50 | 51 | return NMS(np.ascontiguousarray(final_probs).reshape(SS*B, C) , np.ascontiguousarray(coords).reshape(SS*B, 4)) 52 | -------------------------------------------------------------------------------- /darkflow/net/yolov2/predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import cv2 4 | import os 5 | #from scipy.special import expit 6 | #from utils.box import BoundBox, box_iou, prob_compare 7 | #from utils.box import prob_compare2, box_intersection 8 | from darkflow.utils.box import BoundBox 9 | from darkflow.cython_utils.cy_yolo2_findboxes import box_constructor 10 | 11 | def expit(x): 12 | return 1. / (1. + np.exp(-x)) 13 | 14 | def _softmax(x): 15 | e_x = np.exp(x - np.max(x)) 16 | out = e_x / e_x.sum() 17 | return out 18 | 19 | def findboxes(self, net_out): 20 | # meta 21 | meta = self.meta 22 | boxes = list() 23 | boxes=box_constructor(meta,net_out) 24 | return boxes 25 | 26 | def postprocess(self, net_out, im, save = True): 27 | """ 28 | Takes net output, draw net_out, save to disk 29 | """ 30 | boxes = self.findboxes(net_out) 31 | 32 | # meta 33 | meta = self.meta 34 | threshold = meta['thresh'] 35 | colors = meta['colors'] 36 | labels = meta['labels'] 37 | if type(im) is not np.ndarray: 38 | imgcv = cv2.imread(im) 39 | else: imgcv = im 40 | h, w, _ = imgcv.shape 41 | 42 | textBuff = "[" 43 | for b in boxes: 44 | boxResults = self.process_box(b, h, w, threshold) 45 | if boxResults is None: 46 | continue 47 | left, right, top, bot, mess, max_indx, confidence = boxResults 48 | thick = int((h + w) // 300) 49 | if self.FLAGS.json: 50 | line = ('{"label":"%s",' 51 | '"confidence":%.2f,' 52 | '"topleft":{"x":%d,"y":%d},' 53 | '"bottomright":{"x":%d,"y":%d}},\n') % \ 54 | (mess, confidence, left, top, right, bot) 55 | textBuff += line 56 | continue 57 | 58 | cv2.rectangle(imgcv, 59 | (left, top), (right, bot), 60 | colors[max_indx], thick) 61 | cv2.putText(imgcv, mess, (left, top - 12), 62 | 0, 1e-3 * h, colors[max_indx],thick//3) 63 | 64 | if not save: return imgcv 65 | # Removing trailing comma+newline adding json list terminator. 
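# Pure-Python mirror of the coordinate decoding loop in
# cy_yolo_findboxes.pyx above: box centers go from cell-relative to
# image-relative, and extents are squared when the cfg sets sqrt=1
# (the pyx computes sqrt = meta['sqrt'] + 1).
def decode_cell(coords, grid, b, S, sqrt):
    x = (coords[grid][b][0] + grid % S) / S
    y = (coords[grid][b][1] + grid // S) / S
    w = coords[grid][b][2] ** sqrt
    h = coords[grid][b][3] ** sqrt
    return x, y, w, h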
66 | textBuff = textBuff[:-2] + "]" if len(textBuff) > 1 else "[]" 67 | outfolder = os.path.join(self.FLAGS.imgdir, 'out') 68 | img_name = os.path.join(outfolder, os.path.basename(im)) 69 | if self.FLAGS.json: 70 | textFile = os.path.splitext(img_name)[0] + ".json" 71 | with open(textFile, 'w') as f: 72 | f.write(textBuff) 73 | return 74 | 75 | cv2.imwrite(img_name, imgcv) 76 | -------------------------------------------------------------------------------- /darkflow/dark/layer.py: -------------------------------------------------------------------------------- 1 | from darkflow.utils import loader 2 | import numpy as np 3 | 4 | class Layer(object): 5 | 6 | def __init__(self, *args): 7 | self._signature = list(args) 8 | self.type = list(args)[0] 9 | self.number = list(args)[1] 10 | 11 | self.w = dict() # weights 12 | self.h = dict() # placeholders 13 | self.wshape = dict() # weight shape 14 | self.wsize = dict() # weight size 15 | self.setup(*args[2:]) # set attr up 16 | self.present() 17 | for var in self.wshape: 18 | shp = self.wshape[var] 19 | size = np.prod(shp) 20 | self.wsize[var] = size 21 | 22 | def load(self, src_loader): 23 | var_lay = src_loader.VAR_LAYER 24 | if self.type not in var_lay: return 25 | 26 | src_type = type(src_loader) 27 | if src_type is loader.weights_loader: 28 | wdict = self.load_weights(src_loader) 29 | else: 30 | wdict = self.load_ckpt(src_loader) 31 | if wdict is not None: 32 | self.recollect(wdict) 33 | 34 | def load_weights(self, src_loader): 35 | val = src_loader([self.presenter]) 36 | if val is None: return None 37 | else: return val.w 38 | 39 | def load_ckpt(self, src_loader): 40 | result = dict() 41 | presenter = self.presenter 42 | for var in presenter.wshape: 43 | name = presenter.varsig(var) 44 | shape = presenter.wshape[var] 45 | key = [name, shape] 46 | val = src_loader(key) 47 | result[var] = val 48 | return result 49 | 50 | @property 51 | def signature(self): 52 | return self._signature 53 | 54 | # For comparing two layers 55 | def __eq__(self, other): 56 | return self.signature == other.signature 57 | def __ne__(self, other): 58 | return not self.__eq__(other) 59 | 60 | def varsig(self, var): 61 | if var not in self.wshape: 62 | return None 63 | sig = str(self.number) 64 | sig += '-' + self.type 65 | sig += '/' + var 66 | return sig 67 | 68 | def recollect(self, w): self.w = w 69 | def present(self): self.presenter = self 70 | def setup(self, *args): pass 71 | def finalize(self): pass -------------------------------------------------------------------------------- /darkflow/utils/pascal_voc_clean_xml.py: -------------------------------------------------------------------------------- 1 | """ 2 | parse PASCAL VOC xml annotations 3 | """ 4 | 5 | import os 6 | import sys 7 | import xml.etree.ElementTree as ET 8 | import glob 9 | 10 | 11 | def _pp(l): # pretty printing 12 | for i in l: print('{}: {}'.format(i,l[i])) 13 | 14 | def pascal_voc_clean_xml(ANN, pick, exclusive = False): 15 | print('Parsing for {} {}'.format( 16 | pick, 'exclusively' * int(exclusive))) 17 | 18 | dumps = list() 19 | cur_dir = os.getcwd() 20 | os.chdir(ANN) 21 | annotations = os.listdir('.') 22 | annotations = glob.glob('*.xml') 23 | size = len(annotations) 24 | 25 | for i, file in enumerate(annotations): 26 | # progress bar 27 | sys.stdout.write('\r') 28 | percentage = 1.
* (i+1) / size 29 | progress = int(percentage * 20) 30 | bar_arg = [progress*'=', ' '*(19-progress), percentage*100] 31 | bar_arg += [file] 32 | sys.stdout.write('[{}>{}]{:.0f}% {}'.format(*bar_arg)) 33 | sys.stdout.flush() 34 | 35 | # actual parsing 36 | in_file = open(file) 37 | tree=ET.parse(in_file) 38 | root = tree.getroot() 39 | jpg = str(root.find('filename').text) 40 | imsize = root.find('size') 41 | w = int(imsize.find('width').text) 42 | h = int(imsize.find('height').text) 43 | all = list() 44 | 45 | for obj in root.iter('object'): 46 | current = list() 47 | name = obj.find('name').text 48 | if name not in pick: 49 | continue 50 | 51 | xmlbox = obj.find('bndbox') 52 | xn = int(float(xmlbox.find('xmin').text)) 53 | xx = int(float(xmlbox.find('xmax').text)) 54 | yn = int(float(xmlbox.find('ymin').text)) 55 | yx = int(float(xmlbox.find('ymax').text)) 56 | current = [name,xn,yn,xx,yx] 57 | all += [current] 58 | 59 | add = [[jpg, [w, h, all]]] 60 | dumps += add 61 | in_file.close() 62 | 63 | # gather all stats 64 | stat = dict() 65 | for dump in dumps: 66 | all = dump[1][2] 67 | for current in all: 68 | if current[0] in pick: 69 | if current[0] in stat: 70 | stat[current[0]]+=1 71 | else: 72 | stat[current[0]] =1 73 | 74 | print('\nStatistics:') 75 | _pp(stat) 76 | print('Dataset size: {}'.format(len(dumps))) 77 | 78 | os.chdir(cur_dir) 79 | return dumps -------------------------------------------------------------------------------- /cfg/extraction.conv.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=256 5 | width=256 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.5 11 | policy=poly 12 | power=6 13 | max_batches=500000 14 | 15 | [convolutional] 16 | filters=64 17 | size=7 18 | stride=2 19 | pad=1 20 | activation=leaky 21 | 22 | [maxpool] 23 | size=2 24 | stride=2 25 | 26 | [convolutional] 27 | filters=192 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | filters=128 39 | size=1 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [convolutional] 45 | filters=256 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | filters=256 53 | size=1 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [convolutional] 59 | filters=512 60 | size=3 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [maxpool] 66 | size=2 67 | stride=2 68 | 69 | [convolutional] 70 | filters=256 71 | size=1 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [convolutional] 77 | filters=512 78 | size=3 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [convolutional] 84 | filters=256 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | filters=512 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=256 99 | size=1 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | filters=256 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | filters=512 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | filters=512 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | filters=1024 134 | size=3 135 | stride=1 136 | pad=1 
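pascal_voc_clean_xml above returns one record per annotated image. A usage sketch — the annotation directory and label picks are assumptions:

from darkflow.utils.pascal_voc_clean_xml import pascal_voc_clean_xml

dumps = pascal_voc_clean_xml('VOCdevkit/VOC2007/Annotations', ['person', 'dog'])
for jpg, (w, h, objs) in dumps[:3]:
    print(jpg, w, h, len(objs))  # objs: [[label, xmin, ymin, xmax, ymax], ...]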
137 | activation=leaky 138 | 139 | [maxpool] 140 | size=2 141 | stride=2 142 | 143 | [convolutional] 144 | filters=512 145 | size=1 146 | stride=1 147 | pad=1 148 | activation=leaky 149 | 150 | [convolutional] 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | filters=512 159 | size=1 160 | stride=1 161 | pad=1 162 | activation=leaky 163 | 164 | [convolutional] 165 | filters=1024 166 | size=3 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [avgpool] 172 | 173 | [connected] 174 | output=1000 175 | activation=leaky 176 | 177 | [softmax] 178 | groups=1 179 | 180 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from setuptools.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | import os 6 | import imp 7 | 8 | VERSION = imp.load_source('version', os.path.join('.', 'darkflow', 'version.py')) 9 | VERSION = VERSION.__version__ 10 | 11 | if os.name =='nt' : 12 | ext_modules=[ 13 | Extension("darkflow.cython_utils.nms", 14 | sources=["darkflow/cython_utils/nms.pyx"], 15 | #libraries=["m"] # Unix-like specific 16 | include_dirs=[numpy.get_include()] 17 | ), 18 | Extension("darkflow.cython_utils.cy_yolo2_findboxes", 19 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"], 20 | #libraries=["m"] # Unix-like specific 21 | include_dirs=[numpy.get_include()] 22 | ), 23 | Extension("darkflow.cython_utils.cy_yolo_findboxes", 24 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"], 25 | #libraries=["m"] # Unix-like specific 26 | include_dirs=[numpy.get_include()] 27 | ) 28 | ] 29 | 30 | elif os.name =='posix' : 31 | ext_modules=[ 32 | Extension("darkflow.cython_utils.nms", 33 | sources=["darkflow/cython_utils/nms.pyx"], 34 | libraries=["m"], # Unix-like specific 35 | include_dirs=[numpy.get_include()] 36 | ), 37 | Extension("darkflow.cython_utils.cy_yolo2_findboxes", 38 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"], 39 | libraries=["m"], # Unix-like specific 40 | include_dirs=[numpy.get_include()] 41 | ), 42 | Extension("darkflow.cython_utils.cy_yolo_findboxes", 43 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"], 44 | libraries=["m"], # Unix-like specific 45 | include_dirs=[numpy.get_include()] 46 | ) 47 | ] 48 | 49 | else : 50 | ext_modules=[ 51 | Extension("darkflow.cython_utils.nms", 52 | sources=["darkflow/cython_utils/nms.pyx"], 53 | libraries=["m"] # Unix-like specific 54 | ), 55 | Extension("darkflow.cython_utils.cy_yolo2_findboxes", 56 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"], 57 | libraries=["m"] # Unix-like specific 58 | ), 59 | Extension("darkflow.cython_utils.cy_yolo_findboxes", 60 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"], 61 | libraries=["m"] # Unix-like specific 62 | ) 63 | ] 64 | 65 | setup( 66 | version=VERSION, 67 | name='darkflow', 68 | description='Darkflow', 69 | license='GPLv3', 70 | url='https://github.com/thtrieu/darkflow', 71 | packages = find_packages(), 72 | scripts = ['flow'], 73 | ext_modules = cythonize(ext_modules) 74 | ) -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny-extract.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | 
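setup.py above cythonizes the three modules in darkflow/cython_utils, linking libm only on non-Windows platforms. A quick check that the built extensions are importable after running python3 setup.py build_ext --inplace (or pip install -e .):

from darkflow.cython_utils import nms, cy_yolo_findboxes, cy_yolo2_findboxes
print(nms.__file__)  # points at the compiled .so / .pyd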
decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [conv-extract] 25 | profile=cfg/v1/tiny.profile 26 | input=-1 27 | output=0 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [conv-extract] 39 | profile=cfg/v1/tiny.profile 40 | input=0 41 | output=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [maxpool] 49 | size=2 50 | stride=2 51 | 52 | [conv-extract] 53 | profile=cfg/v1/tiny.profile 54 | input=1 55 | output=2 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [maxpool] 63 | size=2 64 | stride=2 65 | 66 | [conv-extract] 67 | profile=cfg/v1/tiny.profile 68 | input=2 69 | output=3 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [conv-extract] 81 | profile=cfg/v1/tiny.profile 82 | input=3 83 | output=4 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [maxpool] 91 | size=2 92 | stride=2 93 | 94 | [conv-extract] 95 | profile=cfg/v1/tiny.profile 96 | input=4 97 | output=5 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [maxpool] 105 | size=2 106 | stride=2 107 | 108 | [conv-extract] 109 | profile=cfg/v1/tiny.profile 110 | input=5 111 | output=6 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [conv-extract] 119 | profile=cfg/v1/tiny.profile 120 | input=6 121 | output=7 122 | filters=1024 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [conv-extract] 129 | profile=cfg/v1/tiny.profile 130 | input=7 131 | output=8 132 | filters=1024 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | [extract] 139 | profile=cfg/v1/tiny.profile 140 | input=8 141 | output=9 142 | old=7,7,1024,256 143 | activation=linear 144 | 145 | [extract] 146 | profile=cfg/v1/tiny.profile 147 | input=9 148 | output=10 149 | old=256,4096 150 | activation=leaky 151 | 152 | [dropout] 153 | probability=1. 
154 | 155 | [select] 156 | input=cfg/v1/tiny.profile,10 157 | old_output=1470 158 | keep=8,14,15,19/20 159 | bins=49 160 | output=686 161 | activation=linear 162 | 163 | [detection] 164 | classes=4 165 | coords=4 166 | rescore=1 167 | side=7 168 | num=2 169 | softmax=0 170 | sqrt=1 171 | jitter=.2 172 | object_scale=1 173 | noobject_scale=.5 174 | class_scale=1 175 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny-extract_.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [conv-extract] 25 | profile=cfg/v1/tiny-old.profile 26 | input=-1 27 | output=0 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [conv-extract] 39 | profile=cfg/v1/tiny-old.profile 40 | input=0 41 | output=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [maxpool] 49 | size=2 50 | stride=2 51 | 52 | [conv-extract] 53 | profile=cfg/v1/tiny-old.profile 54 | input=1 55 | output=2 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [maxpool] 63 | size=2 64 | stride=2 65 | 66 | [conv-extract] 67 | profile=cfg/v1/tiny-old.profile 68 | input=2 69 | output=3 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [conv-extract] 81 | profile=cfg/v1/tiny-old.profile 82 | input=3 83 | output=4 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [maxpool] 91 | size=2 92 | stride=2 93 | 94 | [conv-extract] 95 | profile=cfg/v1/tiny-old.profile 96 | input=4 97 | output=5 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [maxpool] 105 | size=2 106 | stride=2 107 | 108 | [conv-extract] 109 | profile=cfg/v1/tiny-old.profile 110 | input=5 111 | output=6 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [conv-extract] 119 | profile=cfg/v1/tiny-old.profile 120 | input=6 121 | output=7 122 | filters=1024 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [conv-extract] 129 | profile=cfg/v1/tiny-old.profile 130 | input=7 131 | output=8 132 | filters=1024 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | [extract] 139 | profile=cfg/v1/tiny-old.profile 140 | input=8 141 | output=9 142 | old=7,7,1024,256 143 | activation=linear 144 | 145 | [extract] 146 | profile=cfg/v1/tiny-old.profile 147 | input=9 148 | output=10 149 | old=256,4096 150 | activation=leaky 151 | 152 | [dropout] 153 | probability=1. 
154 | 155 | [select] 156 | input=cfg/v1/tiny-old.profile,10 157 | old_output=1470 158 | keep=8,14,15,19/20 159 | bins=49 160 | output=686 161 | activation=linear 162 | 163 | [detection] 164 | classes=4 165 | coords=4 166 | rescore=1 167 | side=7 168 | num=2 169 | softmax=0 170 | sqrt=1 171 | jitter=.2 172 | object_scale=2.5 173 | noobject_scale=2 174 | class_scale=2.5 175 | coord_scale=5 176 | 177 | save=11250 -------------------------------------------------------------------------------- /darkflow/net/yolov2/data.py: -------------------------------------------------------------------------------- 1 | from darkflow.utils.pascal_voc_clean_xml import pascal_voc_clean_xml 2 | from numpy.random import permutation as perm 3 | from ..yolo.predict import preprocess 4 | from ..yolo.data import shuffle 5 | from copy import deepcopy 6 | import pickle 7 | import numpy as np 8 | import os 9 | 10 | def _batch(self, chunk): 11 | """ 12 | Takes a chunk of parsed annotations 13 | returns value for placeholders of net's 14 | input & loss layer correspond to this chunk 15 | """ 16 | meta = self.meta 17 | labels = meta['labels'] 18 | 19 | H, W, _ = meta['out_size'] 20 | C, B = meta['classes'], meta['num'] 21 | anchors = meta['anchors'] 22 | 23 | # preprocess 24 | jpg = chunk[0]; w, h, allobj_ = chunk[1] 25 | allobj = deepcopy(allobj_) 26 | path = os.path.join(self.FLAGS.dataset, jpg) 27 | img = self.preprocess(path, allobj) 28 | 29 | # Calculate regression target 30 | cellx = 1. * w / W 31 | celly = 1. * h / H 32 | for obj in allobj: 33 | centerx = .5*(obj[1]+obj[3]) #xmin, xmax 34 | centery = .5*(obj[2]+obj[4]) #ymin, ymax 35 | cx = centerx / cellx 36 | cy = centery / celly 37 | if cx >= W or cy >= H: return None, None 38 | obj[3] = float(obj[3]-obj[1]) / w 39 | obj[4] = float(obj[4]-obj[2]) / h 40 | obj[3] = np.sqrt(obj[3]) 41 | obj[4] = np.sqrt(obj[4]) 42 | obj[1] = cx - np.floor(cx) # centerx 43 | obj[2] = cy - np.floor(cy) # centery 44 | obj += [int(np.floor(cy) * W + np.floor(cx))] 45 | 46 | # show(im, allobj, S, w, h, cellx, celly) # unit test 47 | 48 | # Calculate placeholders' values 49 | probs = np.zeros([H*W,B,C]) 50 | confs = np.zeros([H*W,B]) 51 | coord = np.zeros([H*W,B,4]) 52 | proid = np.zeros([H*W,B,C]) 53 | prear = np.zeros([H*W,4]) 54 | for obj in allobj: 55 | probs[obj[5], :, :] = [[0.]*C] * B 56 | probs[obj[5], :, labels.index(obj[0])] = 1. 57 | proid[obj[5], :, :] = [[1.]*C] * B 58 | coord[obj[5], :, :] = [obj[1:5]] * B 59 | prear[obj[5],0] = obj[1] - obj[3]**2 * .5 * W # xleft 60 | prear[obj[5],1] = obj[2] - obj[4]**2 * .5 * H # yup 61 | prear[obj[5],2] = obj[1] + obj[3]**2 * .5 * W # xright 62 | prear[obj[5],3] = obj[2] + obj[4]**2 * .5 * H # ybot 63 | confs[obj[5], :] = [1.] 
* B 64 | 65 | # Finalise the placeholders' values 66 | upleft = np.expand_dims(prear[:,0:2], 1) 67 | botright = np.expand_dims(prear[:,2:4], 1) 68 | wh = botright - upleft; 69 | area = wh[:,:,0] * wh[:,:,1] 70 | upleft = np.concatenate([upleft] * B, 1) 71 | botright = np.concatenate([botright] * B, 1) 72 | areas = np.concatenate([area] * B, 1) 73 | 74 | # value for placeholder at input layer 75 | inp_feed_val = img 76 | # value for placeholder at loss layer 77 | loss_feed_val = { 78 | 'probs': probs, 'confs': confs, 79 | 'coord': coord, 'proid': proid, 80 | 'areas': areas, 'upleft': upleft, 81 | 'botright': botright 82 | } 83 | 84 | return inp_feed_val, loss_feed_val 85 | 86 | -------------------------------------------------------------------------------- /darkflow/dark/darknet.py: -------------------------------------------------------------------------------- 1 | from darkflow.utils.process import cfg_yielder 2 | from .darkop import create_darkop 3 | from darkflow.utils import loader 4 | import warnings 5 | import time 6 | import os 7 | 8 | class Darknet(object): 9 | 10 | _EXT = '.weights' 11 | 12 | def __init__(self, FLAGS): 13 | self.get_weight_src(FLAGS) 14 | self.modify = False 15 | 16 | print('Parsing {}'.format(self.src_cfg)) 17 | src_parsed = self.parse_cfg(self.src_cfg, FLAGS) 18 | self.src_meta, self.src_layers = src_parsed 19 | 20 | if self.src_cfg == FLAGS.model: 21 | self.meta, self.layers = src_parsed 22 | else: 23 | print('Parsing {}'.format(FLAGS.model)) 24 | des_parsed = self.parse_cfg(FLAGS.model, FLAGS) 25 | self.meta, self.layers = des_parsed 26 | 27 | self.load_weights() 28 | 29 | def get_weight_src(self, FLAGS): 30 | """ 31 | analyse FLAGS.load to know where is the 32 | source binary and what is its config. 33 | can be: None, FLAGS.model, or some other 34 | """ 35 | self.src_bin = FLAGS.model + self._EXT 36 | self.src_bin = FLAGS.binary + self.src_bin 37 | self.src_bin = os.path.abspath(self.src_bin) 38 | exist = os.path.isfile(self.src_bin) 39 | 40 | if FLAGS.load == str(): FLAGS.load = int() 41 | if type(FLAGS.load) is int: 42 | self.src_cfg = FLAGS.model 43 | if FLAGS.load: self.src_bin = None 44 | elif not exist: self.src_bin = None 45 | else: 46 | assert os.path.isfile(FLAGS.load), \ 47 | '{} not found'.format(FLAGS.load) 48 | self.src_bin = FLAGS.load 49 | name = loader.model_name(FLAGS.load) 50 | cfg_path = os.path.join(FLAGS.config, name + '.cfg') 51 | if not os.path.isfile(cfg_path): 52 | warnings.warn( 53 | '{} not found, use {} instead'.format( 54 | cfg_path, FLAGS.model)) 55 | cfg_path = FLAGS.model 56 | self.src_cfg = cfg_path 57 | FLAGS.load = int() 58 | 59 | 60 | def parse_cfg(self, model, FLAGS): 61 | """ 62 | return a list of `layers` objects (darkop.py) 63 | given path to binaries/ and configs/ 64 | """ 65 | args = [model, FLAGS.binary] 66 | cfg_layers = cfg_yielder(*args) 67 | meta = dict(); layers = list() 68 | for i, info in enumerate(cfg_layers): 69 | if i == 0: meta = info; continue 70 | else: new = create_darkop(*info) 71 | layers.append(new) 72 | return meta, layers 73 | 74 | def load_weights(self): 75 | """ 76 | Use `layers` and Loader to load .weights file 77 | """ 78 | print('Loading {} ...'.format(self.src_bin)) 79 | start = time.time() 80 | 81 | args = [self.src_bin, self.src_layers] 82 | wgts_loader = loader.create_loader(*args) 83 | for layer in self.layers: layer.load(wgts_loader) 84 | 85 | stop = time.time() 86 | print('Finished in {}s'.format(stop - start)) 
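# ---- Editor's usage sketch (not part of the original darknet.py) ----
# In the excerpt above, Darknet reads exactly four attributes off FLAGS:
# model, binary, config and load. A bare namespace with placeholder paths is
# therefore enough to exercise the parse_cfg() -> load_weights() flow; the
# paths below are hypothetical and need real files on disk to run.
#
# from types import SimpleNamespace
# from darkflow.dark.darknet import Darknet
#
# FLAGS = SimpleNamespace(
#     model  = 'cfg/tiny-yolo-voc.cfg',  # destination .cfg to build
#     binary = 'bin/',                   # folder expected to hold .weights files
#     config = 'cfg/',                   # folder holding source .cfg files
#     load   = 0)                        # int: resolve weights from the model name
# net = Darknet(FLAGS)                   # parse cfg(s), then load weights layer by layer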
-------------------------------------------------------------------------------- /cfg/extraction.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | max_crop=320 7 | channels=3 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | [convolutional] 17 | batch_normalize=1 18 | filters=64 19 | size=7 20 | stride=2 21 | pad=1 22 | activation=leaky 23 | 24 | [maxpool] 25 | size=2 26 | stride=2 27 | 28 | [convolutional] 29 | batch_normalize=1 30 | filters=192 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=leaky 35 | 36 | [maxpool] 37 | size=2 38 | stride=2 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=128 43 | size=1 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=256 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=256 59 | size=1 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [convolutional] 65 | batch_normalize=1 66 | filters=512 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [maxpool] 73 | size=2 74 | stride=2 75 | 76 | [convolutional] 77 | batch_normalize=1 78 | filters=256 79 | size=1 80 | stride=1 81 | pad=1 82 | activation=leaky 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=256 95 | size=1 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=512 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=256 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=512 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [convolutional] 141 | batch_normalize=1 142 | filters=512 143 | size=1 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [convolutional] 149 | batch_normalize=1 150 | filters=1024 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=leaky 155 | 156 | [maxpool] 157 | size=2 158 | stride=2 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=512 163 | size=1 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=1024 171 | size=3 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=512 179 | size=1 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | [convolutional] 185 | batch_normalize=1 186 | filters=1024 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | [convolutional] 193 | filters=1000 194 | size=1 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [avgpool] 200 | 201 | [softmax] 202 | groups=1 203 | 204 | [cost] 205 | type=sse 206 | 207 | -------------------------------------------------------------------------------- /darkflow/net/ops/simple.py: 
-------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | from .baseop import BaseOp 3 | import tensorflow as tf 4 | from distutils.version import StrictVersion 5 | 6 | class route(BaseOp): 7 | def forward(self): 8 | routes = self.lay.routes 9 | routes_out = list() 10 | for r in routes: 11 | this = self.inp 12 | while this.lay.number != r: 13 | this = this.inp 14 | assert this is not None, \ 15 | 'Routing to non-existence {}'.format(r) 16 | routes_out += [this.out] 17 | self.out = tf.concat(routes_out, 3) 18 | 19 | def speak(self): 20 | msg = 'concat {}' 21 | return msg.format(self.lay.routes) 22 | 23 | class connected(BaseOp): 24 | def forward(self): 25 | self.out = tf.nn.xw_plus_b( 26 | self.inp.out, 27 | self.lay.w['weights'], 28 | self.lay.w['biases'], 29 | name = self.scope) 30 | 31 | def speak(self): 32 | layer = self.lay 33 | args = [layer.inp, layer.out] 34 | args += [layer.activation] 35 | msg = 'full {} x {} {}' 36 | return msg.format(*args) 37 | 38 | class select(connected): 39 | """a weird connected layer""" 40 | def speak(self): 41 | layer = self.lay 42 | args = [layer.inp, layer.out] 43 | args += [layer.activation] 44 | msg = 'sele {} x {} {}' 45 | return msg.format(*args) 46 | 47 | class extract(connected): 48 | """a weird connected layer""" 49 | def speak(self): 50 | layer = self.lay 51 | args = [len(layer.inp), len(layer.out)] 52 | args += [layer.activation] 53 | msg = 'extr {} x {} {}' 54 | return msg.format(*args) 55 | 56 | class flatten(BaseOp): 57 | def forward(self): 58 | temp = tf.transpose( 59 | self.inp.out, [0,3,1,2]) 60 | self.out = slim.flatten( 61 | temp, scope = self.scope) 62 | 63 | def speak(self): return 'flat' 64 | 65 | 66 | class softmax(BaseOp): 67 | def forward(self): 68 | self.out = tf.nn.softmax(self.inp.out) 69 | 70 | def speak(self): return 'softmax()' 71 | 72 | 73 | class avgpool(BaseOp): 74 | def forward(self): 75 | self.out = tf.reduce_mean( 76 | self.inp.out, [1, 2], 77 | name = self.scope 78 | ) 79 | 80 | def speak(self): return 'avgpool()' 81 | 82 | 83 | class dropout(BaseOp): 84 | def forward(self): 85 | if self.lay.h['pdrop'] is None: 86 | self.lay.h['pdrop'] = 1.0 87 | self.out = tf.nn.dropout( 88 | self.inp.out, 89 | self.lay.h['pdrop'], 90 | name = self.scope 91 | ) 92 | 93 | def speak(self): return 'drop' 94 | 95 | 96 | class crop(BaseOp): 97 | def forward(self): 98 | self.out = self.inp.out * 2. - 1. 
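        # (Editor's note) resize_input() in darkflow/net/yolo/predict.py feeds
        # pixels already scaled to [0, 1]; the affine map 2x - 1 above therefore
        # re-centres them to [-1, 1], exactly the range speak() below reports.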
99 | 100 | def speak(self): 101 | return 'scale to (-1, 1)' 102 | 103 | 104 | class maxpool(BaseOp): 105 | def forward(self): 106 | self.out = tf.nn.max_pool( 107 | self.inp.out, padding = 'SAME', 108 | ksize = [1] + [self.lay.ksize]*2 + [1], 109 | strides = [1] + [self.lay.stride]*2 + [1], 110 | name = self.scope 111 | ) 112 | 113 | def speak(self): 114 | l = self.lay 115 | return 'maxp {}x{}p{}_{}'.format( 116 | l.ksize, l.ksize, l.pad, l.stride) 117 | 118 | 119 | class leaky(BaseOp): 120 | def forward(self): 121 | self.out = tf.maximum( 122 | .1 * self.inp.out, 123 | self.inp.out, 124 | name = self.scope 125 | ) 126 | 127 | def verbalise(self): pass 128 | 129 | 130 | class identity(BaseOp): 131 | def __init__(self, inp): 132 | self.inp = None 133 | self.out = inp -------------------------------------------------------------------------------- /cfg/v1/yolo-full.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | 
[convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=4096 212 | activation=leaky 213 | 214 | [dropout] 215 | probability=.5 216 | 217 | [connected] 218 | output= 1470 219 | activation=linear 220 | 221 | [detection] 222 | classes=20 223 | coords=4 224 | rescore=1 225 | side=7 226 | num=2 227 | softmax=0 228 | sqrt=1 229 | jitter=.2 230 | 231 | object_scale=1 232 | noobject_scale=.5 233 | class_scale=1 234 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 
| size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=4096 212 | activation=leaky 213 | 214 | [dropout] 215 | probability=.5 216 | 217 | [select] 218 | old_output=1470 219 | keep=8,14,15,19/20 220 | bins=49 221 | output=686 222 | activation=linear 223 | 224 | [detection] 225 | classes=4 226 | coords=4 227 | rescore=1 228 | side=7 229 | num=2 230 | softmax=0 231 | sqrt=1 232 | jitter=.2 233 | 234 | object_scale=1 235 | noobject_scale=.5 236 | class_scale=1 237 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-small.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | 
pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=512 212 | activation=leaky 213 | 214 | [connected] 215 | output=4096 216 | activation=leaky 217 | 218 | [dropout] 219 | probability=.5 220 | 221 | [connected] 222 | output= 1470 223 | activation=linear 224 | 225 | [detection] 226 | classes=20 227 | coords=4 228 | rescore=1 229 | side=7 230 | num=2 231 | softmax=0 232 | sqrt=1 233 | jitter=.2 234 | 235 | object_scale=1 236 | noobject_scale=.5 237 | class_scale=1 238 | coord_scale=5 239 | 240 | -------------------------------------------------------------------------------- /darkflow/net/yolo/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | import pickle 3 | import tensorflow as tf 4 | from .misc import show 5 | import numpy as np 6 | import os 7 | 8 | def loss(self, net_out): 9 | """ 10 | Takes net.out and placeholders value 11 | returned in batch() func above, 12 | to build train_op and loss 13 | """ 14 | # meta 15 | m = self.meta 16 | sprob = float(m['class_scale']) 17 | sconf = float(m['object_scale']) 18 | snoob = float(m['noobject_scale']) 19 | scoor = float(m['coord_scale']) 20 | S, B, C = m['side'], m['num'], m['classes'] 21 | SS = S * S # number of grid cells 22 | 23 | print('{} loss hyper-parameters:'.format(m['model'])) 24 | print('\tside = {}'.format(m['side'])) 25 | print('\tbox = {}'.format(m['num'])) 26 | print('\tclasses = {}'.format(m['classes'])) 27 | print('\tscales = {}'.format([sprob, sconf, snoob, scoor])) 28 | 29 | size1 = [None, SS, C] 30 | size2 = [None, SS, B] 31 | 32 | # return the below placeholders 33 | _probs = tf.placeholder(tf.float32, size1) 34 | _confs = tf.placeholder(tf.float32, size2) 35 | _coord = tf.placeholder(tf.float32, size2 + [4]) 36 | # weights term for L2 loss 37 | _proid = tf.placeholder(tf.float32, size1) 38 | # material calculating IOU 39 | _areas = tf.placeholder(tf.float32, size2) 40 | _upleft = tf.placeholder(tf.float32, size2 + [2]) 41 | _botright = tf.placeholder(tf.float32, size2 + [2]) 42 | 43 | self.placeholders = { 44 | 'probs':_probs, 'confs':_confs, 'coord':_coord, 'proid':_proid, 45 | 'areas':_areas, 'upleft':_upleft, 'botright':_botright 46 | } 47 | 48 | # Extract the coordinate prediction from net.out 49 | coords = net_out[:, SS * (C + B):] 50 | coords = tf.reshape(coords, [-1, SS, B, 4]) 51 | wh = tf.pow(coords[:,:,:,2:4], 2) * S # unit: grid cell 52 | area_pred = wh[:,:,:,0] * wh[:,:,:,1] # unit: grid cell^2 53 | centers = coords[:,:,:,0:2] # [batch, SS, B, 2] 54 | floor = centers - (wh * .5) # 
[batch, SS, B, 2] 55 | ceil = centers + (wh * .5) # [batch, SS, B, 2] 56 | 57 | # calculate the intersection areas 58 | intersect_upleft = tf.maximum(floor, _upleft) 59 | intersect_botright = tf.minimum(ceil , _botright) 60 | intersect_wh = intersect_botright - intersect_upleft 61 | intersect_wh = tf.maximum(intersect_wh, 0.0) 62 | intersect = tf.multiply(intersect_wh[:,:,:,0], intersect_wh[:,:,:,1]) 63 | 64 | # calculate the best IOU, set 0.0 confidence for worse boxes 65 | iou = tf.truediv(intersect, _areas + area_pred - intersect) 66 | best_box = tf.equal(iou, tf.reduce_max(iou, [2], True)) 67 | best_box = tf.to_float(best_box) 68 | confs = tf.multiply(best_box, _confs) 69 | 70 | # take care of the weight terms 71 | conid = snoob * (1. - confs) + sconf * confs 72 | weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3) 73 | cooid = scoor * weight_coo 74 | proid = sprob * _proid 75 | 76 | # flatten 'em all 77 | probs = slim.flatten(_probs) 78 | proid = slim.flatten(proid) 79 | confs = slim.flatten(confs) 80 | conid = slim.flatten(conid) 81 | coord = slim.flatten(_coord) 82 | cooid = slim.flatten(cooid) 83 | 84 | self.fetch += [probs, confs, conid, cooid, proid] 85 | true = tf.concat([probs, confs, coord], 1) 86 | wght = tf.concat([proid, conid, cooid], 1) 87 | print('Building {} loss'.format(m['model'])) 88 | loss = tf.pow(net_out - true, 2) 89 | loss = tf.multiply(loss, wght) 90 | loss = tf.reduce_sum(loss, 1) 91 | self.loss = .5 * tf.reduce_mean(loss) 92 | tf.summary.scalar('{} loss'.format(m['model']), self.loss) 93 | -------------------------------------------------------------------------------- /darkflow/net/ops/baseop.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | FORM = '{:>6} | {:>6} | {:<32} | {}' 5 | FORM_ = '{}+{}+{}+{}' 6 | LINE = FORM_.format('-'*7, '-'*8, '-'*34, '-'*15) 7 | HEADER = FORM.format( 8 | 'Source', 'Train?','Layer description', 'Output size') 9 | 10 | def _shape(tensor): # work for both tf.Tensor & np.ndarray 11 | if type(tensor) in [tf.Variable, tf.Tensor]: 12 | return tensor.get_shape() 13 | else: return tensor.shape 14 | 15 | def _name(tensor): 16 | return tensor.name.split(':')[0] 17 | 18 | class BaseOp(object): 19 | """ 20 | BaseOp objects initialise with a darknet's `layer` object 21 | and input tensor of that layer `inp`, it calculates the 22 | output of this layer and place the result in self.out 23 | """ 24 | 25 | # let slim take care of the following vars 26 | _SLIM = ['gamma', 'moving_mean', 'moving_variance'] 27 | 28 | def __init__(self, layer, inp, num, roof, feed): 29 | self.inp = inp # BaseOp 30 | self.num = num # int 31 | self.out = None # tf.Tensor 32 | self.lay = layer 33 | 34 | self.scope = '{}-{}'.format( 35 | str(self.num), self.lay.type) 36 | self.gap = roof - self.num 37 | self.var = not self.gap > 0 38 | self.act = 'Load ' 39 | self.convert(feed) 40 | if self.var: self.train_msg = 'Yep! 
' 41 | else: self.train_msg = 'Nope ' 42 | self.forward() 43 | 44 | def convert(self, feed): 45 | """convert self.lay to variables & placeholders""" 46 | for var in self.lay.wshape: 47 | self.wrap_variable(var) 48 | for ph in self.lay.h: 49 | self.wrap_pholder(ph, feed) 50 | 51 | def wrap_variable(self, var): 52 | """wrap layer.w into variables""" 53 | val = self.lay.w.get(var, None) 54 | if val is None: 55 | shape = self.lay.wshape[var] 56 | args = [0., 1e-2, shape] 57 | if 'moving_mean' in var: 58 | val = np.zeros(shape) 59 | elif 'moving_variance' in var: 60 | val = np.ones(shape) 61 | else: 62 | val = np.random.normal(*args) 63 | self.lay.w[var] = val.astype(np.float32) 64 | self.act = 'Init ' 65 | if not self.var: return 66 | 67 | val = self.lay.w[var] 68 | self.lay.w[var] = tf.constant_initializer(val) 69 | if var in self._SLIM: return 70 | with tf.variable_scope(self.scope): 71 | self.lay.w[var] = tf.get_variable(var, 72 | shape = self.lay.wshape[var], 73 | dtype = tf.float32, 74 | initializer = self.lay.w[var]) 75 | 76 | def wrap_pholder(self, ph, feed): 77 | """wrap layer.h into placeholders""" 78 | phtype = type(self.lay.h[ph]) 79 | if phtype is not dict: return 80 | 81 | sig = '{}/{}'.format(self.scope, ph) 82 | val = self.lay.h[ph] 83 | 84 | self.lay.h[ph] = tf.placeholder_with_default( 85 | val['dfault'], val['shape'], name = sig) 86 | feed[self.lay.h[ph]] = val['feed'] 87 | 88 | def verbalise(self): # console speaker 89 | msg = str() 90 | inp = _name(self.inp.out) 91 | if inp == 'input': \ 92 | msg = FORM.format( 93 | '', '', 'input', 94 | _shape(self.inp.out)) + '\n' 95 | if not self.act: return msg 96 | return msg + FORM.format( 97 | self.act, self.train_msg, 98 | self.speak(), _shape(self.out)) 99 | 100 | def speak(self): pass -------------------------------------------------------------------------------- /darkflow/dark/connected.py: -------------------------------------------------------------------------------- 1 | from .layer import Layer 2 | import numpy as np 3 | 4 | class extract_layer(Layer): 5 | def setup(self, old_inp, old_out, 6 | activation, inp, out): 7 | if inp is None: inp = range(old_inp) 8 | self.activation = activation 9 | self.old_inp = old_inp 10 | self.old_out = old_out 11 | self.inp = inp 12 | self.out = out 13 | self.wshape = { 14 | 'biases': [len(self.out)], 15 | 'weights': [len(self.inp), len(self.out)] 16 | } 17 | 18 | @property 19 | def signature(self): 20 | sig = ['connected'] 21 | sig += self._signature[1:-2] 22 | return sig 23 | 24 | def present(self): 25 | args = self.signature 26 | self.presenter = connected_layer(*args) 27 | 28 | def recollect(self, val): 29 | w = val['weights'] 30 | b = val['biases'] 31 | if w is None: self.w = val; return 32 | w = np.take(w, self.inp, 0) 33 | w = np.take(w, self.out, 1) 34 | b = np.take(b, self.out) 35 | assert1 = w.shape == tuple(self.wshape['weights']) 36 | assert2 = b.shape == tuple(self.wshape['biases']) 37 | assert assert1 and assert2, \ 38 | 'Dimension does not match in {} recollect'.format( 39 | self._signature) 40 | 41 | self.w['weights'] = w 42 | self.w['biases'] = b 43 | 44 | 45 | 46 | class select_layer(Layer): 47 | def setup(self, inp, old, 48 | activation, inp_idx, 49 | out, keep, train): 50 | self.old = old 51 | self.keep = keep 52 | self.train = train 53 | self.inp_idx = inp_idx 54 | self.activation = activation 55 | inp_dim = inp 56 | if inp_idx is not None: 57 | inp_dim = len(inp_idx) 58 | self.inp = inp_dim 59 | self.out = out 60 | self.wshape = { 61 | 'biases': [out], 62 | 'weights': 
[inp_dim, out] 63 | } 64 | 65 | @property 66 | def signature(self): 67 | sig = ['connected'] 68 | sig += self._signature[1:-4] 69 | return sig 70 | 71 | def present(self): 72 | args = self.signature 73 | self.presenter = connected_layer(*args) 74 | 75 | def recollect(self, val): 76 | w = val['weights'] 77 | b = val['biases'] 78 | if w is None: self.w = val; return 79 | if self.inp_idx is not None: 80 | w = np.take(w, self.inp_idx, 0) 81 | 82 | keep_b = np.take(b, self.keep) 83 | keep_w = np.take(w, self.keep, 1) 84 | train_b = b[self.train:] 85 | train_w = w[:, self.train:] 86 | self.w['biases'] = np.concatenate( 87 | (keep_b, train_b), axis = 0) 88 | self.w['weights'] = np.concatenate( 89 | (keep_w, train_w), axis = 1) 90 | 91 | 92 | class connected_layer(Layer): 93 | def setup(self, input_size, 94 | output_size, activation): 95 | self.activation = activation 96 | self.inp = input_size 97 | self.out = output_size 98 | self.wshape = { 99 | 'biases': [self.out], 100 | 'weights': [self.inp, self.out] 101 | } 102 | 103 | def finalize(self, transpose): 104 | weights = self.w['weights'] 105 | if weights is None: return 106 | shp = self.wshape['weights'] 107 | if not transpose: 108 | weights = weights.reshape(shp[::-1]) 109 | weights = weights.transpose([1,0]) 110 | else: weights = weights.reshape(shp) 111 | self.w['weights'] = weights -------------------------------------------------------------------------------- /cfg/yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | height=416 5 | width=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.0001 15 | max_batches = 45000 16 | policy=steps 17 | steps=100,25000,35000 18 | scales=10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=32 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=64 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=128 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=64 55 | size=1 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | batch_normalize=1 74 | filters=256 75 | size=3 76 | stride=1 77 | pad=1 78 | activation=leaky 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=128 83 | size=1 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=256 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [maxpool] 97 | size=2 98 | stride=2 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=512 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=256 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 
131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=512 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [maxpool] 141 | size=2 142 | stride=2 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=1024 147 | size=3 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=512 155 | size=1 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=1024 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=512 171 | size=1 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=1024 179 | size=3 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | 185 | ####### 186 | 187 | [convolutional] 188 | batch_normalize=1 189 | size=3 190 | stride=1 191 | pad=1 192 | filters=1024 193 | activation=leaky 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [route] 204 | layers=-9 205 | 206 | [reorg] 207 | stride=2 208 | 209 | [route] 210 | layers=-1,-3 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | size=3 215 | stride=1 216 | pad=1 217 | filters=1024 218 | activation=leaky 219 | 220 | [convolutional] 221 | size=1 222 | stride=1 223 | pad=1 224 | filters=125 225 | activation=linear 226 | 227 | [region] 228 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 229 | bias_match=1 230 | classes=20 231 | coords=4 232 | num=5 233 | softmax=1 234 | jitter=.2 235 | rescore=1 236 | 237 | object_scale=5 238 | noobject_scale=1 239 | class_scale=1 240 | coord_scale=1 241 | 242 | absolute=1 243 | thresh = .6 244 | random=0 245 | -------------------------------------------------------------------------------- /cfg/v1.1/yolo-coco.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=4 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | hue = .1 11 | saturation=.75 12 | exposure=.75 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,100000,150000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 200000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=64 23 | size=7 24 | stride=2 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=192 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=128 47 | size=1 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=256 63 | size=1 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=512 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=256 83 | size=1 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=512 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [convolutional] 97 | batch_normalize=1 98 | 
filters=256 99 | size=1 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=512 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | [convolutional] 113 | batch_normalize=1 114 | filters=256 115 | size=1 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | batch_normalize=1 122 | filters=512 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [convolutional] 129 | batch_normalize=1 130 | filters=256 131 | size=1 132 | stride=1 133 | pad=1 134 | activation=leaky 135 | 136 | [convolutional] 137 | batch_normalize=1 138 | filters=512 139 | size=3 140 | stride=1 141 | pad=1 142 | activation=leaky 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=512 147 | size=1 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=1024 155 | size=3 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [maxpool] 161 | size=2 162 | stride=2 163 | 164 | [convolutional] 165 | batch_normalize=1 166 | filters=512 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [convolutional] 181 | batch_normalize=1 182 | filters=512 183 | size=1 184 | stride=1 185 | pad=1 186 | activation=leaky 187 | 188 | [convolutional] 189 | batch_normalize=1 190 | filters=1024 191 | size=3 192 | stride=1 193 | pad=1 194 | activation=leaky 195 | 196 | ####### 197 | 198 | [convolutional] 199 | batch_normalize=1 200 | size=3 201 | stride=1 202 | pad=1 203 | filters=1024 204 | activation=leaky 205 | 206 | [convolutional] 207 | batch_normalize=1 208 | size=3 209 | stride=2 210 | pad=1 211 | filters=1024 212 | activation=leaky 213 | 214 | [convolutional] 215 | batch_normalize=1 216 | size=3 217 | stride=1 218 | pad=1 219 | filters=1024 220 | activation=leaky 221 | 222 | [convolutional] 223 | batch_normalize=1 224 | size=3 225 | stride=1 226 | pad=1 227 | filters=1024 228 | activation=leaky 229 | 230 | [local] 231 | size=3 232 | stride=1 233 | pad=1 234 | filters=256 235 | activation=leaky 236 | 237 | [connected] 238 | output= 4655 239 | activation=linear 240 | 241 | [detection] 242 | classes=80 243 | coords=4 244 | rescore=1 245 | side=7 246 | num=3 247 | softmax=0 248 | sqrt=1 249 | jitter=.2 250 | 251 | object_scale=1 252 | noobject_scale=.5 253 | class_scale=1 254 | coord_scale=5 255 | 256 | -------------------------------------------------------------------------------- /cfg/v1.1/yolov1.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | saturation=1.5 10 | exposure=1.5 11 | hue=.1 12 | 13 | learning_rate=0.0005 14 | policy=steps 15 | steps=200,400,600,20000,30000 16 | scales=2.5,2,2,.1,.1 17 | max_batches = 40000 18 | 19 | [convolutional] 20 | batch_normalize=1 21 | filters=64 22 | size=7 23 | stride=2 24 | pad=1 25 | activation=leaky 26 | 27 | [maxpool] 28 | size=2 29 | stride=2 30 | 31 | [convolutional] 32 | batch_normalize=1 33 | filters=192 34 | size=3 35 | stride=1 36 | pad=1 37 | activation=leaky 38 | 39 | [maxpool] 40 | size=2 41 | stride=2 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=128 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | 
batch_normalize=1 53 | filters=256 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=512 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | batch_normalize=1 81 | filters=256 82 | size=1 83 | stride=1 84 | pad=1 85 | activation=leaky 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=512 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [convolutional] 96 | batch_normalize=1 97 | filters=256 98 | size=1 99 | stride=1 100 | pad=1 101 | activation=leaky 102 | 103 | [convolutional] 104 | batch_normalize=1 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | [convolutional] 120 | batch_normalize=1 121 | filters=512 122 | size=3 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=256 130 | size=1 131 | stride=1 132 | pad=1 133 | activation=leaky 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=512 138 | size=3 139 | stride=1 140 | pad=1 141 | activation=leaky 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=512 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=1024 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [maxpool] 160 | size=2 161 | stride=2 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=512 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=1024 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [convolutional] 180 | batch_normalize=1 181 | filters=512 182 | size=1 183 | stride=1 184 | pad=1 185 | activation=leaky 186 | 187 | [convolutional] 188 | batch_normalize=1 189 | filters=1024 190 | size=3 191 | stride=1 192 | pad=1 193 | activation=leaky 194 | 195 | ####### 196 | 197 | [convolutional] 198 | batch_normalize=1 199 | size=3 200 | stride=1 201 | pad=1 202 | filters=1024 203 | activation=leaky 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | size=3 208 | stride=2 209 | pad=1 210 | filters=1024 211 | activation=leaky 212 | 213 | [convolutional] 214 | batch_normalize=1 215 | size=3 216 | stride=1 217 | pad=1 218 | filters=1024 219 | activation=leaky 220 | 221 | [convolutional] 222 | batch_normalize=1 223 | size=3 224 | stride=1 225 | pad=1 226 | filters=1024 227 | activation=leaky 228 | 229 | [local] 230 | size=3 231 | stride=1 232 | pad=1 233 | filters=256 234 | activation=leaky 235 | 236 | [dropout] 237 | probability=.5 238 | 239 | [connected] 240 | output= 1715 241 | activation=linear 242 | 243 | [detection] 244 | classes=20 245 | coords=4 246 | rescore=1 247 | side=7 248 | num=3 249 | softmax=0 250 | sqrt=1 251 | jitter=.2 252 | 253 | object_scale=1 254 | noobject_scale=.5 255 | class_scale=1 256 | coord_scale=5 257 | 258 | -------------------------------------------------------------------------------- /darkflow/cython_utils/cy_yolo2_findboxes.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 
cimport cython
4 | ctypedef np.float_t DTYPE_t
5 | from libc.math cimport exp
6 | from darkflow.utils.box import BoundBox
7 | from nms cimport NMS
8 | 
9 | #expit
10 | @cython.boundscheck(False) # turn off bounds-checking for entire function
11 | @cython.wraparound(False) # turn off negative index wrapping for entire function
12 | @cython.cdivision(True)
13 | cdef float expit_c(float x):
14 |     cdef float y = 1/(1+exp(-x))
15 |     return y
16 | 
17 | #MAX
18 | @cython.boundscheck(False) # turn off bounds-checking for entire function
19 | @cython.wraparound(False) # turn off negative index wrapping for entire function
20 | @cython.cdivision(True)
21 | cdef float max_c(float a, float b):
22 |     if(a>b):
23 |         return a
24 |     return b
25 | 
26 | """
27 | #SOFTMAX!
28 | @cython.cdivision(True)
29 | @cython.boundscheck(False) # turn off bounds-checking for entire function
30 | @cython.wraparound(False) # turn off negative index wrapping for entire function
31 | cdef void _softmax_c(float* x, int classes):
32 |     cdef:
33 |         float sum = 0
34 |         np.intp_t k
35 |         float arr_max = 0
36 |     for k in range(classes):
37 |         arr_max = max(arr_max,x[k])
38 | 
39 |     for k in range(classes):
40 |         x[k] = exp(x[k]-arr_max)
41 |         sum += x[k]
42 | 
43 |     for k in range(classes):
44 |         x[k] = x[k]/sum
45 | """
46 | 
47 | 
48 | 
49 | #BOX CONSTRUCTOR
50 | @cython.cdivision(True)
51 | @cython.boundscheck(False) # turn off bounds-checking for entire function
52 | @cython.wraparound(False) # turn off negative index wrapping for entire function
53 | def box_constructor(meta, np.ndarray[float,ndim=3] net_out_in):
54 |     cdef:
55 |         np.intp_t H, W, _, C, B, row, col, box_loop, class_loop
56 |         np.intp_t row1, col1, box_loop1, index, index2
57 |         float threshold = meta['thresh']
58 |         float tempc, arr_max=0, sum=0
59 |         double[:] anchors = np.asarray(meta['anchors'])
60 |         list boxes = list()
61 | 
62 |     H, W, _ = meta['out_size']
63 |     C = meta['classes']
64 |     B = meta['num']
65 | 
66 |     cdef:
67 |         float[:, :, :, ::1] net_out = net_out_in.reshape([H, W, B, net_out_in.shape[2] // B])  # floor division keeps the reshape dimension an integer
68 |         float[:, :, :, ::1] Classes = net_out[:, :, :, 5:]
69 |         float[:, :, :, ::1] Bbox_pred = net_out[:, :, :, :5]
70 |         float[:, :, :, ::1] probs = np.zeros((H, W, B, C), dtype=np.float32)
71 | 
72 |     for row in range(H):
73 |         for col in range(W):
74 |             for box_loop in range(B):
75 |                 arr_max = 0
76 |                 sum = 0
77 |                 Bbox_pred[row, col, box_loop, 4] = expit_c(Bbox_pred[row, col, box_loop, 4])
78 |                 Bbox_pred[row, col, box_loop, 0] = (col + expit_c(Bbox_pred[row, col, box_loop, 0])) / W
79 |                 Bbox_pred[row, col, box_loop, 1] = (row + expit_c(Bbox_pred[row, col, box_loop, 1])) / H
80 |                 Bbox_pred[row, col, box_loop, 2] = exp(Bbox_pred[row, col, box_loop, 2]) * anchors[2 * box_loop + 0] / W
81 |                 Bbox_pred[row, col, box_loop, 3] = exp(Bbox_pred[row, col, box_loop, 3]) * anchors[2 * box_loop + 1] / H
82 |                 #SOFTMAX BLOCK, no more pointer juggling
83 |                 for class_loop in range(C):
84 |                     arr_max = max_c(arr_max, Classes[row, col, box_loop, class_loop])
85 | 
86 |                 for class_loop in range(C):
87 |                     Classes[row, col, box_loop, class_loop] = exp(Classes[row, col, box_loop, class_loop] - arr_max)
88 |                     sum += Classes[row, col, box_loop, class_loop]
89 | 
90 |                 for class_loop in range(C):
91 |                     tempc = Classes[row, col, box_loop, class_loop] * Bbox_pred[row, col, box_loop, 4] / sum
92 |                     if(tempc > threshold):
93 |                         probs[row, col, box_loop, class_loop] = tempc
94 | 
95 | 
96 |     #NMS
97 |     return NMS(np.ascontiguousarray(probs).reshape(H*W*B, C), np.ascontiguousarray(Bbox_pred).reshape(H*B*W, 5))
98 | 
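# ---- Editor's aside (not in the original .pyx): a hedged pure-Python ----
# ---- sketch of the per-box decode that lines 77-81 above perform.    ----
# The helper below is invented for illustration; the arithmetic mirrors the
# loop: sigmoid squashes x, y and the objectness score, while exp() scaled
# by the matching anchor pair recovers width and height, all normalised by
# the grid size.

import numpy as np  # np is already imported at the top of this file; repeated here so the sketch is self-contained

def _decode_box_sketch(pred, row, col, b, anchors, W, H):
    """pred = net_out[row, col, b, :5] = (tx, ty, tw, th, to)."""
    def sigmoid(v):
        return 1. / (1. + np.exp(-v))
    bx = (col + sigmoid(pred[0])) / W              # box centre x in [0, 1]
    by = (row + sigmoid(pred[1])) / H              # box centre y in [0, 1]
    bw = np.exp(pred[2]) * anchors[2 * b] / W      # width, anchor-scaled
    bh = np.exp(pred[3]) * anchors[2 * b + 1] / H  # height, anchor-scaled
    conf = sigmoid(pred[4])                        # objectness confidence
    return bx, by, bw, bh, conf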
-------------------------------------------------------------------------------- /cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | 
pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .1 258 | random=1 259 | -------------------------------------------------------------------------------- /darkflow/net/yolo/predict.py: -------------------------------------------------------------------------------- 1 | from darkflow.utils.im_transform import imcv2_recolor, imcv2_affine_trans 2 | from darkflow.utils.box import BoundBox, box_iou, prob_compare 3 | import numpy as np 4 | import cv2 5 | import os 6 | from darkflow.cython_utils.cy_yolo_findboxes import yolo_box_constructor 7 | 8 | def _fix(obj, dims, scale, offs): 9 | for i in range(1, 5): 10 | dim = dims[(i + 1) % 2] 11 | off = offs[(i + 1) % 2] 12 | obj[i] = int(obj[i] * scale - off) 13 | obj[i] = max(min(obj[i], dim), 0) 14 | 15 | def resize_input(self, im): 16 | h, w, c = self.meta['inp_size'] 17 | imsz = cv2.resize(im, (w, h)) 18 | imsz = imsz / 255. 19 | imsz = imsz[:,:,::-1] 20 | return imsz 21 | 22 | def process_box(self, b, h, w, threshold): 23 | max_indx = np.argmax(b.probs) 24 | max_prob = b.probs[max_indx] 25 | label = self.meta['labels'][max_indx] 26 | if max_prob > threshold: 27 | left = int ((b.x - b.w/2.) * w) 28 | right = int ((b.x + b.w/2.) * w) 29 | top = int ((b.y - b.h/2.) * h) 30 | bot = int ((b.y + b.h/2.) * h) 31 | if left < 0 : left = 0 32 | if right > w - 1: right = w - 1 33 | if top < 0 : top = 0 34 | if bot > h - 1: bot = h - 1 35 | mess = '{}'.format(label) 36 | return (left, right, top, bot, mess, max_indx, max_prob) 37 | return None 38 | 39 | def findboxes(self, net_out): 40 | meta, FLAGS = self.meta, self.FLAGS 41 | threshold = FLAGS.threshold 42 | 43 | boxes = [] 44 | boxes = yolo_box_constructor(meta, net_out, threshold) 45 | 46 | return boxes 47 | 48 | def preprocess(self, im, allobj = None): 49 | """ 50 | Takes an image, return it as a numpy tensor that is readily 51 | to be fed into tfnet. If there is an accompanied annotation (allobj), 52 | meaning this preprocessing is serving the train process, then this 53 | image will be transformed with random noise to augment training data, 54 | using scale, translation, flipping and recolor. The accompanied 55 | parsed annotation (allobj) will also be modified accordingly. 
56 | """ 57 | if type(im) is not np.ndarray: 58 | im = cv2.imread(im) 59 | 60 | if allobj is not None: # in training mode 61 | result = imcv2_affine_trans(im) 62 | im, dims, trans_param = result 63 | scale, offs, flip = trans_param 64 | for obj in allobj: 65 | _fix(obj, dims, scale, offs) 66 | if not flip: continue 67 | obj_1_ = obj[1] 68 | obj[1] = dims[0] - obj[3] 69 | obj[3] = dims[0] - obj_1_ 70 | im = imcv2_recolor(im) 71 | 72 | im = self.resize_input(im) 73 | if allobj is None: return im 74 | return im#, np.array(im) # for unit testing 75 | 76 | def postprocess(self, net_out, im, save = True): 77 | """ 78 | Takes net output, draw predictions, save to disk 79 | """ 80 | meta, FLAGS = self.meta, self.FLAGS 81 | threshold = FLAGS.threshold 82 | colors, labels = meta['colors'], meta['labels'] 83 | 84 | boxes = self.findboxes(net_out) 85 | 86 | if type(im) is not np.ndarray: 87 | imgcv = cv2.imread(im) 88 | else: imgcv = im 89 | 90 | h, w, _ = imgcv.shape 91 | textBuff = "[" 92 | for b in boxes: 93 | boxResults = self.process_box(b, h, w, threshold) 94 | if boxResults is None: 95 | continue 96 | left, right, top, bot, mess, max_indx, confidence = boxResults 97 | thick = int((h + w) // 300) 98 | if self.FLAGS.json: 99 | line = ('{"label": "%s",' 100 | '"confidence": %.2f,' 101 | '"topleft": {"x": %d, "y": %d},' 102 | '"bottomright": {"x": %d,"y": %d}}, \n') % \ 103 | (mess, confidence, left, top, right, bot) 104 | textBuff += line 105 | continue 106 | 107 | cv2.rectangle(imgcv, 108 | (left, top), (right, bot), 109 | self.meta['colors'][max_indx], thick) 110 | cv2.putText( 111 | imgcv, mess, (left, top - 12), 112 | 0, 1e-3 * h, self.meta['colors'][max_indx], 113 | thick // 3) 114 | 115 | 116 | if not save: return imgcv 117 | 118 | # Removing trailing comma+newline adding json list terminator. 
119 | textBuff = textBuff.rstrip(', \n') + "]"  # also yields a valid "[]" when nothing was detected
120 | outfolder = os.path.join(self.FLAGS.imgdir, 'out')
121 | img_name = os.path.join(outfolder, os.path.basename(im))
122 | if self.FLAGS.json:
123 |     textFile = os.path.splitext(img_name)[0] + ".json"
124 |     with open(textFile, 'w') as f:
125 |         f.write(textBuff)
126 |     return
127 | 
128 | cv2.imwrite(img_name, imgcv)
129 | 
-------------------------------------------------------------------------------- /darkflow/net/ops/convolution.py: --------------------------------------------------------------------------------
1 | import tensorflow.contrib.slim as slim
2 | from .baseop import BaseOp
3 | import tensorflow as tf
4 | import numpy as np
5 | 
6 | class reorg(BaseOp):
7 |     def _forward(self):
8 |         inp = self.inp.out
9 |         shape = inp.get_shape().as_list()
10 |         _, h, w, c = shape
11 |         s = self.lay.stride
12 |         out = list()
13 |         for i in range(int(h / s)):
14 |             row_i = list()
15 |             for j in range(int(w / s)):
16 |                 si, sj = s * i, s * j
17 |                 boxij = inp[:, si: si+s, sj: sj+s, :]
18 |                 flatij = tf.reshape(boxij, [-1, 1, 1, c*s*s])
19 |                 row_i += [flatij]
20 |             out += [tf.concat(row_i, 2)]
21 | 
22 |         self.out = tf.concat(out, 1)
23 | 
24 |     def forward(self):
25 |         inp = self.inp.out
26 |         s = self.lay.stride
27 |         self.out = tf.extract_image_patches(
28 |             inp, [1,s,s,1], [1,s,s,1], [1,1,1,1], 'VALID')
29 | 
30 |     def speak(self):
31 |         args = [self.lay.stride] * 2
32 |         msg = 'local flatten {}x{}'
33 |         return msg.format(*args)
34 | 
35 | 
36 | class local(BaseOp):
37 |     def forward(self):
38 |         pad = [[self.lay.pad, self.lay.pad]] * 2
39 |         temp = tf.pad(self.inp.out, [[0, 0]] + pad + [[0, 0]])
40 | 
41 |         k = self.lay.w['kernels']
42 |         ksz = self.lay.ksize
43 |         half = int(ksz / 2)
44 |         out = list()
45 |         for i in range(self.lay.h_out):
46 |             row_i = list()
47 |             for j in range(self.lay.w_out):
48 |                 kij = k[i * self.lay.w_out + j]
49 |                 i_, j_ = i + 1 - half, j + 1 - half
50 |                 tij = temp[:, i_ : i_ + ksz, j_ : j_ + ksz, :]
51 |                 row_i.append(
52 |                     tf.nn.conv2d(tij, kij,
53 |                         padding = 'VALID',
54 |                         strides = [1] * 4))
55 |             out += [tf.concat(row_i, 2)]
56 | 
57 |         self.out = tf.concat(out, 1)
58 | 
59 |     def speak(self):
60 |         l = self.lay
61 |         args = [l.ksize] * 2 + [l.pad] + [l.stride]
62 |         args += [l.activation]
63 |         msg = 'loca {}x{}p{}_{} {}'.format(*args)
64 |         return msg
65 | 
66 | class convolutional(BaseOp):
67 |     def forward(self):
68 |         pad = [[self.lay.pad, self.lay.pad]] * 2
69 |         temp = tf.pad(self.inp.out, [[0, 0]] + pad + [[0, 0]])
70 |         temp = tf.nn.conv2d(temp, self.lay.w['kernel'], padding = 'VALID',
71 |             name = self.scope, strides = [1] + [self.lay.stride] * 2 + [1])
72 |         if self.lay.batch_norm:
73 |             temp = self.batchnorm(self.lay, temp)
74 |         self.out = tf.nn.bias_add(temp, self.lay.w['biases'])
75 | 
76 |     def batchnorm(self, layer, inp):
77 |         if not self.var:
78 |             temp = (inp - layer.w['moving_mean'])
79 |             temp /= (np.sqrt(layer.w['moving_variance']) + 1e-5)
80 |             temp *= layer.w['gamma']
81 |             return temp
82 |         else:
83 |             args = dict({
84 |                 'center' : False, 'scale' : True,
85 |                 'epsilon': 1e-5, 'scope' : self.scope,
86 |                 'updates_collections' : None,
87 |                 'is_training': layer.h['is_training'],
88 |                 'param_initializers': layer.w
89 |             })
90 |             return slim.batch_norm(inp, **args)
91 | 
92 |     def speak(self):
93 |         l = self.lay
94 |         args = [l.ksize] * 2 + [l.pad] + [l.stride]
95 |         args += [l.batch_norm * '+bnorm']
96 |         args += [l.activation]
97 |         msg = 'conv {}x{}p{}_{} {} {}'.format(*args)
98 |         return msg
99 | 
100 | class conv_select(convolutional):
101 |     def speak(self):
102 |         l = self.lay
103 | 
args = [l.ksize] * 2 + [l.pad] + [l.stride] 104 | args += [l.batch_norm * '+bnorm'] 105 | args += [l.activation] 106 | msg = 'sele {}x{}p{}_{} {} {}'.format(*args) 107 | return msg 108 | 109 | class conv_extract(convolutional): 110 | def speak(self): 111 | l = self.lay 112 | args = [l.ksize] * 2 + [l.pad] + [l.stride] 113 | args += [l.batch_norm * '+bnorm'] 114 | args += [l.activation] 115 | msg = 'extr {}x{}p{}_{} {} {}'.format(*args) 116 | return msg -------------------------------------------------------------------------------- /darkflow/net/yolov2/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | import pickle 3 | import tensorflow as tf 4 | from ..yolo.misc import show 5 | import numpy as np 6 | import os 7 | import math 8 | 9 | def expit_tensor(x): 10 | return 1. / (1. + tf.exp(-x)) 11 | 12 | def loss(self, net_out): 13 | """ 14 | Takes net.out and placeholders value 15 | returned in batch() func above, 16 | to build train_op and loss 17 | """ 18 | # meta 19 | m = self.meta 20 | sprob = float(m['class_scale']) 21 | sconf = float(m['object_scale']) 22 | snoob = float(m['noobject_scale']) 23 | scoor = float(m['coord_scale']) 24 | H, W, _ = m['out_size'] 25 | B, C = m['num'], m['classes'] 26 | HW = H * W # number of grid cells 27 | anchors = m['anchors'] 28 | 29 | print('{} loss hyper-parameters:'.format(m['model'])) 30 | print('\tH = {}'.format(H)) 31 | print('\tW = {}'.format(W)) 32 | print('\tbox = {}'.format(m['num'])) 33 | print('\tclasses = {}'.format(m['classes'])) 34 | print('\tscales = {}'.format([sprob, sconf, snoob, scoor])) 35 | 36 | size1 = [None, HW, B, C] 37 | size2 = [None, HW, B] 38 | 39 | # return the below placeholders 40 | _probs = tf.placeholder(tf.float32, size1) 41 | _confs = tf.placeholder(tf.float32, size2) 42 | _coord = tf.placeholder(tf.float32, size2 + [4]) 43 | # weights term for L2 loss 44 | _proid = tf.placeholder(tf.float32, size1) 45 | # material calculating IOU 46 | _areas = tf.placeholder(tf.float32, size2) 47 | _upleft = tf.placeholder(tf.float32, size2 + [2]) 48 | _botright = tf.placeholder(tf.float32, size2 + [2]) 49 | 50 | self.placeholders = { 51 | 'probs':_probs, 'confs':_confs, 'coord':_coord, 'proid':_proid, 52 | 'areas':_areas, 'upleft':_upleft, 'botright':_botright 53 | } 54 | 55 | # Extract the coordinate prediction from net.out 56 | net_out_reshape = tf.reshape(net_out, [-1, H, W, B, (4 + 1 + C)]) 57 | coords = net_out_reshape[:, :, :, :, :4] 58 | coords = tf.reshape(coords, [-1, H*W, B, 4]) 59 | adjusted_coords_xy = expit_tensor(coords[:,:,:,0:2]) 60 | adjusted_coords_wh = tf.sqrt(tf.exp(coords[:,:,:,2:4]) * np.reshape(anchors, [1, 1, B, 2]) / np.reshape([W, H], [1, 1, 1, 2])) 61 | coords = tf.concat([adjusted_coords_xy, adjusted_coords_wh], 3) 62 | 63 | adjusted_c = expit_tensor(net_out_reshape[:, :, :, :, 4]) 64 | adjusted_c = tf.reshape(adjusted_c, [-1, H*W, B, 1]) 65 | 66 | adjusted_prob = tf.nn.softmax(net_out_reshape[:, :, :, :, 5:]) 67 | adjusted_prob = tf.reshape(adjusted_prob, [-1, H*W, B, C]) 68 | 69 | adjusted_net_out = tf.concat([adjusted_coords_xy, adjusted_coords_wh, adjusted_c, adjusted_prob], 3) 70 | 71 | wh = tf.pow(coords[:,:,:,2:4], 2) * np.reshape([W, H], [1, 1, 1, 2]) 72 | area_pred = wh[:,:,:,0] * wh[:,:,:,1] 73 | centers = coords[:,:,:,0:2] 74 | floor = centers - (wh * .5) 75 | ceil = centers + (wh * .5) 76 | 77 | # calculate the intersection areas 78 | intersect_upleft = tf.maximum(floor, _upleft) 79 | intersect_botright = 
tf.minimum(ceil , _botright) 80 | intersect_wh = intersect_botright - intersect_upleft 81 | intersect_wh = tf.maximum(intersect_wh, 0.0) 82 | intersect = tf.multiply(intersect_wh[:,:,:,0], intersect_wh[:,:,:,1]) 83 | 84 | # calculate the best IOU, set 0.0 confidence for worse boxes 85 | iou = tf.truediv(intersect, _areas + area_pred - intersect) 86 | best_box = tf.equal(iou, tf.reduce_max(iou, [2], True)) 87 | best_box = tf.to_float(best_box) 88 | confs = tf.multiply(best_box, _confs) 89 | 90 | # take care of the weight terms 91 | conid = snoob * (1. - confs) + sconf * confs 92 | weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3) 93 | cooid = scoor * weight_coo 94 | weight_pro = tf.concat(C * [tf.expand_dims(confs, -1)], 3) 95 | proid = sprob * weight_pro 96 | 97 | self.fetch += [_probs, confs, conid, cooid, proid] 98 | true = tf.concat([_coord, tf.expand_dims(confs, 3), _probs ], 3) 99 | wght = tf.concat([cooid, tf.expand_dims(conid, 3), proid ], 3) 100 | 101 | print('Building {} loss'.format(m['model'])) 102 | loss = tf.pow(adjusted_net_out - true, 2) 103 | loss = tf.multiply(loss, wght) 104 | loss = tf.reshape(loss, [-1, H*W*B*(4 + 1 + C)]) 105 | loss = tf.reduce_sum(loss, 1) 106 | self.loss = .5 * tf.reduce_mean(loss) 107 | tf.summary.scalar('{} loss'.format(m['model']), self.loss) -------------------------------------------------------------------------------- /darkflow/net/yolo/data.py: -------------------------------------------------------------------------------- 1 | from darkflow.utils.pascal_voc_clean_xml import pascal_voc_clean_xml 2 | from numpy.random import permutation as perm 3 | from .predict import preprocess 4 | # from .misc import show 5 | from copy import deepcopy 6 | import pickle 7 | import numpy as np 8 | import os 9 | 10 | def parse(self, exclusive = False): 11 | meta = self.meta 12 | ext = '.parsed' 13 | ann = self.FLAGS.annotation 14 | if not os.path.isdir(ann): 15 | msg = 'Annotation directory not found {} .' 16 | exit('Error: {}'.format(msg.format(ann))) 17 | print('\n{} parsing {}'.format(meta['model'], ann)) 18 | dumps = pascal_voc_clean_xml(ann, meta['labels'], exclusive) 19 | return dumps 20 | 21 | 22 | def _batch(self, chunk): 23 | """ 24 | Takes a chunk of parsed annotations 25 | returns value for placeholders of net's 26 | input & loss layer correspond to this chunk 27 | """ 28 | meta = self.meta 29 | S, B = meta['side'], meta['num'] 30 | C, labels = meta['classes'], meta['labels'] 31 | 32 | # preprocess 33 | jpg = chunk[0]; w, h, allobj_ = chunk[1] 34 | allobj = deepcopy(allobj_) 35 | path = os.path.join(self.FLAGS.dataset, jpg) 36 | img = self.preprocess(path, allobj) 37 | 38 | # Calculate regression target 39 | cellx = 1. * w / S 40 | celly = 1. * h / S 41 | for obj in allobj: 42 | centerx = .5*(obj[1]+obj[3]) #xmin, xmax 43 | centery = .5*(obj[2]+obj[4]) #ymin, ymax 44 | cx = centerx / cellx 45 | cy = centery / celly 46 | if cx >= S or cy >= S: return None, None 47 | obj[3] = float(obj[3]-obj[1]) / w 48 | obj[4] = float(obj[4]-obj[2]) / h 49 | obj[3] = np.sqrt(obj[3]) 50 | obj[4] = np.sqrt(obj[4]) 51 | obj[1] = cx - np.floor(cx) # centerx 52 | obj[2] = cy - np.floor(cy) # centery 53 | obj += [int(np.floor(cy) * S + np.floor(cx))] 54 | 55 | # show(im, allobj, S, w, h, cellx, celly) # unit test 56 | 57 | # Calculate placeholders' values 58 | probs = np.zeros([S*S,C]) 59 | confs = np.zeros([S*S,B]) 60 | coord = np.zeros([S*S,B,4]) 61 | proid = np.zeros([S*S,C]) 62 | prear = np.zeros([S*S,4]) 63 | for obj in allobj: 64 | probs[obj[5], :] = [0.] 
* C 65 | probs[obj[5], labels.index(obj[0])] = 1. 66 | proid[obj[5], :] = [1] * C 67 | coord[obj[5], :, :] = [obj[1:5]] * B 68 | prear[obj[5],0] = obj[1] - obj[3]**2 * .5 * S # xleft 69 | prear[obj[5],1] = obj[2] - obj[4]**2 * .5 * S # yup 70 | prear[obj[5],2] = obj[1] + obj[3]**2 * .5 * S # xright 71 | prear[obj[5],3] = obj[2] + obj[4]**2 * .5 * S # ybot 72 | confs[obj[5], :] = [1.] * B 73 | 74 | # Finalise the placeholders' values 75 | upleft = np.expand_dims(prear[:,0:2], 1) 76 | botright = np.expand_dims(prear[:,2:4], 1) 77 | wh = botright - upleft; 78 | area = wh[:,:,0] * wh[:,:,1] 79 | upleft = np.concatenate([upleft] * B, 1) 80 | botright = np.concatenate([botright] * B, 1) 81 | areas = np.concatenate([area] * B, 1) 82 | 83 | # value for placeholder at input layer 84 | inp_feed_val = img 85 | # value for placeholder at loss layer 86 | loss_feed_val = { 87 | 'probs': probs, 'confs': confs, 88 | 'coord': coord, 'proid': proid, 89 | 'areas': areas, 'upleft': upleft, 90 | 'botright': botright 91 | } 92 | 93 | return inp_feed_val, loss_feed_val 94 | 95 | def shuffle(self): 96 | batch = self.FLAGS.batch 97 | data = self.parse() 98 | size = len(data) 99 | 100 | print('Dataset of {} instance(s)'.format(size)) 101 | if batch > size: self.FLAGS.batch = batch = size 102 | batch_per_epoch = int(size / batch) 103 | 104 | for i in range(self.FLAGS.epoch): 105 | shuffle_idx = perm(np.arange(size)) 106 | for b in range(batch_per_epoch): 107 | # yield these 108 | x_batch = list() 109 | feed_batch = dict() 110 | 111 | for j in range(b*batch, b*batch+batch): 112 | train_instance = data[shuffle_idx[j]] 113 | inp, new_feed = self._batch(train_instance) 114 | 115 | if inp is None: continue 116 | x_batch += [np.expand_dims(inp, 0)] 117 | 118 | for key in new_feed: 119 | new = new_feed[key] 120 | old_feed = feed_batch.get(key, 121 | np.zeros((0,) + new.shape)) 122 | feed_batch[key] = np.concatenate([ 123 | old_feed, [new] 124 | ]) 125 | 126 | x_batch = np.concatenate(x_batch, 0) 127 | yield x_batch, feed_batch 128 | 129 | print('Finish {} epoch(es)'.format(i + 1)) 130 | 131 | -------------------------------------------------------------------------------- /darkflow/net/yolo/misc.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import cv2 4 | import os 5 | 6 | labels20 = ["aeroplane", "bicycle", "bird", "boat", "bottle", 7 | "bus", "car", "cat", "chair", "cow", "diningtable", "dog", 8 | "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", 9 | "train", "tvmonitor"] 10 | 11 | # 8, 14, 15, 19 12 | 13 | voc_models = ['yolo-full', 'yolo-tiny', 'yolo-small', # <- v1 14 | 'yolov1', 'tiny-yolov1', # <- v1.1 15 | 'tiny-yolo-voc', 'yolo-voc'] # <- v2 16 | 17 | coco_models = ['tiny-coco', 'yolo-coco', # <- v1.1 18 | 'yolo', 'tiny-yolo'] # <- v2 19 | 20 | coco_names = 'coco.names' 21 | nine_names = '9k.names' 22 | 23 | def labels(meta, FLAGS): 24 | model = os.path.basename(meta['name']) 25 | if model in voc_models: 26 | print("Model has a VOC model name, loading VOC labels.") 27 | meta['labels'] = labels20 28 | else: 29 | file = 'labels.txt' 30 | if model in coco_models: 31 | print("Model has a coco model name, loading coco labels.") 32 | file = os.path.join(FLAGS.config, coco_names) 33 | elif model == 'yolo9000': 34 | print("Model has name yolo9000, loading yolo9000 labels.") 35 | file = os.path.join(FLAGS.config, nine_names) 36 | with open(file, 'r') as f: 37 | meta['labels'] = list() 38 | labs = [l.strip() for l in 
f.readlines()]
39 |             for lab in labs:
40 |                 if lab == '----': break
41 |                 meta['labels'] += [lab]
42 |     if len(meta['labels']) == 0:
43 |         meta['labels'] = labels20
44 | 
45 | def is_inp(self, name):
46 |     return os.path.splitext(name)[1] in ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']  # splitext keeps the whole extension, so '.jpeg'/'.JPEG' match too
47 | 
48 | def show(im, allobj, S, w, h, cellx, celly):
49 |     for obj in allobj:
50 |         a = obj[5] % S
51 |         b = obj[5] // S
52 |         cx = a + obj[1]
53 |         cy = b + obj[2]
54 |         centerx = cx * cellx
55 |         centery = cy * celly
56 |         ww = obj[3]**2 * w
57 |         hh = obj[4]**2 * h
58 |         cv2.rectangle(im,
59 |             (int(centerx - ww/2), int(centery - hh/2)),
60 |             (int(centerx + ww/2), int(centery + hh/2)),
61 |             (0,0,255), 2)
62 |     cv2.imshow("result", im)
63 |     cv2.waitKey()
64 |     cv2.destroyAllWindows()
65 | 
66 | def show2(im, allobj):
67 |     for obj in allobj:
68 |         cv2.rectangle(im,
69 |             (obj[1], obj[2]),
70 |             (obj[3], obj[4]),
71 |             (0,0,255), 2)
72 |     cv2.imshow('result', im)
73 |     cv2.waitKey()
74 |     cv2.destroyAllWindows()
75 | 
76 | 
77 | _MVA = .05
78 | 
79 | def profile(self, net):
80 |     pass
81 |     # data = self.parse(exclusive = True)
82 |     # size = len(data); batch = self.FLAGS.batch
83 |     # all_inp_ = [x[0] for x in data]
84 |     # net.say('Will cycle through {} examples {} times'.format(
85 |     #     len(all_inp_), net.FLAGS.epoch))
86 | 
87 |     # fetch = list(); mvave = list(); names = list();
88 |     # this = net.top
89 |     # conv_lay = ['convolutional', 'connected', 'local', 'conv-select']
90 |     # while this.inp is not None:
91 |     #     if this.lay.type in conv_lay:
92 |     #         fetch = [this.out] + fetch
93 |     #         names = [this.lay.signature] + names
94 |     #         mvave = [None] + mvave
95 |     #     this = this.inp
96 |     # print(names)
97 | 
98 |     # total = int(); allofthem = len(all_inp_) * net.FLAGS.epoch
99 |     # batch = min(net.FLAGS.batch, len(all_inp_))
100 |     # for count in range(net.FLAGS.epoch):
101 |     #     net.say('EPOCH {}'.format(count))
102 |     #     for j in range(len(all_inp_)/batch):
103 |     #         inp_feed = list(); new_all = list()
104 |     #         all_inp = all_inp_[j*batch: (j*batch+batch)]
105 |     #         for inp in all_inp:
106 |     #             new_all += [inp]
107 |     #             this_inp = os.path.join(net.FLAGS.dataset, inp)
108 |     #             this_inp = net.framework.preprocess(this_inp)
109 |     #             expanded = np.expand_dims(this_inp, 0)
110 |     #             inp_feed.append(expanded)
111 |     #         all_inp = new_all
112 |     #         feed_dict = {net.inp : np.concatenate(inp_feed, 0)}
113 |     #         out = net.sess.run(fetch, feed_dict)
114 | 
115 |     #         for i, o in enumerate(out):
116 |     #             oi = out[i];
117 |     #             dim = len(oi.shape) - 1
118 |     #             ai = mvave[i];
119 |     #             mi = np.mean(oi, tuple(range(dim)))
120 |     #             vi = np.var(oi, tuple(range(dim)))
121 |     #             if ai is None: mvave[i] = [mi, vi]
122 |     #             elif 'banana ninja yada yada':
123 |     #                 ai[0] = (1 - _MVA) * ai[0] + _MVA * mi
124 |     #                 ai[1] = (1 - _MVA) * ai[1] + _MVA * vi
125 |     #         total += len(inp_feed)
126 |     #         net.say('{} / {} = {}%'.format(
127 |     #             total, allofthem, 100.
* total / allofthem)) 128 | 129 | # with open('profile', 'wb') as f: 130 | # pickle.dump([mvave], f, protocol = -1) 131 | -------------------------------------------------------------------------------- /darkflow/net/flow.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import tensorflow as tf 5 | import pickle 6 | 7 | train_stats = ( 8 | 'Training statistics: \n' 9 | '\tLearning rate : {}\n' 10 | '\tBatch size : {}\n' 11 | '\tEpoch number : {}\n' 12 | '\tBackup every : {}' 13 | ) 14 | 15 | def _save_ckpt(self, step, loss_profile): 16 | file = '{}-{}{}' 17 | model = self.meta['name'] 18 | 19 | profile = file.format(model, step, '.profile') 20 | profile = os.path.join(self.FLAGS.backup, profile) 21 | with open(profile, 'wb') as profile_ckpt: 22 | pickle.dump(loss_profile, profile_ckpt) 23 | 24 | ckpt = file.format(model, step, '') 25 | ckpt = os.path.join(self.FLAGS.backup, ckpt) 26 | self.say('Checkpoint at step {}'.format(step)) 27 | self.saver.save(self.sess, ckpt) 28 | 29 | 30 | def train(self): 31 | loss_ph = self.framework.placeholders 32 | loss_mva = None; profile = list() 33 | 34 | batches = self.framework.shuffle() 35 | loss_op = self.framework.loss 36 | 37 | for i, (x_batch, datum) in enumerate(batches): 38 | if not i: self.say(train_stats.format( 39 | self.FLAGS.lr, self.FLAGS.batch, 40 | self.FLAGS.epoch, self.FLAGS.save 41 | )) 42 | 43 | feed_dict = { 44 | loss_ph[key]: datum[key] 45 | for key in loss_ph } 46 | feed_dict[self.inp] = x_batch 47 | feed_dict.update(self.feed) 48 | 49 | fetches = [self.train_op, loss_op, self.summary_op] 50 | fetched = self.sess.run(fetches, feed_dict) 51 | loss = fetched[1] 52 | 53 | if loss_mva is None: loss_mva = loss 54 | loss_mva = .9 * loss_mva + .1 * loss 55 | step_now = self.FLAGS.load + i + 1 56 | 57 | self.writer.add_summary(fetched[2], step_now) 58 | 59 | form = 'step {} - loss {} - moving ave loss {}' 60 | self.say(form.format(step_now, loss, loss_mva)) 61 | profile += [(loss, loss_mva)] 62 | 63 | ckpt = (i+1) % (self.FLAGS.save // self.FLAGS.batch) 64 | args = [step_now, profile] 65 | if not ckpt: _save_ckpt(self, *args) 66 | 67 | if ckpt: _save_ckpt(self, *args) 68 | 69 | def return_predict(self, im): 70 | assert isinstance(im, np.ndarray), \ 71 | 'Image is not a np.ndarray' 72 | h, w, _ = im.shape 73 | im = self.framework.resize_input(im) 74 | this_inp = np.expand_dims(im, 0) 75 | feed_dict = {self.inp : this_inp} 76 | 77 | out = self.sess.run(self.out, feed_dict)[0] 78 | boxes = self.framework.findboxes(out) 79 | threshold = self.FLAGS.threshold 80 | boxesInfo = list() 81 | for box in boxes: 82 | tmpBox = self.framework.process_box(box, h, w, threshold) 83 | if tmpBox is None: 84 | continue 85 | boxesInfo.append({ 86 | "label": tmpBox[4], 87 | "confidence": tmpBox[6], 88 | "topleft": { 89 | "x": tmpBox[0], 90 | "y": tmpBox[2]}, 91 | "bottomright": { 92 | "x": tmpBox[1], 93 | "y": tmpBox[3]} 94 | }) 95 | return boxesInfo 96 | 97 | import math 98 | 99 | def predict(self): 100 | inp_path = self.FLAGS.imgdir 101 | all_inps = os.listdir(inp_path) 102 | all_inps = [i for i in all_inps if self.framework.is_inp(i)] 103 | if not all_inps: 104 | msg = 'Failed to find any images in {} .' 
105 | exit('Error: {}'.format(msg.format(inp_path))) 106 | 107 | batch = min(self.FLAGS.batch, len(all_inps)) 108 | 109 | # predict in batches 110 | n_batch = int(math.ceil(len(all_inps) / batch)) 111 | for j in range(n_batch): 112 | from_idx = j * batch 113 | to_idx = min(from_idx + batch, len(all_inps)) 114 | 115 | # collect images input in the batch 116 | inp_feed = list(); new_all = list() 117 | this_batch = all_inps[from_idx:to_idx] 118 | for inp in this_batch: 119 | new_all += [inp] 120 | this_inp = os.path.join(inp_path, inp) 121 | this_inp = self.framework.preprocess(this_inp) 122 | expanded = np.expand_dims(this_inp, 0) 123 | inp_feed.append(expanded) 124 | this_batch = new_all 125 | 126 | # Feed to the net 127 | feed_dict = {self.inp : np.concatenate(inp_feed, 0)} 128 | self.say('Forwarding {} inputs ...'.format(len(inp_feed))) 129 | start = time.time() 130 | out = self.sess.run(self.out, feed_dict) 131 | stop = time.time(); last = stop - start 132 | self.say('Total time = {}s / {} inps = {} ips'.format( 133 | last, len(inp_feed), len(inp_feed) / last)) 134 | 135 | # Post processing 136 | self.say('Post processing {} inputs ...'.format(len(inp_feed))) 137 | start = time.time() 138 | for i, prediction in enumerate(out): 139 | self.framework.postprocess(prediction, 140 | os.path.join(inp_path, this_batch[i])) 141 | stop = time.time(); last = stop - start 142 | 143 | # Timing 144 | self.say('Total time = {}s / {} inps = {} ips'.format( 145 | last, len(inp_feed), len(inp_feed) / last)) -------------------------------------------------------------------------------- /darkflow/cython_utils/nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | from libc.math cimport exp 5 | from darkflow.utils.box import BoundBox 6 | 7 | 8 | 9 | #OVERLAP 10 | @cython.boundscheck(False) # turn off bounds-checking for entire function 11 | @cython.wraparound(False) # turn off negative index wrapping for entire function 12 | @cython.cdivision(True) 13 | cdef float overlap_c(float x1, float w1 , float x2 , float w2): 14 | cdef: 15 | float l1,l2,left,right 16 | l1 = x1 - w1 /2. 17 | l2 = x2 - w2 /2. 18 | left = max(l1,l2) 19 | r1 = x1 + w1 /2. 20 | r2 = x2 + w2 /2. 
21 | right = min(r1, r2) 22 | return right - left; 23 | 24 | #BOX INTERSECTION 25 | @cython.boundscheck(False) # turn off bounds-checking for entire function 26 | @cython.wraparound(False) # turn off negative index wrapping for entire function 27 | @cython.cdivision(True) 28 | cdef float box_intersection_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 29 | cdef: 30 | float w,h,area 31 | w = overlap_c(ax, aw, bx, bw) 32 | h = overlap_c(ay, ah, by, bh) 33 | if w < 0 or h < 0: return 0 34 | area = w * h 35 | return area 36 | 37 | #BOX UNION 38 | @cython.boundscheck(False) # turn off bounds-checking for entire function 39 | @cython.wraparound(False) # turn off negative index wrapping for entire function 40 | @cython.cdivision(True) 41 | cdef float box_union_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 42 | cdef: 43 | float i,u 44 | i = box_intersection_c(ax, ay, aw, ah, bx, by, bw, bh) 45 | u = aw * ah + bw * bh -i 46 | return u 47 | 48 | 49 | #BOX IOU 50 | @cython.boundscheck(False) # turn off bounds-checking for entire function 51 | @cython.wraparound(False) # turn off negative index wrapping for entire function 52 | @cython.cdivision(True) 53 | cdef float box_iou_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 54 | return box_intersection_c(ax, ay, aw, ah, bx, by, bw, bh) / box_union_c(ax, ay, aw, ah, bx, by, bw, bh); 55 | 56 | 57 | 58 | 59 | #NMS 60 | @cython.boundscheck(False) # turn off bounds-checking for entire function 61 | @cython.wraparound(False) # turn off negative index wrapping for entire function 62 | @cython.cdivision(True) 63 | cdef NMS(float[:, ::1] final_probs , float[:, ::1] final_bbox): 64 | cdef list boxes = list() 65 | cdef: 66 | np.intp_t pred_length,class_length,class_loop,index,index2 67 | 68 | 69 | pred_length = final_bbox.shape[0] 70 | class_length = final_probs.shape[1] 71 | for class_loop in range(class_length): 72 | for index in range(pred_length): 73 | if final_probs[index,class_loop] == 0: continue 74 | for index2 in range(index+1,pred_length): 75 | if final_probs[index2,class_loop] == 0: continue 76 | if index==index2 : continue 77 | if box_iou_c(final_bbox[index,0],final_bbox[index,1],final_bbox[index,2],final_bbox[index,3],final_bbox[index2,0],final_bbox[index2,1],final_bbox[index2,2],final_bbox[index2,3]) >= 0.4: 78 | if final_probs[index2,class_loop] > final_probs[index, class_loop] : 79 | final_probs[index, class_loop] =0 80 | break 81 | final_probs[index2,class_loop]=0 82 | bb=BoundBox(class_length) 83 | bb.x = final_bbox[index, 0] 84 | bb.y = final_bbox[index, 1] 85 | bb.w = final_bbox[index, 2] 86 | bb.h = final_bbox[index, 3] 87 | bb.c = final_bbox[index, 4] 88 | bb.probs = np.asarray(final_probs[index,:]) 89 | boxes.append(bb) 90 | 91 | return boxes 92 | 93 | # cdef NMS(float[:, ::1] final_probs , float[:, ::1] final_bbox): 94 | # cdef list boxes = list() 95 | # cdef: 96 | # np.intp_t pred_length,class_length,class_loop,index,index2, i, j 97 | 98 | 99 | # pred_length = final_bbox.shape[0] 100 | # class_length = final_probs.shape[1] 101 | 102 | # for class_loop in range(class_length): 103 | # order = np.argsort(final_probs[:,class_loop])[::-1] 104 | # # First box 105 | # for i in range(pred_length): 106 | # index = order[i] 107 | # if final_probs[index, class_loop] == 0.: 108 | # continue 109 | # # Second box 110 | # for j in range(i+1, pred_length): 111 | # index2 = order[j] 112 | # if box_iou_c( 113 | # 
final_bbox[index,0],final_bbox[index,1], 114 | # final_bbox[index,2],final_bbox[index,3], 115 | # final_bbox[index2,0],final_bbox[index2,1], 116 | # final_bbox[index2,2],final_bbox[index2,3]) >= 0.4: 117 | # final_probs[index2, class_loop] = 0. 118 | 119 | # bb = BoundBox(class_length) 120 | # bb.x = final_bbox[index, 0] 121 | # bb.y = final_bbox[index, 1] 122 | # bb.w = final_bbox[index, 2] 123 | # bb.h = final_bbox[index, 3] 124 | # bb.c = final_bbox[index, 4] 125 | # bb.probs = np.asarray(final_probs[index,:]) 126 | # boxes.append(bb) 127 | 128 | # return boxes -------------------------------------------------------------------------------- /darkflow/dark/convolution.py: -------------------------------------------------------------------------------- 1 | from .layer import Layer 2 | import numpy as np 3 | 4 | class local_layer(Layer): 5 | def setup(self, ksize, c, n, stride, 6 | pad, w_, h_, activation): 7 | self.pad = pad * int(ksize / 2) 8 | self.activation = activation 9 | self.stride = stride 10 | self.ksize = ksize 11 | self.h_out = h_ 12 | self.w_out = w_ 13 | 14 | self.dnshape = [h_ * w_, n, c, ksize, ksize] 15 | self.wshape = dict({ 16 | 'biases': [h_ * w_ * n], 17 | 'kernels': [h_ * w_, ksize, ksize, c, n] 18 | }) 19 | 20 | def finalize(self, _): 21 | weights = self.w['kernels'] 22 | if weights is None: return 23 | weights = weights.reshape(self.dnshape) 24 | weights = weights.transpose([0,3,4,2,1]) 25 | self.w['kernels'] = weights 26 | 27 | class conv_extract_layer(Layer): 28 | def setup(self, ksize, c, n, stride, 29 | pad, batch_norm, activation, 30 | inp, out): 31 | if inp is None: inp = range(c) 32 | self.activation = activation 33 | self.batch_norm = batch_norm 34 | self.stride = stride 35 | self.ksize = ksize 36 | self.pad = pad 37 | self.inp = inp 38 | self.out = out 39 | self.wshape = dict({ 40 | 'biases': [len(out)], 41 | 'kernel': [ksize, ksize, len(inp), len(out)] 42 | }) 43 | 44 | @property 45 | def signature(self): 46 | sig = ['convolutional'] 47 | sig += self._signature[1:-2] 48 | return sig 49 | 50 | def present(self): 51 | args = self.signature 52 | self.presenter = convolutional_layer(*args) 53 | 54 | def recollect(self, w): 55 | if w is None: 56 | self.w = w 57 | return 58 | k = w['kernel'] 59 | b = w['biases'] 60 | k = np.take(k, self.inp, 2) 61 | k = np.take(k, self.out, 3) 62 | b = np.take(b, self.out) 63 | assert1 = k.shape == tuple(self.wshape['kernel']) 64 | assert2 = b.shape == tuple(self.wshape['biases']) 65 | assert assert1 and assert2, \ 66 | 'Dimension not matching in {} recollect'.format( 67 | self._signature) 68 | self.w['kernel'] = k 69 | self.w['biases'] = b 70 | 71 | 72 | class conv_select_layer(Layer): 73 | def setup(self, ksize, c, n, stride, 74 | pad, batch_norm, activation, 75 | keep_idx, real_n): 76 | self.batch_norm = bool(batch_norm) 77 | self.activation = activation 78 | self.keep_idx = keep_idx 79 | self.stride = stride 80 | self.ksize = ksize 81 | self.pad = pad 82 | self.wshape = dict({ 83 | 'biases': [real_n], 84 | 'kernel': [ksize, ksize, c, real_n] 85 | }) 86 | if self.batch_norm: 87 | self.wshape.update({ 88 | 'moving_variance' : [real_n], 89 | 'moving_mean': [real_n], 90 | 'gamma' : [real_n] 91 | }) 92 | self.h['is_training'] = { 93 | 'shape': (), 94 | 'feed': True, 95 | 'dfault': False 96 | } 97 | 98 | @property 99 | def signature(self): 100 | sig = ['convolutional'] 101 | sig += self._signature[1:-2] 102 | return sig 103 | 104 | def present(self): 105 | args = self.signature 106 | self.presenter = convolutional_layer(*args) 
107 | 108 | def recollect(self, w): 109 | if w is None: 110 | self.w = w 111 | return 112 | idx = self.keep_idx 113 | k = w['kernel'] 114 | b = w['biases'] 115 | self.w['kernel'] = np.take(k, idx, 3) 116 | self.w['biases'] = np.take(b, idx) 117 | if self.batch_norm: 118 | m = w['moving_mean'] 119 | v = w['moving_variance'] 120 | g = w['gamma'] 121 | self.w['moving_mean'] = np.take(m, idx) 122 | self.w['moving_variance'] = np.take(v, idx) 123 | self.w['gamma'] = np.take(g, idx) 124 | 125 | class convolutional_layer(Layer): 126 | def setup(self, ksize, c, n, stride, 127 | pad, batch_norm, activation): 128 | self.batch_norm = bool(batch_norm) 129 | self.activation = activation 130 | self.stride = stride 131 | self.ksize = ksize 132 | self.pad = pad 133 | self.dnshape = [n, c, ksize, ksize] # darknet shape 134 | self.wshape = dict({ 135 | 'biases': [n], 136 | 'kernel': [ksize, ksize, c, n] 137 | }) 138 | if self.batch_norm: 139 | self.wshape.update({ 140 | 'moving_variance' : [n], 141 | 'moving_mean': [n], 142 | 'gamma' : [n] 143 | }) 144 | self.h['is_training'] = { 145 | 'feed': True, 146 | 'dfault': False, 147 | 'shape': () 148 | } 149 | 150 | def finalize(self, _): 151 | """deal with darknet""" 152 | kernel = self.w['kernel'] 153 | if kernel is None: return 154 | kernel = kernel.reshape(self.dnshape) 155 | kernel = kernel.transpose([2,3,1,0]) 156 | self.w['kernel'] = kernel -------------------------------------------------------------------------------- /darkflow/defaults.py: -------------------------------------------------------------------------------- 1 | class argHandler(dict): 2 | #A super duper fancy custom made CLI argument handler!! 3 | __getattr__ = dict.get 4 | __setattr__ = dict.__setitem__ 5 | __delattr__ = dict.__delitem__ 6 | _descriptions = {'help, --h, -h': 'show this super helpful message and exit'} 7 | 8 | def setDefaults(self): 9 | self.define('imgdir', './sample_img/', 'path to testing directory with images') 10 | self.define('binary', './bin/', 'path to .weights directory') 11 | self.define('config', './cfg/', 'path to .cfg directory') 12 | self.define('dataset', '../pascal/VOCdevkit/IMG/', 'path to dataset directory') 13 | self.define('backup', './ckpt/', 'path to backup folder') 14 | self.define('summary', './summary/', 'path to TensorBoard summaries directory') 15 | self.define('annotation', '../pascal/VOCdevkit/ANN/', 'path to annotation directory') 16 | self.define('threshold', -0.1, 'detection threshold') 17 | self.define('model', '', 'configuration of choice') 18 | self.define('trainer', 'rmsprop', 'training algorithm') 19 | self.define('momentum', 0.0, 'applicable for rmsprop and momentum optimizers') 20 | self.define('verbalise', True, 'say out loud while building graph') 21 | self.define('train', False, 'train the whole net') 22 | self.define('load', '', 'how to initialize the net? Either from .weights or a checkpoint, or even from scratch') 23 | self.define('savepb', False, 'save net and weight to a .pb file') 24 | self.define('gpu', 0.0, 'how much gpu (from 0.0 to 1.0)') 25 | self.define('gpuName', '/gpu:0', 'GPU device name') 26 | self.define('lr', 1e-5, 'learning rate') 27 | self.define('keep',20,'Number of most recent training results to save') 28 | self.define('batch', 16, 'batch size') 29 | self.define('epoch', 1000, 'number of epoch') 30 | self.define('save', 2000, 'save checkpoint every ? 
training examples') 31 | self.define('demo', '', 'demo on webcam') 32 | self.define('queue', 1, 'process demo in batch') 33 | self.define('json', False, 'Outputs bounding box information in json format.') 34 | self.define('saveVideo', False, 'Records video from input video or camera') 35 | self.define('pbLoad', '', 'path to .pb protobuf file (metaLoad must also be specified)') 36 | self.define('metaLoad', '', 'path to .meta file generated during --savepb that corresponds to .pb file') 37 | 38 | def define(self, argName, default, description): 39 | self[argName] = default 40 | self._descriptions[argName] = description 41 | 42 | def help(self): 43 | print('Example usage: flow --imgdir sample_img/ --model cfg/yolo.cfg --load bin/yolo.weights') 44 | print('') 45 | print('Arguments:') 46 | spacing = max([len(i) for i in self._descriptions.keys()]) + 2 47 | for item in self._descriptions: 48 | currentSpacing = spacing - len(item) 49 | print(' --' + item + (' ' * currentSpacing) + self._descriptions[item]) 50 | print('') 51 | exit() 52 | 53 | def parseArgs(self, args): 54 | print('') 55 | i = 1 56 | while i < len(args): 57 | if args[i] == '-h' or args[i] == '--h' or args[i] == '--help': 58 | self.help() #Time for some self help! :) 59 | if len(args[i]) < 2: 60 | print('ERROR - Invalid argument: ' + args[i]) 61 | print('Try running flow --help') 62 | exit() 63 | argumentName = args[i][2:] 64 | if isinstance(self.get(argumentName), bool): 65 | if not (i + 1) >= len(args) and (args[i + 1].lower() != 'false' and args[i + 1].lower() != 'true') and not args[i + 1].startswith('--'): 66 | print('ERROR - Expected boolean value (or no value) following argument: ' + args[i]) 67 | print('Try running flow --help') 68 | exit() 69 | elif not (i + 1) >= len(args) and (args[i + 1].lower() == 'false' or args[i + 1].lower() == 'true'): 70 | self[argumentName] = (args[i + 1].lower() == 'true') 71 | i += 1 72 | else: 73 | self[argumentName] = True 74 | elif args[i].startswith('--') and not (i + 1) >= len(args) and not args[i + 1].startswith('--') and argumentName in self: 75 | if isinstance(self[argumentName], float): 76 | try: 77 | args[i + 1] = float(args[i + 1]) 78 | except: 79 | print('ERROR - Expected float for argument: ' + args[i]) 80 | print('Try running flow --help') 81 | exit() 82 | elif isinstance(self[argumentName], int): 83 | try: 84 | args[i + 1] = int(args[i + 1]) 85 | except: 86 | print('ERROR - Expected int for argument: ' + args[i]) 87 | print('Try running flow --help') 88 | exit() 89 | self[argumentName] = args[i + 1] 90 | i += 1 91 | else: 92 | print('ERROR - Invalid argument: ' + args[i]) 93 | print('Try running flow --help') 94 | exit() 95 | i += 1 96 | -------------------------------------------------------------------------------- /darkflow/utils/loader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | import darkflow.dark as dark 4 | import numpy as np 5 | from os import sep 6 | 7 | class loader(object): 8 | """ 9 | interface to work with both .weights and .ckpt files 10 | in loading / recollecting / resolving mode 11 | """ 12 | VAR_LAYER = ['convolutional', 'connected', 'local', 13 | 'select', 'conv-select', 14 | 'extract', 'conv-extract'] 15 | 16 | def __init__(self, *args): 17 | self.src_key = list() 18 | self.vals = list() 19 | self.load(*args) 20 | 21 | def __call__(self, key): 22 | for idx in range(len(key)): 23 | val = self.find(key, idx) 24 | if val is not None: return val 25 | return None 26 | 27 | def 
find(self, key, idx): 28 | up_to = min(len(self.src_key), 4) 29 | for i in range(up_to): 30 | key_b = self.src_key[i] 31 | if key_b[idx:] == key[idx:]: 32 | return self.yields(i) 33 | return None 34 | 35 | def yields(self, idx): 36 | del self.src_key[idx] 37 | temp = self.vals[idx] 38 | del self.vals[idx] 39 | return temp 40 | 41 | class weights_loader(loader): 42 | """one who understands .weights files""" 43 | 44 | _W_ORDER = dict({ # order of param flattened into .weights file 45 | 'convolutional': [ 46 | 'biases','gamma','moving_mean','moving_variance','kernel' 47 | ], 48 | 'connected': ['biases', 'weights'], 49 | 'local': ['biases', 'kernels'] 50 | }) 51 | 52 | def load(self, path, src_layers): 53 | self.src_layers = src_layers 54 | walker = weights_walker(path) 55 | 56 | for i, layer in enumerate(src_layers): 57 | if layer.type not in self.VAR_LAYER: continue 58 | self.src_key.append([layer]) 59 | 60 | if walker.eof: new = None 61 | else: 62 | args = layer.signature 63 | new = dark.darknet.create_darkop(*args) 64 | self.vals.append(new) 65 | 66 | if new is None: continue 67 | order = self._W_ORDER[new.type] 68 | for par in order: 69 | if par not in new.wshape: continue 70 | val = walker.walk(new.wsize[par]) 71 | new.w[par] = val 72 | new.finalize(walker.transpose) 73 | 74 | if walker.path is not None: 75 | assert walker.offset == walker.size, \ 76 | 'expect {} bytes, found {}'.format( 77 | walker.offset, walker.size) 78 | print('Successfully identified {} bytes'.format( 79 | walker.offset)) 80 | 81 | class checkpoint_loader(loader): 82 | """ 83 | one who understands .ckpt files, very much 84 | """ 85 | def load(self, ckpt, ignore): 86 | meta = ckpt + '.meta' 87 | with tf.Graph().as_default() as graph: 88 | with tf.Session().as_default() as sess: 89 | saver = tf.train.import_meta_graph(meta) 90 | saver.restore(sess, ckpt) 91 | for var in tf.global_variables(): 92 | name = var.name.split(':')[0] 93 | packet = [name, var.get_shape().as_list()] 94 | self.src_key += [packet] 95 | self.vals += [var.eval(sess)] 96 | 97 | def create_loader(path, cfg = None): 98 | if path is None: 99 | load_type = weights_loader 100 | elif '.weights' in path: 101 | load_type = weights_loader 102 | else: 103 | load_type = checkpoint_loader 104 | 105 | return load_type(path, cfg) 106 | 107 | class weights_walker(object): 108 | """incremental reader of float32 binary files""" 109 | def __init__(self, path): 110 | self.eof = False # end of file 111 | self.path = path # current pos 112 | if path is None: 113 | self.eof = True 114 | return 115 | else: 116 | self.size = os.path.getsize(path)# save the path 117 | major, minor, revision, seen = np.memmap(path, 118 | shape = (), mode = 'r', offset = 0, 119 | dtype = '({})i4,'.format(4)) 120 | self.transpose = major > 1000 or minor > 1000 121 | self.offset = 16 122 | 123 | def walk(self, size): 124 | if self.eof: return None 125 | end_point = self.offset + 4 * size 126 | assert end_point <= self.size, \ 127 | 'Over-read {}'.format(self.path) 128 | 129 | float32_1D_array = np.memmap( 130 | self.path, shape = (), mode = 'r', 131 | offset = self.offset, 132 | dtype='({})float32,'.format(size) 133 | ) 134 | 135 | self.offset = end_point 136 | if end_point == self.size: 137 | self.eof = True 138 | return float32_1D_array 139 | 140 | def model_name(file_path): 141 | file_name = file_path.split(sep)[-1] 142 | ext = str() 143 | if '.' 
in file_name: # exclude extension 144 | file_name = file_name.split('.') 145 | ext = file_name[-1] 146 | file_name = '.'.join(file_name[:-1]) 147 | if ext == str() or ext == 'meta': # ckpt file 148 | file_name = file_name.split('-') 149 | num = int(file_name[-1]) 150 | return '-'.join(file_name[:-1]) 151 | if ext == 'weights': 152 | return file_name -------------------------------------------------------------------------------- /darkflow/net/help.py: -------------------------------------------------------------------------------- 1 | """ 2 | tfnet secondary (helper) methods 3 | """ 4 | from darkflow.utils.loader import create_loader 5 | from time import time as timer 6 | import tensorflow as tf 7 | import numpy as np 8 | import sys 9 | import cv2 10 | import os 11 | 12 | old_graph_msg = 'Resolving old graph def {} (no guarantee)' 13 | 14 | def build_train_op(self): 15 | self.framework.loss(self.out) 16 | self.say('Building {} train op'.format(self.meta['model'])) 17 | optimizer = self._TRAINER[self.FLAGS.trainer](self.FLAGS.lr) 18 | gradients = optimizer.compute_gradients(self.framework.loss) 19 | self.train_op = optimizer.apply_gradients(gradients) 20 | 21 | def load_from_ckpt(self): 22 | if self.FLAGS.load < 0: # load lastest ckpt 23 | with open(self.FLAGS.backup + 'checkpoint', 'r') as f: 24 | last = f.readlines()[-1].strip() 25 | load_point = last.split(' ')[1] 26 | load_point = load_point.split('"')[1] 27 | load_point = load_point.split('-')[-1] 28 | self.FLAGS.load = int(load_point) 29 | 30 | load_point = os.path.join(self.FLAGS.backup, self.meta['name']) 31 | load_point = '{}-{}'.format(load_point, self.FLAGS.load) 32 | self.say('Loading from {}'.format(load_point)) 33 | try: self.saver.restore(self.sess, load_point) 34 | except: load_old_graph(self, load_point) 35 | 36 | def say(self, *msgs): 37 | if not self.FLAGS.verbalise: 38 | return 39 | msgs = list(msgs) 40 | for msg in msgs: 41 | if msg is None: continue 42 | print(msg) 43 | 44 | def load_old_graph(self, ckpt): 45 | ckpt_loader = create_loader(ckpt) 46 | self.say(old_graph_msg.format(ckpt)) 47 | 48 | for var in tf.global_variables(): 49 | name = var.name.split(':')[0] 50 | args = [name, var.get_shape()] 51 | val = ckpt_loader(args) 52 | assert val is not None, \ 53 | 'Cannot find and load {}'.format(var.name) 54 | shp = val.shape 55 | plh = tf.placeholder(tf.float32, shp) 56 | op = tf.assign(var, plh) 57 | self.sess.run(op, {plh: val}) 58 | 59 | def _get_fps(self, frame): 60 | elapsed = int() 61 | start = timer() 62 | preprocessed = self.framework.preprocess(frame) 63 | feed_dict = {self.inp: [preprocessed]} 64 | net_out = self.sess.run(self.out, feed_dict)[0] 65 | processed = self.framework.postprocess(net_out, frame, False) 66 | return timer() - start 67 | 68 | def camera(self): 69 | file = self.FLAGS.demo 70 | SaveVideo = self.FLAGS.saveVideo 71 | 72 | if file == 'camera': 73 | file = 0 74 | else: 75 | assert os.path.isfile(file), \ 76 | 'file {} does not exist'.format(file) 77 | 78 | camera = cv2.VideoCapture(file) 79 | assert camera.isOpened(), \ 80 | 'Cannot capture source' 81 | 82 | cv2.namedWindow('', 0) 83 | _, frame = camera.read() 84 | height, width, _ = frame.shape 85 | cv2.resizeWindow('', width, height) 86 | 87 | if SaveVideo: 88 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 89 | if file == 0: 90 | fps = 1 / self._get_fps(frame) 91 | if fps < 1: 92 | fps = 1 93 | else: 94 | fps = round(camera.get(cv2.CAP_PROP_FPS)) 95 | videoWriter = cv2.VideoWriter( 96 | 'video.avi', fourcc, fps, (width, height)) 97 | 98 | # 
buffers for demo in batch 99 | buffer_inp = list() 100 | buffer_pre = list() 101 | 102 | elapsed = int() 103 | start = timer() 104 | self.say('Press [ESC] to quit demo') 105 | # Loop through frames 106 | while camera.isOpened(): 107 | elapsed += 1 108 | _, frame = camera.read() 109 | if frame is None: 110 | print ('\nEnd of Video') 111 | break 112 | preprocessed = self.framework.preprocess(frame) 113 | buffer_inp.append(frame) 114 | buffer_pre.append(preprocessed) 115 | 116 | # Only process and imshow when queue is full 117 | if elapsed % self.FLAGS.queue == 0: 118 | feed_dict = {self.inp: buffer_pre} 119 | net_out = self.sess.run(self.out, feed_dict) 120 | for img, single_out in zip(buffer_inp, net_out): 121 | postprocessed = self.framework.postprocess( 122 | single_out, img, False) 123 | if SaveVideo: 124 | videoWriter.write(postprocessed) 125 | cv2.imshow('', postprocessed) 126 | # Clear Buffers 127 | buffer_inp = list() 128 | buffer_pre = list() 129 | 130 | if elapsed % 5 == 0: 131 | sys.stdout.write('\r') 132 | sys.stdout.write('{0:3.3f} FPS'.format( 133 | elapsed / (timer() - start))) 134 | sys.stdout.flush() 135 | choice = cv2.waitKey(1) 136 | if choice == 27: break 137 | 138 | sys.stdout.write('\n') 139 | if SaveVideo: 140 | videoWriter.release() 141 | camera.release() 142 | cv2.destroyAllWindows() 143 | 144 | def to_darknet(self): 145 | darknet_ckpt = self.darknet 146 | 147 | with self.graph.as_default() as g: 148 | for var in tf.global_variables(): 149 | name = var.name.split(':')[0] 150 | var_name = name.split('-') 151 | l_idx = int(var_name[0]) 152 | w_sig = var_name[1].split('/')[-1] 153 | l = darknet_ckpt.layers[l_idx] 154 | l.w[w_sig] = var.eval(self.sess) 155 | 156 | for layer in darknet_ckpt.layers: 157 | for ph in layer.h: 158 | layer.h[ph] = None 159 | 160 | return darknet_ckpt 161 | -------------------------------------------------------------------------------- /darkflow/net/build.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import time 3 | from . import help 4 | from . 
import flow 5 | from .ops import op_create, identity 6 | from .ops import HEADER, LINE 7 | from .framework import create_framework 8 | from darkflow.dark.darknet import Darknet 9 | import json 10 | import os 11 | 12 | class TFNet(object): 13 | 14 | _TRAINER = dict({ 15 | 'rmsprop': tf.train.RMSPropOptimizer, 16 | 'adadelta': tf.train.AdadeltaOptimizer, 17 | 'adagrad': tf.train.AdagradOptimizer, 18 | 'adagradDA': tf.train.AdagradDAOptimizer, 19 | 'momentum': tf.train.MomentumOptimizer, 20 | 'adam': tf.train.AdamOptimizer, 21 | 'ftrl': tf.train.FtrlOptimizer, 22 | 'sgd': tf.train.GradientDescentOptimizer 23 | }) 24 | 25 | # imported methods 26 | say = help.say 27 | train = flow.train 28 | camera = help.camera 29 | predict = flow.predict 30 | return_predict = flow.return_predict 31 | to_darknet = help.to_darknet 32 | build_train_op = help.build_train_op 33 | load_from_ckpt = help.load_from_ckpt 34 | 35 | def __init__(self, FLAGS, darknet = None): 36 | self.ntrain = 0 37 | 38 | if isinstance(FLAGS, dict): 39 | from ..defaults import argHandler 40 | newFLAGS = argHandler() 41 | newFLAGS.setDefaults() 42 | newFLAGS.update(FLAGS) 43 | FLAGS = newFLAGS 44 | 45 | self.FLAGS = FLAGS 46 | if self.FLAGS.pbLoad and self.FLAGS.metaLoad: 47 | self.say('\nLoading from .pb and .meta') 48 | self.graph = tf.Graph() 49 | device_name = FLAGS.gpuName \ 50 | if FLAGS.gpu > 0.0 else None 51 | with tf.device(device_name): 52 | with self.graph.as_default() as g: 53 | self.build_from_pb() 54 | return 55 | 56 | if darknet is None: 57 | darknet = Darknet(FLAGS) 58 | self.ntrain = len(darknet.layers) 59 | 60 | self.darknet = darknet 61 | args = [darknet.meta, FLAGS] 62 | self.num_layer = len(darknet.layers) 63 | self.framework = create_framework(*args) 64 | 65 | self.meta = darknet.meta 66 | 67 | self.say('\nBuilding net ...') 68 | start = time.time() 69 | self.graph = tf.Graph() 70 | device_name = FLAGS.gpuName \ 71 | if FLAGS.gpu > 0.0 else None 72 | with tf.device(device_name): 73 | with self.graph.as_default() as g: 74 | self.build_forward() 75 | self.setup_meta_ops() 76 | self.say('Finished in {}s\n'.format( 77 | time.time() - start)) 78 | 79 | def build_from_pb(self): 80 | with tf.gfile.FastGFile(self.FLAGS.pbLoad, "rb") as f: 81 | graph_def = tf.GraphDef() 82 | graph_def.ParseFromString(f.read()) 83 | 84 | tf.import_graph_def( 85 | graph_def, 86 | name="" 87 | ) 88 | with open(self.FLAGS.metaLoad, 'r') as fp: 89 | self.meta = json.load(fp) 90 | self.framework = create_framework(self.meta, self.FLAGS) 91 | 92 | # Placeholders 93 | self.inp = tf.get_default_graph().get_tensor_by_name('input:0') 94 | self.feed = dict() # other placeholders 95 | self.out = tf.get_default_graph().get_tensor_by_name('output:0') 96 | 97 | self.setup_meta_ops() 98 | 99 | def build_forward(self): 100 | verbalise = self.FLAGS.verbalise 101 | 102 | # Placeholders 103 | inp_size = [None] + self.meta['inp_size'] 104 | self.inp = tf.placeholder(tf.float32, inp_size, 'input') 105 | self.feed = dict() # other placeholders 106 | 107 | # Build the forward pass 108 | state = identity(self.inp) 109 | roof = self.num_layer - self.ntrain 110 | self.say(HEADER, LINE) 111 | for i, layer in enumerate(self.darknet.layers): 112 | scope = '{}-{}'.format(str(i),layer.type) 113 | args = [layer, state, i, roof, self.feed] 114 | state = op_create(*args) 115 | mess = state.verbalise() 116 | self.say(mess) 117 | self.say(LINE) 118 | 119 | self.top = state 120 | self.out = tf.identity(state.out, name='output') 121 | 122 | def setup_meta_ops(self): 123 | cfg = 
dict({ 124 | 'allow_soft_placement': False, 125 | 'log_device_placement': False 126 | }) 127 | 128 | utility = min(self.FLAGS.gpu, 1.) 129 | if utility > 0.0: 130 | self.say('GPU mode with {} usage'.format(utility)) 131 | cfg['gpu_options'] = tf.GPUOptions( 132 | per_process_gpu_memory_fraction = utility) 133 | cfg['allow_soft_placement'] = True 134 | else: 135 | self.say('Running entirely on CPU') 136 | cfg['device_count'] = {'GPU': 0} 137 | 138 | if self.FLAGS.train: self.build_train_op() 139 | 140 | if self.FLAGS.summary is not None: 141 | self.summary_op = tf.summary.merge_all() 142 | self.writer = tf.summary.FileWriter(self.FLAGS.summary + 'train') 143 | 144 | self.sess = tf.Session(config = tf.ConfigProto(**cfg)) 145 | self.sess.run(tf.global_variables_initializer()) 146 | 147 | if not self.ntrain: return 148 | self.saver = tf.train.Saver(tf.global_variables(), 149 | max_to_keep = self.FLAGS.keep) 150 | if self.FLAGS.load != 0: self.load_from_ckpt() 151 | 152 | if self.FLAGS.summary is not None: 153 | self.writer.add_graph(self.sess.graph) 154 | 155 | def savepb(self): 156 | """ 157 | Create a standalone const graph def that 158 | C++ can load and run. 159 | """ 160 | darknet_pb = self.to_darknet() 161 | flags_pb = self.FLAGS 162 | flags_pb.verbalise = False 163 | 164 | flags_pb.train = False 165 | # rebuild another tfnet. all const. 166 | tfnet_pb = TFNet(flags_pb, darknet_pb) 167 | tfnet_pb.sess = tf.Session(graph = tfnet_pb.graph) 168 | # tfnet_pb.predict() # uncomment for unit testing 169 | name = 'built_graph/{}.pb'.format(self.meta['name']) 170 | print(name) 171 | os.makedirs(os.path.dirname(name), exist_ok=True) 172 | #Save dump of everything in meta 173 | with open('built_graph/{}.meta'.format(self.meta['name']), 'w') as fp: 174 | json.dump(self.meta, fp) 175 | self.say('Saving const graph def to {}'.format(name)) 176 | graph_def = tfnet_pb.sess.graph_def 177 | tf.train.write_graph(graph_def,'./', name, False) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Intro 2 | 3 | [![Build Status](https://travis-ci.org/thtrieu/darkflow.svg?branch=master)](https://travis-ci.org/thtrieu/darkflow) [![codecov](https://codecov.io/gh/thtrieu/darkflow/branch/master/graph/badge.svg)](https://codecov.io/gh/thtrieu/darkflow) 4 | 5 | Real-time object detection and classification. Paper: [version 1](https://arxiv.org/pdf/1506.02640.pdf), [version 2](https://arxiv.org/pdf/1612.08242.pdf). 6 | 7 | Read more about YOLO (in darknet) and download weight files [here](http://pjreddie.com/darknet/yolo/). In case the weight file cannot be found, I uploaded some of mine [here](https://drive.google.com/drive/folders/0B1tW_VtY7onidEwyQ2FtQVplWEU), which include `yolo-full` and `yolo-tiny` of v1.0, `tiny-yolo-v1.1` of v1.1 and `yolo`, `tiny-yolo-voc` of v2. 8 | 9 | 10 | Click on this image to see demo from yolov2: 11 | 12 | [![img](preview.png)](http://i.imgur.com/EyZZKAA.gif) 13 | 14 | ## Dependencies 15 | 16 | Python3, tensorflow 1.0, numpy, opencv 3. 17 | 18 | ### Getting started 19 | 20 | There are three ways to get started with darkflow. 21 | 22 | 1. Just build the Cython extensions in place. 23 | ``` 24 | python3 setup.py build_ext --inplace 25 | ``` 26 | 27 | 2. Let pip install darkflow in dev mode (globally accessible but changes to the code immediately take effect) 28 | ``` 29 | pip install -e . 30 | ``` 31 | 32 | 3. Install with pip globally 33 | ``` 34 | pip install . 
35 | ```
36 | 
37 | ## Update
38 | 
39 | **Android demo on TensorFlow's repo is available** [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowYoloDetector.java)
40 | 
41 | **I am looking for help:**
42 | - `help wanted` labels in the issue tracker
43 | 
44 | ## Parsing the annotations
45 | 
46 | Skip this if you are not training or fine-tuning anything (you simply want to forward flow a trained net).
47 | 
48 | For example, if you want to work with only 3 classes `tvmonitor`, `person`, `pottedplant`, edit `labels.txt` as follows
49 | 
50 | ```
51 | tvmonitor
52 | person
53 | pottedplant
54 | ```
55 | 
56 | And that's it. `darkflow` will take care of the rest.
57 | 
58 | ## Design the net
59 | 
60 | Skip this if you are working with one of the original configurations since they are already there. Otherwise, see the following example:
61 | 
62 | ```python
63 | ...
64 | 
65 | [convolutional]
66 | batch_normalize = 1
67 | size = 3
68 | stride = 1
69 | pad = 1
70 | activation = leaky
71 | 
72 | [maxpool]
73 | 
74 | [connected]
75 | output = 4096
76 | activation = linear
77 | 
78 | ...
79 | ```
80 | 
81 | ## Flowing the graph using `flow`
82 | 
83 | ```bash
84 | # Have a look at its options
85 | flow --h
86 | ```
87 | 
88 | First, let's take a closer look at one of the most useful options, `--load`
89 | 
90 | ```bash
91 | # 1. Load yolo-tiny.weights
92 | flow --model cfg/yolo-tiny.cfg --load bin/yolo-tiny.weights
93 | 
94 | # 2. To completely initialize a model, leave out the --load option
95 | flow --model cfg/yolo-new.cfg
96 | 
97 | # 3. It is useful to reuse the first identical layers of tiny for `yolo-new`
98 | flow --model cfg/yolo-new.cfg --load bin/yolo-tiny.weights
99 | # this will print out which layers are reused, which are initialized
100 | ```
101 | 
102 | All input images from the default folder `sample_img/` are flowed through the net and predictions are put in `sample_img/out/`. We can always specify more parameters for such forward passes, such as detection threshold, batch size, images folder, etc.
103 | 
104 | ```bash
105 | # Forward all images in sample_img/ using tiny yolo and 100% GPU usage
106 | flow --imgdir sample_img/ --model cfg/yolo-tiny.cfg --load bin/yolo-tiny.weights --gpu 1.0
107 | ```
108 | JSON output can be generated with descriptions of the label, confidence, and pixel location of each bounding box. Each prediction is stored in the `sample_img/out` folder by default. An example JSON array is shown below.
109 | ```bash
110 | # Forward all images in sample_img/ using tiny yolo and JSON output.
111 | flow --imgdir sample_img/ --model cfg/yolo-tiny.cfg --load bin/yolo-tiny.weights --json
112 | ```
113 | JSON output:
114 | ```json
115 | [{"label":"person", "confidence": 0.56, "topleft": {"x": 184, "y": 101}, "bottomright": {"x": 274, "y": 382}},
116 | {"label": "dog", "confidence": 0.32, "topleft": {"x": 71, "y": 263}, "bottomright": {"x": 193, "y": 353}},
117 | {"label": "horse", "confidence": 0.76, "topleft": {"x": 412, "y": 109}, "bottomright": {"x": 592,"y": 337}}]
118 | ```
119 | - label: the class of the detected object
120 | - confidence: somewhere between 0 and 1 (how confident YOLO is about that detection)
121 | - topleft: pixel coordinate of the top left corner of the box.
122 | - bottomright: pixel coordinate of the bottom right corner of the box.
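For instance, a minimal post-processing sketch in Python (not part of darkflow; the file name below assumes the default output location for `sample_dog.jpg`):

```python
import json

# Assumed path: darkflow writes one .json file per image into sample_img/out/
with open("sample_img/out/sample_dog.json") as f:
    detections = json.load(f)

# Keep only reasonably confident detections and print their boxes
for det in detections:
    if det["confidence"] > 0.5:
        tl, br = det["topleft"], det["bottomright"]
        print("{}: ({}, {}) -> ({}, {})".format(
            det["label"], tl["x"], tl["y"], br["x"], br["y"]))
```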
123 | 
124 | ## Training new model
125 | 
126 | Training is simple as you only have to add the option `--train`. The training set and annotations will be parsed if this is the first time a new configuration is trained. To point to the training set and annotations, use the options `--dataset` and `--annotation`. A few examples:
127 | 
128 | ```bash
129 | # Initialize yolo-new from yolo-tiny, then train the net on 100% GPU:
130 | flow --model cfg/yolo-new.cfg --load bin/yolo-tiny.weights --train --gpu 1.0
131 | 
132 | # Completely initialize yolo-new and train it with ADAM optimizer
133 | flow --model cfg/yolo-new.cfg --train --trainer adam
134 | ```
135 | 
136 | During training, the script will occasionally save intermediate results into Tensorflow checkpoints, stored in `ckpt/`. To resume from any checkpoint before performing training/testing, use the `--load [checkpoint_num]` option; if `checkpoint_num < 0`, `darkflow` will load the most recent save by parsing `ckpt/checkpoint`.
137 | 
138 | ```bash
139 | # Resume the most recent checkpoint for training
140 | flow --train --model cfg/yolo-new.cfg --load -1
141 | 
142 | # Test with checkpoint at step 1500
143 | flow --model cfg/yolo-new.cfg --load 1500
144 | 
145 | # Fine-tune yolo-tiny from the original weights
146 | flow --train --model cfg/yolo-tiny.cfg --load bin/yolo-tiny.weights
147 | ```
148 | 
149 | Example of training on Pascal VOC 2007:
150 | ```bash
151 | # Download the Pascal VOC dataset:
152 | curl -O https://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
153 | tar xf VOCtest_06-Nov-2007.tar
154 | 
155 | # An example of the Pascal VOC annotation format:
156 | vim VOCdevkit/VOC2007/Annotations/000001.xml
157 | 
158 | # Train the net on the Pascal dataset:
159 | flow --model cfg/yolo-new.cfg --train --dataset "~/VOCdevkit/VOC2007/JPEGImages" --annotation "~/VOCdevkit/VOC2007/Annotations"
160 | ```
161 | 
162 | ## Camera/video file demo
163 | 
164 | For a demo that runs entirely on the CPU:
165 | 
166 | ```bash
167 | flow --model cfg/yolo-new.cfg --load bin/yolo-new.weights --demo videofile.avi
168 | ```
169 | 
170 | For a demo that runs 100% on the GPU:
171 | 
172 | ```bash
173 | flow --model cfg/yolo-new.cfg --load bin/yolo-new.weights --demo videofile.avi --gpu 1.0
174 | ```
175 | 
176 | To use your webcam/camera, simply replace `videofile.avi` with the keyword `camera`.
177 | 
178 | To save a video with the predicted bounding boxes, add the `--saveVideo` option.
179 | 
180 | ## Using darkflow from another python application
181 | 
182 | Please note that `return_predict(img)` must take a `numpy.ndarray`. Your image must be loaded beforehand and passed to `return_predict(img)`. Passing the file path won't work.
183 | 
184 | Result from `return_predict(img)` will be a list of dictionaries representing each detected object's values in the same format as the JSON output listed above.
185 | 
186 | ```python
187 | from darkflow.net.build import TFNet
188 | import cv2
189 | 
190 | options = {"model": "cfg/yolo.cfg", "load": "bin/yolo.weights", "threshold": 0.1}
191 | 
192 | tfnet = TFNet(options)
193 | 
194 | imgcv = cv2.imread("./sample_img/sample_dog.jpg")
195 | result = tfnet.return_predict(imgcv)
196 | print(result)
197 | ```
198 | 
199 | 
200 | ## Save the built graph to a protobuf file (`.pb`)
201 | 
202 | ```bash
203 | ## Saving the latest checkpoint to protobuf file
204 | flow --model cfg/yolo-new.cfg --load -1 --savepb
205 | 
206 | ## Saving graph and weights to protobuf file
207 | flow --model cfg/yolo.cfg --load bin/yolo.weights --savepb
208 | ```
209 | When saving the `.pb` file, a `.meta` file will also be generated alongside it. This `.meta` file is a JSON dump of everything in the `meta` dictionary that contains information necessary for post-processing, such as `anchors` and `labels`. This way, everything you need to make predictions from the graph and do post-processing is contained in those two files - no need to have the `.cfg` or any labels file tagging along.
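If you would rather load these two files by hand (outside darkflow), the following minimal sketch with the TensorFlow 1.x API shows the idea. The `built_graph/yolo.pb` and `built_graph/yolo.meta` paths are assumptions based on the `--savepb` commands above, and the tensor names follow the convention described in the next paragraph:

```python
import json
import tensorflow as tf

# Parse the frozen graph def written by --savepb (path is an assumption)
with tf.gfile.GFile("built_graph/yolo.pb", "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# The .meta file is plain JSON holding post-processing info (anchors, labels, ...)
with open("built_graph/yolo.meta") as f:
    meta = json.load(f)

graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name="")

inp = graph.get_tensor_by_name("input:0")
out = graph.get_tensor_by_name("output:0")
print(meta["labels"], inp.get_shape(), out.get_shape())
```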
210 | 
211 | The created `.pb` file can be used to migrate the graph to mobile devices (JAVA / C++ / Objective-C++). The names of the input and output tensors are `'input'` and `'output'`, respectively. For further usage of this protobuf file, please refer to the official documentation of `Tensorflow` on the C++ API [_here_](https://www.tensorflow.org/versions/r0.9/api_docs/cc/index.html). To run it in, say, an iOS application, simply add the file to Bundle Resources and update the path to this file inside the source code.
212 | 
213 | Also, darkflow supports loading from a `.pb` and `.meta` file for generating predictions (instead of loading from a `.cfg` and checkpoint or `.weights`).
214 | ```bash
215 | ## Forward images in sample_img for predictions based on protobuf file
216 | flow --pbLoad graph-cfg/yolo.pb --metaLoad graph-cfg/yolo.meta --imgdir sample_img/
217 | ```
218 | 
219 | If you'd like to load a `.pb` and `.meta` file when using `return_predict()`, you can set the `"pbLoad"` and `"metaLoad"` options in place of the `"model"` and `"load"` options you would normally set; see the snippet below.
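For example (a sketch; the `built_graph/` paths assume the `--savepb` output from the previous section):

```python
from darkflow.net.build import TFNet
import cv2

# Swap "model"/"load" for "pbLoad"/"metaLoad"; everything else stays the same
options = {"pbLoad": "built_graph/yolo.pb",
           "metaLoad": "built_graph/yolo.meta",
           "threshold": 0.4}

tfnet = TFNet(options)
result = tfnet.return_predict(cv2.imread("./sample_img/sample_dog.jpg"))
print(result)
```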
220 | That's all.
221 | 
--------------------------------------------------------------------------------
/test/test_darkflow.py:
--------------------------------------------------------------------------------
1 | from darkflow.net.build import TFNet
2 | from darkflow.cli import cliHandler
3 | import json
4 | import requests
5 | import cv2
6 | import os
7 | import sys
8 | import pytest
9 | 
10 | #NOTE: This file is designed to be run in the TravisCI environment. If you want to run it locally, set the environment variable TRAVIS_BUILD_DIR to the base
11 | #      directory of the cloned darkflow repository. WARNING: This file deletes images from sample_img/ that won't be used for testing (so don't run it
12 | #      locally if you don't want this happening!)
13 | 
14 | #Settings
15 | imgWidth = 640
16 | imgHeight = 424
17 | buildPath = os.environ.get("TRAVIS_BUILD_DIR")
18 | 
19 | if buildPath is None:
20 |     print()
21 |     print("TRAVIS_BUILD_DIR environment variable was not found - is this running on TravisCI?")
22 |     print("If you want to test this locally, set TRAVIS_BUILD_DIR to the base directory of the cloned darkflow repository.")
23 |     exit()
24 | testImgPath = os.path.join(buildPath, "sample_img", "sample_person.jpg")
25 | expectedDetectedObjectsV1 = [{"label": "dog","confidence": 0.46,"topleft": {"x": 84, "y": 249},"bottomright": {"x": 208,"y": 367}},
26 |                              {"label": "person","confidence": 0.60,"topleft": {"x": 159, "y": 102},"bottomright": {"x": 304,"y": 365}}]
27 | 
28 | expectedDetectedObjectsV2 = [{"label":"person","confidence":0.82,"topleft":{"x":189,"y":96},"bottomright":{"x":271,"y":380}},
29 |                              {"label":"dog","confidence":0.79,"topleft":{"x":69,"y":258},"bottomright":{"x":209,"y":354}},
30 |                              {"label":"horse","confidence":0.89,"topleft":{"x":397,"y":127},"bottomright":{"x":605,"y":352}}]
31 | posCompareThreshold = 0.05 #Comparisons must be within 5% of width/height when compared to the expected value
32 | threshCompareThreshold = 0.1 #Comparisons must match within 0.1 of expected threshold for each prediction
33 | yoloDownloadV1 = "https://pjreddie.com/media/files/yolo-small.weights"
34 | yoloDownloadV2 = "https://pjreddie.com/media/files/yolo.weights"
35 | 
36 | def download_file(url, savePath):
37 |     fileName = savePath.split("/")[-1]
38 |     if not os.path.isfile(savePath):
39 |         os.makedirs(os.path.dirname(savePath), exist_ok=True) #Make directories necessary for the file in case they don't exist
40 |         print("Downloading " + fileName + " file...")
41 |         r = requests.get(url, stream=True)
42 |         with open(savePath, 'wb') as f:
43 |             for chunk in r.iter_content(chunk_size=1024):
44 |                 if chunk: # filter out keep-alive new chunks
45 |                     f.write(chunk)
46 |         r.close()
47 |     else:
48 |         print("Found existing " + fileName + " file.")
49 | 
50 | yoloWeightPathV1 = os.path.join(buildPath, "bin", yoloDownloadV1.split("/")[-1])
51 | yoloCfgPathV1 = os.path.join(buildPath, "cfg", "v1", "{0}.cfg".format(os.path.splitext(os.path.basename(yoloWeightPathV1))[0]))
52 | 
53 | yoloWeightPathV2 = os.path.join(buildPath, "bin", yoloDownloadV2.split("/")[-1])
54 | yoloCfgPathV2 = os.path.join(buildPath, "cfg", "{0}.cfg".format(os.path.splitext(os.path.basename(yoloWeightPathV2))[0]))
55 | 
56 | pbPath = os.path.join(buildPath, "built_graph", os.path.splitext(os.path.basename(yoloWeightPathV2))[0] + ".pb")
57 | metaPath = os.path.join(buildPath, "built_graph", os.path.splitext(os.path.basename(yoloWeightPathV2))[0] + ".meta")
58 | 
59 | generalConfigPath = os.path.join(buildPath, "cfg")
60 | 
61 | download_file(yoloDownloadV1, yoloWeightPathV1) #Check if we need to download (and if so download) the YOLOv1 weights
62 | download_file(yoloDownloadV2, yoloWeightPathV2) #Check if we need to download (and if so download) the YOLOv2 weights
63 | 
64 | def executeCLI(commandString):
65 |     print()
66 |     print("Executing: {0}".format(commandString))
67 |     print()
68 |     splitArgs = [item.strip() for item in commandString.split(" ")]
69 |     cliHandler(splitArgs) #Run the command
70 |     print()
71 | 
72 | def compareSingleObjects(firstObject, secondObject, width, height):
73 |     if(abs(firstObject["topleft"]["x"] - secondObject["topleft"]["x"]) > width * posCompareThreshold):
74 |         return False
75 |     if(abs(firstObject["topleft"]["y"] - secondObject["topleft"]["y"]) > height * posCompareThreshold):
76 |         return False
77 |     if(abs(firstObject["bottomright"]["x"] - secondObject["bottomright"]["x"]) > width * posCompareThreshold):
78 |         return False
79 |     if(abs(firstObject["bottomright"]["y"] - secondObject["bottomright"]["y"]) > height * posCompareThreshold):
80 |         return False
81 |     if(abs(firstObject["confidence"] - secondObject["confidence"]) > threshCompareThreshold):
82 |         return False
83 |     return True
84 | 
85 | def compareObjectData(defaultObjects, newObjects, width, height):
86 |     currentlyFound = False
87 |     for firstObject in defaultObjects:
88 |         currentlyFound = False
89 |         for secondObject in newObjects:
90 |             if compareSingleObjects(firstObject, secondObject, width, height):
91 |                 currentlyFound = True
92 |                 break
93 |         if not currentlyFound:
94 |             return False
95 |     return True
96 | 
97 | #Delete all images that won't be tested on so forwarding the whole folder doesn't take forever
98 | filelist = [f for f in os.listdir(os.path.dirname(testImgPath)) if os.path.isfile(os.path.join(os.path.dirname(testImgPath), f)) and f != os.path.basename(testImgPath)]
99 | for f in filelist:
100 |     os.remove(os.path.join(os.path.dirname(testImgPath), f))
101 | 
102 | def test_CLI_IMG_YOLOv2():
103 |     #Test predictions outputted to an image using the YOLOv2 model through CLI
104 |     #NOTE: This test currently does not verify anything about the image created (i.e. proper labeling, proper positioning of prediction boxes, etc.)
105 |     #      it simply verifies that the code executes properly and that the expected output image is indeed created in ./test/img/out
106 | 
107 |     testString = "flow --imgdir {0} --model {1} --load {2} --config {3} --threshold 0.4".format(os.path.dirname(testImgPath), yoloCfgPathV2, yoloWeightPathV2, generalConfigPath)
108 |     executeCLI(testString)
109 | 
110 |     outputImgPath = os.path.join(os.path.dirname(testImgPath), "out", os.path.basename(testImgPath))
111 |     assert os.path.exists(outputImgPath), "Expected output image: {0} was not found.".format(outputImgPath)
112 | 
113 | def test_CLI_JSON_YOLOv2():
114 |     #Test predictions outputted to a JSON file using the YOLOv2 model through CLI
115 |     #NOTE: This test verifies that the code executes properly, the JSON file is created properly and the predictions generated are within a certain
116 |     #      margin of error when compared to the expected predictions.
117 | 
118 |     testString = "flow --imgdir {0} --model {1} --load {2} --config {3} --threshold 0.4 --json".format(os.path.dirname(testImgPath), yoloCfgPathV2, yoloWeightPathV2, generalConfigPath)
119 |     executeCLI(testString)
120 | 
121 |     outputJSONPath = os.path.join(os.path.dirname(testImgPath), "out", os.path.splitext(os.path.basename(testImgPath))[0] + ".json")
122 |     assert os.path.exists(outputJSONPath), "Expected output JSON file: {0} was not found.".format(outputJSONPath)
123 | 
124 |     with open(outputJSONPath) as json_file:
125 |         loadedPredictions = json.load(json_file)
126 | 
127 |     assert compareObjectData(expectedDetectedObjectsV2, loadedPredictions, imgWidth, imgHeight), "Generated object predictions from JSON were not within margin of error compared to expected values."
128 | 
129 | def test_CLI_SAVEPB_YOLOv2():
130 |     #Save .pb and .meta as generated from the YOLOv2 model through CLI
131 |     #NOTE: This test verifies that the code executes properly, and the .pb and .meta files are successfully created. A subsequent test will verify the
132 |     #      contents of those files.
133 | 
134 |     testString = "flow --model {0} --load {1} --config {2} --threshold 0.4 --savepb".format(yoloCfgPathV2, yoloWeightPathV2, generalConfigPath)
135 | 
136 |     with pytest.raises(SystemExit):
137 |         executeCLI(testString)
138 | 
139 |     assert os.path.exists(pbPath), "Expected output .pb file: {0} was not found.".format(pbPath)
140 |     assert os.path.exists(metaPath), "Expected output .meta file: {0} was not found.".format(metaPath)
141 | 
142 | def test_RETURNPREDICT_PBLOAD_YOLOv2():
143 |     #Test the .pb and .meta files generated in the previous step
144 |     #NOTE: This test verifies that the code executes properly, and the .pb and .meta files that were created are able to be loaded and used for inference.
145 |     #      The predictions that are generated will be compared against expected predictions.
146 | 
147 |     options = {"pbLoad": pbPath, "metaLoad": metaPath, "threshold": 0.4}
148 |     tfnet = TFNet(options)
149 |     imgcv = cv2.imread(testImgPath)
150 |     loadedPredictions = tfnet.return_predict(imgcv)
151 | 
152 |     assert compareObjectData(expectedDetectedObjectsV2, loadedPredictions, imgWidth, imgHeight), "Generated object predictions from return_predict() were not within margin of error compared to expected values."
153 | 
154 | def test_RETURNPREDICT_YOLOv1():
155 |     #Test YOLOv1 using normal .weights and .cfg
156 |     #NOTE: This test verifies that the code executes properly, and that the predictions generated are within the accepted margin of error to the expected predictions.
157 | 
158 |     options = {"model": yoloCfgPathV1, "load": yoloWeightPathV1, "config": generalConfigPath, "threshold": 0.4}
159 |     tfnet = TFNet(options)
160 |     imgcv = cv2.imread(testImgPath)
161 |     loadedPredictions = tfnet.return_predict(imgcv)
162 | 
163 |     assert compareObjectData(expectedDetectedObjectsV1, loadedPredictions, imgWidth, imgHeight), "Generated object predictions from return_predict() were not within margin of error compared to expected values."
--------------------------------------------------------------------------------
/darkflow/utils/process.py:
--------------------------------------------------------------------------------
1 | """
2 | WARNING: spaghetti code.
3 | """ 4 | 5 | import numpy as np 6 | import pickle 7 | import os 8 | 9 | def parser(model): 10 | """ 11 | Read the .cfg file to extract layers into `layers` 12 | as well as model-specific parameters into `meta` 13 | """ 14 | def _parse(l, i = 1): 15 | return l.split('=')[i].strip() 16 | 17 | with open(model, 'rb') as f: 18 | lines = f.readlines() 19 | 20 | lines = [line.decode() for line in lines] 21 | 22 | meta = dict(); layers = list() # will contains layers' info 23 | h, w, c = [int()] * 3; layer = dict() 24 | for line in lines: 25 | line = line.strip() 26 | line = line.split('#')[0] 27 | if '[' in line: 28 | if layer != dict(): 29 | if layer['type'] == '[net]': 30 | h = layer['height'] 31 | w = layer['width'] 32 | c = layer['channels'] 33 | meta['net'] = layer 34 | else: 35 | if layer['type'] == '[crop]': 36 | h = layer['crop_height'] 37 | w = layer['crop_width'] 38 | layers += [layer] 39 | layer = {'type': line} 40 | else: 41 | try: 42 | i = float(_parse(line)) 43 | if i == int(i): i = int(i) 44 | layer[line.split('=')[0].strip()] = i 45 | except: 46 | try: 47 | key = _parse(line, 0) 48 | val = _parse(line, 1) 49 | layer[key] = val 50 | except: 51 | 'banana ninja yadayada' 52 | 53 | meta.update(layer) # last layer contains meta info 54 | if 'anchors' in meta: 55 | splits = meta['anchors'].split(',') 56 | anchors = [float(x.strip()) for x in splits] 57 | meta['anchors'] = anchors 58 | meta['model'] = model # path to cfg, not model name 59 | meta['inp_size'] = [h, w, c] 60 | return layers, meta 61 | 62 | def cfg_yielder(model, binary): 63 | """ 64 | yielding each layer information to initialize `layer` 65 | """ 66 | layers, meta = parser(model); yield meta; 67 | h, w, c = meta['inp_size']; l = w * h * c 68 | 69 | # Start yielding 70 | flat = False # flag for 1st dense layer 71 | conv = '.conv.' 
62 | def cfg_yielder(model, binary):
63 |     """
64 |     yielding each layer information to initialize `layer`
65 |     """
66 |     layers, meta = parser(model); yield meta;
67 |     h, w, c = meta['inp_size']; l = w * h * c
68 | 
69 |     # Start yielding
70 |     flat = False # flag for 1st dense layer
71 |     conv = '.conv.' in model
72 |     for i, d in enumerate(layers):
73 |         #-----------------------------------------------------
74 |         if d['type'] == '[crop]':
75 |             yield ['crop', i]
76 |         #-----------------------------------------------------
77 |         elif d['type'] == '[local]':
78 |             n = d.get('filters', 1)
79 |             size = d.get('size', 1)
80 |             stride = d.get('stride', 1)
81 |             pad = d.get('pad', 0)
82 |             activation = d.get('activation', 'logistic')
83 |             w_ = (w - 1 - (1 - pad) * (size - 1)) // stride + 1
84 |             h_ = (h - 1 - (1 - pad) * (size - 1)) // stride + 1
85 |             yield ['local', i, size, c, n, stride,
86 |                    pad, w_, h_, activation]
87 |             if activation != 'linear': yield [activation, i]
88 |             w, h, c = w_, h_, n
89 |             l = w * h * c
90 |         #-----------------------------------------------------
91 |         elif d['type'] == '[convolutional]':
92 |             n = d.get('filters', 1)
93 |             size = d.get('size', 1)
94 |             stride = d.get('stride', 1)
95 |             pad = d.get('pad', 0)
96 |             padding = d.get('padding', 0)
97 |             if pad: padding = size // 2
98 |             activation = d.get('activation', 'logistic')
99 |             batch_norm = d.get('batch_normalize', 0) or conv
100 |             yield ['convolutional', i, size, c, n,
101 |                    stride, padding, batch_norm,
102 |                    activation]
103 |             if activation != 'linear': yield [activation, i]
104 |             w_ = (w + 2 * padding - size) // stride + 1
105 |             h_ = (h + 2 * padding - size) // stride + 1
106 |             w, h, c = w_, h_, n
107 |             l = w * h * c
108 |         #-----------------------------------------------------
109 |         elif d['type'] == '[maxpool]':
110 |             stride = d.get('stride', 1)
111 |             size = d.get('size', stride)
112 |             padding = d.get('padding', (size-1) // 2)
113 |             yield ['maxpool', i, size, stride, padding]
114 |             w_ = (w + 2*padding) // stride # use the defaulted stride; d['stride'] raises KeyError when the cfg omits it
115 |             h_ = (h + 2*padding) // stride
116 |             w, h = w_, h_
117 |             l = w * h * c
118 |         #-----------------------------------------------------
119 |         elif d['type'] == '[avgpool]':
120 |             flat = True; l = c
121 |             yield ['avgpool', i]
122 |         #-----------------------------------------------------
123 |         elif d['type'] == '[softmax]':
124 |             yield ['softmax', i, d['groups']]
125 |         #-----------------------------------------------------
126 |         elif d['type'] == '[connected]':
127 |             if not flat:
128 |                 yield ['flatten', i]
129 |                 flat = True
130 |             activation = d.get('activation', 'logistic')
131 |             yield ['connected', i, l, d['output'], activation]
132 |             if activation != 'linear': yield [activation, i]
133 |             l = d['output']
134 |         #-----------------------------------------------------
135 |         elif d['type'] == '[dropout]':
136 |             yield ['dropout', i, d['probability']]
137 |         #-----------------------------------------------------
138 |         elif d['type'] == '[select]':
139 |             if not flat:
140 |                 yield ['flatten', i]
141 |                 flat = True
142 |             inp = d.get('input', None)
143 |             if type(inp) is str:
144 |                 file = inp.split(',')[0]
145 |                 layer_num = int(inp.split(',')[1])
146 |                 with open(file, 'rb') as f:
147 |                     profiles = pickle.load(f, encoding = 'latin1')[0]
148 |                 layer = profiles[layer_num]
149 |             else: layer = inp
150 |             activation = d.get('activation', 'logistic')
151 |             d['keep'] = d['keep'].split('/')
152 |             classes = int(d['keep'][-1])
153 |             keep = [int(c) for c in d['keep'][0].split(',')]
154 |             keep_n = len(keep)
155 |             train_from = classes * d['bins']
156 |             for count in range(d['bins']-1):
157 |                 for num in keep[-keep_n:]:
158 |                     keep += [num + classes]
159 |             k = 1
160 |             while layers[i-k]['type'] not in ['[connected]', '[extract]']:
161 |                 k += 1
162 |                 if i-k < 0:
163 |                     break
164 |             if i-k < 0: l_ = l
165 |             elif layers[i-k]['type'] == '[connected]': # match the bracketed type stored by parser(); bare 'connected' never matches
166 |                 l_ = layers[i-k]['output']
167 |             else:
168 |                 l_ = layers[i-k].get('old',[l])[-1]
169 |             yield ['select', i, l_, d['old_output'],
170 |                    activation, layer, d['output'],
171 |                    keep, train_from]
172 |             if activation != 'linear': yield [activation, i]
173 |             l = d['output']
174 |         #-----------------------------------------------------
175 |         elif d['type'] == '[conv-select]':
176 |             n = d.get('filters', 1)
177 |             size = d.get('size', 1)
178 |             stride = d.get('stride', 1)
179 |             pad = d.get('pad', 0)
180 |             padding = d.get('padding', 0)
181 |             if pad: padding = size // 2
182 |             activation = d.get('activation', 'logistic')
183 |             batch_norm = d.get('batch_normalize', 0) or conv
184 |             d['keep'] = d['keep'].split('/')
185 |             classes = int(d['keep'][-1])
186 |             keep = [int(x) for x in d['keep'][0].split(',')]
187 | 
188 |             segment = classes + 5
189 |             assert n % segment == 0, \
190 |             'conv-select: segment failed'
191 |             bins = n // segment
192 |             keep_idx = list()
193 |             for j in range(bins):
194 |                 offset = j * segment
195 |                 for k in range(5):
196 |                     keep_idx += [offset + k]
197 |                 for k in keep:
198 |                     keep_idx += [offset + 5 + k]
199 |             w_ = (w + 2 * padding - size) // stride + 1
200 |             h_ = (h + 2 * padding - size) // stride + 1
201 |             c_ = len(keep_idx)
202 |             yield ['conv-select', i, size, c, n,
203 |                    stride, padding, batch_norm,
204 |                    activation, keep_idx, c_]
205 |             w, h, c = w_, h_, c_
206 |             l = w * h * c
207 |         #-----------------------------------------------------
208 |         elif d['type'] == '[conv-extract]':
209 |             file = d['profile']
210 |             with open(file, 'rb') as f:
211 |                 profiles = pickle.load(f, encoding = 'latin1')[0]
212 |             inp_layer = None
213 |             inp = d['input']
214 |             out = d['output']
215 |             inp_layer = None
216 |             if inp >= 0:
217 |                 inp_layer = profiles[inp]
218 |             if inp_layer is not None:
219 |                 assert len(inp_layer) == c, \
220 |                 'Conv-extract does not match input dimension'
221 |             out_layer = profiles[out]
222 | 
223 |             n = d.get('filters', 1)
224 |             size = d.get('size', 1)
225 |             stride = d.get('stride', 1)
226 |             pad = d.get('pad', 0)
227 |             padding = d.get('padding', 0)
228 |             if pad: padding = size // 2
229 |             activation = d.get('activation', 'logistic')
230 |             batch_norm = d.get('batch_normalize', 0) or conv
231 | 
232 |             k = 1
233 |             find = ['[convolutional]','[conv-extract]']
234 |             while layers[i-k]['type'] not in find:
235 |                 k += 1
236 |                 if i-k < 0: break
237 |             if i-k >= 0:
238 |                 previous_layer = layers[i-k]
239 |                 c_ = previous_layer['filters']
240 |             else:
241 |                 c_ = c
242 | 
243 |             yield ['conv-extract', i, size, c_, n,
244 |                    stride, padding, batch_norm,
245 |                    activation, inp_layer, out_layer]
246 |             if activation != 'linear': yield [activation, i]
247 |             w_ = (w + 2 * padding - size) // stride + 1
248 |             h_ = (h + 2 * padding - size) // stride + 1
249 |             w, h, c = w_, h_, len(out_layer)
250 |             l = w * h * c
251 |         #-----------------------------------------------------
252 |         elif d['type'] == '[extract]':
253 |             if not flat:
254 |                 yield ['flatten', i]
255 |                 flat = True
256 |             activation = d.get('activation', 'logistic')
257 |             file = d['profile']
258 |             with open(file, 'rb') as f:
259 |                 profiles = pickle.load(f, encoding = 'latin1')[0]
260 |             inp_layer = None
261 |             inp = d['input']
262 |             out = d['output']
263 |             if inp >= 0:
264 |                 inp_layer = profiles[inp]
265 |             out_layer = profiles[out]
266 |             old = d['old']
267 |             old = [int(x) for x in old.split(',')]
268 |             if inp_layer is not None:
269 |                 if len(old) > 2:
270 |                     h_, w_, c_, n_ = old
271 |                     new_inp = list()
272 |                     for p in range(c_):
273 |                         for q in range(h_):
274 |                             for r in range(w_):
275 |                                 if p not in inp_layer:
276 |                                     continue
277 |                                 new_inp += [r + w*(q + h*p)]
278 |                     inp_layer = new_inp
279 |                     old = [h_ * w_ * c_, n_]
280 |                 assert len(inp_layer) == l, \
281 |                 'Extract does not match input dimension'
282 |             d['old'] = old
283 |             yield ['extract', i] + old + [activation] + [inp_layer, out_layer]
284 |             if activation != 'linear': yield [activation, i]
285 |             l = len(out_layer)
286 |         #-----------------------------------------------------
287 |         elif d['type'] == '[route]': # add new layer here
288 |             routes = d['layers']
289 |             if type(routes) is int:
290 |                 routes = [routes]
291 |             else:
292 |                 routes = [int(x.strip()) for x in routes.split(',')]
293 |             routes = [i + x if x < 0 else x for x in routes]
294 |             for j, x in enumerate(routes):
295 |                 lx = layers[x];
296 |                 xtype = lx['type']
297 |                 _size = lx['_size'][:3]
298 |                 if j == 0:
299 |                     h, w, c = _size
300 |                 else:
301 |                     h_, w_, c_ = _size
302 |                     assert w_ == w and h_ == h, \
303 |                     'Routing incompatible conv sizes'
304 |                     c += c_
305 |             yield ['route', i, routes]
306 |             l = w * h * c
307 |         #-----------------------------------------------------
308 |         elif d['type'] == '[reorg]':
309 |             stride = d.get('stride', 1)
310 |             yield ['reorg', i, stride]
311 |             w = w // stride; h = h // stride;
312 |             c = c * (stride ** 2)
313 |             l = w * h * c
314 |         #-----------------------------------------------------
315 |         else:
316 |             exit('Layer {} not implemented'.format(d['type']))
317 | 
318 |         d['_size'] = list([h, w, c, l, flat])
319 | 
320 |     if not flat: meta['out_size'] = [h, w, c]
321 |     else: meta['out_size'] = l
--------------------------------------------------------------------------------
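(Editor's sketch, not part of the file: `cfg_yielder` is a generator whose first item is the `meta` dict and whose remaining items are the per-layer argument lists yielded above. The config path and `binary=False` are assumptions for illustration.)

```python
from darkflow.utils.process import cfg_yielder

stream = cfg_yielder('cfg/tiny-yolo-voc.cfg', binary=False)
meta = next(stream)                 # model-wide parameters come first
print(meta['inp_size'], meta.get('anchors'))

for args in stream:
    # e.g. ['convolutional', i, size, c, n, stride, padding, batch_norm, activation]
    print(args[0], args[1:])
```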