├── .coveragerc ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── cfg ├── coco.names ├── extraction.cfg ├── extraction.conv.cfg ├── tiny-yolo-4c.cfg ├── tiny-yolo-voc.cfg ├── tiny-yolo.cfg ├── v1.1 │ ├── person-bottle.cfg │ ├── tiny-coco.cfg │ ├── tiny-yolo-4c.cfg │ ├── tiny-yolov1.cfg │ ├── yolo-coco.cfg │ └── yolov1.cfg ├── v1 │ ├── tiny-old.profile │ ├── tiny.profile │ ├── yolo-2c.cfg │ ├── yolo-4c.cfg │ ├── yolo-full.cfg │ ├── yolo-small.cfg │ ├── yolo-tiny-extract.cfg │ ├── yolo-tiny-extract_.cfg │ ├── yolo-tiny.cfg │ └── yolo-tiny4c.cfg ├── yolo-voc.cfg └── yolo.cfg ├── darkflow ├── cli.py ├── cython_utils │ ├── cy_yolo2_findboxes.pyx │ ├── cy_yolo_findboxes.pyx │ ├── nms.pxd │ └── nms.pyx ├── dark │ ├── connected.py │ ├── convolution.py │ ├── darknet.py │ ├── darkop.py │ └── layer.py ├── defaults.py ├── net │ ├── build.py │ ├── flow.py │ ├── framework.py │ ├── help.py │ ├── ops │ │ ├── __init__.py │ │ ├── baseop.py │ │ ├── convolution.py │ │ └── simple.py │ ├── vanilla │ │ ├── __init__.py │ │ └── train.py │ ├── yolo │ │ ├── __init__.py │ │ ├── data.py │ │ ├── misc.py │ │ ├── predict.py │ │ └── train.py │ └── yolov2 │ │ ├── __init__.py │ │ ├── data.py │ │ ├── predict.py │ │ └── train.py ├── utils │ ├── box.py │ ├── im_transform.py │ ├── loader.py │ ├── pascal_voc_clean_xml.py │ └── process.py └── version.py ├── demo.gif ├── flow ├── labels.txt ├── preview.png ├── sample_img ├── Thumbs.db ├── sample_computer.jpg ├── sample_dog.jpg ├── sample_eagle.jpg ├── sample_giraffe.jpg ├── sample_horses.jpg ├── sample_office.jpg ├── sample_person.jpg └── sample_scream.jpg ├── setup.py ├── test ├── requirements-testing.txt ├── test_darkflow.py └── training │ ├── annotations │ ├── 1.xml │ └── 2.xml │ └── images │ ├── 1.jpg │ └── 2.jpg └── vbb2voc.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = test/* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Python bytecode 3 | *.pyc 4 | 5 | # Weight files 6 | bin/ 7 | 8 | # Sample image data 9 | sample_img/*.jpg 10 | !sample_img/sample_*.jpg 11 | sample_img/out/* 12 | 13 | # Annotated test results 14 | results/ 15 | 16 | # Intermediate training data 17 | backup/ 18 | tfnet/yolo/parse-history.txt 19 | tfnet/yolo/*.parsed 20 | *.txt 21 | !requirements*.txt 22 | *.pb 23 | /profile 24 | /test.py 25 | 26 | # Built cython files 27 | darkflow/cython_utils/*.pyd 28 | darkflow/cython_utils/*.c 29 | 30 | #egg-info 31 | darkflow.egg-info/ 32 | 33 | #Other build stuff 34 | build/ 35 | 36 | #TensorBoard logs 37 | summary/ 38 | 39 | #Built graphs 40 | built_graph/ 41 | 42 | #Training checkpoints 43 | ckpt/* 44 | 45 | #pytest cache 46 | .cache/ 47 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: required 3 | 4 | language: python 5 | python: 6 | - "3.6" 7 | 8 | cache: 9 | directories: 10 | - bin #cache .weights files 11 | 12 | # command to install dependencies 13 | install: 14 | - pip install -r test/requirements-testing.txt 15 | - pip install -e . 
16 | 17 | # command to run tests 18 | script: pytest -x --cov=./ 19 | 20 | #Upload code coverage statistics 21 | after_success: 22 | - codecov -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Pedestrian Detection 2 | 3 | I have used the Caltech dataset for pedestrian detection. This dataset consists of approximately 10 hours of 640x480 30Hz video. About 250,000 frames with a total of 350,000 bounding boxes and 2300 unique pedestrians were annotated. For more information, you can refer to [this page](http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/). 4 | 5 | ### Converting the dataset into darkflow format 6 | 7 | The video files in the Caltech Pedestrian dataset are in .seq format and the annotations are in .vbb format. Darkflow needs the images in .jpg and the annotations in .xml format. To convert the files, we have used: 8 | 9 | vbb2voc.py: extracts the frames that contain person bounding boxes from the .seq files and converts the .vbb annotation files to .xml files. Note: the Caltech pedestrian dataset distinguishes four kinds of person labels: person, person-fa, person?, people. In my case, I only need the person type. If you want to use other types, pass the corresponding type list (e.g. ['person', 'people']) as person_types to the vbb_anno2dict function (a purely illustrative call is sketched just after the demo below). 10 | 11 | ### Building the Model 12 | 13 | [![Build Status](https://travis-ci.org/thtrieu/darkflow.svg?branch=master)](https://travis-ci.org/thtrieu/darkflow) [![codecov](https://codecov.io/gh/thtrieu/darkflow/branch/master/graph/badge.svg)](https://codecov.io/gh/thtrieu/darkflow) 14 | 15 | Real-time object detection and classification. Papers: [version 1](https://arxiv.org/pdf/1506.02640.pdf), [version 2](https://arxiv.org/pdf/1612.08242.pdf). 16 | 17 | Read more about YOLO (in darknet) and download weight files [here](http://pjreddie.com/darknet/yolo/). In case a weight file cannot be found, I uploaded some of mine [here](https://drive.google.com/drive/folders/0B1tW_VtY7onidEwyQ2FtQVplWEU), which include `yolo-full` and `yolo-tiny` of v1.0, `tiny-yolo-v1.1` of v1.1, and `yolo` and `tiny-yolo-voc` of v2. 18 | 19 | 20 | See the demo below or view it on [imgur](http://i.imgur.com/EyZZKAA.gif) 21 | 22 |
<p align="center"> <img src="demo.gif"/> </p>
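Returning to the dataset conversion step above: the conversion section only names the `vbb_anno2dict` function and its `person_types` argument, so the snippet below is a purely illustrative sketch; the import path and the remaining arguments are hypothetical, and the actual signature in `vbb2voc.py` should be checked before use.

```python
# Purely illustrative sketch: only vbb_anno2dict and its person_types argument
# are described above; the import path and the other arguments are hypothetical,
# so check vbb2voc.py for the real signature before relying on this.
from vbb2voc import vbb_anno2dict  # hypothetical import path

annos = vbb_anno2dict(
    "annotations/set00/V000.vbb",       # hypothetical path to a .vbb file
    "set00",                            # hypothetical set/camera identifier
    person_types=["person", "people"],  # keep both 'person' and 'people' boxes
)
```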
23 | 24 | ### Dependencies 25 | 26 | Python 3, TensorFlow 1.0, NumPy, OpenCV 3. 27 | 28 | ### Getting started 29 | 30 | You can choose _one_ of the following three ways to get started with darkflow. 31 | 32 | 1. Just build the Cython extensions in place. NOTE: If installing this way, you will have to use `./flow` in the cloned darkflow directory instead of `flow`, as darkflow is not installed globally. 33 | ``` 34 | python3 setup.py build_ext --inplace 35 | ``` 36 | 37 | 2. Let pip install darkflow globally in dev mode (still globally accessible, but changes to the code take effect immediately) 38 | ``` 39 | pip install -e . 40 | ``` 41 | 42 | 3. Install with pip globally 43 | ``` 44 | pip install . 45 | ``` 46 | 47 | ### Update 48 | 49 | **Android demo in TensorFlow's repository:** [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowYoloDetector.java) 50 | 51 | **I am looking for help:** 52 | - issues with the `help wanted` label in the issue tracker 53 | 54 | ## Parsing the annotations 55 | 56 | Skip this if you are not training or fine-tuning anything (you simply want to forward flow a trained net). 57 | 58 | For example, if you want to work with only 3 classes `tvmonitor`, `person`, `pottedplant`, edit `labels.txt` as follows: 59 | 60 | ``` 61 | tvmonitor 62 | person 63 | pottedplant 64 | ``` 65 | 66 | And that's it. `darkflow` will take care of the rest. You can also set darkflow to load from a custom labels file with the `--labels` flag (i.e. `--labels myOtherLabelsFile.txt`). This can be helpful when working with multiple models with different sets of output labels. When this flag is not set, darkflow will load from `labels.txt` by default (unless you are using one of the recognized `.cfg` files designed for the COCO or VOC dataset - then the labels file will be ignored and the COCO or VOC labels will be loaded). 67 | 68 | ### Design the net 69 | 70 | Skip this if you are working with one of the original configurations since they are already there. Otherwise, see the following example: 71 | 72 | ```python 73 | ... 74 | 75 | [convolutional] 76 | batch_normalize = 1 77 | size = 3 78 | stride = 1 79 | pad = 1 80 | activation = leaky 81 | 82 | [maxpool] 83 | 84 | [connected] 85 | output = 4096 86 | activation = linear 87 | 88 | ... 89 | ``` 90 | 91 | ### Flowing the graph using `flow` 92 | 93 | ```bash 94 | # Have a look at its options 95 | flow --h 96 | ``` 97 | 98 | First, let's take a closer look at one very useful option, `--load` 99 | 100 | ```bash 101 | # 1. Load tiny-yolo.weights 102 | flow --model cfg/tiny-yolo.cfg --load bin/tiny-yolo.weights 103 | 104 | # 2. To completely initialize a model, leave out the --load option 105 | flow --model cfg/yolo-new.cfg 106 | 107 | # 3. It is useful to reuse the first identical layers of tiny for `yolo-new` 108 | flow --model cfg/yolo-new.cfg --load bin/tiny-yolo.weights 109 | # this will print out which layers are reused, which are initialized 110 | ``` 111 | 112 | All input images from the default folder `sample_img/` are flowed through the net and the predictions are put in `sample_img/out/`. We can always specify more parameters for such forward passes, such as the detection threshold, batch size, image folder, etc. 
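For instance, the detection threshold can be set with the `--threshold` flag listed by `flow --h` (the value 0.4 below is just an illustrative choice; the Python example further down uses 0.1):

```bash
# Forward all images in sample_img/ using tiny yolo, keeping only
# detections with confidence above 0.4
flow --imgdir sample_img/ --model cfg/tiny-yolo.cfg --load bin/tiny-yolo.weights --threshold 0.4
```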
113 | 114 | ```bash 115 | # Forward all images in sample_img/ using tiny yolo and 100% GPU usage 116 | flow --imgdir sample_img/ --model cfg/tiny-yolo.cfg --load bin/tiny-yolo.weights --gpu 1.0 117 | ``` 118 | JSON output can be generated, with the label and the pixel location of each detected bounding box. Each prediction is stored in the `sample_img/out` folder by default. An example JSON array is shown below. 119 | ```bash 120 | # Forward all images in sample_img/ using tiny yolo and JSON output. 121 | flow --imgdir sample_img/ --model cfg/tiny-yolo.cfg --load bin/tiny-yolo.weights --json 122 | ``` 123 | JSON output: 124 | ```json 125 | [{"label":"person", "confidence": 0.56, "topleft": {"x": 184, "y": 101}, "bottomright": {"x": 274, "y": 382}}, 126 | {"label": "dog", "confidence": 0.32, "topleft": {"x": 71, "y": 263}, "bottomright": {"x": 193, "y": 353}}, 127 | {"label": "horse", "confidence": 0.76, "topleft": {"x": 412, "y": 109}, "bottomright": {"x": 592,"y": 337}}] 128 | ``` 129 | - label: the predicted class label 130 | - confidence: a value between 0 and 1 indicating how confident YOLO is about that detection 131 | - topleft: pixel coordinates of the top-left corner of the box 132 | - bottomright: pixel coordinates of the bottom-right corner of the box 133 | 134 | ### Training a new model 135 | 136 | Training is simple: you only have to add the option `--train`. The training set and annotations will be parsed if this is the first time a new configuration is trained. To point to the training set and annotations, use the options `--dataset` and `--annotation`. A few examples: 137 | 138 | ```bash 139 | # Initialize yolo-new from tiny-yolo, then train the net on 100% GPU: 140 | flow --model cfg/yolo-new.cfg --load bin/tiny-yolo.weights --train --gpu 1.0 141 | 142 | # Completely initialize yolo-new and train it with the ADAM optimizer 143 | flow --model cfg/yolo-new.cfg --train --trainer adam 144 | ``` 145 | 146 | During training, the script will occasionally save intermediate results into TensorFlow checkpoints, stored in `ckpt/`. To resume from any checkpoint before performing training/testing, use the `--load [checkpoint_num]` option; if `checkpoint_num < 0`, `darkflow` will load the most recent save by parsing `ckpt/checkpoint`. 147 | 148 | ```bash 149 | # Resume the most recent checkpoint for training 150 | flow --train --model cfg/yolo-new.cfg --load -1 151 | 152 | # Test with the checkpoint at step 1500 153 | flow --model cfg/yolo-new.cfg --load 1500 154 | 155 | # Fine-tune tiny-yolo from the original weights 156 | flow --train --model cfg/tiny-yolo.cfg --load bin/tiny-yolo.weights 157 | ``` 158 | 159 | Example of training on Pascal VOC 2007: 160 | ```bash 161 | # Download the Pascal VOC dataset: 162 | curl -O https://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar 163 | tar xf VOCtest_06-Nov-2007.tar 164 | 165 | # An example of the Pascal VOC annotation format: 166 | vim VOCdevkit/VOC2007/Annotations/000001.xml 167 | 168 | # Train the net on the Pascal dataset: 169 | flow --model cfg/yolo-new.cfg --train --dataset "~/VOCdevkit/VOC2007/JPEGImages" --annotation "~/VOCdevkit/VOC2007/Annotations" 170 | ``` 171 | 172 | ### Training on your own dataset 173 | 174 | *The steps below assume we want to use tiny YOLO and our dataset has 3 classes* 175 | 176 | 1. Create a copy of the configuration file `tiny-yolo-voc.cfg` and rename it according to your preference, e.g. `tiny-yolo-voc-3c.cfg` (it is crucial that you leave the original `tiny-yolo-voc.cfg` file unchanged; see below for an explanation). 177 | 178 | 2. 
In `tiny-yolo-voc-3c.cfg`, change `classes` in the [region] layer (the last layer) to the number of classes you are going to train for. In our case, `classes` is set to 3. 179 | 180 | ```python 181 | ... 182 | 183 | [region] 184 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 185 | bias_match=1 186 | classes=3 187 | coords=4 188 | num=5 189 | softmax=1 190 | 191 | ... 192 | ``` 193 | 194 | 3. In `tiny-yolo-voc-3c.cfg`, change `filters` in the [convolutional] layer (the second to last layer) to num * (classes + 5). In our case, `num` is 5 and `classes` is 3, so 5 * (3 + 5) = 40; therefore `filters` is set to 40. 195 | 196 | ```python 197 | ... 198 | 199 | [convolutional] 200 | size=1 201 | stride=1 202 | pad=1 203 | filters=40 204 | activation=linear 205 | 206 | [region] 207 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 208 | 209 | ... 210 | ``` 211 | 212 | 4. Change `labels.txt` to include the label(s) you want to train on (the number of labels should be the same as the number of classes you set in the `tiny-yolo-voc-3c.cfg` file). In our case, `labels.txt` will contain 3 labels. 213 | 214 | ``` 215 | label1 216 | label2 217 | label3 218 | ``` 219 | 5. Reference the `tiny-yolo-voc-3c.cfg` model when you train. 220 | 221 | `flow --model cfg/tiny-yolo-voc-3c.cfg --load bin/tiny-yolo-voc.weights --train --annotation train/Annotations --dataset train/Images` 222 | 223 | 224 | * Why should I leave the original `tiny-yolo-voc.cfg` file unchanged? 225 | 226 | When darkflow sees you are loading `tiny-yolo-voc.weights` it will look for `tiny-yolo-voc.cfg` in your cfg/ folder and compare that configuration file to the new one you have set with `--model cfg/tiny-yolo-voc-3c.cfg`. In this case, every layer will have exactly the same number of weights except for the last two, so it will load the weights into all layers up to the last two, because those now contain a different number of weights. 227 | 228 | 229 | ### Camera/video file demo 230 | 231 | For a demo that runs entirely on the CPU: 232 | 233 | ```bash 234 | flow --model cfg/yolo-new.cfg --load bin/yolo-new.weights --demo videofile.avi 235 | ``` 236 | 237 | For a demo that runs 100% on the GPU: 238 | 239 | ```bash 240 | flow --model cfg/yolo-new.cfg --load bin/yolo-new.weights --demo videofile.avi --gpu 1.0 241 | ``` 242 | 243 | To use your webcam/camera, simply replace `videofile.avi` with the keyword `camera`. 244 | 245 | To save a video with the predicted bounding boxes, add the `--saveVideo` option. 246 | 247 | ### Using darkflow from another Python application 248 | 249 | Please note that `return_predict(img)` must take a `numpy.ndarray`. Your image must be loaded beforehand and passed to `return_predict(img)`. Passing the file path won't work. 250 | 251 | The result from `return_predict(img)` will be a list of dictionaries representing each detected object's values, in the same format as the JSON output listed above. 
252 | 253 | ```python 254 | from darkflow.net.build import TFNet 255 | import cv2 256 | 257 | options = {"model": "cfg/yolo.cfg", "load": "bin/yolo.weights", "threshold": 0.1} 258 | 259 | tfnet = TFNet(options) 260 | 261 | imgcv = cv2.imread("./sample_img/sample_dog.jpg") 262 | result = tfnet.return_predict(imgcv) 263 | print(result) 264 | ``` 265 | 266 | 267 | ### Save the built graph to a protobuf file (`.pb`) 268 | 269 | ```bash 270 | ## Saving the latest checkpoint to a protobuf file 271 | flow --model cfg/yolo-new.cfg --load -1 --savepb 272 | 273 | ## Saving the graph and weights to a protobuf file 274 | flow --model cfg/yolo.cfg --load bin/yolo.weights --savepb 275 | ``` 276 | When saving the `.pb` file, a `.meta` file will also be generated alongside it. This `.meta` file is a JSON dump of everything in the `meta` dictionary, which contains information necessary for post-processing such as `anchors` and `labels`. This way, everything you need to make predictions from the graph and do post-processing is contained in those two files - no need to have the `.cfg` or any labels file tagging along. 277 | 278 | The created `.pb` file can be used to migrate the graph to mobile devices (JAVA / C++ / Objective-C++). The names of the input and output tensors are `'input'` and `'output'`, respectively. For further usage of this protobuf file, please refer to the official documentation of `Tensorflow` on the C++ API [_here_](https://www.tensorflow.org/versions/r0.9/api_docs/cc/index.html). To run it in, say, an iOS application, simply add the file to Bundle Resources and update the path to this file inside the source code. 279 | 280 | Also, darkflow supports loading from a `.pb` and `.meta` file for generating predictions (instead of loading from a `.cfg` and checkpoint or `.weights`). 281 | ```bash 282 | ## Forward images in sample_img for predictions based on protobuf file 283 | flow --pbLoad built_graph/yolo.pb --metaLoad built_graph/yolo.meta --imgdir sample_img/ 284 | ``` 285 | If you'd like to load a `.pb` and `.meta` file when using `return_predict()`, you can set the `"pbLoad"` and `"metaLoad"` options in place of the `"model"` and `"load"` options you would normally set (a short sketch is included after the Credits section below). 286 | 287 | That's all. 288 | 289 | ### Credits 290 | 291 | Credit for this code goes to https://github.com/thtrieu, and credit for vbb2voc.py goes to https://github.com/CasiaFan/Dataset_to_VOC_converter . 
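As noted in the protobuf section above, `return_predict()` can also run from a saved `.pb` and `.meta` pair by setting the `"pbLoad"` and `"metaLoad"` options instead of `"model"` and `"load"`. A minimal sketch, assuming the graph was saved to the default `built_graph/` folder shown earlier:

```python
from darkflow.net.build import TFNet
import cv2

# "pbLoad"/"metaLoad" replace the usual "model"/"load" options
options = {"pbLoad": "built_graph/yolo.pb",
           "metaLoad": "built_graph/yolo.meta",
           "threshold": 0.1}

tfnet = TFNet(options)

imgcv = cv2.imread("./sample_img/sample_dog.jpg")
print(tfnet.return_predict(imgcv))
```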
292 | -------------------------------------------------------------------------------- /cfg/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /cfg/extraction.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | max_crop=320 7 | channels=3 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | [convolutional] 17 | batch_normalize=1 18 | filters=64 19 | size=7 20 | stride=2 21 | pad=1 22 | activation=leaky 23 | 24 | [maxpool] 25 | size=2 26 | stride=2 27 | 28 | [convolutional] 29 | batch_normalize=1 30 | filters=192 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=leaky 35 | 36 | [maxpool] 37 | size=2 38 | stride=2 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=128 43 | size=1 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=256 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=256 59 | size=1 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [convolutional] 65 | batch_normalize=1 66 | filters=512 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [maxpool] 73 | size=2 74 | stride=2 75 | 76 | [convolutional] 77 | batch_normalize=1 78 | filters=256 79 | size=1 80 | stride=1 81 | pad=1 82 | activation=leaky 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=256 95 | size=1 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=512 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=256 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=512 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | 
[convolutional] 141 | batch_normalize=1 142 | filters=512 143 | size=1 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [convolutional] 149 | batch_normalize=1 150 | filters=1024 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=leaky 155 | 156 | [maxpool] 157 | size=2 158 | stride=2 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=512 163 | size=1 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=1024 171 | size=3 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=512 179 | size=1 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | [convolutional] 185 | batch_normalize=1 186 | filters=1024 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | [convolutional] 193 | filters=1000 194 | size=1 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [avgpool] 200 | 201 | [softmax] 202 | groups=1 203 | 204 | [cost] 205 | type=sse 206 | 207 | -------------------------------------------------------------------------------- /cfg/extraction.conv.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=256 5 | width=256 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.5 11 | policy=poly 12 | power=6 13 | max_batches=500000 14 | 15 | [convolutional] 16 | filters=64 17 | size=7 18 | stride=2 19 | pad=1 20 | activation=leaky 21 | 22 | [maxpool] 23 | size=2 24 | stride=2 25 | 26 | [convolutional] 27 | filters=192 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | filters=128 39 | size=1 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [convolutional] 45 | filters=256 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | filters=256 53 | size=1 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [convolutional] 59 | filters=512 60 | size=3 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [maxpool] 66 | size=2 67 | stride=2 68 | 69 | [convolutional] 70 | filters=256 71 | size=1 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [convolutional] 77 | filters=512 78 | size=3 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [convolutional] 84 | filters=256 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | filters=512 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=256 99 | size=1 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | filters=256 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | filters=512 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | filters=512 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | filters=1024 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [maxpool] 140 | size=2 141 | stride=2 142 | 143 | [convolutional] 144 | filters=512 145 | size=1 146 | stride=1 147 | pad=1 148 | activation=leaky 149 | 150 | [convolutional] 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | 
filters=512 159 | size=1 160 | stride=1 161 | pad=1 162 | activation=leaky 163 | 164 | [convolutional] 165 | filters=1024 166 | size=3 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [avgpool] 172 | 173 | [connected] 174 | output=1000 175 | activation=leaky 176 | 177 | [softmax] 178 | groups=1 179 | 180 | -------------------------------------------------------------------------------- /cfg/tiny-yolo-4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40100 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=45 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=4 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh=.6 134 | random=1 135 | -------------------------------------------------------------------------------- /cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40100 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 
| 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .5 134 | random=1 135 | -------------------------------------------------------------------------------- /cfg/tiny-yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 120000 16 | policy=steps 17 | steps=-1,100,80000,100000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=425 115 | activation=linear 116 | 117 | [region] 118 | anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741 119 | bias_match=1 120 | classes=80 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | 
absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /cfg/v1.1/person-bottle.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | saturation=.75 11 | exposure=.75 12 | hue = .1 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,20000,30000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 40000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [select] 109 | old_output=1470 110 | keep=4,14/20 111 | bins=49 112 | output=588 113 | activation=linear 114 | 115 | [detection] 116 | classes=2 117 | coords=4 118 | rescore=1 119 | side=7 120 | num=2 121 | softmax=0 122 | sqrt=1 123 | jitter=.2 124 | 125 | object_scale=1 126 | noobject_scale=.5 127 | class_scale=1 128 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1.1/tiny-coco.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | hue = .1 11 | saturation=.75 12 | exposure=.75 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,100000,150000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 200000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 
| [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [connected] 109 | output= 4655 110 | activation=linear 111 | 112 | [detection] 113 | classes=80 114 | coords=4 115 | rescore=1 116 | side=7 117 | num=3 118 | softmax=0 119 | sqrt=1 120 | jitter=.2 121 | 122 | object_scale=1 123 | noobject_scale=.5 124 | class_scale=1 125 | coord_scale=5 126 | -------------------------------------------------------------------------------- /cfg/v1.1/tiny-yolo-4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | saturation=.75 11 | exposure=.75 12 | hue = .1 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,20000,30000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 40000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [select] 109 | old_output=1470 110 | keep=8,14,15,19/20 111 | bins=49 112 | output=686 113 | activation=linear 114 | 115 | [detection] 116 | classes=4 117 | coords=4 118 | rescore=1 119 | side=7 120 | num=2 121 | softmax=0 122 | sqrt=1 123 | jitter=.2 124 | 125 | object_scale=1 126 | noobject_scale=.5 127 | class_scale=1 128 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1.1/tiny-yolov1.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | saturation=.75 11 | exposure=.75 12 | hue = .1 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,20000,30000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 40000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 
| [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [connected] 109 | output= 1470 110 | activation=linear 111 | 112 | [detection] 113 | classes=20 114 | coords=4 115 | rescore=1 116 | side=7 117 | num=2 118 | softmax=0 119 | sqrt=1 120 | jitter=.2 121 | 122 | object_scale=1 123 | noobject_scale=.5 124 | class_scale=1 125 | coord_scale=5 126 | 127 | -------------------------------------------------------------------------------- /cfg/v1.1/yolo-coco.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=4 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | hue = .1 11 | saturation=.75 12 | exposure=.75 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,100000,150000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 200000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=64 23 | size=7 24 | stride=2 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=192 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=128 47 | size=1 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=256 63 | size=1 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=512 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=256 83 | size=1 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=512 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [convolutional] 97 | batch_normalize=1 98 | filters=256 99 | size=1 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=512 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | [convolutional] 113 | batch_normalize=1 114 | filters=256 115 | size=1 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | batch_normalize=1 122 | filters=512 123 | size=3 124 | stride=1 125 
| pad=1 126 | activation=leaky 127 | 128 | [convolutional] 129 | batch_normalize=1 130 | filters=256 131 | size=1 132 | stride=1 133 | pad=1 134 | activation=leaky 135 | 136 | [convolutional] 137 | batch_normalize=1 138 | filters=512 139 | size=3 140 | stride=1 141 | pad=1 142 | activation=leaky 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=512 147 | size=1 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=1024 155 | size=3 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [maxpool] 161 | size=2 162 | stride=2 163 | 164 | [convolutional] 165 | batch_normalize=1 166 | filters=512 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [convolutional] 181 | batch_normalize=1 182 | filters=512 183 | size=1 184 | stride=1 185 | pad=1 186 | activation=leaky 187 | 188 | [convolutional] 189 | batch_normalize=1 190 | filters=1024 191 | size=3 192 | stride=1 193 | pad=1 194 | activation=leaky 195 | 196 | ####### 197 | 198 | [convolutional] 199 | batch_normalize=1 200 | size=3 201 | stride=1 202 | pad=1 203 | filters=1024 204 | activation=leaky 205 | 206 | [convolutional] 207 | batch_normalize=1 208 | size=3 209 | stride=2 210 | pad=1 211 | filters=1024 212 | activation=leaky 213 | 214 | [convolutional] 215 | batch_normalize=1 216 | size=3 217 | stride=1 218 | pad=1 219 | filters=1024 220 | activation=leaky 221 | 222 | [convolutional] 223 | batch_normalize=1 224 | size=3 225 | stride=1 226 | pad=1 227 | filters=1024 228 | activation=leaky 229 | 230 | [local] 231 | size=3 232 | stride=1 233 | pad=1 234 | filters=256 235 | activation=leaky 236 | 237 | [connected] 238 | output= 4655 239 | activation=linear 240 | 241 | [detection] 242 | classes=80 243 | coords=4 244 | rescore=1 245 | side=7 246 | num=3 247 | softmax=0 248 | sqrt=1 249 | jitter=.2 250 | 251 | object_scale=1 252 | noobject_scale=.5 253 | class_scale=1 254 | coord_scale=5 255 | 256 | -------------------------------------------------------------------------------- /cfg/v1.1/yolov1.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | saturation=1.5 10 | exposure=1.5 11 | hue=.1 12 | 13 | learning_rate=0.0005 14 | policy=steps 15 | steps=200,400,600,20000,30000 16 | scales=2.5,2,2,.1,.1 17 | max_batches = 40000 18 | 19 | [convolutional] 20 | batch_normalize=1 21 | filters=64 22 | size=7 23 | stride=2 24 | pad=1 25 | activation=leaky 26 | 27 | [maxpool] 28 | size=2 29 | stride=2 30 | 31 | [convolutional] 32 | batch_normalize=1 33 | filters=192 34 | size=3 35 | stride=1 36 | pad=1 37 | activation=leaky 38 | 39 | [maxpool] 40 | size=2 41 | stride=2 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=128 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=256 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=512 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | batch_normalize=1 81 | 
filters=256 82 | size=1 83 | stride=1 84 | pad=1 85 | activation=leaky 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=512 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [convolutional] 96 | batch_normalize=1 97 | filters=256 98 | size=1 99 | stride=1 100 | pad=1 101 | activation=leaky 102 | 103 | [convolutional] 104 | batch_normalize=1 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | [convolutional] 120 | batch_normalize=1 121 | filters=512 122 | size=3 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=256 130 | size=1 131 | stride=1 132 | pad=1 133 | activation=leaky 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=512 138 | size=3 139 | stride=1 140 | pad=1 141 | activation=leaky 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=512 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=1024 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [maxpool] 160 | size=2 161 | stride=2 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=512 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=1024 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [convolutional] 180 | batch_normalize=1 181 | filters=512 182 | size=1 183 | stride=1 184 | pad=1 185 | activation=leaky 186 | 187 | [convolutional] 188 | batch_normalize=1 189 | filters=1024 190 | size=3 191 | stride=1 192 | pad=1 193 | activation=leaky 194 | 195 | ####### 196 | 197 | [convolutional] 198 | batch_normalize=1 199 | size=3 200 | stride=1 201 | pad=1 202 | filters=1024 203 | activation=leaky 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | size=3 208 | stride=2 209 | pad=1 210 | filters=1024 211 | activation=leaky 212 | 213 | [convolutional] 214 | batch_normalize=1 215 | size=3 216 | stride=1 217 | pad=1 218 | filters=1024 219 | activation=leaky 220 | 221 | [convolutional] 222 | batch_normalize=1 223 | size=3 224 | stride=1 225 | pad=1 226 | filters=1024 227 | activation=leaky 228 | 229 | [local] 230 | size=3 231 | stride=1 232 | pad=1 233 | filters=256 234 | activation=leaky 235 | 236 | [dropout] 237 | probability=.5 238 | 239 | [connected] 240 | output= 1715 241 | activation=linear 242 | 243 | [detection] 244 | classes=20 245 | coords=4 246 | rescore=1 247 | side=7 248 | num=3 249 | softmax=0 250 | sqrt=1 251 | jitter=.2 252 | 253 | object_scale=1 254 | noobject_scale=.5 255 | class_scale=1 256 | coord_scale=5 257 | 258 | -------------------------------------------------------------------------------- /cfg/v1/tiny-old.profile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/cfg/v1/tiny-old.profile -------------------------------------------------------------------------------- /cfg/v1/tiny.profile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/cfg/v1/tiny.profile 
-------------------------------------------------------------------------------- /cfg/v1/yolo-2c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=16 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 48 | size=3 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [maxpool] 54 | size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=1024 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [connected] 112 | output=256 113 | activation=linear 114 | 115 | [connected] 116 | output=4096 117 | activation=leaky 118 | 119 | [dropout] 120 | probability=.5 121 | 122 | [select] 123 | old_output=1470 124 | keep=14,19/20 125 | bins=49 126 | output=588 127 | activation=linear 128 | 129 | [detection] 130 | classes=2 131 | coords=4 132 | rescore=1 133 | side=7 134 | num=2 135 | softmax=0 136 | sqrt=1 137 | jitter=.2 138 | object_scale=1 139 | noobject_scale=.5 140 | class_scale=1 141 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 
74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=4096 212 | activation=leaky 213 | 214 | [dropout] 215 | probability=.5 216 | 217 | [select] 218 | old_output=1470 219 | keep=8,14,15,19/20 220 | bins=49 221 | output=686 222 | activation=linear 223 | 224 | [detection] 225 | classes=4 226 | coords=4 227 | rescore=1 228 | side=7 229 | num=2 230 | softmax=0 231 | sqrt=1 232 | jitter=.2 233 | 234 | object_scale=1 235 | noobject_scale=.5 236 | class_scale=1 237 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-full.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | 
pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=4096 212 | activation=leaky 213 | 214 | [dropout] 215 | probability=.5 216 | 217 | [connected] 218 | output= 1470 219 | activation=linear 220 | 221 | [detection] 222 | classes=20 223 | coords=4 224 | rescore=1 225 | side=7 226 | num=2 227 | softmax=0 228 | sqrt=1 229 | jitter=.2 230 | 231 | object_scale=1 232 | noobject_scale=.5 233 | class_scale=1 234 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-small.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | 
stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=512 212 | activation=leaky 213 | 214 | [connected] 215 | output=4096 216 | activation=leaky 217 | 218 | [dropout] 219 | probability=.5 220 | 221 | [connected] 222 | output= 1470 223 | activation=linear 224 | 225 | [detection] 226 | classes=20 227 | coords=4 228 | rescore=1 229 | side=7 230 | num=2 231 | softmax=0 232 | sqrt=1 233 | jitter=.2 234 | 235 | object_scale=1 236 | noobject_scale=.5 237 | class_scale=1 238 | coord_scale=5 239 | 240 | -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny-extract.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | 
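# Note (annotation, not part of the original file): [conv-extract] is a darkflow-specific section,
# handled by conv_extract_layer in darkflow/dark/convolution.py. It reuses the pretrained tiny-yolo
# kernels but keeps only the channel indices listed in the referenced .profile file; `input` and
# `output` name the profile entries for the incoming and outgoing channel subsets, so consecutive
# layers chain together (output of one layer is the input index of the next).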
[conv-extract] 25 | profile=cfg/v1/tiny.profile 26 | input=-1 27 | output=0 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [conv-extract] 39 | profile=cfg/v1/tiny.profile 40 | input=0 41 | output=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [maxpool] 49 | size=2 50 | stride=2 51 | 52 | [conv-extract] 53 | profile=cfg/v1/tiny.profile 54 | input=1 55 | output=2 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [maxpool] 63 | size=2 64 | stride=2 65 | 66 | [conv-extract] 67 | profile=cfg/v1/tiny.profile 68 | input=2 69 | output=3 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [conv-extract] 81 | profile=cfg/v1/tiny.profile 82 | input=3 83 | output=4 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [maxpool] 91 | size=2 92 | stride=2 93 | 94 | [conv-extract] 95 | profile=cfg/v1/tiny.profile 96 | input=4 97 | output=5 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [maxpool] 105 | size=2 106 | stride=2 107 | 108 | [conv-extract] 109 | profile=cfg/v1/tiny.profile 110 | input=5 111 | output=6 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [conv-extract] 119 | profile=cfg/v1/tiny.profile 120 | input=6 121 | output=7 122 | filters=1024 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [conv-extract] 129 | profile=cfg/v1/tiny.profile 130 | input=7 131 | output=8 132 | filters=1024 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | [extract] 139 | profile=cfg/v1/tiny.profile 140 | input=8 141 | output=9 142 | old=7,7,1024,256 143 | activation=linear 144 | 145 | [extract] 146 | profile=cfg/v1/tiny.profile 147 | input=9 148 | output=10 149 | old=256,4096 150 | activation=leaky 151 | 152 | [dropout] 153 | probability=1. 
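# Note (annotation): the [select] section below shrinks the pretrained YOLOv1 head from 20 classes to 4.
# old_output=1470 is the full 7x7 grid with 30 values per cell (20 class scores + 2 boxes x 5),
# keep=8,14,15,19/20 retains those 4 of the 20 class outputs in every one of the bins=49 cells,
# and output=686 = 49 x (4 classes + 2 boxes x 5). The column selection itself is done by
# select_layer in darkflow/dark/connected.py.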
154 | 155 | [select] 156 | input=cfg/v1/tiny.profile,10 157 | old_output=1470 158 | keep=8,14,15,19/20 159 | bins=49 160 | output=686 161 | activation=linear 162 | 163 | [detection] 164 | classes=4 165 | coords=4 166 | rescore=1 167 | side=7 168 | num=2 169 | softmax=0 170 | sqrt=1 171 | jitter=.2 172 | object_scale=1 173 | noobject_scale=.5 174 | class_scale=1 175 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny-extract_.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [conv-extract] 25 | profile=cfg/v1/tiny-old.profile 26 | input=-1 27 | output=0 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [conv-extract] 39 | profile=cfg/v1/tiny-old.profile 40 | input=0 41 | output=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [maxpool] 49 | size=2 50 | stride=2 51 | 52 | [conv-extract] 53 | profile=cfg/v1/tiny-old.profile 54 | input=1 55 | output=2 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [maxpool] 63 | size=2 64 | stride=2 65 | 66 | [conv-extract] 67 | profile=cfg/v1/tiny-old.profile 68 | input=2 69 | output=3 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [conv-extract] 81 | profile=cfg/v1/tiny-old.profile 82 | input=3 83 | output=4 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [maxpool] 91 | size=2 92 | stride=2 93 | 94 | [conv-extract] 95 | profile=cfg/v1/tiny-old.profile 96 | input=4 97 | output=5 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [maxpool] 105 | size=2 106 | stride=2 107 | 108 | [conv-extract] 109 | profile=cfg/v1/tiny-old.profile 110 | input=5 111 | output=6 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [conv-extract] 119 | profile=cfg/v1/tiny-old.profile 120 | input=6 121 | output=7 122 | filters=1024 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [conv-extract] 129 | profile=cfg/v1/tiny-old.profile 130 | input=7 131 | output=8 132 | filters=1024 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | [extract] 139 | profile=cfg/v1/tiny-old.profile 140 | input=8 141 | output=9 142 | old=7,7,1024,256 143 | activation=linear 144 | 145 | [extract] 146 | profile=cfg/v1/tiny-old.profile 147 | input=9 148 | output=10 149 | old=256,4096 150 | activation=leaky 151 | 152 | [dropout] 153 | probability=1. 
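# Note (annotation): same extraction head as yolo-tiny-extract.cfg, but wired to
# cfg/v1/tiny-old.profile and trained with heavier loss scales
# (object_scale/noobject_scale/class_scale below) plus a periodic save= interval.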
154 | 155 | [select] 156 | input=cfg/v1/tiny-old.profile,10 157 | old_output=1470 158 | keep=8,14,15,19/20 159 | bins=49 160 | output=686 161 | activation=linear 162 | 163 | [detection] 164 | classes=4 165 | coords=4 166 | rescore=1 167 | side=7 168 | num=2 169 | softmax=0 170 | sqrt=1 171 | jitter=.2 172 | object_scale=2.5 173 | noobject_scale=2 174 | class_scale=2.5 175 | coord_scale=5 176 | 177 | save=11250 -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=16 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 48 | size=3 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [maxpool] 54 | size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=1024 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [connected] 112 | output=256 113 | activation=linear 114 | 115 | [connected] 116 | output=4096 117 | activation=leaky 118 | 119 | [dropout] 120 | probability=.5 121 | 122 | [connected] 123 | output= 1470 124 | activation=linear 125 | 126 | [detection] 127 | classes=20 128 | coords=4 129 | rescore=1 130 | side=7 131 | num=2 132 | softmax=0 133 | sqrt=1 134 | jitter=.2 135 | object_scale=1 136 | noobject_scale=.5 137 | class_scale=1 138 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=16 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 
48 | size=3 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [maxpool] 54 | size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=1024 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [connected] 112 | output=256 113 | activation=linear 114 | 115 | [connected] 116 | output=4096 117 | activation=leaky 118 | 119 | [dropout] 120 | probability=.5 121 | 122 | [select] 123 | old_output=1470 124 | keep=8,14,15,19/20 125 | bins=49 126 | output=686 127 | activation=linear 128 | 129 | [detection] 130 | classes=4 131 | coords=4 132 | rescore=1 133 | side=7 134 | num=2 135 | softmax=0 136 | sqrt=1 137 | jitter=.2 138 | object_scale=1 139 | noobject_scale=.5 140 | class_scale=1 141 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | height=416 5 | width=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.0001 15 | max_batches = 45000 16 | policy=steps 17 | steps=100,25000,35000 18 | scales=10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=32 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=64 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=128 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=64 55 | size=1 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | batch_normalize=1 74 | filters=256 75 | size=3 76 | stride=1 77 | pad=1 78 | activation=leaky 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=128 83 | size=1 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=256 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [maxpool] 97 | size=2 98 | stride=2 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=512 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=256 127 | size=1 128 | 
stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=512 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [maxpool] 141 | size=2 142 | stride=2 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=1024 147 | size=3 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=512 155 | size=1 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=1024 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=512 171 | size=1 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=1024 179 | size=3 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | 185 | ####### 186 | 187 | [convolutional] 188 | batch_normalize=1 189 | size=3 190 | stride=1 191 | pad=1 192 | filters=1024 193 | activation=leaky 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [route] 204 | layers=-9 205 | 206 | [reorg] 207 | stride=2 208 | 209 | [route] 210 | layers=-1,-3 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | size=3 215 | stride=1 216 | pad=1 217 | filters=1024 218 | activation=leaky 219 | 220 | [convolutional] 221 | size=1 222 | stride=1 223 | pad=1 224 | filters=125 225 | activation=linear 226 | 227 | [region] 228 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 229 | bias_match=1 230 | classes=20 231 | coords=4 232 | num=5 233 | softmax=1 234 | jitter=.2 235 | rescore=1 236 | 237 | object_scale=5 238 | noobject_scale=1 239 | class_scale=1 240 | coord_scale=1 241 | 242 | absolute=1 243 | thresh = .6 244 | random=0 245 | -------------------------------------------------------------------------------- /cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 
97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .1 258 | random=1 259 | -------------------------------------------------------------------------------- /darkflow/cli.py: -------------------------------------------------------------------------------- 1 | from .defaults import argHandler #Import the default arguments 2 | import os 3 | from .net.build import TFNet 4 | 5 | def cliHandler(args): 6 | FLAGS = argHandler() 7 | FLAGS.setDefaults() 8 | FLAGS.parseArgs(args) 9 | 10 | # make sure all necessary dirs exist 11 | def _get_dir(dirs): 12 | for d in dirs: 13 | this = os.path.abspath(os.path.join(os.path.curdir, d)) 14 | if not os.path.exists(this): os.makedirs(this) 15 | 16 | requiredDirectories = [FLAGS.imgdir, FLAGS.binary, FLAGS.backup, os.path.join(FLAGS.imgdir,'out')] 17 | if FLAGS.summary: 18 | requiredDirectories.append(FLAGS.summary) 19 | 20 | _get_dir(requiredDirectories) 21 | 22 | # fix FLAGS.load to appropriate type 23 | try: FLAGS.load = 
int(FLAGS.load) 24 | except: pass 25 | 26 | tfnet = TFNet(FLAGS) 27 | 28 | if FLAGS.demo: 29 | tfnet.camera() 30 | exit('Demo stopped, exit.') 31 | 32 | if FLAGS.train: 33 | print('Enter training ...'); tfnet.train() 34 | if not FLAGS.savepb: 35 | exit('Training finished, exit.') 36 | 37 | if FLAGS.savepb: 38 | print('Rebuild a constant version ...') 39 | tfnet.savepb(); exit('Done') 40 | 41 | tfnet.predict() 42 | -------------------------------------------------------------------------------- /darkflow/cython_utils/cy_yolo2_findboxes.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | ctypedef np.float_t DTYPE_t 5 | from libc.math cimport exp 6 | from ..utils.box import BoundBox 7 | from nms cimport NMS 8 | 9 | #expit 10 | @cython.boundscheck(False) # turn off bounds-checking for entire function 11 | @cython.wraparound(False) # turn off negative index wrapping for entire function 12 | @cython.cdivision(True) 13 | cdef float expit_c(float x): 14 | cdef float y= 1/(1+exp(-x)) 15 | return y 16 | 17 | #MAX 18 | @cython.boundscheck(False) # turn off bounds-checking for entire function 19 | @cython.wraparound(False) # turn off negative index wrapping for entire function 20 | @cython.cdivision(True) 21 | cdef float max_c(float a, float b): 22 | if(a>b): 23 | return a 24 | return b 25 | 26 | """ 27 | #SOFTMAX! 28 | @cython.cdivision(True) 29 | @cython.boundscheck(False) # turn off bounds-checking for entire function 30 | @cython.wraparound(False) # turn off negative index wrapping for entire function 31 | cdef void _softmax_c(float* x, int classes): 32 | cdef: 33 | float sum = 0 34 | np.intp_t k 35 | float arr_max = 0 36 | for k in range(classes): 37 | arr_max = max(arr_max,x[k]) 38 | 39 | for k in range(classes): 40 | x[k] = exp(x[k]-arr_max) 41 | sum += x[k] 42 | 43 | for k in range(classes): 44 | x[k] = x[k]/sum 45 | """ 46 | 47 | 48 | 49 | #BOX CONSTRUCTOR 50 | @cython.cdivision(True) 51 | @cython.boundscheck(False) # turn off bounds-checking for entire function 52 | @cython.wraparound(False) # turn off negative index wrapping for entire function 53 | def box_constructor(meta,np.ndarray[float,ndim=3] net_out_in): 54 | cdef: 55 | np.intp_t H, W, _, C, B, row, col, box_loop, class_loop 56 | np.intp_t row1, col1, box_loop1,index,index2 57 | float threshold = meta['thresh'] 58 | float tempc,arr_max=0,sum=0 59 | double[:] anchors = np.asarray(meta['anchors']) 60 | list boxes = list() 61 | 62 | H, W, _ = meta['out_size'] 63 | C = meta['classes'] 64 | B = meta['num'] 65 | 66 | cdef: 67 | float[:, :, :, ::1] net_out = net_out_in.reshape([H, W, B, net_out_in.shape[2]/B]) 68 | float[:, :, :, ::1] Classes = net_out[:, :, :, 5:] 69 | float[:, :, :, ::1] Bbox_pred = net_out[:, :, :, :5] 70 | float[:, :, :, ::1] probs = np.zeros((H, W, B, C), dtype=np.float32) 71 | 72 | for row in range(H): 73 | for col in range(W): 74 | for box_loop in range(B): 75 | arr_max=0 76 | sum=0; 77 | Bbox_pred[row, col, box_loop, 4] = expit_c(Bbox_pred[row, col, box_loop, 4]) 78 | Bbox_pred[row, col, box_loop, 0] = (col + expit_c(Bbox_pred[row, col, box_loop, 0])) / W 79 | Bbox_pred[row, col, box_loop, 1] = (row + expit_c(Bbox_pred[row, col, box_loop, 1])) / H 80 | Bbox_pred[row, col, box_loop, 2] = exp(Bbox_pred[row, col, box_loop, 2]) * anchors[2 * box_loop + 0] / W 81 | Bbox_pred[row, col, box_loop, 3] = exp(Bbox_pred[row, col, box_loop, 3]) * anchors[2 * box_loop + 1] / H 82 | #SOFTMAX BLOCK, no more pointer juggling 83 | 
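                    # Annotation: the loops below compute a numerically stable softmax over the C class
                    # scores of this anchor box (subtract the running max, exponentiate, accumulate the
                    # normaliser in `sum`). Each class probability is then scaled by the box objectness
                    # (index 4, already squashed with expit_c) and divided by `sum`; only values above
                    # meta['thresh'] are written into `probs`, everything else stays zero so NMS can skip it.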
for class_loop in range(C): 84 | arr_max=max_c(arr_max,Classes[row,col,box_loop,class_loop]) 85 | 86 | for class_loop in range(C): 87 | Classes[row,col,box_loop,class_loop]=exp(Classes[row,col,box_loop,class_loop]-arr_max) 88 | sum+=Classes[row,col,box_loop,class_loop] 89 | 90 | for class_loop in range(C): 91 | tempc = Classes[row, col, box_loop, class_loop] * Bbox_pred[row, col, box_loop, 4]/sum 92 | if(tempc > threshold): 93 | probs[row, col, box_loop, class_loop] = tempc 94 | 95 | 96 | #NMS 97 | return NMS(np.ascontiguousarray(probs).reshape(H*W*B,C), np.ascontiguousarray(Bbox_pred).reshape(H*B*W,5)) 98 | -------------------------------------------------------------------------------- /darkflow/cython_utils/cy_yolo_findboxes.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | ctypedef np.float_t DTYPE_t 5 | from libc.math cimport exp 6 | from ..utils.box import BoundBox 7 | from nms cimport NMS 8 | 9 | 10 | 11 | @cython.cdivision(True) 12 | @cython.boundscheck(False) # turn off bounds-checking for entire function 13 | @cython.wraparound(False) # turn off negative index wrapping for entire function 14 | def yolo_box_constructor(meta,np.ndarray[float] net_out, float threshold): 15 | 16 | cdef: 17 | float sqrt 18 | int C,B,S 19 | int SS,prob_size,conf_size 20 | int grid, b 21 | int class_loop 22 | 23 | 24 | sqrt = meta['sqrt'] + 1 25 | C, B, S = meta['classes'], meta['num'], meta['side'] 26 | boxes = [] 27 | SS = S * S # number of grid cells 28 | prob_size = SS * C # class probabilities 29 | conf_size = SS * B # confidences for each grid cell 30 | 31 | cdef: 32 | float [:,::1] probs = np.ascontiguousarray(net_out[0 : prob_size]).reshape([SS,C]) 33 | float [:,::1] confs = np.ascontiguousarray(net_out[prob_size : (prob_size + conf_size)]).reshape([SS,B]) 34 | float [: , : ,::1] coords = np.ascontiguousarray(net_out[(prob_size + conf_size) : ]).reshape([SS, B, 4]) 35 | float [:,:,::1] final_probs = np.zeros([SS,B,C],dtype=np.float32) 36 | 37 | 38 | for grid in range(SS): 39 | for b in range(B): 40 | coords[grid, b, 0] = (coords[grid, b, 0] + grid % S) / S 41 | coords[grid, b, 1] = (coords[grid, b, 1] + grid // S) / S 42 | coords[grid, b, 2] = coords[grid, b, 2] ** sqrt 43 | coords[grid, b, 3] = coords[grid, b, 3] ** sqrt 44 | for class_loop in range(C): 45 | probs[grid, class_loop] = probs[grid, class_loop] * confs[grid, b] 46 | #print("PROBS",probs[grid,class_loop]) 47 | if(probs[grid,class_loop] > threshold ): 48 | final_probs[grid, b, class_loop] = probs[grid, class_loop] 49 | 50 | 51 | return NMS(np.ascontiguousarray(final_probs).reshape(SS*B, C) , np.ascontiguousarray(coords).reshape(SS*B, 4)) 52 | -------------------------------------------------------------------------------- /darkflow/cython_utils/nms.pxd: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | ctypedef np.float_t DTYPE_t 5 | from libc.math cimport exp 6 | from utils.box import BoundBox 7 | 8 | 9 | cdef NMS(float[:, ::1] , float[:, ::1] ) 10 | 11 | 12 | -------------------------------------------------------------------------------- /darkflow/cython_utils/nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | from libc.math cimport exp 5 | from ..utils.box import BoundBox 6 | 7 | 8 | 9 | #OVERLAP 10 | 
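# Annotation: geometry helpers used by NMS. overlap_c returns the 1-D overlap of two centred
# intervals (min of the right edges minus max of the left edges); box_intersection_c multiplies
# the x- and y-overlaps, returning 0 if either is negative; box_union_c is area_a + area_b minus
# the intersection; box_iou_c is their ratio. NMS below zeroes any box whose IoU with a
# higher-scoring box of the same class is >= 0.4 and wraps the survivors in BoundBox objects.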
@cython.boundscheck(False) # turn off bounds-checking for entire function 11 | @cython.wraparound(False) # turn off negative index wrapping for entire function 12 | @cython.cdivision(True) 13 | cdef float overlap_c(float x1, float w1 , float x2 , float w2): 14 | cdef: 15 | float l1,l2,left,right 16 | l1 = x1 - w1 /2. 17 | l2 = x2 - w2 /2. 18 | left = max(l1,l2) 19 | r1 = x1 + w1 /2. 20 | r2 = x2 + w2 /2. 21 | right = min(r1, r2) 22 | return right - left; 23 | 24 | #BOX INTERSECTION 25 | @cython.boundscheck(False) # turn off bounds-checking for entire function 26 | @cython.wraparound(False) # turn off negative index wrapping for entire function 27 | @cython.cdivision(True) 28 | cdef float box_intersection_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 29 | cdef: 30 | float w,h,area 31 | w = overlap_c(ax, aw, bx, bw) 32 | h = overlap_c(ay, ah, by, bh) 33 | if w < 0 or h < 0: return 0 34 | area = w * h 35 | return area 36 | 37 | #BOX UNION 38 | @cython.boundscheck(False) # turn off bounds-checking for entire function 39 | @cython.wraparound(False) # turn off negative index wrapping for entire function 40 | @cython.cdivision(True) 41 | cdef float box_union_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 42 | cdef: 43 | float i,u 44 | i = box_intersection_c(ax, ay, aw, ah, bx, by, bw, bh) 45 | u = aw * ah + bw * bh -i 46 | return u 47 | 48 | 49 | #BOX IOU 50 | @cython.boundscheck(False) # turn off bounds-checking for entire function 51 | @cython.wraparound(False) # turn off negative index wrapping for entire function 52 | @cython.cdivision(True) 53 | cdef float box_iou_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 54 | return box_intersection_c(ax, ay, aw, ah, bx, by, bw, bh) / box_union_c(ax, ay, aw, ah, bx, by, bw, bh); 55 | 56 | 57 | 58 | 59 | #NMS 60 | @cython.boundscheck(False) # turn off bounds-checking for entire function 61 | @cython.wraparound(False) # turn off negative index wrapping for entire function 62 | @cython.cdivision(True) 63 | cdef NMS(float[:, ::1] final_probs , float[:, ::1] final_bbox): 64 | cdef list boxes = list() 65 | cdef set indices = set() 66 | cdef: 67 | np.intp_t pred_length,class_length,class_loop,index,index2 68 | 69 | 70 | pred_length = final_bbox.shape[0] 71 | class_length = final_probs.shape[1] 72 | for class_loop in range(class_length): 73 | for index in range(pred_length): 74 | if final_probs[index,class_loop] == 0: continue 75 | for index2 in range(index+1,pred_length): 76 | if final_probs[index2,class_loop] == 0: continue 77 | if index==index2 : continue 78 | if box_iou_c(final_bbox[index,0],final_bbox[index,1],final_bbox[index,2],final_bbox[index,3],final_bbox[index2,0],final_bbox[index2,1],final_bbox[index2,2],final_bbox[index2,3]) >= 0.4: 79 | if final_probs[index2,class_loop] > final_probs[index, class_loop] : 80 | final_probs[index, class_loop] =0 81 | break 82 | final_probs[index2,class_loop]=0 83 | 84 | if index not in indices: 85 | bb=BoundBox(class_length) 86 | bb.x = final_bbox[index, 0] 87 | bb.y = final_bbox[index, 1] 88 | bb.w = final_bbox[index, 2] 89 | bb.h = final_bbox[index, 3] 90 | bb.c = final_bbox[index, 4] 91 | bb.probs = np.asarray(final_probs[index,:]) 92 | boxes.append(bb) 93 | indices.add(index) 94 | return boxes 95 | 96 | # cdef NMS(float[:, ::1] final_probs , float[:, ::1] final_bbox): 97 | # cdef list boxes = list() 98 | # cdef: 99 | # np.intp_t pred_length,class_length,class_loop,index,index2, i, j 100 | 101 | 102 | # 
pred_length = final_bbox.shape[0] 103 | # class_length = final_probs.shape[1] 104 | 105 | # for class_loop in range(class_length): 106 | # order = np.argsort(final_probs[:,class_loop])[::-1] 107 | # # First box 108 | # for i in range(pred_length): 109 | # index = order[i] 110 | # if final_probs[index, class_loop] == 0.: 111 | # continue 112 | # # Second box 113 | # for j in range(i+1, pred_length): 114 | # index2 = order[j] 115 | # if box_iou_c( 116 | # final_bbox[index,0],final_bbox[index,1], 117 | # final_bbox[index,2],final_bbox[index,3], 118 | # final_bbox[index2,0],final_bbox[index2,1], 119 | # final_bbox[index2,2],final_bbox[index2,3]) >= 0.4: 120 | # final_probs[index2, class_loop] = 0. 121 | 122 | # bb = BoundBox(class_length) 123 | # bb.x = final_bbox[index, 0] 124 | # bb.y = final_bbox[index, 1] 125 | # bb.w = final_bbox[index, 2] 126 | # bb.h = final_bbox[index, 3] 127 | # bb.c = final_bbox[index, 4] 128 | # bb.probs = np.asarray(final_probs[index,:]) 129 | # boxes.append(bb) 130 | 131 | # return boxes 132 | -------------------------------------------------------------------------------- /darkflow/dark/connected.py: -------------------------------------------------------------------------------- 1 | from .layer import Layer 2 | import numpy as np 3 | 4 | class extract_layer(Layer): 5 | def setup(self, old_inp, old_out, 6 | activation, inp, out): 7 | if inp is None: inp = range(old_inp) 8 | self.activation = activation 9 | self.old_inp = old_inp 10 | self.old_out = old_out 11 | self.inp = inp 12 | self.out = out 13 | self.wshape = { 14 | 'biases': [len(self.out)], 15 | 'weights': [len(self.inp), len(self.out)] 16 | } 17 | 18 | @property 19 | def signature(self): 20 | sig = ['connected'] 21 | sig += self._signature[1:-2] 22 | return sig 23 | 24 | def present(self): 25 | args = self.signature 26 | self.presenter = connected_layer(*args) 27 | 28 | def recollect(self, val): 29 | w = val['weights'] 30 | b = val['biases'] 31 | if w is None: self.w = val; return 32 | w = np.take(w, self.inp, 0) 33 | w = np.take(w, self.out, 1) 34 | b = np.take(b, self.out) 35 | assert1 = w.shape == tuple(self.wshape['weights']) 36 | assert2 = b.shape == tuple(self.wshape['biases']) 37 | assert assert1 and assert2, \ 38 | 'Dimension does not match in {} recollect'.format( 39 | self._signature) 40 | 41 | self.w['weights'] = w 42 | self.w['biases'] = b 43 | 44 | 45 | 46 | class select_layer(Layer): 47 | def setup(self, inp, old, 48 | activation, inp_idx, 49 | out, keep, train): 50 | self.old = old 51 | self.keep = keep 52 | self.train = train 53 | self.inp_idx = inp_idx 54 | self.activation = activation 55 | inp_dim = inp 56 | if inp_idx is not None: 57 | inp_dim = len(inp_idx) 58 | self.inp = inp_dim 59 | self.out = out 60 | self.wshape = { 61 | 'biases': [out], 62 | 'weights': [inp_dim, out] 63 | } 64 | 65 | @property 66 | def signature(self): 67 | sig = ['connected'] 68 | sig += self._signature[1:-4] 69 | return sig 70 | 71 | def present(self): 72 | args = self.signature 73 | self.presenter = connected_layer(*args) 74 | 75 | def recollect(self, val): 76 | w = val['weights'] 77 | b = val['biases'] 78 | if w is None: self.w = val; return 79 | if self.inp_idx is not None: 80 | w = np.take(w, self.inp_idx, 0) 81 | 82 | keep_b = np.take(b, self.keep) 83 | keep_w = np.take(w, self.keep, 1) 84 | train_b = b[self.train:] 85 | train_w = w[:, self.train:] 86 | self.w['biases'] = np.concatenate( 87 | (keep_b, train_b), axis = 0) 88 | self.w['weights'] = np.concatenate( 89 | (keep_w, train_w), axis = 1) 90 | 91 | 
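# Annotation: recollect() above rebuilds the dense weights for the reduced head. np.take pulls the
# columns listed in `keep` (the surviving class outputs) out of the pretrained layer, the slice from
# `train` onward carries over the tail of the old layer unchanged, and the two parts are
# concatenated into the new, narrower output. This is what the [select] sections in the
# cfg/v1 four-class configs rely on.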
92 | class connected_layer(Layer): 93 | def setup(self, input_size, 94 | output_size, activation): 95 | self.activation = activation 96 | self.inp = input_size 97 | self.out = output_size 98 | self.wshape = { 99 | 'biases': [self.out], 100 | 'weights': [self.inp, self.out] 101 | } 102 | 103 | def finalize(self, transpose): 104 | weights = self.w['weights'] 105 | if weights is None: return 106 | shp = self.wshape['weights'] 107 | if not transpose: 108 | weights = weights.reshape(shp[::-1]) 109 | weights = weights.transpose([1,0]) 110 | else: weights = weights.reshape(shp) 111 | self.w['weights'] = weights -------------------------------------------------------------------------------- /darkflow/dark/convolution.py: -------------------------------------------------------------------------------- 1 | from .layer import Layer 2 | import numpy as np 3 | 4 | class local_layer(Layer): 5 | def setup(self, ksize, c, n, stride, 6 | pad, w_, h_, activation): 7 | self.pad = pad * int(ksize / 2) 8 | self.activation = activation 9 | self.stride = stride 10 | self.ksize = ksize 11 | self.h_out = h_ 12 | self.w_out = w_ 13 | 14 | self.dnshape = [h_ * w_, n, c, ksize, ksize] 15 | self.wshape = dict({ 16 | 'biases': [h_ * w_ * n], 17 | 'kernels': [h_ * w_, ksize, ksize, c, n] 18 | }) 19 | 20 | def finalize(self, _): 21 | weights = self.w['kernels'] 22 | if weights is None: return 23 | weights = weights.reshape(self.dnshape) 24 | weights = weights.transpose([0,3,4,2,1]) 25 | self.w['kernels'] = weights 26 | 27 | class conv_extract_layer(Layer): 28 | def setup(self, ksize, c, n, stride, 29 | pad, batch_norm, activation, 30 | inp, out): 31 | if inp is None: inp = range(c) 32 | self.activation = activation 33 | self.batch_norm = batch_norm 34 | self.stride = stride 35 | self.ksize = ksize 36 | self.pad = pad 37 | self.inp = inp 38 | self.out = out 39 | self.wshape = dict({ 40 | 'biases': [len(out)], 41 | 'kernel': [ksize, ksize, len(inp), len(out)] 42 | }) 43 | 44 | @property 45 | def signature(self): 46 | sig = ['convolutional'] 47 | sig += self._signature[1:-2] 48 | return sig 49 | 50 | def present(self): 51 | args = self.signature 52 | self.presenter = convolutional_layer(*args) 53 | 54 | def recollect(self, w): 55 | if w is None: 56 | self.w = w 57 | return 58 | k = w['kernel'] 59 | b = w['biases'] 60 | k = np.take(k, self.inp, 2) 61 | k = np.take(k, self.out, 3) 62 | b = np.take(b, self.out) 63 | assert1 = k.shape == tuple(self.wshape['kernel']) 64 | assert2 = b.shape == tuple(self.wshape['biases']) 65 | assert assert1 and assert2, \ 66 | 'Dimension not matching in {} recollect'.format( 67 | self._signature) 68 | self.w['kernel'] = k 69 | self.w['biases'] = b 70 | 71 | 72 | class conv_select_layer(Layer): 73 | def setup(self, ksize, c, n, stride, 74 | pad, batch_norm, activation, 75 | keep_idx, real_n): 76 | self.batch_norm = bool(batch_norm) 77 | self.activation = activation 78 | self.keep_idx = keep_idx 79 | self.stride = stride 80 | self.ksize = ksize 81 | self.pad = pad 82 | self.wshape = dict({ 83 | 'biases': [real_n], 84 | 'kernel': [ksize, ksize, c, real_n] 85 | }) 86 | if self.batch_norm: 87 | self.wshape.update({ 88 | 'moving_variance' : [real_n], 89 | 'moving_mean': [real_n], 90 | 'gamma' : [real_n] 91 | }) 92 | self.h['is_training'] = { 93 | 'shape': (), 94 | 'feed': True, 95 | 'dfault': False 96 | } 97 | 98 | @property 99 | def signature(self): 100 | sig = ['convolutional'] 101 | sig += self._signature[1:-2] 102 | return sig 103 | 104 | def present(self): 105 | args = self.signature 106 | 
self.presenter = convolutional_layer(*args) 107 | 108 | def recollect(self, w): 109 | if w is None: 110 | self.w = w 111 | return 112 | idx = self.keep_idx 113 | k = w['kernel'] 114 | b = w['biases'] 115 | self.w['kernel'] = np.take(k, idx, 3) 116 | self.w['biases'] = np.take(b, idx) 117 | if self.batch_norm: 118 | m = w['moving_mean'] 119 | v = w['moving_variance'] 120 | g = w['gamma'] 121 | self.w['moving_mean'] = np.take(m, idx) 122 | self.w['moving_variance'] = np.take(v, idx) 123 | self.w['gamma'] = np.take(g, idx) 124 | 125 | class convolutional_layer(Layer): 126 | def setup(self, ksize, c, n, stride, 127 | pad, batch_norm, activation): 128 | self.batch_norm = bool(batch_norm) 129 | self.activation = activation 130 | self.stride = stride 131 | self.ksize = ksize 132 | self.pad = pad 133 | self.dnshape = [n, c, ksize, ksize] # darknet shape 134 | self.wshape = dict({ 135 | 'biases': [n], 136 | 'kernel': [ksize, ksize, c, n] 137 | }) 138 | if self.batch_norm: 139 | self.wshape.update({ 140 | 'moving_variance' : [n], 141 | 'moving_mean': [n], 142 | 'gamma' : [n] 143 | }) 144 | self.h['is_training'] = { 145 | 'feed': True, 146 | 'dfault': False, 147 | 'shape': () 148 | } 149 | 150 | def finalize(self, _): 151 | """deal with darknet""" 152 | kernel = self.w['kernel'] 153 | if kernel is None: return 154 | kernel = kernel.reshape(self.dnshape) 155 | kernel = kernel.transpose([2,3,1,0]) 156 | self.w['kernel'] = kernel -------------------------------------------------------------------------------- /darkflow/dark/darknet.py: -------------------------------------------------------------------------------- 1 | from ..utils.process import cfg_yielder 2 | from .darkop import create_darkop 3 | from ..utils import loader 4 | import warnings 5 | import time 6 | import os 7 | 8 | class Darknet(object): 9 | 10 | _EXT = '.weights' 11 | 12 | def __init__(self, FLAGS): 13 | self.get_weight_src(FLAGS) 14 | self.modify = False 15 | 16 | print('Parsing {}'.format(self.src_cfg)) 17 | src_parsed = self.parse_cfg(self.src_cfg, FLAGS) 18 | self.src_meta, self.src_layers = src_parsed 19 | 20 | if self.src_cfg == FLAGS.model: 21 | self.meta, self.layers = src_parsed 22 | else: 23 | print('Parsing {}'.format(FLAGS.model)) 24 | des_parsed = self.parse_cfg(FLAGS.model, FLAGS) 25 | self.meta, self.layers = des_parsed 26 | 27 | self.load_weights() 28 | 29 | def get_weight_src(self, FLAGS): 30 | """ 31 | analyse FLAGS.load to know where is the 32 | source binary and what is its config. 
33 | can be: None, FLAGS.model, or some other 34 | """ 35 | self.src_bin = FLAGS.model + self._EXT 36 | self.src_bin = FLAGS.binary + self.src_bin 37 | self.src_bin = os.path.abspath(self.src_bin) 38 | exist = os.path.isfile(self.src_bin) 39 | 40 | if FLAGS.load == str(): FLAGS.load = int() 41 | if type(FLAGS.load) is int: 42 | self.src_cfg = FLAGS.model 43 | if FLAGS.load: self.src_bin = None 44 | elif not exist: self.src_bin = None 45 | else: 46 | assert os.path.isfile(FLAGS.load), \ 47 | '{} not found'.format(FLAGS.load) 48 | self.src_bin = FLAGS.load 49 | name = loader.model_name(FLAGS.load) 50 | cfg_path = os.path.join(FLAGS.config, name + '.cfg') 51 | if not os.path.isfile(cfg_path): 52 | warnings.warn( 53 | '{} not found, use {} instead'.format( 54 | cfg_path, FLAGS.model)) 55 | cfg_path = FLAGS.model 56 | self.src_cfg = cfg_path 57 | FLAGS.load = int() 58 | 59 | 60 | def parse_cfg(self, model, FLAGS): 61 | """ 62 | return a list of `layers` objects (darkop.py) 63 | given path to binaries/ and configs/ 64 | """ 65 | args = [model, FLAGS.binary] 66 | cfg_layers = cfg_yielder(*args) 67 | meta = dict(); layers = list() 68 | for i, info in enumerate(cfg_layers): 69 | if i == 0: meta = info; continue 70 | else: new = create_darkop(*info) 71 | layers.append(new) 72 | return meta, layers 73 | 74 | def load_weights(self): 75 | """ 76 | Use `layers` and Loader to load .weights file 77 | """ 78 | print('Loading {} ...'.format(self.src_bin)) 79 | start = time.time() 80 | 81 | args = [self.src_bin, self.src_layers] 82 | wgts_loader = loader.create_loader(*args) 83 | for layer in self.layers: layer.load(wgts_loader) 84 | 85 | stop = time.time() 86 | print('Finished in {}s'.format(stop - start)) -------------------------------------------------------------------------------- /darkflow/dark/darkop.py: -------------------------------------------------------------------------------- 1 | from .layer import Layer 2 | from .convolution import * 3 | from .connected import * 4 | 5 | class avgpool_layer(Layer): 6 | pass 7 | 8 | class crop_layer(Layer): 9 | pass 10 | 11 | class maxpool_layer(Layer): 12 | def setup(self, ksize, stride, pad): 13 | self.stride = stride 14 | self.ksize = ksize 15 | self.pad = pad 16 | 17 | class softmax_layer(Layer): 18 | def setup(self, groups): 19 | self.groups = groups 20 | 21 | class dropout_layer(Layer): 22 | def setup(self, p): 23 | self.h['pdrop'] = dict({ 24 | 'feed': p, # for training 25 | 'dfault': 1.0, # for testing 26 | 'shape': () 27 | }) 28 | 29 | class route_layer(Layer): 30 | def setup(self, routes): 31 | self.routes = routes 32 | 33 | class reorg_layer(Layer): 34 | def setup(self, stride): 35 | self.stride = stride 36 | 37 | """ 38 | Darkop Factory 39 | """ 40 | 41 | darkops = { 42 | 'dropout': dropout_layer, 43 | 'connected': connected_layer, 44 | 'maxpool': maxpool_layer, 45 | 'convolutional': convolutional_layer, 46 | 'avgpool': avgpool_layer, 47 | 'softmax': softmax_layer, 48 | 'crop': crop_layer, 49 | 'local': local_layer, 50 | 'select': select_layer, 51 | 'route': route_layer, 52 | 'reorg': reorg_layer, 53 | 'conv-select': conv_select_layer, 54 | 'conv-extract': conv_extract_layer, 55 | 'extract': extract_layer 56 | } 57 | 58 | def create_darkop(ltype, num, *args): 59 | op_class = darkops.get(ltype, Layer) 60 | return op_class(ltype, num, *args) -------------------------------------------------------------------------------- /darkflow/dark/layer.py: -------------------------------------------------------------------------------- 1 | from ..utils import 
loader 2 | import numpy as np 3 | 4 | class Layer(object): 5 | 6 | def __init__(self, *args): 7 | self._signature = list(args) 8 | self.type = list(args)[0] 9 | self.number = list(args)[1] 10 | 11 | self.w = dict() # weights 12 | self.h = dict() # placeholders 13 | self.wshape = dict() # weight shape 14 | self.wsize = dict() # weight size 15 | self.setup(*args[2:]) # set attr up 16 | self.present() 17 | for var in self.wshape: 18 | shp = self.wshape[var] 19 | size = np.prod(shp) 20 | self.wsize[var] = size 21 | 22 | def load(self, src_loader): 23 | var_lay = src_loader.VAR_LAYER 24 | if self.type not in var_lay: return 25 | 26 | src_type = type(src_loader) 27 | if src_type is loader.weights_loader: 28 | wdict = self.load_weights(src_loader) 29 | else: 30 | wdict = self.load_ckpt(src_loader) 31 | if wdict is not None: 32 | self.recollect(wdict) 33 | 34 | def load_weights(self, src_loader): 35 | val = src_loader([self.presenter]) 36 | if val is None: return None 37 | else: return val.w 38 | 39 | def load_ckpt(self, src_loader): 40 | result = dict() 41 | presenter = self.presenter 42 | for var in presenter.wshape: 43 | name = presenter.varsig(var) 44 | shape = presenter.wshape[var] 45 | key = [name, shape] 46 | val = src_loader(key) 47 | result[var] = val 48 | return result 49 | 50 | @property 51 | def signature(self): 52 | return self._signature 53 | 54 | # For comparing two layers 55 | def __eq__(self, other): 56 | return self.signature == other.signature 57 | def __ne__(self, other): 58 | return not self.__eq__(other) 59 | 60 | def varsig(self, var): 61 | if var not in self.wshape: 62 | return None 63 | sig = str(self.number) 64 | sig += '-' + self.type 65 | sig += '/' + var 66 | return sig 67 | 68 | def recollect(self, w): self.w = w 69 | def present(self): self.presenter = self 70 | def setup(self, *args): pass 71 | def finalize(self): pass -------------------------------------------------------------------------------- /darkflow/defaults.py: -------------------------------------------------------------------------------- 1 | class argHandler(dict): 2 | #A super duper fancy custom made CLI argument handler!! 3 | __getattr__ = dict.get 4 | __setattr__ = dict.__setitem__ 5 | __delattr__ = dict.__delitem__ 6 | _descriptions = {'help, --h, -h': 'show this super helpful message and exit'} 7 | 8 | def setDefaults(self): 9 | self.define('imgdir', './sample_img/', 'path to testing directory with images') 10 | self.define('binary', './bin/', 'path to .weights directory') 11 | self.define('config', './cfg/', 'path to .cfg directory') 12 | self.define('dataset', '../pascal/VOCdevkit/IMG/', 'path to dataset directory') 13 | self.define('labels', 'labels.txt', 'path to labels file') 14 | self.define('backup', './ckpt/', 'path to backup folder') 15 | self.define('summary', '', 'path to TensorBoard summaries directory') 16 | self.define('annotation', '../pascal/VOCdevkit/ANN/', 'path to annotation directory') 17 | self.define('threshold', -0.1, 'detection threshold') 18 | self.define('model', '', 'configuration of choice') 19 | self.define('trainer', 'rmsprop', 'training algorithm') 20 | self.define('momentum', 0.0, 'applicable for rmsprop and momentum optimizers') 21 | self.define('verbalise', True, 'say out loud while building graph') 22 | self.define('train', False, 'train the whole net') 23 | self.define('load', '', 'how to initialize the net? 
Either from .weights or a checkpoint, or even from scratch') 24 | self.define('savepb', False, 'save net and weight to a .pb file') 25 | self.define('gpu', 0.0, 'how much gpu (from 0.0 to 1.0)') 26 | self.define('gpuName', '/gpu:0', 'GPU device name') 27 | self.define('lr', 1e-5, 'learning rate') 28 | self.define('keep',20,'Number of most recent training results to save') 29 | self.define('batch', 16, 'batch size') 30 | self.define('epoch', 1000, 'number of epoch') 31 | self.define('save', 2000, 'save checkpoint every ? training examples') 32 | self.define('demo', '', 'demo on webcam') 33 | self.define('queue', 1, 'process demo in batch') 34 | self.define('json', False, 'Outputs bounding box information in json format.') 35 | self.define('saveVideo', False, 'Records video from input video or camera') 36 | self.define('pbLoad', '', 'path to .pb protobuf file (metaLoad must also be specified)') 37 | self.define('metaLoad', '', 'path to .meta file generated during --savepb that corresponds to .pb file') 38 | 39 | def define(self, argName, default, description): 40 | self[argName] = default 41 | self._descriptions[argName] = description 42 | 43 | def help(self): 44 | print('Example usage: flow --imgdir sample_img/ --model cfg/yolo.cfg --load bin/yolo.weights') 45 | print('') 46 | print('Arguments:') 47 | spacing = max([len(i) for i in self._descriptions.keys()]) + 2 48 | for item in self._descriptions: 49 | currentSpacing = spacing - len(item) 50 | print(' --' + item + (' ' * currentSpacing) + self._descriptions[item]) 51 | print('') 52 | exit() 53 | 54 | def parseArgs(self, args): 55 | print('') 56 | i = 1 57 | while i < len(args): 58 | if args[i] == '-h' or args[i] == '--h' or args[i] == '--help': 59 | self.help() #Time for some self help! :) 60 | if len(args[i]) < 2: 61 | print('ERROR - Invalid argument: ' + args[i]) 62 | print('Try running flow --help') 63 | exit() 64 | argumentName = args[i][2:] 65 | if isinstance(self.get(argumentName), bool): 66 | if not (i + 1) >= len(args) and (args[i + 1].lower() != 'false' and args[i + 1].lower() != 'true') and not args[i + 1].startswith('--'): 67 | print('ERROR - Expected boolean value (or no value) following argument: ' + args[i]) 68 | print('Try running flow --help') 69 | exit() 70 | elif not (i + 1) >= len(args) and (args[i + 1].lower() == 'false' or args[i + 1].lower() == 'true'): 71 | self[argumentName] = (args[i + 1].lower() == 'true') 72 | i += 1 73 | else: 74 | self[argumentName] = True 75 | elif args[i].startswith('--') and not (i + 1) >= len(args) and not args[i + 1].startswith('--') and argumentName in self: 76 | if isinstance(self[argumentName], float): 77 | try: 78 | args[i + 1] = float(args[i + 1]) 79 | except: 80 | print('ERROR - Expected float for argument: ' + args[i]) 81 | print('Try running flow --help') 82 | exit() 83 | elif isinstance(self[argumentName], int): 84 | try: 85 | args[i + 1] = int(args[i + 1]) 86 | except: 87 | print('ERROR - Expected int for argument: ' + args[i]) 88 | print('Try running flow --help') 89 | exit() 90 | self[argumentName] = args[i + 1] 91 | i += 1 92 | else: 93 | print('ERROR - Invalid argument: ' + args[i]) 94 | print('Try running flow --help') 95 | exit() 96 | i += 1 97 | -------------------------------------------------------------------------------- /darkflow/net/build.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import time 3 | from . import help 4 | from . 
import flow 5 | from .ops import op_create, identity 6 | from .ops import HEADER, LINE 7 | from .framework import create_framework 8 | from ..dark.darknet import Darknet 9 | import json 10 | import os 11 | 12 | class TFNet(object): 13 | 14 | _TRAINER = dict({ 15 | 'rmsprop': tf.train.RMSPropOptimizer, 16 | 'adadelta': tf.train.AdadeltaOptimizer, 17 | 'adagrad': tf.train.AdagradOptimizer, 18 | 'adagradDA': tf.train.AdagradDAOptimizer, 19 | 'momentum': tf.train.MomentumOptimizer, 20 | 'adam': tf.train.AdamOptimizer, 21 | 'ftrl': tf.train.FtrlOptimizer, 22 | 'sgd': tf.train.GradientDescentOptimizer 23 | }) 24 | 25 | # imported methods 26 | _get_fps = help._get_fps 27 | say = help.say 28 | train = flow.train 29 | camera = help.camera 30 | predict = flow.predict 31 | return_predict = flow.return_predict 32 | to_darknet = help.to_darknet 33 | build_train_op = help.build_train_op 34 | load_from_ckpt = help.load_from_ckpt 35 | 36 | def __init__(self, FLAGS, darknet = None): 37 | self.ntrain = 0 38 | 39 | if isinstance(FLAGS, dict): 40 | from ..defaults import argHandler 41 | newFLAGS = argHandler() 42 | newFLAGS.setDefaults() 43 | newFLAGS.update(FLAGS) 44 | FLAGS = newFLAGS 45 | 46 | self.FLAGS = FLAGS 47 | if self.FLAGS.pbLoad and self.FLAGS.metaLoad: 48 | self.say('\nLoading from .pb and .meta') 49 | self.graph = tf.Graph() 50 | device_name = FLAGS.gpuName \ 51 | if FLAGS.gpu > 0.0 else None 52 | with tf.device(device_name): 53 | with self.graph.as_default() as g: 54 | self.build_from_pb() 55 | return 56 | 57 | if darknet is None: 58 | darknet = Darknet(FLAGS) 59 | self.ntrain = len(darknet.layers) 60 | 61 | self.darknet = darknet 62 | args = [darknet.meta, FLAGS] 63 | self.num_layer = len(darknet.layers) 64 | self.framework = create_framework(*args) 65 | 66 | self.meta = darknet.meta 67 | 68 | self.say('\nBuilding net ...') 69 | start = time.time() 70 | self.graph = tf.Graph() 71 | device_name = FLAGS.gpuName \ 72 | if FLAGS.gpu > 0.0 else None 73 | with tf.device(device_name): 74 | with self.graph.as_default() as g: 75 | self.build_forward() 76 | self.setup_meta_ops() 77 | self.say('Finished in {}s\n'.format( 78 | time.time() - start)) 79 | 80 | def build_from_pb(self): 81 | with tf.gfile.FastGFile(self.FLAGS.pbLoad, "rb") as f: 82 | graph_def = tf.GraphDef() 83 | graph_def.ParseFromString(f.read()) 84 | 85 | tf.import_graph_def( 86 | graph_def, 87 | name="" 88 | ) 89 | with open(self.FLAGS.metaLoad, 'r') as fp: 90 | self.meta = json.load(fp) 91 | self.framework = create_framework(self.meta, self.FLAGS) 92 | 93 | # Placeholders 94 | self.inp = tf.get_default_graph().get_tensor_by_name('input:0') 95 | self.feed = dict() # other placeholders 96 | self.out = tf.get_default_graph().get_tensor_by_name('output:0') 97 | 98 | self.setup_meta_ops() 99 | 100 | def build_forward(self): 101 | verbalise = self.FLAGS.verbalise 102 | 103 | # Placeholders 104 | inp_size = [None] + self.meta['inp_size'] 105 | self.inp = tf.placeholder(tf.float32, inp_size, 'input') 106 | self.feed = dict() # other placeholders 107 | 108 | # Build the forward pass 109 | state = identity(self.inp) 110 | roof = self.num_layer - self.ntrain 111 | self.say(HEADER, LINE) 112 | for i, layer in enumerate(self.darknet.layers): 113 | scope = '{}-{}'.format(str(i),layer.type) 114 | args = [layer, state, i, roof, self.feed] 115 | state = op_create(*args) 116 | mess = state.verbalise() 117 | self.say(mess) 118 | self.say(LINE) 119 | 120 | self.top = state 121 | self.out = tf.identity(state.out, name='output') 122 | 123 | def 
setup_meta_ops(self): 124 | cfg = dict({ 125 | 'allow_soft_placement': False, 126 | 'log_device_placement': False 127 | }) 128 | 129 | utility = min(self.FLAGS.gpu, 1.) 130 | if utility > 0.0: 131 | self.say('GPU mode with {} usage'.format(utility)) 132 | cfg['gpu_options'] = tf.GPUOptions( 133 | per_process_gpu_memory_fraction = utility) 134 | cfg['allow_soft_placement'] = True 135 | else: 136 | self.say('Running entirely on CPU') 137 | cfg['device_count'] = {'GPU': 0} 138 | 139 | if self.FLAGS.train: self.build_train_op() 140 | 141 | if self.FLAGS.summary: 142 | self.summary_op = tf.summary.merge_all() 143 | self.writer = tf.summary.FileWriter(self.FLAGS.summary + 'train') 144 | 145 | self.sess = tf.Session(config = tf.ConfigProto(**cfg)) 146 | self.sess.run(tf.global_variables_initializer()) 147 | 148 | if not self.ntrain: return 149 | self.saver = tf.train.Saver(tf.global_variables(), 150 | max_to_keep = self.FLAGS.keep) 151 | if self.FLAGS.load != 0: self.load_from_ckpt() 152 | 153 | if self.FLAGS.summary: 154 | self.writer.add_graph(self.sess.graph) 155 | 156 | def savepb(self): 157 | """ 158 | Create a standalone const graph def that 159 | C++ can load and run. 160 | """ 161 | darknet_pb = self.to_darknet() 162 | flags_pb = self.FLAGS 163 | flags_pb.verbalise = False 164 | 165 | flags_pb.train = False 166 | # rebuild another tfnet. all const. 167 | tfnet_pb = TFNet(flags_pb, darknet_pb) 168 | tfnet_pb.sess = tf.Session(graph = tfnet_pb.graph) 169 | # tfnet_pb.predict() # uncomment for unit testing 170 | name = 'built_graph/{}.pb'.format(self.meta['name']) 171 | os.makedirs(os.path.dirname(name), exist_ok=True) 172 | #Save dump of everything in meta 173 | with open('built_graph/{}.meta'.format(self.meta['name']), 'w') as fp: 174 | json.dump(self.meta, fp) 175 | self.say('Saving const graph def to {}'.format(name)) 176 | graph_def = tfnet_pb.sess.graph_def 177 | tf.train.write_graph(graph_def,'./', name, False) -------------------------------------------------------------------------------- /darkflow/net/flow.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import tensorflow as tf 5 | import pickle 6 | from multiprocessing.pool import ThreadPool 7 | 8 | train_stats = ( 9 | 'Training statistics: \n' 10 | '\tLearning rate : {}\n' 11 | '\tBatch size : {}\n' 12 | '\tEpoch number : {}\n' 13 | '\tBackup every : {}' 14 | ) 15 | pool = ThreadPool() 16 | 17 | def _save_ckpt(self, step, loss_profile): 18 | file = '{}-{}{}' 19 | model = self.meta['name'] 20 | 21 | profile = file.format(model, step, '.profile') 22 | profile = os.path.join(self.FLAGS.backup, profile) 23 | with open(profile, 'wb') as profile_ckpt: 24 | pickle.dump(loss_profile, profile_ckpt) 25 | 26 | ckpt = file.format(model, step, '') 27 | ckpt = os.path.join(self.FLAGS.backup, ckpt) 28 | self.say('Checkpoint at step {}'.format(step)) 29 | self.saver.save(self.sess, ckpt) 30 | 31 | 32 | def train(self): 33 | loss_ph = self.framework.placeholders 34 | loss_mva = None; profile = list() 35 | 36 | batches = self.framework.shuffle() 37 | loss_op = self.framework.loss 38 | 39 | for i, (x_batch, datum) in enumerate(batches): 40 | if not i: self.say(train_stats.format( 41 | self.FLAGS.lr, self.FLAGS.batch, 42 | self.FLAGS.epoch, self.FLAGS.save 43 | )) 44 | 45 | feed_dict = { 46 | loss_ph[key]: datum[key] 47 | for key in loss_ph } 48 | feed_dict[self.inp] = x_batch 49 | feed_dict.update(self.feed) 50 | 51 | fetches = [self.train_op, loss_op] 52 | 
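        # Note: the loop below runs the train op, keeps an exponential moving
        # average (EMA) of the loss with decay 0.9, and writes a checkpoint
        # every FLAGS.save // FLAGS.batch steps.  A minimal sketch of that EMA
        # update, assuming a decay of 0.9 (the helper name `ema_update` is
        # illustrative only and does not exist in darkflow):
        #
        #     def ema_update(loss_mva, loss, decay=0.9):
        #         if loss_mva is None:      # first step: seed with the raw loss
        #             return loss
        #         return decay * loss_mva + (1.0 - decay) * loss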
53 | if self.FLAGS.summary: 54 | fetches.append(self.summary_op) 55 | 56 | fetched = self.sess.run(fetches, feed_dict) 57 | loss = fetched[1] 58 | 59 | if loss_mva is None: loss_mva = loss 60 | loss_mva = .9 * loss_mva + .1 * loss 61 | step_now = self.FLAGS.load + i + 1 62 | 63 | if self.FLAGS.summary: 64 | self.writer.add_summary(fetched[2], step_now) 65 | 66 | form = 'step {} - loss {} - moving ave loss {}' 67 | self.say(form.format(step_now, loss, loss_mva)) 68 | profile += [(loss, loss_mva)] 69 | 70 | ckpt = (i+1) % (self.FLAGS.save // self.FLAGS.batch) 71 | args = [step_now, profile] 72 | if not ckpt: _save_ckpt(self, *args) 73 | 74 | if ckpt: _save_ckpt(self, *args) 75 | 76 | def return_predict(self, im): 77 | assert isinstance(im, np.ndarray), \ 78 | 'Image is not a np.ndarray' 79 | h, w, _ = im.shape 80 | im = self.framework.resize_input(im) 81 | this_inp = np.expand_dims(im, 0) 82 | feed_dict = {self.inp : this_inp} 83 | 84 | out = self.sess.run(self.out, feed_dict)[0] 85 | boxes = self.framework.findboxes(out) 86 | threshold = self.FLAGS.threshold 87 | boxesInfo = list() 88 | for box in boxes: 89 | tmpBox = self.framework.process_box(box, h, w, threshold) 90 | if tmpBox is None: 91 | continue 92 | boxesInfo.append({ 93 | "label": tmpBox[4], 94 | "confidence": tmpBox[6], 95 | "topleft": { 96 | "x": tmpBox[0], 97 | "y": tmpBox[2]}, 98 | "bottomright": { 99 | "x": tmpBox[1], 100 | "y": tmpBox[3]} 101 | }) 102 | return boxesInfo 103 | 104 | import math 105 | 106 | def predict(self): 107 | inp_path = self.FLAGS.imgdir 108 | all_inps = os.listdir(inp_path) 109 | all_inps = [i for i in all_inps if self.framework.is_inp(i)] 110 | if not all_inps: 111 | msg = 'Failed to find any images in {} .' 112 | exit('Error: {}'.format(msg.format(inp_path))) 113 | 114 | batch = min(self.FLAGS.batch, len(all_inps)) 115 | 116 | # predict in batches 117 | n_batch = int(math.ceil(len(all_inps) / batch)) 118 | for j in range(n_batch): 119 | from_idx = j * batch 120 | to_idx = min(from_idx + batch, len(all_inps)) 121 | 122 | # collect images input in the batch 123 | this_batch = all_inps[from_idx:to_idx] 124 | inp_feed = pool.map(lambda inp: ( 125 | np.expand_dims(self.framework.preprocess( 126 | os.path.join(inp_path, inp)), 0)), this_batch) 127 | 128 | # Feed to the net 129 | feed_dict = {self.inp : np.concatenate(inp_feed, 0)} 130 | self.say('Forwarding {} inputs ...'.format(len(inp_feed))) 131 | start = time.time() 132 | out = self.sess.run(self.out, feed_dict) 133 | stop = time.time(); last = stop - start 134 | self.say('Total time = {}s / {} inps = {} ips'.format( 135 | last, len(inp_feed), len(inp_feed) / last)) 136 | 137 | # Post processing 138 | self.say('Post processing {} inputs ...'.format(len(inp_feed))) 139 | start = time.time() 140 | pool.map(lambda p: (lambda i, prediction: 141 | self.framework.postprocess( 142 | prediction, os.path.join(inp_path, this_batch[i])))(*p), 143 | enumerate(out)) 144 | stop = time.time(); last = stop - start 145 | 146 | # Timing 147 | self.say('Total time = {}s / {} inps = {} ips'.format( 148 | last, len(inp_feed), len(inp_feed) / last)) 149 | -------------------------------------------------------------------------------- /darkflow/net/framework.py: -------------------------------------------------------------------------------- 1 | from . import yolo 2 | from . import yolov2 3 | from . 
import vanilla 4 | from os.path import basename 5 | 6 | class framework(object): 7 | constructor = vanilla.constructor 8 | loss = vanilla.train.loss 9 | 10 | def __init__(self, meta, FLAGS): 11 | model = basename(meta['model']) 12 | model = '.'.join(model.split('.')[:-1]) 13 | meta['name'] = model 14 | 15 | self.constructor(meta, FLAGS) 16 | 17 | def is_inp(self, file_name): 18 | return True 19 | 20 | class YOLO(framework): 21 | constructor = yolo.constructor 22 | parse = yolo.data.parse 23 | shuffle = yolo.data.shuffle 24 | preprocess = yolo.predict.preprocess 25 | postprocess = yolo.predict.postprocess 26 | loss = yolo.train.loss 27 | is_inp = yolo.misc.is_inp 28 | profile = yolo.misc.profile 29 | _batch = yolo.data._batch 30 | resize_input = yolo.predict.resize_input 31 | findboxes = yolo.predict.findboxes 32 | process_box = yolo.predict.process_box 33 | 34 | class YOLOv2(framework): 35 | constructor = yolo.constructor 36 | parse = yolo.data.parse 37 | shuffle = yolov2.data.shuffle 38 | preprocess = yolo.predict.preprocess 39 | loss = yolov2.train.loss 40 | is_inp = yolo.misc.is_inp 41 | postprocess = yolov2.predict.postprocess 42 | _batch = yolov2.data._batch 43 | resize_input = yolo.predict.resize_input 44 | findboxes = yolov2.predict.findboxes 45 | process_box = yolo.predict.process_box 46 | 47 | """ 48 | framework factory 49 | """ 50 | 51 | types = { 52 | '[detection]': YOLO, 53 | '[region]': YOLOv2 54 | } 55 | 56 | def create_framework(meta, FLAGS): 57 | net_type = meta['type'] 58 | this = types.get(net_type, framework) 59 | return this(meta, FLAGS) -------------------------------------------------------------------------------- /darkflow/net/help.py: -------------------------------------------------------------------------------- 1 | """ 2 | tfnet secondary (helper) methods 3 | """ 4 | from ..utils.loader import create_loader 5 | from time import time as timer 6 | import tensorflow as tf 7 | import numpy as np 8 | import sys 9 | import cv2 10 | import os 11 | 12 | old_graph_msg = 'Resolving old graph def {} (no guarantee)' 13 | 14 | def build_train_op(self): 15 | self.framework.loss(self.out) 16 | self.say('Building {} train op'.format(self.meta['model'])) 17 | optimizer = self._TRAINER[self.FLAGS.trainer](self.FLAGS.lr) 18 | gradients = optimizer.compute_gradients(self.framework.loss) 19 | self.train_op = optimizer.apply_gradients(gradients) 20 | 21 | def load_from_ckpt(self): 22 | if self.FLAGS.load < 0: # load lastest ckpt 23 | with open(os.path.join(self.FLAGS.backup, 'checkpoint'), 'r') as f: 24 | last = f.readlines()[-1].strip() 25 | load_point = last.split(' ')[1] 26 | load_point = load_point.split('"')[1] 27 | load_point = load_point.split('-')[-1] 28 | self.FLAGS.load = int(load_point) 29 | 30 | load_point = os.path.join(self.FLAGS.backup, self.meta['name']) 31 | load_point = '{}-{}'.format(load_point, self.FLAGS.load) 32 | self.say('Loading from {}'.format(load_point)) 33 | try: self.saver.restore(self.sess, load_point) 34 | except: load_old_graph(self, load_point) 35 | 36 | def say(self, *msgs): 37 | if not self.FLAGS.verbalise: 38 | return 39 | msgs = list(msgs) 40 | for msg in msgs: 41 | if msg is None: continue 42 | print(msg) 43 | 44 | def load_old_graph(self, ckpt): 45 | ckpt_loader = create_loader(ckpt) 46 | self.say(old_graph_msg.format(ckpt)) 47 | 48 | for var in tf.global_variables(): 49 | name = var.name.split(':')[0] 50 | args = [name, var.get_shape()] 51 | val = ckpt_loader(args) 52 | assert val is not None, \ 53 | 'Cannot find and load {}'.format(var.name) 
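        # For context: load_from_ckpt() above recovers the step number by parsing
        # TensorFlow's plain-text `checkpoint` index file.  Its last line typically
        # looks like (the path shown is illustrative, not from this repo):
        #
        #     all_model_checkpoint_paths: "ckpt/tiny-yolo-voc-8000"
        #
        # so split(' ')[1] keeps the quoted path, split('"')[1] strips the quotes,
        # and split('-')[-1] yields the step ("8000"), which becomes FLAGS.load.
        # The loop below then restores each value through a placeholder and a
        # per-variable tf.assign, rather than a single Saver.restore().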
54 | shp = val.shape 55 | plh = tf.placeholder(tf.float32, shp) 56 | op = tf.assign(var, plh) 57 | self.sess.run(op, {plh: val}) 58 | 59 | def _get_fps(self, frame): 60 | elapsed = int() 61 | start = timer() 62 | preprocessed = self.framework.preprocess(frame) 63 | feed_dict = {self.inp: [preprocessed]} 64 | net_out = self.sess.run(self.out, feed_dict)[0] 65 | processed = self.framework.postprocess(net_out, frame, False) 66 | return timer() - start 67 | 68 | def camera(self): 69 | file = self.FLAGS.demo 70 | SaveVideo = self.FLAGS.saveVideo 71 | 72 | if file == 'camera': 73 | file = 0 74 | else: 75 | assert os.path.isfile(file), \ 76 | 'file {} does not exist'.format(file) 77 | 78 | camera = cv2.VideoCapture(file) 79 | 80 | if file == 0: 81 | self.say('Press [ESC] to quit demo') 82 | 83 | assert camera.isOpened(), \ 84 | 'Cannot capture source' 85 | 86 | if file == 0:#camera window 87 | cv2.namedWindow('', 0) 88 | _, frame = camera.read() 89 | height, width, _ = frame.shape 90 | cv2.resizeWindow('', width, height) 91 | else: 92 | _, frame = camera.read() 93 | height, width, _ = frame.shape 94 | 95 | if SaveVideo: 96 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 97 | if file == 0:#camera window 98 | fps = 1 / self._get_fps(frame) 99 | if fps < 1: 100 | fps = 1 101 | else: 102 | fps = round(camera.get(cv2.CAP_PROP_FPS)) 103 | videoWriter = cv2.VideoWriter( 104 | 'video.avi', fourcc, fps, (width, height)) 105 | 106 | # buffers for demo in batch 107 | buffer_inp = list() 108 | buffer_pre = list() 109 | 110 | elapsed = int() 111 | start = timer() 112 | self.say('Press [ESC] to quit demo') 113 | # Loop through frames 114 | while camera.isOpened(): 115 | elapsed += 1 116 | _, frame = camera.read() 117 | if frame is None: 118 | print ('\nEnd of Video') 119 | break 120 | preprocessed = self.framework.preprocess(frame) 121 | buffer_inp.append(frame) 122 | buffer_pre.append(preprocessed) 123 | 124 | # Only process and imshow when queue is full 125 | if elapsed % self.FLAGS.queue == 0: 126 | feed_dict = {self.inp: buffer_pre} 127 | net_out = self.sess.run(self.out, feed_dict) 128 | for img, single_out in zip(buffer_inp, net_out): 129 | postprocessed = self.framework.postprocess( 130 | single_out, img, False) 131 | if SaveVideo: 132 | videoWriter.write(postprocessed) 133 | if file == 0: #camera window 134 | cv2.imshow('', postprocessed) 135 | # Clear Buffers 136 | buffer_inp = list() 137 | buffer_pre = list() 138 | 139 | if elapsed % 5 == 0: 140 | sys.stdout.write('\r') 141 | sys.stdout.write('{0:3.3f} FPS'.format( 142 | elapsed / (timer() - start))) 143 | sys.stdout.flush() 144 | if file == 0: #camera window 145 | choice = cv2.waitKey(1) 146 | if choice == 27: break 147 | 148 | sys.stdout.write('\n') 149 | if SaveVideo: 150 | videoWriter.release() 151 | camera.release() 152 | if file == 0: #camera window 153 | cv2.destroyAllWindows() 154 | 155 | def to_darknet(self): 156 | darknet_ckpt = self.darknet 157 | 158 | with self.graph.as_default() as g: 159 | for var in tf.global_variables(): 160 | name = var.name.split(':')[0] 161 | var_name = name.split('-') 162 | l_idx = int(var_name[0]) 163 | w_sig = var_name[1].split('/')[-1] 164 | l = darknet_ckpt.layers[l_idx] 165 | l.w[w_sig] = var.eval(self.sess) 166 | 167 | for layer in darknet_ckpt.layers: 168 | for ph in layer.h: 169 | layer.h[ph] = None 170 | 171 | return darknet_ckpt 172 | -------------------------------------------------------------------------------- /darkflow/net/ops/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .simple import * 2 | from .convolution import * 3 | from .baseop import HEADER, LINE 4 | 5 | op_types = { 6 | 'convolutional': convolutional, 7 | 'conv-select': conv_select, 8 | 'connected': connected, 9 | 'maxpool': maxpool, 10 | 'leaky': leaky, 11 | 'dropout': dropout, 12 | 'flatten': flatten, 13 | 'avgpool': avgpool, 14 | 'softmax': softmax, 15 | 'identity': identity, 16 | 'crop': crop, 17 | 'local': local, 18 | 'select': select, 19 | 'route': route, 20 | 'reorg': reorg, 21 | 'conv-extract': conv_extract, 22 | 'extract': extract 23 | } 24 | 25 | def op_create(*args): 26 | layer_type = list(args)[0].type 27 | return op_types[layer_type](*args) -------------------------------------------------------------------------------- /darkflow/net/ops/baseop.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | FORM = '{:>6} | {:>6} | {:<32} | {}' 5 | FORM_ = '{}+{}+{}+{}' 6 | LINE = FORM_.format('-'*7, '-'*8, '-'*34, '-'*15) 7 | HEADER = FORM.format( 8 | 'Source', 'Train?','Layer description', 'Output size') 9 | 10 | def _shape(tensor): # work for both tf.Tensor & np.ndarray 11 | if type(tensor) in [tf.Variable, tf.Tensor]: 12 | return tensor.get_shape() 13 | else: return tensor.shape 14 | 15 | def _name(tensor): 16 | return tensor.name.split(':')[0] 17 | 18 | class BaseOp(object): 19 | """ 20 | BaseOp objects initialise with a darknet's `layer` object 21 | and input tensor of that layer `inp`, it calculates the 22 | output of this layer and place the result in self.out 23 | """ 24 | 25 | # let slim take care of the following vars 26 | _SLIM = ['gamma', 'moving_mean', 'moving_variance'] 27 | 28 | def __init__(self, layer, inp, num, roof, feed): 29 | self.inp = inp # BaseOp 30 | self.num = num # int 31 | self.out = None # tf.Tensor 32 | self.lay = layer 33 | 34 | self.scope = '{}-{}'.format( 35 | str(self.num), self.lay.type) 36 | self.gap = roof - self.num 37 | self.var = not self.gap > 0 38 | self.act = 'Load ' 39 | self.convert(feed) 40 | if self.var: self.train_msg = 'Yep! 
' 41 | else: self.train_msg = 'Nope ' 42 | self.forward() 43 | 44 | def convert(self, feed): 45 | """convert self.lay to variables & placeholders""" 46 | for var in self.lay.wshape: 47 | self.wrap_variable(var) 48 | for ph in self.lay.h: 49 | self.wrap_pholder(ph, feed) 50 | 51 | def wrap_variable(self, var): 52 | """wrap layer.w into variables""" 53 | val = self.lay.w.get(var, None) 54 | if val is None: 55 | shape = self.lay.wshape[var] 56 | args = [0., 1e-2, shape] 57 | if 'moving_mean' in var: 58 | val = np.zeros(shape) 59 | elif 'moving_variance' in var: 60 | val = np.ones(shape) 61 | else: 62 | val = np.random.normal(*args) 63 | self.lay.w[var] = val.astype(np.float32) 64 | self.act = 'Init ' 65 | if not self.var: return 66 | 67 | val = self.lay.w[var] 68 | self.lay.w[var] = tf.constant_initializer(val) 69 | if var in self._SLIM: return 70 | with tf.variable_scope(self.scope): 71 | self.lay.w[var] = tf.get_variable(var, 72 | shape = self.lay.wshape[var], 73 | dtype = tf.float32, 74 | initializer = self.lay.w[var]) 75 | 76 | def wrap_pholder(self, ph, feed): 77 | """wrap layer.h into placeholders""" 78 | phtype = type(self.lay.h[ph]) 79 | if phtype is not dict: return 80 | 81 | sig = '{}/{}'.format(self.scope, ph) 82 | val = self.lay.h[ph] 83 | 84 | self.lay.h[ph] = tf.placeholder_with_default( 85 | val['dfault'], val['shape'], name = sig) 86 | feed[self.lay.h[ph]] = val['feed'] 87 | 88 | def verbalise(self): # console speaker 89 | msg = str() 90 | inp = _name(self.inp.out) 91 | if inp == 'input': \ 92 | msg = FORM.format( 93 | '', '', 'input', 94 | _shape(self.inp.out)) + '\n' 95 | if not self.act: return msg 96 | return msg + FORM.format( 97 | self.act, self.train_msg, 98 | self.speak(), _shape(self.out)) 99 | 100 | def speak(self): pass -------------------------------------------------------------------------------- /darkflow/net/ops/convolution.py: -------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | from .baseop import BaseOp 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | class reorg(BaseOp): 7 | def _forward(self): 8 | inp = self.inp.out 9 | shape = inp.get_shape().as_list() 10 | _, h, w, c = shape 11 | s = self.lay.stride 12 | out = list() 13 | for i in range(int(h/s)): 14 | row_i = list() 15 | for j in range(int(w/s)): 16 | si, sj = s * i, s * j 17 | boxij = inp[:, si: si+s, sj: sj+s,:] 18 | flatij = tf.reshape(boxij, [-1,1,1,c*s*s]) 19 | row_i += [flatij] 20 | out += [tf.concat(row_i, 2)] 21 | 22 | self.out = tf.concat(out, 1) 23 | 24 | def forward(self): 25 | inp = self.inp.out 26 | s = self.lay.stride 27 | self.out = tf.extract_image_patches( 28 | inp, [1,s,s,1], [1,s,s,1], [1,1,1,1], 'VALID') 29 | 30 | def speak(self): 31 | args = [self.lay.stride] * 2 32 | msg = 'local flatten {}x{}' 33 | return msg.format(*args) 34 | 35 | 36 | class local(BaseOp): 37 | def forward(self): 38 | pad = [[self.lay.pad, self.lay.pad]] * 2; 39 | temp = tf.pad(self.inp.out, [[0, 0]] + pad + [[0, 0]]) 40 | 41 | k = self.lay.w['kernels'] 42 | ksz = self.lay.ksize 43 | half = int(ksz / 2) 44 | out = list() 45 | for i in range(self.lay.h_out): 46 | row_i = list() 47 | for j in range(self.lay.w_out): 48 | kij = k[i * self.lay.w_out + j] 49 | i_, j_ = i + 1 - half, j + 1 - half 50 | tij = temp[:, i_ : i_ + ksz, j_ : j_ + ksz,:] 51 | row_i.append( 52 | tf.nn.conv2d(tij, kij, 53 | padding = 'VALID', 54 | strides = [1] * 4)) 55 | out += [tf.concat(row_i, 2)] 56 | 57 | self.out = tf.concat(out, 1) 58 | 59 | def 
speak(self): 60 | l = self.lay 61 | args = [l.ksize] * 2 + [l.pad] + [l.stride] 62 | args += [l.activation] 63 | msg = 'loca {}x{}p{}_{} {}'.format(*args) 64 | return msg 65 | 66 | class convolutional(BaseOp): 67 | def forward(self): 68 | pad = [[self.lay.pad, self.lay.pad]] * 2; 69 | temp = tf.pad(self.inp.out, [[0, 0]] + pad + [[0, 0]]) 70 | temp = tf.nn.conv2d(temp, self.lay.w['kernel'], padding = 'VALID', 71 | name = self.scope, strides = [1] + [self.lay.stride] * 2 + [1]) 72 | if self.lay.batch_norm: 73 | temp = self.batchnorm(self.lay, temp) 74 | self.out = tf.nn.bias_add(temp, self.lay.w['biases']) 75 | 76 | def batchnorm(self, layer, inp): 77 | if not self.var: 78 | temp = (inp - layer.w['moving_mean']) 79 | temp /= (np.sqrt(layer.w['moving_variance']) + 1e-5) 80 | temp *= layer.w['gamma'] 81 | return temp 82 | else: 83 | args = dict({ 84 | 'center' : False, 'scale' : True, 85 | 'epsilon': 1e-5, 'scope' : self.scope, 86 | 'updates_collections' : None, 87 | 'is_training': layer.h['is_training'], 88 | 'param_initializers': layer.w 89 | }) 90 | return slim.batch_norm(inp, **args) 91 | 92 | def speak(self): 93 | l = self.lay 94 | args = [l.ksize] * 2 + [l.pad] + [l.stride] 95 | args += [l.batch_norm * '+bnorm'] 96 | args += [l.activation] 97 | msg = 'conv {}x{}p{}_{} {} {}'.format(*args) 98 | return msg 99 | 100 | class conv_select(convolutional): 101 | def speak(self): 102 | l = self.lay 103 | args = [l.ksize] * 2 + [l.pad] + [l.stride] 104 | args += [l.batch_norm * '+bnorm'] 105 | args += [l.activation] 106 | msg = 'sele {}x{}p{}_{} {} {}'.format(*args) 107 | return msg 108 | 109 | class conv_extract(convolutional): 110 | def speak(self): 111 | l = self.lay 112 | args = [l.ksize] * 2 + [l.pad] + [l.stride] 113 | args += [l.batch_norm * '+bnorm'] 114 | args += [l.activation] 115 | msg = 'extr {}x{}p{}_{} {} {}'.format(*args) 116 | return msg -------------------------------------------------------------------------------- /darkflow/net/ops/simple.py: -------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | from .baseop import BaseOp 3 | import tensorflow as tf 4 | from distutils.version import StrictVersion 5 | 6 | class route(BaseOp): 7 | def forward(self): 8 | routes = self.lay.routes 9 | routes_out = list() 10 | for r in routes: 11 | this = self.inp 12 | while this.lay.number != r: 13 | this = this.inp 14 | assert this is not None, \ 15 | 'Routing to non-existence {}'.format(r) 16 | routes_out += [this.out] 17 | self.out = tf.concat(routes_out, 3) 18 | 19 | def speak(self): 20 | msg = 'concat {}' 21 | return msg.format(self.lay.routes) 22 | 23 | class connected(BaseOp): 24 | def forward(self): 25 | self.out = tf.nn.xw_plus_b( 26 | self.inp.out, 27 | self.lay.w['weights'], 28 | self.lay.w['biases'], 29 | name = self.scope) 30 | 31 | def speak(self): 32 | layer = self.lay 33 | args = [layer.inp, layer.out] 34 | args += [layer.activation] 35 | msg = 'full {} x {} {}' 36 | return msg.format(*args) 37 | 38 | class select(connected): 39 | """a weird connected layer""" 40 | def speak(self): 41 | layer = self.lay 42 | args = [layer.inp, layer.out] 43 | args += [layer.activation] 44 | msg = 'sele {} x {} {}' 45 | return msg.format(*args) 46 | 47 | class extract(connected): 48 | """a weird connected layer""" 49 | def speak(self): 50 | layer = self.lay 51 | args = [len(layer.inp), len(layer.out)] 52 | args += [layer.activation] 53 | msg = 'extr {} x {} {}' 54 | return msg.format(*args) 55 | 56 | class flatten(BaseOp): 57 | 
def forward(self): 58 | temp = tf.transpose( 59 | self.inp.out, [0,3,1,2]) 60 | self.out = slim.flatten( 61 | temp, scope = self.scope) 62 | 63 | def speak(self): return 'flat' 64 | 65 | 66 | class softmax(BaseOp): 67 | def forward(self): 68 | self.out = tf.nn.softmax(self.inp.out) 69 | 70 | def speak(self): return 'softmax()' 71 | 72 | 73 | class avgpool(BaseOp): 74 | def forward(self): 75 | self.out = tf.reduce_mean( 76 | self.inp.out, [1, 2], 77 | name = self.scope 78 | ) 79 | 80 | def speak(self): return 'avgpool()' 81 | 82 | 83 | class dropout(BaseOp): 84 | def forward(self): 85 | if self.lay.h['pdrop'] is None: 86 | self.lay.h['pdrop'] = 1.0 87 | self.out = tf.nn.dropout( 88 | self.inp.out, 89 | self.lay.h['pdrop'], 90 | name = self.scope 91 | ) 92 | 93 | def speak(self): return 'drop' 94 | 95 | 96 | class crop(BaseOp): 97 | def forward(self): 98 | self.out = self.inp.out * 2. - 1. 99 | 100 | def speak(self): 101 | return 'scale to (-1, 1)' 102 | 103 | 104 | class maxpool(BaseOp): 105 | def forward(self): 106 | self.out = tf.nn.max_pool( 107 | self.inp.out, padding = 'SAME', 108 | ksize = [1] + [self.lay.ksize]*2 + [1], 109 | strides = [1] + [self.lay.stride]*2 + [1], 110 | name = self.scope 111 | ) 112 | 113 | def speak(self): 114 | l = self.lay 115 | return 'maxp {}x{}p{}_{}'.format( 116 | l.ksize, l.ksize, l.pad, l.stride) 117 | 118 | 119 | class leaky(BaseOp): 120 | def forward(self): 121 | self.out = tf.maximum( 122 | .1 * self.inp.out, 123 | self.inp.out, 124 | name = self.scope 125 | ) 126 | 127 | def verbalise(self): pass 128 | 129 | 130 | class identity(BaseOp): 131 | def __init__(self, inp): 132 | self.inp = None 133 | self.out = inp 134 | -------------------------------------------------------------------------------- /darkflow/net/vanilla/__init__.py: -------------------------------------------------------------------------------- 1 | from . import train 2 | 3 | def constructor(self, meta, FLAGS): 4 | self.meta, self.FLAGS = meta, FLAGS -------------------------------------------------------------------------------- /darkflow/net/vanilla/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | _LOSS_TYPE = ['sse','l2', 'smooth', 4 | 'sparse', 'l1', 'softmax', 5 | 'svm', 'fisher'] 6 | 7 | def loss(self, net_out): 8 | m = self.meta 9 | loss_type = self.meta['type'] 10 | assert loss_type in _LOSS_TYPE, \ 11 | 'Loss type {} not implemented'.format(loss_type) 12 | 13 | out = net_out 14 | out_shape = out.get_shape() 15 | out_dtype = out.dtype.base_dtype 16 | _truth = tf.placeholders(out_dtype, out_shape) 17 | 18 | self.placeholders = dict({ 19 | 'truth': _truth 20 | }) 21 | 22 | diff = _truth - out 23 | if loss_type in ['sse','12']: 24 | loss = tf.nn.l2_loss(diff) 25 | 26 | elif loss_type == ['smooth']: 27 | small = tf.cast(diff < 1, tf.float32) 28 | large = 1. 
- small 29 | l1_loss = tf.nn.l1_loss(tf.multiply(diff, large)) 30 | l2_loss = tf.nn.l2_loss(tf.multiply(diff, small)) 31 | loss = l1_loss + l2_loss 32 | 33 | elif loss_type in ['sparse', 'l1']: 34 | loss = l1_loss(diff) 35 | 36 | elif loss_type == 'softmax': 37 | loss = tf.nn.softmax_cross_entropy_with_logits(logits, y) 38 | loss = tf.reduce_mean(loss) 39 | 40 | elif loss_type == 'svm': 41 | assert 'train_size' in m, \ 42 | 'Must specify' 43 | size = m['train_size'] 44 | self.nu = tf.Variable(tf.ones([train_size, num_classes])) 45 | -------------------------------------------------------------------------------- /darkflow/net/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | from . import train 2 | from . import predict 3 | from . import data 4 | from . import misc 5 | import numpy as np 6 | 7 | 8 | """ YOLO framework __init__ equivalent""" 9 | 10 | def constructor(self, meta, FLAGS): 11 | 12 | def _to_color(indx, base): 13 | """ return (b, r, g) tuple""" 14 | base2 = base * base 15 | b = 2 - indx / base2 16 | r = 2 - (indx % base2) / base 17 | g = 2 - (indx % base2) % base 18 | return (b * 127, r * 127, g * 127) 19 | if 'labels' not in meta: 20 | misc.labels(meta, FLAGS) #We're not loading from a .pb so we do need to load the labels 21 | assert len(meta['labels']) == meta['classes'], ( 22 | 'labels.txt and {} indicate' + ' ' 23 | 'inconsistent class numbers' 24 | ).format(meta['model']) 25 | 26 | # assign a color for each label 27 | colors = list() 28 | base = int(np.ceil(pow(meta['classes'], 1./3))) 29 | for x in range(len(meta['labels'])): 30 | colors += [_to_color(x, base)] 31 | meta['colors'] = colors 32 | self.fetch = list() 33 | self.meta, self.FLAGS = meta, FLAGS 34 | 35 | # over-ride the threshold in meta if FLAGS has it. 36 | if FLAGS.threshold > 0.0: 37 | self.meta['thresh'] = FLAGS.threshold -------------------------------------------------------------------------------- /darkflow/net/yolo/data.py: -------------------------------------------------------------------------------- 1 | from ...utils.pascal_voc_clean_xml import pascal_voc_clean_xml 2 | from numpy.random import permutation as perm 3 | from .predict import preprocess 4 | # from .misc import show 5 | from copy import deepcopy 6 | import pickle 7 | import numpy as np 8 | import os 9 | 10 | def parse(self, exclusive = False): 11 | meta = self.meta 12 | ext = '.parsed' 13 | ann = self.FLAGS.annotation 14 | if not os.path.isdir(ann): 15 | msg = 'Annotation directory not found {} .' 16 | exit('Error: {}'.format(msg.format(ann))) 17 | print('\n{} parsing {}'.format(meta['model'], ann)) 18 | dumps = pascal_voc_clean_xml(ann, meta['labels'], exclusive) 19 | return dumps 20 | 21 | 22 | def _batch(self, chunk): 23 | """ 24 | Takes a chunk of parsed annotations 25 | returns value for placeholders of net's 26 | input & loss layer correspond to this chunk 27 | """ 28 | meta = self.meta 29 | S, B = meta['side'], meta['num'] 30 | C, labels = meta['classes'], meta['labels'] 31 | 32 | # preprocess 33 | jpg = chunk[0]; w, h, allobj_ = chunk[1] 34 | allobj = deepcopy(allobj_) 35 | path = os.path.join(self.FLAGS.dataset, jpg) 36 | img = self.preprocess(path, allobj) 37 | 38 | # Calculate regression target 39 | cellx = 1. * w / S 40 | celly = 1. 
* h / S 41 | for obj in allobj: 42 | centerx = .5*(obj[1]+obj[3]) #xmin, xmax 43 | centery = .5*(obj[2]+obj[4]) #ymin, ymax 44 | cx = centerx / cellx 45 | cy = centery / celly 46 | if cx >= S or cy >= S: return None, None 47 | obj[3] = float(obj[3]-obj[1]) / w 48 | obj[4] = float(obj[4]-obj[2]) / h 49 | obj[3] = np.sqrt(obj[3]) 50 | obj[4] = np.sqrt(obj[4]) 51 | obj[1] = cx - np.floor(cx) # centerx 52 | obj[2] = cy - np.floor(cy) # centery 53 | obj += [int(np.floor(cy) * S + np.floor(cx))] 54 | 55 | # show(im, allobj, S, w, h, cellx, celly) # unit test 56 | 57 | # Calculate placeholders' values 58 | probs = np.zeros([S*S,C]) 59 | confs = np.zeros([S*S,B]) 60 | coord = np.zeros([S*S,B,4]) 61 | proid = np.zeros([S*S,C]) 62 | prear = np.zeros([S*S,4]) 63 | for obj in allobj: 64 | probs[obj[5], :] = [0.] * C 65 | probs[obj[5], labels.index(obj[0])] = 1. 66 | proid[obj[5], :] = [1] * C 67 | coord[obj[5], :, :] = [obj[1:5]] * B 68 | prear[obj[5],0] = obj[1] - obj[3]**2 * .5 * S # xleft 69 | prear[obj[5],1] = obj[2] - obj[4]**2 * .5 * S # yup 70 | prear[obj[5],2] = obj[1] + obj[3]**2 * .5 * S # xright 71 | prear[obj[5],3] = obj[2] + obj[4]**2 * .5 * S # ybot 72 | confs[obj[5], :] = [1.] * B 73 | 74 | # Finalise the placeholders' values 75 | upleft = np.expand_dims(prear[:,0:2], 1) 76 | botright = np.expand_dims(prear[:,2:4], 1) 77 | wh = botright - upleft; 78 | area = wh[:,:,0] * wh[:,:,1] 79 | upleft = np.concatenate([upleft] * B, 1) 80 | botright = np.concatenate([botright] * B, 1) 81 | areas = np.concatenate([area] * B, 1) 82 | 83 | # value for placeholder at input layer 84 | inp_feed_val = img 85 | # value for placeholder at loss layer 86 | loss_feed_val = { 87 | 'probs': probs, 'confs': confs, 88 | 'coord': coord, 'proid': proid, 89 | 'areas': areas, 'upleft': upleft, 90 | 'botright': botright 91 | } 92 | 93 | return inp_feed_val, loss_feed_val 94 | 95 | def shuffle(self): 96 | batch = self.FLAGS.batch 97 | data = self.parse() 98 | size = len(data) 99 | 100 | print('Dataset of {} instance(s)'.format(size)) 101 | if batch > size: self.FLAGS.batch = batch = size 102 | batch_per_epoch = int(size / batch) 103 | 104 | for i in range(self.FLAGS.epoch): 105 | shuffle_idx = perm(np.arange(size)) 106 | for b in range(batch_per_epoch): 107 | # yield these 108 | x_batch = list() 109 | feed_batch = dict() 110 | 111 | for j in range(b*batch, b*batch+batch): 112 | train_instance = data[shuffle_idx[j]] 113 | try: 114 | inp, new_feed = self._batch(train_instance) 115 | except ZeroDivisionError: 116 | print("This image's width or height are zeros: ", train_instance[0]) 117 | print('train_instance:', train_instance) 118 | print('Please remove or fix it then try again.') 119 | raise 120 | 121 | if inp is None: continue 122 | x_batch += [np.expand_dims(inp, 0)] 123 | 124 | for key in new_feed: 125 | new = new_feed[key] 126 | old_feed = feed_batch.get(key, 127 | np.zeros((0,) + new.shape)) 128 | feed_batch[key] = np.concatenate([ 129 | old_feed, [new] 130 | ]) 131 | 132 | x_batch = np.concatenate(x_batch, 0) 133 | yield x_batch, feed_batch 134 | 135 | print('Finish {} epoch(es)'.format(i + 1)) 136 | 137 | -------------------------------------------------------------------------------- /darkflow/net/yolo/misc.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import cv2 4 | import os 5 | 6 | labels20 = ["aeroplane", "bicycle", "bird", "boat", "bottle", 7 | "bus", "car", "cat", "chair", "cow", "diningtable", "dog", 8 | "horse", 
"motorbike", "person", "pottedplant", "sheep", "sofa", 9 | "train", "tvmonitor"] 10 | 11 | # 8, 14, 15, 19 12 | 13 | voc_models = ['yolo-full', 'yolo-tiny', 'yolo-small', # <- v1 14 | 'yolov1', 'tiny-yolov1', # <- v1.1 15 | 'tiny-yolo-voc', 'yolo-voc'] # <- v2 16 | 17 | coco_models = ['tiny-coco', 'yolo-coco', # <- v1.1 18 | 'yolo', 'tiny-yolo'] # <- v2 19 | 20 | coco_names = 'coco.names' 21 | nine_names = '9k.names' 22 | 23 | def labels(meta, FLAGS): 24 | model = os.path.basename(meta['name']) 25 | if model in voc_models: 26 | print("Model has a VOC model name, loading VOC labels.") 27 | meta['labels'] = labels20 28 | else: 29 | file = FLAGS.labels 30 | if model in coco_models: 31 | print("Model has a coco model name, loading coco labels.") 32 | file = os.path.join(FLAGS.config, coco_names) 33 | elif model == 'yolo9000': 34 | print("Model has name yolo9000, loading yolo9000 labels.") 35 | file = os.path.join(FLAGS.config, nine_names) 36 | with open(file, 'r') as f: 37 | meta['labels'] = list() 38 | labs = [l.strip() for l in f.readlines()] 39 | for lab in labs: 40 | if lab == '----': break 41 | meta['labels'] += [lab] 42 | if len(meta['labels']) == 0: 43 | meta['labels'] = labels20 44 | 45 | def is_inp(self, name): 46 | return name.lower().endswith(('.jpg', '.jpeg', '.png')) 47 | 48 | def show(im, allobj, S, w, h, cellx, celly): 49 | for obj in allobj: 50 | a = obj[5] % S 51 | b = obj[5] // S 52 | cx = a + obj[1] 53 | cy = b + obj[2] 54 | centerx = cx * cellx 55 | centery = cy * celly 56 | ww = obj[3]**2 * w 57 | hh = obj[4]**2 * h 58 | cv2.rectangle(im, 59 | (int(centerx - ww/2), int(centery - hh/2)), 60 | (int(centerx + ww/2), int(centery + hh/2)), 61 | (0,0,255), 2) 62 | cv2.imshow('result', im) 63 | cv2.waitKey() 64 | cv2.destroyAllWindows() 65 | 66 | def show2(im, allobj): 67 | for obj in allobj: 68 | cv2.rectangle(im, 69 | (obj[1], obj[2]), 70 | (obj[3], obj[4]), 71 | (0,0,255),2) 72 | cv2.imshow('result', im) 73 | cv2.waitKey() 74 | cv2.destroyAllWindows() 75 | 76 | 77 | _MVA = .05 78 | 79 | def profile(self, net): 80 | pass 81 | # data = self.parse(exclusive = True) 82 | # size = len(data); batch = self.FLAGS.batch 83 | # all_inp_ = [x[0] for x in data] 84 | # net.say('Will cycle through {} examples {} times'.format( 85 | # len(all_inp_), net.FLAGS.epoch)) 86 | 87 | # fetch = list(); mvave = list(); names = list(); 88 | # this = net.top 89 | # conv_lay = ['convolutional', 'connected', 'local', 'conv-select'] 90 | # while this.inp is not None: 91 | # if this.lay.type in conv_lay: 92 | # fetch = [this.out] + fetch 93 | # names = [this.lay.signature] + names 94 | # mvave = [None] + mvave 95 | # this = this.inp 96 | # print(names) 97 | 98 | # total = int(); allofthem = len(all_inp_) * net.FLAGS.epoch 99 | # batch = min(net.FLAGS.batch, len(all_inp_)) 100 | # for count in range(net.FLAGS.epoch): 101 | # net.say('EPOCH {}'.format(count)) 102 | # for j in range(len(all_inp_)/batch): 103 | # inp_feed = list(); new_all = list() 104 | # all_inp = all_inp_[j*batch: (j*batch+batch)] 105 | # for inp in all_inp: 106 | # new_all += [inp] 107 | # this_inp = os.path.join(net.FLAGS.dataset, inp) 108 | # this_inp = net.framework.preprocess(this_inp) 109 | # expanded = np.expand_dims(this_inp, 0) 110 | # inp_feed.append(expanded) 111 | # all_inp = new_all 112 | # feed_dict = {net.inp : np.concatenate(inp_feed, 0)} 113 | # out = net.sess.run(fetch, feed_dict) 114 | 115 | # for i, o in enumerate(out): 116 | # oi = out[i]; 117 | # dim = len(oi.shape) - 1 118 | # ai = mvave[i]; 119 | # mi = np.mean(oi, 
tuple(range(dim))) 120 | # vi = np.var(oi, tuple(range(dim))) 121 | # if ai is None: mvave[i] = [mi, vi] 122 | # elif 'banana ninja yada yada': 123 | # ai[0] = (1 - _MVA) * ai[0] + _MVA * mi 124 | # ai[1] = (1 - _MVA) * ai[1] + _MVA * vi 125 | # total += len(inp_feed) 126 | # net.say('{} / {} = {}%'.format( 127 | # total, allofthem, 100. * total / allofthem)) 128 | 129 | # with open('profile', 'wb') as f: 130 | # pickle.dump([mvave], f, protocol = -1) 131 | -------------------------------------------------------------------------------- /darkflow/net/yolo/predict.py: -------------------------------------------------------------------------------- 1 | from ...utils.im_transform import imcv2_recolor, imcv2_affine_trans 2 | from ...utils.box import BoundBox, box_iou, prob_compare 3 | import numpy as np 4 | import cv2 5 | import os 6 | import json 7 | from ...cython_utils.cy_yolo_findboxes import yolo_box_constructor 8 | 9 | def _fix(obj, dims, scale, offs): 10 | for i in range(1, 5): 11 | dim = dims[(i + 1) % 2] 12 | off = offs[(i + 1) % 2] 13 | obj[i] = int(obj[i] * scale - off) 14 | obj[i] = max(min(obj[i], dim), 0) 15 | 16 | def resize_input(self, im): 17 | h, w, c = self.meta['inp_size'] 18 | imsz = cv2.resize(im, (w, h)) 19 | imsz = imsz / 255. 20 | imsz = imsz[:,:,::-1] 21 | return imsz 22 | 23 | def process_box(self, b, h, w, threshold): 24 | max_indx = np.argmax(b.probs) 25 | max_prob = b.probs[max_indx] 26 | label = self.meta['labels'][max_indx] 27 | if max_prob > threshold: 28 | left = int ((b.x - b.w/2.) * w) 29 | right = int ((b.x + b.w/2.) * w) 30 | top = int ((b.y - b.h/2.) * h) 31 | bot = int ((b.y + b.h/2.) * h) 32 | if left < 0 : left = 0 33 | if right > w - 1: right = w - 1 34 | if top < 0 : top = 0 35 | if bot > h - 1: bot = h - 1 36 | mess = '{}'.format(label) 37 | return (left, right, top, bot, mess, max_indx, max_prob) 38 | return None 39 | 40 | def findboxes(self, net_out): 41 | meta, FLAGS = self.meta, self.FLAGS 42 | threshold = FLAGS.threshold 43 | 44 | boxes = [] 45 | boxes = yolo_box_constructor(meta, net_out, threshold) 46 | 47 | return boxes 48 | 49 | def preprocess(self, im, allobj = None): 50 | """ 51 | Takes an image, return it as a numpy tensor that is readily 52 | to be fed into tfnet. If there is an accompanied annotation (allobj), 53 | meaning this preprocessing is serving the train process, then this 54 | image will be transformed with random noise to augment training data, 55 | using scale, translation, flipping and recolor. The accompanied 56 | parsed annotation (allobj) will also be modified accordingly. 
57 | """ 58 | if type(im) is not np.ndarray: 59 | im = cv2.imread(im) 60 | 61 | if allobj is not None: # in training mode 62 | result = imcv2_affine_trans(im) 63 | im, dims, trans_param = result 64 | scale, offs, flip = trans_param 65 | for obj in allobj: 66 | _fix(obj, dims, scale, offs) 67 | if not flip: continue 68 | obj_1_ = obj[1] 69 | obj[1] = dims[0] - obj[3] 70 | obj[3] = dims[0] - obj_1_ 71 | im = imcv2_recolor(im) 72 | 73 | im = self.resize_input(im) 74 | if allobj is None: return im 75 | return im#, np.array(im) # for unit testing 76 | 77 | def postprocess(self, net_out, im, save = True): 78 | """ 79 | Takes net output, draw predictions, save to disk 80 | """ 81 | meta, FLAGS = self.meta, self.FLAGS 82 | threshold = FLAGS.threshold 83 | colors, labels = meta['colors'], meta['labels'] 84 | 85 | boxes = self.findboxes(net_out) 86 | 87 | if type(im) is not np.ndarray: 88 | imgcv = cv2.imread(im) 89 | else: imgcv = im 90 | 91 | h, w, _ = imgcv.shape 92 | resultsForJSON = [] 93 | for b in boxes: 94 | boxResults = self.process_box(b, h, w, threshold) 95 | if boxResults is None: 96 | continue 97 | left, right, top, bot, mess, max_indx, confidence = boxResults 98 | thick = int((h + w) // 300) 99 | if self.FLAGS.json: 100 | resultsForJSON.append({"label": mess, "confidence": float('%.2f' % confidence), "topleft": {"x": left, "y": top}, "bottomright": {"x": right, "y": bot}}) 101 | continue 102 | 103 | cv2.rectangle(imgcv, 104 | (left, top), (right, bot), 105 | self.meta['colors'][max_indx], thick) 106 | cv2.putText( 107 | imgcv, mess, (left, top - 12), 108 | 0, 1e-3 * h, self.meta['colors'][max_indx], 109 | thick // 3) 110 | 111 | 112 | if not save: return imgcv 113 | 114 | outfolder = os.path.join(self.FLAGS.imgdir, 'out') 115 | img_name = os.path.join(outfolder, os.path.basename(im)) 116 | if self.FLAGS.json: 117 | textJSON = json.dumps(resultsForJSON) 118 | textFile = os.path.splitext(img_name)[0] + ".json" 119 | with open(textFile, 'w') as f: 120 | f.write(textJSON) 121 | return 122 | 123 | cv2.imwrite(img_name, imgcv) 124 | -------------------------------------------------------------------------------- /darkflow/net/yolo/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | import pickle 3 | import tensorflow as tf 4 | from .misc import show 5 | import numpy as np 6 | import os 7 | 8 | def loss(self, net_out): 9 | """ 10 | Takes net.out and placeholders value 11 | returned in batch() func above, 12 | to build train_op and loss 13 | """ 14 | # meta 15 | m = self.meta 16 | sprob = float(m['class_scale']) 17 | sconf = float(m['object_scale']) 18 | snoob = float(m['noobject_scale']) 19 | scoor = float(m['coord_scale']) 20 | S, B, C = m['side'], m['num'], m['classes'] 21 | SS = S * S # number of grid cells 22 | 23 | print('{} loss hyper-parameters:'.format(m['model'])) 24 | print('\tside = {}'.format(m['side'])) 25 | print('\tbox = {}'.format(m['num'])) 26 | print('\tclasses = {}'.format(m['classes'])) 27 | print('\tscales = {}'.format([sprob, sconf, snoob, scoor])) 28 | 29 | size1 = [None, SS, C] 30 | size2 = [None, SS, B] 31 | 32 | # return the below placeholders 33 | _probs = tf.placeholder(tf.float32, size1) 34 | _confs = tf.placeholder(tf.float32, size2) 35 | _coord = tf.placeholder(tf.float32, size2 + [4]) 36 | # weights term for L2 loss 37 | _proid = tf.placeholder(tf.float32, size1) 38 | # material calculating IOU 39 | _areas = tf.placeholder(tf.float32, size2) 40 | _upleft = 
tf.placeholder(tf.float32, size2 + [2]) 41 | _botright = tf.placeholder(tf.float32, size2 + [2]) 42 | 43 | self.placeholders = { 44 | 'probs':_probs, 'confs':_confs, 'coord':_coord, 'proid':_proid, 45 | 'areas':_areas, 'upleft':_upleft, 'botright':_botright 46 | } 47 | 48 | # Extract the coordinate prediction from net.out 49 | coords = net_out[:, SS * (C + B):] 50 | coords = tf.reshape(coords, [-1, SS, B, 4]) 51 | wh = tf.pow(coords[:,:,:,2:4], 2) * S # unit: grid cell 52 | area_pred = wh[:,:,:,0] * wh[:,:,:,1] # unit: grid cell^2 53 | centers = coords[:,:,:,0:2] # [batch, SS, B, 2] 54 | floor = centers - (wh * .5) # [batch, SS, B, 2] 55 | ceil = centers + (wh * .5) # [batch, SS, B, 2] 56 | 57 | # calculate the intersection areas 58 | intersect_upleft = tf.maximum(floor, _upleft) 59 | intersect_botright = tf.minimum(ceil , _botright) 60 | intersect_wh = intersect_botright - intersect_upleft 61 | intersect_wh = tf.maximum(intersect_wh, 0.0) 62 | intersect = tf.multiply(intersect_wh[:,:,:,0], intersect_wh[:,:,:,1]) 63 | 64 | # calculate the best IOU, set 0.0 confidence for worse boxes 65 | iou = tf.truediv(intersect, _areas + area_pred - intersect) 66 | best_box = tf.equal(iou, tf.reduce_max(iou, [2], True)) 67 | best_box = tf.to_float(best_box) 68 | confs = tf.multiply(best_box, _confs) 69 | 70 | # take care of the weight terms 71 | conid = snoob * (1. - confs) + sconf * confs 72 | weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3) 73 | cooid = scoor * weight_coo 74 | proid = sprob * _proid 75 | 76 | # flatten 'em all 77 | probs = slim.flatten(_probs) 78 | proid = slim.flatten(proid) 79 | confs = slim.flatten(confs) 80 | conid = slim.flatten(conid) 81 | coord = slim.flatten(_coord) 82 | cooid = slim.flatten(cooid) 83 | 84 | self.fetch += [probs, confs, conid, cooid, proid] 85 | true = tf.concat([probs, confs, coord], 1) 86 | wght = tf.concat([proid, conid, cooid], 1) 87 | print('Building {} loss'.format(m['model'])) 88 | loss = tf.pow(net_out - true, 2) 89 | loss = tf.multiply(loss, wght) 90 | loss = tf.reduce_sum(loss, 1) 91 | self.loss = .5 * tf.reduce_mean(loss) 92 | tf.summary.scalar('{} loss'.format(m['model']), self.loss) 93 | -------------------------------------------------------------------------------- /darkflow/net/yolov2/__init__.py: -------------------------------------------------------------------------------- 1 | from . import train 2 | from . import predict 3 | from . 
import data 4 | from ..yolo import misc 5 | import numpy as np 6 | -------------------------------------------------------------------------------- /darkflow/net/yolov2/data.py: -------------------------------------------------------------------------------- 1 | from ...utils.pascal_voc_clean_xml import pascal_voc_clean_xml 2 | from numpy.random import permutation as perm 3 | from ..yolo.predict import preprocess 4 | from ..yolo.data import shuffle 5 | from copy import deepcopy 6 | import pickle 7 | import numpy as np 8 | import os 9 | 10 | def _batch(self, chunk): 11 | """ 12 | Takes a chunk of parsed annotations 13 | returns value for placeholders of net's 14 | input & loss layer correspond to this chunk 15 | """ 16 | meta = self.meta 17 | labels = meta['labels'] 18 | 19 | H, W, _ = meta['out_size'] 20 | C, B = meta['classes'], meta['num'] 21 | anchors = meta['anchors'] 22 | 23 | # preprocess 24 | jpg = chunk[0]; w, h, allobj_ = chunk[1] 25 | allobj = deepcopy(allobj_) 26 | path = os.path.join(self.FLAGS.dataset, jpg) 27 | img = self.preprocess(path, allobj) 28 | 29 | # Calculate regression target 30 | cellx = 1. * w / W 31 | celly = 1. * h / H 32 | for obj in allobj: 33 | centerx = .5*(obj[1]+obj[3]) #xmin, xmax 34 | centery = .5*(obj[2]+obj[4]) #ymin, ymax 35 | cx = centerx / cellx 36 | cy = centery / celly 37 | if cx >= W or cy >= H: return None, None 38 | obj[3] = float(obj[3]-obj[1]) / w 39 | obj[4] = float(obj[4]-obj[2]) / h 40 | obj[3] = np.sqrt(obj[3]) 41 | obj[4] = np.sqrt(obj[4]) 42 | obj[1] = cx - np.floor(cx) # centerx 43 | obj[2] = cy - np.floor(cy) # centery 44 | obj += [int(np.floor(cy) * W + np.floor(cx))] 45 | 46 | # show(im, allobj, S, w, h, cellx, celly) # unit test 47 | 48 | # Calculate placeholders' values 49 | probs = np.zeros([H*W,B,C]) 50 | confs = np.zeros([H*W,B]) 51 | coord = np.zeros([H*W,B,4]) 52 | proid = np.zeros([H*W,B,C]) 53 | prear = np.zeros([H*W,4]) 54 | for obj in allobj: 55 | probs[obj[5], :, :] = [[0.]*C] * B 56 | probs[obj[5], :, labels.index(obj[0])] = 1. 57 | proid[obj[5], :, :] = [[1.]*C] * B 58 | coord[obj[5], :, :] = [obj[1:5]] * B 59 | prear[obj[5],0] = obj[1] - obj[3]**2 * .5 * W # xleft 60 | prear[obj[5],1] = obj[2] - obj[4]**2 * .5 * H # yup 61 | prear[obj[5],2] = obj[1] + obj[3]**2 * .5 * W # xright 62 | prear[obj[5],3] = obj[2] + obj[4]**2 * .5 * H # ybot 63 | confs[obj[5], :] = [1.] 
* B 64 | 65 | # Finalise the placeholders' values 66 | upleft = np.expand_dims(prear[:,0:2], 1) 67 | botright = np.expand_dims(prear[:,2:4], 1) 68 | wh = botright - upleft; 69 | area = wh[:,:,0] * wh[:,:,1] 70 | upleft = np.concatenate([upleft] * B, 1) 71 | botright = np.concatenate([botright] * B, 1) 72 | areas = np.concatenate([area] * B, 1) 73 | 74 | # value for placeholder at input layer 75 | inp_feed_val = img 76 | # value for placeholder at loss layer 77 | loss_feed_val = { 78 | 'probs': probs, 'confs': confs, 79 | 'coord': coord, 'proid': proid, 80 | 'areas': areas, 'upleft': upleft, 81 | 'botright': botright 82 | } 83 | 84 | return inp_feed_val, loss_feed_val 85 | 86 | -------------------------------------------------------------------------------- /darkflow/net/yolov2/predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import cv2 4 | import os 5 | import json 6 | #from scipy.special import expit 7 | #from utils.box import BoundBox, box_iou, prob_compare 8 | #from utils.box import prob_compare2, box_intersection 9 | from ...utils.box import BoundBox 10 | from ...cython_utils.cy_yolo2_findboxes import box_constructor 11 | 12 | def expit(x): 13 | return 1. / (1. + np.exp(-x)) 14 | 15 | def _softmax(x): 16 | e_x = np.exp(x - np.max(x)) 17 | out = e_x / e_x.sum() 18 | return out 19 | 20 | def findboxes(self, net_out): 21 | # meta 22 | meta = self.meta 23 | boxes = list() 24 | boxes=box_constructor(meta,net_out) 25 | return boxes 26 | 27 | def postprocess(self, net_out, im, save = True): 28 | """ 29 | Takes net output, draw net_out, save to disk 30 | """ 31 | boxes = self.findboxes(net_out) 32 | 33 | # meta 34 | meta = self.meta 35 | threshold = meta['thresh'] 36 | colors = meta['colors'] 37 | labels = meta['labels'] 38 | if type(im) is not np.ndarray: 39 | imgcv = cv2.imread(im) 40 | else: imgcv = im 41 | h, w, _ = imgcv.shape 42 | 43 | resultsForJSON = [] 44 | for b in boxes: 45 | boxResults = self.process_box(b, h, w, threshold) 46 | if boxResults is None: 47 | continue 48 | left, right, top, bot, mess, max_indx, confidence = boxResults 49 | thick = int((h + w) // 300) 50 | if self.FLAGS.json: 51 | resultsForJSON.append({"label": mess, "confidence": float('%.2f' % confidence), "topleft": {"x": left, "y": top}, "bottomright": {"x": right, "y": bot}}) 52 | continue 53 | 54 | cv2.rectangle(imgcv, 55 | (left, top), (right, bot), 56 | colors[max_indx], thick) 57 | cv2.putText(imgcv, mess, (left, top - 12), 58 | 0, 1e-3 * h, colors[max_indx],thick//3) 59 | 60 | if not save: return imgcv 61 | 62 | outfolder = os.path.join(self.FLAGS.imgdir, 'out') 63 | img_name = os.path.join(outfolder, os.path.basename(im)) 64 | if self.FLAGS.json: 65 | textJSON = json.dumps(resultsForJSON) 66 | textFile = os.path.splitext(img_name)[0] + ".json" 67 | with open(textFile, 'w') as f: 68 | f.write(textJSON) 69 | return 70 | 71 | cv2.imwrite(img_name, imgcv) 72 | -------------------------------------------------------------------------------- /darkflow/net/yolov2/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | import pickle 3 | import tensorflow as tf 4 | from ..yolo.misc import show 5 | import numpy as np 6 | import os 7 | import math 8 | 9 | def expit_tensor(x): 10 | return 1. / (1. 
+ tf.exp(-x)) 11 | 12 | def loss(self, net_out): 13 | """ 14 | Takes net.out and placeholders value 15 | returned in batch() func above, 16 | to build train_op and loss 17 | """ 18 | # meta 19 | m = self.meta 20 | sprob = float(m['class_scale']) 21 | sconf = float(m['object_scale']) 22 | snoob = float(m['noobject_scale']) 23 | scoor = float(m['coord_scale']) 24 | H, W, _ = m['out_size'] 25 | B, C = m['num'], m['classes'] 26 | HW = H * W # number of grid cells 27 | anchors = m['anchors'] 28 | 29 | print('{} loss hyper-parameters:'.format(m['model'])) 30 | print('\tH = {}'.format(H)) 31 | print('\tW = {}'.format(W)) 32 | print('\tbox = {}'.format(m['num'])) 33 | print('\tclasses = {}'.format(m['classes'])) 34 | print('\tscales = {}'.format([sprob, sconf, snoob, scoor])) 35 | 36 | size1 = [None, HW, B, C] 37 | size2 = [None, HW, B] 38 | 39 | # return the below placeholders 40 | _probs = tf.placeholder(tf.float32, size1) 41 | _confs = tf.placeholder(tf.float32, size2) 42 | _coord = tf.placeholder(tf.float32, size2 + [4]) 43 | # weights term for L2 loss 44 | _proid = tf.placeholder(tf.float32, size1) 45 | # material calculating IOU 46 | _areas = tf.placeholder(tf.float32, size2) 47 | _upleft = tf.placeholder(tf.float32, size2 + [2]) 48 | _botright = tf.placeholder(tf.float32, size2 + [2]) 49 | 50 | self.placeholders = { 51 | 'probs':_probs, 'confs':_confs, 'coord':_coord, 'proid':_proid, 52 | 'areas':_areas, 'upleft':_upleft, 'botright':_botright 53 | } 54 | 55 | # Extract the coordinate prediction from net.out 56 | net_out_reshape = tf.reshape(net_out, [-1, H, W, B, (4 + 1 + C)]) 57 | coords = net_out_reshape[:, :, :, :, :4] 58 | coords = tf.reshape(coords, [-1, H*W, B, 4]) 59 | adjusted_coords_xy = expit_tensor(coords[:,:,:,0:2]) 60 | adjusted_coords_wh = tf.sqrt(tf.exp(coords[:,:,:,2:4]) * np.reshape(anchors, [1, 1, B, 2]) / np.reshape([W, H], [1, 1, 1, 2])) 61 | coords = tf.concat([adjusted_coords_xy, adjusted_coords_wh], 3) 62 | 63 | adjusted_c = expit_tensor(net_out_reshape[:, :, :, :, 4]) 64 | adjusted_c = tf.reshape(adjusted_c, [-1, H*W, B, 1]) 65 | 66 | adjusted_prob = tf.nn.softmax(net_out_reshape[:, :, :, :, 5:]) 67 | adjusted_prob = tf.reshape(adjusted_prob, [-1, H*W, B, C]) 68 | 69 | adjusted_net_out = tf.concat([adjusted_coords_xy, adjusted_coords_wh, adjusted_c, adjusted_prob], 3) 70 | 71 | wh = tf.pow(coords[:,:,:,2:4], 2) * np.reshape([W, H], [1, 1, 1, 2]) 72 | area_pred = wh[:,:,:,0] * wh[:,:,:,1] 73 | centers = coords[:,:,:,0:2] 74 | floor = centers - (wh * .5) 75 | ceil = centers + (wh * .5) 76 | 77 | # calculate the intersection areas 78 | intersect_upleft = tf.maximum(floor, _upleft) 79 | intersect_botright = tf.minimum(ceil , _botright) 80 | intersect_wh = intersect_botright - intersect_upleft 81 | intersect_wh = tf.maximum(intersect_wh, 0.0) 82 | intersect = tf.multiply(intersect_wh[:,:,:,0], intersect_wh[:,:,:,1]) 83 | 84 | # calculate the best IOU, set 0.0 confidence for worse boxes 85 | iou = tf.truediv(intersect, _areas + area_pred - intersect) 86 | best_box = tf.equal(iou, tf.reduce_max(iou, [2], True)) 87 | best_box = tf.to_float(best_box) 88 | confs = tf.multiply(best_box, _confs) 89 | 90 | # take care of the weight terms 91 | conid = snoob * (1. 
- confs) + sconf * confs 92 | weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3) 93 | cooid = scoor * weight_coo 94 | weight_pro = tf.concat(C * [tf.expand_dims(confs, -1)], 3) 95 | proid = sprob * weight_pro 96 | 97 | self.fetch += [_probs, confs, conid, cooid, proid] 98 | true = tf.concat([_coord, tf.expand_dims(confs, 3), _probs ], 3) 99 | wght = tf.concat([cooid, tf.expand_dims(conid, 3), proid ], 3) 100 | 101 | print('Building {} loss'.format(m['model'])) 102 | loss = tf.pow(adjusted_net_out - true, 2) 103 | loss = tf.multiply(loss, wght) 104 | loss = tf.reshape(loss, [-1, H*W*B*(4 + 1 + C)]) 105 | loss = tf.reduce_sum(loss, 1) 106 | self.loss = .5 * tf.reduce_mean(loss) 107 | tf.summary.scalar('{} loss'.format(m['model']), self.loss) -------------------------------------------------------------------------------- /darkflow/utils/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class BoundBox: 4 | def __init__(self, classes): 5 | self.x, self.y = float(), float() 6 | self.w, self.h = float(), float() 7 | self.c = float() 8 | self.class_num = classes 9 | self.probs = np.zeros((classes,)) 10 | 11 | def overlap(x1,w1,x2,w2): 12 | l1 = x1 - w1 / 2.; 13 | l2 = x2 - w2 / 2.; 14 | left = max(l1, l2) 15 | r1 = x1 + w1 / 2.; 16 | r2 = x2 + w2 / 2.; 17 | right = min(r1, r2) 18 | return right - left; 19 | 20 | def box_intersection(a, b): 21 | w = overlap(a.x, a.w, b.x, b.w); 22 | h = overlap(a.y, a.h, b.y, b.h); 23 | if w < 0 or h < 0: return 0; 24 | area = w * h; 25 | return area; 26 | 27 | def box_union(a, b): 28 | i = box_intersection(a, b); 29 | u = a.w * a.h + b.w * b.h - i; 30 | return u; 31 | 32 | def box_iou(a, b): 33 | return box_intersection(a, b) / box_union(a, b); 34 | 35 | def prob_compare(box): 36 | return box.probs[box.class_num] 37 | 38 | def prob_compare2(boxa, boxb): 39 | if (boxa.pi < boxb.pi): 40 | return 1 41 | elif(boxa.pi == boxb.pi): 42 | return 0 43 | else: 44 | return -1 -------------------------------------------------------------------------------- /darkflow/utils/im_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | def imcv2_recolor(im, a = .1): 5 | t = [np.random.uniform()] 6 | t += [np.random.uniform()] 7 | t += [np.random.uniform()] 8 | t = np.array(t) * 2. - 1. 9 | 10 | # random amplify each channel 11 | im = im * (1 + t * a) 12 | mx = 255. * (1 + a) 13 | up = np.random.uniform() * 2 - 1 14 | # im = np.power(im/mx, 1. + up * .5) 15 | im = cv2.pow(im/mx, 1. + up * .5) 16 | return np.array(im * 255., np.uint8) 17 | 18 | def imcv2_affine_trans(im): 19 | # Scale and translate 20 | h, w, c = im.shape 21 | scale = np.random.uniform() / 10. + 1. 22 | max_offx = (scale-1.) * w 23 | max_offy = (scale-1.) * h 24 | offx = int(np.random.uniform() * max_offx) 25 | offy = int(np.random.uniform() * max_offy) 26 | 27 | im = cv2.resize(im, (0,0), fx = scale, fy = scale) 28 | im = im[offy : (offy + h), offx : (offx + w)] 29 | flip = np.random.binomial(1, .5) 30 | if flip: im = cv2.flip(im, 1) 31 | return im, [w, h, c], [scale, [offx, offy], flip] 32 | -------------------------------------------------------------------------------- /darkflow/utils/loader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from .. 
import dark 4 | import numpy as np 5 | from os.path import basename 6 | 7 | class loader(object): 8 | """ 9 | interface to work with both .weights and .ckpt files 10 | in loading / recollecting / resolving mode 11 | """ 12 | VAR_LAYER = ['convolutional', 'connected', 'local', 13 | 'select', 'conv-select', 14 | 'extract', 'conv-extract'] 15 | 16 | def __init__(self, *args): 17 | self.src_key = list() 18 | self.vals = list() 19 | self.load(*args) 20 | 21 | def __call__(self, key): 22 | for idx in range(len(key)): 23 | val = self.find(key, idx) 24 | if val is not None: return val 25 | return None 26 | 27 | def find(self, key, idx): 28 | up_to = min(len(self.src_key), 4) 29 | for i in range(up_to): 30 | key_b = self.src_key[i] 31 | if key_b[idx:] == key[idx:]: 32 | return self.yields(i) 33 | return None 34 | 35 | def yields(self, idx): 36 | del self.src_key[idx] 37 | temp = self.vals[idx] 38 | del self.vals[idx] 39 | return temp 40 | 41 | class weights_loader(loader): 42 | """one who understands .weights files""" 43 | 44 | _W_ORDER = dict({ # order of param flattened into .weights file 45 | 'convolutional': [ 46 | 'biases','gamma','moving_mean','moving_variance','kernel' 47 | ], 48 | 'connected': ['biases', 'weights'], 49 | 'local': ['biases', 'kernels'] 50 | }) 51 | 52 | def load(self, path, src_layers): 53 | self.src_layers = src_layers 54 | walker = weights_walker(path) 55 | 56 | for i, layer in enumerate(src_layers): 57 | if layer.type not in self.VAR_LAYER: continue 58 | self.src_key.append([layer]) 59 | 60 | if walker.eof: new = None 61 | else: 62 | args = layer.signature 63 | new = dark.darknet.create_darkop(*args) 64 | self.vals.append(new) 65 | 66 | if new is None: continue 67 | order = self._W_ORDER[new.type] 68 | for par in order: 69 | if par not in new.wshape: continue 70 | val = walker.walk(new.wsize[par]) 71 | new.w[par] = val 72 | new.finalize(walker.transpose) 73 | 74 | if walker.path is not None: 75 | assert walker.offset == walker.size, \ 76 | 'expect {} bytes, found {}'.format( 77 | walker.offset, walker.size) 78 | print('Successfully identified {} bytes'.format( 79 | walker.offset)) 80 | 81 | class checkpoint_loader(loader): 82 | """ 83 | one who understands .ckpt files, very much 84 | """ 85 | def load(self, ckpt, ignore): 86 | meta = ckpt + '.meta' 87 | with tf.Graph().as_default() as graph: 88 | with tf.Session().as_default() as sess: 89 | saver = tf.train.import_meta_graph(meta) 90 | saver.restore(sess, ckpt) 91 | for var in tf.global_variables(): 92 | name = var.name.split(':')[0] 93 | packet = [name, var.get_shape().as_list()] 94 | self.src_key += [packet] 95 | self.vals += [var.eval(sess)] 96 | 97 | def create_loader(path, cfg = None): 98 | if path is None: 99 | load_type = weights_loader 100 | elif '.weights' in path: 101 | load_type = weights_loader 102 | else: 103 | load_type = checkpoint_loader 104 | 105 | return load_type(path, cfg) 106 | 107 | class weights_walker(object): 108 | """incremental reader of float32 binary files""" 109 | def __init__(self, path): 110 | self.eof = False # end of file 111 | self.path = path # current pos 112 | if path is None: 113 | self.eof = True 114 | return 115 | else: 116 | self.size = os.path.getsize(path)# save the path 117 | major, minor, revision, seen = np.memmap(path, 118 | shape = (), mode = 'r', offset = 0, 119 | dtype = '({})i4,'.format(4)) 120 | self.transpose = major > 1000 or minor > 1000 121 | self.offset = 16 122 | 123 | def walk(self, size): 124 | if self.eof: return None 125 | end_point = self.offset + 4 * size 
126 | assert end_point <= self.size, \ 127 | 'Over-read {}'.format(self.path) 128 | 129 | float32_1D_array = np.memmap( 130 | self.path, shape = (), mode = 'r', 131 | offset = self.offset, 132 | dtype='({})float32,'.format(size) 133 | ) 134 | 135 | self.offset = end_point 136 | if end_point == self.size: 137 | self.eof = True 138 | return float32_1D_array 139 | 140 | def model_name(file_path): 141 | file_name = basename(file_path) 142 | ext = str() 143 | if '.' in file_name: # exclude extension 144 | file_name = file_name.split('.') 145 | ext = file_name[-1] 146 | file_name = '.'.join(file_name[:-1]) 147 | if ext == str() or ext == 'meta': # ckpt file 148 | file_name = file_name.split('-') 149 | num = int(file_name[-1]) 150 | return '-'.join(file_name[:-1]) 151 | if ext == 'weights': 152 | return file_name -------------------------------------------------------------------------------- /darkflow/utils/pascal_voc_clean_xml.py: -------------------------------------------------------------------------------- 1 | """ 2 | parse PASCAL VOC xml annotations 3 | """ 4 | 5 | import os 6 | import sys 7 | import xml.etree.ElementTree as ET 8 | import glob 9 | 10 | 11 | def _pp(l): # pretty printing 12 | for i in l: print('{}: {}'.format(i,l[i])) 13 | 14 | def pascal_voc_clean_xml(ANN, pick, exclusive = False): 15 | print('Parsing for {} {}'.format( 16 | pick, 'exclusively' * int(exclusive))) 17 | 18 | dumps = list() 19 | cur_dir = os.getcwd() 20 | os.chdir(ANN) 21 | annotations = os.listdir('.') 22 | annotations = glob.glob(str(annotations)+'*.xml') 23 | size = len(annotations) 24 | 25 | for i, file in enumerate(annotations): 26 | # progress bar 27 | sys.stdout.write('\r') 28 | percentage = 1. * (i+1) / size 29 | progress = int(percentage * 20) 30 | bar_arg = [progress*'=', ' '*(19-progress), percentage*100] 31 | bar_arg += [file] 32 | sys.stdout.write('[{}>{}]{:.0f}% {}'.format(*bar_arg)) 33 | sys.stdout.flush() 34 | 35 | # actual parsing 36 | in_file = open(file) 37 | tree=ET.parse(in_file) 38 | root = tree.getroot() 39 | jpg = str(root.find('filename').text) 40 | imsize = root.find('size') 41 | w = int(imsize.find('width').text) 42 | h = int(imsize.find('height').text) 43 | all = list() 44 | 45 | for obj in root.iter('object'): 46 | current = list() 47 | name = obj.find('name').text 48 | if name not in pick: 49 | continue 50 | 51 | xmlbox = obj.find('bndbox') 52 | xn = int(float(xmlbox.find('xmin').text)) 53 | xx = int(float(xmlbox.find('xmax').text)) 54 | yn = int(float(xmlbox.find('ymin').text)) 55 | yx = int(float(xmlbox.find('ymax').text)) 56 | current = [name,xn,yn,xx,yx] 57 | all += [current] 58 | 59 | add = [[jpg, [w, h, all]]] 60 | dumps += add 61 | in_file.close() 62 | 63 | # gather all stats 64 | stat = dict() 65 | for dump in dumps: 66 | all = dump[1][2] 67 | for current in all: 68 | if current[0] in pick: 69 | if current[0] in stat: 70 | stat[current[0]]+=1 71 | else: 72 | stat[current[0]] =1 73 | 74 | print('\nStatistics:') 75 | _pp(stat) 76 | print('Dataset size: {}'.format(len(dumps))) 77 | 78 | os.chdir(cur_dir) 79 | return dumps -------------------------------------------------------------------------------- /darkflow/utils/process.py: -------------------------------------------------------------------------------- 1 | """ 2 | WARNING: spaghetti code. 
3 | """ 4 | 5 | import numpy as np 6 | import pickle 7 | import os 8 | 9 | def parser(model): 10 | """ 11 | Read the .cfg file to extract layers into `layers` 12 | as well as model-specific parameters into `meta` 13 | """ 14 | def _parse(l, i = 1): 15 | return l.split('=')[i].strip() 16 | 17 | with open(model, 'rb') as f: 18 | lines = f.readlines() 19 | 20 | lines = [line.decode() for line in lines] 21 | 22 | meta = dict(); layers = list() # will contains layers' info 23 | h, w, c = [int()] * 3; layer = dict() 24 | for line in lines: 25 | line = line.strip() 26 | line = line.split('#')[0] 27 | if '[' in line: 28 | if layer != dict(): 29 | if layer['type'] == '[net]': 30 | h = layer['height'] 31 | w = layer['width'] 32 | c = layer['channels'] 33 | meta['net'] = layer 34 | else: 35 | if layer['type'] == '[crop]': 36 | h = layer['crop_height'] 37 | w = layer['crop_width'] 38 | layers += [layer] 39 | layer = {'type': line} 40 | else: 41 | try: 42 | i = float(_parse(line)) 43 | if i == int(i): i = int(i) 44 | layer[line.split('=')[0].strip()] = i 45 | except: 46 | try: 47 | key = _parse(line, 0) 48 | val = _parse(line, 1) 49 | layer[key] = val 50 | except: 51 | 'banana ninja yadayada' 52 | 53 | meta.update(layer) # last layer contains meta info 54 | if 'anchors' in meta: 55 | splits = meta['anchors'].split(',') 56 | anchors = [float(x.strip()) for x in splits] 57 | meta['anchors'] = anchors 58 | meta['model'] = model # path to cfg, not model name 59 | meta['inp_size'] = [h, w, c] 60 | return layers, meta 61 | 62 | def cfg_yielder(model, binary): 63 | """ 64 | yielding each layer information to initialize `layer` 65 | """ 66 | layers, meta = parser(model); yield meta; 67 | h, w, c = meta['inp_size']; l = w * h * c 68 | 69 | # Start yielding 70 | flat = False # flag for 1st dense layer 71 | conv = '.conv.' 
in model 72 | for i, d in enumerate(layers): 73 | #----------------------------------------------------- 74 | if d['type'] == '[crop]': 75 | yield ['crop', i] 76 | #----------------------------------------------------- 77 | elif d['type'] == '[local]': 78 | n = d.get('filters', 1) 79 | size = d.get('size', 1) 80 | stride = d.get('stride', 1) 81 | pad = d.get('pad', 0) 82 | activation = d.get('activation', 'logistic') 83 | w_ = (w - 1 - (1 - pad) * (size - 1)) // stride + 1 84 | h_ = (h - 1 - (1 - pad) * (size - 1)) // stride + 1 85 | yield ['local', i, size, c, n, stride, 86 | pad, w_, h_, activation] 87 | if activation != 'linear': yield [activation, i] 88 | w, h, c = w_, h_, n 89 | l = w * h * c 90 | #----------------------------------------------------- 91 | elif d['type'] == '[convolutional]': 92 | n = d.get('filters', 1) 93 | size = d.get('size', 1) 94 | stride = d.get('stride', 1) 95 | pad = d.get('pad', 0) 96 | padding = d.get('padding', 0) 97 | if pad: padding = size // 2 98 | activation = d.get('activation', 'logistic') 99 | batch_norm = d.get('batch_normalize', 0) or conv 100 | yield ['convolutional', i, size, c, n, 101 | stride, padding, batch_norm, 102 | activation] 103 | if activation != 'linear': yield [activation, i] 104 | w_ = (w + 2 * padding - size) // stride + 1 105 | h_ = (h + 2 * padding - size) // stride + 1 106 | w, h, c = w_, h_, n 107 | l = w * h * c 108 | #----------------------------------------------------- 109 | elif d['type'] == '[maxpool]': 110 | stride = d.get('stride', 1) 111 | size = d.get('size', stride) 112 | padding = d.get('padding', (size-1) // 2) 113 | yield ['maxpool', i, size, stride, padding] 114 | w_ = (w + 2*padding) // d['stride'] 115 | h_ = (h + 2*padding) // d['stride'] 116 | w, h = w_, h_ 117 | l = w * h * c 118 | #----------------------------------------------------- 119 | elif d['type'] == '[avgpool]': 120 | flat = True; l = c 121 | yield ['avgpool', i] 122 | #----------------------------------------------------- 123 | elif d['type'] == '[softmax]': 124 | yield ['softmax', i, d['groups']] 125 | #----------------------------------------------------- 126 | elif d['type'] == '[connected]': 127 | if not flat: 128 | yield ['flatten', i] 129 | flat = True 130 | activation = d.get('activation', 'logistic') 131 | yield ['connected', i, l, d['output'], activation] 132 | if activation != 'linear': yield [activation, i] 133 | l = d['output'] 134 | #----------------------------------------------------- 135 | elif d['type'] == '[dropout]': 136 | yield ['dropout', i, d['probability']] 137 | #----------------------------------------------------- 138 | elif d['type'] == '[select]': 139 | if not flat: 140 | yield ['flatten', i] 141 | flat = True 142 | inp = d.get('input', None) 143 | if type(inp) is str: 144 | file = inp.split(',')[0] 145 | layer_num = int(inp.split(',')[1]) 146 | with open(file, 'rb') as f: 147 | profiles = pickle.load(f, encoding = 'latin1')[0] 148 | layer = profiles[layer_num] 149 | else: layer = inp 150 | activation = d.get('activation', 'logistic') 151 | d['keep'] = d['keep'].split('/') 152 | classes = int(d['keep'][-1]) 153 | keep = [int(c) for c in d['keep'][0].split(',')] 154 | keep_n = len(keep) 155 | train_from = classes * d['bins'] 156 | for count in range(d['bins']-1): 157 | for num in keep[-keep_n:]: 158 | keep += [num + classes] 159 | k = 1 160 | while layers[i-k]['type'] not in ['[connected]', '[extract]']: 161 | k += 1 162 | if i-k < 0: 163 | break 164 | if i-k < 0: l_ = l 165 | elif layers[i-k]['type'] == 'connected': 166 
| l_ = layers[i-k]['output'] 167 | else: 168 | l_ = layers[i-k].get('old',[l])[-1] 169 | yield ['select', i, l_, d['old_output'], 170 | activation, layer, d['output'], 171 | keep, train_from] 172 | if activation != 'linear': yield [activation, i] 173 | l = d['output'] 174 | #----------------------------------------------------- 175 | elif d['type'] == '[conv-select]': 176 | n = d.get('filters', 1) 177 | size = d.get('size', 1) 178 | stride = d.get('stride', 1) 179 | pad = d.get('pad', 0) 180 | padding = d.get('padding', 0) 181 | if pad: padding = size // 2 182 | activation = d.get('activation', 'logistic') 183 | batch_norm = d.get('batch_normalize', 0) or conv 184 | d['keep'] = d['keep'].split('/') 185 | classes = int(d['keep'][-1]) 186 | keep = [int(x) for x in d['keep'][0].split(',')] 187 | 188 | segment = classes + 5 189 | assert n % segment == 0, \ 190 | 'conv-select: segment failed' 191 | bins = n // segment 192 | keep_idx = list() 193 | for j in range(bins): 194 | offset = j * segment 195 | for k in range(5): 196 | keep_idx += [offset + k] 197 | for k in keep: 198 | keep_idx += [offset + 5 + k] 199 | w_ = (w + 2 * padding - size) // stride + 1 200 | h_ = (h + 2 * padding - size) // stride + 1 201 | c_ = len(keep_idx) 202 | yield ['conv-select', i, size, c, n, 203 | stride, padding, batch_norm, 204 | activation, keep_idx, c_] 205 | w, h, c = w_, h_, c_ 206 | l = w * h * c 207 | #----------------------------------------------------- 208 | elif d['type'] == '[conv-extract]': 209 | file = d['profile'] 210 | with open(file, 'rb') as f: 211 | profiles = pickle.load(f, encoding = 'latin1')[0] 212 | inp_layer = None 213 | inp = d['input'] 214 | out = d['output'] 215 | inp_layer = None 216 | if inp >= 0: 217 | inp_layer = profiles[inp] 218 | if inp_layer is not None: 219 | assert len(inp_layer) == c, \ 220 | 'Conv-extract does not match input dimension' 221 | out_layer = profiles[out] 222 | 223 | n = d.get('filters', 1) 224 | size = d.get('size', 1) 225 | stride = d.get('stride', 1) 226 | pad = d.get('pad', 0) 227 | padding = d.get('padding', 0) 228 | if pad: padding = size // 2 229 | activation = d.get('activation', 'logistic') 230 | batch_norm = d.get('batch_normalize', 0) or conv 231 | 232 | k = 1 233 | find = ['[convolutional]','[conv-extract]'] 234 | while layers[i-k]['type'] not in find: 235 | k += 1 236 | if i-k < 0: break 237 | if i-k >= 0: 238 | previous_layer = layers[i-k] 239 | c_ = previous_layer['filters'] 240 | else: 241 | c_ = c 242 | 243 | yield ['conv-extract', i, size, c_, n, 244 | stride, padding, batch_norm, 245 | activation, inp_layer, out_layer] 246 | if activation != 'linear': yield [activation, i] 247 | w_ = (w + 2 * padding - size) // stride + 1 248 | h_ = (h + 2 * padding - size) // stride + 1 249 | w, h, c = w_, h_, len(out_layer) 250 | l = w * h * c 251 | #----------------------------------------------------- 252 | elif d['type'] == '[extract]': 253 | if not flat: 254 | yield['flatten', i] 255 | flat = True 256 | activation = d.get('activation', 'logistic') 257 | file = d['profile'] 258 | with open(file, 'rb') as f: 259 | profiles = pickle.load(f, encoding = 'latin1')[0] 260 | inp_layer = None 261 | inp = d['input'] 262 | out = d['output'] 263 | if inp >= 0: 264 | inp_layer = profiles[inp] 265 | out_layer = profiles[out] 266 | old = d['old'] 267 | old = [int(x) for x in old.split(',')] 268 | if inp_layer is not None: 269 | if len(old) > 2: 270 | h_, w_, c_, n_ = old 271 | new_inp = list() 272 | for p in range(c_): 273 | for q in range(h_): 274 | for r in range(w_): 
275 | if p not in inp_layer: 276 | continue 277 | new_inp += [r + w*(q + h*p)] 278 | inp_layer = new_inp 279 | old = [h_ * w_ * c_, n_] 280 | assert len(inp_layer) == l, \ 281 | 'Extract does not match input dimension' 282 | d['old'] = old 283 | yield ['extract', i] + old + [activation] + [inp_layer, out_layer] 284 | if activation != 'linear': yield [activation, i] 285 | l = len(out_layer) 286 | #----------------------------------------------------- 287 | elif d['type'] == '[route]': # add new layer here 288 | routes = d['layers'] 289 | if type(routes) is int: 290 | routes = [routes] 291 | else: 292 | routes = [int(x.strip()) for x in routes.split(',')] 293 | routes = [i + x if x < 0 else x for x in routes] 294 | for j, x in enumerate(routes): 295 | lx = layers[x]; 296 | xtype = lx['type'] 297 | _size = lx['_size'][:3] 298 | if j == 0: 299 | h, w, c = _size 300 | else: 301 | h_, w_, c_ = _size 302 | assert w_ == w and h_ == h, \ 303 | 'Routing incompatible conv sizes' 304 | c += c_ 305 | yield ['route', i, routes] 306 | l = w * h * c 307 | #----------------------------------------------------- 308 | elif d['type'] == '[reorg]': 309 | stride = d.get('stride', 1) 310 | yield ['reorg', i, stride] 311 | w = w // stride; h = h // stride; 312 | c = c * (stride ** 2) 313 | l = w * h * c 314 | #----------------------------------------------------- 315 | else: 316 | exit('Layer {} not implemented'.format(d['type'])) 317 | 318 | d['_size'] = list([h, w, c, l, flat]) 319 | 320 | if not flat: meta['out_size'] = [h, w, c] 321 | else: meta['out_size'] = l -------------------------------------------------------------------------------- /darkflow/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0' 2 | """Current version of darkflow.""" -------------------------------------------------------------------------------- /demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/demo.gif -------------------------------------------------------------------------------- /flow: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | import sys 4 | from darkflow.cli import cliHandler 5 | 6 | cliHandler(sys.argv) 7 | 8 | -------------------------------------------------------------------------------- /labels.txt: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat -------------------------------------------------------------------------------- /preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/preview.png -------------------------------------------------------------------------------- /sample_img/Thumbs.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/Thumbs.db -------------------------------------------------------------------------------- /sample_img/sample_computer.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_computer.jpg -------------------------------------------------------------------------------- /sample_img/sample_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_dog.jpg -------------------------------------------------------------------------------- /sample_img/sample_eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_eagle.jpg -------------------------------------------------------------------------------- /sample_img/sample_giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_giraffe.jpg -------------------------------------------------------------------------------- /sample_img/sample_horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_horses.jpg -------------------------------------------------------------------------------- /sample_img/sample_office.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_office.jpg -------------------------------------------------------------------------------- /sample_img/sample_person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_person.jpg -------------------------------------------------------------------------------- /sample_img/sample_scream.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_scream.jpg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from setuptools.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | import os 6 | import imp 7 | 8 | VERSION = imp.load_source('version', os.path.join('.', 'darkflow', 'version.py')) 9 | VERSION = VERSION.__version__ 10 | 11 | if os.name =='nt' : 12 | ext_modules=[ 13 | Extension("darkflow.cython_utils.nms", 14 | sources=["darkflow/cython_utils/nms.pyx"], 15 | #libraries=["m"] # Unix-like specific 16 | include_dirs=[numpy.get_include()] 17 | ), 18 | Extension("darkflow.cython_utils.cy_yolo2_findboxes", 19 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"], 20 | #libraries=["m"] # Unix-like specific 21 | include_dirs=[numpy.get_include()] 22 | ), 23 | Extension("darkflow.cython_utils.cy_yolo_findboxes", 24 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"], 25 | #libraries=["m"] # Unix-like specific 26 | include_dirs=[numpy.get_include()] 27 | ) 28 | ] 29 | 30 | elif os.name =='posix' : 31 | ext_modules=[ 32 | Extension("darkflow.cython_utils.nms", 33 | sources=["darkflow/cython_utils/nms.pyx"], 34 | libraries=["m"], # Unix-like specific 35 | include_dirs=[numpy.get_include()] 36 | ), 37 | Extension("darkflow.cython_utils.cy_yolo2_findboxes", 38 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"], 39 | libraries=["m"], # Unix-like specific 40 | include_dirs=[numpy.get_include()] 41 | ), 42 | Extension("darkflow.cython_utils.cy_yolo_findboxes", 43 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"], 44 | libraries=["m"], # Unix-like specific 45 | include_dirs=[numpy.get_include()] 46 | ) 47 | ] 48 | 49 | else : 50 | ext_modules=[ 51 | Extension("darkflow.cython_utils.nms", 52 | sources=["darkflow/cython_utils/nms.pyx"], 53 | libraries=["m"] # Unix-like specific 54 | ), 55 | Extension("darkflow.cython_utils.cy_yolo2_findboxes", 56 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"], 57 | libraries=["m"] # Unix-like specific 58 | ), 59 | Extension("darkflow.cython_utils.cy_yolo_findboxes", 60 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"], 61 | libraries=["m"] # Unix-like specific 62 | ) 63 | ] 64 | 65 | setup( 66 | version=VERSION, 67 | name='darkflow', 68 | description='Darkflow', 69 | license='GPLv3', 70 | url='https://github.com/thtrieu/darkflow', 71 | packages = find_packages(), 72 | scripts = ['flow'], 73 | ext_modules = cythonize(ext_modules) 74 | ) -------------------------------------------------------------------------------- /test/requirements-testing.txt: -------------------------------------------------------------------------------- 1 | tensorflow==1.4.1 2 | pytest 3 | requests 4 | opencv-python 5 | numpy 6 | Cython 7 | codecov 8 | pytest-cov -------------------------------------------------------------------------------- /test/training/annotations/1.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 1.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 336426776 9 | 10 | 11 | Elder Timothy Chaves 12 | Tim Chaves 13 | 14 | 15 | 500 16 | 375 17 | 3 18 | 19 | 0 20 | 21 | person 
22 | Left 23 | 0 24 | 0 25 | 26 | 135 27 | 25 28 | 236 29 | 188 30 | 31 | 32 | 33 | bicycle 34 | Left 35 | 0 36 | 0 37 | 38 | 95 39 | 85 40 | 232 41 | 253 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /test/training/annotations/2.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2007 3 | 2.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 329950741 9 | 10 | 11 | Lothar Lenz 12 | Lothar Lenz 13 | 14 | 15 | 500 16 | 332 17 | 3 18 | 19 | 0 20 | 21 | person 22 | Left 23 | 0 24 | 0 25 | 26 | 235 27 | 51 28 | 309 29 | 222 30 | 31 | 32 | 33 | horse 34 | Left 35 | 0 36 | 0 37 | 38 | 157 39 | 106 40 | 426 41 | 294 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /test/training/images/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/test/training/images/1.jpg -------------------------------------------------------------------------------- /test/training/images/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/test/training/images/2.jpg -------------------------------------------------------------------------------- /vbb2voc.py: -------------------------------------------------------------------------------- 1 | import os, glob 2 | import cv2 3 | from scipy.io import loadmat 4 | from collections import defaultdict 5 | import numpy as np 6 | from lxml import etree, objectify 7 | 8 | def vbb_anno2dict(vbb_file, cam_id, person_types=None): 9 | """ 10 | Parse caltech vbb annotation file to dict 11 | Args: 12 | vbb_file: input vbb file path 13 | cam_id: camera id 14 | person_types: list of person type that will be used (total 4 types: person, person-fa, person?, people). 
15 | If None, all will be used. 16 | Return: 17 | Annotation info dict with filename as key and anno info as value 18 | """ 19 | filename = os.path.splitext(os.path.basename(vbb_file))[0] 20 | annos = defaultdict(dict) 21 | vbb = loadmat(vbb_file) 22 | # object info in each frame: id, pos, occlusion, lock, posv 23 | objLists = vbb['A'][0][0][1][0] 24 | objLbl = [str(v[0]) for v in vbb['A'][0][0][4][0]] 25 | # person index 26 | if not person_types: 27 | person_types = ["person", "person-fa", "person?", "people"] 28 | person_index_list = [x for x in range(len(objLbl)) if objLbl[x] in person_types] 29 | for frame_id, obj in enumerate(objLists): 30 | if len(obj) > 0: 31 | frame_name = str(cam_id) + "_" + str(filename) + "_" + str(frame_id+1) + ".jpg" 32 | annos[frame_name] = defaultdict(list) 33 | annos[frame_name]["id"] = frame_name 34 | for fid, pos, occl in zip(obj['id'][0], obj['pos'][0], obj['occl'][0]): 35 | fid = int(fid[0][0]) - 1 # Matlab indices start from 1, not 0 36 | if fid not in person_index_list: # only use bboxes whose label is a requested person type 37 | continue 38 | annos[frame_name]["label"] = objLbl[fid] 39 | pos = pos[0].tolist() 40 | occl = int(occl[0][0]) 41 | annos[frame_name]["occlusion"].append(occl) 42 | annos[frame_name]["bbox"].append(pos) 43 | if not annos[frame_name]["bbox"]: 44 | del annos[frame_name] 45 | return annos 46 | 47 | 48 | def seq2img(annos, seq_file, outdir, cam_id): 49 | """ 50 | Extract frames in seq files to given output directories 51 | Args: 52 | annos: annos dict returned from parsed vbb file 53 | seq_file: seq file path 54 | outdir: frame save dir 55 | cam_id: camera id 56 | Returns: 57 | camera captured image size 58 | """ 59 | cap = cv2.VideoCapture(seq_file) 60 | index = 1 61 | # captured frame list 62 | v_id = os.path.splitext(os.path.basename(seq_file))[0] 63 | cap_frames_index = np.sort([int(os.path.splitext(id)[0].split("_")[2]) for id in annos.keys()]) 64 | while True: 65 | ret, frame = cap.read() 66 | if ret: 67 | if index not in cap_frames_index: 68 | index += 1 69 | continue 70 | if not os.path.exists(outdir): 71 | os.makedirs(outdir) 72 | outname = os.path.join(outdir, str(cam_id)+"_"+v_id+"_"+str(index)+".jpg") 73 | print("Current frame: ", v_id, str(index)) 74 | cv2.imwrite(outname, frame) 75 | height, width, _ = frame.shape 76 | else: 77 | break 78 | index += 1 79 | img_size = (width, height) 80 | return img_size 81 | 82 | 83 | def instance2xml_base(anno, img_size, bbox_type='xyxy'): 84 | """ 85 | Parse annotation data to VOC XML format 86 | Args: 87 | anno: annotation info returned by vbb_anno2dict function 88 | img_size: camera captured image size 89 | bbox_type: bbox coordinate record format: xyxy (xmin, ymin, xmax, ymax); xywh (xmin, ymin, width, height) 90 | Returns: 91 | Annotation xml info tree 92 | """ 93 | assert bbox_type in ['xyxy', 'xywh'] 94 | E = objectify.ElementMaker(annotate=False) 95 | anno_tree = E.annotation( 96 | E.folder('VOC2014_instance/person'), 97 | E.filename(anno['id']), 98 | E.source( 99 | E.database('Caltech pedestrian'), 100 | E.annotation('Caltech pedestrian'), 101 | E.image('Caltech pedestrian'), 102 | E.url('None') 103 | ), 104 | E.size( 105 | E.width(img_size[0]), 106 | E.height(img_size[1]), 107 | E.depth(3) 108 | ), 109 | E.segmented(0), 110 | ) 111 | for index, bbox in enumerate(anno['bbox']): 112 | bbox = [float(x) for x in bbox] 113 | if bbox_type == 'xyxy': 114 | xmin, ymin, w, h = bbox 115 | xmax = xmin+w 116 | ymax = ymin+h 117 | else: 118 | xmin, ymin, xmax, ymax = bbox
119 | E = objectify.ElementMaker(annotate=False) 120 | anno_tree.append( 121 | E.object( 122 | E.name(anno['label']), 123 | E.bndbox( 124 | E.xmin(xmin), 125 | E.ymin(ymin), 126 | E.xmax(xmax), 127 | E.ymax(ymax) 128 | ), 129 | E.difficult(0), 130 | E.occlusion(anno["occlusion"][index]) 131 | ) 132 | ) 133 | return anno_tree 134 | 135 | 136 | def parse_anno_file(vbb_inputdir, seq_inputdir, vbb_outputdir, seq_outputdir, person_types=None): 137 | """ 138 | Parse Caltech data stored in seq and vbb files to VOC xml format 139 | Args: 140 | vbb_inputdir: vbb file saved path 141 | seq_inputdir: seq file saved path 142 | vbb_outputdir: vbb data converted xml file saved path 143 | seq_outputdir: seq data converted frame image file saved path 144 | person_types: list of person types that will be used (total 4 types: person, person-fa, person?, people). 145 | If None, all will be used. 146 | """ 147 | # annotation sub-directories in the annotation input directory 148 | assert os.path.exists(vbb_inputdir) 149 | sub_dirs = os.listdir(vbb_inputdir) 150 | for sub_dir in sub_dirs: 151 | print("Parsing annotations of camera: ", sub_dir) 152 | cam_id = sub_dir 153 | vbb_files = glob.glob(os.path.join(vbb_inputdir, sub_dir, "*.vbb")) 154 | for vbb_file in vbb_files: 155 | annos = vbb_anno2dict(vbb_file, cam_id, person_types=person_types) 156 | if annos: 157 | vbb_outdir = os.path.join(vbb_outputdir, "annotations", sub_dir, "bbox") 158 | # extract frames from seq 159 | seq_file = os.path.join(seq_inputdir, sub_dir, os.path.splitext(os.path.basename(vbb_file))[0]+".seq") 160 | seq_outdir = os.path.join(seq_outputdir, sub_dir, "frame") 161 | if not os.path.exists(vbb_outdir): 162 | os.makedirs(vbb_outdir) 163 | if not os.path.exists(seq_outdir): 164 | os.makedirs(seq_outdir) 165 | img_size = seq2img(annos, seq_file, seq_outdir, cam_id) 166 | for filename, anno in sorted(annos.items(), key=lambda x: x[0]): 167 | if "bbox" in anno: 168 | anno_tree = instance2xml_base(anno, img_size) 169 | outfile = os.path.join(vbb_outdir, os.path.splitext(filename)[0]+".xml") 170 | print("Generating annotation xml file of picture: ", filename) 171 | etree.ElementTree(anno_tree).write(outfile, pretty_print=True) 172 | 173 | 174 | def visualize_bbox(xml_file, img_file): 175 | import cv2 176 | tree = etree.parse(xml_file) 177 | # load image 178 | image = cv2.imread(img_file) 179 | # get bbox 180 | for bbox in tree.xpath('//bndbox'): 181 | coord = [] 182 | for corner in bbox.getchildren(): 183 | coord.append(int(float(corner.text))) 184 | # draw rectangle 185 | # coord = [int(x) for x in coord] 186 | image = cv2.rectangle(image, (coord[0], coord[1]), (coord[2], coord[3]), (0, 0, 255), 2) 187 | # visualize image 188 | cv2.imshow("test", image) 189 | cv2.waitKey(0) 190 | 191 | 192 | def main(): 193 | seq_inputdir = "/startdt_data/caltech_pedestrian_dataset" 194 | vbb_inputdir = "/startdt_data/caltech_pedestrian_dataset/annotations" 195 | seq_outputdir = "/startdt_data/caltech_pedestrian_dataset" 196 | vbb_outputdir = "/startdt_data/caltech_pedestrian_dataset" 197 | person_types = ["person", "people"] 198 | parse_anno_file(vbb_inputdir, seq_inputdir, vbb_outputdir, seq_outputdir, person_types=person_types) 199 | # xml_file = "/startdt_data/caltech_pedestrian_dataset/annotations/set00/bbox/set00_V013_1511.xml" 200 | # img_file = "/startdt_data/caltech_pedestrian_dataset/set00/frame/set00_V013_1511.jpg" 201 | # visualize_bbox(xml_file, img_file) 202 | 203 | 204 | if __name__ == "__main__": 205 | main() 206 |
--------------------------------------------------------------------------------
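
A minimal end-to-end sketch of how the conversion script feeds darkflow training. Everything below is illustrative: the data paths are placeholders and the driver script is not part of this repo; the flow options used in the comment are the standard training flags exposed by darkflow's CLI.

# demo_convert_and_train.py -- hypothetical driver, run from the repo root
from vbb2voc import parse_anno_file

# Convert Caltech .seq/.vbb pairs into JPEG frames and VOC-style XML,
# keeping only the 'person' and 'people' labels.
# XML is written to <vbb_outputdir>/annotations/<set>/bbox/ and
# frames to <seq_outputdir>/<set>/frame/ (see parse_anno_file above).
parse_anno_file(
    vbb_inputdir="data/caltech/annotations",  # assumed layout: one sub-directory per set (set00, set01, ...)
    seq_inputdir="data/caltech",
    vbb_outputdir="data/caltech/voc",
    seq_outputdir="data/caltech/voc",
    person_types=["person", "people"],
)

# Darkflow can then be pointed at the converted folders for training, e.g.:
#   ./flow --model cfg/tiny-yolo-4c.cfg --train \
#          --annotation data/caltech/voc/annotations/set00/bbox \
#          --dataset data/caltech/voc/set00/frame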