├── .coveragerc
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── cfg
│   ├── coco.names
│   ├── extraction.cfg
│   ├── extraction.conv.cfg
│   ├── tiny-yolo-4c.cfg
│   ├── tiny-yolo-voc.cfg
│   ├── tiny-yolo.cfg
│   ├── v1.1
│   │   ├── person-bottle.cfg
│   │   ├── tiny-coco.cfg
│   │   ├── tiny-yolo-4c.cfg
│   │   ├── tiny-yolov1.cfg
│   │   ├── yolo-coco.cfg
│   │   └── yolov1.cfg
│   ├── v1
│   │   ├── tiny-old.profile
│   │   ├── tiny.profile
│   │   ├── yolo-2c.cfg
│   │   ├── yolo-4c.cfg
│   │   ├── yolo-full.cfg
│   │   ├── yolo-small.cfg
│   │   ├── yolo-tiny-extract.cfg
│   │   ├── yolo-tiny-extract_.cfg
│   │   ├── yolo-tiny.cfg
│   │   └── yolo-tiny4c.cfg
│   ├── yolo-voc.cfg
│   └── yolo.cfg
├── darkflow
│   ├── cli.py
│   ├── cython_utils
│   │   ├── cy_yolo2_findboxes.pyx
│   │   ├── cy_yolo_findboxes.pyx
│   │   ├── nms.pxd
│   │   └── nms.pyx
│   ├── dark
│   │   ├── connected.py
│   │   ├── convolution.py
│   │   ├── darknet.py
│   │   ├── darkop.py
│   │   └── layer.py
│   ├── defaults.py
│   ├── net
│   │   ├── build.py
│   │   ├── flow.py
│   │   ├── framework.py
│   │   ├── help.py
│   │   ├── ops
│   │   │   ├── __init__.py
│   │   │   ├── baseop.py
│   │   │   ├── convolution.py
│   │   │   └── simple.py
│   │   ├── vanilla
│   │   │   ├── __init__.py
│   │   │   └── train.py
│   │   ├── yolo
│   │   │   ├── __init__.py
│   │   │   ├── data.py
│   │   │   ├── misc.py
│   │   │   ├── predict.py
│   │   │   └── train.py
│   │   └── yolov2
│   │       ├── __init__.py
│   │       ├── data.py
│   │       ├── predict.py
│   │       └── train.py
│   ├── utils
│   │   ├── box.py
│   │   ├── im_transform.py
│   │   ├── loader.py
│   │   ├── pascal_voc_clean_xml.py
│   │   └── process.py
│   └── version.py
├── demo.gif
├── flow
├── labels.txt
├── preview.png
├── sample_img
│   ├── Thumbs.db
│   ├── sample_computer.jpg
│   ├── sample_dog.jpg
│   ├── sample_eagle.jpg
│   ├── sample_giraffe.jpg
│   ├── sample_horses.jpg
│   ├── sample_office.jpg
│   ├── sample_person.jpg
│   └── sample_scream.jpg
├── setup.py
├── test
│   ├── requirements-testing.txt
│   ├── test_darkflow.py
│   └── training
│       ├── annotations
│       │   ├── 1.xml
│       │   └── 2.xml
│       └── images
│           ├── 1.jpg
│           └── 2.jpg
└── vbb2voc.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit = test/*
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Python bytecode
3 | *.pyc
4 |
5 | # Weight files
6 | bin/
7 |
8 | # Sample image data
9 | sample_img/*.jpg
10 | !sample_img/sample_*.jpg
11 | sample_img/out/*
12 |
13 | # Annotated test results
14 | results/
15 |
16 | # Intermediate training data
17 | backup/
18 | tfnet/yolo/parse-history.txt
19 | tfnet/yolo/*.parsed
20 | *.txt
21 | !requirements*.txt
22 | *.pb
23 | /profile
24 | /test.py
25 |
26 | # Built cython files
27 | darkflow/cython_utils/*.pyd
28 | darkflow/cython_utils/*.c
29 |
30 | #egg-info
31 | darkflow.egg-info/
32 |
33 | #Other build stuff
34 | build/
35 |
36 | #TensorBoard logs
37 | summary/
38 |
39 | #Built graphs
40 | built_graph/
41 |
42 | #Training checkpoints
43 | ckpt/*
44 |
45 | #pytest cache
46 | .cache/
47 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: trusty
2 | sudo: required
3 |
4 | language: python
5 | python:
6 | - "3.6"
7 |
8 | cache:
9 | directories:
10 | - bin #cache .weights files
11 |
12 | # command to install dependencies
13 | install:
14 | - pip install -r test/requirements-testing.txt
15 | - pip install -e .
16 |
17 | # command to run tests
18 | script: pytest -x --cov=./
19 |
20 | #Upload code coverage statistics
21 | after_success:
22 | - codecov
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Pedestrian Detection
2 |
3 | I have used the Caltech Pedestrian dataset for pedestrian detection. The dataset consists of approximately 10 hours of 640x480, 30 Hz video; about 250,000 frames were annotated with a total of 350,000 bounding boxes covering 2,300 unique pedestrians. For more information, refer to [this page](http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/).
4 |
5 | ### For converting into darkflow format
6 |
7 | The video files in the Caltech Pedestrian dataset are in .seq format and the annotations are in .vbb format. Darkflow needs images in .jpg format and annotations in .xml format. To convert the files we have used:
8 |
9 | vbb2voc.py: extracts the images that contain person bounding boxes from the .seq files and converts the .vbb annotation files to .xml files. Note: the Caltech Pedestrian dataset has four kinds of person labels: `person`, `person-fa`, `person?`, and `people`. In this project only the `person` type is used. If you want to use other types, pass the corresponding list (e.g. `['person', 'people']`) as `person_types` to the `vbb_anno2dict` function.
10 |
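For illustration only, here is a minimal sketch of the idea behind that filtering, assuming the `.vbb` file can be read with `scipy.io.loadmat` and exposes an `objLbl` field as in the Caltech format; the actual implementation lives in `vbb2voc.py`:

```python
# Illustrative sketch only -- not the code in vbb2voc.py.
# A .vbb annotation file is a MATLAB file, so scipy.io.loadmat can read it;
# the 'A' struct and its 'objLbl' field are assumptions based on the Caltech format.
from scipy.io import loadmat

person_types = ['person']          # keep only this label; add 'people', etc. if needed

vbb = loadmat('set00/V000.vbb')    # hypothetical path to one annotation file
obj_labels = [str(lbl[0]) for lbl in vbb['A'][0][0]['objLbl'][0]]
keep_ids = {i + 1 for i, lbl in enumerate(obj_labels) if lbl in person_types}
print('object ids to keep:', keep_ids)
```
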
11 | ### Building the Model
12 |
13 | [![Build Status](https://travis-ci.org/thtrieu/darkflow.svg?branch=master)](https://travis-ci.org/thtrieu/darkflow) [![codecov](https://codecov.io/gh/thtrieu/darkflow/branch/master/graph/badge.svg)](https://codecov.io/gh/thtrieu/darkflow)
14 |
15 | Real-time object detection and classification. Paper: [version 1](https://arxiv.org/pdf/1506.02640.pdf), [version 2](https://arxiv.org/pdf/1612.08242.pdf).
16 |
17 | Read more about YOLO (in darknet) and download weight files [here](http://pjreddie.com/darknet/yolo/). In case the weight file cannot be found, I uploaded some of mine [here](https://drive.google.com/drive/folders/0B1tW_VtY7onidEwyQ2FtQVplWEU), which include `yolo-full` and `yolo-tiny` of v1.0, `tiny-yolo-v1.1` of v1.1 and `yolo`, `tiny-yolo-voc` of v2.
18 |
19 |
20 | See the demo below or view it on [imgur](http://i.imgur.com/EyZZKAA.gif).
21 |
22 |
23 |
24 | ### Dependencies
25 |
26 | Python 3, TensorFlow 1.0, NumPy, OpenCV 3.
27 |
28 | ### Getting started
29 |
30 | You can choose _one_ of the following three ways to get started with darkflow.
31 |
32 | 1. Just build the Cython extensions in place. NOTE: If installing this way, you will have to use `./flow` from the cloned darkflow directory instead of `flow`, since darkflow is not installed globally.
33 | ```
34 | python3 setup.py build_ext --inplace
35 | ```
36 |
37 | 2. Let pip install darkflow globally in dev mode (still globally accessible, but changes to the code immediately take effect)
38 | ```
39 | pip install -e .
40 | ```
41 |
42 | 3. Install with pip globally
43 | ```
44 | pip install .
45 | ```
46 |
47 | ### Update
48 |
49 | **Android demo on TensorFlow's repo:** [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowYoloDetector.java)
50 |
51 | **I am looking for help:**
52 | - `help wanted` labels in the issue tracker
53 |
54 | ## Parsing the annotations
55 |
56 | Skip this if you are not training or fine-tuning anything (you simply want to forward flow a trained net)
57 |
58 | For example, if you want to work with only 3 classes (`tvmonitor`, `person`, `pottedplant`), edit `labels.txt` as follows:
59 |
60 | ```
61 | tvmonitor
62 | person
63 | pottedplant
64 | ```
65 |
66 | And that's it. `darkflow` will take care of the rest. You can also set darkflow to load from a custom labels file with the `--labels` flag (i.e. `--labels myOtherLabelsFile.txt`). This can be helpful when working with multiple models with different sets of output labels. When this flag is not set, darkflow will load from `labels.txt` by default (unless you are using one of the recognized `.cfg` files designed for the COCO or VOC dataset - then the labels file will be ignored and the COCO or VOC labels will be loaded).
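
For example, pointing darkflow at a custom labels file might look like this (the model and file names below are placeholders):

```bash
# Load a custom labels file instead of the default labels.txt
flow --model cfg/yolo-new.cfg --load bin/tiny-yolo.weights --labels myOtherLabelsFile.txt
```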
67 |
68 | ### Design the net
69 |
70 | Skip this if you are working with one of the original configurations since they are already there. Otherwise, see the following example:
71 |
72 | ```python
73 | ...
74 |
75 | [convolutional]
76 | batch_normalize = 1
77 | size = 3
78 | stride = 1
79 | pad = 1
80 | activation = leaky
81 |
82 | [maxpool]
83 |
84 | [connected]
85 | output = 4096
86 | activation = linear
87 |
88 | ...
89 | ```
90 |
91 | ### Flowing the graph using `flow`
92 |
93 | ```bash
94 | # Have a look at its options
95 | flow --h
96 | ```
97 |
98 | First, let's take a closer look at one very useful option, `--load`:
99 |
100 | ```bash
101 | # 1. Load tiny-yolo.weights
102 | flow --model cfg/tiny-yolo.cfg --load bin/tiny-yolo.weights
103 |
104 | # 2. To completely initialize a model, leave out the --load option
105 | flow --model cfg/yolo-new.cfg
106 |
107 | # 3. It is useful to reuse the first identical layers of tiny for `yolo-new`
108 | flow --model cfg/yolo-new.cfg --load bin/tiny-yolo.weights
109 | # this will print out which layers are reused, which are initialized
110 | ```
111 |
112 | All input images from default folder `sample_img/` are flowed through the net and predictions are put in `sample_img/out/`. We can always specify more parameters for such forward passes, such as detection threshold, batch size, images folder, etc.
113 |
114 | ```bash
115 | # Forward all images in sample_img/ using tiny yolo and 100% GPU usage
116 | flow --imgdir sample_img/ --model cfg/tiny-yolo.cfg --load bin/tiny-yolo.weights --gpu 1.0
117 | ```
118 | JSON output can be generated with descriptions of the pixel location of each bounding box. Each prediction is stored in the `sample_img/out` folder by default. An example JSON array is shown below.
119 | ```bash
120 | # Forward all images in sample_img/ using tiny yolo and JSON output.
121 | flow --imgdir sample_img/ --model cfg/tiny-yolo.cfg --load bin/tiny-yolo.weights --json
122 | ```
123 | JSON output:
124 | ```json
125 | [{"label":"person", "confidence": 0.56, "topleft": {"x": 184, "y": 101}, "bottomright": {"x": 274, "y": 382}},
126 | {"label": "dog", "confidence": 0.32, "topleft": {"x": 71, "y": 263}, "bottomright": {"x": 193, "y": 353}},
127 | {"label": "horse", "confidence": 0.76, "topleft": {"x": 412, "y": 109}, "bottomright": {"x": 592,"y": 337}}]
128 | ```
129 | - label: self-explanatory
130 | - confidence: a value between 0 and 1 indicating how confident YOLO is about the detection
131 | - topleft: pixel coordinates of the top-left corner of the box
132 | - bottomright: pixel coordinates of the bottom-right corner of the box
133 |
134 | ### Training new model
135 |
136 | Training is simple: you only have to add the `--train` option. The training set and annotations will be parsed the first time a new configuration is trained. To point to the training set and annotations, use the `--dataset` and `--annotation` options. A few examples:
137 |
138 | ```bash
139 | # Initialize yolo-new from yolo-tiny, then train the net on 100% GPU:
140 | flow --model cfg/yolo-new.cfg --load bin/tiny-yolo.weights --train --gpu 1.0
141 |
142 | # Completely initialize yolo-new and train it with ADAM optimizer
143 | flow --model cfg/yolo-new.cfg --train --trainer adam
144 | ```
145 |
146 | During training, the script will occasionally save intermediate results into TensorFlow checkpoints, stored in `ckpt/`. To resume from any checkpoint before training/testing, use the `--load [checkpoint_num]` option; if `checkpoint_num < 0`, `darkflow` will load the most recent save by parsing `ckpt/checkpoint`.
147 |
148 | ```bash
149 | # Resume the most recent checkpoint for training
150 | flow --train --model cfg/yolo-new.cfg --load -1
151 |
152 | # Test with checkpoint at step 1500
153 | flow --model cfg/yolo-new.cfg --load 1500
154 |
155 | # Fine-tune tiny-yolo starting from the original weights
156 | flow --train --model cfg/tiny-yolo.cfg --load bin/tiny-yolo.weights
157 | ```
158 |
159 | Example of training on Pascal VOC 2007:
160 | ```bash
161 | # Download the Pascal VOC dataset:
162 | curl -O https://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
163 | tar xf VOCtest_06-Nov-2007.tar
164 |
165 | # An example of the Pascal VOC annotation format:
166 | vim VOCdevkit/VOC2007/Annotations/000001.xml
167 |
168 | # Train the net on the Pascal dataset:
169 | flow --model cfg/yolo-new.cfg --train --dataset "~/VOCdevkit/VOC2007/JPEGImages" --annotation "~/VOCdevkit/VOC2007/Annotations"
170 | ```
171 |
172 | ### Training on your own dataset
173 |
174 | *The steps below assume we want to use tiny YOLO and our dataset has 3 classes*
175 |
176 | 1. Create a copy of the configuration file `tiny-yolo-voc.cfg` and rename it according to your preference, e.g. `tiny-yolo-voc-3c.cfg` (it is crucial that you leave the original `tiny-yolo-voc.cfg` file unchanged; see below for an explanation).
177 |
178 | 2. In `tiny-yolo-voc-3c.cfg`, change `classes` in the [region] layer (the last layer) to the number of classes you are going to train for. In our case, `classes` is set to 3.
179 |
180 | ```python
181 | ...
182 |
183 | [region]
184 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
185 | bias_match=1
186 | classes=3
187 | coords=4
188 | num=5
189 | softmax=1
190 |
191 | ...
192 | ```
193 |
194 | 3. In `tiny-yolo-voc-3c.cfg`, change `filters` in the [convolutional] layer (the second-to-last layer) to num * (classes + 5). In our case, num is 5 and classes is 3, so 5 * (3 + 5) = 40; therefore `filters` is set to 40.
195 |
196 | ```python
197 | ...
198 |
199 | [convolutional]
200 | size=1
201 | stride=1
202 | pad=1
203 | filters=40
204 | activation=linear
205 |
206 | [region]
207 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
208 |
209 | ...
210 | ```
211 |
212 | 4. Change `labels.txt` to include the label(s) you want to train on (the number of labels should be the same as the number of classes you set in the `tiny-yolo-voc-3c.cfg` file). In our case, `labels.txt` will contain 3 labels.
213 |
214 | ```
215 | label1
216 | label2
217 | label3
218 | ```
219 | 5. Reference the `tiny-yolo-voc-3c.cfg` model when you train.
220 |
221 | `flow --model cfg/tiny-yolo-voc-3c.cfg --load bin/tiny-yolo-voc.weights --train --annotation train/Annotations --dataset train/Images`
222 |
223 |
224 | * Why should I leave the original `tiny-yolo-voc.cfg` file unchanged?
225 |
226 | When darkflow sees you are loading `tiny-yolo-voc.weights`, it will look for `tiny-yolo-voc.cfg` in your cfg/ folder and compare that configuration file to the new one you have set with `--model cfg/tiny-yolo-voc-3c.cfg`. In this case, every layer will have the exact same number of weights except for the last two, so it will load the weights into all layers up to the last two, because those now contain a different number of weights.
227 |
228 |
229 | ### Camera/video file demo
230 |
231 | For a demo that runs entirely on the CPU:
232 |
233 | ```bash
234 | flow --model cfg/yolo-new.cfg --load bin/yolo-new.weights --demo videofile.avi
235 | ```
236 |
237 | For a demo that runs 100% on the GPU:
238 |
239 | ```bash
240 | flow --model cfg/yolo-new.cfg --load bin/yolo-new.weights --demo videofile.avi --gpu 1.0
241 | ```
242 |
243 | To use your webcam/camera, simply replace `videofile.avi` with the keyword `camera`.
244 | 
245 | To save a video with the predicted bounding boxes, add the `--saveVideo` option.
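
For example, to run the GPU demo above and save the annotated result at the same time:

```bash
# Run the demo on the GPU and save the annotated video
flow --model cfg/yolo-new.cfg --load bin/yolo-new.weights --demo videofile.avi --gpu 1.0 --saveVideo
```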
246 |
247 | ### Using darkflow from another python application
248 |
249 | Please note that `return_predict(img)` must take a `numpy.ndarray`. Your image must be loaded beforehand and passed to `return_predict(img)`. Passing the file path won't work.
250 |
251 | Result from `return_predict(img)` will be a list of dictionaries representing each detected object's values in the same format as the JSON output listed above.
252 |
253 | ```python
254 | from darkflow.net.build import TFNet
255 | import cv2
256 |
257 | options = {"model": "cfg/yolo.cfg", "load": "bin/yolo.weights", "threshold": 0.1}
258 |
259 | tfnet = TFNet(options)
260 |
261 | imgcv = cv2.imread("./sample_img/sample_dog.jpg")
262 | result = tfnet.return_predict(imgcv)
263 | print(result)
264 | ```
265 |
266 |
267 | ### Save the built graph to a protobuf file (`.pb`)
268 |
269 | ```bash
270 | ## Saving the latest checkpoint to protobuf file
271 | flow --model cfg/yolo-new.cfg --load -1 --savepb
272 |
273 | ## Saving graph and weights to protobuf file
274 | flow --model cfg/yolo.cfg --load bin/yolo.weights --savepb
275 | ```
276 | When saving the `.pb` file, a `.meta` file will also be generated alongside it. This `.meta` file is a JSON dump of everything in the `meta` dictionary that contains information necessary for post-processing, such as `anchors` and `labels`. This way, everything you need to make predictions from the graph and do post-processing is contained in those two files - no need to have the `.cfg` or any labels file tagging along.
277 |
278 | The created `.pb` file can be used to migrate the graph to mobile devices (Java / C++ / Objective-C++). The names of the input and output tensors are `'input'` and `'output'`, respectively. For further usage of this protobuf file, please refer to the official `TensorFlow` documentation on the C++ API [_here_](https://www.tensorflow.org/versions/r0.9/api_docs/cc/index.html). To run it in, say, an iOS application, simply add the file to Bundle Resources and update the path to this file inside the source code.
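
As a rough illustration of consuming the exported graph outside of darkflow (sketched here in Python with the TensorFlow 1.x API rather than C++), assuming the default `built_graph/` paths; the preprocessing and the `inp_size` key of the `.meta` dump are assumptions, so check them against your own export:

```python
# Hedged sketch: load built_graph/yolo.pb with the TensorFlow 1.x API and run one forward pass.
# Tensor names 'input' and 'output' come from the darkflow-exported graph; the preprocessing
# below (resize + scale to [0,1] + BGR->RGB) mirrors what darkflow does but is an assumption here.
import json
import cv2
import numpy as np
import tensorflow as tf

with tf.gfile.GFile("built_graph/yolo.pb", "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with open("built_graph/yolo.meta") as f:
    meta = json.load(f)                      # contains 'labels', 'anchors', input size, ...

graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name="")

img = cv2.imread("sample_img/sample_dog.jpg")
h, w = meta["inp_size"][:2]                  # assumed key; check your .meta dump
inp = cv2.resize(img, (w, h))[:, :, ::-1].astype(np.float32) / 255.0

with tf.Session(graph=graph) as sess:
    out = sess.run("output:0", feed_dict={"input:0": np.expand_dims(inp, 0)})
print(out.shape)                             # raw network output; still needs YOLO post-processing
```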
279 |
280 | Also, darkflow supports loading from a `.pb` and `.meta` file for generating predictions (instead of loading from a `.cfg` and checkpoint or `.weights`).
281 | ```bash
282 | ## Forward images in sample_img for predictions based on protobuf file
283 | flow --pbLoad built_graph/yolo.pb --metaLoad built_graph/yolo.meta --imgdir sample_img/
284 | ```
285 | If you'd like to load a `.pb` and `.meta` file when using `return_predict()` you can set the `"pbLoad"` and `"metaLoad"` options in place of the `"model"` and `"load"` options you would normally set.
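
A minimal sketch of that, reusing the earlier `TFNet` snippet with the protobuf options swapped in (paths assume the default `built_graph/` output of `--savepb`):

```python
from darkflow.net.build import TFNet
import cv2

# Same usage as before, but load the frozen graph and its meta file
# instead of a .cfg plus weights/checkpoint.
options = {"pbLoad": "built_graph/yolo.pb", "metaLoad": "built_graph/yolo.meta", "threshold": 0.1}

tfnet = TFNet(options)
imgcv = cv2.imread("./sample_img/sample_dog.jpg")
print(tfnet.return_predict(imgcv))
```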
286 |
287 | That's all.
288 |
289 | ### Credits
290 |
291 | Credit for this code goes to https://github.com/thtrieu, and credit for vbb2voc.py goes to https://github.com/CasiaFan/Dataset_to_VOC_converter.
292 |
--------------------------------------------------------------------------------
/cfg/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/cfg/extraction.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=128
3 | subdivisions=1
4 | height=224
5 | width=224
6 | max_crop=320
7 | channels=3
8 | momentum=0.9
9 | decay=0.0005
10 |
11 | learning_rate=0.1
12 | policy=poly
13 | power=4
14 | max_batches=1600000
15 |
16 | [convolutional]
17 | batch_normalize=1
18 | filters=64
19 | size=7
20 | stride=2
21 | pad=1
22 | activation=leaky
23 |
24 | [maxpool]
25 | size=2
26 | stride=2
27 |
28 | [convolutional]
29 | batch_normalize=1
30 | filters=192
31 | size=3
32 | stride=1
33 | pad=1
34 | activation=leaky
35 |
36 | [maxpool]
37 | size=2
38 | stride=2
39 |
40 | [convolutional]
41 | batch_normalize=1
42 | filters=128
43 | size=1
44 | stride=1
45 | pad=1
46 | activation=leaky
47 |
48 | [convolutional]
49 | batch_normalize=1
50 | filters=256
51 | size=3
52 | stride=1
53 | pad=1
54 | activation=leaky
55 |
56 | [convolutional]
57 | batch_normalize=1
58 | filters=256
59 | size=1
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [convolutional]
65 | batch_normalize=1
66 | filters=512
67 | size=3
68 | stride=1
69 | pad=1
70 | activation=leaky
71 |
72 | [maxpool]
73 | size=2
74 | stride=2
75 |
76 | [convolutional]
77 | batch_normalize=1
78 | filters=256
79 | size=1
80 | stride=1
81 | pad=1
82 | activation=leaky
83 |
84 | [convolutional]
85 | batch_normalize=1
86 | filters=512
87 | size=3
88 | stride=1
89 | pad=1
90 | activation=leaky
91 |
92 | [convolutional]
93 | batch_normalize=1
94 | filters=256
95 | size=1
96 | stride=1
97 | pad=1
98 | activation=leaky
99 |
100 | [convolutional]
101 | batch_normalize=1
102 | filters=512
103 | size=3
104 | stride=1
105 | pad=1
106 | activation=leaky
107 |
108 | [convolutional]
109 | batch_normalize=1
110 | filters=256
111 | size=1
112 | stride=1
113 | pad=1
114 | activation=leaky
115 |
116 | [convolutional]
117 | batch_normalize=1
118 | filters=512
119 | size=3
120 | stride=1
121 | pad=1
122 | activation=leaky
123 |
124 | [convolutional]
125 | batch_normalize=1
126 | filters=256
127 | size=1
128 | stride=1
129 | pad=1
130 | activation=leaky
131 |
132 | [convolutional]
133 | batch_normalize=1
134 | filters=512
135 | size=3
136 | stride=1
137 | pad=1
138 | activation=leaky
139 |
140 | [convolutional]
141 | batch_normalize=1
142 | filters=512
143 | size=1
144 | stride=1
145 | pad=1
146 | activation=leaky
147 |
148 | [convolutional]
149 | batch_normalize=1
150 | filters=1024
151 | size=3
152 | stride=1
153 | pad=1
154 | activation=leaky
155 |
156 | [maxpool]
157 | size=2
158 | stride=2
159 |
160 | [convolutional]
161 | batch_normalize=1
162 | filters=512
163 | size=1
164 | stride=1
165 | pad=1
166 | activation=leaky
167 |
168 | [convolutional]
169 | batch_normalize=1
170 | filters=1024
171 | size=3
172 | stride=1
173 | pad=1
174 | activation=leaky
175 |
176 | [convolutional]
177 | batch_normalize=1
178 | filters=512
179 | size=1
180 | stride=1
181 | pad=1
182 | activation=leaky
183 |
184 | [convolutional]
185 | batch_normalize=1
186 | filters=1024
187 | size=3
188 | stride=1
189 | pad=1
190 | activation=leaky
191 |
192 | [convolutional]
193 | filters=1000
194 | size=1
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [avgpool]
200 |
201 | [softmax]
202 | groups=1
203 |
204 | [cost]
205 | type=sse
206 |
207 |
--------------------------------------------------------------------------------
/cfg/extraction.conv.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=1
3 | subdivisions=1
4 | height=256
5 | width=256
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | learning_rate=0.5
11 | policy=poly
12 | power=6
13 | max_batches=500000
14 |
15 | [convolutional]
16 | filters=64
17 | size=7
18 | stride=2
19 | pad=1
20 | activation=leaky
21 |
22 | [maxpool]
23 | size=2
24 | stride=2
25 |
26 | [convolutional]
27 | filters=192
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | [maxpool]
34 | size=2
35 | stride=2
36 |
37 | [convolutional]
38 | filters=128
39 | size=1
40 | stride=1
41 | pad=1
42 | activation=leaky
43 |
44 | [convolutional]
45 | filters=256
46 | size=3
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | filters=256
53 | size=1
54 | stride=1
55 | pad=1
56 | activation=leaky
57 |
58 | [convolutional]
59 | filters=512
60 | size=3
61 | stride=1
62 | pad=1
63 | activation=leaky
64 |
65 | [maxpool]
66 | size=2
67 | stride=2
68 |
69 | [convolutional]
70 | filters=256
71 | size=1
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [convolutional]
77 | filters=512
78 | size=3
79 | stride=1
80 | pad=1
81 | activation=leaky
82 |
83 | [convolutional]
84 | filters=256
85 | size=1
86 | stride=1
87 | pad=1
88 | activation=leaky
89 |
90 | [convolutional]
91 | filters=512
92 | size=3
93 | stride=1
94 | pad=1
95 | activation=leaky
96 |
97 | [convolutional]
98 | filters=256
99 | size=1
100 | stride=1
101 | pad=1
102 | activation=leaky
103 |
104 | [convolutional]
105 | filters=512
106 | size=3
107 | stride=1
108 | pad=1
109 | activation=leaky
110 |
111 | [convolutional]
112 | filters=256
113 | size=1
114 | stride=1
115 | pad=1
116 | activation=leaky
117 |
118 | [convolutional]
119 | filters=512
120 | size=3
121 | stride=1
122 | pad=1
123 | activation=leaky
124 |
125 | [convolutional]
126 | filters=512
127 | size=1
128 | stride=1
129 | pad=1
130 | activation=leaky
131 |
132 | [convolutional]
133 | filters=1024
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [maxpool]
140 | size=2
141 | stride=2
142 |
143 | [convolutional]
144 | filters=512
145 | size=1
146 | stride=1
147 | pad=1
148 | activation=leaky
149 |
150 | [convolutional]
151 | filters=1024
152 | size=3
153 | stride=1
154 | pad=1
155 | activation=leaky
156 |
157 | [convolutional]
158 | filters=512
159 | size=1
160 | stride=1
161 | pad=1
162 | activation=leaky
163 |
164 | [convolutional]
165 | filters=1024
166 | size=3
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [avgpool]
172 |
173 | [connected]
174 | output=1000
175 | activation=leaky
176 |
177 | [softmax]
178 | groups=1
179 |
180 |
--------------------------------------------------------------------------------
/cfg/tiny-yolo-4c.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=8
4 | width=416
5 | height=416
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 | angle=0
10 | saturation = 1.5
11 | exposure = 1.5
12 | hue=.1
13 |
14 | learning_rate=0.001
15 | max_batches = 40100
16 | policy=steps
17 | steps=-1,100,20000,30000
18 | scales=.1,10,.1,.1
19 |
20 | [convolutional]
21 | batch_normalize=1
22 | filters=16
23 | size=3
24 | stride=1
25 | pad=1
26 | activation=leaky
27 |
28 | [maxpool]
29 | size=2
30 | stride=2
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=32
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=leaky
39 |
40 | [maxpool]
41 | size=2
42 | stride=2
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=64
47 | size=3
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [maxpool]
53 | size=2
54 | stride=2
55 |
56 | [convolutional]
57 | batch_normalize=1
58 | filters=128
59 | size=3
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [maxpool]
65 | size=2
66 | stride=2
67 |
68 | [convolutional]
69 | batch_normalize=1
70 | filters=256
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [maxpool]
77 | size=2
78 | stride=2
79 |
80 | [convolutional]
81 | batch_normalize=1
82 | filters=512
83 | size=3
84 | stride=1
85 | pad=1
86 | activation=leaky
87 |
88 | [maxpool]
89 | size=2
90 | stride=1
91 |
92 | [convolutional]
93 | batch_normalize=1
94 | filters=1024
95 | size=3
96 | stride=1
97 | pad=1
98 | activation=leaky
99 |
100 | ###########
101 |
102 | [convolutional]
103 | batch_normalize=1
104 | size=3
105 | stride=1
106 | pad=1
107 | filters=1024
108 | activation=leaky
109 |
110 | [convolutional]
111 | size=1
112 | stride=1
113 | pad=1
114 | filters=45
115 | activation=linear
116 |
117 | [region]
118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
119 | bias_match=1
120 | classes=4
121 | coords=4
122 | num=5
123 | softmax=1
124 | jitter=.2
125 | rescore=1
126 |
127 | object_scale=5
128 | noobject_scale=1
129 | class_scale=1
130 | coord_scale=1
131 |
132 | absolute=1
133 | thresh=.6
134 | random=1
135 |
--------------------------------------------------------------------------------
/cfg/tiny-yolo-voc.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=8
4 | width=416
5 | height=416
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 | angle=0
10 | saturation = 1.5
11 | exposure = 1.5
12 | hue=.1
13 |
14 | learning_rate=0.001
15 | max_batches = 40100
16 | policy=steps
17 | steps=-1,100,20000,30000
18 | scales=.1,10,.1,.1
19 |
20 | [convolutional]
21 | batch_normalize=1
22 | filters=16
23 | size=3
24 | stride=1
25 | pad=1
26 | activation=leaky
27 |
28 | [maxpool]
29 | size=2
30 | stride=2
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=32
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=leaky
39 |
40 | [maxpool]
41 | size=2
42 | stride=2
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=64
47 | size=3
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [maxpool]
53 | size=2
54 | stride=2
55 |
56 | [convolutional]
57 | batch_normalize=1
58 | filters=128
59 | size=3
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [maxpool]
65 | size=2
66 | stride=2
67 |
68 | [convolutional]
69 | batch_normalize=1
70 | filters=256
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [maxpool]
77 | size=2
78 | stride=2
79 |
80 | [convolutional]
81 | batch_normalize=1
82 | filters=512
83 | size=3
84 | stride=1
85 | pad=1
86 | activation=leaky
87 |
88 | [maxpool]
89 | size=2
90 | stride=1
91 |
92 | [convolutional]
93 | batch_normalize=1
94 | filters=1024
95 | size=3
96 | stride=1
97 | pad=1
98 | activation=leaky
99 |
100 | ###########
101 |
102 | [convolutional]
103 | batch_normalize=1
104 | size=3
105 | stride=1
106 | pad=1
107 | filters=1024
108 | activation=leaky
109 |
110 | [convolutional]
111 | size=1
112 | stride=1
113 | pad=1
114 | filters=125
115 | activation=linear
116 |
117 | [region]
118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
119 | bias_match=1
120 | classes=20
121 | coords=4
122 | num=5
123 | softmax=1
124 | jitter=.2
125 | rescore=1
126 |
127 | object_scale=5
128 | noobject_scale=1
129 | class_scale=1
130 | coord_scale=1
131 |
132 | absolute=1
133 | thresh = .5
134 | random=1
135 |
--------------------------------------------------------------------------------
/cfg/tiny-yolo.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=8
4 | width=416
5 | height=416
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 | angle=0
10 | saturation = 1.5
11 | exposure = 1.5
12 | hue=.1
13 |
14 | learning_rate=0.001
15 | max_batches = 120000
16 | policy=steps
17 | steps=-1,100,80000,100000
18 | scales=.1,10,.1,.1
19 |
20 | [convolutional]
21 | batch_normalize=1
22 | filters=16
23 | size=3
24 | stride=1
25 | pad=1
26 | activation=leaky
27 |
28 | [maxpool]
29 | size=2
30 | stride=2
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=32
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=leaky
39 |
40 | [maxpool]
41 | size=2
42 | stride=2
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=64
47 | size=3
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [maxpool]
53 | size=2
54 | stride=2
55 |
56 | [convolutional]
57 | batch_normalize=1
58 | filters=128
59 | size=3
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [maxpool]
65 | size=2
66 | stride=2
67 |
68 | [convolutional]
69 | batch_normalize=1
70 | filters=256
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [maxpool]
77 | size=2
78 | stride=2
79 |
80 | [convolutional]
81 | batch_normalize=1
82 | filters=512
83 | size=3
84 | stride=1
85 | pad=1
86 | activation=leaky
87 |
88 | [maxpool]
89 | size=2
90 | stride=1
91 |
92 | [convolutional]
93 | batch_normalize=1
94 | filters=1024
95 | size=3
96 | stride=1
97 | pad=1
98 | activation=leaky
99 |
100 | ###########
101 |
102 | [convolutional]
103 | batch_normalize=1
104 | size=3
105 | stride=1
106 | pad=1
107 | filters=1024
108 | activation=leaky
109 |
110 | [convolutional]
111 | size=1
112 | stride=1
113 | pad=1
114 | filters=425
115 | activation=linear
116 |
117 | [region]
118 | anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741
119 | bias_match=1
120 | classes=80
121 | coords=4
122 | num=5
123 | softmax=1
124 | jitter=.2
125 | rescore=1
126 |
127 | object_scale=5
128 | noobject_scale=1
129 | class_scale=1
130 | coord_scale=1
131 |
132 | absolute=1
133 | thresh = .6
134 | random=1
135 |
--------------------------------------------------------------------------------
/cfg/v1.1/person-bottle.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=2
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | saturation=.75
11 | exposure=.75
12 | hue = .1
13 |
14 | learning_rate=0.0005
15 | policy=steps
16 | steps=200,400,600,800,20000,30000
17 | scales=2.5,2,2,2,.1,.1
18 | max_batches = 40000
19 |
20 | [convolutional]
21 | batch_normalize=1
22 | filters=16
23 | size=3
24 | stride=1
25 | pad=1
26 | activation=leaky
27 |
28 | [maxpool]
29 | size=2
30 | stride=2
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=32
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=leaky
39 |
40 | [maxpool]
41 | size=2
42 | stride=2
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=64
47 | size=3
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [maxpool]
53 | size=2
54 | stride=2
55 |
56 | [convolutional]
57 | batch_normalize=1
58 | filters=128
59 | size=3
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [maxpool]
65 | size=2
66 | stride=2
67 |
68 | [convolutional]
69 | batch_normalize=1
70 | filters=256
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [maxpool]
77 | size=2
78 | stride=2
79 |
80 | [convolutional]
81 | batch_normalize=1
82 | filters=512
83 | size=3
84 | stride=1
85 | pad=1
86 | activation=leaky
87 |
88 | [maxpool]
89 | size=2
90 | stride=2
91 |
92 | [convolutional]
93 | batch_normalize=1
94 | size=3
95 | stride=1
96 | pad=1
97 | filters=1024
98 | activation=leaky
99 |
100 | [convolutional]
101 | batch_normalize=1
102 | size=3
103 | stride=1
104 | pad=1
105 | filters=256
106 | activation=leaky
107 |
108 | [select]
109 | old_output=1470
110 | keep=4,14/20
111 | bins=49
112 | output=588
113 | activation=linear
114 |
115 | [detection]
116 | classes=2
117 | coords=4
118 | rescore=1
119 | side=7
120 | num=2
121 | softmax=0
122 | sqrt=1
123 | jitter=.2
124 |
125 | object_scale=1
126 | noobject_scale=.5
127 | class_scale=1
128 | coord_scale=5
--------------------------------------------------------------------------------
/cfg/v1.1/tiny-coco.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=2
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | hue = .1
11 | saturation=.75
12 | exposure=.75
13 |
14 | learning_rate=0.0005
15 | policy=steps
16 | steps=200,400,600,800,100000,150000
17 | scales=2.5,2,2,2,.1,.1
18 | max_batches = 200000
19 |
20 | [convolutional]
21 | batch_normalize=1
22 | filters=16
23 | size=3
24 | stride=1
25 | pad=1
26 | activation=leaky
27 |
28 | [maxpool]
29 | size=2
30 | stride=2
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=32
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=leaky
39 |
40 | [maxpool]
41 | size=2
42 | stride=2
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=64
47 | size=3
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [maxpool]
53 | size=2
54 | stride=2
55 |
56 | [convolutional]
57 | batch_normalize=1
58 | filters=128
59 | size=3
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [maxpool]
65 | size=2
66 | stride=2
67 |
68 | [convolutional]
69 | batch_normalize=1
70 | filters=256
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [maxpool]
77 | size=2
78 | stride=2
79 |
80 | [convolutional]
81 | batch_normalize=1
82 | filters=512
83 | size=3
84 | stride=1
85 | pad=1
86 | activation=leaky
87 |
88 | [maxpool]
89 | size=2
90 | stride=2
91 |
92 | [convolutional]
93 | batch_normalize=1
94 | size=3
95 | stride=1
96 | pad=1
97 | filters=1024
98 | activation=leaky
99 |
100 | [convolutional]
101 | batch_normalize=1
102 | size=3
103 | stride=1
104 | pad=1
105 | filters=256
106 | activation=leaky
107 |
108 | [connected]
109 | output= 4655
110 | activation=linear
111 |
112 | [detection]
113 | classes=80
114 | coords=4
115 | rescore=1
116 | side=7
117 | num=3
118 | softmax=0
119 | sqrt=1
120 | jitter=.2
121 |
122 | object_scale=1
123 | noobject_scale=.5
124 | class_scale=1
125 | coord_scale=5
126 |
--------------------------------------------------------------------------------
/cfg/v1.1/tiny-yolo-4c.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=2
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | saturation=.75
11 | exposure=.75
12 | hue = .1
13 |
14 | learning_rate=0.0005
15 | policy=steps
16 | steps=200,400,600,800,20000,30000
17 | scales=2.5,2,2,2,.1,.1
18 | max_batches = 40000
19 |
20 | [convolutional]
21 | batch_normalize=1
22 | filters=16
23 | size=3
24 | stride=1
25 | pad=1
26 | activation=leaky
27 |
28 | [maxpool]
29 | size=2
30 | stride=2
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=32
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=leaky
39 |
40 | [maxpool]
41 | size=2
42 | stride=2
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=64
47 | size=3
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [maxpool]
53 | size=2
54 | stride=2
55 |
56 | [convolutional]
57 | batch_normalize=1
58 | filters=128
59 | size=3
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [maxpool]
65 | size=2
66 | stride=2
67 |
68 | [convolutional]
69 | batch_normalize=1
70 | filters=256
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [maxpool]
77 | size=2
78 | stride=2
79 |
80 | [convolutional]
81 | batch_normalize=1
82 | filters=512
83 | size=3
84 | stride=1
85 | pad=1
86 | activation=leaky
87 |
88 | [maxpool]
89 | size=2
90 | stride=2
91 |
92 | [convolutional]
93 | batch_normalize=1
94 | size=3
95 | stride=1
96 | pad=1
97 | filters=1024
98 | activation=leaky
99 |
100 | [convolutional]
101 | batch_normalize=1
102 | size=3
103 | stride=1
104 | pad=1
105 | filters=256
106 | activation=leaky
107 |
108 | [select]
109 | old_output=1470
110 | keep=8,14,15,19/20
111 | bins=49
112 | output=686
113 | activation=linear
114 |
115 | [detection]
116 | classes=4
117 | coords=4
118 | rescore=1
119 | side=7
120 | num=2
121 | softmax=0
122 | sqrt=1
123 | jitter=.2
124 |
125 | object_scale=1
126 | noobject_scale=.5
127 | class_scale=1
128 | coord_scale=5
--------------------------------------------------------------------------------
/cfg/v1.1/tiny-yolov1.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=2
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | saturation=.75
11 | exposure=.75
12 | hue = .1
13 |
14 | learning_rate=0.0005
15 | policy=steps
16 | steps=200,400,600,800,20000,30000
17 | scales=2.5,2,2,2,.1,.1
18 | max_batches = 40000
19 |
20 | [convolutional]
21 | batch_normalize=1
22 | filters=16
23 | size=3
24 | stride=1
25 | pad=1
26 | activation=leaky
27 |
28 | [maxpool]
29 | size=2
30 | stride=2
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=32
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=leaky
39 |
40 | [maxpool]
41 | size=2
42 | stride=2
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=64
47 | size=3
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [maxpool]
53 | size=2
54 | stride=2
55 |
56 | [convolutional]
57 | batch_normalize=1
58 | filters=128
59 | size=3
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [maxpool]
65 | size=2
66 | stride=2
67 |
68 | [convolutional]
69 | batch_normalize=1
70 | filters=256
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [maxpool]
77 | size=2
78 | stride=2
79 |
80 | [convolutional]
81 | batch_normalize=1
82 | filters=512
83 | size=3
84 | stride=1
85 | pad=1
86 | activation=leaky
87 |
88 | [maxpool]
89 | size=2
90 | stride=2
91 |
92 | [convolutional]
93 | batch_normalize=1
94 | size=3
95 | stride=1
96 | pad=1
97 | filters=1024
98 | activation=leaky
99 |
100 | [convolutional]
101 | batch_normalize=1
102 | size=3
103 | stride=1
104 | pad=1
105 | filters=256
106 | activation=leaky
107 |
108 | [connected]
109 | output= 1470
110 | activation=linear
111 |
112 | [detection]
113 | classes=20
114 | coords=4
115 | rescore=1
116 | side=7
117 | num=2
118 | softmax=0
119 | sqrt=1
120 | jitter=.2
121 |
122 | object_scale=1
123 | noobject_scale=.5
124 | class_scale=1
125 | coord_scale=5
126 |
127 |
--------------------------------------------------------------------------------
/cfg/v1.1/yolo-coco.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=4
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | hue = .1
11 | saturation=.75
12 | exposure=.75
13 |
14 | learning_rate=0.0005
15 | policy=steps
16 | steps=200,400,600,800,100000,150000
17 | scales=2.5,2,2,2,.1,.1
18 | max_batches = 200000
19 |
20 | [convolutional]
21 | batch_normalize=1
22 | filters=64
23 | size=7
24 | stride=2
25 | pad=1
26 | activation=leaky
27 |
28 | [maxpool]
29 | size=2
30 | stride=2
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=192
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=leaky
39 |
40 | [maxpool]
41 | size=2
42 | stride=2
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=128
47 | size=1
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [convolutional]
53 | batch_normalize=1
54 | filters=256
55 | size=3
56 | stride=1
57 | pad=1
58 | activation=leaky
59 |
60 | [convolutional]
61 | batch_normalize=1
62 | filters=256
63 | size=1
64 | stride=1
65 | pad=1
66 | activation=leaky
67 |
68 | [convolutional]
69 | batch_normalize=1
70 | filters=512
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [maxpool]
77 | size=2
78 | stride=2
79 |
80 | [convolutional]
81 | batch_normalize=1
82 | filters=256
83 | size=1
84 | stride=1
85 | pad=1
86 | activation=leaky
87 |
88 | [convolutional]
89 | batch_normalize=1
90 | filters=512
91 | size=3
92 | stride=1
93 | pad=1
94 | activation=leaky
95 |
96 | [convolutional]
97 | batch_normalize=1
98 | filters=256
99 | size=1
100 | stride=1
101 | pad=1
102 | activation=leaky
103 |
104 | [convolutional]
105 | batch_normalize=1
106 | filters=512
107 | size=3
108 | stride=1
109 | pad=1
110 | activation=leaky
111 |
112 | [convolutional]
113 | batch_normalize=1
114 | filters=256
115 | size=1
116 | stride=1
117 | pad=1
118 | activation=leaky
119 |
120 | [convolutional]
121 | batch_normalize=1
122 | filters=512
123 | size=3
124 | stride=1
125 | pad=1
126 | activation=leaky
127 |
128 | [convolutional]
129 | batch_normalize=1
130 | filters=256
131 | size=1
132 | stride=1
133 | pad=1
134 | activation=leaky
135 |
136 | [convolutional]
137 | batch_normalize=1
138 | filters=512
139 | size=3
140 | stride=1
141 | pad=1
142 | activation=leaky
143 |
144 | [convolutional]
145 | batch_normalize=1
146 | filters=512
147 | size=1
148 | stride=1
149 | pad=1
150 | activation=leaky
151 |
152 | [convolutional]
153 | batch_normalize=1
154 | filters=1024
155 | size=3
156 | stride=1
157 | pad=1
158 | activation=leaky
159 |
160 | [maxpool]
161 | size=2
162 | stride=2
163 |
164 | [convolutional]
165 | batch_normalize=1
166 | filters=512
167 | size=1
168 | stride=1
169 | pad=1
170 | activation=leaky
171 |
172 | [convolutional]
173 | batch_normalize=1
174 | filters=1024
175 | size=3
176 | stride=1
177 | pad=1
178 | activation=leaky
179 |
180 | [convolutional]
181 | batch_normalize=1
182 | filters=512
183 | size=1
184 | stride=1
185 | pad=1
186 | activation=leaky
187 |
188 | [convolutional]
189 | batch_normalize=1
190 | filters=1024
191 | size=3
192 | stride=1
193 | pad=1
194 | activation=leaky
195 |
196 | #######
197 |
198 | [convolutional]
199 | batch_normalize=1
200 | size=3
201 | stride=1
202 | pad=1
203 | filters=1024
204 | activation=leaky
205 |
206 | [convolutional]
207 | batch_normalize=1
208 | size=3
209 | stride=2
210 | pad=1
211 | filters=1024
212 | activation=leaky
213 |
214 | [convolutional]
215 | batch_normalize=1
216 | size=3
217 | stride=1
218 | pad=1
219 | filters=1024
220 | activation=leaky
221 |
222 | [convolutional]
223 | batch_normalize=1
224 | size=3
225 | stride=1
226 | pad=1
227 | filters=1024
228 | activation=leaky
229 |
230 | [local]
231 | size=3
232 | stride=1
233 | pad=1
234 | filters=256
235 | activation=leaky
236 |
237 | [connected]
238 | output= 4655
239 | activation=linear
240 |
241 | [detection]
242 | classes=80
243 | coords=4
244 | rescore=1
245 | side=7
246 | num=3
247 | softmax=0
248 | sqrt=1
249 | jitter=.2
250 |
251 | object_scale=1
252 | noobject_scale=.5
253 | class_scale=1
254 | coord_scale=5
255 |
256 |
--------------------------------------------------------------------------------
/cfg/v1.1/yolov1.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=1
3 | subdivisions=1
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 | saturation=1.5
10 | exposure=1.5
11 | hue=.1
12 |
13 | learning_rate=0.0005
14 | policy=steps
15 | steps=200,400,600,20000,30000
16 | scales=2.5,2,2,.1,.1
17 | max_batches = 40000
18 |
19 | [convolutional]
20 | batch_normalize=1
21 | filters=64
22 | size=7
23 | stride=2
24 | pad=1
25 | activation=leaky
26 |
27 | [maxpool]
28 | size=2
29 | stride=2
30 |
31 | [convolutional]
32 | batch_normalize=1
33 | filters=192
34 | size=3
35 | stride=1
36 | pad=1
37 | activation=leaky
38 |
39 | [maxpool]
40 | size=2
41 | stride=2
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=128
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=256
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [convolutional]
60 | batch_normalize=1
61 | filters=256
62 | size=1
63 | stride=1
64 | pad=1
65 | activation=leaky
66 |
67 | [convolutional]
68 | batch_normalize=1
69 | filters=512
70 | size=3
71 | stride=1
72 | pad=1
73 | activation=leaky
74 |
75 | [maxpool]
76 | size=2
77 | stride=2
78 |
79 | [convolutional]
80 | batch_normalize=1
81 | filters=256
82 | size=1
83 | stride=1
84 | pad=1
85 | activation=leaky
86 |
87 | [convolutional]
88 | batch_normalize=1
89 | filters=512
90 | size=3
91 | stride=1
92 | pad=1
93 | activation=leaky
94 |
95 | [convolutional]
96 | batch_normalize=1
97 | filters=256
98 | size=1
99 | stride=1
100 | pad=1
101 | activation=leaky
102 |
103 | [convolutional]
104 | batch_normalize=1
105 | filters=512
106 | size=3
107 | stride=1
108 | pad=1
109 | activation=leaky
110 |
111 | [convolutional]
112 | batch_normalize=1
113 | filters=256
114 | size=1
115 | stride=1
116 | pad=1
117 | activation=leaky
118 |
119 | [convolutional]
120 | batch_normalize=1
121 | filters=512
122 | size=3
123 | stride=1
124 | pad=1
125 | activation=leaky
126 |
127 | [convolutional]
128 | batch_normalize=1
129 | filters=256
130 | size=1
131 | stride=1
132 | pad=1
133 | activation=leaky
134 |
135 | [convolutional]
136 | batch_normalize=1
137 | filters=512
138 | size=3
139 | stride=1
140 | pad=1
141 | activation=leaky
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=512
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=1024
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [maxpool]
160 | size=2
161 | stride=2
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=512
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=1024
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [convolutional]
180 | batch_normalize=1
181 | filters=512
182 | size=1
183 | stride=1
184 | pad=1
185 | activation=leaky
186 |
187 | [convolutional]
188 | batch_normalize=1
189 | filters=1024
190 | size=3
191 | stride=1
192 | pad=1
193 | activation=leaky
194 |
195 | #######
196 |
197 | [convolutional]
198 | batch_normalize=1
199 | size=3
200 | stride=1
201 | pad=1
202 | filters=1024
203 | activation=leaky
204 |
205 | [convolutional]
206 | batch_normalize=1
207 | size=3
208 | stride=2
209 | pad=1
210 | filters=1024
211 | activation=leaky
212 |
213 | [convolutional]
214 | batch_normalize=1
215 | size=3
216 | stride=1
217 | pad=1
218 | filters=1024
219 | activation=leaky
220 |
221 | [convolutional]
222 | batch_normalize=1
223 | size=3
224 | stride=1
225 | pad=1
226 | filters=1024
227 | activation=leaky
228 |
229 | [local]
230 | size=3
231 | stride=1
232 | pad=1
233 | filters=256
234 | activation=leaky
235 |
236 | [dropout]
237 | probability=.5
238 |
239 | [connected]
240 | output= 1715
241 | activation=linear
242 |
243 | [detection]
244 | classes=20
245 | coords=4
246 | rescore=1
247 | side=7
248 | num=3
249 | softmax=0
250 | sqrt=1
251 | jitter=.2
252 |
253 | object_scale=1
254 | noobject_scale=.5
255 | class_scale=1
256 | coord_scale=5
257 |
258 |
--------------------------------------------------------------------------------
/cfg/v1/tiny-old.profile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/cfg/v1/tiny-old.profile
--------------------------------------------------------------------------------
/cfg/v1/tiny.profile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/cfg/v1/tiny.profile
--------------------------------------------------------------------------------
/cfg/v1/yolo-2c.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=64
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | learning_rate=0.0001
11 | policy=steps
12 | steps=20,40,60,80,20000,30000
13 | scales=5,5,2,2,.1,.1
14 | max_batches = 40000
15 |
16 | [crop]
17 | crop_width=448
18 | crop_height=448
19 | flip=0
20 | angle=0
21 | saturation = 1.5
22 | exposure = 1.5
23 |
24 | [convolutional]
25 | filters=16
26 | size=3
27 | stride=1
28 | pad=1
29 | activation=leaky
30 |
31 | [maxpool]
32 | size=2
33 | stride=2
34 |
35 | [convolutional]
36 | filters=32
37 | size=3
38 | stride=1
39 | pad=1
40 | activation=leaky
41 |
42 | [maxpool]
43 | size=2
44 | stride=2
45 |
46 | [convolutional]
47 | filters=64
48 | size=3
49 | stride=1
50 | pad=1
51 | activation=leaky
52 |
53 | [maxpool]
54 | size=2
55 | stride=2
56 |
57 | [convolutional]
58 | filters=128
59 | size=3
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [maxpool]
65 | size=2
66 | stride=2
67 |
68 | [convolutional]
69 | filters=256
70 | size=3
71 | stride=1
72 | pad=1
73 | activation=leaky
74 |
75 | [maxpool]
76 | size=2
77 | stride=2
78 |
79 | [convolutional]
80 | filters=512
81 | size=3
82 | stride=1
83 | pad=1
84 | activation=leaky
85 |
86 | [maxpool]
87 | size=2
88 | stride=2
89 |
90 | [convolutional]
91 | filters=1024
92 | size=3
93 | stride=1
94 | pad=1
95 | activation=leaky
96 |
97 | [convolutional]
98 | filters=1024
99 | size=3
100 | stride=1
101 | pad=1
102 | activation=leaky
103 |
104 | [convolutional]
105 | filters=1024
106 | size=3
107 | stride=1
108 | pad=1
109 | activation=leaky
110 |
111 | [connected]
112 | output=256
113 | activation=linear
114 |
115 | [connected]
116 | output=4096
117 | activation=leaky
118 |
119 | [dropout]
120 | probability=.5
121 |
122 | [select]
123 | old_output=1470
124 | keep=14,19/20
125 | bins=49
126 | output=588
127 | activation=linear
128 |
129 | [detection]
130 | classes=2
131 | coords=4
132 | rescore=1
133 | side=7
134 | num=2
135 | softmax=0
136 | sqrt=1
137 | jitter=.2
138 | object_scale=1
139 | noobject_scale=.5
140 | class_scale=1
141 | coord_scale=5
--------------------------------------------------------------------------------
/cfg/v1/yolo-4c.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=64
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | learning_rate=0.001
11 | policy=steps
12 | steps=200,400,600,20000,30000
13 | scales=2.5,2,2,.1,.1
14 | max_batches = 40000
15 |
16 | [crop]
17 | crop_width=448
18 | crop_height=448
19 | flip=0
20 | angle=0
21 | saturation = 1.5
22 | exposure = 1.5
23 |
24 | [convolutional]
25 | filters=64
26 | size=7
27 | stride=2
28 | pad=1
29 | activation=leaky
30 |
31 | [maxpool]
32 | size=2
33 | stride=2
34 |
35 | [convolutional]
36 | filters=192
37 | size=3
38 | stride=1
39 | pad=1
40 | activation=leaky
41 |
42 | [maxpool]
43 | size=2
44 | stride=2
45 |
46 | [convolutional]
47 | filters=128
48 | size=1
49 | stride=1
50 | pad=1
51 | activation=leaky
52 |
53 | [convolutional]
54 | filters=256
55 | size=3
56 | stride=1
57 | pad=1
58 | activation=leaky
59 |
60 | [convolutional]
61 | filters=256
62 | size=1
63 | stride=1
64 | pad=1
65 | activation=leaky
66 |
67 | [convolutional]
68 | filters=512
69 | size=3
70 | stride=1
71 | pad=1
72 | activation=leaky
73 |
74 | [maxpool]
75 | size=2
76 | stride=2
77 |
78 | [convolutional]
79 | filters=256
80 | size=1
81 | stride=1
82 | pad=1
83 | activation=leaky
84 |
85 | [convolutional]
86 | filters=512
87 | size=3
88 | stride=1
89 | pad=1
90 | activation=leaky
91 |
92 | [convolutional]
93 | filters=256
94 | size=1
95 | stride=1
96 | pad=1
97 | activation=leaky
98 |
99 | [convolutional]
100 | filters=512
101 | size=3
102 | stride=1
103 | pad=1
104 | activation=leaky
105 |
106 | [convolutional]
107 | filters=256
108 | size=1
109 | stride=1
110 | pad=1
111 | activation=leaky
112 |
113 | [convolutional]
114 | filters=512
115 | size=3
116 | stride=1
117 | pad=1
118 | activation=leaky
119 |
120 | [convolutional]
121 | filters=256
122 | size=1
123 | stride=1
124 | pad=1
125 | activation=leaky
126 |
127 | [convolutional]
128 | filters=512
129 | size=3
130 | stride=1
131 | pad=1
132 | activation=leaky
133 |
134 | [convolutional]
135 | filters=512
136 | size=1
137 | stride=1
138 | pad=1
139 | activation=leaky
140 |
141 | [convolutional]
142 | filters=1024
143 | size=3
144 | stride=1
145 | pad=1
146 | activation=leaky
147 |
148 | [maxpool]
149 | size=2
150 | stride=2
151 |
152 | [convolutional]
153 | filters=512
154 | size=1
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [convolutional]
160 | filters=1024
161 | size=3
162 | stride=1
163 | pad=1
164 | activation=leaky
165 |
166 | [convolutional]
167 | filters=512
168 | size=1
169 | stride=1
170 | pad=1
171 | activation=leaky
172 |
173 | [convolutional]
174 | filters=1024
175 | size=3
176 | stride=1
177 | pad=1
178 | activation=leaky
179 |
180 | #######
181 |
182 | [convolutional]
183 | size=3
184 | stride=1
185 | pad=1
186 | filters=1024
187 | activation=leaky
188 |
189 | [convolutional]
190 | size=3
191 | stride=2
192 | pad=1
193 | filters=1024
194 | activation=leaky
195 |
196 | [convolutional]
197 | size=3
198 | stride=1
199 | pad=1
200 | filters=1024
201 | activation=leaky
202 |
203 | [convolutional]
204 | size=3
205 | stride=1
206 | pad=1
207 | filters=1024
208 | activation=leaky
209 |
210 | [connected]
211 | output=4096
212 | activation=leaky
213 |
214 | [dropout]
215 | probability=.5
216 |
217 | [select]
218 | old_output=1470
219 | keep=8,14,15,19/20
220 | bins=49
221 | output=686
222 | activation=linear
223 |
224 | [detection]
225 | classes=4
226 | coords=4
227 | rescore=1
228 | side=7
229 | num=2
230 | softmax=0
231 | sqrt=1
232 | jitter=.2
233 |
234 | object_scale=1
235 | noobject_scale=.5
236 | class_scale=1
237 | coord_scale=5
--------------------------------------------------------------------------------
/cfg/v1/yolo-full.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=64
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | learning_rate=0.001
11 | policy=steps
12 | steps=200,400,600,20000,30000
13 | scales=2.5,2,2,.1,.1
14 | max_batches = 40000
15 |
16 | [crop]
17 | crop_width=448
18 | crop_height=448
19 | flip=0
20 | angle=0
21 | saturation = 1.5
22 | exposure = 1.5
23 |
24 | [convolutional]
25 | filters=64
26 | size=7
27 | stride=2
28 | pad=1
29 | activation=leaky
30 |
31 | [maxpool]
32 | size=2
33 | stride=2
34 |
35 | [convolutional]
36 | filters=192
37 | size=3
38 | stride=1
39 | pad=1
40 | activation=leaky
41 |
42 | [maxpool]
43 | size=2
44 | stride=2
45 |
46 | [convolutional]
47 | filters=128
48 | size=1
49 | stride=1
50 | pad=1
51 | activation=leaky
52 |
53 | [convolutional]
54 | filters=256
55 | size=3
56 | stride=1
57 | pad=1
58 | activation=leaky
59 |
60 | [convolutional]
61 | filters=256
62 | size=1
63 | stride=1
64 | pad=1
65 | activation=leaky
66 |
67 | [convolutional]
68 | filters=512
69 | size=3
70 | stride=1
71 | pad=1
72 | activation=leaky
73 |
74 | [maxpool]
75 | size=2
76 | stride=2
77 |
78 | [convolutional]
79 | filters=256
80 | size=1
81 | stride=1
82 | pad=1
83 | activation=leaky
84 |
85 | [convolutional]
86 | filters=512
87 | size=3
88 | stride=1
89 | pad=1
90 | activation=leaky
91 |
92 | [convolutional]
93 | filters=256
94 | size=1
95 | stride=1
96 | pad=1
97 | activation=leaky
98 |
99 | [convolutional]
100 | filters=512
101 | size=3
102 | stride=1
103 | pad=1
104 | activation=leaky
105 |
106 | [convolutional]
107 | filters=256
108 | size=1
109 | stride=1
110 | pad=1
111 | activation=leaky
112 |
113 | [convolutional]
114 | filters=512
115 | size=3
116 | stride=1
117 | pad=1
118 | activation=leaky
119 |
120 | [convolutional]
121 | filters=256
122 | size=1
123 | stride=1
124 | pad=1
125 | activation=leaky
126 |
127 | [convolutional]
128 | filters=512
129 | size=3
130 | stride=1
131 | pad=1
132 | activation=leaky
133 |
134 | [convolutional]
135 | filters=512
136 | size=1
137 | stride=1
138 | pad=1
139 | activation=leaky
140 |
141 | [convolutional]
142 | filters=1024
143 | size=3
144 | stride=1
145 | pad=1
146 | activation=leaky
147 |
148 | [maxpool]
149 | size=2
150 | stride=2
151 |
152 | [convolutional]
153 | filters=512
154 | size=1
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [convolutional]
160 | filters=1024
161 | size=3
162 | stride=1
163 | pad=1
164 | activation=leaky
165 |
166 | [convolutional]
167 | filters=512
168 | size=1
169 | stride=1
170 | pad=1
171 | activation=leaky
172 |
173 | [convolutional]
174 | filters=1024
175 | size=3
176 | stride=1
177 | pad=1
178 | activation=leaky
179 |
180 | #######
181 |
182 | [convolutional]
183 | size=3
184 | stride=1
185 | pad=1
186 | filters=1024
187 | activation=leaky
188 |
189 | [convolutional]
190 | size=3
191 | stride=2
192 | pad=1
193 | filters=1024
194 | activation=leaky
195 |
196 | [convolutional]
197 | size=3
198 | stride=1
199 | pad=1
200 | filters=1024
201 | activation=leaky
202 |
203 | [convolutional]
204 | size=3
205 | stride=1
206 | pad=1
207 | filters=1024
208 | activation=leaky
209 |
210 | [connected]
211 | output=4096
212 | activation=leaky
213 |
214 | [dropout]
215 | probability=.5
216 |
217 | [connected]
218 | output= 1470
219 | activation=linear
220 |
221 | [detection]
222 | classes=20
223 | coords=4
224 | rescore=1
225 | side=7
226 | num=2
227 | softmax=0
228 | sqrt=1
229 | jitter=.2
230 |
231 | object_scale=1
232 | noobject_scale=.5
233 | class_scale=1
234 | coord_scale=5
--------------------------------------------------------------------------------
/cfg/v1/yolo-small.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=64
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | learning_rate=0.001
11 | policy=steps
12 | steps=200,400,600,20000,30000
13 | scales=2.5,2,2,.1,.1
14 | max_batches = 40000
15 |
16 | [crop]
17 | crop_width=448
18 | crop_height=448
19 | flip=0
20 | angle=0
21 | saturation = 1.5
22 | exposure = 1.5
23 |
24 | [convolutional]
25 | filters=64
26 | size=7
27 | stride=2
28 | pad=1
29 | activation=leaky
30 |
31 | [maxpool]
32 | size=2
33 | stride=2
34 |
35 | [convolutional]
36 | filters=192
37 | size=3
38 | stride=1
39 | pad=1
40 | activation=leaky
41 |
42 | [maxpool]
43 | size=2
44 | stride=2
45 |
46 | [convolutional]
47 | filters=128
48 | size=1
49 | stride=1
50 | pad=1
51 | activation=leaky
52 |
53 | [convolutional]
54 | filters=256
55 | size=3
56 | stride=1
57 | pad=1
58 | activation=leaky
59 |
60 | [convolutional]
61 | filters=256
62 | size=1
63 | stride=1
64 | pad=1
65 | activation=leaky
66 |
67 | [convolutional]
68 | filters=512
69 | size=3
70 | stride=1
71 | pad=1
72 | activation=leaky
73 |
74 | [maxpool]
75 | size=2
76 | stride=2
77 |
78 | [convolutional]
79 | filters=256
80 | size=1
81 | stride=1
82 | pad=1
83 | activation=leaky
84 |
85 | [convolutional]
86 | filters=512
87 | size=3
88 | stride=1
89 | pad=1
90 | activation=leaky
91 |
92 | [convolutional]
93 | filters=256
94 | size=1
95 | stride=1
96 | pad=1
97 | activation=leaky
98 |
99 | [convolutional]
100 | filters=512
101 | size=3
102 | stride=1
103 | pad=1
104 | activation=leaky
105 |
106 | [convolutional]
107 | filters=256
108 | size=1
109 | stride=1
110 | pad=1
111 | activation=leaky
112 |
113 | [convolutional]
114 | filters=512
115 | size=3
116 | stride=1
117 | pad=1
118 | activation=leaky
119 |
120 | [convolutional]
121 | filters=256
122 | size=1
123 | stride=1
124 | pad=1
125 | activation=leaky
126 |
127 | [convolutional]
128 | filters=512
129 | size=3
130 | stride=1
131 | pad=1
132 | activation=leaky
133 |
134 | [convolutional]
135 | filters=512
136 | size=1
137 | stride=1
138 | pad=1
139 | activation=leaky
140 |
141 | [convolutional]
142 | filters=1024
143 | size=3
144 | stride=1
145 | pad=1
146 | activation=leaky
147 |
148 | [maxpool]
149 | size=2
150 | stride=2
151 |
152 | [convolutional]
153 | filters=512
154 | size=1
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [convolutional]
160 | filters=1024
161 | size=3
162 | stride=1
163 | pad=1
164 | activation=leaky
165 |
166 | [convolutional]
167 | filters=512
168 | size=1
169 | stride=1
170 | pad=1
171 | activation=leaky
172 |
173 | [convolutional]
174 | filters=1024
175 | size=3
176 | stride=1
177 | pad=1
178 | activation=leaky
179 |
180 | #######
181 |
182 | [convolutional]
183 | size=3
184 | stride=1
185 | pad=1
186 | filters=1024
187 | activation=leaky
188 |
189 | [convolutional]
190 | size=3
191 | stride=2
192 | pad=1
193 | filters=1024
194 | activation=leaky
195 |
196 | [convolutional]
197 | size=3
198 | stride=1
199 | pad=1
200 | filters=1024
201 | activation=leaky
202 |
203 | [convolutional]
204 | size=3
205 | stride=1
206 | pad=1
207 | filters=1024
208 | activation=leaky
209 |
210 | [connected]
211 | output=512
212 | activation=leaky
213 |
214 | [connected]
215 | output=4096
216 | activation=leaky
217 |
218 | [dropout]
219 | probability=.5
220 |
221 | [connected]
222 | output= 1470
223 | activation=linear
224 |
225 | [detection]
226 | classes=20
227 | coords=4
228 | rescore=1
229 | side=7
230 | num=2
231 | softmax=0
232 | sqrt=1
233 | jitter=.2
234 |
235 | object_scale=1
236 | noobject_scale=.5
237 | class_scale=1
238 | coord_scale=5
239 |
240 |
--------------------------------------------------------------------------------
/cfg/v1/yolo-tiny-extract.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=64
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | learning_rate=0.0001
11 | policy=steps
12 | steps=20,40,60,80,20000,30000
13 | scales=5,5,2,2,.1,.1
14 | max_batches = 40000
15 |
16 | [crop]
17 | crop_width=448
18 | crop_height=448
19 | flip=0
20 | angle=0
21 | saturation = 1.5
22 | exposure = 1.5
23 |
24 | [conv-extract]
25 | profile=cfg/v1/tiny.profile
26 | input=-1
27 | output=0
28 | filters=16
29 | size=3
30 | stride=1
31 | pad=1
32 | activation=leaky
33 |
34 | [maxpool]
35 | size=2
36 | stride=2
37 |
38 | [conv-extract]
39 | profile=cfg/v1/tiny.profile
40 | input=0
41 | output=1
42 | filters=32
43 | size=3
44 | stride=1
45 | pad=1
46 | activation=leaky
47 |
48 | [maxpool]
49 | size=2
50 | stride=2
51 |
52 | [conv-extract]
53 | profile=cfg/v1/tiny.profile
54 | input=1
55 | output=2
56 | filters=64
57 | size=3
58 | stride=1
59 | pad=1
60 | activation=leaky
61 |
62 | [maxpool]
63 | size=2
64 | stride=2
65 |
66 | [conv-extract]
67 | profile=cfg/v1/tiny.profile
68 | input=2
69 | output=3
70 | filters=128
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [maxpool]
77 | size=2
78 | stride=2
79 |
80 | [conv-extract]
81 | profile=cfg/v1/tiny.profile
82 | input=3
83 | output=4
84 | filters=256
85 | size=3
86 | stride=1
87 | pad=1
88 | activation=leaky
89 |
90 | [maxpool]
91 | size=2
92 | stride=2
93 |
94 | [conv-extract]
95 | profile=cfg/v1/tiny.profile
96 | input=4
97 | output=5
98 | filters=512
99 | size=3
100 | stride=1
101 | pad=1
102 | activation=leaky
103 |
104 | [maxpool]
105 | size=2
106 | stride=2
107 |
108 | [conv-extract]
109 | profile=cfg/v1/tiny.profile
110 | input=5
111 | output=6
112 | filters=1024
113 | size=3
114 | stride=1
115 | pad=1
116 | activation=leaky
117 |
118 | [conv-extract]
119 | profile=cfg/v1/tiny.profile
120 | input=6
121 | output=7
122 | filters=1024
123 | size=3
124 | stride=1
125 | pad=1
126 | activation=leaky
127 |
128 | [conv-extract]
129 | profile=cfg/v1/tiny.profile
130 | input=7
131 | output=8
132 | filters=1024
133 | size=3
134 | stride=1
135 | pad=1
136 | activation=leaky
137 |
138 | [extract]
139 | profile=cfg/v1/tiny.profile
140 | input=8
141 | output=9
142 | old=7,7,1024,256
143 | activation=linear
144 |
145 | [extract]
146 | profile=cfg/v1/tiny.profile
147 | input=9
148 | output=10
149 | old=256,4096
150 | activation=leaky
151 |
152 | [dropout]
153 | probability=1.
154 |
155 | [select]
156 | input=cfg/v1/tiny.profile,10
157 | old_output=1470
158 | keep=8,14,15,19/20
159 | bins=49
160 | output=686
161 | activation=linear
162 |
163 | [detection]
164 | classes=4
165 | coords=4
166 | rescore=1
167 | side=7
168 | num=2
169 | softmax=0
170 | sqrt=1
171 | jitter=.2
172 | object_scale=1
173 | noobject_scale=.5
174 | class_scale=1
175 | coord_scale=5
--------------------------------------------------------------------------------
/cfg/v1/yolo-tiny-extract_.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=64
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | learning_rate=0.0001
11 | policy=steps
12 | steps=20,40,60,80,20000,30000
13 | scales=5,5,2,2,.1,.1
14 | max_batches = 40000
15 |
16 | [crop]
17 | crop_width=448
18 | crop_height=448
19 | flip=0
20 | angle=0
21 | saturation = 1.5
22 | exposure = 1.5
23 |
24 | [conv-extract]
25 | profile=cfg/v1/tiny-old.profile
26 | input=-1
27 | output=0
28 | filters=16
29 | size=3
30 | stride=1
31 | pad=1
32 | activation=leaky
33 |
34 | [maxpool]
35 | size=2
36 | stride=2
37 |
38 | [conv-extract]
39 | profile=cfg/v1/tiny-old.profile
40 | input=0
41 | output=1
42 | filters=32
43 | size=3
44 | stride=1
45 | pad=1
46 | activation=leaky
47 |
48 | [maxpool]
49 | size=2
50 | stride=2
51 |
52 | [conv-extract]
53 | profile=cfg/v1/tiny-old.profile
54 | input=1
55 | output=2
56 | filters=64
57 | size=3
58 | stride=1
59 | pad=1
60 | activation=leaky
61 |
62 | [maxpool]
63 | size=2
64 | stride=2
65 |
66 | [conv-extract]
67 | profile=cfg/v1/tiny-old.profile
68 | input=2
69 | output=3
70 | filters=128
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | [maxpool]
77 | size=2
78 | stride=2
79 |
80 | [conv-extract]
81 | profile=cfg/v1/tiny-old.profile
82 | input=3
83 | output=4
84 | filters=256
85 | size=3
86 | stride=1
87 | pad=1
88 | activation=leaky
89 |
90 | [maxpool]
91 | size=2
92 | stride=2
93 |
94 | [conv-extract]
95 | profile=cfg/v1/tiny-old.profile
96 | input=4
97 | output=5
98 | filters=512
99 | size=3
100 | stride=1
101 | pad=1
102 | activation=leaky
103 |
104 | [maxpool]
105 | size=2
106 | stride=2
107 |
108 | [conv-extract]
109 | profile=cfg/v1/tiny-old.profile
110 | input=5
111 | output=6
112 | filters=1024
113 | size=3
114 | stride=1
115 | pad=1
116 | activation=leaky
117 |
118 | [conv-extract]
119 | profile=cfg/v1/tiny-old.profile
120 | input=6
121 | output=7
122 | filters=1024
123 | size=3
124 | stride=1
125 | pad=1
126 | activation=leaky
127 |
128 | [conv-extract]
129 | profile=cfg/v1/tiny-old.profile
130 | input=7
131 | output=8
132 | filters=1024
133 | size=3
134 | stride=1
135 | pad=1
136 | activation=leaky
137 |
138 | [extract]
139 | profile=cfg/v1/tiny-old.profile
140 | input=8
141 | output=9
142 | old=7,7,1024,256
143 | activation=linear
144 |
145 | [extract]
146 | profile=cfg/v1/tiny-old.profile
147 | input=9
148 | output=10
149 | old=256,4096
150 | activation=leaky
151 |
152 | [dropout]
153 | probability=1.
154 |
155 | [select]
156 | input=cfg/v1/tiny-old.profile,10
157 | old_output=1470
158 | keep=8,14,15,19/20
159 | bins=49
160 | output=686
161 | activation=linear
162 |
163 | [detection]
164 | classes=4
165 | coords=4
166 | rescore=1
167 | side=7
168 | num=2
169 | softmax=0
170 | sqrt=1
171 | jitter=.2
172 | object_scale=2.5
173 | noobject_scale=2
174 | class_scale=2.5
175 | coord_scale=5
176 |
177 | save=11250
--------------------------------------------------------------------------------
/cfg/v1/yolo-tiny.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=64
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | learning_rate=0.0001
11 | policy=steps
12 | steps=20,40,60,80,20000,30000
13 | scales=5,5,2,2,.1,.1
14 | max_batches = 40000
15 |
16 | [crop]
17 | crop_width=448
18 | crop_height=448
19 | flip=0
20 | angle=0
21 | saturation = 1.5
22 | exposure = 1.5
23 |
24 | [convolutional]
25 | filters=16
26 | size=3
27 | stride=1
28 | pad=1
29 | activation=leaky
30 |
31 | [maxpool]
32 | size=2
33 | stride=2
34 |
35 | [convolutional]
36 | filters=32
37 | size=3
38 | stride=1
39 | pad=1
40 | activation=leaky
41 |
42 | [maxpool]
43 | size=2
44 | stride=2
45 |
46 | [convolutional]
47 | filters=64
48 | size=3
49 | stride=1
50 | pad=1
51 | activation=leaky
52 |
53 | [maxpool]
54 | size=2
55 | stride=2
56 |
57 | [convolutional]
58 | filters=128
59 | size=3
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [maxpool]
65 | size=2
66 | stride=2
67 |
68 | [convolutional]
69 | filters=256
70 | size=3
71 | stride=1
72 | pad=1
73 | activation=leaky
74 |
75 | [maxpool]
76 | size=2
77 | stride=2
78 |
79 | [convolutional]
80 | filters=512
81 | size=3
82 | stride=1
83 | pad=1
84 | activation=leaky
85 |
86 | [maxpool]
87 | size=2
88 | stride=2
89 |
90 | [convolutional]
91 | filters=1024
92 | size=3
93 | stride=1
94 | pad=1
95 | activation=leaky
96 |
97 | [convolutional]
98 | filters=1024
99 | size=3
100 | stride=1
101 | pad=1
102 | activation=leaky
103 |
104 | [convolutional]
105 | filters=1024
106 | size=3
107 | stride=1
108 | pad=1
109 | activation=leaky
110 |
111 | [connected]
112 | output=256
113 | activation=linear
114 |
115 | [connected]
116 | output=4096
117 | activation=leaky
118 |
119 | [dropout]
120 | probability=.5
121 |
122 | [connected]
123 | output= 1470
124 | activation=linear
125 |
126 | [detection]
127 | classes=20
128 | coords=4
129 | rescore=1
130 | side=7
131 | num=2
132 | softmax=0
133 | sqrt=1
134 | jitter=.2
135 | object_scale=1
136 | noobject_scale=.5
137 | class_scale=1
138 | coord_scale=5
--------------------------------------------------------------------------------
/cfg/v1/yolo-tiny4c.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=64
4 | height=448
5 | width=448
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 |
10 | learning_rate=0.0001
11 | policy=steps
12 | steps=20,40,60,80,20000,30000
13 | scales=5,5,2,2,.1,.1
14 | max_batches = 40000
15 |
16 | [crop]
17 | crop_width=448
18 | crop_height=448
19 | flip=0
20 | angle=0
21 | saturation = 1.5
22 | exposure = 1.5
23 |
24 | [convolutional]
25 | filters=16
26 | size=3
27 | stride=1
28 | pad=1
29 | activation=leaky
30 |
31 | [maxpool]
32 | size=2
33 | stride=2
34 |
35 | [convolutional]
36 | filters=32
37 | size=3
38 | stride=1
39 | pad=1
40 | activation=leaky
41 |
42 | [maxpool]
43 | size=2
44 | stride=2
45 |
46 | [convolutional]
47 | filters=64
48 | size=3
49 | stride=1
50 | pad=1
51 | activation=leaky
52 |
53 | [maxpool]
54 | size=2
55 | stride=2
56 |
57 | [convolutional]
58 | filters=128
59 | size=3
60 | stride=1
61 | pad=1
62 | activation=leaky
63 |
64 | [maxpool]
65 | size=2
66 | stride=2
67 |
68 | [convolutional]
69 | filters=256
70 | size=3
71 | stride=1
72 | pad=1
73 | activation=leaky
74 |
75 | [maxpool]
76 | size=2
77 | stride=2
78 |
79 | [convolutional]
80 | filters=512
81 | size=3
82 | stride=1
83 | pad=1
84 | activation=leaky
85 |
86 | [maxpool]
87 | size=2
88 | stride=2
89 |
90 | [convolutional]
91 | filters=1024
92 | size=3
93 | stride=1
94 | pad=1
95 | activation=leaky
96 |
97 | [convolutional]
98 | filters=1024
99 | size=3
100 | stride=1
101 | pad=1
102 | activation=leaky
103 |
104 | [convolutional]
105 | filters=1024
106 | size=3
107 | stride=1
108 | pad=1
109 | activation=leaky
110 |
111 | [connected]
112 | output=256
113 | activation=linear
114 |
115 | [connected]
116 | output=4096
117 | activation=leaky
118 |
119 | [dropout]
120 | probability=.5
121 |
122 | [select]
123 | old_output=1470
124 | keep=8,14,15,19/20
125 | bins=49
126 | output=686
127 | activation=linear
128 |
129 | [detection]
130 | classes=4
131 | coords=4
132 | rescore=1
133 | side=7
134 | num=2
135 | softmax=0
136 | sqrt=1
137 | jitter=.2
138 | object_scale=1
139 | noobject_scale=.5
140 | class_scale=1
141 | coord_scale=5
--------------------------------------------------------------------------------
/cfg/yolo-voc.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=8
4 | height=416
5 | width=416
6 | channels=3
7 | momentum=0.9
8 | decay=0.0005
9 | angle=0
10 | saturation = 1.5
11 | exposure = 1.5
12 | hue=.1
13 |
14 | learning_rate=0.0001
15 | max_batches = 45000
16 | policy=steps
17 | steps=100,25000,35000
18 | scales=10,.1,.1
19 |
20 | [convolutional]
21 | batch_normalize=1
22 | filters=32
23 | size=3
24 | stride=1
25 | pad=1
26 | activation=leaky
27 |
28 | [maxpool]
29 | size=2
30 | stride=2
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=64
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=leaky
39 |
40 | [maxpool]
41 | size=2
42 | stride=2
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=128
47 | size=3
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [convolutional]
53 | batch_normalize=1
54 | filters=64
55 | size=1
56 | stride=1
57 | pad=1
58 | activation=leaky
59 |
60 | [convolutional]
61 | batch_normalize=1
62 | filters=128
63 | size=3
64 | stride=1
65 | pad=1
66 | activation=leaky
67 |
68 | [maxpool]
69 | size=2
70 | stride=2
71 |
72 | [convolutional]
73 | batch_normalize=1
74 | filters=256
75 | size=3
76 | stride=1
77 | pad=1
78 | activation=leaky
79 |
80 | [convolutional]
81 | batch_normalize=1
82 | filters=128
83 | size=1
84 | stride=1
85 | pad=1
86 | activation=leaky
87 |
88 | [convolutional]
89 | batch_normalize=1
90 | filters=256
91 | size=3
92 | stride=1
93 | pad=1
94 | activation=leaky
95 |
96 | [maxpool]
97 | size=2
98 | stride=2
99 |
100 | [convolutional]
101 | batch_normalize=1
102 | filters=512
103 | size=3
104 | stride=1
105 | pad=1
106 | activation=leaky
107 |
108 | [convolutional]
109 | batch_normalize=1
110 | filters=256
111 | size=1
112 | stride=1
113 | pad=1
114 | activation=leaky
115 |
116 | [convolutional]
117 | batch_normalize=1
118 | filters=512
119 | size=3
120 | stride=1
121 | pad=1
122 | activation=leaky
123 |
124 | [convolutional]
125 | batch_normalize=1
126 | filters=256
127 | size=1
128 | stride=1
129 | pad=1
130 | activation=leaky
131 |
132 | [convolutional]
133 | batch_normalize=1
134 | filters=512
135 | size=3
136 | stride=1
137 | pad=1
138 | activation=leaky
139 |
140 | [maxpool]
141 | size=2
142 | stride=2
143 |
144 | [convolutional]
145 | batch_normalize=1
146 | filters=1024
147 | size=3
148 | stride=1
149 | pad=1
150 | activation=leaky
151 |
152 | [convolutional]
153 | batch_normalize=1
154 | filters=512
155 | size=1
156 | stride=1
157 | pad=1
158 | activation=leaky
159 |
160 | [convolutional]
161 | batch_normalize=1
162 | filters=1024
163 | size=3
164 | stride=1
165 | pad=1
166 | activation=leaky
167 |
168 | [convolutional]
169 | batch_normalize=1
170 | filters=512
171 | size=1
172 | stride=1
173 | pad=1
174 | activation=leaky
175 |
176 | [convolutional]
177 | batch_normalize=1
178 | filters=1024
179 | size=3
180 | stride=1
181 | pad=1
182 | activation=leaky
183 |
184 |
185 | #######
186 |
187 | [convolutional]
188 | batch_normalize=1
189 | size=3
190 | stride=1
191 | pad=1
192 | filters=1024
193 | activation=leaky
194 |
195 | [convolutional]
196 | batch_normalize=1
197 | size=3
198 | stride=1
199 | pad=1
200 | filters=1024
201 | activation=leaky
202 |
203 | [route]
204 | layers=-9
205 |
206 | [reorg]
207 | stride=2
208 |
209 | [route]
210 | layers=-1,-3
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | size=3
215 | stride=1
216 | pad=1
217 | filters=1024
218 | activation=leaky
219 |
220 | [convolutional]
221 | size=1
222 | stride=1
223 | pad=1
224 | filters=125
225 | activation=linear
226 |
227 | [region]
228 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
229 | bias_match=1
230 | classes=20
231 | coords=4
232 | num=5
233 | softmax=1
234 | jitter=.2
235 | rescore=1
236 |
237 | object_scale=5
238 | noobject_scale=1
239 | class_scale=1
240 | coord_scale=1
241 |
242 | absolute=1
243 | thresh = .6
244 | random=0
245 |
--------------------------------------------------------------------------------
/cfg/yolo.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | batch=1
4 | subdivisions=1
5 | # Training
6 | # batch=64
7 | # subdivisions=8
8 | width=608
9 | height=608
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | [maxpool]
34 | size=2
35 | stride=2
36 |
37 | [convolutional]
38 | batch_normalize=1
39 | filters=64
40 | size=3
41 | stride=1
42 | pad=1
43 | activation=leaky
44 |
45 | [maxpool]
46 | size=2
47 | stride=2
48 |
49 | [convolutional]
50 | batch_normalize=1
51 | filters=128
52 | size=3
53 | stride=1
54 | pad=1
55 | activation=leaky
56 |
57 | [convolutional]
58 | batch_normalize=1
59 | filters=64
60 | size=1
61 | stride=1
62 | pad=1
63 | activation=leaky
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=1
70 | pad=1
71 | activation=leaky
72 |
73 | [maxpool]
74 | size=2
75 | stride=2
76 |
77 | [convolutional]
78 | batch_normalize=1
79 | filters=256
80 | size=3
81 | stride=1
82 | pad=1
83 | activation=leaky
84 |
85 | [convolutional]
86 | batch_normalize=1
87 | filters=128
88 | size=1
89 | stride=1
90 | pad=1
91 | activation=leaky
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=256
96 | size=3
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [maxpool]
102 | size=2
103 | stride=2
104 |
105 | [convolutional]
106 | batch_normalize=1
107 | filters=512
108 | size=3
109 | stride=1
110 | pad=1
111 | activation=leaky
112 |
113 | [convolutional]
114 | batch_normalize=1
115 | filters=256
116 | size=1
117 | stride=1
118 | pad=1
119 | activation=leaky
120 |
121 | [convolutional]
122 | batch_normalize=1
123 | filters=512
124 | size=3
125 | stride=1
126 | pad=1
127 | activation=leaky
128 |
129 | [convolutional]
130 | batch_normalize=1
131 | filters=256
132 | size=1
133 | stride=1
134 | pad=1
135 | activation=leaky
136 |
137 | [convolutional]
138 | batch_normalize=1
139 | filters=512
140 | size=3
141 | stride=1
142 | pad=1
143 | activation=leaky
144 |
145 | [maxpool]
146 | size=2
147 | stride=2
148 |
149 | [convolutional]
150 | batch_normalize=1
151 | filters=1024
152 | size=3
153 | stride=1
154 | pad=1
155 | activation=leaky
156 |
157 | [convolutional]
158 | batch_normalize=1
159 | filters=512
160 | size=1
161 | stride=1
162 | pad=1
163 | activation=leaky
164 |
165 | [convolutional]
166 | batch_normalize=1
167 | filters=1024
168 | size=3
169 | stride=1
170 | pad=1
171 | activation=leaky
172 |
173 | [convolutional]
174 | batch_normalize=1
175 | filters=512
176 | size=1
177 | stride=1
178 | pad=1
179 | activation=leaky
180 |
181 | [convolutional]
182 | batch_normalize=1
183 | filters=1024
184 | size=3
185 | stride=1
186 | pad=1
187 | activation=leaky
188 |
189 |
190 | #######
191 |
192 | [convolutional]
193 | batch_normalize=1
194 | size=3
195 | stride=1
196 | pad=1
197 | filters=1024
198 | activation=leaky
199 |
200 | [convolutional]
201 | batch_normalize=1
202 | size=3
203 | stride=1
204 | pad=1
205 | filters=1024
206 | activation=leaky
207 |
208 | [route]
209 | layers=-9
210 |
211 | [convolutional]
212 | batch_normalize=1
213 | size=1
214 | stride=1
215 | pad=1
216 | filters=64
217 | activation=leaky
218 |
219 | [reorg]
220 | stride=2
221 |
222 | [route]
223 | layers=-1,-4
224 |
225 | [convolutional]
226 | batch_normalize=1
227 | size=3
228 | stride=1
229 | pad=1
230 | filters=1024
231 | activation=leaky
232 |
233 | [convolutional]
234 | size=1
235 | stride=1
236 | pad=1
237 | filters=425
238 | activation=linear
239 |
240 |
241 | [region]
242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
243 | bias_match=1
244 | classes=80
245 | coords=4
246 | num=5
247 | softmax=1
248 | jitter=.3
249 | rescore=1
250 |
251 | object_scale=5
252 | noobject_scale=1
253 | class_scale=1
254 | coord_scale=1
255 |
256 | absolute=1
257 | thresh = .1
258 | random=1
259 |
--------------------------------------------------------------------------------
/darkflow/cli.py:
--------------------------------------------------------------------------------
1 | from .defaults import argHandler #Import the default arguments
2 | import os
3 | from .net.build import TFNet
4 |
5 | def cliHandler(args):
6 | FLAGS = argHandler()
7 | FLAGS.setDefaults()
8 | FLAGS.parseArgs(args)
9 |
10 | # make sure all necessary dirs exist
11 | def _get_dir(dirs):
12 | for d in dirs:
13 | this = os.path.abspath(os.path.join(os.path.curdir, d))
14 | if not os.path.exists(this): os.makedirs(this)
15 |
16 | requiredDirectories = [FLAGS.imgdir, FLAGS.binary, FLAGS.backup, os.path.join(FLAGS.imgdir,'out')]
17 | if FLAGS.summary:
18 | requiredDirectories.append(FLAGS.summary)
19 |
20 | _get_dir(requiredDirectories)
21 |
22 | # fix FLAGS.load to appropriate type
23 | try: FLAGS.load = int(FLAGS.load)
24 | except ValueError: pass
25 |
26 | tfnet = TFNet(FLAGS)
27 |
28 | if FLAGS.demo:
29 | tfnet.camera()
30 | exit('Demo stopped, exit.')
31 |
32 | if FLAGS.train:
33 | print('Enter training ...'); tfnet.train()
34 | if not FLAGS.savepb:
35 | exit('Training finished, exit.')
36 |
37 | if FLAGS.savepb:
38 | print('Rebuild a constant version ...')
39 | tfnet.savepb(); exit('Done')
40 |
41 | tfnet.predict()
42 |
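43 | # Usage sketch (added, not part of the original module): the `flow`
44 | # entry-point script at the repository root is expected to forward the raw
45 | # command line here, roughly:
46 | #
47 | #   from darkflow.cli import cliHandler
48 | #   import sys
49 | #   cliHandler(sys.argv)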
--------------------------------------------------------------------------------
/darkflow/cython_utils/cy_yolo2_findboxes.pyx:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 | cimport cython
4 | ctypedef np.float_t DTYPE_t
5 | from libc.math cimport exp
6 | from ..utils.box import BoundBox
7 | from nms cimport NMS
8 |
9 | #expit
10 | @cython.boundscheck(False) # turn off bounds-checking for entire function
11 | @cython.wraparound(False) # turn off negative index wrapping for entire function
12 | @cython.cdivision(True)
13 | cdef float expit_c(float x):
14 | cdef float y= 1/(1+exp(-x))
15 | return y
16 |
17 | #MAX
18 | @cython.boundscheck(False) # turn off bounds-checking for entire function
19 | @cython.wraparound(False) # turn off negative index wrapping for entire function
20 | @cython.cdivision(True)
21 | cdef float max_c(float a, float b):
22 | if(a>b):
23 | return a
24 | return b
25 |
26 | """
27 | #SOFTMAX!
28 | @cython.cdivision(True)
29 | @cython.boundscheck(False) # turn off bounds-checking for entire function
30 | @cython.wraparound(False) # turn off negative index wrapping for entire function
31 | cdef void _softmax_c(float* x, int classes):
32 | cdef:
33 | float sum = 0
34 | np.intp_t k
35 | float arr_max = 0
36 | for k in range(classes):
37 | arr_max = max(arr_max,x[k])
38 |
39 | for k in range(classes):
40 | x[k] = exp(x[k]-arr_max)
41 | sum += x[k]
42 |
43 | for k in range(classes):
44 | x[k] = x[k]/sum
45 | """
46 |
47 |
48 |
49 | #BOX CONSTRUCTOR
50 | @cython.cdivision(True)
51 | @cython.boundscheck(False) # turn off bounds-checking for entire function
52 | @cython.wraparound(False) # turn off negative index wrapping for entire function
53 | def box_constructor(meta,np.ndarray[float,ndim=3] net_out_in):
54 | cdef:
55 | np.intp_t H, W, _, C, B, row, col, box_loop, class_loop
56 | np.intp_t row1, col1, box_loop1,index,index2
57 | float threshold = meta['thresh']
58 | float tempc,arr_max=0,sum=0
59 | double[:] anchors = np.asarray(meta['anchors'])
60 | list boxes = list()
61 |
62 | H, W, _ = meta['out_size']
63 | C = meta['classes']
64 | B = meta['num']
65 |
66 | cdef:
67 | float[:, :, :, ::1] net_out = net_out_in.reshape([H, W, B, net_out_in.shape[2]/B])
68 | float[:, :, :, ::1] Classes = net_out[:, :, :, 5:]
69 | float[:, :, :, ::1] Bbox_pred = net_out[:, :, :, :5]
70 | float[:, :, :, ::1] probs = np.zeros((H, W, B, C), dtype=np.float32)
71 |
72 | for row in range(H):
73 | for col in range(W):
74 | for box_loop in range(B):
75 | arr_max=0
76 | sum=0
77 | Bbox_pred[row, col, box_loop, 4] = expit_c(Bbox_pred[row, col, box_loop, 4])
78 | Bbox_pred[row, col, box_loop, 0] = (col + expit_c(Bbox_pred[row, col, box_loop, 0])) / W
79 | Bbox_pred[row, col, box_loop, 1] = (row + expit_c(Bbox_pred[row, col, box_loop, 1])) / H
80 | Bbox_pred[row, col, box_loop, 2] = exp(Bbox_pred[row, col, box_loop, 2]) * anchors[2 * box_loop + 0] / W
81 | Bbox_pred[row, col, box_loop, 3] = exp(Bbox_pred[row, col, box_loop, 3]) * anchors[2 * box_loop + 1] / H
82 | #SOFTMAX BLOCK, no more pointer juggling
83 | for class_loop in range(C):
84 | arr_max=max_c(arr_max,Classes[row,col,box_loop,class_loop])
85 |
86 | for class_loop in range(C):
87 | Classes[row,col,box_loop,class_loop]=exp(Classes[row,col,box_loop,class_loop]-arr_max)
88 | sum+=Classes[row,col,box_loop,class_loop]
89 |
90 | for class_loop in range(C):
91 | tempc = Classes[row, col, box_loop, class_loop] * Bbox_pred[row, col, box_loop, 4]/sum
92 | if(tempc > threshold):
93 | probs[row, col, box_loop, class_loop] = tempc
94 |
95 |
96 | #NMS
97 | return NMS(np.ascontiguousarray(probs).reshape(H*W*B,C), np.ascontiguousarray(Bbox_pred).reshape(H*B*W,5))
98 |
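99 | # Note (added): box_constructor expects `meta` to carry the keys read above:
100 | # 'thresh' (detection threshold), 'anchors' (flat list of 2*num floats),
101 | # 'out_size' (H, W and channels of the network output), 'classes' and 'num'.
102 | # A minimal call sketch, assuming a parsed meta dict and a float32 network
103 | # output of shape H x W x (num * (5 + classes)):
104 | #
105 | #   boxes = box_constructor(meta, net_out)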
--------------------------------------------------------------------------------
/darkflow/cython_utils/cy_yolo_findboxes.pyx:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 | cimport cython
4 | ctypedef np.float_t DTYPE_t
5 | from libc.math cimport exp
6 | from ..utils.box import BoundBox
7 | from nms cimport NMS
8 |
9 |
10 |
11 | @cython.cdivision(True)
12 | @cython.boundscheck(False) # turn off bounds-checking for entire function
13 | @cython.wraparound(False) # turn off negative index wrapping for entire function
14 | def yolo_box_constructor(meta,np.ndarray[float] net_out, float threshold):
15 |
16 | cdef:
17 | float sqrt
18 | int C,B,S
19 | int SS,prob_size,conf_size
20 | int grid, b
21 | int class_loop
22 |
23 |
24 | sqrt = meta['sqrt'] + 1
25 | C, B, S = meta['classes'], meta['num'], meta['side']
26 | boxes = []
27 | SS = S * S # number of grid cells
28 | prob_size = SS * C # class probabilities
29 | conf_size = SS * B # confidences for each grid cell
30 |
31 | cdef:
32 | float [:,::1] probs = np.ascontiguousarray(net_out[0 : prob_size]).reshape([SS,C])
33 | float [:,::1] confs = np.ascontiguousarray(net_out[prob_size : (prob_size + conf_size)]).reshape([SS,B])
34 | float [: , : ,::1] coords = np.ascontiguousarray(net_out[(prob_size + conf_size) : ]).reshape([SS, B, 4])
35 | float [:,:,::1] final_probs = np.zeros([SS,B,C],dtype=np.float32)
36 |
37 |
38 | for grid in range(SS):
39 | for b in range(B):
40 | coords[grid, b, 0] = (coords[grid, b, 0] + grid % S) / S
41 | coords[grid, b, 1] = (coords[grid, b, 1] + grid // S) / S
42 | coords[grid, b, 2] = coords[grid, b, 2] ** sqrt
43 | coords[grid, b, 3] = coords[grid, b, 3] ** sqrt
44 | for class_loop in range(C):
45 | probs[grid, class_loop] = probs[grid, class_loop] * confs[grid, b]
46 | #print("PROBS",probs[grid,class_loop])
47 | if(probs[grid,class_loop] > threshold ):
48 | final_probs[grid, b, class_loop] = probs[grid, class_loop]
49 |
50 |
51 | return NMS(np.ascontiguousarray(final_probs).reshape(SS*B, C) , np.ascontiguousarray(coords).reshape(SS*B, 4))
52 |
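53 | # Note (added): yolo_box_constructor reads 'sqrt', 'classes', 'num' and
54 | # 'side' from `meta`; `net_out` is the flat v1 output laid out as
55 | # [class probs (S*S*C), confidences (S*S*B), coords (S*S*B*4)],
56 | # with the detection threshold passed separately as the third argument.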
--------------------------------------------------------------------------------
/darkflow/cython_utils/nms.pxd:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 | cimport cython
4 | ctypedef np.float_t DTYPE_t
5 | from libc.math cimport exp
6 | from utils.box import BoundBox
7 |
8 |
9 | cdef NMS(float[:, ::1] , float[:, ::1] )
10 |
11 |
12 |
--------------------------------------------------------------------------------
/darkflow/cython_utils/nms.pyx:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 | cimport cython
4 | from libc.math cimport exp
5 | from ..utils.box import BoundBox
6 |
7 |
8 |
9 | #OVERLAP
10 | @cython.boundscheck(False) # turn off bounds-checking for entire function
11 | @cython.wraparound(False) # turn off negative index wrapping for entire function
12 | @cython.cdivision(True)
13 | cdef float overlap_c(float x1, float w1 , float x2 , float w2):
14 | cdef:
15 | float l1,l2,left,right,r1,r2
16 | l1 = x1 - w1 /2.
17 | l2 = x2 - w2 /2.
18 | left = max(l1,l2)
19 | r1 = x1 + w1 /2.
20 | r2 = x2 + w2 /2.
21 | right = min(r1, r2)
22 | return right - left
23 |
24 | #BOX INTERSECTION
25 | @cython.boundscheck(False) # turn off bounds-checking for entire function
26 | @cython.wraparound(False) # turn off negative index wrapping for entire function
27 | @cython.cdivision(True)
28 | cdef float box_intersection_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh):
29 | cdef:
30 | float w,h,area
31 | w = overlap_c(ax, aw, bx, bw)
32 | h = overlap_c(ay, ah, by, bh)
33 | if w < 0 or h < 0: return 0
34 | area = w * h
35 | return area
36 |
37 | #BOX UNION
38 | @cython.boundscheck(False) # turn off bounds-checking for entire function
39 | @cython.wraparound(False) # turn off negative index wrapping for entire function
40 | @cython.cdivision(True)
41 | cdef float box_union_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh):
42 | cdef:
43 | float i,u
44 | i = box_intersection_c(ax, ay, aw, ah, bx, by, bw, bh)
45 | u = aw * ah + bw * bh -i
46 | return u
47 |
48 |
49 | #BOX IOU
50 | @cython.boundscheck(False) # turn off bounds-checking for entire function
51 | @cython.wraparound(False) # turn off negative index wrapping for entire function
52 | @cython.cdivision(True)
53 | cdef float box_iou_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh):
54 | return box_intersection_c(ax, ay, aw, ah, bx, by, bw, bh) / box_union_c(ax, ay, aw, ah, bx, by, bw, bh)
55 |
56 |
57 |
58 |
59 | #NMS
60 | @cython.boundscheck(False) # turn off bounds-checking for entire function
61 | @cython.wraparound(False) # turn off negative index wrapping for entire function
62 | @cython.cdivision(True)
63 | cdef NMS(float[:, ::1] final_probs , float[:, ::1] final_bbox):
64 | cdef list boxes = list()
65 | cdef set indices = set()
66 | cdef:
67 | np.intp_t pred_length,class_length,class_loop,index,index2
68 |
69 |
70 | pred_length = final_bbox.shape[0]
71 | class_length = final_probs.shape[1]
72 | for class_loop in range(class_length):
73 | for index in range(pred_length):
74 | if final_probs[index,class_loop] == 0: continue
75 | for index2 in range(index+1,pred_length):
76 | if final_probs[index2,class_loop] == 0: continue
77 | if index==index2 : continue
78 | if box_iou_c(final_bbox[index,0],final_bbox[index,1],final_bbox[index,2],final_bbox[index,3],final_bbox[index2,0],final_bbox[index2,1],final_bbox[index2,2],final_bbox[index2,3]) >= 0.4:
79 | if final_probs[index2,class_loop] > final_probs[index, class_loop] :
80 | final_probs[index, class_loop] =0
81 | break
82 | final_probs[index2,class_loop]=0
83 |
84 | if index not in indices:
85 | bb=BoundBox(class_length)
86 | bb.x = final_bbox[index, 0]
87 | bb.y = final_bbox[index, 1]
88 | bb.w = final_bbox[index, 2]
89 | bb.h = final_bbox[index, 3]
90 | bb.c = final_bbox[index, 4]
91 | bb.probs = np.asarray(final_probs[index,:])
92 | boxes.append(bb)
93 | indices.add(index)
94 | return boxes
95 |
96 | # cdef NMS(float[:, ::1] final_probs , float[:, ::1] final_bbox):
97 | # cdef list boxes = list()
98 | # cdef:
99 | # np.intp_t pred_length,class_length,class_loop,index,index2, i, j
100 |
101 |
102 | # pred_length = final_bbox.shape[0]
103 | # class_length = final_probs.shape[1]
104 |
105 | # for class_loop in range(class_length):
106 | # order = np.argsort(final_probs[:,class_loop])[::-1]
107 | # # First box
108 | # for i in range(pred_length):
109 | # index = order[i]
110 | # if final_probs[index, class_loop] == 0.:
111 | # continue
112 | # # Second box
113 | # for j in range(i+1, pred_length):
114 | # index2 = order[j]
115 | # if box_iou_c(
116 | # final_bbox[index,0],final_bbox[index,1],
117 | # final_bbox[index,2],final_bbox[index,3],
118 | # final_bbox[index2,0],final_bbox[index2,1],
119 | # final_bbox[index2,2],final_bbox[index2,3]) >= 0.4:
120 | # final_probs[index2, class_loop] = 0.
121 |
122 | # bb = BoundBox(class_length)
123 | # bb.x = final_bbox[index, 0]
124 | # bb.y = final_bbox[index, 1]
125 | # bb.w = final_bbox[index, 2]
126 | # bb.h = final_bbox[index, 3]
127 | # bb.c = final_bbox[index, 4]
128 | # bb.probs = np.asarray(final_probs[index,:])
129 | # boxes.append(bb)
130 |
131 | # return boxes
132 |
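133 | # Note (added): in both the active NMS above and the commented-out
134 | # alternative, two boxes of the same class suppress each other when their
135 | # IoU is >= 0.4, keeping the higher-probability box.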
--------------------------------------------------------------------------------
/darkflow/dark/connected.py:
--------------------------------------------------------------------------------
1 | from .layer import Layer
2 | import numpy as np
3 |
4 | class extract_layer(Layer):
5 | def setup(self, old_inp, old_out,
6 | activation, inp, out):
7 | if inp is None: inp = range(old_inp)
8 | self.activation = activation
9 | self.old_inp = old_inp
10 | self.old_out = old_out
11 | self.inp = inp
12 | self.out = out
13 | self.wshape = {
14 | 'biases': [len(self.out)],
15 | 'weights': [len(self.inp), len(self.out)]
16 | }
17 |
18 | @property
19 | def signature(self):
20 | sig = ['connected']
21 | sig += self._signature[1:-2]
22 | return sig
23 |
24 | def present(self):
25 | args = self.signature
26 | self.presenter = connected_layer(*args)
27 |
28 | def recollect(self, val):
29 | w = val['weights']
30 | b = val['biases']
31 | if w is None: self.w = val; return
32 | w = np.take(w, self.inp, 0)
33 | w = np.take(w, self.out, 1)
34 | b = np.take(b, self.out)
35 | assert1 = w.shape == tuple(self.wshape['weights'])
36 | assert2 = b.shape == tuple(self.wshape['biases'])
37 | assert assert1 and assert2, \
38 | 'Dimension does not match in {} recollect'.format(
39 | self._signature)
40 |
41 | self.w['weights'] = w
42 | self.w['biases'] = b
43 |
44 |
45 |
46 | class select_layer(Layer):
47 | def setup(self, inp, old,
48 | activation, inp_idx,
49 | out, keep, train):
50 | self.old = old
51 | self.keep = keep
52 | self.train = train
53 | self.inp_idx = inp_idx
54 | self.activation = activation
55 | inp_dim = inp
56 | if inp_idx is not None:
57 | inp_dim = len(inp_idx)
58 | self.inp = inp_dim
59 | self.out = out
60 | self.wshape = {
61 | 'biases': [out],
62 | 'weights': [inp_dim, out]
63 | }
64 |
65 | @property
66 | def signature(self):
67 | sig = ['connected']
68 | sig += self._signature[1:-4]
69 | return sig
70 |
71 | def present(self):
72 | args = self.signature
73 | self.presenter = connected_layer(*args)
74 |
75 | def recollect(self, val):
76 | w = val['weights']
77 | b = val['biases']
78 | if w is None: self.w = val; return
79 | if self.inp_idx is not None:
80 | w = np.take(w, self.inp_idx, 0)
81 |
82 | keep_b = np.take(b, self.keep)
83 | keep_w = np.take(w, self.keep, 1)
84 | train_b = b[self.train:]
85 | train_w = w[:, self.train:]
86 | self.w['biases'] = np.concatenate(
87 | (keep_b, train_b), axis = 0)
88 | self.w['weights'] = np.concatenate(
89 | (keep_w, train_w), axis = 1)
90 |
91 |
92 | class connected_layer(Layer):
93 | def setup(self, input_size,
94 | output_size, activation):
95 | self.activation = activation
96 | self.inp = input_size
97 | self.out = output_size
98 | self.wshape = {
99 | 'biases': [self.out],
100 | 'weights': [self.inp, self.out]
101 | }
102 |
103 | def finalize(self, transpose):
104 | weights = self.w['weights']
105 | if weights is None: return
106 | shp = self.wshape['weights']
107 | if not transpose:
108 | weights = weights.reshape(shp[::-1])
109 | weights = weights.transpose([1,0])
110 | else: weights = weights.reshape(shp)
111 | self.w['weights'] = weights
--------------------------------------------------------------------------------
/darkflow/dark/convolution.py:
--------------------------------------------------------------------------------
1 | from .layer import Layer
2 | import numpy as np
3 |
4 | class local_layer(Layer):
5 | def setup(self, ksize, c, n, stride,
6 | pad, w_, h_, activation):
7 | self.pad = pad * int(ksize / 2)
8 | self.activation = activation
9 | self.stride = stride
10 | self.ksize = ksize
11 | self.h_out = h_
12 | self.w_out = w_
13 |
14 | self.dnshape = [h_ * w_, n, c, ksize, ksize]
15 | self.wshape = dict({
16 | 'biases': [h_ * w_ * n],
17 | 'kernels': [h_ * w_, ksize, ksize, c, n]
18 | })
19 |
20 | def finalize(self, _):
21 | weights = self.w['kernels']
22 | if weights is None: return
23 | weights = weights.reshape(self.dnshape)
24 | weights = weights.transpose([0,3,4,2,1])
25 | self.w['kernels'] = weights
26 |
27 | class conv_extract_layer(Layer):
28 | def setup(self, ksize, c, n, stride,
29 | pad, batch_norm, activation,
30 | inp, out):
31 | if inp is None: inp = range(c)
32 | self.activation = activation
33 | self.batch_norm = batch_norm
34 | self.stride = stride
35 | self.ksize = ksize
36 | self.pad = pad
37 | self.inp = inp
38 | self.out = out
39 | self.wshape = dict({
40 | 'biases': [len(out)],
41 | 'kernel': [ksize, ksize, len(inp), len(out)]
42 | })
43 |
44 | @property
45 | def signature(self):
46 | sig = ['convolutional']
47 | sig += self._signature[1:-2]
48 | return sig
49 |
50 | def present(self):
51 | args = self.signature
52 | self.presenter = convolutional_layer(*args)
53 |
54 | def recollect(self, w):
55 | if w is None:
56 | self.w = w
57 | return
58 | k = w['kernel']
59 | b = w['biases']
60 | k = np.take(k, self.inp, 2)
61 | k = np.take(k, self.out, 3)
62 | b = np.take(b, self.out)
63 | assert1 = k.shape == tuple(self.wshape['kernel'])
64 | assert2 = b.shape == tuple(self.wshape['biases'])
65 | assert assert1 and assert2, \
66 | 'Dimension does not match in {} recollect'.format(
67 | self._signature)
68 | self.w['kernel'] = k
69 | self.w['biases'] = b
70 |
71 |
72 | class conv_select_layer(Layer):
73 | def setup(self, ksize, c, n, stride,
74 | pad, batch_norm, activation,
75 | keep_idx, real_n):
76 | self.batch_norm = bool(batch_norm)
77 | self.activation = activation
78 | self.keep_idx = keep_idx
79 | self.stride = stride
80 | self.ksize = ksize
81 | self.pad = pad
82 | self.wshape = dict({
83 | 'biases': [real_n],
84 | 'kernel': [ksize, ksize, c, real_n]
85 | })
86 | if self.batch_norm:
87 | self.wshape.update({
88 | 'moving_variance' : [real_n],
89 | 'moving_mean': [real_n],
90 | 'gamma' : [real_n]
91 | })
92 | self.h['is_training'] = {
93 | 'shape': (),
94 | 'feed': True,
95 | 'dfault': False
96 | }
97 |
98 | @property
99 | def signature(self):
100 | sig = ['convolutional']
101 | sig += self._signature[1:-2]
102 | return sig
103 |
104 | def present(self):
105 | args = self.signature
106 | self.presenter = convolutional_layer(*args)
107 |
108 | def recollect(self, w):
109 | if w is None:
110 | self.w = w
111 | return
112 | idx = self.keep_idx
113 | k = w['kernel']
114 | b = w['biases']
115 | self.w['kernel'] = np.take(k, idx, 3)
116 | self.w['biases'] = np.take(b, idx)
117 | if self.batch_norm:
118 | m = w['moving_mean']
119 | v = w['moving_variance']
120 | g = w['gamma']
121 | self.w['moving_mean'] = np.take(m, idx)
122 | self.w['moving_variance'] = np.take(v, idx)
123 | self.w['gamma'] = np.take(g, idx)
124 |
125 | class convolutional_layer(Layer):
126 | def setup(self, ksize, c, n, stride,
127 | pad, batch_norm, activation):
128 | self.batch_norm = bool(batch_norm)
129 | self.activation = activation
130 | self.stride = stride
131 | self.ksize = ksize
132 | self.pad = pad
133 | self.dnshape = [n, c, ksize, ksize] # darknet shape
134 | self.wshape = dict({
135 | 'biases': [n],
136 | 'kernel': [ksize, ksize, c, n]
137 | })
138 | if self.batch_norm:
139 | self.wshape.update({
140 | 'moving_variance' : [n],
141 | 'moving_mean': [n],
142 | 'gamma' : [n]
143 | })
144 | self.h['is_training'] = {
145 | 'feed': True,
146 | 'dfault': False,
147 | 'shape': ()
148 | }
149 |
150 | def finalize(self, _):
151 | """deal with darknet"""
152 | kernel = self.w['kernel']
153 | if kernel is None: return
154 | kernel = kernel.reshape(self.dnshape)
155 | kernel = kernel.transpose([2,3,1,0])
156 | self.w['kernel'] = kernel
--------------------------------------------------------------------------------
/darkflow/dark/darknet.py:
--------------------------------------------------------------------------------
1 | from ..utils.process import cfg_yielder
2 | from .darkop import create_darkop
3 | from ..utils import loader
4 | import warnings
5 | import time
6 | import os
7 |
8 | class Darknet(object):
9 |
10 | _EXT = '.weights'
11 |
12 | def __init__(self, FLAGS):
13 | self.get_weight_src(FLAGS)
14 | self.modify = False
15 |
16 | print('Parsing {}'.format(self.src_cfg))
17 | src_parsed = self.parse_cfg(self.src_cfg, FLAGS)
18 | self.src_meta, self.src_layers = src_parsed
19 |
20 | if self.src_cfg == FLAGS.model:
21 | self.meta, self.layers = src_parsed
22 | else:
23 | print('Parsing {}'.format(FLAGS.model))
24 | des_parsed = self.parse_cfg(FLAGS.model, FLAGS)
25 | self.meta, self.layers = des_parsed
26 |
27 | self.load_weights()
28 |
29 | def get_weight_src(self, FLAGS):
30 | """
31 | analyse FLAGS.load to determine where the source
32 | binary is and what its config is.
33 | It can be: None, FLAGS.model, or some other path.
34 | """
35 | self.src_bin = FLAGS.model + self._EXT
36 | self.src_bin = FLAGS.binary + self.src_bin
37 | self.src_bin = os.path.abspath(self.src_bin)
38 | exist = os.path.isfile(self.src_bin)
39 |
40 | if FLAGS.load == str(): FLAGS.load = int()
41 | if type(FLAGS.load) is int:
42 | self.src_cfg = FLAGS.model
43 | if FLAGS.load: self.src_bin = None
44 | elif not exist: self.src_bin = None
45 | else:
46 | assert os.path.isfile(FLAGS.load), \
47 | '{} not found'.format(FLAGS.load)
48 | self.src_bin = FLAGS.load
49 | name = loader.model_name(FLAGS.load)
50 | cfg_path = os.path.join(FLAGS.config, name + '.cfg')
51 | if not os.path.isfile(cfg_path):
52 | warnings.warn(
53 | '{} not found, using {} instead'.format(
54 | cfg_path, FLAGS.model))
55 | cfg_path = FLAGS.model
56 | self.src_cfg = cfg_path
57 | FLAGS.load = int()
58 |
59 |
60 | def parse_cfg(self, model, FLAGS):
61 | """
62 | return the parsed meta dict and a list of `Layer` objects (darkop.py),
63 | given the model config path and the binaries/ directory
64 | """
65 | args = [model, FLAGS.binary]
66 | cfg_layers = cfg_yielder(*args)
67 | meta = dict(); layers = list()
68 | for i, info in enumerate(cfg_layers):
69 | if i == 0: meta = info; continue
70 | else: new = create_darkop(*info)
71 | layers.append(new)
72 | return meta, layers
73 |
74 | def load_weights(self):
75 | """
76 | Use `layers` and the Loader to load the .weights file
77 | """
78 | print('Loading {} ...'.format(self.src_bin))
79 | start = time.time()
80 |
81 | args = [self.src_bin, self.src_layers]
82 | wgts_loader = loader.create_loader(*args)
83 | for layer in self.layers: layer.load(wgts_loader)
84 |
85 | stop = time.time()
86 | print('Finished in {}s'.format(stop - start))
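87 |
88 | # Usage sketch (added, not in the original file): Darknet only needs a FLAGS
89 | # object exposing the attributes read above (model, binary, config, load).
90 | # The paths below are hypothetical:
91 | #
92 | #   from darkflow.defaults import argHandler
93 | #   FLAGS = argHandler(); FLAGS.setDefaults()
94 | #   FLAGS.model = 'cfg/tiny-yolo.cfg'        # hypothetical config path
95 | #   FLAGS.load = 'bin/tiny-yolo.weights'     # hypothetical weights path
96 | #   net = Darknet(FLAGS)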
--------------------------------------------------------------------------------
/darkflow/dark/darkop.py:
--------------------------------------------------------------------------------
1 | from .layer import Layer
2 | from .convolution import *
3 | from .connected import *
4 |
5 | class avgpool_layer(Layer):
6 | pass
7 |
8 | class crop_layer(Layer):
9 | pass
10 |
11 | class maxpool_layer(Layer):
12 | def setup(self, ksize, stride, pad):
13 | self.stride = stride
14 | self.ksize = ksize
15 | self.pad = pad
16 |
17 | class softmax_layer(Layer):
18 | def setup(self, groups):
19 | self.groups = groups
20 |
21 | class dropout_layer(Layer):
22 | def setup(self, p):
23 | self.h['pdrop'] = dict({
24 | 'feed': p, # for training
25 | 'dfault': 1.0, # for testing
26 | 'shape': ()
27 | })
28 |
29 | class route_layer(Layer):
30 | def setup(self, routes):
31 | self.routes = routes
32 |
33 | class reorg_layer(Layer):
34 | def setup(self, stride):
35 | self.stride = stride
36 |
37 | """
38 | Darkop Factory
39 | """
40 |
41 | darkops = {
42 | 'dropout': dropout_layer,
43 | 'connected': connected_layer,
44 | 'maxpool': maxpool_layer,
45 | 'convolutional': convolutional_layer,
46 | 'avgpool': avgpool_layer,
47 | 'softmax': softmax_layer,
48 | 'crop': crop_layer,
49 | 'local': local_layer,
50 | 'select': select_layer,
51 | 'route': route_layer,
52 | 'reorg': reorg_layer,
53 | 'conv-select': conv_select_layer,
54 | 'conv-extract': conv_extract_layer,
55 | 'extract': extract_layer
56 | }
57 |
58 | def create_darkop(ltype, num, *args):
59 | op_class = darkops.get(ltype, Layer)
60 | return op_class(ltype, num, *args)
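61 |
62 | # Example (added sketch): the factory maps a cfg section name to its layer
63 | # class and falls back to the bare Layer. A 2x2, stride-2, unpadded maxpool
64 | # parsed as layer number 3 would be built as:
65 | #
66 | #   pool = create_darkop('maxpool', 3, 2, 2, 0)   # ksize=2, stride=2, pad=0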
--------------------------------------------------------------------------------
/darkflow/dark/layer.py:
--------------------------------------------------------------------------------
1 | from ..utils import loader
2 | import numpy as np
3 |
4 | class Layer(object):
5 |
6 | def __init__(self, *args):
7 | self._signature = list(args)
8 | self.type = list(args)[0]
9 | self.number = list(args)[1]
10 |
11 | self.w = dict() # weights
12 | self.h = dict() # placeholders
13 | self.wshape = dict() # weight shape
14 | self.wsize = dict() # weight size
15 | self.setup(*args[2:]) # set attr up
16 | self.present()
17 | for var in self.wshape:
18 | shp = self.wshape[var]
19 | size = np.prod(shp)
20 | self.wsize[var] = size
21 |
22 | def load(self, src_loader):
23 | var_lay = src_loader.VAR_LAYER
24 | if self.type not in var_lay: return
25 |
26 | src_type = type(src_loader)
27 | if src_type is loader.weights_loader:
28 | wdict = self.load_weights(src_loader)
29 | else:
30 | wdict = self.load_ckpt(src_loader)
31 | if wdict is not None:
32 | self.recollect(wdict)
33 |
34 | def load_weights(self, src_loader):
35 | val = src_loader([self.presenter])
36 | if val is None: return None
37 | else: return val.w
38 |
39 | def load_ckpt(self, src_loader):
40 | result = dict()
41 | presenter = self.presenter
42 | for var in presenter.wshape:
43 | name = presenter.varsig(var)
44 | shape = presenter.wshape[var]
45 | key = [name, shape]
46 | val = src_loader(key)
47 | result[var] = val
48 | return result
49 |
50 | @property
51 | def signature(self):
52 | return self._signature
53 |
54 | # For comparing two layers
55 | def __eq__(self, other):
56 | return self.signature == other.signature
57 | def __ne__(self, other):
58 | return not self.__eq__(other)
59 |
60 | def varsig(self, var):
61 | if var not in self.wshape:
62 | return None
63 | sig = str(self.number)
64 | sig += '-' + self.type
65 | sig += '/' + var
66 | return sig
67 |
68 | def recollect(self, w): self.w = w
69 | def present(self): self.presenter = self
70 | def setup(self, *args): pass
71 | def finalize(self, *args): pass
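72 |
73 | # Note (added): varsig() yields names like '5-convolutional/kernel', i.e.
74 | # '<layer number>-<layer type>/<variable>', which load_ckpt above uses as the
75 | # variable name when reading from a checkpoint.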
--------------------------------------------------------------------------------
/darkflow/defaults.py:
--------------------------------------------------------------------------------
1 | class argHandler(dict):
2 | #A super duper fancy custom made CLI argument handler!!
3 | __getattr__ = dict.get
4 | __setattr__ = dict.__setitem__
5 | __delattr__ = dict.__delitem__
6 | _descriptions = {'help, --h, -h': 'show this super helpful message and exit'}
7 |
8 | def setDefaults(self):
9 | self.define('imgdir', './sample_img/', 'path to testing directory with images')
10 | self.define('binary', './bin/', 'path to .weights directory')
11 | self.define('config', './cfg/', 'path to .cfg directory')
12 | self.define('dataset', '../pascal/VOCdevkit/IMG/', 'path to dataset directory')
13 | self.define('labels', 'labels.txt', 'path to labels file')
14 | self.define('backup', './ckpt/', 'path to backup folder')
15 | self.define('summary', '', 'path to TensorBoard summaries directory')
16 | self.define('annotation', '../pascal/VOCdevkit/ANN/', 'path to annotation directory')
17 | self.define('threshold', -0.1, 'detection threshold')
18 | self.define('model', '', 'configuration of choice')
19 | self.define('trainer', 'rmsprop', 'training algorithm')
20 | self.define('momentum', 0.0, 'applicable for rmsprop and momentum optimizers')
21 | self.define('verbalise', True, 'say out loud while building graph')
22 | self.define('train', False, 'train the whole net')
23 | self.define('load', '', 'how to initialize the net? Either from .weights or a checkpoint, or even from scratch')
24 | self.define('savepb', False, 'save net and weight to a .pb file')
25 | self.define('gpu', 0.0, 'how much of the GPU to use (from 0.0 to 1.0)')
26 | self.define('gpuName', '/gpu:0', 'GPU device name')
27 | self.define('lr', 1e-5, 'learning rate')
28 | self.define('keep',20,'Number of most recent training results to save')
29 | self.define('batch', 16, 'batch size')
30 | self.define('epoch', 1000, 'number of epochs')
31 | self.define('save', 2000, 'save a checkpoint every ? training examples')
32 | self.define('demo', '', 'demo on webcam')
33 | self.define('queue', 1, 'process demo in batch')
34 | self.define('json', False, 'Outputs bounding box information in json format.')
35 | self.define('saveVideo', False, 'Records video from input video or camera')
36 | self.define('pbLoad', '', 'path to .pb protobuf file (metaLoad must also be specified)')
37 | self.define('metaLoad', '', 'path to .meta file generated during --savepb that corresponds to .pb file')
38 |
39 | def define(self, argName, default, description):
40 | self[argName] = default
41 | self._descriptions[argName] = description
42 |
43 | def help(self):
44 | print('Example usage: flow --imgdir sample_img/ --model cfg/yolo.cfg --load bin/yolo.weights')
45 | print('')
46 | print('Arguments:')
47 | spacing = max([len(i) for i in self._descriptions.keys()]) + 2
48 | for item in self._descriptions:
49 | currentSpacing = spacing - len(item)
50 | print(' --' + item + (' ' * currentSpacing) + self._descriptions[item])
51 | print('')
52 | exit()
53 |
54 | def parseArgs(self, args):
55 | print('')
56 | i = 1
57 | while i < len(args):
58 | if args[i] == '-h' or args[i] == '--h' or args[i] == '--help':
59 | self.help() #Time for some self help! :)
60 | if len(args[i]) < 2:
61 | print('ERROR - Invalid argument: ' + args[i])
62 | print('Try running flow --help')
63 | exit()
64 | argumentName = args[i][2:]
65 | if isinstance(self.get(argumentName), bool):
66 | if not (i + 1) >= len(args) and (args[i + 1].lower() != 'false' and args[i + 1].lower() != 'true') and not args[i + 1].startswith('--'):
67 | print('ERROR - Expected boolean value (or no value) following argument: ' + args[i])
68 | print('Try running flow --help')
69 | exit()
70 | elif not (i + 1) >= len(args) and (args[i + 1].lower() == 'false' or args[i + 1].lower() == 'true'):
71 | self[argumentName] = (args[i + 1].lower() == 'true')
72 | i += 1
73 | else:
74 | self[argumentName] = True
75 | elif args[i].startswith('--') and not (i + 1) >= len(args) and not args[i + 1].startswith('--') and argumentName in self:
76 | if isinstance(self[argumentName], float):
77 | try:
78 | args[i + 1] = float(args[i + 1])
79 | except ValueError:
80 | print('ERROR - Expected float for argument: ' + args[i])
81 | print('Try running flow --help')
82 | exit()
83 | elif isinstance(self[argumentName], int):
84 | try:
85 | args[i + 1] = int(args[i + 1])
86 | except ValueError:
87 | print('ERROR - Expected int for argument: ' + args[i])
88 | print('Try running flow --help')
89 | exit()
90 | self[argumentName] = args[i + 1]
91 | i += 1
92 | else:
93 | print('ERROR - Invalid argument: ' + args[i])
94 | print('Try running flow --help')
95 | exit()
96 | i += 1
97 |
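98 | # Usage sketch (added): the same handler also serves programmatic use, e.g.
99 | #
100 | #   FLAGS = argHandler()
101 | #   FLAGS.setDefaults()
102 | #   FLAGS.parseArgs(['flow', '--model', 'cfg/yolo.cfg', '--load', 'bin/yolo.weights'])
103 | #   FLAGS.threshold = 0.5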
--------------------------------------------------------------------------------
/darkflow/net/build.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import time
3 | from . import help
4 | from . import flow
5 | from .ops import op_create, identity
6 | from .ops import HEADER, LINE
7 | from .framework import create_framework
8 | from ..dark.darknet import Darknet
9 | import json
10 | import os
11 |
12 | class TFNet(object):
13 |
14 | _TRAINER = dict({
15 | 'rmsprop': tf.train.RMSPropOptimizer,
16 | 'adadelta': tf.train.AdadeltaOptimizer,
17 | 'adagrad': tf.train.AdagradOptimizer,
18 | 'adagradDA': tf.train.AdagradDAOptimizer,
19 | 'momentum': tf.train.MomentumOptimizer,
20 | 'adam': tf.train.AdamOptimizer,
21 | 'ftrl': tf.train.FtrlOptimizer,
22 | 'sgd': tf.train.GradientDescentOptimizer
23 | })
24 |
25 | # imported methods
26 | _get_fps = help._get_fps
27 | say = help.say
28 | train = flow.train
29 | camera = help.camera
30 | predict = flow.predict
31 | return_predict = flow.return_predict
32 | to_darknet = help.to_darknet
33 | build_train_op = help.build_train_op
34 | load_from_ckpt = help.load_from_ckpt
35 |
36 | def __init__(self, FLAGS, darknet = None):
37 | self.ntrain = 0
38 |
39 | if isinstance(FLAGS, dict):
40 | from ..defaults import argHandler
41 | newFLAGS = argHandler()
42 | newFLAGS.setDefaults()
43 | newFLAGS.update(FLAGS)
44 | FLAGS = newFLAGS
45 |
46 | self.FLAGS = FLAGS
47 | if self.FLAGS.pbLoad and self.FLAGS.metaLoad:
48 | self.say('\nLoading from .pb and .meta')
49 | self.graph = tf.Graph()
50 | device_name = FLAGS.gpuName \
51 | if FLAGS.gpu > 0.0 else None
52 | with tf.device(device_name):
53 | with self.graph.as_default() as g:
54 | self.build_from_pb()
55 | return
56 |
57 | if darknet is None:
58 | darknet = Darknet(FLAGS)
59 | self.ntrain = len(darknet.layers)
60 |
61 | self.darknet = darknet
62 | args = [darknet.meta, FLAGS]
63 | self.num_layer = len(darknet.layers)
64 | self.framework = create_framework(*args)
65 |
66 | self.meta = darknet.meta
67 |
68 | self.say('\nBuilding net ...')
69 | start = time.time()
70 | self.graph = tf.Graph()
71 | device_name = FLAGS.gpuName \
72 | if FLAGS.gpu > 0.0 else None
73 | with tf.device(device_name):
74 | with self.graph.as_default() as g:
75 | self.build_forward()
76 | self.setup_meta_ops()
77 | self.say('Finished in {}s\n'.format(
78 | time.time() - start))
79 |
80 | def build_from_pb(self):
81 | with tf.gfile.FastGFile(self.FLAGS.pbLoad, "rb") as f:
82 | graph_def = tf.GraphDef()
83 | graph_def.ParseFromString(f.read())
84 |
85 | tf.import_graph_def(
86 | graph_def,
87 | name=""
88 | )
89 | with open(self.FLAGS.metaLoad, 'r') as fp:
90 | self.meta = json.load(fp)
91 | self.framework = create_framework(self.meta, self.FLAGS)
92 |
93 | # Placeholders
94 | self.inp = tf.get_default_graph().get_tensor_by_name('input:0')
95 | self.feed = dict() # other placeholders
96 | self.out = tf.get_default_graph().get_tensor_by_name('output:0')
97 |
98 | self.setup_meta_ops()
99 |
100 | def build_forward(self):
101 | verbalise = self.FLAGS.verbalise
102 |
103 | # Placeholders
104 | inp_size = [None] + self.meta['inp_size']
105 | self.inp = tf.placeholder(tf.float32, inp_size, 'input')
106 | self.feed = dict() # other placeholders
107 |
108 | # Build the forward pass
109 | state = identity(self.inp)
110 | roof = self.num_layer - self.ntrain
111 | self.say(HEADER, LINE)
112 | for i, layer in enumerate(self.darknet.layers):
113 | scope = '{}-{}'.format(str(i),layer.type)
114 | args = [layer, state, i, roof, self.feed]
115 | state = op_create(*args)
116 | mess = state.verbalise()
117 | self.say(mess)
118 | self.say(LINE)
119 |
120 | self.top = state
121 | self.out = tf.identity(state.out, name='output')
122 |
123 | def setup_meta_ops(self):
124 | cfg = dict({
125 | 'allow_soft_placement': False,
126 | 'log_device_placement': False
127 | })
128 |
129 | utility = min(self.FLAGS.gpu, 1.)
130 | if utility > 0.0:
131 | self.say('GPU mode with {} usage'.format(utility))
132 | cfg['gpu_options'] = tf.GPUOptions(
133 | per_process_gpu_memory_fraction = utility)
134 | cfg['allow_soft_placement'] = True
135 | else:
136 | self.say('Running entirely on CPU')
137 | cfg['device_count'] = {'GPU': 0}
138 |
139 | if self.FLAGS.train: self.build_train_op()
140 |
141 | if self.FLAGS.summary:
142 | self.summary_op = tf.summary.merge_all()
143 | self.writer = tf.summary.FileWriter(self.FLAGS.summary + 'train')
144 |
145 | self.sess = tf.Session(config = tf.ConfigProto(**cfg))
146 | self.sess.run(tf.global_variables_initializer())
147 |
148 | if not self.ntrain: return
149 | self.saver = tf.train.Saver(tf.global_variables(),
150 | max_to_keep = self.FLAGS.keep)
151 | if self.FLAGS.load != 0: self.load_from_ckpt()
152 |
153 | if self.FLAGS.summary:
154 | self.writer.add_graph(self.sess.graph)
155 |
156 | def savepb(self):
157 | """
158 | Create a standalone const graph def that
159 | C++ can load and run.
160 | """
161 | darknet_pb = self.to_darknet()
162 | flags_pb = self.FLAGS
163 | flags_pb.verbalise = False
164 |
165 | flags_pb.train = False
166 | # rebuild another tfnet. all const.
167 | tfnet_pb = TFNet(flags_pb, darknet_pb)
168 | tfnet_pb.sess = tf.Session(graph = tfnet_pb.graph)
169 | # tfnet_pb.predict() # uncomment for unit testing
170 | name = 'built_graph/{}.pb'.format(self.meta['name'])
171 | os.makedirs(os.path.dirname(name), exist_ok=True)
172 | #Save dump of everything in meta
173 | with open('built_graph/{}.meta'.format(self.meta['name']), 'w') as fp:
174 | json.dump(self.meta, fp)
175 | self.say('Saving const graph def to {}'.format(name))
176 | graph_def = tfnet_pb.sess.graph_def
177 | tf.train.write_graph(graph_def,'./', name, False)
--------------------------------------------------------------------------------
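`TFNet.__init__` also accepts a plain dict in place of an `argHandler`; in that case it builds an `argHandler`, fills in every default, and then applies the dict, so only the options that matter need to be supplied. A construction sketch (the cfg and weight paths are assumptions):

```python
# Sketch: build a TFNet for inference from a plain options dict.
from darkflow.net.build import TFNet

options = {
    'model': 'cfg/yolo.cfg',       # network definition
    'load': 'bin/yolo.weights',    # assumed location of the weight file
    'threshold': 0.4,              # detection confidence cut-off
    'gpu': 0.0,                    # 0.0 -> "Running entirely on CPU"
}
tfnet = TFNet(options)             # builds the graph and starts a session
```
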
/darkflow/net/flow.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import numpy as np
4 | import tensorflow as tf
5 | import pickle
6 | from multiprocessing.pool import ThreadPool
7 |
8 | train_stats = (
9 | 'Training statistics: \n'
10 | '\tLearning rate : {}\n'
11 | '\tBatch size : {}\n'
12 | '\tEpoch number : {}\n'
13 | '\tBackup every : {}'
14 | )
15 | pool = ThreadPool()
16 |
17 | def _save_ckpt(self, step, loss_profile):
18 | file = '{}-{}{}'
19 | model = self.meta['name']
20 |
21 | profile = file.format(model, step, '.profile')
22 | profile = os.path.join(self.FLAGS.backup, profile)
23 | with open(profile, 'wb') as profile_ckpt:
24 | pickle.dump(loss_profile, profile_ckpt)
25 |
26 | ckpt = file.format(model, step, '')
27 | ckpt = os.path.join(self.FLAGS.backup, ckpt)
28 | self.say('Checkpoint at step {}'.format(step))
29 | self.saver.save(self.sess, ckpt)
30 |
31 |
32 | def train(self):
33 | loss_ph = self.framework.placeholders
34 | loss_mva = None; profile = list()
35 |
36 | batches = self.framework.shuffle()
37 | loss_op = self.framework.loss
38 |
39 | for i, (x_batch, datum) in enumerate(batches):
40 | if not i: self.say(train_stats.format(
41 | self.FLAGS.lr, self.FLAGS.batch,
42 | self.FLAGS.epoch, self.FLAGS.save
43 | ))
44 |
45 | feed_dict = {
46 | loss_ph[key]: datum[key]
47 | for key in loss_ph }
48 | feed_dict[self.inp] = x_batch
49 | feed_dict.update(self.feed)
50 |
51 | fetches = [self.train_op, loss_op]
52 |
53 | if self.FLAGS.summary:
54 | fetches.append(self.summary_op)
55 |
56 | fetched = self.sess.run(fetches, feed_dict)
57 | loss = fetched[1]
58 |
59 | if loss_mva is None: loss_mva = loss
60 | loss_mva = .9 * loss_mva + .1 * loss
61 | step_now = self.FLAGS.load + i + 1
62 |
63 | if self.FLAGS.summary:
64 | self.writer.add_summary(fetched[2], step_now)
65 |
66 | form = 'step {} - loss {} - moving ave loss {}'
67 | self.say(form.format(step_now, loss, loss_mva))
68 | profile += [(loss, loss_mva)]
69 |
70 | ckpt = (i+1) % (self.FLAGS.save // self.FLAGS.batch)
71 | args = [step_now, profile]
72 | if not ckpt: _save_ckpt(self, *args)
73 |
74 | if ckpt: _save_ckpt(self, *args)
75 |
76 | def return_predict(self, im):
77 | assert isinstance(im, np.ndarray), \
78 | 'Image is not a np.ndarray'
79 | h, w, _ = im.shape
80 | im = self.framework.resize_input(im)
81 | this_inp = np.expand_dims(im, 0)
82 | feed_dict = {self.inp : this_inp}
83 |
84 | out = self.sess.run(self.out, feed_dict)[0]
85 | boxes = self.framework.findboxes(out)
86 | threshold = self.FLAGS.threshold
87 | boxesInfo = list()
88 | for box in boxes:
89 | tmpBox = self.framework.process_box(box, h, w, threshold)
90 | if tmpBox is None:
91 | continue
92 | boxesInfo.append({
93 | "label": tmpBox[4],
94 | "confidence": tmpBox[6],
95 | "topleft": {
96 | "x": tmpBox[0],
97 | "y": tmpBox[2]},
98 | "bottomright": {
99 | "x": tmpBox[1],
100 | "y": tmpBox[3]}
101 | })
102 | return boxesInfo
103 |
104 | import math
105 |
106 | def predict(self):
107 | inp_path = self.FLAGS.imgdir
108 | all_inps = os.listdir(inp_path)
109 | all_inps = [i for i in all_inps if self.framework.is_inp(i)]
110 | if not all_inps:
111 | msg = 'Failed to find any images in {}.'
112 | exit('Error: {}'.format(msg.format(inp_path)))
113 |
114 | batch = min(self.FLAGS.batch, len(all_inps))
115 |
116 | # predict in batches
117 | n_batch = int(math.ceil(len(all_inps) / batch))
118 | for j in range(n_batch):
119 | from_idx = j * batch
120 | to_idx = min(from_idx + batch, len(all_inps))
121 |
122 | # collect images input in the batch
123 | this_batch = all_inps[from_idx:to_idx]
124 | inp_feed = pool.map(lambda inp: (
125 | np.expand_dims(self.framework.preprocess(
126 | os.path.join(inp_path, inp)), 0)), this_batch)
127 |
128 | # Feed to the net
129 | feed_dict = {self.inp : np.concatenate(inp_feed, 0)}
130 | self.say('Forwarding {} inputs ...'.format(len(inp_feed)))
131 | start = time.time()
132 | out = self.sess.run(self.out, feed_dict)
133 | stop = time.time(); last = stop - start
134 | self.say('Total time = {}s / {} inps = {} ips'.format(
135 | last, len(inp_feed), len(inp_feed) / last))
136 |
137 | # Post processing
138 | self.say('Post processing {} inputs ...'.format(len(inp_feed)))
139 | start = time.time()
140 | pool.map(lambda p: (lambda i, prediction:
141 | self.framework.postprocess(
142 | prediction, os.path.join(inp_path, this_batch[i])))(*p),
143 | enumerate(out))
144 | stop = time.time(); last = stop - start
145 |
146 | # Timing
147 | self.say('Total time = {}s / {} inps = {} ips'.format(
148 | last, len(inp_feed), len(inp_feed) / last))
149 |
--------------------------------------------------------------------------------
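`return_predict()` above takes a single BGR `np.ndarray` and returns one dict per detection with `label`, `confidence`, `topleft` and `bottomright` keys. A sketch of calling it (paths and threshold are assumptions):

```python
# Sketch: single-image prediction through return_predict().
import cv2
from darkflow.net.build import TFNet

tfnet = TFNet({'model': 'cfg/yolo.cfg',
               'load': 'bin/yolo.weights', 'threshold': 0.5})
img = cv2.imread('sample_img/sample_dog.jpg')   # BGR np.ndarray, as asserted above
for det in tfnet.return_predict(img):
    print(det['label'], det['confidence'], det['topleft'], det['bottomright'])
```
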
/darkflow/net/framework.py:
--------------------------------------------------------------------------------
1 | from . import yolo
2 | from . import yolov2
3 | from . import vanilla
4 | from os.path import basename
5 |
6 | class framework(object):
7 | constructor = vanilla.constructor
8 | loss = vanilla.train.loss
9 |
10 | def __init__(self, meta, FLAGS):
11 | model = basename(meta['model'])
12 | model = '.'.join(model.split('.')[:-1])
13 | meta['name'] = model
14 |
15 | self.constructor(meta, FLAGS)
16 |
17 | def is_inp(self, file_name):
18 | return True
19 |
20 | class YOLO(framework):
21 | constructor = yolo.constructor
22 | parse = yolo.data.parse
23 | shuffle = yolo.data.shuffle
24 | preprocess = yolo.predict.preprocess
25 | postprocess = yolo.predict.postprocess
26 | loss = yolo.train.loss
27 | is_inp = yolo.misc.is_inp
28 | profile = yolo.misc.profile
29 | _batch = yolo.data._batch
30 | resize_input = yolo.predict.resize_input
31 | findboxes = yolo.predict.findboxes
32 | process_box = yolo.predict.process_box
33 |
34 | class YOLOv2(framework):
35 | constructor = yolo.constructor
36 | parse = yolo.data.parse
37 | shuffle = yolov2.data.shuffle
38 | preprocess = yolo.predict.preprocess
39 | loss = yolov2.train.loss
40 | is_inp = yolo.misc.is_inp
41 | postprocess = yolov2.predict.postprocess
42 | _batch = yolov2.data._batch
43 | resize_input = yolo.predict.resize_input
44 | findboxes = yolov2.predict.findboxes
45 | process_box = yolo.predict.process_box
46 |
47 | """
48 | framework factory
49 | """
50 |
51 | types = {
52 | '[detection]': YOLO,
53 | '[region]': YOLOv2
54 | }
55 |
56 | def create_framework(meta, FLAGS):
57 | net_type = meta['type']
58 | this = types.get(net_type, framework)
59 | return this(meta, FLAGS)
--------------------------------------------------------------------------------
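The factory at the bottom keys on `meta['type']`, the section name of the last layer in the .cfg: v1 `[detection]` heads map to `YOLO`, v2 `[region]` heads to `YOLOv2`, and anything else falls back to the plain `framework` class (the vanilla path). A quick way to inspect the dispatch table:

```python
# Sketch: the type -> framework dispatch table used by create_framework().
from darkflow.net import framework

print(framework.types)
# {'[detection]': <class 'darkflow.net.framework.YOLO'>,
#  '[region]':    <class 'darkflow.net.framework.YOLOv2'>}
```
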
/darkflow/net/help.py:
--------------------------------------------------------------------------------
1 | """
2 | tfnet secondary (helper) methods
3 | """
4 | from ..utils.loader import create_loader
5 | from time import time as timer
6 | import tensorflow as tf
7 | import numpy as np
8 | import sys
9 | import cv2
10 | import os
11 |
12 | old_graph_msg = 'Resolving old graph def {} (no guarantee)'
13 |
14 | def build_train_op(self):
15 | self.framework.loss(self.out)
16 | self.say('Building {} train op'.format(self.meta['model']))
17 | optimizer = self._TRAINER[self.FLAGS.trainer](self.FLAGS.lr)
18 | gradients = optimizer.compute_gradients(self.framework.loss)
19 | self.train_op = optimizer.apply_gradients(gradients)
20 |
21 | def load_from_ckpt(self):
22 | if self.FLAGS.load < 0: # load latest ckpt
23 | with open(os.path.join(self.FLAGS.backup, 'checkpoint'), 'r') as f:
24 | last = f.readlines()[-1].strip()
25 | load_point = last.split(' ')[1]
26 | load_point = load_point.split('"')[1]
27 | load_point = load_point.split('-')[-1]
28 | self.FLAGS.load = int(load_point)
29 |
30 | load_point = os.path.join(self.FLAGS.backup, self.meta['name'])
31 | load_point = '{}-{}'.format(load_point, self.FLAGS.load)
32 | self.say('Loading from {}'.format(load_point))
33 | try: self.saver.restore(self.sess, load_point)
34 | except: load_old_graph(self, load_point)
35 |
36 | def say(self, *msgs):
37 | if not self.FLAGS.verbalise:
38 | return
39 | msgs = list(msgs)
40 | for msg in msgs:
41 | if msg is None: continue
42 | print(msg)
43 |
44 | def load_old_graph(self, ckpt):
45 | ckpt_loader = create_loader(ckpt)
46 | self.say(old_graph_msg.format(ckpt))
47 |
48 | for var in tf.global_variables():
49 | name = var.name.split(':')[0]
50 | args = [name, var.get_shape()]
51 | val = ckpt_loader(args)
52 | assert val is not None, \
53 | 'Cannot find and load {}'.format(var.name)
54 | shp = val.shape
55 | plh = tf.placeholder(tf.float32, shp)
56 | op = tf.assign(var, plh)
57 | self.sess.run(op, {plh: val})
58 |
59 | def _get_fps(self, frame):
60 | elapsed = int()
61 | start = timer()
62 | preprocessed = self.framework.preprocess(frame)
63 | feed_dict = {self.inp: [preprocessed]}
64 | net_out = self.sess.run(self.out, feed_dict)[0]
65 | processed = self.framework.postprocess(net_out, frame, False)
66 | return timer() - start
67 |
68 | def camera(self):
69 | file = self.FLAGS.demo
70 | SaveVideo = self.FLAGS.saveVideo
71 |
72 | if file == 'camera':
73 | file = 0
74 | else:
75 | assert os.path.isfile(file), \
76 | 'file {} does not exist'.format(file)
77 |
78 | camera = cv2.VideoCapture(file)
79 |
80 | if file == 0:
81 | self.say('Press [ESC] to quit demo')
82 |
83 | assert camera.isOpened(), \
84 | 'Cannot capture source'
85 |
86 | if file == 0:#camera window
87 | cv2.namedWindow('', 0)
88 | _, frame = camera.read()
89 | height, width, _ = frame.shape
90 | cv2.resizeWindow('', width, height)
91 | else:
92 | _, frame = camera.read()
93 | height, width, _ = frame.shape
94 |
95 | if SaveVideo:
96 | fourcc = cv2.VideoWriter_fourcc(*'XVID')
97 | if file == 0:#camera window
98 | fps = 1 / self._get_fps(frame)
99 | if fps < 1:
100 | fps = 1
101 | else:
102 | fps = round(camera.get(cv2.CAP_PROP_FPS))
103 | videoWriter = cv2.VideoWriter(
104 | 'video.avi', fourcc, fps, (width, height))
105 |
106 | # buffers for demo in batch
107 | buffer_inp = list()
108 | buffer_pre = list()
109 |
110 | elapsed = int()
111 | start = timer()
112 | self.say('Press [ESC] to quit demo')
113 | # Loop through frames
114 | while camera.isOpened():
115 | elapsed += 1
116 | _, frame = camera.read()
117 | if frame is None:
118 | print('\nEnd of Video')
119 | break
120 | preprocessed = self.framework.preprocess(frame)
121 | buffer_inp.append(frame)
122 | buffer_pre.append(preprocessed)
123 |
124 | # Only process and imshow when queue is full
125 | if elapsed % self.FLAGS.queue == 0:
126 | feed_dict = {self.inp: buffer_pre}
127 | net_out = self.sess.run(self.out, feed_dict)
128 | for img, single_out in zip(buffer_inp, net_out):
129 | postprocessed = self.framework.postprocess(
130 | single_out, img, False)
131 | if SaveVideo:
132 | videoWriter.write(postprocessed)
133 | if file == 0: #camera window
134 | cv2.imshow('', postprocessed)
135 | # Clear Buffers
136 | buffer_inp = list()
137 | buffer_pre = list()
138 |
139 | if elapsed % 5 == 0:
140 | sys.stdout.write('\r')
141 | sys.stdout.write('{0:3.3f} FPS'.format(
142 | elapsed / (timer() - start)))
143 | sys.stdout.flush()
144 | if file == 0: #camera window
145 | choice = cv2.waitKey(1)
146 | if choice == 27: break
147 |
148 | sys.stdout.write('\n')
149 | if SaveVideo:
150 | videoWriter.release()
151 | camera.release()
152 | if file == 0: #camera window
153 | cv2.destroyAllWindows()
154 |
155 | def to_darknet(self):
156 | darknet_ckpt = self.darknet
157 |
158 | with self.graph.as_default() as g:
159 | for var in tf.global_variables():
160 | name = var.name.split(':')[0]
161 | var_name = name.split('-')
162 | l_idx = int(var_name[0])
163 | w_sig = var_name[1].split('/')[-1]
164 | l = darknet_ckpt.layers[l_idx]
165 | l.w[w_sig] = var.eval(self.sess)
166 |
167 | for layer in darknet_ckpt.layers:
168 | for ph in layer.h:
169 | layer.h[ph] = None
170 |
171 | return darknet_ckpt
172 |
--------------------------------------------------------------------------------
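When `FLAGS.load` is negative, `load_from_ckpt()` above reads TensorFlow's `checkpoint` index file from the backup directory and extracts the step number from its last line. A small sketch of that string surgery; the sample line mimics what TensorFlow writes:

```python
# Sketch: how the step number is pulled out of a 'checkpoint' file line.
last = 'all_model_checkpoint_paths: "yolo-8000"'
load_point = last.split(' ')[1]         # '"yolo-8000"'
load_point = load_point.split('"')[1]   # 'yolo-8000'
step = int(load_point.split('-')[-1])   # 8000
print(step)
```
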
/darkflow/net/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .simple import *
2 | from .convolution import *
3 | from .baseop import HEADER, LINE
4 |
5 | op_types = {
6 | 'convolutional': convolutional,
7 | 'conv-select': conv_select,
8 | 'connected': connected,
9 | 'maxpool': maxpool,
10 | 'leaky': leaky,
11 | 'dropout': dropout,
12 | 'flatten': flatten,
13 | 'avgpool': avgpool,
14 | 'softmax': softmax,
15 | 'identity': identity,
16 | 'crop': crop,
17 | 'local': local,
18 | 'select': select,
19 | 'route': route,
20 | 'reorg': reorg,
21 | 'conv-extract': conv_extract,
22 | 'extract': extract
23 | }
24 |
25 | def op_create(*args):
26 | layer_type = list(args)[0].type
27 | return op_types[layer_type](*args)
--------------------------------------------------------------------------------
/darkflow/net/ops/baseop.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 | FORM = '{:>6} | {:>6} | {:<32} | {}'
5 | FORM_ = '{}+{}+{}+{}'
6 | LINE = FORM_.format('-'*7, '-'*8, '-'*34, '-'*15)
7 | HEADER = FORM.format(
8 | 'Source', 'Train?','Layer description', 'Output size')
9 |
10 | def _shape(tensor): # work for both tf.Tensor & np.ndarray
11 | if type(tensor) in [tf.Variable, tf.Tensor]:
12 | return tensor.get_shape()
13 | else: return tensor.shape
14 |
15 | def _name(tensor):
16 | return tensor.name.split(':')[0]
17 |
18 | class BaseOp(object):
19 | """
20 | BaseOp objects initialise with a darknet `layer` object
21 | and the input tensor of that layer, `inp`; each op computes
22 | the output of its layer and places the result in self.out
23 | """
24 |
25 | # let slim take care of the following vars
26 | _SLIM = ['gamma', 'moving_mean', 'moving_variance']
27 |
28 | def __init__(self, layer, inp, num, roof, feed):
29 | self.inp = inp # BaseOp
30 | self.num = num # int
31 | self.out = None # tf.Tensor
32 | self.lay = layer
33 |
34 | self.scope = '{}-{}'.format(
35 | str(self.num), self.lay.type)
36 | self.gap = roof - self.num
37 | self.var = not self.gap > 0
38 | self.act = 'Load '
39 | self.convert(feed)
40 | if self.var: self.train_msg = 'Yep! '
41 | else: self.train_msg = 'Nope '
42 | self.forward()
43 |
44 | def convert(self, feed):
45 | """convert self.lay to variables & placeholders"""
46 | for var in self.lay.wshape:
47 | self.wrap_variable(var)
48 | for ph in self.lay.h:
49 | self.wrap_pholder(ph, feed)
50 |
51 | def wrap_variable(self, var):
52 | """wrap layer.w into variables"""
53 | val = self.lay.w.get(var, None)
54 | if val is None:
55 | shape = self.lay.wshape[var]
56 | args = [0., 1e-2, shape]
57 | if 'moving_mean' in var:
58 | val = np.zeros(shape)
59 | elif 'moving_variance' in var:
60 | val = np.ones(shape)
61 | else:
62 | val = np.random.normal(*args)
63 | self.lay.w[var] = val.astype(np.float32)
64 | self.act = 'Init '
65 | if not self.var: return
66 |
67 | val = self.lay.w[var]
68 | self.lay.w[var] = tf.constant_initializer(val)
69 | if var in self._SLIM: return
70 | with tf.variable_scope(self.scope):
71 | self.lay.w[var] = tf.get_variable(var,
72 | shape = self.lay.wshape[var],
73 | dtype = tf.float32,
74 | initializer = self.lay.w[var])
75 |
76 | def wrap_pholder(self, ph, feed):
77 | """wrap layer.h into placeholders"""
78 | phtype = type(self.lay.h[ph])
79 | if phtype is not dict: return
80 |
81 | sig = '{}/{}'.format(self.scope, ph)
82 | val = self.lay.h[ph]
83 |
84 | self.lay.h[ph] = tf.placeholder_with_default(
85 | val['dfault'], val['shape'], name = sig)
86 | feed[self.lay.h[ph]] = val['feed']
87 |
88 | def verbalise(self): # console speaker
89 | msg = str()
90 | inp = _name(self.inp.out)
91 | if inp == 'input': \
92 | msg = FORM.format(
93 | '', '', 'input',
94 | _shape(self.inp.out)) + '\n'
95 | if not self.act: return msg
96 | return msg + FORM.format(
97 | self.act, self.train_msg,
98 | self.speak(), _shape(self.out))
99 |
100 | def speak(self): pass
--------------------------------------------------------------------------------
/darkflow/net/ops/convolution.py:
--------------------------------------------------------------------------------
1 | import tensorflow.contrib.slim as slim
2 | from .baseop import BaseOp
3 | import tensorflow as tf
4 | import numpy as np
5 |
6 | class reorg(BaseOp):
7 | def _forward(self):
8 | inp = self.inp.out
9 | shape = inp.get_shape().as_list()
10 | _, h, w, c = shape
11 | s = self.lay.stride
12 | out = list()
13 | for i in range(int(h/s)):
14 | row_i = list()
15 | for j in range(int(w/s)):
16 | si, sj = s * i, s * j
17 | boxij = inp[:, si: si+s, sj: sj+s,:]
18 | flatij = tf.reshape(boxij, [-1,1,1,c*s*s])
19 | row_i += [flatij]
20 | out += [tf.concat(row_i, 2)]
21 |
22 | self.out = tf.concat(out, 1)
23 |
24 | def forward(self):
25 | inp = self.inp.out
26 | s = self.lay.stride
27 | self.out = tf.extract_image_patches(
28 | inp, [1,s,s,1], [1,s,s,1], [1,1,1,1], 'VALID')
29 |
30 | def speak(self):
31 | args = [self.lay.stride] * 2
32 | msg = 'local flatten {}x{}'
33 | return msg.format(*args)
34 |
35 |
36 | class local(BaseOp):
37 | def forward(self):
38 | pad = [[self.lay.pad, self.lay.pad]] * 2;
39 | temp = tf.pad(self.inp.out, [[0, 0]] + pad + [[0, 0]])
40 |
41 | k = self.lay.w['kernels']
42 | ksz = self.lay.ksize
43 | half = int(ksz / 2)
44 | out = list()
45 | for i in range(self.lay.h_out):
46 | row_i = list()
47 | for j in range(self.lay.w_out):
48 | kij = k[i * self.lay.w_out + j]
49 | i_, j_ = i + 1 - half, j + 1 - half
50 | tij = temp[:, i_ : i_ + ksz, j_ : j_ + ksz,:]
51 | row_i.append(
52 | tf.nn.conv2d(tij, kij,
53 | padding = 'VALID',
54 | strides = [1] * 4))
55 | out += [tf.concat(row_i, 2)]
56 |
57 | self.out = tf.concat(out, 1)
58 |
59 | def speak(self):
60 | l = self.lay
61 | args = [l.ksize] * 2 + [l.pad] + [l.stride]
62 | args += [l.activation]
63 | msg = 'loca {}x{}p{}_{} {}'.format(*args)
64 | return msg
65 |
66 | class convolutional(BaseOp):
67 | def forward(self):
68 | pad = [[self.lay.pad, self.lay.pad]] * 2;
69 | temp = tf.pad(self.inp.out, [[0, 0]] + pad + [[0, 0]])
70 | temp = tf.nn.conv2d(temp, self.lay.w['kernel'], padding = 'VALID',
71 | name = self.scope, strides = [1] + [self.lay.stride] * 2 + [1])
72 | if self.lay.batch_norm:
73 | temp = self.batchnorm(self.lay, temp)
74 | self.out = tf.nn.bias_add(temp, self.lay.w['biases'])
75 |
76 | def batchnorm(self, layer, inp):
77 | if not self.var:
78 | temp = (inp - layer.w['moving_mean'])
79 | temp /= (np.sqrt(layer.w['moving_variance']) + 1e-5)
80 | temp *= layer.w['gamma']
81 | return temp
82 | else:
83 | args = dict({
84 | 'center' : False, 'scale' : True,
85 | 'epsilon': 1e-5, 'scope' : self.scope,
86 | 'updates_collections' : None,
87 | 'is_training': layer.h['is_training'],
88 | 'param_initializers': layer.w
89 | })
90 | return slim.batch_norm(inp, **args)
91 |
92 | def speak(self):
93 | l = self.lay
94 | args = [l.ksize] * 2 + [l.pad] + [l.stride]
95 | args += [l.batch_norm * '+bnorm']
96 | args += [l.activation]
97 | msg = 'conv {}x{}p{}_{} {} {}'.format(*args)
98 | return msg
99 |
100 | class conv_select(convolutional):
101 | def speak(self):
102 | l = self.lay
103 | args = [l.ksize] * 2 + [l.pad] + [l.stride]
104 | args += [l.batch_norm * '+bnorm']
105 | args += [l.activation]
106 | msg = 'sele {}x{}p{}_{} {} {}'.format(*args)
107 | return msg
108 |
109 | class conv_extract(convolutional):
110 | def speak(self):
111 | l = self.lay
112 | args = [l.ksize] * 2 + [l.pad] + [l.stride]
113 | args += [l.batch_norm * '+bnorm']
114 | args += [l.activation]
115 | msg = 'extr {}x{}p{}_{} {} {}'.format(*args)
116 | return msg
--------------------------------------------------------------------------------
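`reorg.forward()` relies on `tf.extract_image_patches`, which for kernel and stride `s` turns an `(N, H, W, C)` tensor into `(N, H/s, W/s, C*s*s)`; the commented-out `_forward()` builds the same result by hand. A numpy mirror of that hand-rolled version, with illustrative sizes:

```python
# Numpy mirror of reorg._forward(): stride-2 "local flatten" of a feature map.
import numpy as np

N, H, W, C, s = 1, 4, 4, 8, 2
x = np.arange(N * H * W * C, dtype=np.float32).reshape(N, H, W, C)

rows = []
for i in range(H // s):
    cols = []
    for j in range(W // s):
        block = x[:, i*s:(i+1)*s, j*s:(j+1)*s, :]        # (N, s, s, C)
        cols.append(block.reshape(N, 1, 1, C * s * s))   # flatten the patch
    rows.append(np.concatenate(cols, axis=2))
out = np.concatenate(rows, axis=1)
print(out.shape)   # (1, 2, 2, 32)
```
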
/darkflow/net/ops/simple.py:
--------------------------------------------------------------------------------
1 | import tensorflow.contrib.slim as slim
2 | from .baseop import BaseOp
3 | import tensorflow as tf
4 | from distutils.version import StrictVersion
5 |
6 | class route(BaseOp):
7 | def forward(self):
8 | routes = self.lay.routes
9 | routes_out = list()
10 | for r in routes:
11 | this = self.inp
12 | while this.lay.number != r:
13 | this = this.inp
14 | assert this is not None, \
15 | 'Routing to non-existence {}'.format(r)
16 | routes_out += [this.out]
17 | self.out = tf.concat(routes_out, 3)
18 |
19 | def speak(self):
20 | msg = 'concat {}'
21 | return msg.format(self.lay.routes)
22 |
23 | class connected(BaseOp):
24 | def forward(self):
25 | self.out = tf.nn.xw_plus_b(
26 | self.inp.out,
27 | self.lay.w['weights'],
28 | self.lay.w['biases'],
29 | name = self.scope)
30 |
31 | def speak(self):
32 | layer = self.lay
33 | args = [layer.inp, layer.out]
34 | args += [layer.activation]
35 | msg = 'full {} x {} {}'
36 | return msg.format(*args)
37 |
38 | class select(connected):
39 | """a weird connected layer"""
40 | def speak(self):
41 | layer = self.lay
42 | args = [layer.inp, layer.out]
43 | args += [layer.activation]
44 | msg = 'sele {} x {} {}'
45 | return msg.format(*args)
46 |
47 | class extract(connected):
48 | """a weird connected layer"""
49 | def speak(self):
50 | layer = self.lay
51 | args = [len(layer.inp), len(layer.out)]
52 | args += [layer.activation]
53 | msg = 'extr {} x {} {}'
54 | return msg.format(*args)
55 |
56 | class flatten(BaseOp):
57 | def forward(self):
58 | temp = tf.transpose(
59 | self.inp.out, [0,3,1,2])
60 | self.out = slim.flatten(
61 | temp, scope = self.scope)
62 |
63 | def speak(self): return 'flat'
64 |
65 |
66 | class softmax(BaseOp):
67 | def forward(self):
68 | self.out = tf.nn.softmax(self.inp.out)
69 |
70 | def speak(self): return 'softmax()'
71 |
72 |
73 | class avgpool(BaseOp):
74 | def forward(self):
75 | self.out = tf.reduce_mean(
76 | self.inp.out, [1, 2],
77 | name = self.scope
78 | )
79 |
80 | def speak(self): return 'avgpool()'
81 |
82 |
83 | class dropout(BaseOp):
84 | def forward(self):
85 | if self.lay.h['pdrop'] is None:
86 | self.lay.h['pdrop'] = 1.0
87 | self.out = tf.nn.dropout(
88 | self.inp.out,
89 | self.lay.h['pdrop'],
90 | name = self.scope
91 | )
92 |
93 | def speak(self): return 'drop'
94 |
95 |
96 | class crop(BaseOp):
97 | def forward(self):
98 | self.out = self.inp.out * 2. - 1.
99 |
100 | def speak(self):
101 | return 'scale to (-1, 1)'
102 |
103 |
104 | class maxpool(BaseOp):
105 | def forward(self):
106 | self.out = tf.nn.max_pool(
107 | self.inp.out, padding = 'SAME',
108 | ksize = [1] + [self.lay.ksize]*2 + [1],
109 | strides = [1] + [self.lay.stride]*2 + [1],
110 | name = self.scope
111 | )
112 |
113 | def speak(self):
114 | l = self.lay
115 | return 'maxp {}x{}p{}_{}'.format(
116 | l.ksize, l.ksize, l.pad, l.stride)
117 |
118 |
119 | class leaky(BaseOp):
120 | def forward(self):
121 | self.out = tf.maximum(
122 | .1 * self.inp.out,
123 | self.inp.out,
124 | name = self.scope
125 | )
126 |
127 | def verbalise(self): pass
128 |
129 |
130 | class identity(BaseOp):
131 | def __init__(self, inp):
132 | self.inp = None
133 | self.out = inp
134 |
--------------------------------------------------------------------------------
/darkflow/net/vanilla/__init__.py:
--------------------------------------------------------------------------------
1 | from . import train
2 |
3 | def constructor(self, meta, FLAGS):
4 | self.meta, self.FLAGS = meta, FLAGS
--------------------------------------------------------------------------------
/darkflow/net/vanilla/train.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | _LOSS_TYPE = ['sse','l2', 'smooth',
4 | 'sparse', 'l1', 'softmax',
5 | 'svm', 'fisher']
6 |
7 | def loss(self, net_out):
8 | m = self.meta
9 | loss_type = self.meta['type']
10 | assert loss_type in _LOSS_TYPE, \
11 | 'Loss type {} not implemented'.format(loss_type)
12 |
13 | out = net_out
14 | out_shape = out.get_shape()
15 | out_dtype = out.dtype.base_dtype
16 | _truth = tf.placeholder(out_dtype, out_shape)
17 |
18 | self.placeholders = dict({
19 | 'truth': _truth
20 | })
21 |
22 | diff = _truth - out
23 | if loss_type in ['sse','l2']:
24 | loss = tf.nn.l2_loss(diff)
25 |
26 | elif loss_type == 'smooth':
27 | small = tf.cast(diff < 1, tf.float32)
28 | large = 1. - small
29 | l1_loss = tf.reduce_sum(tf.abs(tf.multiply(diff, large)))
30 | l2_loss = tf.nn.l2_loss(tf.multiply(diff, small))
31 | loss = l1_loss + l2_loss
32 |
33 | elif loss_type in ['sparse', 'l1']:
34 | loss = tf.reduce_sum(tf.abs(diff))
35 |
36 | elif loss_type == 'softmax':
37 | loss = tf.nn.softmax_cross_entropy_with_logits(logits = out, labels = _truth)
38 | loss = tf.reduce_mean(loss)
39 |
40 | elif loss_type == 'svm':
41 | assert 'train_size' in m, \
42 | 'Must specify train_size'
43 | size = m['train_size']
44 | self.nu = tf.Variable(tf.ones([train_size, num_classes]))
45 |
--------------------------------------------------------------------------------
/darkflow/net/yolo/__init__.py:
--------------------------------------------------------------------------------
1 | from . import train
2 | from . import predict
3 | from . import data
4 | from . import misc
5 | import numpy as np
6 |
7 |
8 | """ YOLO framework __init__ equivalent"""
9 |
10 | def constructor(self, meta, FLAGS):
11 |
12 | def _to_color(indx, base):
13 | """ return (b, r, g) tuple"""
14 | base2 = base * base
15 | b = 2 - indx // base2
16 | r = 2 - (indx % base2) // base
17 | g = 2 - (indx % base2) % base
18 | return (b * 127, r * 127, g * 127)
19 | if 'labels' not in meta:
20 | misc.labels(meta, FLAGS) #We're not loading from a .pb so we do need to load the labels
21 | assert len(meta['labels']) == meta['classes'], (
22 | 'labels.txt and {} indicate' + ' '
23 | 'inconsistent class numbers'
24 | ).format(meta['model'])
25 |
26 | # assign a color for each label
27 | colors = list()
28 | base = int(np.ceil(pow(meta['classes'], 1./3)))
29 | for x in range(len(meta['labels'])):
30 | colors += [_to_color(x, base)]
31 | meta['colors'] = colors
32 | self.fetch = list()
33 | self.meta, self.FLAGS = meta, FLAGS
34 |
35 | # over-ride the threshold in meta if FLAGS has it.
36 | if FLAGS.threshold > 0.0:
37 | self.meta['thresh'] = FLAGS.threshold
--------------------------------------------------------------------------------
/darkflow/net/yolo/data.py:
--------------------------------------------------------------------------------
1 | from ...utils.pascal_voc_clean_xml import pascal_voc_clean_xml
2 | from numpy.random import permutation as perm
3 | from .predict import preprocess
4 | # from .misc import show
5 | from copy import deepcopy
6 | import pickle
7 | import numpy as np
8 | import os
9 |
10 | def parse(self, exclusive = False):
11 | meta = self.meta
12 | ext = '.parsed'
13 | ann = self.FLAGS.annotation
14 | if not os.path.isdir(ann):
15 | msg = 'Annotation directory not found: {}.'
16 | exit('Error: {}'.format(msg.format(ann)))
17 | print('\n{} parsing {}'.format(meta['model'], ann))
18 | dumps = pascal_voc_clean_xml(ann, meta['labels'], exclusive)
19 | return dumps
20 |
21 |
22 | def _batch(self, chunk):
23 | """
24 | Takes a chunk of parsed annotations and
25 | returns the values for the placeholders of the net's
26 | input & loss layers that correspond to this chunk
27 | """
28 | meta = self.meta
29 | S, B = meta['side'], meta['num']
30 | C, labels = meta['classes'], meta['labels']
31 |
32 | # preprocess
33 | jpg = chunk[0]; w, h, allobj_ = chunk[1]
34 | allobj = deepcopy(allobj_)
35 | path = os.path.join(self.FLAGS.dataset, jpg)
36 | img = self.preprocess(path, allobj)
37 |
38 | # Calculate regression target
39 | cellx = 1. * w / S
40 | celly = 1. * h / S
41 | for obj in allobj:
42 | centerx = .5*(obj[1]+obj[3]) #xmin, xmax
43 | centery = .5*(obj[2]+obj[4]) #ymin, ymax
44 | cx = centerx / cellx
45 | cy = centery / celly
46 | if cx >= S or cy >= S: return None, None
47 | obj[3] = float(obj[3]-obj[1]) / w
48 | obj[4] = float(obj[4]-obj[2]) / h
49 | obj[3] = np.sqrt(obj[3])
50 | obj[4] = np.sqrt(obj[4])
51 | obj[1] = cx - np.floor(cx) # centerx
52 | obj[2] = cy - np.floor(cy) # centery
53 | obj += [int(np.floor(cy) * S + np.floor(cx))]
54 |
55 | # show(im, allobj, S, w, h, cellx, celly) # unit test
56 |
57 | # Calculate placeholders' values
58 | probs = np.zeros([S*S,C])
59 | confs = np.zeros([S*S,B])
60 | coord = np.zeros([S*S,B,4])
61 | proid = np.zeros([S*S,C])
62 | prear = np.zeros([S*S,4])
63 | for obj in allobj:
64 | probs[obj[5], :] = [0.] * C
65 | probs[obj[5], labels.index(obj[0])] = 1.
66 | proid[obj[5], :] = [1] * C
67 | coord[obj[5], :, :] = [obj[1:5]] * B
68 | prear[obj[5],0] = obj[1] - obj[3]**2 * .5 * S # xleft
69 | prear[obj[5],1] = obj[2] - obj[4]**2 * .5 * S # yup
70 | prear[obj[5],2] = obj[1] + obj[3]**2 * .5 * S # xright
71 | prear[obj[5],3] = obj[2] + obj[4]**2 * .5 * S # ybot
72 | confs[obj[5], :] = [1.] * B
73 |
74 | # Finalise the placeholders' values
75 | upleft = np.expand_dims(prear[:,0:2], 1)
76 | botright = np.expand_dims(prear[:,2:4], 1)
77 | wh = botright - upleft;
78 | area = wh[:,:,0] * wh[:,:,1]
79 | upleft = np.concatenate([upleft] * B, 1)
80 | botright = np.concatenate([botright] * B, 1)
81 | areas = np.concatenate([area] * B, 1)
82 |
83 | # value for placeholder at input layer
84 | inp_feed_val = img
85 | # value for placeholder at loss layer
86 | loss_feed_val = {
87 | 'probs': probs, 'confs': confs,
88 | 'coord': coord, 'proid': proid,
89 | 'areas': areas, 'upleft': upleft,
90 | 'botright': botright
91 | }
92 |
93 | return inp_feed_val, loss_feed_val
94 |
95 | def shuffle(self):
96 | batch = self.FLAGS.batch
97 | data = self.parse()
98 | size = len(data)
99 |
100 | print('Dataset of {} instance(s)'.format(size))
101 | if batch > size: self.FLAGS.batch = batch = size
102 | batch_per_epoch = int(size / batch)
103 |
104 | for i in range(self.FLAGS.epoch):
105 | shuffle_idx = perm(np.arange(size))
106 | for b in range(batch_per_epoch):
107 | # yield these
108 | x_batch = list()
109 | feed_batch = dict()
110 |
111 | for j in range(b*batch, b*batch+batch):
112 | train_instance = data[shuffle_idx[j]]
113 | try:
114 | inp, new_feed = self._batch(train_instance)
115 | except ZeroDivisionError:
116 | print("This image's width or height are zeros: ", train_instance[0])
117 | print('train_instance:', train_instance)
118 | print('Please remove or fix it then try again.')
119 | raise
120 |
121 | if inp is None: continue
122 | x_batch += [np.expand_dims(inp, 0)]
123 |
124 | for key in new_feed:
125 | new = new_feed[key]
126 | old_feed = feed_batch.get(key,
127 | np.zeros((0,) + new.shape))
128 | feed_batch[key] = np.concatenate([
129 | old_feed, [new]
130 | ])
131 |
132 | x_batch = np.concatenate(x_batch, 0)
133 | yield x_batch, feed_batch
134 |
135 | print('Finished {} epoch(s)'.format(i + 1))
136 |
137 |
--------------------------------------------------------------------------------
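`_batch()` above rewrites each corner box into the YOLOv1 regression target: the centre offset inside its grid cell, the square roots of the width and height normalised by the image size, and the flattened index of the responsible cell. A worked example with assumed numbers:

```python
# Worked example of the regression-target rewrite in _batch() (assumed values).
import numpy as np

S, w, h = 7, 448, 448                          # grid side and image size
xmin, ymin, xmax, ymax = 174, 124, 274, 324    # a 100x200 px box centred at (224, 224)
cellx, celly = 1. * w / S, 1. * h / S          # 64.0, 64.0
cx = .5 * (xmin + xmax) / cellx                # 3.5 -> column 3
cy = .5 * (ymin + ymax) / celly                # 3.5 -> row 3
target = [cx - np.floor(cx),                   # obj[1]: x offset inside the cell   0.5
          cy - np.floor(cy),                   # obj[2]: y offset inside the cell   0.5
          np.sqrt(float(xmax - xmin) / w),     # obj[3]: sqrt of normalised width  ~0.472
          np.sqrt(float(ymax - ymin) / h),     # obj[4]: sqrt of normalised height ~0.668
          int(np.floor(cy) * S + np.floor(cx))]  # obj[5]: flattened cell index     24
print(target)
```
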
/darkflow/net/yolo/misc.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import cv2
4 | import os
5 |
6 | labels20 = ["aeroplane", "bicycle", "bird", "boat", "bottle",
7 | "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
8 | "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
9 | "train", "tvmonitor"]
10 |
11 | # 8, 14, 15, 19
12 |
13 | voc_models = ['yolo-full', 'yolo-tiny', 'yolo-small', # <- v1
14 | 'yolov1', 'tiny-yolov1', # <- v1.1
15 | 'tiny-yolo-voc', 'yolo-voc'] # <- v2
16 |
17 | coco_models = ['tiny-coco', 'yolo-coco', # <- v1.1
18 | 'yolo', 'tiny-yolo'] # <- v2
19 |
20 | coco_names = 'coco.names'
21 | nine_names = '9k.names'
22 |
23 | def labels(meta, FLAGS):
24 | model = os.path.basename(meta['name'])
25 | if model in voc_models:
26 | print("Model has a VOC model name, loading VOC labels.")
27 | meta['labels'] = labels20
28 | else:
29 | file = FLAGS.labels
30 | if model in coco_models:
31 | print("Model has a coco model name, loading coco labels.")
32 | file = os.path.join(FLAGS.config, coco_names)
33 | elif model == 'yolo9000':
34 | print("Model has name yolo9000, loading yolo9000 labels.")
35 | file = os.path.join(FLAGS.config, nine_names)
36 | with open(file, 'r') as f:
37 | meta['labels'] = list()
38 | labs = [l.strip() for l in f.readlines()]
39 | for lab in labs:
40 | if lab == '----': break
41 | meta['labels'] += [lab]
42 | if len(meta['labels']) == 0:
43 | meta['labels'] = labels20
44 |
45 | def is_inp(self, name):
46 | return name.lower().endswith(('.jpg', '.jpeg', '.png'))
47 |
48 | def show(im, allobj, S, w, h, cellx, celly):
49 | for obj in allobj:
50 | a = obj[5] % S
51 | b = obj[5] // S
52 | cx = a + obj[1]
53 | cy = b + obj[2]
54 | centerx = cx * cellx
55 | centery = cy * celly
56 | ww = obj[3]**2 * w
57 | hh = obj[4]**2 * h
58 | cv2.rectangle(im,
59 | (int(centerx - ww/2), int(centery - hh/2)),
60 | (int(centerx + ww/2), int(centery + hh/2)),
61 | (0,0,255), 2)
62 | cv2.imshow('result', im)
63 | cv2.waitKey()
64 | cv2.destroyAllWindows()
65 |
66 | def show2(im, allobj):
67 | for obj in allobj:
68 | cv2.rectangle(im,
69 | (obj[1], obj[2]),
70 | (obj[3], obj[4]),
71 | (0,0,255),2)
72 | cv2.imshow('result', im)
73 | cv2.waitKey()
74 | cv2.destroyAllWindows()
75 |
76 |
77 | _MVA = .05
78 |
79 | def profile(self, net):
80 | pass
81 | # data = self.parse(exclusive = True)
82 | # size = len(data); batch = self.FLAGS.batch
83 | # all_inp_ = [x[0] for x in data]
84 | # net.say('Will cycle through {} examples {} times'.format(
85 | # len(all_inp_), net.FLAGS.epoch))
86 |
87 | # fetch = list(); mvave = list(); names = list();
88 | # this = net.top
89 | # conv_lay = ['convolutional', 'connected', 'local', 'conv-select']
90 | # while this.inp is not None:
91 | # if this.lay.type in conv_lay:
92 | # fetch = [this.out] + fetch
93 | # names = [this.lay.signature] + names
94 | # mvave = [None] + mvave
95 | # this = this.inp
96 | # print(names)
97 |
98 | # total = int(); allofthem = len(all_inp_) * net.FLAGS.epoch
99 | # batch = min(net.FLAGS.batch, len(all_inp_))
100 | # for count in range(net.FLAGS.epoch):
101 | # net.say('EPOCH {}'.format(count))
102 | # for j in range(len(all_inp_)/batch):
103 | # inp_feed = list(); new_all = list()
104 | # all_inp = all_inp_[j*batch: (j*batch+batch)]
105 | # for inp in all_inp:
106 | # new_all += [inp]
107 | # this_inp = os.path.join(net.FLAGS.dataset, inp)
108 | # this_inp = net.framework.preprocess(this_inp)
109 | # expanded = np.expand_dims(this_inp, 0)
110 | # inp_feed.append(expanded)
111 | # all_inp = new_all
112 | # feed_dict = {net.inp : np.concatenate(inp_feed, 0)}
113 | # out = net.sess.run(fetch, feed_dict)
114 |
115 | # for i, o in enumerate(out):
116 | # oi = out[i];
117 | # dim = len(oi.shape) - 1
118 | # ai = mvave[i];
119 | # mi = np.mean(oi, tuple(range(dim)))
120 | # vi = np.var(oi, tuple(range(dim)))
121 | # if ai is None: mvave[i] = [mi, vi]
122 | # elif 'banana ninja yada yada':
123 | # ai[0] = (1 - _MVA) * ai[0] + _MVA * mi
124 | # ai[1] = (1 - _MVA) * ai[1] + _MVA * vi
125 | # total += len(inp_feed)
126 | # net.say('{} / {} = {}%'.format(
127 | # total, allofthem, 100. * total / allofthem))
128 |
129 | # with open('profile', 'wb') as f:
130 | # pickle.dump([mvave], f, protocol = -1)
131 |
--------------------------------------------------------------------------------
/darkflow/net/yolo/predict.py:
--------------------------------------------------------------------------------
1 | from ...utils.im_transform import imcv2_recolor, imcv2_affine_trans
2 | from ...utils.box import BoundBox, box_iou, prob_compare
3 | import numpy as np
4 | import cv2
5 | import os
6 | import json
7 | from ...cython_utils.cy_yolo_findboxes import yolo_box_constructor
8 |
9 | def _fix(obj, dims, scale, offs):
10 | for i in range(1, 5):
11 | dim = dims[(i + 1) % 2]
12 | off = offs[(i + 1) % 2]
13 | obj[i] = int(obj[i] * scale - off)
14 | obj[i] = max(min(obj[i], dim), 0)
15 |
16 | def resize_input(self, im):
17 | h, w, c = self.meta['inp_size']
18 | imsz = cv2.resize(im, (w, h))
19 | imsz = imsz / 255.
20 | imsz = imsz[:,:,::-1]
21 | return imsz
22 |
23 | def process_box(self, b, h, w, threshold):
24 | max_indx = np.argmax(b.probs)
25 | max_prob = b.probs[max_indx]
26 | label = self.meta['labels'][max_indx]
27 | if max_prob > threshold:
28 | left = int ((b.x - b.w/2.) * w)
29 | right = int ((b.x + b.w/2.) * w)
30 | top = int ((b.y - b.h/2.) * h)
31 | bot = int ((b.y + b.h/2.) * h)
32 | if left < 0 : left = 0
33 | if right > w - 1: right = w - 1
34 | if top < 0 : top = 0
35 | if bot > h - 1: bot = h - 1
36 | mess = '{}'.format(label)
37 | return (left, right, top, bot, mess, max_indx, max_prob)
38 | return None
39 |
40 | def findboxes(self, net_out):
41 | meta, FLAGS = self.meta, self.FLAGS
42 | threshold = FLAGS.threshold
43 |
44 | boxes = []
45 | boxes = yolo_box_constructor(meta, net_out, threshold)
46 |
47 | return boxes
48 |
49 | def preprocess(self, im, allobj = None):
50 | """
51 | Takes an image and returns it as a numpy tensor ready to be
52 | fed into tfnet. If an annotation (allobj) accompanies the image,
53 | meaning this preprocessing is serving the training process, the
54 | image is transformed with random scale, translation, flipping and
55 | recolor to augment the training data. The accompanying parsed
56 | annotation (allobj) is modified accordingly.
57 | """
58 | if type(im) is not np.ndarray:
59 | im = cv2.imread(im)
60 |
61 | if allobj is not None: # in training mode
62 | result = imcv2_affine_trans(im)
63 | im, dims, trans_param = result
64 | scale, offs, flip = trans_param
65 | for obj in allobj:
66 | _fix(obj, dims, scale, offs)
67 | if not flip: continue
68 | obj_1_ = obj[1]
69 | obj[1] = dims[0] - obj[3]
70 | obj[3] = dims[0] - obj_1_
71 | im = imcv2_recolor(im)
72 |
73 | im = self.resize_input(im)
74 | if allobj is None: return im
75 | return im#, np.array(im) # for unit testing
76 |
77 | def postprocess(self, net_out, im, save = True):
78 | """
79 | Takes net output, draws predictions, saves to disk
80 | """
81 | meta, FLAGS = self.meta, self.FLAGS
82 | threshold = FLAGS.threshold
83 | colors, labels = meta['colors'], meta['labels']
84 |
85 | boxes = self.findboxes(net_out)
86 |
87 | if type(im) is not np.ndarray:
88 | imgcv = cv2.imread(im)
89 | else: imgcv = im
90 |
91 | h, w, _ = imgcv.shape
92 | resultsForJSON = []
93 | for b in boxes:
94 | boxResults = self.process_box(b, h, w, threshold)
95 | if boxResults is None:
96 | continue
97 | left, right, top, bot, mess, max_indx, confidence = boxResults
98 | thick = int((h + w) // 300)
99 | if self.FLAGS.json:
100 | resultsForJSON.append({"label": mess, "confidence": float('%.2f' % confidence), "topleft": {"x": left, "y": top}, "bottomright": {"x": right, "y": bot}})
101 | continue
102 |
103 | cv2.rectangle(imgcv,
104 | (left, top), (right, bot),
105 | self.meta['colors'][max_indx], thick)
106 | cv2.putText(
107 | imgcv, mess, (left, top - 12),
108 | 0, 1e-3 * h, self.meta['colors'][max_indx],
109 | thick // 3)
110 |
111 |
112 | if not save: return imgcv
113 |
114 | outfolder = os.path.join(self.FLAGS.imgdir, 'out')
115 | img_name = os.path.join(outfolder, os.path.basename(im))
116 | if self.FLAGS.json:
117 | textJSON = json.dumps(resultsForJSON)
118 | textFile = os.path.splitext(img_name)[0] + ".json"
119 | with open(textFile, 'w') as f:
120 | f.write(textJSON)
121 | return
122 |
123 | cv2.imwrite(img_name, imgcv)
124 |
--------------------------------------------------------------------------------
/darkflow/net/yolo/train.py:
--------------------------------------------------------------------------------
1 | import tensorflow.contrib.slim as slim
2 | import pickle
3 | import tensorflow as tf
4 | from .misc import show
5 | import numpy as np
6 | import os
7 |
8 | def loss(self, net_out):
9 | """
10 | Takes net.out and the placeholder values
11 | returned by _batch() in data.py,
12 | to build train_op and loss
13 | """
14 | # meta
15 | m = self.meta
16 | sprob = float(m['class_scale'])
17 | sconf = float(m['object_scale'])
18 | snoob = float(m['noobject_scale'])
19 | scoor = float(m['coord_scale'])
20 | S, B, C = m['side'], m['num'], m['classes']
21 | SS = S * S # number of grid cells
22 |
23 | print('{} loss hyper-parameters:'.format(m['model']))
24 | print('\tside = {}'.format(m['side']))
25 | print('\tbox = {}'.format(m['num']))
26 | print('\tclasses = {}'.format(m['classes']))
27 | print('\tscales = {}'.format([sprob, sconf, snoob, scoor]))
28 |
29 | size1 = [None, SS, C]
30 | size2 = [None, SS, B]
31 |
32 | # return the below placeholders
33 | _probs = tf.placeholder(tf.float32, size1)
34 | _confs = tf.placeholder(tf.float32, size2)
35 | _coord = tf.placeholder(tf.float32, size2 + [4])
36 | # weights term for L2 loss
37 | _proid = tf.placeholder(tf.float32, size1)
38 | # material calculating IOU
39 | _areas = tf.placeholder(tf.float32, size2)
40 | _upleft = tf.placeholder(tf.float32, size2 + [2])
41 | _botright = tf.placeholder(tf.float32, size2 + [2])
42 |
43 | self.placeholders = {
44 | 'probs':_probs, 'confs':_confs, 'coord':_coord, 'proid':_proid,
45 | 'areas':_areas, 'upleft':_upleft, 'botright':_botright
46 | }
47 |
48 | # Extract the coordinate prediction from net.out
49 | coords = net_out[:, SS * (C + B):]
50 | coords = tf.reshape(coords, [-1, SS, B, 4])
51 | wh = tf.pow(coords[:,:,:,2:4], 2) * S # unit: grid cell
52 | area_pred = wh[:,:,:,0] * wh[:,:,:,1] # unit: grid cell^2
53 | centers = coords[:,:,:,0:2] # [batch, SS, B, 2]
54 | floor = centers - (wh * .5) # [batch, SS, B, 2]
55 | ceil = centers + (wh * .5) # [batch, SS, B, 2]
56 |
57 | # calculate the intersection areas
58 | intersect_upleft = tf.maximum(floor, _upleft)
59 | intersect_botright = tf.minimum(ceil , _botright)
60 | intersect_wh = intersect_botright - intersect_upleft
61 | intersect_wh = tf.maximum(intersect_wh, 0.0)
62 | intersect = tf.multiply(intersect_wh[:,:,:,0], intersect_wh[:,:,:,1])
63 |
64 | # calculate the best IOU, set 0.0 confidence for worse boxes
65 | iou = tf.truediv(intersect, _areas + area_pred - intersect)
66 | best_box = tf.equal(iou, tf.reduce_max(iou, [2], True))
67 | best_box = tf.to_float(best_box)
68 | confs = tf.multiply(best_box, _confs)
69 |
70 | # take care of the weight terms
71 | conid = snoob * (1. - confs) + sconf * confs
72 | weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3)
73 | cooid = scoor * weight_coo
74 | proid = sprob * _proid
75 |
76 | # flatten 'em all
77 | probs = slim.flatten(_probs)
78 | proid = slim.flatten(proid)
79 | confs = slim.flatten(confs)
80 | conid = slim.flatten(conid)
81 | coord = slim.flatten(_coord)
82 | cooid = slim.flatten(cooid)
83 |
84 | self.fetch += [probs, confs, conid, cooid, proid]
85 | true = tf.concat([probs, confs, coord], 1)
86 | wght = tf.concat([proid, conid, cooid], 1)
87 | print('Building {} loss'.format(m['model']))
88 | loss = tf.pow(net_out - true, 2)
89 | loss = tf.multiply(loss, wght)
90 | loss = tf.reduce_sum(loss, 1)
91 | self.loss = .5 * tf.reduce_mean(loss)
92 | tf.summary.scalar('{} loss'.format(m['model']), self.loss)
93 |
--------------------------------------------------------------------------------
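The IoU block above picks, per grid cell, the predicted box that best overlaps the ground truth and zeroes the confidence target of the others. The same arithmetic in numpy, with illustrative numbers for one cell and B = 2 boxes:

```python
# Numpy mirror of the best-box / IoU selection (illustrative numbers only).
import numpy as np

floor     = np.array([[1.0, 1.0], [0.5, 0.5]])   # predicted upper-left corners, per box
ceil      = np.array([[3.0, 3.0], [4.5, 4.5]])   # predicted lower-right corners
area_pred = np.prod(ceil - floor, axis=1)        # [ 4., 16.]

_upleft   = np.array([[1.0, 1.0], [1.0, 1.0]])   # ground truth, repeated for each box
_botright = np.array([[3.0, 3.0], [3.0, 3.0]])
_areas    = np.prod(_botright - _upleft, axis=1) # [4., 4.]

wh = np.maximum(np.minimum(ceil, _botright) - np.maximum(floor, _upleft), 0.0)
intersect = wh[:, 0] * wh[:, 1]                             # [4., 4.]
iou       = intersect / (_areas + area_pred - intersect)    # [1.  , 0.25]
best_box  = (iou == iou.max()).astype(np.float32)           # [1., 0.] -> confs mask
print(iou, best_box)
```
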
/darkflow/net/yolov2/__init__.py:
--------------------------------------------------------------------------------
1 | from . import train
2 | from . import predict
3 | from . import data
4 | from ..yolo import misc
5 | import numpy as np
6 |
--------------------------------------------------------------------------------
/darkflow/net/yolov2/data.py:
--------------------------------------------------------------------------------
1 | from ...utils.pascal_voc_clean_xml import pascal_voc_clean_xml
2 | from numpy.random import permutation as perm
3 | from ..yolo.predict import preprocess
4 | from ..yolo.data import shuffle
5 | from copy import deepcopy
6 | import pickle
7 | import numpy as np
8 | import os
9 |
10 | def _batch(self, chunk):
11 | """
12 | Takes a chunk of parsed annotations and
13 | returns the values for the placeholders of the net's
14 | input & loss layers that correspond to this chunk
15 | """
16 | meta = self.meta
17 | labels = meta['labels']
18 |
19 | H, W, _ = meta['out_size']
20 | C, B = meta['classes'], meta['num']
21 | anchors = meta['anchors']
22 |
23 | # preprocess
24 | jpg = chunk[0]; w, h, allobj_ = chunk[1]
25 | allobj = deepcopy(allobj_)
26 | path = os.path.join(self.FLAGS.dataset, jpg)
27 | img = self.preprocess(path, allobj)
28 |
29 | # Calculate regression target
30 | cellx = 1. * w / W
31 | celly = 1. * h / H
32 | for obj in allobj:
33 | centerx = .5*(obj[1]+obj[3]) #xmin, xmax
34 | centery = .5*(obj[2]+obj[4]) #ymin, ymax
35 | cx = centerx / cellx
36 | cy = centery / celly
37 | if cx >= W or cy >= H: return None, None
38 | obj[3] = float(obj[3]-obj[1]) / w
39 | obj[4] = float(obj[4]-obj[2]) / h
40 | obj[3] = np.sqrt(obj[3])
41 | obj[4] = np.sqrt(obj[4])
42 | obj[1] = cx - np.floor(cx) # centerx
43 | obj[2] = cy - np.floor(cy) # centery
44 | obj += [int(np.floor(cy) * W + np.floor(cx))]
45 |
46 | # show(im, allobj, S, w, h, cellx, celly) # unit test
47 |
48 | # Calculate placeholders' values
49 | probs = np.zeros([H*W,B,C])
50 | confs = np.zeros([H*W,B])
51 | coord = np.zeros([H*W,B,4])
52 | proid = np.zeros([H*W,B,C])
53 | prear = np.zeros([H*W,4])
54 | for obj in allobj:
55 | probs[obj[5], :, :] = [[0.]*C] * B
56 | probs[obj[5], :, labels.index(obj[0])] = 1.
57 | proid[obj[5], :, :] = [[1.]*C] * B
58 | coord[obj[5], :, :] = [obj[1:5]] * B
59 | prear[obj[5],0] = obj[1] - obj[3]**2 * .5 * W # xleft
60 | prear[obj[5],1] = obj[2] - obj[4]**2 * .5 * H # yup
61 | prear[obj[5],2] = obj[1] + obj[3]**2 * .5 * W # xright
62 | prear[obj[5],3] = obj[2] + obj[4]**2 * .5 * H # ybot
63 | confs[obj[5], :] = [1.] * B
64 |
65 | # Finalise the placeholders' values
66 | upleft = np.expand_dims(prear[:,0:2], 1)
67 | botright = np.expand_dims(prear[:,2:4], 1)
68 | wh = botright - upleft;
69 | area = wh[:,:,0] * wh[:,:,1]
70 | upleft = np.concatenate([upleft] * B, 1)
71 | botright = np.concatenate([botright] * B, 1)
72 | areas = np.concatenate([area] * B, 1)
73 |
74 | # value for placeholder at input layer
75 | inp_feed_val = img
76 | # value for placeholder at loss layer
77 | loss_feed_val = {
78 | 'probs': probs, 'confs': confs,
79 | 'coord': coord, 'proid': proid,
80 | 'areas': areas, 'upleft': upleft,
81 | 'botright': botright
82 | }
83 |
84 | return inp_feed_val, loss_feed_val
85 |
86 |
--------------------------------------------------------------------------------
/darkflow/net/yolov2/predict.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | import cv2
4 | import os
5 | import json
6 | #from scipy.special import expit
7 | #from utils.box import BoundBox, box_iou, prob_compare
8 | #from utils.box import prob_compare2, box_intersection
9 | from ...utils.box import BoundBox
10 | from ...cython_utils.cy_yolo2_findboxes import box_constructor
11 |
12 | def expit(x):
13 | return 1. / (1. + np.exp(-x))
14 |
15 | def _softmax(x):
16 | e_x = np.exp(x - np.max(x))
17 | out = e_x / e_x.sum()
18 | return out
19 |
20 | def findboxes(self, net_out):
21 | # meta
22 | meta = self.meta
23 | boxes = list()
24 | boxes=box_constructor(meta,net_out)
25 | return boxes
26 |
27 | def postprocess(self, net_out, im, save = True):
28 | """
29 | Takes net output, draws predictions, saves to disk
30 | """
31 | boxes = self.findboxes(net_out)
32 |
33 | # meta
34 | meta = self.meta
35 | threshold = meta['thresh']
36 | colors = meta['colors']
37 | labels = meta['labels']
38 | if type(im) is not np.ndarray:
39 | imgcv = cv2.imread(im)
40 | else: imgcv = im
41 | h, w, _ = imgcv.shape
42 |
43 | resultsForJSON = []
44 | for b in boxes:
45 | boxResults = self.process_box(b, h, w, threshold)
46 | if boxResults is None:
47 | continue
48 | left, right, top, bot, mess, max_indx, confidence = boxResults
49 | thick = int((h + w) // 300)
50 | if self.FLAGS.json:
51 | resultsForJSON.append({"label": mess, "confidence": float('%.2f' % confidence), "topleft": {"x": left, "y": top}, "bottomright": {"x": right, "y": bot}})
52 | continue
53 |
54 | cv2.rectangle(imgcv,
55 | (left, top), (right, bot),
56 | colors[max_indx], thick)
57 | cv2.putText(imgcv, mess, (left, top - 12),
58 | 0, 1e-3 * h, colors[max_indx],thick//3)
59 |
60 | if not save: return imgcv
61 |
62 | outfolder = os.path.join(self.FLAGS.imgdir, 'out')
63 | img_name = os.path.join(outfolder, os.path.basename(im))
64 | if self.FLAGS.json:
65 | textJSON = json.dumps(resultsForJSON)
66 | textFile = os.path.splitext(img_name)[0] + ".json"
67 | with open(textFile, 'w') as f:
68 | f.write(textJSON)
69 | return
70 |
71 | cv2.imwrite(img_name, imgcv)
72 |
--------------------------------------------------------------------------------
/darkflow/net/yolov2/train.py:
--------------------------------------------------------------------------------
1 | import tensorflow.contrib.slim as slim
2 | import pickle
3 | import tensorflow as tf
4 | from ..yolo.misc import show
5 | import numpy as np
6 | import os
7 | import math
8 |
9 | def expit_tensor(x):
10 | return 1. / (1. + tf.exp(-x))
11 |
12 | def loss(self, net_out):
13 | """
14 | Takes net.out and the placeholder values
15 | returned by _batch() in data.py,
16 | to build train_op and loss
17 | """
18 | # meta
19 | m = self.meta
20 | sprob = float(m['class_scale'])
21 | sconf = float(m['object_scale'])
22 | snoob = float(m['noobject_scale'])
23 | scoor = float(m['coord_scale'])
24 | H, W, _ = m['out_size']
25 | B, C = m['num'], m['classes']
26 | HW = H * W # number of grid cells
27 | anchors = m['anchors']
28 |
29 | print('{} loss hyper-parameters:'.format(m['model']))
30 | print('\tH = {}'.format(H))
31 | print('\tW = {}'.format(W))
32 | print('\tbox = {}'.format(m['num']))
33 | print('\tclasses = {}'.format(m['classes']))
34 | print('\tscales = {}'.format([sprob, sconf, snoob, scoor]))
35 |
36 | size1 = [None, HW, B, C]
37 | size2 = [None, HW, B]
38 |
39 | # return the below placeholders
40 | _probs = tf.placeholder(tf.float32, size1)
41 | _confs = tf.placeholder(tf.float32, size2)
42 | _coord = tf.placeholder(tf.float32, size2 + [4])
43 | # weights term for L2 loss
44 | _proid = tf.placeholder(tf.float32, size1)
45 | # material calculating IOU
46 | _areas = tf.placeholder(tf.float32, size2)
47 | _upleft = tf.placeholder(tf.float32, size2 + [2])
48 | _botright = tf.placeholder(tf.float32, size2 + [2])
49 |
50 | self.placeholders = {
51 | 'probs':_probs, 'confs':_confs, 'coord':_coord, 'proid':_proid,
52 | 'areas':_areas, 'upleft':_upleft, 'botright':_botright
53 | }
54 |
55 | # Extract the coordinate prediction from net.out
56 | net_out_reshape = tf.reshape(net_out, [-1, H, W, B, (4 + 1 + C)])
57 | coords = net_out_reshape[:, :, :, :, :4]
58 | coords = tf.reshape(coords, [-1, H*W, B, 4])
59 | adjusted_coords_xy = expit_tensor(coords[:,:,:,0:2])
60 | adjusted_coords_wh = tf.sqrt(tf.exp(coords[:,:,:,2:4]) * np.reshape(anchors, [1, 1, B, 2]) / np.reshape([W, H], [1, 1, 1, 2]))
61 | coords = tf.concat([adjusted_coords_xy, adjusted_coords_wh], 3)
62 |
63 | adjusted_c = expit_tensor(net_out_reshape[:, :, :, :, 4])
64 | adjusted_c = tf.reshape(adjusted_c, [-1, H*W, B, 1])
65 |
66 | adjusted_prob = tf.nn.softmax(net_out_reshape[:, :, :, :, 5:])
67 | adjusted_prob = tf.reshape(adjusted_prob, [-1, H*W, B, C])
68 |
69 | adjusted_net_out = tf.concat([adjusted_coords_xy, adjusted_coords_wh, adjusted_c, adjusted_prob], 3)
70 |
71 | wh = tf.pow(coords[:,:,:,2:4], 2) * np.reshape([W, H], [1, 1, 1, 2])
72 | area_pred = wh[:,:,:,0] * wh[:,:,:,1]
73 | centers = coords[:,:,:,0:2]
74 | floor = centers - (wh * .5)
75 | ceil = centers + (wh * .5)
76 |
77 | # calculate the intersection areas
78 | intersect_upleft = tf.maximum(floor, _upleft)
79 | intersect_botright = tf.minimum(ceil , _botright)
80 | intersect_wh = intersect_botright - intersect_upleft
81 | intersect_wh = tf.maximum(intersect_wh, 0.0)
82 | intersect = tf.multiply(intersect_wh[:,:,:,0], intersect_wh[:,:,:,1])
83 |
84 | # calculate the best IOU, set 0.0 confidence for worse boxes
85 | iou = tf.truediv(intersect, _areas + area_pred - intersect)
86 | best_box = tf.equal(iou, tf.reduce_max(iou, [2], True))
87 | best_box = tf.to_float(best_box)
88 | confs = tf.multiply(best_box, _confs)
89 |
90 | # take care of the weight terms
91 | conid = snoob * (1. - confs) + sconf * confs
92 | weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3)
93 | cooid = scoor * weight_coo
94 | weight_pro = tf.concat(C * [tf.expand_dims(confs, -1)], 3)
95 | proid = sprob * weight_pro
96 |
97 | self.fetch += [_probs, confs, conid, cooid, proid]
98 | true = tf.concat([_coord, tf.expand_dims(confs, 3), _probs ], 3)
99 | wght = tf.concat([cooid, tf.expand_dims(conid, 3), proid ], 3)
100 |
101 | print('Building {} loss'.format(m['model']))
102 | loss = tf.pow(adjusted_net_out - true, 2)
103 | loss = tf.multiply(loss, wght)
104 | loss = tf.reshape(loss, [-1, H*W*B*(4 + 1 + C)])
105 | loss = tf.reduce_sum(loss, 1)
106 | self.loss = .5 * tf.reduce_mean(loss)
107 | tf.summary.scalar('{} loss'.format(m['model']), self.loss)
--------------------------------------------------------------------------------
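
The coordinate handling in loss() above is the YOLOv2 box decoding: the x/y offsets are squashed with a sigmoid, the width/height are exp-scaled by the anchor and normalised by the grid size, kept in sqrt space for the loss, and squared back to grid-cell units for the IOU terms. A small numpy sketch of the same arithmetic for a single box (the anchor pair is taken from tiny-yolo-voc.cfg; the raw activations are made up):

import numpy as np

def expit(x):
    return 1. / (1. + np.exp(-x))

W, H = 13, 13                           # output grid size
anchor = np.array([1.08, 1.19])         # one (w, h) anchor pair
raw = np.array([0.2, -0.1, 0.5, 0.3])   # tx, ty, tw, th for one box

xy = expit(raw[:2])                              # offset inside the cell, in (0, 1)
wh = np.sqrt(np.exp(raw[2:]) * anchor / [W, H])  # sqrt space, as used in the loss
wh_cells = wh ** 2 * [W, H]                      # back to grid-cell units for IOU
print(xy, wh_cells)
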
/darkflow/utils/box.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class BoundBox:
4 | def __init__(self, classes):
5 | self.x, self.y = float(), float()
6 | self.w, self.h = float(), float()
7 | self.c = float()
8 | self.class_num = classes
9 | self.probs = np.zeros((classes,))
10 |
11 | def overlap(x1,w1,x2,w2):
12 | l1 = x1 - w1 / 2.
13 | l2 = x2 - w2 / 2.
14 | left = max(l1, l2)
15 | r1 = x1 + w1 / 2.
16 | r2 = x2 + w2 / 2.
17 | right = min(r1, r2)
18 | return right - left
19 |
20 | def box_intersection(a, b):
21 | w = overlap(a.x, a.w, b.x, b.w)
22 | h = overlap(a.y, a.h, b.y, b.h)
23 | if w < 0 or h < 0: return 0
24 | area = w * h
25 | return area
26 |
27 | def box_union(a, b):
28 | i = box_intersection(a, b)
29 | u = a.w * a.h + b.w * b.h - i
30 | return u
31 |
32 | def box_iou(a, b):
33 | return box_intersection(a, b) / box_union(a, b)
34 |
35 | def prob_compare(box):
36 | return box.probs[box.class_num]
37 |
38 | def prob_compare2(boxa, boxb):
39 | if (boxa.pi < boxb.pi):
40 | return 1
41 | elif(boxa.pi == boxb.pi):
42 | return 0
43 | else:
44 | return -1
--------------------------------------------------------------------------------
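
The helpers above treat a box as a centre point plus width and height. A quick usage sketch: two unit boxes whose centres are half a unit apart intersect over an area of 0.5, so their IOU is 0.5 / 1.5 = 1/3.

from darkflow.utils.box import BoundBox, box_iou

a = BoundBox(20)                     # 20 classes, as in PASCAL VOC
a.x, a.y, a.w, a.h = 0.0, 0.0, 1.0, 1.0
b = BoundBox(20)
b.x, b.y, b.w, b.h = 0.5, 0.0, 1.0, 1.0

print(box_iou(a, b))                 # ~0.333: intersection 0.5 over union 1.5
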
/darkflow/utils/im_transform.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 | def imcv2_recolor(im, a = .1):
5 | t = [np.random.uniform()]
6 | t += [np.random.uniform()]
7 | t += [np.random.uniform()]
8 | t = np.array(t) * 2. - 1.
9 |
10 | # random amplify each channel
11 | im = im * (1 + t * a)
12 | mx = 255. * (1 + a)
13 | up = np.random.uniform() * 2 - 1
14 | # im = np.power(im/mx, 1. + up * .5)
15 | im = cv2.pow(im/mx, 1. + up * .5)
16 | return np.array(im * 255., np.uint8)
17 |
18 | def imcv2_affine_trans(im):
19 | # Scale and translate
20 | h, w, c = im.shape
21 | scale = np.random.uniform() / 10. + 1.
22 | max_offx = (scale-1.) * w
23 | max_offy = (scale-1.) * h
24 | offx = int(np.random.uniform() * max_offx)
25 | offy = int(np.random.uniform() * max_offy)
26 |
27 | im = cv2.resize(im, (0,0), fx = scale, fy = scale)
28 | im = im[offy : (offy + h), offx : (offx + w)]
29 | flip = np.random.binomial(1, .5)
30 | if flip: im = cv2.flip(im, 1)
31 | return im, [w, h, c], [scale, [offx, offy], flip]
32 |
--------------------------------------------------------------------------------
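
A usage sketch for the two augmentations above, assuming a local copy of one of the bundled sample images; the returned dims and trans values are what a training pipeline can use to move the ground-truth boxes the same way the pixels moved.

import cv2
from darkflow.utils.im_transform import imcv2_affine_trans, imcv2_recolor

im = cv2.imread('sample_img/sample_dog.jpg')
im, dims, trans = imcv2_affine_trans(im)   # random scale, crop and horizontal flip
scale, [offx, offy], flip = trans          # needed to shift/flip the boxes identically
# dims == [w, h, c] of the original image
im = imcv2_recolor(im, a=.1)               # random per-channel gain and gamma jitter
cv2.imwrite('augmented_dog.jpg', im)
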
/darkflow/utils/loader.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os
3 | from .. import dark
4 | import numpy as np
5 | from os.path import basename
6 |
7 | class loader(object):
8 | """
9 | interface to work with both .weights and .ckpt files
10 | in loading / recollecting / resolving mode
11 | """
12 | VAR_LAYER = ['convolutional', 'connected', 'local',
13 | 'select', 'conv-select',
14 | 'extract', 'conv-extract']
15 |
16 | def __init__(self, *args):
17 | self.src_key = list()
18 | self.vals = list()
19 | self.load(*args)
20 |
21 | def __call__(self, key):
22 | for idx in range(len(key)):
23 | val = self.find(key, idx)
24 | if val is not None: return val
25 | return None
26 |
27 | def find(self, key, idx):
28 | up_to = min(len(self.src_key), 4)
29 | for i in range(up_to):
30 | key_b = self.src_key[i]
31 | if key_b[idx:] == key[idx:]:
32 | return self.yields(i)
33 | return None
34 |
35 | def yields(self, idx):
36 | del self.src_key[idx]
37 | temp = self.vals[idx]
38 | del self.vals[idx]
39 | return temp
40 |
41 | class weights_loader(loader):
42 | """one who understands .weights files"""
43 |
44 | _W_ORDER = dict({ # order of param flattened into .weights file
45 | 'convolutional': [
46 | 'biases','gamma','moving_mean','moving_variance','kernel'
47 | ],
48 | 'connected': ['biases', 'weights'],
49 | 'local': ['biases', 'kernels']
50 | })
51 |
52 | def load(self, path, src_layers):
53 | self.src_layers = src_layers
54 | walker = weights_walker(path)
55 |
56 | for i, layer in enumerate(src_layers):
57 | if layer.type not in self.VAR_LAYER: continue
58 | self.src_key.append([layer])
59 |
60 | if walker.eof: new = None
61 | else:
62 | args = layer.signature
63 | new = dark.darknet.create_darkop(*args)
64 | self.vals.append(new)
65 |
66 | if new is None: continue
67 | order = self._W_ORDER[new.type]
68 | for par in order:
69 | if par not in new.wshape: continue
70 | val = walker.walk(new.wsize[par])
71 | new.w[par] = val
72 | new.finalize(walker.transpose)
73 |
74 | if walker.path is not None:
75 | assert walker.offset == walker.size, \
76 | 'expect {} bytes, found {}'.format(
77 | walker.size, walker.offset)
78 | print('Successfully identified {} bytes'.format(
79 | walker.offset))
80 |
81 | class checkpoint_loader(loader):
82 | """
83 | one who understands .ckpt files, very much
84 | """
85 | def load(self, ckpt, ignore):
86 | meta = ckpt + '.meta'
87 | with tf.Graph().as_default() as graph:
88 | with tf.Session().as_default() as sess:
89 | saver = tf.train.import_meta_graph(meta)
90 | saver.restore(sess, ckpt)
91 | for var in tf.global_variables():
92 | name = var.name.split(':')[0]
93 | packet = [name, var.get_shape().as_list()]
94 | self.src_key += [packet]
95 | self.vals += [var.eval(sess)]
96 |
97 | def create_loader(path, cfg = None):
98 | if path is None:
99 | load_type = weights_loader
100 | elif '.weights' in path:
101 | load_type = weights_loader
102 | else:
103 | load_type = checkpoint_loader
104 |
105 | return load_type(path, cfg)
106 |
107 | class weights_walker(object):
108 | """incremental reader of float32 binary files"""
109 | def __init__(self, path):
110 | self.eof = False # end of file
111 | self.path = path # path to the .weights file
112 | if path is None:
113 | self.eof = True
114 | return
115 | else:
116 | self.size = os.path.getsize(path) # total size of the file in bytes
117 | major, minor, revision, seen = np.memmap(path,
118 | shape = (), mode = 'r', offset = 0,
119 | dtype = '({})i4,'.format(4))
120 | self.transpose = major > 1000 or minor > 1000
121 | self.offset = 16
122 |
123 | def walk(self, size):
124 | if self.eof: return None
125 | end_point = self.offset + 4 * size
126 | assert end_point <= self.size, \
127 | 'Over-read {}'.format(self.path)
128 |
129 | float32_1D_array = np.memmap(
130 | self.path, shape = (), mode = 'r',
131 | offset = self.offset,
132 | dtype='({})float32,'.format(size)
133 | )
134 |
135 | self.offset = end_point
136 | if end_point == self.size:
137 | self.eof = True
138 | return float32_1D_array
139 |
140 | def model_name(file_path):
141 | file_name = basename(file_path)
142 | ext = str()
143 | if '.' in file_name: # exclude extension
144 | file_name = file_name.split('.')
145 | ext = file_name[-1]
146 | file_name = '.'.join(file_name[:-1])
147 | if ext == str() or ext == 'meta': # ckpt file
148 | file_name = file_name.split('-')
149 | num = int(file_name[-1])
150 | return '-'.join(file_name[:-1])
151 | if ext == 'weights':
152 | return file_name
--------------------------------------------------------------------------------
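
weights_walker above skips a 16-byte header (four int32 values: major, minor, revision, seen) before reading float32 parameters in the _W_ORDER layout. A minimal sketch of peeking at that header with numpy, mirroring the walker's transpose heuristic (the weight path is only an example):

import numpy as np

def peek_weights_header(path):
    major, minor, revision, seen = np.fromfile(path, dtype=np.int32, count=4)
    transpose = major > 1000 or minor > 1000   # same heuristic as weights_walker
    print('version {}.{}.{}, images seen: {}, transpose: {}'.format(
        major, minor, revision, seen, transpose))

# peek_weights_header('bin/tiny-yolo-voc.weights')
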
/darkflow/utils/pascal_voc_clean_xml.py:
--------------------------------------------------------------------------------
1 | """
2 | parse PASCAL VOC xml annotations
3 | """
4 |
5 | import os
6 | import sys
7 | import xml.etree.ElementTree as ET
8 | import glob
9 |
10 |
11 | def _pp(l): # pretty printing
12 | for i in l: print('{}: {}'.format(i,l[i]))
13 |
14 | def pascal_voc_clean_xml(ANN, pick, exclusive = False):
15 | print('Parsing for {} {}'.format(
16 | pick, 'exclusively' * int(exclusive)))
17 |
18 | dumps = list()
19 | cur_dir = os.getcwd()
20 | os.chdir(ANN)
21 | annotations = os.listdir('.')
22 | annotations = glob.glob('*.xml') # keep only the xml files in ANN
23 | size = len(annotations)
24 |
25 | for i, file in enumerate(annotations):
26 | # progress bar
27 | sys.stdout.write('\r')
28 | percentage = 1. * (i+1) / size
29 | progress = int(percentage * 20)
30 | bar_arg = [progress*'=', ' '*(19-progress), percentage*100]
31 | bar_arg += [file]
32 | sys.stdout.write('[{}>{}]{:.0f}% {}'.format(*bar_arg))
33 | sys.stdout.flush()
34 |
35 | # actual parsing
36 | in_file = open(file)
37 | tree=ET.parse(in_file)
38 | root = tree.getroot()
39 | jpg = str(root.find('filename').text)
40 | imsize = root.find('size')
41 | w = int(imsize.find('width').text)
42 | h = int(imsize.find('height').text)
43 | all = list()
44 |
45 | for obj in root.iter('object'):
46 | current = list()
47 | name = obj.find('name').text
48 | if name not in pick:
49 | continue
50 |
51 | xmlbox = obj.find('bndbox')
52 | xn = int(float(xmlbox.find('xmin').text))
53 | xx = int(float(xmlbox.find('xmax').text))
54 | yn = int(float(xmlbox.find('ymin').text))
55 | yx = int(float(xmlbox.find('ymax').text))
56 | current = [name,xn,yn,xx,yx]
57 | all += [current]
58 |
59 | add = [[jpg, [w, h, all]]]
60 | dumps += add
61 | in_file.close()
62 |
63 | # gather all stats
64 | stat = dict()
65 | for dump in dumps:
66 | all = dump[1][2]
67 | for current in all:
68 | if current[0] in pick:
69 | if current[0] in stat:
70 | stat[current[0]]+=1
71 | else:
72 | stat[current[0]] =1
73 |
74 | print('\nStatistics:')
75 | _pp(stat)
76 | print('Dataset size: {}'.format(len(dumps)))
77 |
78 | os.chdir(cur_dir)
79 | return dumps
--------------------------------------------------------------------------------
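
A usage sketch for the parser above, run against the bundled test annotations; each entry of the returned list has the shape [image_name, [width, height, [[label, xmin, ymin, xmax, ymax], ...]]] (the label list here is only an example and must match names present in the xml files):

from darkflow.utils.pascal_voc_clean_xml import pascal_voc_clean_xml

dumps = pascal_voc_clean_xml('test/training/annotations', ['person', 'horse'])
for jpg, (w, h, objects) in dumps:
    for label, xmin, ymin, xmax, ymax in objects:
        print(jpg, label, 'box', (xmin, ymin, xmax, ymax), 'in a', w, 'x', h, 'image')
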
/darkflow/utils/process.py:
--------------------------------------------------------------------------------
1 | """
2 | WARNING: spaghetti code.
3 | """
4 |
5 | import numpy as np
6 | import pickle
7 | import os
8 |
9 | def parser(model):
10 | """
11 | Read the .cfg file to extract layers into `layers`
12 | as well as model-specific parameters into `meta`
13 | """
14 | def _parse(l, i = 1):
15 | return l.split('=')[i].strip()
16 |
17 | with open(model, 'rb') as f:
18 | lines = f.readlines()
19 |
20 | lines = [line.decode() for line in lines]
21 |
22 | meta = dict(); layers = list() # will contain layers' info
23 | h, w, c = [int()] * 3; layer = dict()
24 | for line in lines:
25 | line = line.strip()
26 | line = line.split('#')[0]
27 | if '[' in line:
28 | if layer != dict():
29 | if layer['type'] == '[net]':
30 | h = layer['height']
31 | w = layer['width']
32 | c = layer['channels']
33 | meta['net'] = layer
34 | else:
35 | if layer['type'] == '[crop]':
36 | h = layer['crop_height']
37 | w = layer['crop_width']
38 | layers += [layer]
39 | layer = {'type': line}
40 | else:
41 | try:
42 | i = float(_parse(line))
43 | if i == int(i): i = int(i)
44 | layer[line.split('=')[0].strip()] = i
45 | except:
46 | try:
47 | key = _parse(line, 0)
48 | val = _parse(line, 1)
49 | layer[key] = val
50 | except:
51 | pass # silently ignore lines that are neither numeric nor key=value
52 |
53 | meta.update(layer) # last layer contains meta info
54 | if 'anchors' in meta:
55 | splits = meta['anchors'].split(',')
56 | anchors = [float(x.strip()) for x in splits]
57 | meta['anchors'] = anchors
58 | meta['model'] = model # path to cfg, not model name
59 | meta['inp_size'] = [h, w, c]
60 | return layers, meta
61 |
62 | def cfg_yielder(model, binary):
63 | """
64 | yield each layer's information, used to initialize `layer`
65 | """
66 | layers, meta = parser(model); yield meta;
67 | h, w, c = meta['inp_size']; l = w * h * c
68 |
69 | # Start yielding
70 | flat = False # flag for 1st dense layer
71 | conv = '.conv.' in model
72 | for i, d in enumerate(layers):
73 | #-----------------------------------------------------
74 | if d['type'] == '[crop]':
75 | yield ['crop', i]
76 | #-----------------------------------------------------
77 | elif d['type'] == '[local]':
78 | n = d.get('filters', 1)
79 | size = d.get('size', 1)
80 | stride = d.get('stride', 1)
81 | pad = d.get('pad', 0)
82 | activation = d.get('activation', 'logistic')
83 | w_ = (w - 1 - (1 - pad) * (size - 1)) // stride + 1
84 | h_ = (h - 1 - (1 - pad) * (size - 1)) // stride + 1
85 | yield ['local', i, size, c, n, stride,
86 | pad, w_, h_, activation]
87 | if activation != 'linear': yield [activation, i]
88 | w, h, c = w_, h_, n
89 | l = w * h * c
90 | #-----------------------------------------------------
91 | elif d['type'] == '[convolutional]':
92 | n = d.get('filters', 1)
93 | size = d.get('size', 1)
94 | stride = d.get('stride', 1)
95 | pad = d.get('pad', 0)
96 | padding = d.get('padding', 0)
97 | if pad: padding = size // 2
98 | activation = d.get('activation', 'logistic')
99 | batch_norm = d.get('batch_normalize', 0) or conv
100 | yield ['convolutional', i, size, c, n,
101 | stride, padding, batch_norm,
102 | activation]
103 | if activation != 'linear': yield [activation, i]
104 | w_ = (w + 2 * padding - size) // stride + 1
105 | h_ = (h + 2 * padding - size) // stride + 1
106 | w, h, c = w_, h_, n
107 | l = w * h * c
108 | #-----------------------------------------------------
109 | elif d['type'] == '[maxpool]':
110 | stride = d.get('stride', 1)
111 | size = d.get('size', stride)
112 | padding = d.get('padding', (size-1) // 2)
113 | yield ['maxpool', i, size, stride, padding]
114 | w_ = (w + 2*padding) // d['stride']
115 | h_ = (h + 2*padding) // d['stride']
116 | w, h = w_, h_
117 | l = w * h * c
118 | #-----------------------------------------------------
119 | elif d['type'] == '[avgpool]':
120 | flat = True; l = c
121 | yield ['avgpool', i]
122 | #-----------------------------------------------------
123 | elif d['type'] == '[softmax]':
124 | yield ['softmax', i, d['groups']]
125 | #-----------------------------------------------------
126 | elif d['type'] == '[connected]':
127 | if not flat:
128 | yield ['flatten', i]
129 | flat = True
130 | activation = d.get('activation', 'logistic')
131 | yield ['connected', i, l, d['output'], activation]
132 | if activation != 'linear': yield [activation, i]
133 | l = d['output']
134 | #-----------------------------------------------------
135 | elif d['type'] == '[dropout]':
136 | yield ['dropout', i, d['probability']]
137 | #-----------------------------------------------------
138 | elif d['type'] == '[select]':
139 | if not flat:
140 | yield ['flatten', i]
141 | flat = True
142 | inp = d.get('input', None)
143 | if type(inp) is str:
144 | file = inp.split(',')[0]
145 | layer_num = int(inp.split(',')[1])
146 | with open(file, 'rb') as f:
147 | profiles = pickle.load(f, encoding = 'latin1')[0]
148 | layer = profiles[layer_num]
149 | else: layer = inp
150 | activation = d.get('activation', 'logistic')
151 | d['keep'] = d['keep'].split('/')
152 | classes = int(d['keep'][-1])
153 | keep = [int(c) for c in d['keep'][0].split(',')]
154 | keep_n = len(keep)
155 | train_from = classes * d['bins']
156 | for count in range(d['bins']-1):
157 | for num in keep[-keep_n:]:
158 | keep += [num + classes]
159 | k = 1
160 | while layers[i-k]['type'] not in ['[connected]', '[extract]']:
161 | k += 1
162 | if i-k < 0:
163 | break
164 | if i-k < 0: l_ = l
165 | elif layers[i-k]['type'] == 'connected':
166 | l_ = layers[i-k]['output']
167 | else:
168 | l_ = layers[i-k].get('old',[l])[-1]
169 | yield ['select', i, l_, d['old_output'],
170 | activation, layer, d['output'],
171 | keep, train_from]
172 | if activation != 'linear': yield [activation, i]
173 | l = d['output']
174 | #-----------------------------------------------------
175 | elif d['type'] == '[conv-select]':
176 | n = d.get('filters', 1)
177 | size = d.get('size', 1)
178 | stride = d.get('stride', 1)
179 | pad = d.get('pad', 0)
180 | padding = d.get('padding', 0)
181 | if pad: padding = size // 2
182 | activation = d.get('activation', 'logistic')
183 | batch_norm = d.get('batch_normalize', 0) or conv
184 | d['keep'] = d['keep'].split('/')
185 | classes = int(d['keep'][-1])
186 | keep = [int(x) for x in d['keep'][0].split(',')]
187 |
188 | segment = classes + 5
189 | assert n % segment == 0, \
190 | 'conv-select: segment failed'
191 | bins = n // segment
192 | keep_idx = list()
193 | for j in range(bins):
194 | offset = j * segment
195 | for k in range(5):
196 | keep_idx += [offset + k]
197 | for k in keep:
198 | keep_idx += [offset + 5 + k]
199 | w_ = (w + 2 * padding - size) // stride + 1
200 | h_ = (h + 2 * padding - size) // stride + 1
201 | c_ = len(keep_idx)
202 | yield ['conv-select', i, size, c, n,
203 | stride, padding, batch_norm,
204 | activation, keep_idx, c_]
205 | w, h, c = w_, h_, c_
206 | l = w * h * c
207 | #-----------------------------------------------------
208 | elif d['type'] == '[conv-extract]':
209 | file = d['profile']
210 | with open(file, 'rb') as f:
211 | profiles = pickle.load(f, encoding = 'latin1')[0]
212 | inp_layer = None
213 | inp = d['input']
214 | out = d['output']
215 | inp_layer = None
216 | if inp >= 0:
217 | inp_layer = profiles[inp]
218 | if inp_layer is not None:
219 | assert len(inp_layer) == c, \
220 | 'Conv-extract does not match input dimension'
221 | out_layer = profiles[out]
222 |
223 | n = d.get('filters', 1)
224 | size = d.get('size', 1)
225 | stride = d.get('stride', 1)
226 | pad = d.get('pad', 0)
227 | padding = d.get('padding', 0)
228 | if pad: padding = size // 2
229 | activation = d.get('activation', 'logistic')
230 | batch_norm = d.get('batch_normalize', 0) or conv
231 |
232 | k = 1
233 | find = ['[convolutional]','[conv-extract]']
234 | while layers[i-k]['type'] not in find:
235 | k += 1
236 | if i-k < 0: break
237 | if i-k >= 0:
238 | previous_layer = layers[i-k]
239 | c_ = previous_layer['filters']
240 | else:
241 | c_ = c
242 |
243 | yield ['conv-extract', i, size, c_, n,
244 | stride, padding, batch_norm,
245 | activation, inp_layer, out_layer]
246 | if activation != 'linear': yield [activation, i]
247 | w_ = (w + 2 * padding - size) // stride + 1
248 | h_ = (h + 2 * padding - size) // stride + 1
249 | w, h, c = w_, h_, len(out_layer)
250 | l = w * h * c
251 | #-----------------------------------------------------
252 | elif d['type'] == '[extract]':
253 | if not flat:
254 | yield['flatten', i]
255 | flat = True
256 | activation = d.get('activation', 'logistic')
257 | file = d['profile']
258 | with open(file, 'rb') as f:
259 | profiles = pickle.load(f, encoding = 'latin1')[0]
260 | inp_layer = None
261 | inp = d['input']
262 | out = d['output']
263 | if inp >= 0:
264 | inp_layer = profiles[inp]
265 | out_layer = profiles[out]
266 | old = d['old']
267 | old = [int(x) for x in old.split(',')]
268 | if inp_layer is not None:
269 | if len(old) > 2:
270 | h_, w_, c_, n_ = old
271 | new_inp = list()
272 | for p in range(c_):
273 | for q in range(h_):
274 | for r in range(w_):
275 | if p not in inp_layer:
276 | continue
277 | new_inp += [r + w*(q + h*p)]
278 | inp_layer = new_inp
279 | old = [h_ * w_ * c_, n_]
280 | assert len(inp_layer) == l, \
281 | 'Extract does not match input dimension'
282 | d['old'] = old
283 | yield ['extract', i] + old + [activation] + [inp_layer, out_layer]
284 | if activation != 'linear': yield [activation, i]
285 | l = len(out_layer)
286 | #-----------------------------------------------------
287 | elif d['type'] == '[route]': # add new layer here
288 | routes = d['layers']
289 | if type(routes) is int:
290 | routes = [routes]
291 | else:
292 | routes = [int(x.strip()) for x in routes.split(',')]
293 | routes = [i + x if x < 0 else x for x in routes]
294 | for j, x in enumerate(routes):
295 | lx = layers[x];
296 | xtype = lx['type']
297 | _size = lx['_size'][:3]
298 | if j == 0:
299 | h, w, c = _size
300 | else:
301 | h_, w_, c_ = _size
302 | assert w_ == w and h_ == h, \
303 | 'Routing incompatible conv sizes'
304 | c += c_
305 | yield ['route', i, routes]
306 | l = w * h * c
307 | #-----------------------------------------------------
308 | elif d['type'] == '[reorg]':
309 | stride = d.get('stride', 1)
310 | yield ['reorg', i, stride]
311 | w = w // stride; h = h // stride;
312 | c = c * (stride ** 2)
313 | l = w * h * c
314 | #-----------------------------------------------------
315 | else:
316 | exit('Layer {} not implemented'.format(d['type']))
317 |
318 | d['_size'] = list([h, w, c, l, flat])
319 |
320 | if not flat: meta['out_size'] = [h, w, c]
321 | else: meta['out_size'] = l
--------------------------------------------------------------------------------
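
A hedged sketch of what parser() above returns for a toy config; mini.cfg is a made-up three-section file written only for this example (real configs live under cfg/), and note that the last section always ends up in meta rather than in layers:

from darkflow.utils.process import parser

cfg = """
[net]
height=416
width=416
channels=3

[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky

[region]
anchors = 1.08,1.19, 3.42,4.41
classes=20
num=5
"""
with open('mini.cfg', 'w') as f:
    f.write(cfg)

layers, meta = parser('mini.cfg')
print(meta['inp_size'])                          # [416, 416, 3]
print(meta['anchors'][:2])                       # [1.08, 1.19]
print(layers[0]['type'], layers[0]['filters'])   # [convolutional] 16
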
/darkflow/version.py:
--------------------------------------------------------------------------------
1 | __version__ = '1.0.0'
2 | """Current version of darkflow."""
--------------------------------------------------------------------------------
/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/demo.gif
--------------------------------------------------------------------------------
/flow:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | import sys
4 | from darkflow.cli import cliHandler
5 |
6 | cliHandler(sys.argv)
7 |
8 |
--------------------------------------------------------------------------------
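
The flow script above just forwards sys.argv to cliHandler. A hedged sketch of the equivalent programmatic call; the flag spellings follow the usual darkflow conventions and the weight file path is an example only, so check defaults.py for the authoritative list:

from darkflow.cli import cliHandler

cliHandler(['flow',
            '--model', 'cfg/tiny-yolo-voc.cfg',
            '--load', 'bin/tiny-yolo-voc.weights',
            '--imgdir', 'sample_img/'])
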
/labels.txt:
--------------------------------------------------------------------------------
1 | aeroplane
2 | bicycle
3 | bird
4 | boat
--------------------------------------------------------------------------------
/preview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/preview.png
--------------------------------------------------------------------------------
/sample_img/Thumbs.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/Thumbs.db
--------------------------------------------------------------------------------
/sample_img/sample_computer.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_computer.jpg
--------------------------------------------------------------------------------
/sample_img/sample_dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_dog.jpg
--------------------------------------------------------------------------------
/sample_img/sample_eagle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_eagle.jpg
--------------------------------------------------------------------------------
/sample_img/sample_giraffe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_giraffe.jpg
--------------------------------------------------------------------------------
/sample_img/sample_horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_horses.jpg
--------------------------------------------------------------------------------
/sample_img/sample_office.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_office.jpg
--------------------------------------------------------------------------------
/sample_img/sample_person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_person.jpg
--------------------------------------------------------------------------------
/sample_img/sample_scream.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/sample_img/sample_scream.jpg
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | from setuptools.extension import Extension
3 | from Cython.Build import cythonize
4 | import numpy
5 | import os
6 | import imp
7 |
8 | VERSION = imp.load_source('version', os.path.join('.', 'darkflow', 'version.py'))
9 | VERSION = VERSION.__version__
10 |
11 | if os.name =='nt' :
12 | ext_modules=[
13 | Extension("darkflow.cython_utils.nms",
14 | sources=["darkflow/cython_utils/nms.pyx"],
15 | #libraries=["m"] # Unix-like specific
16 | include_dirs=[numpy.get_include()]
17 | ),
18 | Extension("darkflow.cython_utils.cy_yolo2_findboxes",
19 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"],
20 | #libraries=["m"] # Unix-like specific
21 | include_dirs=[numpy.get_include()]
22 | ),
23 | Extension("darkflow.cython_utils.cy_yolo_findboxes",
24 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"],
25 | #libraries=["m"] # Unix-like specific
26 | include_dirs=[numpy.get_include()]
27 | )
28 | ]
29 |
30 | elif os.name =='posix' :
31 | ext_modules=[
32 | Extension("darkflow.cython_utils.nms",
33 | sources=["darkflow/cython_utils/nms.pyx"],
34 | libraries=["m"], # Unix-like specific
35 | include_dirs=[numpy.get_include()]
36 | ),
37 | Extension("darkflow.cython_utils.cy_yolo2_findboxes",
38 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"],
39 | libraries=["m"], # Unix-like specific
40 | include_dirs=[numpy.get_include()]
41 | ),
42 | Extension("darkflow.cython_utils.cy_yolo_findboxes",
43 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"],
44 | libraries=["m"], # Unix-like specific
45 | include_dirs=[numpy.get_include()]
46 | )
47 | ]
48 |
49 | else :
50 | ext_modules=[
51 | Extension("darkflow.cython_utils.nms",
52 | sources=["darkflow/cython_utils/nms.pyx"],
53 | libraries=["m"] # Unix-like specific
54 | ),
55 | Extension("darkflow.cython_utils.cy_yolo2_findboxes",
56 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"],
57 | libraries=["m"] # Unix-like specific
58 | ),
59 | Extension("darkflow.cython_utils.cy_yolo_findboxes",
60 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"],
61 | libraries=["m"] # Unix-like specific
62 | )
63 | ]
64 |
65 | setup(
66 | version=VERSION,
67 | name='darkflow',
68 | description='Darkflow',
69 | license='GPLv3',
70 | url='https://github.com/thtrieu/darkflow',
71 | packages = find_packages(),
72 | scripts = ['flow'],
73 | ext_modules = cythonize(ext_modules)
74 | )
--------------------------------------------------------------------------------
/test/requirements-testing.txt:
--------------------------------------------------------------------------------
1 | tensorflow==1.4.1
2 | pytest
3 | requests
4 | opencv-python
5 | numpy
6 | Cython
7 | codecov
8 | pytest-cov
--------------------------------------------------------------------------------
/test/training/annotations/1.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 1.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 336426776
9 |
10 |
11 | Elder Timothy Chaves
12 | Tim Chaves
13 |
14 |
15 | 500
16 | 375
17 | 3
18 |
19 | 0
20 |
32 |
44 |
45 |
--------------------------------------------------------------------------------
/test/training/annotations/2.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2007
3 | 2.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 | 329950741
9 |
10 |
11 | Lothar Lenz
12 | Lothar Lenz
13 |
14 |
15 | 500
16 | 332
17 | 3
18 |
19 | 0
20 |
32 |
44 |
45 |
--------------------------------------------------------------------------------
/test/training/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/test/training/images/1.jpg
--------------------------------------------------------------------------------
/test/training/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ashwini-Analytics/Pedestrian-Detection-using-Darkflow/73f3ca26b217b4d82cdf57da14a51facbeb3511f/test/training/images/2.jpg
--------------------------------------------------------------------------------
/vbb2voc.py:
--------------------------------------------------------------------------------
1 | import os, glob
2 | import cv2
3 | from scipy.io import loadmat
4 | from collections import defaultdict
5 | import numpy as np
6 | from lxml import etree, objectify
7 |
8 | def vbb_anno2dict(vbb_file, cam_id, person_types=None):
9 | """
10 | Parse caltech vbb annotation file to dict
11 | Args:
12 | vbb_file: input vbb file path
13 | cam_id: camera id
14 | person_types: list of person types to keep (4 types in total: person, person-fa, person?, people).
15 | If None, all types are used.
16 | Return:
17 | Annotation info dict with filename as key and anno info as value
18 | """
19 | filename = os.path.splitext(os.path.basename(vbb_file))[0]
20 | annos = defaultdict(dict)
21 | vbb = loadmat(vbb_file)
22 | # object info in each frame: id, pos, occlusion, lock, posv
23 | objLists = vbb['A'][0][0][1][0]
24 | objLbl = [str(v[0]) for v in vbb['A'][0][0][4][0]]
25 | # person index
26 | if not person_types:
27 | person_types = ["person", "person-fa", "person?", "people"]
28 | person_index_list = [x for x in range(len(objLbl)) if objLbl[x] in person_types]
29 | for frame_id, obj in enumerate(objLists):
30 | if len(obj) > 0:
31 | frame_name = str(cam_id) + "_" + str(filename) + "_" + str(frame_id+1) + ".jpg"
32 | annos[frame_name] = defaultdict(list)
33 | annos[frame_name]["id"] = frame_name
34 | for fid, pos, occl in zip(obj['id'][0], obj['pos'][0], obj['occl'][0]):
35 | fid = int(fid[0][0]) - 1 # MATLAB indices start at 1, not 0
36 | if fid not in person_index_list: # only keep bboxes whose label is one of the given person types
37 | continue
38 | annos[frame_name]["label"] = objLbl[fid]
39 | pos = pos[0].tolist()
40 | occl = int(occl[0][0])
41 | annos[frame_name]["occlusion"].append(occl)
42 | annos[frame_name]["bbox"].append(pos)
43 | if not annos[frame_name]["bbox"]:
44 | del annos[frame_name]
45 | return annos
46 |
47 |
48 | def seq2img(annos, seq_file, outdir, cam_id):
49 | """
50 | Extract frames in seq files to given output directories
51 | Args:
52 | annos: annos dict returned from parsed vbb file
53 | seq_file: seq file path
54 | outdir: frame save dir
55 | cam_id: camera id
56 | Returns:
57 | camera captured image size
58 | """
59 | cap = cv2.VideoCapture(seq_file)
60 | index = 1
61 | # captured frame list
62 | v_id = os.path.splitext(os.path.basename(seq_file))[0]
63 | cap_frames_index = np.sort([int(os.path.splitext(id)[0].split("_")[2]) for id in annos.keys()])
64 | while True:
65 | ret, frame = cap.read()
66 | if ret:
67 | if not index in cap_frames_index:
68 | index += 1
69 | continue
70 | if not os.path.exists(outdir):
71 | os.makedirs(outdir)
72 | outname = os.path.join(outdir, str(cam_id)+"_"+v_id+"_"+str(index)+".jpg")
73 | print "Current frame: ", v_id, str(index)
74 | cv2.imwrite(outname, frame)
75 | height, width, _ = frame.shape
76 | else:
77 | break
78 | index += 1
79 | img_size = (width, height)
80 | return img_size
81 |
82 |
83 | def instance2xml_base(anno, img_size, bbox_type='xyxy'):
84 | """
85 | Parse annotation data to VOC XML format
86 | Args:
87 | anno: annotation info returned by vbb_anno2dict function
88 | img_size: camera captured image size
89 | bbox_type: bbox coordinate record format: xyxy (xmin, ymin, xmax, ymax); xywh (xmin, ymin, width, height)
90 | Returns:
91 | Annotation xml info tree
92 | """
93 | assert bbox_type in ['xyxy', 'xywh']
94 | E = objectify.ElementMaker(annotate=False)
95 | anno_tree = E.annotation(
96 | E.folder('VOC2014_instance/person'),
97 | E.filename(anno['id']),
98 | E.source(
99 | E.database('Caltech pedestrian'),
100 | E.annotation('Caltech pedestrian'),
101 | E.image('Caltech pedestrian'),
102 | E.url('None')
103 | ),
104 | E.size(
105 | E.width(img_size[0]),
106 | E.height(img_size[1]),
107 | E.depth(3)
108 | ),
109 | E.segmented(0),
110 | )
111 | for index, bbox in enumerate(anno['bbox']):
112 | bbox = [float(x) for x in bbox]
113 | if bbox_type == 'xyxy':
114 | xmin, ymin, w, h = bbox
115 | xmax = xmin+w
116 | ymax = ymin+h
117 | else:
118 | xmin, ymin, xmax, ymax = bbox
119 | E = objectify.ElementMaker(annotate=False)
120 | anno_tree.append(
121 | E.object(
122 | E.name(anno['label']),
123 | E.bndbox(
124 | E.xmin(xmin),
125 | E.ymin(ymin),
126 | E.xmax(xmax),
127 | E.ymax(ymax)
128 | ),
129 | E.difficult(0),
130 | E.occlusion(anno["occlusion"][index])
131 | )
132 | )
133 | return anno_tree
134 |
135 |
136 | def parse_anno_file(vbb_inputdir, seq_inputdir, vbb_outputdir, seq_outputdir, person_types=None):
137 | """
138 | Parse Caltech data stored in seq and vbb files to VOC xml format
139 | Args:
140 | vbb_inputdir: directory containing the vbb annotation files
141 | seq_inputdir: directory containing the seq video files
142 | vbb_outputdir: directory where the converted VOC xml files are written
143 | seq_outputdir: directory where the extracted frame images are written
144 | person_types: list of person types to keep (4 types in total: person, person-fa, person?, people).
145 | If None, all types are used.
146 | """
147 | # annotation sub-directories in hda annotation input directory
148 | assert os.path.exists(vbb_inputdir)
149 | sub_dirs = os.listdir(vbb_inputdir)
150 | for sub_dir in sub_dirs:
151 | print "Parsing annotations of camera: ", sub_dir
152 | cam_id = sub_dir
153 | vbb_files = glob.glob(os.path.join(vbb_inputdir, sub_dir, "*.vbb"))
154 | for vbb_file in vbb_files:
155 | annos = vbb_anno2dict(vbb_file, cam_id, person_types=person_types)
156 | if annos:
157 | vbb_outdir = os.path.join(vbb_outputdir, "annotations", sub_dir, "bbox")
158 | # extract frames from seq
159 | seq_file = os.path.join(seq_inputdir, sub_dir, os.path.splitext(os.path.basename(vbb_file))[0]+".seq")
160 | seq_outdir = os.path.join(seq_outputdir, sub_dir, "frame")
161 | if not os.path.exists(vbb_outdir):
162 | os.makedirs(vbb_outdir)
163 | if not os.path.exists(seq_outdir):
164 | os.makedirs(seq_outdir)
165 | img_size = seq2img(annos, seq_file, seq_outdir, cam_id)
166 | for filename, anno in sorted(annos.items(), key=lambda x: x[0]):
167 | if "bbox" in anno:
168 | anno_tree = instance2xml_base(anno, img_size)
169 | outfile = os.path.join(vbb_outdir, os.path.splitext(filename)[0]+".xml")
170 | print "Generating annotation xml file of picture: ", filename
171 | etree.ElementTree(anno_tree).write(outfile, pretty_print=True)
172 |
173 |
174 | def visualize_bbox(xml_file, img_file):
175 | import cv2
176 | tree = etree.parse(xml_file)
177 | # load image
178 | image = cv2.imread(img_file)
179 | # get bbox
180 | for bbox in tree.xpath('//bndbox'):
181 | coord = []
182 | for corner in bbox.getchildren():
183 | coord.append(int(float(corner.text)))
184 | # draw rectangle
185 | # coord = [int(x) for x in coord]
186 | image = cv2.rectangle(image, (coord[0], coord[1]), (coord[2], coord[3]), (0, 0, 255), 2)
187 | # visualize image
188 | cv2.imshow("test", image)
189 | cv2.waitKey(0)
190 |
191 |
192 | def main():
193 | seq_inputdir = "/startdt_data/caltech_pedestrian_dataset"
194 | vbb_inputdir = "/startdt_data/caltech_pedestrian_dataset/annotations"
195 | seq_outputdir = "/startdt_data/caltech_pedestrian_dataset"
196 | vbb_outputdir = "/startdt_data/caltech_pedestrian_dataset"
197 | person_types = ["person", "people"]
198 | parse_anno_file(vbb_inputdir, seq_inputdir, vbb_outputdir, seq_outputdir, person_types=person_types)
199 | # xml_file = "/startdt_data/caltech_pedestrian_dataset/annotations/set00/bbox/set00_V013_1511.xml"
200 | # img_file = "/startdt_data/caltech_pedestrian_dataset/set00/frame/set00_V013_1511.jpg"
201 | # visualize_bbox(xml_file, img_file)
202 |
203 |
204 | if __name__ == "__main__":
205 | main()
206 |
--------------------------------------------------------------------------------
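
A hedged single-file sketch of using the helpers above outside parse_anno_file; the dataset paths are placeholders that must point at a local Caltech Pedestrian copy:

from lxml import etree
from vbb2voc import vbb_anno2dict, seq2img, instance2xml_base

annos = vbb_anno2dict('annotations/set00/V000.vbb', cam_id='set00',
                      person_types=['person', 'people'])
img_size = seq2img(annos, 'set00/V000.seq', 'frames/set00', cam_id='set00')
for name, anno in sorted(annos.items()):
    if 'bbox' in anno:
        xml_tree = instance2xml_base(anno, img_size)
        etree.ElementTree(xml_tree).write(
            'frames/set00/' + name.replace('.jpg', '.xml'), pretty_print=True)
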