├── LICENSE ├── README.md ├── build ├── lib.linux-x86_64-3.6 │ └── darkflow │ │ └── cython_utils │ │ ├── cy_yolo2_findboxes.cpython-36m-x86_64-linux-gnu.so │ │ ├── cy_yolo_findboxes.cpython-36m-x86_64-linux-gnu.so │ │ └── nms.cpython-36m-x86_64-linux-gnu.so └── temp.linux-x86_64-3.6 │ └── darkflow │ └── cython_utils │ ├── cy_yolo2_findboxes.o │ ├── cy_yolo_findboxes.o │ └── nms.o ├── cfg ├── coco.names ├── extraction.cfg ├── extraction.conv.cfg ├── tiny-yolo-4c.cfg ├── tiny-yolo-voc.cfg ├── tiny-yolo.cfg ├── v1.1 │ ├── person-bottle.cfg │ ├── tiny-coco.cfg │ ├── tiny-yolo-4c.cfg │ ├── tiny-yolov1.cfg │ ├── yolo-coco.cfg │ └── yolov1.cfg ├── v1 │ ├── tiny-old.profile │ ├── tiny.profile │ ├── yolo-2c.cfg │ ├── yolo-4c.cfg │ ├── yolo-full.cfg │ ├── yolo-small.cfg │ ├── yolo-tiny-extract.cfg │ ├── yolo-tiny-extract_.cfg │ ├── yolo-tiny.cfg │ └── yolo-tiny4c.cfg ├── yolo-voc.cfg ├── yolo.cfg ├── yolov2-tiny-voc-1c.cfg └── yolov2-tiny-voc.cfg ├── ckpt ├── checkpoint ├── yolov2-tiny-voc-1c-4000.index └── yolov2-tiny-voc-1c-4000.profile ├── darkflow ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── defaults.cpython-36.pyc │ └── version.cpython-36.pyc ├── cli.py ├── cython_utils │ ├── __pycache__ │ │ └── __init__.cpython-36.pyc │ ├── cy_yolo2_findboxes.c │ ├── cy_yolo2_findboxes.cpython-36m-x86_64-linux-gnu.so │ ├── cy_yolo2_findboxes.pyx │ ├── cy_yolo_findboxes.c │ ├── cy_yolo_findboxes.cpython-36m-x86_64-linux-gnu.so │ ├── cy_yolo_findboxes.pyx │ ├── nms.c │ ├── nms.cpython-36m-x86_64-linux-gnu.so │ ├── nms.pxd │ └── nms.pyx ├── dark │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── connected.cpython-36.pyc │ │ ├── convolution.cpython-36.pyc │ │ ├── darknet.cpython-36.pyc │ │ ├── darkop.cpython-36.pyc │ │ └── layer.cpython-36.pyc │ ├── connected.py │ ├── convolution.py │ ├── darknet.py │ ├── darkop.py │ └── layer.py ├── defaults.py ├── net │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── build.cpython-36.pyc │ │ ├── flow.cpython-36.pyc │ │ ├── framework.cpython-36.pyc │ │ └── help.cpython-36.pyc │ ├── build.py │ ├── flow.py │ ├── framework.py │ ├── help.py │ ├── ops │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── baseop.cpython-36.pyc │ │ │ ├── convolution.cpython-36.pyc │ │ │ └── simple.cpython-36.pyc │ │ ├── baseop.py │ │ ├── convolution.py │ │ └── simple.py │ ├── vanilla │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── train.cpython-36.pyc │ │ └── train.py │ ├── yolo │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── data.cpython-36.pyc │ │ │ ├── misc.cpython-36.pyc │ │ │ ├── predict.cpython-36.pyc │ │ │ └── train.cpython-36.pyc │ │ ├── data.py │ │ ├── misc.py │ │ ├── predict.py │ │ └── train.py │ └── yolov2 │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── data.cpython-36.pyc │ │ ├── predict.cpython-36.pyc │ │ └── train.cpython-36.pyc │ │ ├── data.py │ │ ├── predict.py │ │ └── train.py ├── utils │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── box.cpython-36.pyc │ │ ├── im_transform.cpython-36.pyc │ │ ├── loader.cpython-36.pyc │ │ ├── pascal_voc_clean_xml.cpython-36.pyc │ │ └── process.cpython-36.pyc │ ├── box.py │ ├── im_transform.py │ ├── loader.py │ ├── pascal_voc_clean_xml.py │ └── process.py └── version.py ├── data_augment.py ├── flow ├── issues.png ├── labels.txt ├── mrcnn ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── config.cpython-36.pyc │ ├── model.cpython-36.pyc │ └── utils.cpython-36.pyc ├── config.py 
├── model.py ├── parallel_model.py ├── utils.py └── visualize.py ├── outputs ├── Screenshot from 2019-01-20 11-59-20.png ├── Screenshot from 2019-01-20 11-59-30.png ├── Screenshot from 2019-01-20 11-59-39.png ├── Screenshot from 2019-01-20 11-59-48.png ├── Screenshot from 2019-01-20 12-00-06.png ├── Screenshot from 2019-01-20 12-00-15.png ├── Screenshot from 2019-01-20 12-00-23.png ├── Screenshot from 2019-01-20 12-00-33.png ├── Screenshot from 2019-01-20 12-00-42.png ├── Screenshot from 2019-01-20 12-00-51.png ├── Screenshot from 2019-01-20 12-01-00.png ├── Screenshot from 2019-01-20 12-01-08.png ├── Screenshot from 2019-01-20 12-01-17.png ├── Screenshot from 2019-01-20 12-01-26.png ├── Screenshot from 2019-01-20 12-01-36.png └── Screenshot from 2019-01-20 12-01-44.png ├── pipeline.png ├── predict.py ├── setup.py ├── train_mask.py └── yolo_prediction.py /README.md: -------------------------------------------------------------------------------- 1 | # Occluded Object Detection Using Segmentation 2 | 3 | This project was submitted at the iHack Hackathon, IIT Bombay, 2019. 4 | 5 | The goal of the project is to detect an object whether it is occluded by an obstacle or clearly visible. We propose a solution that combines Mask-RCNN and YOLOv2. 6 | 7 | YOLOv2 and similar networks are good at detecting whole objects, whereas the Mask-RCNN model is good at instance segmentation; a combination of the two can therefore be used to detect an occluded object. 8 | 9 | In cases where multiple objects overlap each other, depth images can be used to determine which object is in front of another (a minimal sketch of this check follows the Issues section below). 10 | 11 | ## Issues 12 | 13 | - Traditional methods rely on object detection based on region proposals and localization, but they fail if the object is occluded. 14 | 15 | - Using only Mask-RCNN for object detection may lead to multiple detections of the same object. 16 | 17 |
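Below is a minimal sketch, not part of this repository, of the depth check mentioned above: given two overlapping instance masks (for example from Mask-RCNN) and an aligned depth image, the object with the smaller median depth is taken to be in front. The function name and inputs are hypothetical.

    import numpy as np

    def front_object(depth, mask_a, mask_b):
        """Return 'a' if the object under mask_a is closer to the camera, else 'b'.

        depth  : HxW depth image (smaller value = closer), e.g. from a Kinect sensor
        mask_a : HxW boolean instance mask (assumed Mask-RCNN output)
        mask_b : HxW boolean instance mask
        """
        # Median depth under each mask is robust to noisy or missing depth pixels.
        return 'a' if np.median(depth[mask_a]) < np.median(depth[mask_b]) else 'b'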

18 | 19 | ## Pipeline 20 | 21 |
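To make the pipeline concrete, here is a rough, hypothetical sketch of the fusion step: boxes derived from Mask-RCNN masks are matched against YOLOv2 boxes by IoU, overlapping duplicates are reported once, and detections found by only one of the two models (for example an occluded object that YOLO missed but Mask-RCNN segmented) are kept. The actual logic lives in predict.py and may differ; the helper names below are assumptions.

    def box_iou(a, b):
        """IoU of two axis-aligned boxes given as (x1, y1, x2, y2)."""
        x1, y1 = max(a[0], b[0]), max(a[1], b[1])
        x2, y2 = min(a[2], b[2]), min(a[3], b[3])
        inter = max(0, x2 - x1) * max(0, y2 - y1)
        area_a = (a[2] - a[0]) * (a[3] - a[1])
        area_b = (b[2] - b[0]) * (b[3] - b[1])
        return inter / float(area_a + area_b - inter + 1e-9)

    def fuse_detections(yolo_boxes, mask_boxes, iou_thresh=0.5):
        """Merge YOLOv2 boxes with boxes derived from Mask-RCNN masks.

        Matched pairs are reported once (the YOLO box is kept); mask detections
        with no strongly overlapping YOLO box are added, so an occluded object
        that only Mask-RCNN found still appears in the output.
        """
        fused = list(yolo_boxes)
        for mb in mask_boxes:
            if not any(box_iou(mb, yb) >= iou_thresh for yb in yolo_boxes):
                fused.append(mb)
        return fused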

22 | 23 | ## Dataset 24 | 25 | Both networks, Mask-RCNN and YOLOv2, were trained on the [Microsoft Kinect RGB-D](http://rgbd-dataset.cs.washington.edu/index.html) dataset, using 548 images of a coffee cup. 26 | 27 | ## Applications: 28 | 29 | - Relative distance of objects from each other (3D estimation) 30 | 31 | - Object detection (e.g. person detection) in crowded scenarios 32 | 33 | - Estimation of object count (for example, how many people attended an event, based on crowd images) 34 | 35 | ## Usage: 36 | Download the complete project: 37 | 38 | git clone https://github.com/jatinmandav/Mask-YOLO 39 | 40 | Navigate to the Mask-YOLO directory: 41 | 42 | cd Mask-YOLO 43 | 44 | Download the trained Mask-RCNN and YOLO weights from the releases page and extract them inside the Mask-YOLO directory. 45 | 46 | To predict on a new image: 47 | 48 | usage: predict.py [-h] --weights /path/to/weights.h5 --image path or URL to 49 | image 50 | 51 | Detect coffee_cups with MaskRCNN+YOLO Network 52 | 53 | optional arguments: 54 | -h, --help show this help message and exit 55 | --weights /path/to/weights.h5 56 | Path to weights .h5 file 57 | --image path to image 58 | 59 | To train on your custom dataset, prepare the dataset, use train_mask.py to train Mask-RCNN, and follow [Thtrieu's Darkflow](https://github.com/thtrieu/darkflow) to train YOLO: 60 | 61 | usage: train_mask.py [-h] --dataset /path/to/coffee_cup/dataset/ --weights 62 | /path/to/weights.h5 [--logs /path/to/logs/] 63 | 64 | Train Mask R-CNN to detect Coffee Cup. 65 | 66 | optional arguments: 67 | -h, --help show this help message and exit 68 | --dataset /path/to/coffee_cup/dataset/ 69 | Directory of the coffee_cup dataset 70 | --weights /path/to/weights.h5 71 | Path to weights .h5 file or 'coco' 72 | --logs /path/to/logs/ 73 | Logs and checkpoints directory (default=logs/) 74 | 75 | 76 | This project is based on the work of [Thtrieu's Darkflow](https://github.com/thtrieu/darkflow) and [Matterport's Mask-RCNN](https://github.com/matterport/Mask_RCNN). 77 | 78 | 79 | ## Sample Outputs 80 | 81 |

82 | 83 |

84 | 85 |

86 | 87 |

88 | 89 |

90 | -------------------------------------------------------------------------------- /build/lib.linux-x86_64-3.6/darkflow/cython_utils/cy_yolo2_findboxes.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/build/lib.linux-x86_64-3.6/darkflow/cython_utils/cy_yolo2_findboxes.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /build/lib.linux-x86_64-3.6/darkflow/cython_utils/cy_yolo_findboxes.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/build/lib.linux-x86_64-3.6/darkflow/cython_utils/cy_yolo_findboxes.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /build/lib.linux-x86_64-3.6/darkflow/cython_utils/nms.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/build/lib.linux-x86_64-3.6/darkflow/cython_utils/nms.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /build/temp.linux-x86_64-3.6/darkflow/cython_utils/cy_yolo2_findboxes.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/build/temp.linux-x86_64-3.6/darkflow/cython_utils/cy_yolo2_findboxes.o -------------------------------------------------------------------------------- /build/temp.linux-x86_64-3.6/darkflow/cython_utils/cy_yolo_findboxes.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/build/temp.linux-x86_64-3.6/darkflow/cython_utils/cy_yolo_findboxes.o -------------------------------------------------------------------------------- /build/temp.linux-x86_64-3.6/darkflow/cython_utils/nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/build/temp.linux-x86_64-3.6/darkflow/cython_utils/nms.o -------------------------------------------------------------------------------- /cfg/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | 
bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /cfg/extraction.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | max_crop=320 7 | channels=3 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | [convolutional] 17 | batch_normalize=1 18 | filters=64 19 | size=7 20 | stride=2 21 | pad=1 22 | activation=leaky 23 | 24 | [maxpool] 25 | size=2 26 | stride=2 27 | 28 | [convolutional] 29 | batch_normalize=1 30 | filters=192 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=leaky 35 | 36 | [maxpool] 37 | size=2 38 | stride=2 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=128 43 | size=1 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=256 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=256 59 | size=1 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [convolutional] 65 | batch_normalize=1 66 | filters=512 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [maxpool] 73 | size=2 74 | stride=2 75 | 76 | [convolutional] 77 | batch_normalize=1 78 | filters=256 79 | size=1 80 | stride=1 81 | pad=1 82 | activation=leaky 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=256 95 | size=1 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=512 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=256 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=512 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [convolutional] 141 | batch_normalize=1 142 | filters=512 143 | size=1 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [convolutional] 149 | batch_normalize=1 150 | filters=1024 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=leaky 155 | 156 | [maxpool] 157 | size=2 158 | stride=2 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=512 163 | size=1 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=1024 171 | size=3 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=512 179 | size=1 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | [convolutional] 185 | batch_normalize=1 186 | filters=1024 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | [convolutional] 193 | filters=1000 194 | size=1 195 | stride=1 196 | pad=1 197 | 
activation=leaky 198 | 199 | [avgpool] 200 | 201 | [softmax] 202 | groups=1 203 | 204 | [cost] 205 | type=sse 206 | 207 | -------------------------------------------------------------------------------- /cfg/extraction.conv.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=256 5 | width=256 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.5 11 | policy=poly 12 | power=6 13 | max_batches=500000 14 | 15 | [convolutional] 16 | filters=64 17 | size=7 18 | stride=2 19 | pad=1 20 | activation=leaky 21 | 22 | [maxpool] 23 | size=2 24 | stride=2 25 | 26 | [convolutional] 27 | filters=192 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | filters=128 39 | size=1 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [convolutional] 45 | filters=256 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | filters=256 53 | size=1 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [convolutional] 59 | filters=512 60 | size=3 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [maxpool] 66 | size=2 67 | stride=2 68 | 69 | [convolutional] 70 | filters=256 71 | size=1 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [convolutional] 77 | filters=512 78 | size=3 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [convolutional] 84 | filters=256 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | filters=512 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=256 99 | size=1 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | filters=256 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | filters=512 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | filters=512 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | filters=1024 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [maxpool] 140 | size=2 141 | stride=2 142 | 143 | [convolutional] 144 | filters=512 145 | size=1 146 | stride=1 147 | pad=1 148 | activation=leaky 149 | 150 | [convolutional] 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | filters=512 159 | size=1 160 | stride=1 161 | pad=1 162 | activation=leaky 163 | 164 | [convolutional] 165 | filters=1024 166 | size=3 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [avgpool] 172 | 173 | [connected] 174 | output=1000 175 | activation=leaky 176 | 177 | [softmax] 178 | groups=1 179 | 180 | -------------------------------------------------------------------------------- /cfg/tiny-yolo-4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40100 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | 
activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=45 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=4 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh=.6 134 | random=1 135 | -------------------------------------------------------------------------------- /cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40100 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 
115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .5 134 | random=1 135 | -------------------------------------------------------------------------------- /cfg/tiny-yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 120000 16 | policy=steps 17 | steps=-1,100,80000,100000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=425 115 | activation=linear 116 | 117 | [region] 118 | anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741 119 | bias_match=1 120 | classes=80 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /cfg/v1.1/person-bottle.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | saturation=.75 11 | exposure=.75 12 | hue = .1 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,20000,30000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 40000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 
45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [select] 109 | old_output=1470 110 | keep=4,14/20 111 | bins=49 112 | output=588 113 | activation=linear 114 | 115 | [detection] 116 | classes=2 117 | coords=4 118 | rescore=1 119 | side=7 120 | num=2 121 | softmax=0 122 | sqrt=1 123 | jitter=.2 124 | 125 | object_scale=1 126 | noobject_scale=.5 127 | class_scale=1 128 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1.1/tiny-coco.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | hue = .1 11 | saturation=.75 12 | exposure=.75 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,100000,150000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 200000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [connected] 109 | output= 4655 110 | activation=linear 111 | 112 | [detection] 113 | classes=80 114 | coords=4 115 | rescore=1 116 | side=7 117 | num=3 118 | softmax=0 119 | sqrt=1 120 | jitter=.2 121 | 122 | object_scale=1 123 | noobject_scale=.5 124 | class_scale=1 125 | coord_scale=5 126 | -------------------------------------------------------------------------------- /cfg/v1.1/tiny-yolo-4c.cfg: -------------------------------------------------------------------------------- 1 | 
[net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | saturation=.75 11 | exposure=.75 12 | hue = .1 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,20000,30000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 40000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [select] 109 | old_output=1470 110 | keep=8,14,15,19/20 111 | bins=49 112 | output=686 113 | activation=linear 114 | 115 | [detection] 116 | classes=4 117 | coords=4 118 | rescore=1 119 | side=7 120 | num=2 121 | softmax=0 122 | sqrt=1 123 | jitter=.2 124 | 125 | object_scale=1 126 | noobject_scale=.5 127 | class_scale=1 128 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1.1/tiny-yolov1.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | saturation=.75 11 | exposure=.75 12 | hue = .1 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,20000,30000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 40000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | size=3 95 | stride=1 
96 | pad=1 97 | filters=1024 98 | activation=leaky 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | size=3 103 | stride=1 104 | pad=1 105 | filters=256 106 | activation=leaky 107 | 108 | [connected] 109 | output= 1470 110 | activation=linear 111 | 112 | [detection] 113 | classes=20 114 | coords=4 115 | rescore=1 116 | side=7 117 | num=2 118 | softmax=0 119 | sqrt=1 120 | jitter=.2 121 | 122 | object_scale=1 123 | noobject_scale=.5 124 | class_scale=1 125 | coord_scale=5 126 | 127 | -------------------------------------------------------------------------------- /cfg/v1.1/yolo-coco.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=4 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | hue = .1 11 | saturation=.75 12 | exposure=.75 13 | 14 | learning_rate=0.0005 15 | policy=steps 16 | steps=200,400,600,800,100000,150000 17 | scales=2.5,2,2,2,.1,.1 18 | max_batches = 200000 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=64 23 | size=7 24 | stride=2 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=192 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=128 47 | size=1 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=256 63 | size=1 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=512 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=256 83 | size=1 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=512 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [convolutional] 97 | batch_normalize=1 98 | filters=256 99 | size=1 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=512 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | [convolutional] 113 | batch_normalize=1 114 | filters=256 115 | size=1 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | batch_normalize=1 122 | filters=512 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [convolutional] 129 | batch_normalize=1 130 | filters=256 131 | size=1 132 | stride=1 133 | pad=1 134 | activation=leaky 135 | 136 | [convolutional] 137 | batch_normalize=1 138 | filters=512 139 | size=3 140 | stride=1 141 | pad=1 142 | activation=leaky 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=512 147 | size=1 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=1024 155 | size=3 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [maxpool] 161 | size=2 162 | stride=2 163 | 164 | [convolutional] 165 | batch_normalize=1 166 | filters=512 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [convolutional] 181 | 
batch_normalize=1 182 | filters=512 183 | size=1 184 | stride=1 185 | pad=1 186 | activation=leaky 187 | 188 | [convolutional] 189 | batch_normalize=1 190 | filters=1024 191 | size=3 192 | stride=1 193 | pad=1 194 | activation=leaky 195 | 196 | ####### 197 | 198 | [convolutional] 199 | batch_normalize=1 200 | size=3 201 | stride=1 202 | pad=1 203 | filters=1024 204 | activation=leaky 205 | 206 | [convolutional] 207 | batch_normalize=1 208 | size=3 209 | stride=2 210 | pad=1 211 | filters=1024 212 | activation=leaky 213 | 214 | [convolutional] 215 | batch_normalize=1 216 | size=3 217 | stride=1 218 | pad=1 219 | filters=1024 220 | activation=leaky 221 | 222 | [convolutional] 223 | batch_normalize=1 224 | size=3 225 | stride=1 226 | pad=1 227 | filters=1024 228 | activation=leaky 229 | 230 | [local] 231 | size=3 232 | stride=1 233 | pad=1 234 | filters=256 235 | activation=leaky 236 | 237 | [connected] 238 | output= 4655 239 | activation=linear 240 | 241 | [detection] 242 | classes=80 243 | coords=4 244 | rescore=1 245 | side=7 246 | num=3 247 | softmax=0 248 | sqrt=1 249 | jitter=.2 250 | 251 | object_scale=1 252 | noobject_scale=.5 253 | class_scale=1 254 | coord_scale=5 255 | 256 | -------------------------------------------------------------------------------- /cfg/v1.1/yolov1.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | saturation=1.5 10 | exposure=1.5 11 | hue=.1 12 | 13 | learning_rate=0.0005 14 | policy=steps 15 | steps=200,400,600,20000,30000 16 | scales=2.5,2,2,.1,.1 17 | max_batches = 40000 18 | 19 | [convolutional] 20 | batch_normalize=1 21 | filters=64 22 | size=7 23 | stride=2 24 | pad=1 25 | activation=leaky 26 | 27 | [maxpool] 28 | size=2 29 | stride=2 30 | 31 | [convolutional] 32 | batch_normalize=1 33 | filters=192 34 | size=3 35 | stride=1 36 | pad=1 37 | activation=leaky 38 | 39 | [maxpool] 40 | size=2 41 | stride=2 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=128 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=256 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=512 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | batch_normalize=1 81 | filters=256 82 | size=1 83 | stride=1 84 | pad=1 85 | activation=leaky 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=512 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [convolutional] 96 | batch_normalize=1 97 | filters=256 98 | size=1 99 | stride=1 100 | pad=1 101 | activation=leaky 102 | 103 | [convolutional] 104 | batch_normalize=1 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | [convolutional] 120 | batch_normalize=1 121 | filters=512 122 | size=3 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=256 130 | size=1 131 | stride=1 132 | pad=1 133 | activation=leaky 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=512 
138 | size=3 139 | stride=1 140 | pad=1 141 | activation=leaky 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=512 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=1024 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [maxpool] 160 | size=2 161 | stride=2 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=512 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=1024 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [convolutional] 180 | batch_normalize=1 181 | filters=512 182 | size=1 183 | stride=1 184 | pad=1 185 | activation=leaky 186 | 187 | [convolutional] 188 | batch_normalize=1 189 | filters=1024 190 | size=3 191 | stride=1 192 | pad=1 193 | activation=leaky 194 | 195 | ####### 196 | 197 | [convolutional] 198 | batch_normalize=1 199 | size=3 200 | stride=1 201 | pad=1 202 | filters=1024 203 | activation=leaky 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | size=3 208 | stride=2 209 | pad=1 210 | filters=1024 211 | activation=leaky 212 | 213 | [convolutional] 214 | batch_normalize=1 215 | size=3 216 | stride=1 217 | pad=1 218 | filters=1024 219 | activation=leaky 220 | 221 | [convolutional] 222 | batch_normalize=1 223 | size=3 224 | stride=1 225 | pad=1 226 | filters=1024 227 | activation=leaky 228 | 229 | [local] 230 | size=3 231 | stride=1 232 | pad=1 233 | filters=256 234 | activation=leaky 235 | 236 | [dropout] 237 | probability=.5 238 | 239 | [connected] 240 | output= 1715 241 | activation=linear 242 | 243 | [detection] 244 | classes=20 245 | coords=4 246 | rescore=1 247 | side=7 248 | num=3 249 | softmax=0 250 | sqrt=1 251 | jitter=.2 252 | 253 | object_scale=1 254 | noobject_scale=.5 255 | class_scale=1 256 | coord_scale=5 257 | 258 | -------------------------------------------------------------------------------- /cfg/v1/tiny-old.profile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/cfg/v1/tiny-old.profile -------------------------------------------------------------------------------- /cfg/v1/tiny.profile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/cfg/v1/tiny.profile -------------------------------------------------------------------------------- /cfg/v1/yolo-2c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=16 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 48 | size=3 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [maxpool] 54 | 
size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=1024 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [connected] 112 | output=256 113 | activation=linear 114 | 115 | [connected] 116 | output=4096 117 | activation=leaky 118 | 119 | [dropout] 120 | probability=.5 121 | 122 | [select] 123 | old_output=1470 124 | keep=14,19/20 125 | bins=49 126 | output=588 127 | activation=linear 128 | 129 | [detection] 130 | classes=2 131 | coords=4 132 | rescore=1 133 | side=7 134 | num=2 135 | softmax=0 136 | sqrt=1 137 | jitter=.2 138 | object_scale=1 139 | noobject_scale=.5 140 | class_scale=1 141 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | 
activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=4096 212 | activation=leaky 213 | 214 | [dropout] 215 | probability=.5 216 | 217 | [select] 218 | old_output=1470 219 | keep=8,14,15,19/20 220 | bins=49 221 | output=686 222 | activation=linear 223 | 224 | [detection] 225 | classes=4 226 | coords=4 227 | rescore=1 228 | side=7 229 | num=2 230 | softmax=0 231 | sqrt=1 232 | jitter=.2 233 | 234 | object_scale=1 235 | noobject_scale=.5 236 | class_scale=1 237 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-full.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | 
stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=4096 212 | activation=leaky 213 | 214 | [dropout] 215 | probability=.5 216 | 217 | [connected] 218 | output= 1470 219 | activation=linear 220 | 221 | [detection] 222 | classes=20 223 | coords=4 224 | rescore=1 225 | side=7 226 | num=2 227 | softmax=0 228 | sqrt=1 229 | jitter=.2 230 | 231 | object_scale=1 232 | noobject_scale=.5 233 | class_scale=1 234 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-small.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | 
activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=512 212 | activation=leaky 213 | 214 | [connected] 215 | output=4096 216 | activation=leaky 217 | 218 | [dropout] 219 | probability=.5 220 | 221 | [connected] 222 | output= 1470 223 | activation=linear 224 | 225 | [detection] 226 | classes=20 227 | coords=4 228 | rescore=1 229 | side=7 230 | num=2 231 | softmax=0 232 | sqrt=1 233 | jitter=.2 234 | 235 | object_scale=1 236 | noobject_scale=.5 237 | class_scale=1 238 | coord_scale=5 239 | 240 | -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny-extract.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [conv-extract] 25 | profile=cfg/v1/tiny.profile 26 | input=-1 27 | output=0 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [conv-extract] 39 | profile=cfg/v1/tiny.profile 40 | input=0 41 | output=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [maxpool] 49 | size=2 50 | stride=2 51 | 52 | [conv-extract] 53 | profile=cfg/v1/tiny.profile 54 | input=1 55 | output=2 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [maxpool] 63 | size=2 64 | stride=2 65 | 66 | [conv-extract] 67 | profile=cfg/v1/tiny.profile 68 | input=2 69 | output=3 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [conv-extract] 81 | profile=cfg/v1/tiny.profile 82 | input=3 83 | output=4 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 
89 | 90 | [maxpool] 91 | size=2 92 | stride=2 93 | 94 | [conv-extract] 95 | profile=cfg/v1/tiny.profile 96 | input=4 97 | output=5 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [maxpool] 105 | size=2 106 | stride=2 107 | 108 | [conv-extract] 109 | profile=cfg/v1/tiny.profile 110 | input=5 111 | output=6 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [conv-extract] 119 | profile=cfg/v1/tiny.profile 120 | input=6 121 | output=7 122 | filters=1024 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [conv-extract] 129 | profile=cfg/v1/tiny.profile 130 | input=7 131 | output=8 132 | filters=1024 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | [extract] 139 | profile=cfg/v1/tiny.profile 140 | input=8 141 | output=9 142 | old=7,7,1024,256 143 | activation=linear 144 | 145 | [extract] 146 | profile=cfg/v1/tiny.profile 147 | input=9 148 | output=10 149 | old=256,4096 150 | activation=leaky 151 | 152 | [dropout] 153 | probability=1. 154 | 155 | [select] 156 | input=cfg/v1/tiny.profile,10 157 | old_output=1470 158 | keep=8,14,15,19/20 159 | bins=49 160 | output=686 161 | activation=linear 162 | 163 | [detection] 164 | classes=4 165 | coords=4 166 | rescore=1 167 | side=7 168 | num=2 169 | softmax=0 170 | sqrt=1 171 | jitter=.2 172 | object_scale=1 173 | noobject_scale=.5 174 | class_scale=1 175 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny-extract_.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [conv-extract] 25 | profile=cfg/v1/tiny-old.profile 26 | input=-1 27 | output=0 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [conv-extract] 39 | profile=cfg/v1/tiny-old.profile 40 | input=0 41 | output=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [maxpool] 49 | size=2 50 | stride=2 51 | 52 | [conv-extract] 53 | profile=cfg/v1/tiny-old.profile 54 | input=1 55 | output=2 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [maxpool] 63 | size=2 64 | stride=2 65 | 66 | [conv-extract] 67 | profile=cfg/v1/tiny-old.profile 68 | input=2 69 | output=3 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [conv-extract] 81 | profile=cfg/v1/tiny-old.profile 82 | input=3 83 | output=4 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [maxpool] 91 | size=2 92 | stride=2 93 | 94 | [conv-extract] 95 | profile=cfg/v1/tiny-old.profile 96 | input=4 97 | output=5 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [maxpool] 105 | size=2 106 | stride=2 107 | 108 | [conv-extract] 109 | profile=cfg/v1/tiny-old.profile 110 | input=5 111 | output=6 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [conv-extract] 119 | profile=cfg/v1/tiny-old.profile 120 | 
input=6 121 | output=7 122 | filters=1024 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [conv-extract] 129 | profile=cfg/v1/tiny-old.profile 130 | input=7 131 | output=8 132 | filters=1024 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | [extract] 139 | profile=cfg/v1/tiny-old.profile 140 | input=8 141 | output=9 142 | old=7,7,1024,256 143 | activation=linear 144 | 145 | [extract] 146 | profile=cfg/v1/tiny-old.profile 147 | input=9 148 | output=10 149 | old=256,4096 150 | activation=leaky 151 | 152 | [dropout] 153 | probability=1. 154 | 155 | [select] 156 | input=cfg/v1/tiny-old.profile,10 157 | old_output=1470 158 | keep=8,14,15,19/20 159 | bins=49 160 | output=686 161 | activation=linear 162 | 163 | [detection] 164 | classes=4 165 | coords=4 166 | rescore=1 167 | side=7 168 | num=2 169 | softmax=0 170 | sqrt=1 171 | jitter=.2 172 | object_scale=2.5 173 | noobject_scale=2 174 | class_scale=2.5 175 | coord_scale=5 176 | 177 | save=11250 -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=16 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 48 | size=3 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [maxpool] 54 | size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=1024 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [connected] 112 | output=256 113 | activation=linear 114 | 115 | [connected] 116 | output=4096 117 | activation=leaky 118 | 119 | [dropout] 120 | probability=.5 121 | 122 | [connected] 123 | output= 1470 124 | activation=linear 125 | 126 | [detection] 127 | classes=20 128 | coords=4 129 | rescore=1 130 | side=7 131 | num=2 132 | softmax=0 133 | sqrt=1 134 | jitter=.2 135 | object_scale=1 136 | noobject_scale=.5 137 | class_scale=1 138 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/v1/yolo-tiny4c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 
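# With policy=steps, darknet (as commonly documented) multiplies the learning rate by the matching entry of `scales` once the batch counter passes each value in `steps`; read that way, the schedule below warms up by 5x,5x,2x,2x over the first 80 batches and decays by 10x at 20000 and 30000 batches.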
10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=16 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 48 | size=3 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [maxpool] 54 | size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=1024 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [connected] 112 | output=256 113 | activation=linear 114 | 115 | [connected] 116 | output=4096 117 | activation=leaky 118 | 119 | [dropout] 120 | probability=.5 121 | 122 | [select] 123 | old_output=1470 124 | keep=8,14,15,19/20 125 | bins=49 126 | output=686 127 | activation=linear 128 | 129 | [detection] 130 | classes=4 131 | coords=4 132 | rescore=1 133 | side=7 134 | num=2 135 | softmax=0 136 | sqrt=1 137 | jitter=.2 138 | object_scale=1 139 | noobject_scale=.5 140 | class_scale=1 141 | coord_scale=5 -------------------------------------------------------------------------------- /cfg/yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | height=416 5 | width=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.0001 15 | max_batches = 45000 16 | policy=steps 17 | steps=100,25000,35000 18 | scales=10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=32 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=64 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=128 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=64 55 | size=1 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | batch_normalize=1 74 | filters=256 75 | size=3 76 | stride=1 77 | pad=1 78 | activation=leaky 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=128 83 | size=1 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=256 91 | 
size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [maxpool] 97 | size=2 98 | stride=2 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=512 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=256 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=512 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [maxpool] 141 | size=2 142 | stride=2 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=1024 147 | size=3 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=512 155 | size=1 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=1024 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=512 171 | size=1 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=1024 179 | size=3 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | 185 | ####### 186 | 187 | [convolutional] 188 | batch_normalize=1 189 | size=3 190 | stride=1 191 | pad=1 192 | filters=1024 193 | activation=leaky 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [route] 204 | layers=-9 205 | 206 | [reorg] 207 | stride=2 208 | 209 | [route] 210 | layers=-1,-3 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | size=3 215 | stride=1 216 | pad=1 217 | filters=1024 218 | activation=leaky 219 | 220 | [convolutional] 221 | size=1 222 | stride=1 223 | pad=1 224 | filters=125 225 | activation=linear 226 | 227 | [region] 228 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 229 | bias_match=1 230 | classes=20 231 | coords=4 232 | num=5 233 | softmax=1 234 | jitter=.2 235 | rescore=1 236 | 237 | object_scale=5 238 | noobject_scale=1 239 | class_scale=1 240 | coord_scale=1 241 | 242 | absolute=1 243 | thresh = .6 244 | random=0 245 | -------------------------------------------------------------------------------- /cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | 
batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .1 258 | random=1 259 | -------------------------------------------------------------------------------- /cfg/yolov2-tiny-voc-1c.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | 
width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | max_batches = 40200 20 | policy=steps 21 | steps=-1,100,20000,30000 22 | scales=.1,10,.1,.1 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=16 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [maxpool] 33 | size=2 34 | stride=2 35 | 36 | [convolutional] 37 | batch_normalize=1 38 | filters=32 39 | size=3 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [maxpool] 45 | size=2 46 | stride=2 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=64 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [maxpool] 57 | size=2 58 | stride=2 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | batch_normalize=1 74 | filters=256 75 | size=3 76 | stride=1 77 | pad=1 78 | activation=leaky 79 | 80 | [maxpool] 81 | size=2 82 | stride=2 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [maxpool] 93 | size=2 94 | stride=1 95 | 96 | [convolutional] 97 | batch_normalize=1 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | ########### 105 | 106 | [convolutional] 107 | batch_normalize=1 108 | size=3 109 | stride=1 110 | pad=1 111 | filters=1024 112 | activation=leaky 113 | 114 | [convolutional] 115 | size=1 116 | stride=1 117 | pad=1 118 | filters=30 119 | activation=linear 120 | 121 | [region] 122 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 123 | bias_match=1 124 | classes=1 125 | coords=4 126 | num=5 127 | softmax=1 128 | jitter=.2 129 | rescore=1 130 | 131 | object_scale=5 132 | noobject_scale=1 133 | class_scale=1 134 | coord_scale=1 135 | 136 | absolute=1 137 | thresh = .6 138 | random=1 139 | -------------------------------------------------------------------------------- /cfg/yolov2-tiny-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | max_batches = 40200 20 | policy=steps 21 | steps=-1,100,20000,30000 22 | scales=.1,10,.1,.1 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=16 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [maxpool] 33 | size=2 34 | stride=2 35 | 36 | [convolutional] 37 | batch_normalize=1 38 | filters=32 39 | size=3 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [maxpool] 45 | size=2 46 | stride=2 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=64 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [maxpool] 57 | size=2 58 | stride=2 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | batch_normalize=1 74 | filters=256 75 | size=3 76 | stride=1 77 | pad=1 78 | activation=leaky 79 | 80 | [maxpool] 81 | size=2 82 | stride=2 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=512 87 | size=3 88 | 
stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [maxpool] 93 | size=2 94 | stride=1 95 | 96 | [convolutional] 97 | batch_normalize=1 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | ########### 105 | 106 | [convolutional] 107 | batch_normalize=1 108 | size=3 109 | stride=1 110 | pad=1 111 | filters=1024 112 | activation=leaky 113 | 114 | [convolutional] 115 | size=1 116 | stride=1 117 | pad=1 118 | filters=125 119 | activation=linear 120 | 121 | [region] 122 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 123 | bias_match=1 124 | classes=20 125 | coords=4 126 | num=5 127 | softmax=1 128 | jitter=.2 129 | rescore=1 130 | 131 | object_scale=5 132 | noobject_scale=1 133 | class_scale=1 134 | coord_scale=1 135 | 136 | absolute=1 137 | thresh = .6 138 | random=1 139 | -------------------------------------------------------------------------------- /ckpt/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "yolov2-tiny-voc-1c-13700" 2 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-1000" 3 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-2000" 4 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-3000" 5 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-4000" 6 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-5000" 7 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-6000" 8 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-7000" 9 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-8000" 10 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-9000" 11 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-10000" 12 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-11000" 13 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-12000" 14 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-13000" 15 | all_model_checkpoint_paths: "yolov2-tiny-voc-1c-13700" 16 | -------------------------------------------------------------------------------- /ckpt/yolov2-tiny-voc-1c-4000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/ckpt/yolov2-tiny-voc-1c-4000.index -------------------------------------------------------------------------------- /ckpt/yolov2-tiny-voc-1c-4000.profile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/ckpt/yolov2-tiny-voc-1c-4000.profile -------------------------------------------------------------------------------- /darkflow/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/__pycache__/defaults.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/__pycache__/defaults.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/__pycache__/version.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/__pycache__/version.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/cli.py: -------------------------------------------------------------------------------- 1 | from .defaults import argHandler #Import the default arguments 2 | import os 3 | from .net.build import TFNet 4 | 5 | def cliHandler(args): 6 | FLAGS = argHandler() 7 | FLAGS.setDefaults() 8 | FLAGS.parseArgs(args) 9 | 10 | # make sure all necessary dirs exist 11 | def _get_dir(dirs): 12 | for d in dirs: 13 | this = os.path.abspath(os.path.join(os.path.curdir, d)) 14 | if not os.path.exists(this): os.makedirs(this) 15 | 16 | requiredDirectories = [FLAGS.imgdir, FLAGS.binary, FLAGS.backup, os.path.join(FLAGS.imgdir,'out')] 17 | if FLAGS.summary: 18 | requiredDirectories.append(FLAGS.summary) 19 | 20 | _get_dir(requiredDirectories) 21 | 22 | # fix FLAGS.load to appropriate type 23 | try: FLAGS.load = int(FLAGS.load) 24 | except: pass 25 | 26 | tfnet = TFNet(FLAGS) 27 | 28 | if FLAGS.demo: 29 | tfnet.camera() 30 | exit('Demo stopped, exit.') 31 | 32 | if FLAGS.train: 33 | print('Enter training ...'); tfnet.train() 34 | if not FLAGS.savepb: 35 | exit('Training finished, exit.') 36 | 37 | if FLAGS.savepb: 38 | print('Rebuild a constant version ...') 39 | tfnet.savepb(); exit('Done') 40 | 41 | tfnet.predict() 42 | -------------------------------------------------------------------------------- /darkflow/cython_utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/cython_utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/cython_utils/cy_yolo2_findboxes.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/cython_utils/cy_yolo2_findboxes.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /darkflow/cython_utils/cy_yolo2_findboxes.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | ctypedef np.float_t DTYPE_t 5 | from libc.math cimport exp 6 | from ..utils.box import BoundBox 7 | from nms cimport NMS 8 | 9 | #expit 10 | @cython.boundscheck(False) # turn off bounds-checking for entire function 11 | @cython.wraparound(False) # turn off negative index wrapping for entire function 12 | @cython.cdivision(True) 13 | cdef float expit_c(float x): 14 | cdef float y= 1/(1+exp(-x)) 15 | return y 16 | 17 | #MAX 18 | @cython.boundscheck(False) # turn off bounds-checking for entire function 19 | @cython.wraparound(False) # turn off negative index wrapping for entire function 20 | @cython.cdivision(True) 21 | cdef float max_c(float a, float b): 22 | if(a>b): 23 | return a 24 | return b 25 | 26 | """ 27 | #SOFTMAX! 
28 | @cython.cdivision(True) 29 | @cython.boundscheck(False) # turn off bounds-checking for entire function 30 | @cython.wraparound(False) # turn off negative index wrapping for entire function 31 | cdef void _softmax_c(float* x, int classes): 32 | cdef: 33 | float sum = 0 34 | np.intp_t k 35 | float arr_max = 0 36 | for k in range(classes): 37 | arr_max = max(arr_max,x[k]) 38 | 39 | for k in range(classes): 40 | x[k] = exp(x[k]-arr_max) 41 | sum += x[k] 42 | 43 | for k in range(classes): 44 | x[k] = x[k]/sum 45 | """ 46 | 47 | 48 | 49 | #BOX CONSTRUCTOR 50 | @cython.cdivision(True) 51 | @cython.boundscheck(False) # turn off bounds-checking for entire function 52 | @cython.wraparound(False) # turn off negative index wrapping for entire function 53 | def box_constructor(meta,np.ndarray[float,ndim=3] net_out_in): 54 | cdef: 55 | np.intp_t H, W, _, C, B, row, col, box_loop, class_loop 56 | np.intp_t row1, col1, box_loop1,index,index2 57 | float threshold = meta['thresh'] 58 | float tempc,arr_max=0,sum=0 59 | double[:] anchors = np.asarray(meta['anchors']) 60 | list boxes = list() 61 | 62 | H, W, _ = meta['out_size'] 63 | C = meta['classes'] 64 | B = meta['num'] 65 | 66 | cdef: 67 | float[:, :, :, ::1] net_out = net_out_in.reshape([H, W, B, net_out_in.shape[2]/B]) 68 | float[:, :, :, ::1] Classes = net_out[:, :, :, 5:] 69 | float[:, :, :, ::1] Bbox_pred = net_out[:, :, :, :5] 70 | float[:, :, :, ::1] probs = np.zeros((H, W, B, C), dtype=np.float32) 71 | 72 | for row in range(H): 73 | for col in range(W): 74 | for box_loop in range(B): 75 | arr_max=0 76 | sum=0; 77 | Bbox_pred[row, col, box_loop, 4] = expit_c(Bbox_pred[row, col, box_loop, 4]) 78 | Bbox_pred[row, col, box_loop, 0] = (col + expit_c(Bbox_pred[row, col, box_loop, 0])) / W 79 | Bbox_pred[row, col, box_loop, 1] = (row + expit_c(Bbox_pred[row, col, box_loop, 1])) / H 80 | Bbox_pred[row, col, box_loop, 2] = exp(Bbox_pred[row, col, box_loop, 2]) * anchors[2 * box_loop + 0] / W 81 | Bbox_pred[row, col, box_loop, 3] = exp(Bbox_pred[row, col, box_loop, 3]) * anchors[2 * box_loop + 1] / H 82 | #SOFTMAX BLOCK, no more pointer juggling 83 | for class_loop in range(C): 84 | arr_max=max_c(arr_max,Classes[row,col,box_loop,class_loop]) 85 | 86 | for class_loop in range(C): 87 | Classes[row,col,box_loop,class_loop]=exp(Classes[row,col,box_loop,class_loop]-arr_max) 88 | sum+=Classes[row,col,box_loop,class_loop] 89 | 90 | for class_loop in range(C): 91 | tempc = Classes[row, col, box_loop, class_loop] * Bbox_pred[row, col, box_loop, 4]/sum 92 | if(tempc > threshold): 93 | probs[row, col, box_loop, class_loop] = tempc 94 | 95 | 96 | #NMS 97 | return NMS(np.ascontiguousarray(probs).reshape(H*W*B,C), np.ascontiguousarray(Bbox_pred).reshape(H*B*W,5)) 98 | -------------------------------------------------------------------------------- /darkflow/cython_utils/cy_yolo_findboxes.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/cython_utils/cy_yolo_findboxes.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /darkflow/cython_utils/cy_yolo_findboxes.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | ctypedef np.float_t DTYPE_t 5 | from libc.math cimport exp 6 | from ..utils.box import BoundBox 7 | from nms 
cimport NMS 8 | 9 | 10 | 11 | @cython.cdivision(True) 12 | @cython.boundscheck(False) # turn off bounds-checking for entire function 13 | @cython.wraparound(False) # turn off negative index wrapping for entire function 14 | def yolo_box_constructor(meta,np.ndarray[float] net_out, float threshold): 15 | 16 | cdef: 17 | float sqrt 18 | int C,B,S 19 | int SS,prob_size,conf_size 20 | int grid, b 21 | int class_loop 22 | 23 | 24 | sqrt = meta['sqrt'] + 1 25 | C, B, S = meta['classes'], meta['num'], meta['side'] 26 | boxes = [] 27 | SS = S * S # number of grid cells 28 | prob_size = SS * C # class probabilities 29 | conf_size = SS * B # confidences for each grid cell 30 | 31 | cdef: 32 | float [:,::1] probs = np.ascontiguousarray(net_out[0 : prob_size]).reshape([SS,C]) 33 | float [:,::1] confs = np.ascontiguousarray(net_out[prob_size : (prob_size + conf_size)]).reshape([SS,B]) 34 | float [: , : ,::1] coords = np.ascontiguousarray(net_out[(prob_size + conf_size) : ]).reshape([SS, B, 4]) 35 | float [:,:,::1] final_probs = np.zeros([SS,B,C],dtype=np.float32) 36 | 37 | 38 | for grid in range(SS): 39 | for b in range(B): 40 | coords[grid, b, 0] = (coords[grid, b, 0] + grid % S) / S 41 | coords[grid, b, 1] = (coords[grid, b, 1] + grid // S) / S 42 | coords[grid, b, 2] = coords[grid, b, 2] ** sqrt 43 | coords[grid, b, 3] = coords[grid, b, 3] ** sqrt 44 | for class_loop in range(C): 45 | probs[grid, class_loop] = probs[grid, class_loop] * confs[grid, b] 46 | #print("PROBS",probs[grid,class_loop]) 47 | if(probs[grid,class_loop] > threshold ): 48 | final_probs[grid, b, class_loop] = probs[grid, class_loop] 49 | 50 | 51 | return NMS(np.ascontiguousarray(final_probs).reshape(SS*B, C) , np.ascontiguousarray(coords).reshape(SS*B, 4)) 52 | -------------------------------------------------------------------------------- /darkflow/cython_utils/nms.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/cython_utils/nms.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /darkflow/cython_utils/nms.pxd: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | ctypedef np.float_t DTYPE_t 5 | from libc.math cimport exp 6 | from utils.box import BoundBox 7 | 8 | 9 | cdef NMS(float[:, ::1] , float[:, ::1] ) 10 | 11 | 12 | -------------------------------------------------------------------------------- /darkflow/cython_utils/nms.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | from libc.math cimport exp 5 | from ..utils.box import BoundBox 6 | 7 | 8 | 9 | #OVERLAP 10 | @cython.boundscheck(False) # turn off bounds-checking for entire function 11 | @cython.wraparound(False) # turn off negative index wrapping for entire function 12 | @cython.cdivision(True) 13 | cdef float overlap_c(float x1, float w1 , float x2 , float w2): 14 | cdef: 15 | float l1,l2,left,right 16 | l1 = x1 - w1 /2. 17 | l2 = x2 - w2 /2. 18 | left = max(l1,l2) 19 | r1 = x1 + w1 /2. 20 | r2 = x2 + w2 /2. 
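    # x1,w1 and x2,w2 are a box's centre and width along one axis; l1,l2 and r1,r2 are the interval ends,
    # so the (right - left) value returned below goes negative whenever the two intervals do not intersect.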
21 | right = min(r1, r2) 22 | return right - left; 23 | 24 | #BOX INTERSECTION 25 | @cython.boundscheck(False) # turn off bounds-checking for entire function 26 | @cython.wraparound(False) # turn off negative index wrapping for entire function 27 | @cython.cdivision(True) 28 | cdef float box_intersection_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 29 | cdef: 30 | float w,h,area 31 | w = overlap_c(ax, aw, bx, bw) 32 | h = overlap_c(ay, ah, by, bh) 33 | if w < 0 or h < 0: return 0 34 | area = w * h 35 | return area 36 | 37 | #BOX UNION 38 | @cython.boundscheck(False) # turn off bounds-checking for entire function 39 | @cython.wraparound(False) # turn off negative index wrapping for entire function 40 | @cython.cdivision(True) 41 | cdef float box_union_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 42 | cdef: 43 | float i,u 44 | i = box_intersection_c(ax, ay, aw, ah, bx, by, bw, bh) 45 | u = aw * ah + bw * bh -i 46 | return u 47 | 48 | 49 | #BOX IOU 50 | @cython.boundscheck(False) # turn off bounds-checking for entire function 51 | @cython.wraparound(False) # turn off negative index wrapping for entire function 52 | @cython.cdivision(True) 53 | cdef float box_iou_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 54 | return box_intersection_c(ax, ay, aw, ah, bx, by, bw, bh) / box_union_c(ax, ay, aw, ah, bx, by, bw, bh); 55 | 56 | 57 | 58 | 59 | #NMS 60 | @cython.boundscheck(False) # turn off bounds-checking for entire function 61 | @cython.wraparound(False) # turn off negative index wrapping for entire function 62 | @cython.cdivision(True) 63 | cdef NMS(float[:, ::1] final_probs , float[:, ::1] final_bbox): 64 | cdef list boxes = list() 65 | cdef set indices = set() 66 | cdef: 67 | np.intp_t pred_length,class_length,class_loop,index,index2 68 | 69 | 70 | pred_length = final_bbox.shape[0] 71 | class_length = final_probs.shape[1] 72 | for class_loop in range(class_length): 73 | for index in range(pred_length): 74 | if final_probs[index,class_loop] == 0: continue 75 | for index2 in range(index+1,pred_length): 76 | if final_probs[index2,class_loop] == 0: continue 77 | if index==index2 : continue 78 | if box_iou_c(final_bbox[index,0],final_bbox[index,1],final_bbox[index,2],final_bbox[index,3],final_bbox[index2,0],final_bbox[index2,1],final_bbox[index2,2],final_bbox[index2,3]) >= 0.4: 79 | if final_probs[index2,class_loop] > final_probs[index, class_loop] : 80 | final_probs[index, class_loop] =0 81 | break 82 | final_probs[index2,class_loop]=0 83 | 84 | if index not in indices: 85 | bb=BoundBox(class_length) 86 | bb.x = final_bbox[index, 0] 87 | bb.y = final_bbox[index, 1] 88 | bb.w = final_bbox[index, 2] 89 | bb.h = final_bbox[index, 3] 90 | bb.c = final_bbox[index, 4] 91 | bb.probs = np.asarray(final_probs[index,:]) 92 | boxes.append(bb) 93 | indices.add(index) 94 | return boxes 95 | 96 | # cdef NMS(float[:, ::1] final_probs , float[:, ::1] final_bbox): 97 | # cdef list boxes = list() 98 | # cdef: 99 | # np.intp_t pred_length,class_length,class_loop,index,index2, i, j 100 | 101 | 102 | # pred_length = final_bbox.shape[0] 103 | # class_length = final_probs.shape[1] 104 | 105 | # for class_loop in range(class_length): 106 | # order = np.argsort(final_probs[:,class_loop])[::-1] 107 | # # First box 108 | # for i in range(pred_length): 109 | # index = order[i] 110 | # if final_probs[index, class_loop] == 0.: 111 | # continue 112 | # # Second box 113 | # for j in range(i+1, pred_length): 114 | # 
index2 = order[j] 115 | # if box_iou_c( 116 | # final_bbox[index,0],final_bbox[index,1], 117 | # final_bbox[index,2],final_bbox[index,3], 118 | # final_bbox[index2,0],final_bbox[index2,1], 119 | # final_bbox[index2,2],final_bbox[index2,3]) >= 0.4: 120 | # final_probs[index2, class_loop] = 0. 121 | 122 | # bb = BoundBox(class_length) 123 | # bb.x = final_bbox[index, 0] 124 | # bb.y = final_bbox[index, 1] 125 | # bb.w = final_bbox[index, 2] 126 | # bb.h = final_bbox[index, 3] 127 | # bb.c = final_bbox[index, 4] 128 | # bb.probs = np.asarray(final_probs[index,:]) 129 | # boxes.append(bb) 130 | 131 | # return boxes 132 | -------------------------------------------------------------------------------- /darkflow/dark/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/dark/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/dark/__pycache__/connected.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/dark/__pycache__/connected.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/dark/__pycache__/convolution.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/dark/__pycache__/convolution.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/dark/__pycache__/darknet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/dark/__pycache__/darknet.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/dark/__pycache__/darkop.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/dark/__pycache__/darkop.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/dark/__pycache__/layer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/dark/__pycache__/layer.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/dark/connected.py: -------------------------------------------------------------------------------- 1 | from .layer import Layer 2 | import numpy as np 3 | 4 | class extract_layer(Layer): 5 | def setup(self, old_inp, old_out, 6 | activation, inp, out): 7 | if inp is None: inp = range(old_inp) 8 | self.activation = activation 9 | self.old_inp = old_inp 10 | self.old_out = old_out 11 | self.inp = inp 12 | self.out = out 13 | self.wshape = { 14 | 'biases': [len(self.out)], 15 | 'weights': [len(self.inp), len(self.out)] 16 | } 17 | 18 | @property 19 | def signature(self): 20 | 
sig = ['connected'] 21 | sig += self._signature[1:-2] 22 | return sig 23 | 24 | def present(self): 25 | args = self.signature 26 | self.presenter = connected_layer(*args) 27 | 28 | def recollect(self, val): 29 | w = val['weights'] 30 | b = val['biases'] 31 | if w is None: self.w = val; return 32 | w = np.take(w, self.inp, 0) 33 | w = np.take(w, self.out, 1) 34 | b = np.take(b, self.out) 35 | assert1 = w.shape == tuple(self.wshape['weights']) 36 | assert2 = b.shape == tuple(self.wshape['biases']) 37 | assert assert1 and assert2, \ 38 | 'Dimension does not match in {} recollect'.format( 39 | self._signature) 40 | 41 | self.w['weights'] = w 42 | self.w['biases'] = b 43 | 44 | 45 | 46 | class select_layer(Layer): 47 | def setup(self, inp, old, 48 | activation, inp_idx, 49 | out, keep, train): 50 | self.old = old 51 | self.keep = keep 52 | self.train = train 53 | self.inp_idx = inp_idx 54 | self.activation = activation 55 | inp_dim = inp 56 | if inp_idx is not None: 57 | inp_dim = len(inp_idx) 58 | self.inp = inp_dim 59 | self.out = out 60 | self.wshape = { 61 | 'biases': [out], 62 | 'weights': [inp_dim, out] 63 | } 64 | 65 | @property 66 | def signature(self): 67 | sig = ['connected'] 68 | sig += self._signature[1:-4] 69 | return sig 70 | 71 | def present(self): 72 | args = self.signature 73 | self.presenter = connected_layer(*args) 74 | 75 | def recollect(self, val): 76 | w = val['weights'] 77 | b = val['biases'] 78 | if w is None: self.w = val; return 79 | if self.inp_idx is not None: 80 | w = np.take(w, self.inp_idx, 0) 81 | 82 | keep_b = np.take(b, self.keep) 83 | keep_w = np.take(w, self.keep, 1) 84 | train_b = b[self.train:] 85 | train_w = w[:, self.train:] 86 | self.w['biases'] = np.concatenate( 87 | (keep_b, train_b), axis = 0) 88 | self.w['weights'] = np.concatenate( 89 | (keep_w, train_w), axis = 1) 90 | 91 | 92 | class connected_layer(Layer): 93 | def setup(self, input_size, 94 | output_size, activation): 95 | self.activation = activation 96 | self.inp = input_size 97 | self.out = output_size 98 | self.wshape = { 99 | 'biases': [self.out], 100 | 'weights': [self.inp, self.out] 101 | } 102 | 103 | def finalize(self, transpose): 104 | weights = self.w['weights'] 105 | if weights is None: return 106 | shp = self.wshape['weights'] 107 | if not transpose: 108 | weights = weights.reshape(shp[::-1]) 109 | weights = weights.transpose([1,0]) 110 | else: weights = weights.reshape(shp) 111 | self.w['weights'] = weights -------------------------------------------------------------------------------- /darkflow/dark/convolution.py: -------------------------------------------------------------------------------- 1 | from .layer import Layer 2 | import numpy as np 3 | 4 | class local_layer(Layer): 5 | def setup(self, ksize, c, n, stride, 6 | pad, w_, h_, activation): 7 | self.pad = pad * int(ksize / 2) 8 | self.activation = activation 9 | self.stride = stride 10 | self.ksize = ksize 11 | self.h_out = h_ 12 | self.w_out = w_ 13 | 14 | self.dnshape = [h_ * w_, n, c, ksize, ksize] 15 | self.wshape = dict({ 16 | 'biases': [h_ * w_ * n], 17 | 'kernels': [h_ * w_, ksize, ksize, c, n] 18 | }) 19 | 20 | def finalize(self, _): 21 | weights = self.w['kernels'] 22 | if weights is None: return 23 | weights = weights.reshape(self.dnshape) 24 | weights = weights.transpose([0,3,4,2,1]) 25 | self.w['kernels'] = weights 26 | 27 | class conv_extract_layer(Layer): 28 | def setup(self, ksize, c, n, stride, 29 | pad, batch_norm, activation, 30 | inp, out): 31 | if inp is None: inp = range(c) 32 | self.activation = 
activation 33 | self.batch_norm = batch_norm 34 | self.stride = stride 35 | self.ksize = ksize 36 | self.pad = pad 37 | self.inp = inp 38 | self.out = out 39 | self.wshape = dict({ 40 | 'biases': [len(out)], 41 | 'kernel': [ksize, ksize, len(inp), len(out)] 42 | }) 43 | 44 | @property 45 | def signature(self): 46 | sig = ['convolutional'] 47 | sig += self._signature[1:-2] 48 | return sig 49 | 50 | def present(self): 51 | args = self.signature 52 | self.presenter = convolutional_layer(*args) 53 | 54 | def recollect(self, w): 55 | if w is None: 56 | self.w = w 57 | return 58 | k = w['kernel'] 59 | b = w['biases'] 60 | k = np.take(k, self.inp, 2) 61 | k = np.take(k, self.out, 3) 62 | b = np.take(b, self.out) 63 | assert1 = k.shape == tuple(self.wshape['kernel']) 64 | assert2 = b.shape == tuple(self.wshape['biases']) 65 | assert assert1 and assert2, \ 66 | 'Dimension not matching in {} recollect'.format( 67 | self._signature) 68 | self.w['kernel'] = k 69 | self.w['biases'] = b 70 | 71 | 72 | class conv_select_layer(Layer): 73 | def setup(self, ksize, c, n, stride, 74 | pad, batch_norm, activation, 75 | keep_idx, real_n): 76 | self.batch_norm = bool(batch_norm) 77 | self.activation = activation 78 | self.keep_idx = keep_idx 79 | self.stride = stride 80 | self.ksize = ksize 81 | self.pad = pad 82 | self.wshape = dict({ 83 | 'biases': [real_n], 84 | 'kernel': [ksize, ksize, c, real_n] 85 | }) 86 | if self.batch_norm: 87 | self.wshape.update({ 88 | 'moving_variance' : [real_n], 89 | 'moving_mean': [real_n], 90 | 'gamma' : [real_n] 91 | }) 92 | self.h['is_training'] = { 93 | 'shape': (), 94 | 'feed': True, 95 | 'dfault': False 96 | } 97 | 98 | @property 99 | def signature(self): 100 | sig = ['convolutional'] 101 | sig += self._signature[1:-2] 102 | return sig 103 | 104 | def present(self): 105 | args = self.signature 106 | self.presenter = convolutional_layer(*args) 107 | 108 | def recollect(self, w): 109 | if w is None: 110 | self.w = w 111 | return 112 | idx = self.keep_idx 113 | k = w['kernel'] 114 | b = w['biases'] 115 | self.w['kernel'] = np.take(k, idx, 3) 116 | self.w['biases'] = np.take(b, idx) 117 | if self.batch_norm: 118 | m = w['moving_mean'] 119 | v = w['moving_variance'] 120 | g = w['gamma'] 121 | self.w['moving_mean'] = np.take(m, idx) 122 | self.w['moving_variance'] = np.take(v, idx) 123 | self.w['gamma'] = np.take(g, idx) 124 | 125 | class convolutional_layer(Layer): 126 | def setup(self, ksize, c, n, stride, 127 | pad, batch_norm, activation): 128 | self.batch_norm = bool(batch_norm) 129 | self.activation = activation 130 | self.stride = stride 131 | self.ksize = ksize 132 | self.pad = pad 133 | self.dnshape = [n, c, ksize, ksize] # darknet shape 134 | self.wshape = dict({ 135 | 'biases': [n], 136 | 'kernel': [ksize, ksize, c, n] 137 | }) 138 | if self.batch_norm: 139 | self.wshape.update({ 140 | 'moving_variance' : [n], 141 | 'moving_mean': [n], 142 | 'gamma' : [n] 143 | }) 144 | self.h['is_training'] = { 145 | 'feed': True, 146 | 'dfault': False, 147 | 'shape': () 148 | } 149 | 150 | def finalize(self, _): 151 | """deal with darknet""" 152 | kernel = self.w['kernel'] 153 | if kernel is None: return 154 | kernel = kernel.reshape(self.dnshape) 155 | kernel = kernel.transpose([2,3,1,0]) 156 | self.w['kernel'] = kernel -------------------------------------------------------------------------------- /darkflow/dark/darknet.py: -------------------------------------------------------------------------------- 1 | from ..utils.process import cfg_yielder 2 | from .darkop import 
create_darkop 3 | from ..utils import loader 4 | import warnings 5 | import time 6 | import os 7 | 8 | class Darknet(object): 9 | 10 | _EXT = '.weights' 11 | 12 | def __init__(self, FLAGS): 13 | self.get_weight_src(FLAGS) 14 | self.modify = False 15 | 16 | print('Parsing {}'.format(self.src_cfg)) 17 | src_parsed = self.parse_cfg(self.src_cfg, FLAGS) 18 | self.src_meta, self.src_layers = src_parsed 19 | 20 | if self.src_cfg == FLAGS.model: 21 | self.meta, self.layers = src_parsed 22 | else: 23 | print('Parsing {}'.format(FLAGS.model)) 24 | des_parsed = self.parse_cfg(FLAGS.model, FLAGS) 25 | self.meta, self.layers = des_parsed 26 | 27 | self.load_weights() 28 | 29 | def get_weight_src(self, FLAGS): 30 | """ 31 | analyse FLAGS.load to know where is the 32 | source binary and what is its config. 33 | can be: None, FLAGS.model, or some other 34 | """ 35 | self.src_bin = FLAGS.model + self._EXT 36 | self.src_bin = FLAGS.binary + self.src_bin 37 | self.src_bin = os.path.abspath(self.src_bin) 38 | exist = os.path.isfile(self.src_bin) 39 | 40 | if FLAGS.load == str(): FLAGS.load = int() 41 | if type(FLAGS.load) is int: 42 | self.src_cfg = FLAGS.model 43 | if FLAGS.load: self.src_bin = None 44 | elif not exist: self.src_bin = None 45 | else: 46 | assert os.path.isfile(FLAGS.load), \ 47 | '{} not found'.format(FLAGS.load) 48 | self.src_bin = FLAGS.load 49 | name = loader.model_name(FLAGS.load) 50 | cfg_path = os.path.join(FLAGS.config, name + '.cfg') 51 | if not os.path.isfile(cfg_path): 52 | warnings.warn( 53 | '{} not found, use {} instead'.format( 54 | cfg_path, FLAGS.model)) 55 | cfg_path = FLAGS.model 56 | self.src_cfg = cfg_path 57 | FLAGS.load = int() 58 | 59 | 60 | def parse_cfg(self, model, FLAGS): 61 | """ 62 | return a list of `layers` objects (darkop.py) 63 | given path to binaries/ and configs/ 64 | """ 65 | args = [model, FLAGS.binary] 66 | cfg_layers = cfg_yielder(*args) 67 | meta = dict(); layers = list() 68 | for i, info in enumerate(cfg_layers): 69 | if i == 0: meta = info; continue 70 | else: new = create_darkop(*info) 71 | layers.append(new) 72 | return meta, layers 73 | 74 | def load_weights(self): 75 | """ 76 | Use `layers` and Loader to load .weights file 77 | """ 78 | print('Loading {} ...'.format(self.src_bin)) 79 | start = time.time() 80 | 81 | args = [self.src_bin, self.src_layers] 82 | wgts_loader = loader.create_loader(*args) 83 | for layer in self.layers: layer.load(wgts_loader) 84 | 85 | stop = time.time() 86 | print('Finished in {}s'.format(stop - start)) -------------------------------------------------------------------------------- /darkflow/dark/darkop.py: -------------------------------------------------------------------------------- 1 | from .layer import Layer 2 | from .convolution import * 3 | from .connected import * 4 | 5 | class avgpool_layer(Layer): 6 | pass 7 | 8 | class crop_layer(Layer): 9 | pass 10 | 11 | class maxpool_layer(Layer): 12 | def setup(self, ksize, stride, pad): 13 | self.stride = stride 14 | self.ksize = ksize 15 | self.pad = pad 16 | 17 | class softmax_layer(Layer): 18 | def setup(self, groups): 19 | self.groups = groups 20 | 21 | class dropout_layer(Layer): 22 | def setup(self, p): 23 | self.h['pdrop'] = dict({ 24 | 'feed': p, # for training 25 | 'dfault': 1.0, # for testing 26 | 'shape': () 27 | }) 28 | 29 | class route_layer(Layer): 30 | def setup(self, routes): 31 | self.routes = routes 32 | 33 | class reorg_layer(Layer): 34 | def setup(self, stride): 35 | self.stride = stride 36 | 37 | """ 38 | Darkop Factory 39 | """ 40 | 41 | 
darkops = { 42 | 'dropout': dropout_layer, 43 | 'connected': connected_layer, 44 | 'maxpool': maxpool_layer, 45 | 'convolutional': convolutional_layer, 46 | 'avgpool': avgpool_layer, 47 | 'softmax': softmax_layer, 48 | 'crop': crop_layer, 49 | 'local': local_layer, 50 | 'select': select_layer, 51 | 'route': route_layer, 52 | 'reorg': reorg_layer, 53 | 'conv-select': conv_select_layer, 54 | 'conv-extract': conv_extract_layer, 55 | 'extract': extract_layer 56 | } 57 | 58 | def create_darkop(ltype, num, *args): 59 | op_class = darkops.get(ltype, Layer) 60 | return op_class(ltype, num, *args) -------------------------------------------------------------------------------- /darkflow/dark/layer.py: -------------------------------------------------------------------------------- 1 | from ..utils import loader 2 | import numpy as np 3 | 4 | class Layer(object): 5 | 6 | def __init__(self, *args): 7 | self._signature = list(args) 8 | self.type = list(args)[0] 9 | self.number = list(args)[1] 10 | 11 | self.w = dict() # weights 12 | self.h = dict() # placeholders 13 | self.wshape = dict() # weight shape 14 | self.wsize = dict() # weight size 15 | self.setup(*args[2:]) # set attr up 16 | self.present() 17 | for var in self.wshape: 18 | shp = self.wshape[var] 19 | size = np.prod(shp) 20 | self.wsize[var] = size 21 | 22 | def load(self, src_loader): 23 | var_lay = src_loader.VAR_LAYER 24 | if self.type not in var_lay: return 25 | 26 | src_type = type(src_loader) 27 | if src_type is loader.weights_loader: 28 | wdict = self.load_weights(src_loader) 29 | else: 30 | wdict = self.load_ckpt(src_loader) 31 | if wdict is not None: 32 | self.recollect(wdict) 33 | 34 | def load_weights(self, src_loader): 35 | val = src_loader([self.presenter]) 36 | if val is None: return None 37 | else: return val.w 38 | 39 | def load_ckpt(self, src_loader): 40 | result = dict() 41 | presenter = self.presenter 42 | for var in presenter.wshape: 43 | name = presenter.varsig(var) 44 | shape = presenter.wshape[var] 45 | key = [name, shape] 46 | val = src_loader(key) 47 | result[var] = val 48 | return result 49 | 50 | @property 51 | def signature(self): 52 | return self._signature 53 | 54 | # For comparing two layers 55 | def __eq__(self, other): 56 | return self.signature == other.signature 57 | def __ne__(self, other): 58 | return not self.__eq__(other) 59 | 60 | def varsig(self, var): 61 | if var not in self.wshape: 62 | return None 63 | sig = str(self.number) 64 | sig += '-' + self.type 65 | sig += '/' + var 66 | return sig 67 | 68 | def recollect(self, w): self.w = w 69 | def present(self): self.presenter = self 70 | def setup(self, *args): pass 71 | def finalize(self): pass -------------------------------------------------------------------------------- /darkflow/defaults.py: -------------------------------------------------------------------------------- 1 | class argHandler(dict): 2 | #A super duper fancy custom made CLI argument handler!! 
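    # Attribute access is routed straight to the underlying dict, so FLAGS.model and FLAGS['model'] name the same entry;
    # option defaults and their help strings are registered through define() in setDefaults() below.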
3 | __getattr__ = dict.get 4 | __setattr__ = dict.__setitem__ 5 | __delattr__ = dict.__delitem__ 6 | _descriptions = {'help, --h, -h': 'show this super helpful message and exit'} 7 | 8 | def setDefaults(self): 9 | self.define('imgdir', './sample_img/', 'path to testing directory with images') 10 | self.define('binary', './bin/', 'path to .weights directory') 11 | self.define('config', './cfg/', 'path to .cfg directory') 12 | self.define('dataset', '../pascal/VOCdevkit/IMG/', 'path to dataset directory') 13 | self.define('labels', 'labels.txt', 'path to labels file') 14 | self.define('backup', './ckpt/', 'path to backup folder') 15 | self.define('summary', '', 'path to TensorBoard summaries directory') 16 | self.define('annotation', '../pascal/VOCdevkit/ANN/', 'path to annotation directory') 17 | self.define('threshold', -0.1, 'detection threshold') 18 | self.define('model', '', 'configuration of choice') 19 | self.define('trainer', 'rmsprop', 'training algorithm') 20 | self.define('momentum', 0.0, 'applicable for rmsprop and momentum optimizers') 21 | self.define('verbalise', True, 'say out loud while building graph') 22 | self.define('train', False, 'train the whole net') 23 | self.define('load', '', 'how to initialize the net? Either from .weights or a checkpoint, or even from scratch') 24 | self.define('savepb', False, 'save net and weight to a .pb file') 25 | self.define('gpu', 0.0, 'how much gpu (from 0.0 to 1.0)') 26 | self.define('gpuName', '/gpu:0', 'GPU device name') 27 | self.define('lr', 1e-5, 'learning rate') 28 | self.define('keep',20,'Number of most recent training results to save') 29 | self.define('batch', 16, 'batch size') 30 | self.define('epoch', 1000, 'number of epoch') 31 | self.define('save', 2000, 'save checkpoint every ? training examples') 32 | self.define('demo', '', 'demo on webcam') 33 | self.define('queue', 1, 'process demo in batch') 34 | self.define('json', False, 'Outputs bounding box information in json format.') 35 | self.define('saveVideo', False, 'Records video from input video or camera') 36 | self.define('pbLoad', '', 'path to .pb protobuf file (metaLoad must also be specified)') 37 | self.define('metaLoad', '', 'path to .meta file generated during --savepb that corresponds to .pb file') 38 | 39 | def define(self, argName, default, description): 40 | self[argName] = default 41 | self._descriptions[argName] = description 42 | 43 | def help(self): 44 | print('Example usage: flow --imgdir sample_img/ --model cfg/yolo.cfg --load bin/yolo.weights') 45 | print('') 46 | print('Arguments:') 47 | spacing = max([len(i) for i in self._descriptions.keys()]) + 2 48 | for item in self._descriptions: 49 | currentSpacing = spacing - len(item) 50 | print(' --' + item + (' ' * currentSpacing) + self._descriptions[item]) 51 | print('') 52 | exit() 53 | 54 | def parseArgs(self, args): 55 | print('') 56 | i = 1 57 | while i < len(args): 58 | if args[i] == '-h' or args[i] == '--h' or args[i] == '--help': 59 | self.help() #Time for some self help! 
:) 60 | if len(args[i]) < 2: 61 | print('ERROR - Invalid argument: ' + args[i]) 62 | print('Try running flow --help') 63 | exit() 64 | argumentName = args[i][2:] 65 | if isinstance(self.get(argumentName), bool): 66 | if not (i + 1) >= len(args) and (args[i + 1].lower() != 'false' and args[i + 1].lower() != 'true') and not args[i + 1].startswith('--'): 67 | print('ERROR - Expected boolean value (or no value) following argument: ' + args[i]) 68 | print('Try running flow --help') 69 | exit() 70 | elif not (i + 1) >= len(args) and (args[i + 1].lower() == 'false' or args[i + 1].lower() == 'true'): 71 | self[argumentName] = (args[i + 1].lower() == 'true') 72 | i += 1 73 | else: 74 | self[argumentName] = True 75 | elif args[i].startswith('--') and not (i + 1) >= len(args) and not args[i + 1].startswith('--') and argumentName in self: 76 | if isinstance(self[argumentName], float): 77 | try: 78 | args[i + 1] = float(args[i + 1]) 79 | except: 80 | print('ERROR - Expected float for argument: ' + args[i]) 81 | print('Try running flow --help') 82 | exit() 83 | elif isinstance(self[argumentName], int): 84 | try: 85 | args[i + 1] = int(args[i + 1]) 86 | except: 87 | print('ERROR - Expected int for argument: ' + args[i]) 88 | print('Try running flow --help') 89 | exit() 90 | self[argumentName] = args[i + 1] 91 | i += 1 92 | else: 93 | print('ERROR - Invalid argument: ' + args[i]) 94 | print('Try running flow --help') 95 | exit() 96 | i += 1 97 | -------------------------------------------------------------------------------- /darkflow/net/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/__pycache__/build.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/__pycache__/build.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/__pycache__/flow.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/__pycache__/flow.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/__pycache__/framework.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/__pycache__/framework.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/__pycache__/help.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/__pycache__/help.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/build.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import time 3 | from . 
import help 4 | from . import flow 5 | from .ops import op_create, identity 6 | from .ops import HEADER, LINE 7 | from .framework import create_framework 8 | from ..dark.darknet import Darknet 9 | import json 10 | import os 11 | 12 | class TFNet(object): 13 | 14 | _TRAINER = dict({ 15 | 'rmsprop': tf.train.RMSPropOptimizer, 16 | 'adadelta': tf.train.AdadeltaOptimizer, 17 | 'adagrad': tf.train.AdagradOptimizer, 18 | 'adagradDA': tf.train.AdagradDAOptimizer, 19 | 'momentum': tf.train.MomentumOptimizer, 20 | 'adam': tf.train.AdamOptimizer, 21 | 'ftrl': tf.train.FtrlOptimizer, 22 | 'sgd': tf.train.GradientDescentOptimizer 23 | }) 24 | 25 | # imported methods 26 | _get_fps = help._get_fps 27 | say = help.say 28 | train = flow.train 29 | camera = help.camera 30 | predict = flow.predict 31 | return_predict = flow.return_predict 32 | to_darknet = help.to_darknet 33 | build_train_op = help.build_train_op 34 | load_from_ckpt = help.load_from_ckpt 35 | 36 | def __init__(self, FLAGS, darknet = None): 37 | self.ntrain = 0 38 | 39 | if isinstance(FLAGS, dict): 40 | from ..defaults import argHandler 41 | newFLAGS = argHandler() 42 | newFLAGS.setDefaults() 43 | newFLAGS.update(FLAGS) 44 | FLAGS = newFLAGS 45 | 46 | self.FLAGS = FLAGS 47 | if self.FLAGS.pbLoad and self.FLAGS.metaLoad: 48 | self.say('\nLoading from .pb and .meta') 49 | self.graph = tf.Graph() 50 | device_name = FLAGS.gpuName \ 51 | if FLAGS.gpu > 0.0 else None 52 | with tf.device(device_name): 53 | with self.graph.as_default() as g: 54 | self.build_from_pb() 55 | return 56 | 57 | if darknet is None: 58 | darknet = Darknet(FLAGS) 59 | self.ntrain = len(darknet.layers) 60 | 61 | self.darknet = darknet 62 | args = [darknet.meta, FLAGS] 63 | self.num_layer = len(darknet.layers) 64 | self.framework = create_framework(*args) 65 | 66 | self.meta = darknet.meta 67 | 68 | self.say('\nBuilding net ...') 69 | start = time.time() 70 | self.graph = tf.Graph() 71 | device_name = FLAGS.gpuName \ 72 | if FLAGS.gpu > 0.0 else None 73 | with tf.device(device_name): 74 | with self.graph.as_default() as g: 75 | self.build_forward() 76 | self.setup_meta_ops() 77 | self.say('Finished in {}s\n'.format( 78 | time.time() - start)) 79 | 80 | def build_from_pb(self): 81 | with tf.gfile.FastGFile(self.FLAGS.pbLoad, "rb") as f: 82 | graph_def = tf.GraphDef() 83 | graph_def.ParseFromString(f.read()) 84 | 85 | tf.import_graph_def( 86 | graph_def, 87 | name="" 88 | ) 89 | with open(self.FLAGS.metaLoad, 'r') as fp: 90 | self.meta = json.load(fp) 91 | self.framework = create_framework(self.meta, self.FLAGS) 92 | 93 | # Placeholders 94 | self.inp = tf.get_default_graph().get_tensor_by_name('input:0') 95 | self.feed = dict() # other placeholders 96 | self.out = tf.get_default_graph().get_tensor_by_name('output:0') 97 | 98 | self.setup_meta_ops() 99 | 100 | def build_forward(self): 101 | verbalise = self.FLAGS.verbalise 102 | 103 | # Placeholders 104 | inp_size = [None] + self.meta['inp_size'] 105 | self.inp = tf.placeholder(tf.float32, inp_size, 'input') 106 | self.feed = dict() # other placeholders 107 | 108 | # Build the forward pass 109 | state = identity(self.inp) 110 | roof = self.num_layer - self.ntrain 111 | self.say(HEADER, LINE) 112 | for i, layer in enumerate(self.darknet.layers): 113 | scope = '{}-{}'.format(str(i),layer.type) 114 | args = [layer, state, i, roof, self.feed] 115 | state = op_create(*args) 116 | mess = state.verbalise() 117 | self.say(mess) 118 | self.say(LINE) 119 | 120 | self.top = state 121 | self.out = tf.identity(state.out, name='output') 122 | 
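    # (Illustrative usage sketch added by the editor; not part of the original file.
    #  The paths and option values below are assumptions, not defaults shipped with
    #  this repository.)
    #
    #     from darkflow.net.build import TFNet
    #     import cv2
    #     options = {"model": "cfg/yolov2-tiny-voc-1c.cfg",
    #                "load": "bin/yolov2-tiny-voc.weights",
    #                "threshold": 0.4, "gpu": 0.0}
    #     tfnet = TFNet(options)   # a dict is wrapped into argHandler by __init__ above
    #     boxes = tfnet.return_predict(cv2.imread("sample_img/dog.jpg"))
    #
    # return_predict is defined in flow.py and bound to this class in the
    # "imported methods" block near the top of TFNet.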
123 | def setup_meta_ops(self): 124 | cfg = dict({ 125 | 'allow_soft_placement': False, 126 | 'log_device_placement': False 127 | }) 128 | 129 | utility = min(self.FLAGS.gpu, 1.) 130 | if utility > 0.0: 131 | self.say('GPU mode with {} usage'.format(utility)) 132 | cfg['gpu_options'] = tf.GPUOptions( 133 | per_process_gpu_memory_fraction = utility) 134 | cfg['allow_soft_placement'] = True 135 | else: 136 | self.say('Running entirely on CPU') 137 | cfg['device_count'] = {'GPU': 0} 138 | 139 | if self.FLAGS.train: self.build_train_op() 140 | 141 | if self.FLAGS.summary: 142 | self.summary_op = tf.summary.merge_all() 143 | self.writer = tf.summary.FileWriter(self.FLAGS.summary + 'train') 144 | 145 | self.sess = tf.Session(config = tf.ConfigProto(**cfg)) 146 | self.sess.run(tf.global_variables_initializer()) 147 | 148 | if not self.ntrain: return 149 | self.saver = tf.train.Saver(tf.global_variables(), 150 | max_to_keep = self.FLAGS.keep) 151 | if self.FLAGS.load != 0: self.load_from_ckpt() 152 | 153 | if self.FLAGS.summary: 154 | self.writer.add_graph(self.sess.graph) 155 | 156 | def savepb(self): 157 | """ 158 | Create a standalone const graph def that 159 | C++ can load and run. 160 | """ 161 | darknet_pb = self.to_darknet() 162 | flags_pb = self.FLAGS 163 | flags_pb.verbalise = False 164 | 165 | flags_pb.train = False 166 | # rebuild another tfnet. all const. 167 | tfnet_pb = TFNet(flags_pb, darknet_pb) 168 | tfnet_pb.sess = tf.Session(graph = tfnet_pb.graph) 169 | # tfnet_pb.predict() # uncomment for unit testing 170 | name = 'built_graph/{}.pb'.format(self.meta['name']) 171 | os.makedirs(os.path.dirname(name), exist_ok=True) 172 | #Save dump of everything in meta 173 | with open('built_graph/{}.meta'.format(self.meta['name']), 'w') as fp: 174 | json.dump(self.meta, fp) 175 | self.say('Saving const graph def to {}'.format(name)) 176 | graph_def = tfnet_pb.sess.graph_def 177 | tf.train.write_graph(graph_def,'./', name, False) -------------------------------------------------------------------------------- /darkflow/net/flow.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import tensorflow as tf 5 | import pickle 6 | from multiprocessing.pool import ThreadPool 7 | 8 | train_stats = ( 9 | 'Training statistics: \n' 10 | '\tLearning rate : {}\n' 11 | '\tBatch size : {}\n' 12 | '\tEpoch number : {}\n' 13 | '\tBackup every : {}' 14 | ) 15 | pool = ThreadPool() 16 | 17 | def _save_ckpt(self, step, loss_profile): 18 | file = '{}-{}{}' 19 | model = self.meta['name'] 20 | 21 | profile = file.format(model, step, '.profile') 22 | profile = os.path.join(self.FLAGS.backup, profile) 23 | with open(profile, 'wb') as profile_ckpt: 24 | pickle.dump(loss_profile, profile_ckpt) 25 | 26 | ckpt = file.format(model, step, '') 27 | ckpt = os.path.join(self.FLAGS.backup, ckpt) 28 | self.say('Checkpoint at step {}'.format(step)) 29 | self.saver.save(self.sess, ckpt) 30 | 31 | 32 | def train(self): 33 | loss_ph = self.framework.placeholders 34 | loss_mva = None; profile = list() 35 | 36 | batches = self.framework.shuffle() 37 | loss_op = self.framework.loss 38 | 39 | for i, (x_batch, datum) in enumerate(batches): 40 | if not i: self.say(train_stats.format( 41 | self.FLAGS.lr, self.FLAGS.batch, 42 | self.FLAGS.epoch, self.FLAGS.save 43 | )) 44 | 45 | feed_dict = { 46 | loss_ph[key]: datum[key] 47 | for key in loss_ph } 48 | feed_dict[self.inp] = x_batch 49 | feed_dict.update(self.feed) 50 | 51 | fetches = [self.train_op, 
loss_op] 52 | 53 | if self.FLAGS.summary: 54 | fetches.append(self.summary_op) 55 | 56 | fetched = self.sess.run(fetches, feed_dict) 57 | loss = fetched[1] 58 | 59 | if loss_mva is None: loss_mva = loss 60 | loss_mva = .9 * loss_mva + .1 * loss 61 | step_now = self.FLAGS.load + i + 1 62 | 63 | if self.FLAGS.summary: 64 | self.writer.add_summary(fetched[2], step_now) 65 | 66 | form = 'step {} - loss {} - moving ave loss {}' 67 | self.say(form.format(step_now, loss, loss_mva)) 68 | profile += [(loss, loss_mva)] 69 | 70 | ckpt = (i+1) % (self.FLAGS.save // self.FLAGS.batch) 71 | args = [step_now, profile] 72 | if not ckpt: _save_ckpt(self, *args) 73 | 74 | if ckpt: _save_ckpt(self, *args) 75 | 76 | def return_predict(self, im): 77 | assert isinstance(im, np.ndarray), \ 78 | 'Image is not a np.ndarray' 79 | h, w, _ = im.shape 80 | im = self.framework.resize_input(im) 81 | this_inp = np.expand_dims(im, 0) 82 | feed_dict = {self.inp : this_inp} 83 | 84 | out = self.sess.run(self.out, feed_dict)[0] 85 | boxes = self.framework.findboxes(out) 86 | threshold = self.FLAGS.threshold 87 | boxesInfo = list() 88 | for box in boxes: 89 | tmpBox = self.framework.process_box(box, h, w, threshold) 90 | if tmpBox is None: 91 | continue 92 | boxesInfo.append({ 93 | "label": tmpBox[4], 94 | "confidence": tmpBox[6], 95 | "topleft": { 96 | "x": tmpBox[0], 97 | "y": tmpBox[2]}, 98 | "bottomright": { 99 | "x": tmpBox[1], 100 | "y": tmpBox[3]} 101 | }) 102 | return boxesInfo 103 | 104 | import math 105 | 106 | def predict(self): 107 | inp_path = self.FLAGS.imgdir 108 | all_inps = os.listdir(inp_path) 109 | all_inps = [i for i in all_inps if self.framework.is_inp(i)] 110 | if not all_inps: 111 | msg = 'Failed to find any images in {} .' 112 | exit('Error: {}'.format(msg.format(inp_path))) 113 | 114 | batch = min(self.FLAGS.batch, len(all_inps)) 115 | 116 | # predict in batches 117 | n_batch = int(math.ceil(len(all_inps) / batch)) 118 | for j in range(n_batch): 119 | from_idx = j * batch 120 | to_idx = min(from_idx + batch, len(all_inps)) 121 | 122 | # collect images input in the batch 123 | this_batch = all_inps[from_idx:to_idx] 124 | inp_feed = pool.map(lambda inp: ( 125 | np.expand_dims(self.framework.preprocess( 126 | os.path.join(inp_path, inp)), 0)), this_batch) 127 | 128 | # Feed to the net 129 | feed_dict = {self.inp : np.concatenate(inp_feed, 0)} 130 | self.say('Forwarding {} inputs ...'.format(len(inp_feed))) 131 | start = time.time() 132 | out = self.sess.run(self.out, feed_dict) 133 | stop = time.time(); last = stop - start 134 | self.say('Total time = {}s / {} inps = {} ips'.format( 135 | last, len(inp_feed), len(inp_feed) / last)) 136 | 137 | # Post processing 138 | self.say('Post processing {} inputs ...'.format(len(inp_feed))) 139 | start = time.time() 140 | pool.map(lambda p: (lambda i, prediction: 141 | self.framework.postprocess( 142 | prediction, os.path.join(inp_path, this_batch[i])))(*p), 143 | enumerate(out)) 144 | stop = time.time(); last = stop - start 145 | 146 | # Timing 147 | self.say('Total time = {}s / {} inps = {} ips'.format( 148 | last, len(inp_feed), len(inp_feed) / last)) 149 | -------------------------------------------------------------------------------- /darkflow/net/framework.py: -------------------------------------------------------------------------------- 1 | from . import yolo 2 | from . import yolov2 3 | from . 
import vanilla 4 | from os.path import basename 5 | 6 | class framework(object): 7 | constructor = vanilla.constructor 8 | loss = vanilla.train.loss 9 | 10 | def __init__(self, meta, FLAGS): 11 | model = basename(meta['model']) 12 | model = '.'.join(model.split('.')[:-1]) 13 | meta['name'] = model 14 | 15 | self.constructor(meta, FLAGS) 16 | 17 | def is_inp(self, file_name): 18 | return True 19 | 20 | class YOLO(framework): 21 | constructor = yolo.constructor 22 | parse = yolo.data.parse 23 | shuffle = yolo.data.shuffle 24 | preprocess = yolo.predict.preprocess 25 | postprocess = yolo.predict.postprocess 26 | loss = yolo.train.loss 27 | is_inp = yolo.misc.is_inp 28 | profile = yolo.misc.profile 29 | _batch = yolo.data._batch 30 | resize_input = yolo.predict.resize_input 31 | findboxes = yolo.predict.findboxes 32 | process_box = yolo.predict.process_box 33 | 34 | class YOLOv2(framework): 35 | constructor = yolo.constructor 36 | parse = yolo.data.parse 37 | shuffle = yolov2.data.shuffle 38 | preprocess = yolo.predict.preprocess 39 | loss = yolov2.train.loss 40 | is_inp = yolo.misc.is_inp 41 | postprocess = yolov2.predict.postprocess 42 | _batch = yolov2.data._batch 43 | resize_input = yolo.predict.resize_input 44 | findboxes = yolov2.predict.findboxes 45 | process_box = yolo.predict.process_box 46 | 47 | """ 48 | framework factory 49 | """ 50 | 51 | types = { 52 | '[detection]': YOLO, 53 | '[region]': YOLOv2 54 | } 55 | 56 | def create_framework(meta, FLAGS): 57 | net_type = meta['type'] 58 | this = types.get(net_type, framework) 59 | return this(meta, FLAGS) -------------------------------------------------------------------------------- /darkflow/net/help.py: -------------------------------------------------------------------------------- 1 | """ 2 | tfnet secondary (helper) methods 3 | """ 4 | from ..utils.loader import create_loader 5 | from time import time as timer 6 | import tensorflow as tf 7 | import numpy as np 8 | import sys 9 | import cv2 10 | import os 11 | 12 | old_graph_msg = 'Resolving old graph def {} (no guarantee)' 13 | 14 | def build_train_op(self): 15 | self.framework.loss(self.out) 16 | self.say('Building {} train op'.format(self.meta['model'])) 17 | optimizer = self._TRAINER[self.FLAGS.trainer](self.FLAGS.lr) 18 | gradients = optimizer.compute_gradients(self.framework.loss) 19 | self.train_op = optimizer.apply_gradients(gradients) 20 | 21 | def load_from_ckpt(self): 22 | if self.FLAGS.load < 0: # load lastest ckpt 23 | with open(os.path.join(self.FLAGS.backup, 'checkpoint'), 'r') as f: 24 | last = f.readlines()[-1].strip() 25 | load_point = last.split(' ')[1] 26 | load_point = load_point.split('"')[1] 27 | load_point = load_point.split('-')[-1] 28 | self.FLAGS.load = int(load_point) 29 | 30 | load_point = os.path.join(self.FLAGS.backup, self.meta['name']) 31 | load_point = '{}-{}'.format(load_point, self.FLAGS.load) 32 | self.say('Loading from {}'.format(load_point)) 33 | try: self.saver.restore(self.sess, load_point) 34 | except: load_old_graph(self, load_point) 35 | 36 | def say(self, *msgs): 37 | if not self.FLAGS.verbalise: 38 | return 39 | msgs = list(msgs) 40 | for msg in msgs: 41 | if msg is None: continue 42 | print(msg) 43 | 44 | def load_old_graph(self, ckpt): 45 | ckpt_loader = create_loader(ckpt) 46 | self.say(old_graph_msg.format(ckpt)) 47 | 48 | for var in tf.global_variables(): 49 | name = var.name.split(':')[0] 50 | args = [name, var.get_shape()] 51 | val = ckpt_loader(args) 52 | assert val is not None, \ 53 | 'Cannot find and load {}'.format(var.name) 
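        # (Explanatory comment added by the editor, not in the original source:
        #  each value recovered from the old checkpoint is pushed into the live
        #  graph through a temporary placeholder and tf.assign below, so its shape
        #  must match the current variable exactly.)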
54 | shp = val.shape 55 | plh = tf.placeholder(tf.float32, shp) 56 | op = tf.assign(var, plh) 57 | self.sess.run(op, {plh: val}) 58 | 59 | def _get_fps(self, frame): 60 | elapsed = int() 61 | start = timer() 62 | preprocessed = self.framework.preprocess(frame) 63 | feed_dict = {self.inp: [preprocessed]} 64 | net_out = self.sess.run(self.out, feed_dict)[0] 65 | processed = self.framework.postprocess(net_out, frame, False) 66 | return timer() - start 67 | 68 | def camera(self): 69 | file = self.FLAGS.demo 70 | SaveVideo = self.FLAGS.saveVideo 71 | 72 | if file == 'camera': 73 | file = 0 74 | else: 75 | assert os.path.isfile(file), \ 76 | 'file {} does not exist'.format(file) 77 | 78 | camera = cv2.VideoCapture(file) 79 | 80 | if file == 0: 81 | self.say('Press [ESC] to quit demo') 82 | 83 | assert camera.isOpened(), \ 84 | 'Cannot capture source' 85 | 86 | if file == 0:#camera window 87 | cv2.namedWindow('', 0) 88 | _, frame = camera.read() 89 | height, width, _ = frame.shape 90 | cv2.resizeWindow('', width, height) 91 | else: 92 | _, frame = camera.read() 93 | height, width, _ = frame.shape 94 | 95 | if SaveVideo: 96 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 97 | if file == 0:#camera window 98 | fps = 1 / self._get_fps(frame) 99 | if fps < 1: 100 | fps = 1 101 | else: 102 | fps = round(camera.get(cv2.CAP_PROP_FPS)) 103 | videoWriter = cv2.VideoWriter( 104 | 'video.avi', fourcc, fps, (width, height)) 105 | 106 | # buffers for demo in batch 107 | buffer_inp = list() 108 | buffer_pre = list() 109 | 110 | elapsed = int() 111 | start = timer() 112 | self.say('Press [ESC] to quit demo') 113 | # Loop through frames 114 | while camera.isOpened(): 115 | elapsed += 1 116 | _, frame = camera.read() 117 | if frame is None: 118 | print ('\nEnd of Video') 119 | break 120 | preprocessed = self.framework.preprocess(frame) 121 | buffer_inp.append(frame) 122 | buffer_pre.append(preprocessed) 123 | 124 | # Only process and imshow when queue is full 125 | if elapsed % self.FLAGS.queue == 0: 126 | feed_dict = {self.inp: buffer_pre} 127 | net_out = self.sess.run(self.out, feed_dict) 128 | for img, single_out in zip(buffer_inp, net_out): 129 | postprocessed = self.framework.postprocess( 130 | single_out, img, False) 131 | if SaveVideo: 132 | videoWriter.write(postprocessed) 133 | if file == 0: #camera window 134 | cv2.imshow('', postprocessed) 135 | # Clear Buffers 136 | buffer_inp = list() 137 | buffer_pre = list() 138 | 139 | if elapsed % 5 == 0: 140 | sys.stdout.write('\r') 141 | sys.stdout.write('{0:3.3f} FPS'.format( 142 | elapsed / (timer() - start))) 143 | sys.stdout.flush() 144 | if file == 0: #camera window 145 | choice = cv2.waitKey(1) 146 | if choice == 27: break 147 | 148 | sys.stdout.write('\n') 149 | if SaveVideo: 150 | videoWriter.release() 151 | camera.release() 152 | if file == 0: #camera window 153 | cv2.destroyAllWindows() 154 | 155 | def to_darknet(self): 156 | darknet_ckpt = self.darknet 157 | 158 | with self.graph.as_default() as g: 159 | for var in tf.global_variables(): 160 | name = var.name.split(':')[0] 161 | var_name = name.split('-') 162 | l_idx = int(var_name[0]) 163 | w_sig = var_name[1].split('/')[-1] 164 | l = darknet_ckpt.layers[l_idx] 165 | l.w[w_sig] = var.eval(self.sess) 166 | 167 | for layer in darknet_ckpt.layers: 168 | for ph in layer.h: 169 | layer.h[ph] = None 170 | 171 | return darknet_ckpt 172 | -------------------------------------------------------------------------------- /darkflow/net/ops/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .simple import * 2 | from .convolution import * 3 | from .baseop import HEADER, LINE 4 | 5 | op_types = { 6 | 'convolutional': convolutional, 7 | 'conv-select': conv_select, 8 | 'connected': connected, 9 | 'maxpool': maxpool, 10 | 'leaky': leaky, 11 | 'dropout': dropout, 12 | 'flatten': flatten, 13 | 'avgpool': avgpool, 14 | 'softmax': softmax, 15 | 'identity': identity, 16 | 'crop': crop, 17 | 'local': local, 18 | 'select': select, 19 | 'route': route, 20 | 'reorg': reorg, 21 | 'conv-extract': conv_extract, 22 | 'extract': extract 23 | } 24 | 25 | def op_create(*args): 26 | layer_type = list(args)[0].type 27 | return op_types[layer_type](*args) -------------------------------------------------------------------------------- /darkflow/net/ops/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/ops/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/ops/__pycache__/baseop.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/ops/__pycache__/baseop.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/ops/__pycache__/convolution.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/ops/__pycache__/convolution.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/ops/__pycache__/simple.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/ops/__pycache__/simple.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/ops/baseop.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | FORM = '{:>6} | {:>6} | {:<32} | {}' 5 | FORM_ = '{}+{}+{}+{}' 6 | LINE = FORM_.format('-'*7, '-'*8, '-'*34, '-'*15) 7 | HEADER = FORM.format( 8 | 'Source', 'Train?','Layer description', 'Output size') 9 | 10 | def _shape(tensor): # work for both tf.Tensor & np.ndarray 11 | if type(tensor) in [tf.Variable, tf.Tensor]: 12 | return tensor.get_shape() 13 | else: return tensor.shape 14 | 15 | def _name(tensor): 16 | return tensor.name.split(':')[0] 17 | 18 | class BaseOp(object): 19 | """ 20 | BaseOp objects initialise with a darknet's `layer` object 21 | and input tensor of that layer `inp`, it calculates the 22 | output of this layer and place the result in self.out 23 | """ 24 | 25 | # let slim take care of the following vars 26 | _SLIM = ['gamma', 'moving_mean', 'moving_variance'] 27 | 28 | def __init__(self, layer, inp, num, roof, feed): 29 | self.inp = inp # BaseOp 30 | self.num = num # int 31 | self.out = None # tf.Tensor 32 | self.lay = layer 33 | 34 | self.scope = '{}-{}'.format( 35 | str(self.num), 
self.lay.type) 36 | self.gap = roof - self.num 37 | self.var = not self.gap > 0 38 | self.act = 'Load ' 39 | self.convert(feed) 40 | if self.var: self.train_msg = 'Yep! ' 41 | else: self.train_msg = 'Nope ' 42 | self.forward() 43 | 44 | def convert(self, feed): 45 | """convert self.lay to variables & placeholders""" 46 | for var in self.lay.wshape: 47 | self.wrap_variable(var) 48 | for ph in self.lay.h: 49 | self.wrap_pholder(ph, feed) 50 | 51 | def wrap_variable(self, var): 52 | """wrap layer.w into variables""" 53 | val = self.lay.w.get(var, None) 54 | if val is None: 55 | shape = self.lay.wshape[var] 56 | args = [0., 1e-2, shape] 57 | if 'moving_mean' in var: 58 | val = np.zeros(shape) 59 | elif 'moving_variance' in var: 60 | val = np.ones(shape) 61 | else: 62 | val = np.random.normal(*args) 63 | self.lay.w[var] = val.astype(np.float32) 64 | self.act = 'Init ' 65 | if not self.var: return 66 | 67 | val = self.lay.w[var] 68 | self.lay.w[var] = tf.constant_initializer(val) 69 | if var in self._SLIM: return 70 | with tf.variable_scope(self.scope): 71 | self.lay.w[var] = tf.get_variable(var, 72 | shape = self.lay.wshape[var], 73 | dtype = tf.float32, 74 | initializer = self.lay.w[var]) 75 | 76 | def wrap_pholder(self, ph, feed): 77 | """wrap layer.h into placeholders""" 78 | phtype = type(self.lay.h[ph]) 79 | if phtype is not dict: return 80 | 81 | sig = '{}/{}'.format(self.scope, ph) 82 | val = self.lay.h[ph] 83 | 84 | self.lay.h[ph] = tf.placeholder_with_default( 85 | val['dfault'], val['shape'], name = sig) 86 | feed[self.lay.h[ph]] = val['feed'] 87 | 88 | def verbalise(self): # console speaker 89 | msg = str() 90 | inp = _name(self.inp.out) 91 | if inp == 'input': \ 92 | msg = FORM.format( 93 | '', '', 'input', 94 | _shape(self.inp.out)) + '\n' 95 | if not self.act: return msg 96 | return msg + FORM.format( 97 | self.act, self.train_msg, 98 | self.speak(), _shape(self.out)) 99 | 100 | def speak(self): pass -------------------------------------------------------------------------------- /darkflow/net/ops/convolution.py: -------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | from .baseop import BaseOp 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | class reorg(BaseOp): 7 | def _forward(self): 8 | inp = self.inp.out 9 | shape = inp.get_shape().as_list() 10 | _, h, w, c = shape 11 | s = self.lay.stride 12 | out = list() 13 | for i in range(int(h/s)): 14 | row_i = list() 15 | for j in range(int(w/s)): 16 | si, sj = s * i, s * j 17 | boxij = inp[:, si: si+s, sj: sj+s,:] 18 | flatij = tf.reshape(boxij, [-1,1,1,c*s*s]) 19 | row_i += [flatij] 20 | out += [tf.concat(row_i, 2)] 21 | 22 | self.out = tf.concat(out, 1) 23 | 24 | def forward(self): 25 | inp = self.inp.out 26 | s = self.lay.stride 27 | self.out = tf.extract_image_patches( 28 | inp, [1,s,s,1], [1,s,s,1], [1,1,1,1], 'VALID') 29 | 30 | def speak(self): 31 | args = [self.lay.stride] * 2 32 | msg = 'local flatten {}x{}' 33 | return msg.format(*args) 34 | 35 | 36 | class local(BaseOp): 37 | def forward(self): 38 | pad = [[self.lay.pad, self.lay.pad]] * 2; 39 | temp = tf.pad(self.inp.out, [[0, 0]] + pad + [[0, 0]]) 40 | 41 | k = self.lay.w['kernels'] 42 | ksz = self.lay.ksize 43 | half = int(ksz / 2) 44 | out = list() 45 | for i in range(self.lay.h_out): 46 | row_i = list() 47 | for j in range(self.lay.w_out): 48 | kij = k[i * self.lay.w_out + j] 49 | i_, j_ = i + 1 - half, j + 1 - half 50 | tij = temp[:, i_ : i_ + ksz, j_ : j_ + ksz,:] 51 | row_i.append( 52 | 
tf.nn.conv2d(tij, kij, 53 | padding = 'VALID', 54 | strides = [1] * 4)) 55 | out += [tf.concat(row_i, 2)] 56 | 57 | self.out = tf.concat(out, 1) 58 | 59 | def speak(self): 60 | l = self.lay 61 | args = [l.ksize] * 2 + [l.pad] + [l.stride] 62 | args += [l.activation] 63 | msg = 'loca {}x{}p{}_{} {}'.format(*args) 64 | return msg 65 | 66 | class convolutional(BaseOp): 67 | def forward(self): 68 | pad = [[self.lay.pad, self.lay.pad]] * 2; 69 | temp = tf.pad(self.inp.out, [[0, 0]] + pad + [[0, 0]]) 70 | temp = tf.nn.conv2d(temp, self.lay.w['kernel'], padding = 'VALID', 71 | name = self.scope, strides = [1] + [self.lay.stride] * 2 + [1]) 72 | if self.lay.batch_norm: 73 | temp = self.batchnorm(self.lay, temp) 74 | self.out = tf.nn.bias_add(temp, self.lay.w['biases']) 75 | 76 | def batchnorm(self, layer, inp): 77 | if not self.var: 78 | temp = (inp - layer.w['moving_mean']) 79 | temp /= (np.sqrt(layer.w['moving_variance']) + 1e-5) 80 | temp *= layer.w['gamma'] 81 | return temp 82 | else: 83 | args = dict({ 84 | 'center' : False, 'scale' : True, 85 | 'epsilon': 1e-5, 'scope' : self.scope, 86 | 'updates_collections' : None, 87 | 'is_training': layer.h['is_training'], 88 | 'param_initializers': layer.w 89 | }) 90 | return slim.batch_norm(inp, **args) 91 | 92 | def speak(self): 93 | l = self.lay 94 | args = [l.ksize] * 2 + [l.pad] + [l.stride] 95 | args += [l.batch_norm * '+bnorm'] 96 | args += [l.activation] 97 | msg = 'conv {}x{}p{}_{} {} {}'.format(*args) 98 | return msg 99 | 100 | class conv_select(convolutional): 101 | def speak(self): 102 | l = self.lay 103 | args = [l.ksize] * 2 + [l.pad] + [l.stride] 104 | args += [l.batch_norm * '+bnorm'] 105 | args += [l.activation] 106 | msg = 'sele {}x{}p{}_{} {} {}'.format(*args) 107 | return msg 108 | 109 | class conv_extract(convolutional): 110 | def speak(self): 111 | l = self.lay 112 | args = [l.ksize] * 2 + [l.pad] + [l.stride] 113 | args += [l.batch_norm * '+bnorm'] 114 | args += [l.activation] 115 | msg = 'extr {}x{}p{}_{} {} {}'.format(*args) 116 | return msg -------------------------------------------------------------------------------- /darkflow/net/ops/simple.py: -------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | from .baseop import BaseOp 3 | import tensorflow as tf 4 | from distutils.version import StrictVersion 5 | 6 | class route(BaseOp): 7 | def forward(self): 8 | routes = self.lay.routes 9 | routes_out = list() 10 | for r in routes: 11 | this = self.inp 12 | while this.lay.number != r: 13 | this = this.inp 14 | assert this is not None, \ 15 | 'Routing to non-existence {}'.format(r) 16 | routes_out += [this.out] 17 | self.out = tf.concat(routes_out, 3) 18 | 19 | def speak(self): 20 | msg = 'concat {}' 21 | return msg.format(self.lay.routes) 22 | 23 | class connected(BaseOp): 24 | def forward(self): 25 | self.out = tf.nn.xw_plus_b( 26 | self.inp.out, 27 | self.lay.w['weights'], 28 | self.lay.w['biases'], 29 | name = self.scope) 30 | 31 | def speak(self): 32 | layer = self.lay 33 | args = [layer.inp, layer.out] 34 | args += [layer.activation] 35 | msg = 'full {} x {} {}' 36 | return msg.format(*args) 37 | 38 | class select(connected): 39 | """a weird connected layer""" 40 | def speak(self): 41 | layer = self.lay 42 | args = [layer.inp, layer.out] 43 | args += [layer.activation] 44 | msg = 'sele {} x {} {}' 45 | return msg.format(*args) 46 | 47 | class extract(connected): 48 | """a weird connected layer""" 49 | def speak(self): 50 | layer = self.lay 51 | args = 
[len(layer.inp), len(layer.out)] 52 | args += [layer.activation] 53 | msg = 'extr {} x {} {}' 54 | return msg.format(*args) 55 | 56 | class flatten(BaseOp): 57 | def forward(self): 58 | temp = tf.transpose( 59 | self.inp.out, [0,3,1,2]) 60 | self.out = slim.flatten( 61 | temp, scope = self.scope) 62 | 63 | def speak(self): return 'flat' 64 | 65 | 66 | class softmax(BaseOp): 67 | def forward(self): 68 | self.out = tf.nn.softmax(self.inp.out) 69 | 70 | def speak(self): return 'softmax()' 71 | 72 | 73 | class avgpool(BaseOp): 74 | def forward(self): 75 | self.out = tf.reduce_mean( 76 | self.inp.out, [1, 2], 77 | name = self.scope 78 | ) 79 | 80 | def speak(self): return 'avgpool()' 81 | 82 | 83 | class dropout(BaseOp): 84 | def forward(self): 85 | if self.lay.h['pdrop'] is None: 86 | self.lay.h['pdrop'] = 1.0 87 | self.out = tf.nn.dropout( 88 | self.inp.out, 89 | self.lay.h['pdrop'], 90 | name = self.scope 91 | ) 92 | 93 | def speak(self): return 'drop' 94 | 95 | 96 | class crop(BaseOp): 97 | def forward(self): 98 | self.out = self.inp.out * 2. - 1. 99 | 100 | def speak(self): 101 | return 'scale to (-1, 1)' 102 | 103 | 104 | class maxpool(BaseOp): 105 | def forward(self): 106 | self.out = tf.nn.max_pool( 107 | self.inp.out, padding = 'SAME', 108 | ksize = [1] + [self.lay.ksize]*2 + [1], 109 | strides = [1] + [self.lay.stride]*2 + [1], 110 | name = self.scope 111 | ) 112 | 113 | def speak(self): 114 | l = self.lay 115 | return 'maxp {}x{}p{}_{}'.format( 116 | l.ksize, l.ksize, l.pad, l.stride) 117 | 118 | 119 | class leaky(BaseOp): 120 | def forward(self): 121 | self.out = tf.maximum( 122 | .1 * self.inp.out, 123 | self.inp.out, 124 | name = self.scope 125 | ) 126 | 127 | def verbalise(self): pass 128 | 129 | 130 | class identity(BaseOp): 131 | def __init__(self, inp): 132 | self.inp = None 133 | self.out = inp 134 | -------------------------------------------------------------------------------- /darkflow/net/vanilla/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import train
 2 | 
 3 | def constructor(self, meta, FLAGS):
 4 |     self.meta, self.FLAGS = meta, FLAGS
--------------------------------------------------------------------------------
/darkflow/net/vanilla/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/vanilla/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/darkflow/net/vanilla/__pycache__/train.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/vanilla/__pycache__/train.cpython-36.pyc
--------------------------------------------------------------------------------
/darkflow/net/vanilla/train.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | _LOSS_TYPE = ['sse', 'l2', 'smooth',
 4 |               'sparse', 'l1', 'softmax',
 5 |               'svm', 'fisher']
 6 | 
 7 | def loss(self, net_out):
 8 |     m = self.meta
 9 |     loss_type = self.meta['type']
10 |     assert loss_type in _LOSS_TYPE, \
11 |     'Loss type {} not implemented'.format(loss_type)
12 | 
13 |     out = net_out
14 |     out_shape = out.get_shape()
15 |     out_dtype = out.dtype.base_dtype
16 |     _truth = tf.placeholder(out_dtype, out_shape)
17 | 
18 |     self.placeholders = dict({
19 |         'truth': _truth
20 |     })
21 | 
22 |     diff = _truth - out
23 |     if loss_type in ['sse', 'l2']:
24 |         loss = tf.nn.l2_loss(diff)
25 | 
26 |     elif loss_type == 'smooth':
27 |         small = tf.cast(tf.abs(diff) < 1, tf.float32)
28 |         large = 1. - small
29 |         l1_loss = tf.reduce_sum(tf.abs(tf.multiply(diff, large)))  # TF has no tf.nn.l1_loss
30 |         l2_loss = tf.nn.l2_loss(tf.multiply(diff, small))
31 |         loss = l1_loss + l2_loss
32 | 
33 |     elif loss_type in ['sparse', 'l1']:
34 |         loss = tf.reduce_sum(tf.abs(diff))
35 | 
36 |     elif loss_type == 'softmax':
37 |         loss = tf.nn.softmax_cross_entropy_with_logits(logits = out, labels = _truth)  # assumed: net output vs. truth placeholder
38 |         loss = tf.reduce_mean(loss)
39 | 
40 |     elif loss_type == 'svm':
41 |         assert 'train_size' in m, \
42 |         'Must specify train_size in meta for the svm loss'
43 |         size = m['train_size']
44 |         self.nu = tf.Variable(tf.ones([size, int(out_shape[-1])]))  # assumes last dim of net_out is the class count
45 |     # NOTE: the svm and fisher branches are left unfinished in the source;
46 |     #       only the branches above produce a usable loss tensor.
--------------------------------------------------------------------------------
/darkflow/net/yolo/__init__.py:
--------------------------------------------------------------------------------
 1 | from . import train
 2 | from . import predict
 3 | from . import data
 4 | from .
import misc 5 | import numpy as np 6 | 7 | 8 | """ YOLO framework __init__ equivalent""" 9 | 10 | def constructor(self, meta, FLAGS): 11 | 12 | def _to_color(indx, base): 13 | """ return (b, r, g) tuple""" 14 | base2 = base * base 15 | b = 2 - indx / base2 16 | r = 2 - (indx % base2) / base 17 | g = 2 - (indx % base2) % base 18 | return (b * 127, r * 127, g * 127) 19 | if 'labels' not in meta: 20 | misc.labels(meta, FLAGS) #We're not loading from a .pb so we do need to load the labels 21 | assert len(meta['labels']) == meta['classes'], ( 22 | 'labels.txt and {} indicate' + ' ' 23 | 'inconsistent class numbers' 24 | ).format(meta['model']) 25 | 26 | # assign a color for each label 27 | colors = list() 28 | base = int(np.ceil(pow(meta['classes'], 1./3))) 29 | for x in range(len(meta['labels'])): 30 | colors += [_to_color(x, base)] 31 | meta['colors'] = colors 32 | self.fetch = list() 33 | self.meta, self.FLAGS = meta, FLAGS 34 | 35 | # over-ride the threshold in meta if FLAGS has it. 36 | if FLAGS.threshold > 0.0: 37 | self.meta['thresh'] = FLAGS.threshold -------------------------------------------------------------------------------- /darkflow/net/yolo/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/yolo/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/yolo/__pycache__/data.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/yolo/__pycache__/data.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/yolo/__pycache__/misc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/yolo/__pycache__/misc.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/yolo/__pycache__/predict.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/yolo/__pycache__/predict.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/yolo/__pycache__/train.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/yolo/__pycache__/train.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/yolo/data.py: -------------------------------------------------------------------------------- 1 | from ...utils.pascal_voc_clean_xml import pascal_voc_clean_xml 2 | from numpy.random import permutation as perm 3 | from .predict import preprocess 4 | # from .misc import show 5 | from copy import deepcopy 6 | import pickle 7 | import numpy as np 8 | import os 9 | 10 | def parse(self, exclusive = False): 11 | meta = self.meta 12 | ext = '.parsed' 13 | ann = self.FLAGS.annotation 14 | 
if not os.path.isdir(ann): 15 | msg = 'Annotation directory not found {} .' 16 | exit('Error: {}'.format(msg.format(ann))) 17 | print('\n{} parsing {}'.format(meta['model'], ann)) 18 | dumps = pascal_voc_clean_xml(ann, meta['labels'], exclusive) 19 | return dumps 20 | 21 | 22 | def _batch(self, chunk): 23 | """ 24 | Takes a chunk of parsed annotations 25 | returns value for placeholders of net's 26 | input & loss layer correspond to this chunk 27 | """ 28 | meta = self.meta 29 | S, B = meta['side'], meta['num'] 30 | C, labels = meta['classes'], meta['labels'] 31 | 32 | # preprocess 33 | jpg = chunk[0]; w, h, allobj_ = chunk[1] 34 | allobj = deepcopy(allobj_) 35 | path = os.path.join(self.FLAGS.dataset, jpg) 36 | img = self.preprocess(path, allobj) 37 | 38 | # Calculate regression target 39 | cellx = 1. * w / S 40 | celly = 1. * h / S 41 | for obj in allobj: 42 | centerx = .5*(obj[1]+obj[3]) #xmin, xmax 43 | centery = .5*(obj[2]+obj[4]) #ymin, ymax 44 | cx = centerx / cellx 45 | cy = centery / celly 46 | if cx >= S or cy >= S: return None, None 47 | obj[3] = float(obj[3]-obj[1]) / w 48 | obj[4] = float(obj[4]-obj[2]) / h 49 | obj[3] = np.sqrt(obj[3]) 50 | obj[4] = np.sqrt(obj[4]) 51 | obj[1] = cx - np.floor(cx) # centerx 52 | obj[2] = cy - np.floor(cy) # centery 53 | obj += [int(np.floor(cy) * S + np.floor(cx))] 54 | 55 | # show(im, allobj, S, w, h, cellx, celly) # unit test 56 | 57 | # Calculate placeholders' values 58 | probs = np.zeros([S*S,C]) 59 | confs = np.zeros([S*S,B]) 60 | coord = np.zeros([S*S,B,4]) 61 | proid = np.zeros([S*S,C]) 62 | prear = np.zeros([S*S,4]) 63 | for obj in allobj: 64 | probs[obj[5], :] = [0.] * C 65 | probs[obj[5], labels.index(obj[0])] = 1. 66 | proid[obj[5], :] = [1] * C 67 | coord[obj[5], :, :] = [obj[1:5]] * B 68 | prear[obj[5],0] = obj[1] - obj[3]**2 * .5 * S # xleft 69 | prear[obj[5],1] = obj[2] - obj[4]**2 * .5 * S # yup 70 | prear[obj[5],2] = obj[1] + obj[3]**2 * .5 * S # xright 71 | prear[obj[5],3] = obj[2] + obj[4]**2 * .5 * S # ybot 72 | confs[obj[5], :] = [1.] 
* B 73 | 74 | # Finalise the placeholders' values 75 | upleft = np.expand_dims(prear[:,0:2], 1) 76 | botright = np.expand_dims(prear[:,2:4], 1) 77 | wh = botright - upleft; 78 | area = wh[:,:,0] * wh[:,:,1] 79 | upleft = np.concatenate([upleft] * B, 1) 80 | botright = np.concatenate([botright] * B, 1) 81 | areas = np.concatenate([area] * B, 1) 82 | 83 | # value for placeholder at input layer 84 | inp_feed_val = img 85 | # value for placeholder at loss layer 86 | loss_feed_val = { 87 | 'probs': probs, 'confs': confs, 88 | 'coord': coord, 'proid': proid, 89 | 'areas': areas, 'upleft': upleft, 90 | 'botright': botright 91 | } 92 | 93 | return inp_feed_val, loss_feed_val 94 | 95 | def shuffle(self): 96 | batch = self.FLAGS.batch 97 | data = self.parse() 98 | size = len(data) 99 | 100 | print('Dataset of {} instance(s)'.format(size)) 101 | if batch > size: self.FLAGS.batch = batch = size 102 | batch_per_epoch = int(size / batch) 103 | 104 | for i in range(self.FLAGS.epoch): 105 | shuffle_idx = perm(np.arange(size)) 106 | for b in range(batch_per_epoch): 107 | # yield these 108 | x_batch = list() 109 | feed_batch = dict() 110 | 111 | for j in range(b*batch, b*batch+batch): 112 | train_instance = data[shuffle_idx[j]] 113 | try: 114 | inp, new_feed = self._batch(train_instance) 115 | except ZeroDivisionError: 116 | print("This image's width or height are zeros: ", train_instance[0]) 117 | print('train_instance:', train_instance) 118 | print('Please remove or fix it then try again.') 119 | raise 120 | 121 | if inp is None: continue 122 | x_batch += [np.expand_dims(inp, 0)] 123 | 124 | for key in new_feed: 125 | new = new_feed[key] 126 | old_feed = feed_batch.get(key, 127 | np.zeros((0,) + new.shape)) 128 | feed_batch[key] = np.concatenate([ 129 | old_feed, [new] 130 | ]) 131 | 132 | x_batch = np.concatenate(x_batch, 0) 133 | yield x_batch, feed_batch 134 | 135 | print('Finish {} epoch(es)'.format(i + 1)) 136 | 137 | -------------------------------------------------------------------------------- /darkflow/net/yolo/misc.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import cv2 4 | import os 5 | 6 | labels20 = ["aeroplane", "bicycle", "bird", "boat", "bottle", 7 | "bus", "car", "cat", "chair", "cow", "diningtable", "dog", 8 | "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", 9 | "train", "tvmonitor"] 10 | 11 | # 8, 14, 15, 19 12 | 13 | voc_models = ['yolo-full', 'yolo-tiny', 'yolo-small', # <- v1 14 | 'yolov1', 'tiny-yolov1', # <- v1.1 15 | 'tiny-yolo-voc', 'yolo-voc'] # <- v2 16 | 17 | coco_models = ['tiny-coco', 'yolo-coco', # <- v1.1 18 | 'yolo', 'tiny-yolo'] # <- v2 19 | 20 | coco_names = 'coco.names' 21 | nine_names = '9k.names' 22 | 23 | def labels(meta, FLAGS): 24 | model = os.path.basename(meta['name']) 25 | if model in voc_models: 26 | print("Model has a VOC model name, loading VOC labels.") 27 | meta['labels'] = labels20 28 | else: 29 | file = FLAGS.labels 30 | if model in coco_models: 31 | print("Model has a coco model name, loading coco labels.") 32 | file = os.path.join(FLAGS.config, coco_names) 33 | elif model == 'yolo9000': 34 | print("Model has name yolo9000, loading yolo9000 labels.") 35 | file = os.path.join(FLAGS.config, nine_names) 36 | with open(file, 'r') as f: 37 | meta['labels'] = list() 38 | labs = [l.strip() for l in f.readlines()] 39 | for lab in labs: 40 | if lab == '----': break 41 | meta['labels'] += [lab] 42 | if len(meta['labels']) == 0: 43 | meta['labels'] = labels20 44 | 
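# (Editor's note, not part of the original file: labels() above resolves class names in
#  the following order: the built-in VOC list for VOC-named models; coco.names or
#  9k.names for COCO- and yolo9000-named models; otherwise the plain-text file given by
#  FLAGS.labels, with one class name per line. A hypothetical single-class file would
#  simply contain e.g. "person". Any line equal to '----' truncates the list, as
#  implemented above.)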
45 | def is_inp(self, name): 46 | return name.lower().endswith(('.jpg', '.jpeg', '.png')) 47 | 48 | def show(im, allobj, S, w, h, cellx, celly): 49 | for obj in allobj: 50 | a = obj[5] % S 51 | b = obj[5] // S 52 | cx = a + obj[1] 53 | cy = b + obj[2] 54 | centerx = cx * cellx 55 | centery = cy * celly 56 | ww = obj[3]**2 * w 57 | hh = obj[4]**2 * h 58 | cv2.rectangle(im, 59 | (int(centerx - ww/2), int(centery - hh/2)), 60 | (int(centerx + ww/2), int(centery + hh/2)), 61 | (0,0,255), 2) 62 | cv2.imshow('result', im) 63 | cv2.waitKey() 64 | cv2.destroyAllWindows() 65 | 66 | def show2(im, allobj): 67 | for obj in allobj: 68 | cv2.rectangle(im, 69 | (obj[1], obj[2]), 70 | (obj[3], obj[4]), 71 | (0,0,255),2) 72 | cv2.imshow('result', im) 73 | cv2.waitKey() 74 | cv2.destroyAllWindows() 75 | 76 | 77 | _MVA = .05 78 | 79 | def profile(self, net): 80 | pass 81 | # data = self.parse(exclusive = True) 82 | # size = len(data); batch = self.FLAGS.batch 83 | # all_inp_ = [x[0] for x in data] 84 | # net.say('Will cycle through {} examples {} times'.format( 85 | # len(all_inp_), net.FLAGS.epoch)) 86 | 87 | # fetch = list(); mvave = list(); names = list(); 88 | # this = net.top 89 | # conv_lay = ['convolutional', 'connected', 'local', 'conv-select'] 90 | # while this.inp is not None: 91 | # if this.lay.type in conv_lay: 92 | # fetch = [this.out] + fetch 93 | # names = [this.lay.signature] + names 94 | # mvave = [None] + mvave 95 | # this = this.inp 96 | # print(names) 97 | 98 | # total = int(); allofthem = len(all_inp_) * net.FLAGS.epoch 99 | # batch = min(net.FLAGS.batch, len(all_inp_)) 100 | # for count in range(net.FLAGS.epoch): 101 | # net.say('EPOCH {}'.format(count)) 102 | # for j in range(len(all_inp_)/batch): 103 | # inp_feed = list(); new_all = list() 104 | # all_inp = all_inp_[j*batch: (j*batch+batch)] 105 | # for inp in all_inp: 106 | # new_all += [inp] 107 | # this_inp = os.path.join(net.FLAGS.dataset, inp) 108 | # this_inp = net.framework.preprocess(this_inp) 109 | # expanded = np.expand_dims(this_inp, 0) 110 | # inp_feed.append(expanded) 111 | # all_inp = new_all 112 | # feed_dict = {net.inp : np.concatenate(inp_feed, 0)} 113 | # out = net.sess.run(fetch, feed_dict) 114 | 115 | # for i, o in enumerate(out): 116 | # oi = out[i]; 117 | # dim = len(oi.shape) - 1 118 | # ai = mvave[i]; 119 | # mi = np.mean(oi, tuple(range(dim))) 120 | # vi = np.var(oi, tuple(range(dim))) 121 | # if ai is None: mvave[i] = [mi, vi] 122 | # elif 'banana ninja yada yada': 123 | # ai[0] = (1 - _MVA) * ai[0] + _MVA * mi 124 | # ai[1] = (1 - _MVA) * ai[1] + _MVA * vi 125 | # total += len(inp_feed) 126 | # net.say('{} / {} = {}%'.format( 127 | # total, allofthem, 100. 
* total / allofthem)) 128 | 129 | # with open('profile', 'wb') as f: 130 | # pickle.dump([mvave], f, protocol = -1) 131 | -------------------------------------------------------------------------------- /darkflow/net/yolo/predict.py: -------------------------------------------------------------------------------- 1 | from ...utils.im_transform import imcv2_recolor, imcv2_affine_trans 2 | from ...utils.box import BoundBox, box_iou, prob_compare 3 | import numpy as np 4 | import cv2 5 | import os 6 | import json 7 | from ...cython_utils.cy_yolo_findboxes import yolo_box_constructor 8 | 9 | def _fix(obj, dims, scale, offs): 10 | for i in range(1, 5): 11 | dim = dims[(i + 1) % 2] 12 | off = offs[(i + 1) % 2] 13 | obj[i] = int(obj[i] * scale - off) 14 | obj[i] = max(min(obj[i], dim), 0) 15 | 16 | def resize_input(self, im): 17 | h, w, c = self.meta['inp_size'] 18 | imsz = cv2.resize(im, (w, h)) 19 | imsz = imsz / 255. 20 | imsz = imsz[:,:,::-1] 21 | return imsz 22 | 23 | def process_box(self, b, h, w, threshold): 24 | max_indx = np.argmax(b.probs) 25 | max_prob = b.probs[max_indx] 26 | label = self.meta['labels'][max_indx] 27 | if max_prob > threshold: 28 | left = int ((b.x - b.w/2.) * w) 29 | right = int ((b.x + b.w/2.) * w) 30 | top = int ((b.y - b.h/2.) * h) 31 | bot = int ((b.y + b.h/2.) * h) 32 | if left < 0 : left = 0 33 | if right > w - 1: right = w - 1 34 | if top < 0 : top = 0 35 | if bot > h - 1: bot = h - 1 36 | mess = '{}'.format(label) 37 | return (left, right, top, bot, mess, max_indx, max_prob) 38 | return None 39 | 40 | def findboxes(self, net_out): 41 | meta, FLAGS = self.meta, self.FLAGS 42 | threshold = FLAGS.threshold 43 | 44 | boxes = [] 45 | boxes = yolo_box_constructor(meta, net_out, threshold) 46 | 47 | return boxes 48 | 49 | def preprocess(self, im, allobj = None): 50 | """ 51 | Takes an image, return it as a numpy tensor that is readily 52 | to be fed into tfnet. If there is an accompanied annotation (allobj), 53 | meaning this preprocessing is serving the train process, then this 54 | image will be transformed with random noise to augment training data, 55 | using scale, translation, flipping and recolor. The accompanied 56 | parsed annotation (allobj) will also be modified accordingly. 
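    (Note added by the editor, not part of the original docstring: when allobj is
    None, i.e. plain inference, the function simply returns the image resized to
    meta['inp_size'], scaled to [0, 1] and converted from BGR to RGB by resize_input.)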
57 | """ 58 | if type(im) is not np.ndarray: 59 | im = cv2.imread(im) 60 | 61 | if allobj is not None: # in training mode 62 | result = imcv2_affine_trans(im) 63 | im, dims, trans_param = result 64 | scale, offs, flip = trans_param 65 | for obj in allobj: 66 | _fix(obj, dims, scale, offs) 67 | if not flip: continue 68 | obj_1_ = obj[1] 69 | obj[1] = dims[0] - obj[3] 70 | obj[3] = dims[0] - obj_1_ 71 | im = imcv2_recolor(im) 72 | 73 | im = self.resize_input(im) 74 | if allobj is None: return im 75 | return im#, np.array(im) # for unit testing 76 | 77 | def postprocess(self, net_out, im, save = True): 78 | """ 79 | Takes net output, draw predictions, save to disk 80 | """ 81 | meta, FLAGS = self.meta, self.FLAGS 82 | threshold = FLAGS.threshold 83 | colors, labels = meta['colors'], meta['labels'] 84 | 85 | boxes = self.findboxes(net_out) 86 | 87 | if type(im) is not np.ndarray: 88 | imgcv = cv2.imread(im) 89 | else: imgcv = im 90 | 91 | h, w, _ = imgcv.shape 92 | resultsForJSON = [] 93 | for b in boxes: 94 | boxResults = self.process_box(b, h, w, threshold) 95 | if boxResults is None: 96 | continue 97 | left, right, top, bot, mess, max_indx, confidence = boxResults 98 | thick = int((h + w) // 300) 99 | if self.FLAGS.json: 100 | resultsForJSON.append({"label": mess, "confidence": float('%.2f' % confidence), "topleft": {"x": left, "y": top}, "bottomright": {"x": right, "y": bot}}) 101 | continue 102 | 103 | cv2.rectangle(imgcv, 104 | (left, top), (right, bot), 105 | self.meta['colors'][max_indx], thick) 106 | cv2.putText( 107 | imgcv, mess, (left, top - 12), 108 | 0, 1e-3 * h, self.meta['colors'][max_indx], 109 | thick // 3) 110 | 111 | 112 | if not save: return imgcv 113 | 114 | outfolder = os.path.join(self.FLAGS.imgdir, 'out') 115 | img_name = os.path.join(outfolder, os.path.basename(im)) 116 | if self.FLAGS.json: 117 | textJSON = json.dumps(resultsForJSON) 118 | textFile = os.path.splitext(img_name)[0] + ".json" 119 | with open(textFile, 'w') as f: 120 | f.write(textJSON) 121 | return 122 | 123 | cv2.imwrite(img_name, imgcv) 124 | -------------------------------------------------------------------------------- /darkflow/net/yolo/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | import pickle 3 | import tensorflow as tf 4 | from .misc import show 5 | import numpy as np 6 | import os 7 | 8 | def loss(self, net_out): 9 | """ 10 | Takes net.out and placeholders value 11 | returned in batch() func above, 12 | to build train_op and loss 13 | """ 14 | # meta 15 | m = self.meta 16 | sprob = float(m['class_scale']) 17 | sconf = float(m['object_scale']) 18 | snoob = float(m['noobject_scale']) 19 | scoor = float(m['coord_scale']) 20 | S, B, C = m['side'], m['num'], m['classes'] 21 | SS = S * S # number of grid cells 22 | 23 | print('{} loss hyper-parameters:'.format(m['model'])) 24 | print('\tside = {}'.format(m['side'])) 25 | print('\tbox = {}'.format(m['num'])) 26 | print('\tclasses = {}'.format(m['classes'])) 27 | print('\tscales = {}'.format([sprob, sconf, snoob, scoor])) 28 | 29 | size1 = [None, SS, C] 30 | size2 = [None, SS, B] 31 | 32 | # return the below placeholders 33 | _probs = tf.placeholder(tf.float32, size1) 34 | _confs = tf.placeholder(tf.float32, size2) 35 | _coord = tf.placeholder(tf.float32, size2 + [4]) 36 | # weights term for L2 loss 37 | _proid = tf.placeholder(tf.float32, size1) 38 | # material calculating IOU 39 | _areas = tf.placeholder(tf.float32, size2) 40 | _upleft = 
tf.placeholder(tf.float32, size2 + [2]) 41 | _botright = tf.placeholder(tf.float32, size2 + [2]) 42 | 43 | self.placeholders = { 44 | 'probs':_probs, 'confs':_confs, 'coord':_coord, 'proid':_proid, 45 | 'areas':_areas, 'upleft':_upleft, 'botright':_botright 46 | } 47 | 48 | # Extract the coordinate prediction from net.out 49 | coords = net_out[:, SS * (C + B):] 50 | coords = tf.reshape(coords, [-1, SS, B, 4]) 51 | wh = tf.pow(coords[:,:,:,2:4], 2) * S # unit: grid cell 52 | area_pred = wh[:,:,:,0] * wh[:,:,:,1] # unit: grid cell^2 53 | centers = coords[:,:,:,0:2] # [batch, SS, B, 2] 54 | floor = centers - (wh * .5) # [batch, SS, B, 2] 55 | ceil = centers + (wh * .5) # [batch, SS, B, 2] 56 | 57 | # calculate the intersection areas 58 | intersect_upleft = tf.maximum(floor, _upleft) 59 | intersect_botright = tf.minimum(ceil , _botright) 60 | intersect_wh = intersect_botright - intersect_upleft 61 | intersect_wh = tf.maximum(intersect_wh, 0.0) 62 | intersect = tf.multiply(intersect_wh[:,:,:,0], intersect_wh[:,:,:,1]) 63 | 64 | # calculate the best IOU, set 0.0 confidence for worse boxes 65 | iou = tf.truediv(intersect, _areas + area_pred - intersect) 66 | best_box = tf.equal(iou, tf.reduce_max(iou, [2], True)) 67 | best_box = tf.to_float(best_box) 68 | confs = tf.multiply(best_box, _confs) 69 | 70 | # take care of the weight terms 71 | conid = snoob * (1. - confs) + sconf * confs 72 | weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3) 73 | cooid = scoor * weight_coo 74 | proid = sprob * _proid 75 | 76 | # flatten 'em all 77 | probs = slim.flatten(_probs) 78 | proid = slim.flatten(proid) 79 | confs = slim.flatten(confs) 80 | conid = slim.flatten(conid) 81 | coord = slim.flatten(_coord) 82 | cooid = slim.flatten(cooid) 83 | 84 | self.fetch += [probs, confs, conid, cooid, proid] 85 | true = tf.concat([probs, confs, coord], 1) 86 | wght = tf.concat([proid, conid, cooid], 1) 87 | print('Building {} loss'.format(m['model'])) 88 | loss = tf.pow(net_out - true, 2) 89 | loss = tf.multiply(loss, wght) 90 | loss = tf.reduce_sum(loss, 1) 91 | self.loss = .5 * tf.reduce_mean(loss) 92 | tf.summary.scalar('{} loss'.format(m['model']), self.loss) 93 | -------------------------------------------------------------------------------- /darkflow/net/yolov2/__init__.py: -------------------------------------------------------------------------------- 1 | from . import train 2 | from . import predict 3 | from . 
import data 4 | from ..yolo import misc 5 | import numpy as np 6 | -------------------------------------------------------------------------------- /darkflow/net/yolov2/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/yolov2/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/yolov2/__pycache__/data.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/yolov2/__pycache__/data.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/yolov2/__pycache__/predict.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/yolov2/__pycache__/predict.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/yolov2/__pycache__/train.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/net/yolov2/__pycache__/train.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/net/yolov2/data.py: -------------------------------------------------------------------------------- 1 | from ...utils.pascal_voc_clean_xml import pascal_voc_clean_xml 2 | from numpy.random import permutation as perm 3 | from ..yolo.predict import preprocess 4 | from ..yolo.data import shuffle 5 | from copy import deepcopy 6 | import pickle 7 | import numpy as np 8 | import os 9 | 10 | def _batch(self, chunk): 11 | """ 12 | Takes a chunk of parsed annotations 13 | returns value for placeholders of net's 14 | input & loss layer correspond to this chunk 15 | """ 16 | meta = self.meta 17 | labels = meta['labels'] 18 | 19 | H, W, _ = meta['out_size'] 20 | C, B = meta['classes'], meta['num'] 21 | anchors = meta['anchors'] 22 | 23 | # preprocess 24 | jpg = chunk[0]; w, h, allobj_ = chunk[1] 25 | allobj = deepcopy(allobj_) 26 | path = os.path.join(self.FLAGS.dataset, jpg) 27 | img = self.preprocess(path, allobj) 28 | 29 | # Calculate regression target 30 | cellx = 1. * w / W 31 | celly = 1. 
* h / H 32 | for obj in allobj: 33 | centerx = .5*(obj[1]+obj[3]) #xmin, xmax 34 | centery = .5*(obj[2]+obj[4]) #ymin, ymax 35 | cx = centerx / cellx 36 | cy = centery / celly 37 | if cx >= W or cy >= H: return None, None 38 | obj[3] = float(obj[3]-obj[1]) / w 39 | obj[4] = float(obj[4]-obj[2]) / h 40 | obj[3] = np.sqrt(obj[3]) 41 | obj[4] = np.sqrt(obj[4]) 42 | obj[1] = cx - np.floor(cx) # centerx 43 | obj[2] = cy - np.floor(cy) # centery 44 | obj += [int(np.floor(cy) * W + np.floor(cx))] 45 | 46 | # show(im, allobj, S, w, h, cellx, celly) # unit test 47 | 48 | # Calculate placeholders' values 49 | probs = np.zeros([H*W,B,C]) 50 | confs = np.zeros([H*W,B]) 51 | coord = np.zeros([H*W,B,4]) 52 | proid = np.zeros([H*W,B,C]) 53 | prear = np.zeros([H*W,4]) 54 | for obj in allobj: 55 | probs[obj[5], :, :] = [[0.]*C] * B 56 | probs[obj[5], :, labels.index(obj[0])] = 1. 57 | proid[obj[5], :, :] = [[1.]*C] * B 58 | coord[obj[5], :, :] = [obj[1:5]] * B 59 | prear[obj[5],0] = obj[1] - obj[3]**2 * .5 * W # xleft 60 | prear[obj[5],1] = obj[2] - obj[4]**2 * .5 * H # yup 61 | prear[obj[5],2] = obj[1] + obj[3]**2 * .5 * W # xright 62 | prear[obj[5],3] = obj[2] + obj[4]**2 * .5 * H # ybot 63 | confs[obj[5], :] = [1.] * B 64 | 65 | # Finalise the placeholders' values 66 | upleft = np.expand_dims(prear[:,0:2], 1) 67 | botright = np.expand_dims(prear[:,2:4], 1) 68 | wh = botright - upleft; 69 | area = wh[:,:,0] * wh[:,:,1] 70 | upleft = np.concatenate([upleft] * B, 1) 71 | botright = np.concatenate([botright] * B, 1) 72 | areas = np.concatenate([area] * B, 1) 73 | 74 | # value for placeholder at input layer 75 | inp_feed_val = img 76 | # value for placeholder at loss layer 77 | loss_feed_val = { 78 | 'probs': probs, 'confs': confs, 79 | 'coord': coord, 'proid': proid, 80 | 'areas': areas, 'upleft': upleft, 81 | 'botright': botright 82 | } 83 | 84 | return inp_feed_val, loss_feed_val 85 | 86 | -------------------------------------------------------------------------------- /darkflow/net/yolov2/predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import cv2 4 | import os 5 | import json 6 | #from scipy.special import expit 7 | #from utils.box import BoundBox, box_iou, prob_compare 8 | #from utils.box import prob_compare2, box_intersection 9 | from ...utils.box import BoundBox 10 | from ...cython_utils.cy_yolo2_findboxes import box_constructor 11 | 12 | def expit(x): 13 | return 1. / (1. 
+ np.exp(-x)) 14 | 15 | def _softmax(x): 16 | e_x = np.exp(x - np.max(x)) 17 | out = e_x / e_x.sum() 18 | return out 19 | 20 | def findboxes(self, net_out): 21 | # meta 22 | meta = self.meta 23 | boxes = list() 24 | boxes=box_constructor(meta,net_out) 25 | return boxes 26 | 27 | def postprocess(self, net_out, im, save = True): 28 | """ 29 | Takes net output, draw net_out, save to disk 30 | """ 31 | boxes = self.findboxes(net_out) 32 | 33 | # meta 34 | meta = self.meta 35 | threshold = meta['thresh'] 36 | colors = meta['colors'] 37 | labels = meta['labels'] 38 | if type(im) is not np.ndarray: 39 | imgcv = cv2.imread(im) 40 | else: imgcv = im 41 | h, w, _ = imgcv.shape 42 | 43 | resultsForJSON = [] 44 | for b in boxes: 45 | boxResults = self.process_box(b, h, w, threshold) 46 | if boxResults is None: 47 | continue 48 | left, right, top, bot, mess, max_indx, confidence = boxResults 49 | thick = int((h + w) // 300) 50 | if self.FLAGS.json: 51 | resultsForJSON.append({"label": mess, "confidence": float('%.2f' % confidence), "topleft": {"x": left, "y": top}, "bottomright": {"x": right, "y": bot}}) 52 | continue 53 | 54 | cv2.rectangle(imgcv, 55 | (left, top), (right, bot), 56 | colors[max_indx], thick) 57 | cv2.putText(imgcv, mess, (left, top - 12), 58 | 0, 1e-3 * h, colors[max_indx],thick//3) 59 | 60 | if not save: return imgcv 61 | 62 | outfolder = os.path.join(self.FLAGS.imgdir, 'out') 63 | img_name = os.path.join(outfolder, os.path.basename(im)) 64 | if self.FLAGS.json: 65 | textJSON = json.dumps(resultsForJSON) 66 | textFile = os.path.splitext(img_name)[0] + ".json" 67 | with open(textFile, 'w') as f: 68 | f.write(textJSON) 69 | return 70 | 71 | cv2.imwrite(img_name, imgcv) 72 | -------------------------------------------------------------------------------- /darkflow/net/yolov2/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow.contrib.slim as slim 2 | import pickle 3 | import tensorflow as tf 4 | from ..yolo.misc import show 5 | import numpy as np 6 | import os 7 | import math 8 | 9 | def expit_tensor(x): 10 | return 1. / (1. 
+ tf.exp(-x)) 11 | 12 | def loss(self, net_out): 13 | """ 14 | Takes net.out and placeholders value 15 | returned in batch() func above, 16 | to build train_op and loss 17 | """ 18 | # meta 19 | m = self.meta 20 | sprob = float(m['class_scale']) 21 | sconf = float(m['object_scale']) 22 | snoob = float(m['noobject_scale']) 23 | scoor = float(m['coord_scale']) 24 | H, W, _ = m['out_size'] 25 | B, C = m['num'], m['classes'] 26 | HW = H * W # number of grid cells 27 | anchors = m['anchors'] 28 | 29 | print('{} loss hyper-parameters:'.format(m['model'])) 30 | print('\tH = {}'.format(H)) 31 | print('\tW = {}'.format(W)) 32 | print('\tbox = {}'.format(m['num'])) 33 | print('\tclasses = {}'.format(m['classes'])) 34 | print('\tscales = {}'.format([sprob, sconf, snoob, scoor])) 35 | 36 | size1 = [None, HW, B, C] 37 | size2 = [None, HW, B] 38 | 39 | # return the below placeholders 40 | _probs = tf.placeholder(tf.float32, size1) 41 | _confs = tf.placeholder(tf.float32, size2) 42 | _coord = tf.placeholder(tf.float32, size2 + [4]) 43 | # weights term for L2 loss 44 | _proid = tf.placeholder(tf.float32, size1) 45 | # material calculating IOU 46 | _areas = tf.placeholder(tf.float32, size2) 47 | _upleft = tf.placeholder(tf.float32, size2 + [2]) 48 | _botright = tf.placeholder(tf.float32, size2 + [2]) 49 | 50 | self.placeholders = { 51 | 'probs':_probs, 'confs':_confs, 'coord':_coord, 'proid':_proid, 52 | 'areas':_areas, 'upleft':_upleft, 'botright':_botright 53 | } 54 | 55 | # Extract the coordinate prediction from net.out 56 | net_out_reshape = tf.reshape(net_out, [-1, H, W, B, (4 + 1 + C)]) 57 | coords = net_out_reshape[:, :, :, :, :4] 58 | coords = tf.reshape(coords, [-1, H*W, B, 4]) 59 | adjusted_coords_xy = expit_tensor(coords[:,:,:,0:2]) 60 | adjusted_coords_wh = tf.sqrt(tf.exp(coords[:,:,:,2:4]) * np.reshape(anchors, [1, 1, B, 2]) / np.reshape([W, H], [1, 1, 1, 2])) 61 | coords = tf.concat([adjusted_coords_xy, adjusted_coords_wh], 3) 62 | 63 | adjusted_c = expit_tensor(net_out_reshape[:, :, :, :, 4]) 64 | adjusted_c = tf.reshape(adjusted_c, [-1, H*W, B, 1]) 65 | 66 | adjusted_prob = tf.nn.softmax(net_out_reshape[:, :, :, :, 5:]) 67 | adjusted_prob = tf.reshape(adjusted_prob, [-1, H*W, B, C]) 68 | 69 | adjusted_net_out = tf.concat([adjusted_coords_xy, adjusted_coords_wh, adjusted_c, adjusted_prob], 3) 70 | 71 | wh = tf.pow(coords[:,:,:,2:4], 2) * np.reshape([W, H], [1, 1, 1, 2]) 72 | area_pred = wh[:,:,:,0] * wh[:,:,:,1] 73 | centers = coords[:,:,:,0:2] 74 | floor = centers - (wh * .5) 75 | ceil = centers + (wh * .5) 76 | 77 | # calculate the intersection areas 78 | intersect_upleft = tf.maximum(floor, _upleft) 79 | intersect_botright = tf.minimum(ceil , _botright) 80 | intersect_wh = intersect_botright - intersect_upleft 81 | intersect_wh = tf.maximum(intersect_wh, 0.0) 82 | intersect = tf.multiply(intersect_wh[:,:,:,0], intersect_wh[:,:,:,1]) 83 | 84 | # calculate the best IOU, set 0.0 confidence for worse boxes 85 | iou = tf.truediv(intersect, _areas + area_pred - intersect) 86 | best_box = tf.equal(iou, tf.reduce_max(iou, [2], True)) 87 | best_box = tf.to_float(best_box) 88 | confs = tf.multiply(best_box, _confs) 89 | 90 | # take care of the weight terms 91 | conid = snoob * (1. 
- confs) + sconf * confs 92 | weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3) 93 | cooid = scoor * weight_coo 94 | weight_pro = tf.concat(C * [tf.expand_dims(confs, -1)], 3) 95 | proid = sprob * weight_pro 96 | 97 | self.fetch += [_probs, confs, conid, cooid, proid] 98 | true = tf.concat([_coord, tf.expand_dims(confs, 3), _probs ], 3) 99 | wght = tf.concat([cooid, tf.expand_dims(conid, 3), proid ], 3) 100 | 101 | print('Building {} loss'.format(m['model'])) 102 | loss = tf.pow(adjusted_net_out - true, 2) 103 | loss = tf.multiply(loss, wght) 104 | loss = tf.reshape(loss, [-1, H*W*B*(4 + 1 + C)]) 105 | loss = tf.reduce_sum(loss, 1) 106 | self.loss = .5 * tf.reduce_mean(loss) 107 | tf.summary.scalar('{} loss'.format(m['model']), self.loss) -------------------------------------------------------------------------------- /darkflow/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/utils/__pycache__/box.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/utils/__pycache__/box.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/utils/__pycache__/im_transform.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/utils/__pycache__/im_transform.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/utils/__pycache__/loader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/utils/__pycache__/loader.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/utils/__pycache__/pascal_voc_clean_xml.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/utils/__pycache__/pascal_voc_clean_xml.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/utils/__pycache__/process.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/darkflow/utils/__pycache__/process.cpython-36.pyc -------------------------------------------------------------------------------- /darkflow/utils/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class BoundBox: 4 | def __init__(self, classes): 5 | self.x, self.y = float(), float() 6 | self.w, self.h = float(), float() 7 | self.c = float() 8 | self.class_num = classes 9 | self.probs = np.zeros((classes,)) 10 | 11 | def overlap(x1,w1,x2,w2): 12 | l1 = x1 - w1 / 2.; 13 | l2 = x2 - 
w2 / 2.; 14 | left = max(l1, l2) 15 | r1 = x1 + w1 / 2.; 16 | r2 = x2 + w2 / 2.; 17 | right = min(r1, r2) 18 | return right - left; 19 | 20 | def box_intersection(a, b): 21 | w = overlap(a.x, a.w, b.x, b.w); 22 | h = overlap(a.y, a.h, b.y, b.h); 23 | if w < 0 or h < 0: return 0; 24 | area = w * h; 25 | return area; 26 | 27 | def box_union(a, b): 28 | i = box_intersection(a, b); 29 | u = a.w * a.h + b.w * b.h - i; 30 | return u; 31 | 32 | def box_iou(a, b): 33 | return box_intersection(a, b) / box_union(a, b); 34 | 35 | def prob_compare(box): 36 | return box.probs[box.class_num] 37 | 38 | def prob_compare2(boxa, boxb): 39 | if (boxa.pi < boxb.pi): 40 | return 1 41 | elif(boxa.pi == boxb.pi): 42 | return 0 43 | else: 44 | return -1 -------------------------------------------------------------------------------- /darkflow/utils/im_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | def imcv2_recolor(im, a = .1): 5 | t = [np.random.uniform()] 6 | t += [np.random.uniform()] 7 | t += [np.random.uniform()] 8 | t = np.array(t) * 2. - 1. 9 | 10 | # random amplify each channel 11 | im = im * (1 + t * a) 12 | mx = 255. * (1 + a) 13 | up = np.random.uniform() * 2 - 1 14 | # im = np.power(im/mx, 1. + up * .5) 15 | im = cv2.pow(im/mx, 1. + up * .5) 16 | return np.array(im * 255., np.uint8) 17 | 18 | def imcv2_affine_trans(im): 19 | # Scale and translate 20 | h, w, c = im.shape 21 | scale = np.random.uniform() / 10. + 1. 22 | max_offx = (scale-1.) * w 23 | max_offy = (scale-1.) * h 24 | offx = int(np.random.uniform() * max_offx) 25 | offy = int(np.random.uniform() * max_offy) 26 | 27 | im = cv2.resize(im, (0,0), fx = scale, fy = scale) 28 | im = im[offy : (offy + h), offx : (offx + w)] 29 | flip = np.random.binomial(1, .5) 30 | if flip: im = cv2.flip(im, 1) 31 | return im, [w, h, c], [scale, [offx, offy], flip] 32 | -------------------------------------------------------------------------------- /darkflow/utils/loader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from .. 
import dark 4 | import numpy as np 5 | from os.path import basename 6 | 7 | class loader(object): 8 | """ 9 | interface to work with both .weights and .ckpt files 10 | in loading / recollecting / resolving mode 11 | """ 12 | VAR_LAYER = ['convolutional', 'connected', 'local', 13 | 'select', 'conv-select', 14 | 'extract', 'conv-extract'] 15 | 16 | def __init__(self, *args): 17 | self.src_key = list() 18 | self.vals = list() 19 | self.load(*args) 20 | 21 | def __call__(self, key): 22 | for idx in range(len(key)): 23 | val = self.find(key, idx) 24 | if val is not None: return val 25 | return None 26 | 27 | def find(self, key, idx): 28 | up_to = min(len(self.src_key), 4) 29 | for i in range(up_to): 30 | key_b = self.src_key[i] 31 | if key_b[idx:] == key[idx:]: 32 | return self.yields(i) 33 | return None 34 | 35 | def yields(self, idx): 36 | del self.src_key[idx] 37 | temp = self.vals[idx] 38 | del self.vals[idx] 39 | return temp 40 | 41 | class weights_loader(loader): 42 | """one who understands .weights files""" 43 | 44 | _W_ORDER = dict({ # order of param flattened into .weights file 45 | 'convolutional': [ 46 | 'biases','gamma','moving_mean','moving_variance','kernel' 47 | ], 48 | 'connected': ['biases', 'weights'], 49 | 'local': ['biases', 'kernels'] 50 | }) 51 | 52 | def load(self, path, src_layers): 53 | self.src_layers = src_layers 54 | walker = weights_walker(path) 55 | 56 | for i, layer in enumerate(src_layers): 57 | if layer.type not in self.VAR_LAYER: continue 58 | self.src_key.append([layer]) 59 | 60 | if walker.eof: new = None 61 | else: 62 | args = layer.signature 63 | new = dark.darknet.create_darkop(*args) 64 | self.vals.append(new) 65 | 66 | if new is None: continue 67 | order = self._W_ORDER[new.type] 68 | for par in order: 69 | if par not in new.wshape: continue 70 | val = walker.walk(new.wsize[par]) 71 | new.w[par] = val 72 | new.finalize(walker.transpose) 73 | 74 | if walker.path is not None: 75 | assert walker.offset == walker.size, \ 76 | 'expect {} bytes, found {}'.format( 77 | walker.offset, walker.size) 78 | print('Successfully identified {} bytes'.format( 79 | walker.offset)) 80 | 81 | class checkpoint_loader(loader): 82 | """ 83 | one who understands .ckpt files, very much 84 | """ 85 | def load(self, ckpt, ignore): 86 | meta = ckpt + '.meta' 87 | with tf.Graph().as_default() as graph: 88 | with tf.Session().as_default() as sess: 89 | saver = tf.train.import_meta_graph(meta) 90 | saver.restore(sess, ckpt) 91 | for var in tf.global_variables(): 92 | name = var.name.split(':')[0] 93 | packet = [name, var.get_shape().as_list()] 94 | self.src_key += [packet] 95 | self.vals += [var.eval(sess)] 96 | 97 | def create_loader(path, cfg = None): 98 | if path is None: 99 | load_type = weights_loader 100 | elif '.weights' in path: 101 | load_type = weights_loader 102 | else: 103 | load_type = checkpoint_loader 104 | 105 | return load_type(path, cfg) 106 | 107 | class weights_walker(object): 108 | """incremental reader of float32 binary files""" 109 | def __init__(self, path): 110 | self.eof = False # end of file 111 | self.path = path # current pos 112 | if path is None: 113 | self.eof = True 114 | return 115 | else: 116 | self.size = os.path.getsize(path)# save the path 117 | major, minor, revision, seen = np.memmap(path, 118 | shape = (), mode = 'r', offset = 0, 119 | dtype = '({})i4,'.format(4)) 120 | self.transpose = major > 1000 or minor > 1000 121 | self.offset = 16 122 | 123 | def walk(self, size): 124 | if self.eof: return None 125 | end_point = self.offset + 4 * size 
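# note: the .weights file stores raw float32 values (4 bytes each), so reading 'size' of them advances the offset by 4 * size bytes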
126 | assert end_point <= self.size, \ 127 | 'Over-read {}'.format(self.path) 128 | 129 | float32_1D_array = np.memmap( 130 | self.path, shape = (), mode = 'r', 131 | offset = self.offset, 132 | dtype='({})float32,'.format(size) 133 | ) 134 | 135 | self.offset = end_point 136 | if end_point == self.size: 137 | self.eof = True 138 | return float32_1D_array 139 | 140 | def model_name(file_path): 141 | file_name = basename(file_path) 142 | ext = str() 143 | if '.' in file_name: # exclude extension 144 | file_name = file_name.split('.') 145 | ext = file_name[-1] 146 | file_name = '.'.join(file_name[:-1]) 147 | if ext == str() or ext == 'meta': # ckpt file 148 | file_name = file_name.split('-') 149 | num = int(file_name[-1]) 150 | return '-'.join(file_name[:-1]) 151 | if ext == 'weights': 152 | return file_name -------------------------------------------------------------------------------- /darkflow/utils/pascal_voc_clean_xml.py: -------------------------------------------------------------------------------- 1 | """ 2 | parse PASCAL VOC xml annotations 3 | """ 4 | 5 | import os 6 | import sys 7 | import xml.etree.ElementTree as ET 8 | import glob 9 | 10 | 11 | def _pp(l): # pretty printing 12 | for i in l: print('{}: {}'.format(i,l[i])) 13 | 14 | def pascal_voc_clean_xml(ANN, pick, exclusive = False): 15 | print('Parsing for {} {}'.format( 16 | pick, 'exclusively' * int(exclusive))) 17 | 18 | dumps = list() 19 | cur_dir = os.getcwd() 20 | os.chdir(ANN) 21 | annotations = os.listdir('.') 22 | annotations = glob.glob(str(annotations)+'*.xml') 23 | size = len(annotations) 24 | 25 | for i, file in enumerate(annotations): 26 | # progress bar 27 | sys.stdout.write('\r') 28 | percentage = 1. * (i+1) / size 29 | progress = int(percentage * 20) 30 | bar_arg = [progress*'=', ' '*(19-progress), percentage*100] 31 | bar_arg += [file] 32 | sys.stdout.write('[{}>{}]{:.0f}% {}'.format(*bar_arg)) 33 | sys.stdout.flush() 34 | 35 | # actual parsing 36 | in_file = open(file) 37 | tree=ET.parse(in_file) 38 | root = tree.getroot() 39 | jpg = str(root.find('filename').text) 40 | imsize = root.find('size') 41 | w = int(imsize.find('width').text) 42 | h = int(imsize.find('height').text) 43 | all = list() 44 | 45 | for obj in root.iter('object'): 46 | current = list() 47 | name = obj.find('name').text 48 | if name not in pick: 49 | continue 50 | 51 | xmlbox = obj.find('bndbox') 52 | xn = int(float(xmlbox.find('xmin').text)) 53 | xx = int(float(xmlbox.find('xmax').text)) 54 | yn = int(float(xmlbox.find('ymin').text)) 55 | yx = int(float(xmlbox.find('ymax').text)) 56 | current = [name,xn,yn,xx,yx] 57 | all += [current] 58 | 59 | add = [[jpg, [w, h, all]]] 60 | dumps += add 61 | in_file.close() 62 | 63 | # gather all stats 64 | stat = dict() 65 | for dump in dumps: 66 | all = dump[1][2] 67 | for current in all: 68 | if current[0] in pick: 69 | if current[0] in stat: 70 | stat[current[0]]+=1 71 | else: 72 | stat[current[0]] =1 73 | 74 | print('\nStatistics:') 75 | _pp(stat) 76 | print('Dataset size: {}'.format(len(dumps))) 77 | 78 | os.chdir(cur_dir) 79 | return dumps -------------------------------------------------------------------------------- /darkflow/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0' 2 | """Current version of darkflow.""" -------------------------------------------------------------------------------- /data_augment.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | 
from tqdm import tqdm 4 | import random 5 | 6 | DIR = 'dataset/' 7 | COLORS = [[200, 0, 0], [0, 200, 0], [0, 0, 200], [200, 200, 0]] 8 | black = [0, 0, 0] 9 | for subdir in ['train', 'val']: 10 | img_path = os.path.join(DIR, subdir, 'images') 11 | mask_path = os.path.join(DIR, subdir, 'masks') 12 | 13 | for img in tqdm(os.listdir(img_path)): 14 | image = cv2.imread(os.path.join(img_path, img)) 15 | mask = cv2.imread(os.path.join(mask_path, os.path.splitext(img)[0] + '_mask.png')) 16 | (img_w, img_h) = image.shape[:-1] 17 | #print(image.shape[:-1]) 18 | (obj_w, obj_h) = (random.randint(70, 100), random.randint(70, 100)) 19 | (x, y) = (random.randint(img_w/2 - 40, img_w/2 + 40), random.randint(img_h/2 - 70, img_h/2 + 20)) 20 | (x2, y2) = (x+obj_w, y+obj_h) 21 | color = random.choice(COLORS) 22 | cv2.rectangle(image, (x, y), (x2, y2), color, -1) 23 | cv2.rectangle(mask, (x, y), (x2, y2), black, -1) 24 | 25 | cv2.imwrite(os.path.join(img_path, img), image) 26 | cv2.imwrite(os.path.join(mask_path, os.path.splitext(img)[0] + '_mask.png'), mask) 27 | -------------------------------------------------------------------------------- /flow: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | import sys 4 | from darkflow.cli import cliHandler 5 | 6 | cliHandler(sys.argv) 7 | 8 | -------------------------------------------------------------------------------- /issues.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/issues.png -------------------------------------------------------------------------------- /labels.txt: -------------------------------------------------------------------------------- 1 | cup 2 | -------------------------------------------------------------------------------- /mrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mrcnn/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/mrcnn/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/mrcnn/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/mrcnn/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/mrcnn/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- 
/mrcnn/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and a small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs. Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model. That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 
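# tf.split divides each input tensor along its first (batch) dimension into gpu_count equal slices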
60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # Concatenate or average outputs? 93 | # Outputs usually have a batch dimension and we concatenate 94 | # across it. If they don't, then the output is likely a loss 95 | # or a metric value that gets averaged across the batch. 96 | # Keras expects losses and metrics to be scalars. 97 | if K.int_shape(outputs[0]) == (): 98 | # Average 99 | m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs) 100 | else: 101 | # Concatenate 102 | m = KL.Concatenate(axis=0, name=name)(outputs) 103 | merged.append(m) 104 | return merged 105 | 106 | 107 | if __name__ == "__main__": 108 | # Testing code below. It creates a simple model to train on MNIST and 109 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 110 | # in TensorBoard. Run it as: 111 | # 112 | # python3 parallel_model.py 113 | 114 | import os 115 | import numpy as np 116 | import keras.optimizers 117 | from keras.datasets import mnist 118 | from keras.preprocessing.image import ImageDataGenerator 119 | 120 | GPU_COUNT = 2 121 | 122 | # Root directory of the project 123 | ROOT_DIR = os.path.abspath("../") 124 | 125 | # Directory to save logs and trained model 126 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 127 | 128 | def build_model(x_train, num_classes): 129 | # Reset default graph. Keras leaves old ops in the graph, 130 | # which are ignored for execution but clutter graph 131 | # visualization in TensorBoard. 
132 | tf.reset_default_graph() 133 | 134 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 135 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 136 | name="conv1")(inputs) 137 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 138 | name="conv2")(x) 139 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 140 | x = KL.Flatten(name="flat1")(x) 141 | x = KL.Dense(128, activation='relu', name="dense1")(x) 142 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 143 | 144 | return KM.Model(inputs, x, "digit_classifier_model") 145 | 146 | # Load MNIST Data 147 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 148 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 149 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 150 | 151 | print('x_train shape:', x_train.shape) 152 | print('x_test shape:', x_test.shape) 153 | 154 | # Build data generator and model 155 | datagen = ImageDataGenerator() 156 | model = build_model(x_train, 10) 157 | 158 | # Add multi-GPU support. 159 | model = ParallelModel(model, GPU_COUNT) 160 | 161 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 162 | 163 | model.compile(loss='sparse_categorical_crossentropy', 164 | optimizer=optimizer, metrics=['accuracy']) 165 | 166 | model.summary() 167 | 168 | # Train 169 | model.fit_generator( 170 | datagen.flow(x_train, y_train, batch_size=64), 171 | steps_per_epoch=50, epochs=10, verbose=1, 172 | validation_data=(x_test, y_test), 173 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 174 | write_graph=True)] 175 | ) 176 | -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 11-59-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 11-59-20.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 11-59-30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 11-59-30.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 11-59-39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 11-59-39.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 11-59-48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 11-59-48.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-00-06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-00-06.png 
-------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-00-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-00-15.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-00-23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-00-23.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-00-33.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-00-33.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-00-42.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-00-42.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-00-51.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-00-51.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-01-00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-01-00.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-01-08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-01-08.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-01-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-01-17.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-01-26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-01-26.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-01-36.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-01-36.png -------------------------------------------------------------------------------- /outputs/Screenshot from 2019-01-20 12-01-44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/outputs/Screenshot from 2019-01-20 12-01-44.png -------------------------------------------------------------------------------- /pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jatinmandav/Occluded-Object-Detection/b3c2041c95b5043ce9b33f862bc8b703d91e0cd0/pipeline.png -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import datetime 5 | import numpy as np 6 | import skimage.draw 7 | 8 | from yolo_prediction import * 9 | 10 | # Root directory of the project 11 | ROOT_DIR = os.path.abspath("../") 12 | 13 | # Import Mask RCNN 14 | sys.path.append(ROOT_DIR) # To find local version of the library 15 | from mrcnn.config import Config 16 | from mrcnn import model as modellib, utils 17 | 18 | # Path to trained weights file 19 | COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 20 | 21 | # Directory to save logs and model checkpoints, if not provided 22 | # through the command line argument --logs 23 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 24 | 25 | ############################################################ 26 | # Configurations 27 | ############################################################ 28 | 29 | class CoffeeCupConfig(Config): 30 | """Configuration for training on the Coffee Cup dataset. 31 | Derives from the base Config class and overrides some values. 32 | """ 33 | # Give the configuration a recognizable name 34 | NAME = "coffee_cup" 35 | 36 | # We use a GPU with 12GB memory, which can fit two images. 37 | # Adjust down if you use a smaller GPU. 38 | IMAGES_PER_GPU = 2 39 | 40 | # Number of classes (including background) 41 | NUM_CLASSES = 1 + 1 # Background + coffee_cup 42 | 43 | # Number of training steps per epoch 44 | STEPS_PER_EPOCH = 100 45 | 46 | # Skip detections with < 90% confidence 47 | DETECTION_MIN_CONFIDENCE = 0.9 48 | 49 | def form_mask(image, mask): 50 | image, result = make_prediction(image) 51 | 52 | update_mask = np.zeros(mask.shape, dtype=np.bool) 53 | for r in result: 54 | update_mask[r[2]:r[4], r[1]:r[3]] = mask[r[2]:r[4], r[1]:r[3]] 55 | 56 | mask = update_mask[:] 57 | 58 | # Make a grayscale copy of the image. The grayscale copy still 59 | # has 3 RGB channels, though. 
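# (in this variant the masked region is painted with a solid colour overlay [200, 0, 0] rather than being kept in grayscale)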
60 | color = np.empty(image.shape) 61 | color[:] = [200, 0, 0] 62 | if mask.shape[-1] > 0: 63 | # We're treating all instances as one, so collapse the mask into one layer 64 | mask = (np.sum(mask, -1, keepdims=True) >= 1) 65 | splash = np.where(mask, color, image).astype(np.uint8) 66 | else: 67 | splash = image.astype(np.uint8) 68 | 69 | return splash 70 | 71 | def detect_and_color_splash(model, image_path=None): 72 | import cv2 73 | # Run model detection and generate the color splash effect 74 | print("Running on {}".format(args.image)) 75 | # Read image 76 | image = cv2.imread(image_path) 77 | # Detect objects 78 | r = model.detect([image], verbose=1)[0] 79 | print(r['scores'], r['class_ids'], r['rois']) 80 | # Color splash 81 | splash = form_mask(image, r['masks']) 82 | font = cv2.FONT_HERSHEY_SIMPLEX 83 | color = [255, 0, 0] 84 | cv2.putText(splash, '{}:{}'.format('Mask Score', r['scores'][0]), 85 | (10, 20), font, 0.6, color, 2) 86 | 87 | cv2.imshow('result', splash) 88 | cv2.waitKey(0) 89 | 90 | 91 | ############################################################ 92 | # Training 93 | ############################################################ 94 | 95 | if __name__ == '__main__': 96 | import argparse 97 | 98 | # Parse command line arguments 99 | parser = argparse.ArgumentParser( 100 | description='Detect coffee_cups with MaskRCNN+YOLO Network') 101 | parser.add_argument('--weights', required=True, 102 | metavar="/path/to/weights.h5", 103 | help="Path to weights .h5 file or 'coco'") 104 | parser.add_argument('--image', required=True, 105 | metavar="path or URL to image", 106 | help='Image to make prediction') 107 | args = parser.parse_args() 108 | 109 | # Validate arguments 110 | 111 | print("Weights: ", args.weights) 112 | logs = 'logs/' 113 | 114 | # Configurations 115 | class InferenceConfig(CoffeeCupConfig): 116 | GPU_COUNT = 1 117 | IMAGES_PER_GPU = 1 118 | 119 | config = InferenceConfig() 120 | config.display() 121 | 122 | model = modellib.MaskRCNN(mode="inference", config=config, 123 | model_dir=logs) 124 | 125 | weights_path = args.weights 126 | 127 | # Load weights 128 | print("Loading weights ", weights_path) 129 | model.load_weights(weights_path, by_name=True) 130 | 131 | image = cv2.imread(args.image) 132 | cv2.imshow('Original Image', image) 133 | detect_and_color_splash(model, image_path=args.image) 134 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from setuptools.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | import os 6 | import imp 7 | 8 | VERSION = imp.load_source('version', os.path.join('.', 'darkflow', 'version.py')) 9 | VERSION = VERSION.__version__ 10 | 11 | if os.name =='nt' : 12 | ext_modules=[ 13 | Extension("darkflow.cython_utils.nms", 14 | sources=["darkflow/cython_utils/nms.pyx"], 15 | #libraries=["m"] # Unix-like specific 16 | include_dirs=[numpy.get_include()] 17 | ), 18 | Extension("darkflow.cython_utils.cy_yolo2_findboxes", 19 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"], 20 | #libraries=["m"] # Unix-like specific 21 | include_dirs=[numpy.get_include()] 22 | ), 23 | Extension("darkflow.cython_utils.cy_yolo_findboxes", 24 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"], 25 | #libraries=["m"] # Unix-like specific 26 | include_dirs=[numpy.get_include()] 27 | ) 28 | ] 29 | 30 | elif os.name =='posix' : 31 | 
ext_modules=[ 32 | Extension("darkflow.cython_utils.nms", 33 | sources=["darkflow/cython_utils/nms.pyx"], 34 | libraries=["m"], # Unix-like specific 35 | include_dirs=[numpy.get_include()] 36 | ), 37 | Extension("darkflow.cython_utils.cy_yolo2_findboxes", 38 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"], 39 | libraries=["m"], # Unix-like specific 40 | include_dirs=[numpy.get_include()] 41 | ), 42 | Extension("darkflow.cython_utils.cy_yolo_findboxes", 43 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"], 44 | libraries=["m"], # Unix-like specific 45 | include_dirs=[numpy.get_include()] 46 | ) 47 | ] 48 | 49 | else : 50 | ext_modules=[ 51 | Extension("darkflow.cython_utils.nms", 52 | sources=["darkflow/cython_utils/nms.pyx"], 53 | libraries=["m"] # Unix-like specific 54 | ), 55 | Extension("darkflow.cython_utils.cy_yolo2_findboxes", 56 | sources=["darkflow/cython_utils/cy_yolo2_findboxes.pyx"], 57 | libraries=["m"] # Unix-like specific 58 | ), 59 | Extension("darkflow.cython_utils.cy_yolo_findboxes", 60 | sources=["darkflow/cython_utils/cy_yolo_findboxes.pyx"], 61 | libraries=["m"] # Unix-like specific 62 | ) 63 | ] 64 | 65 | setup( 66 | version=VERSION, 67 | name='darkflow', 68 | description='Darkflow', 69 | license='GPLv3', 70 | url='https://github.com/thtrieu/darkflow', 71 | packages = find_packages(), 72 | scripts = ['flow'], 73 | ext_modules = cythonize(ext_modules) 74 | ) -------------------------------------------------------------------------------- /train_mask.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import datetime 5 | import numpy as np 6 | import skimage.draw 7 | 8 | # Root directory of the project 9 | ROOT_DIR = os.path.abspath("../../") 10 | 11 | # Import Mask RCNN 12 | sys.path.append(ROOT_DIR) # To find local version of the library 13 | from mrcnn.config import Config 14 | from mrcnn import model as modellib, utils 15 | 16 | # Path to trained weights file 17 | COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 18 | 19 | # Directory to save logs and model checkpoints, if not provided 20 | # through the command line argument --logs 21 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 22 | 23 | ############################################################ 24 | # Configurations 25 | ############################################################ 26 | 27 | 28 | class CoffeeCupConfig(Config): 29 | """Configuration for training on the toy dataset. 30 | Derives from the base Config class and overrides some values. 31 | """ 32 | # Give the configuration a recognizable name 33 | NAME = "coffee_cup" 34 | 35 | # We use a GPU with 12GB memory, which can fit two images. 36 | # Adjust down if you use a smaller GPU. 37 | IMAGES_PER_GPU = 2 38 | 39 | # Number of classes (including background) 40 | NUM_CLASSES = 1 + 1 # Background + coffee_cup 41 | 42 | # Number of training steps per epoch 43 | STEPS_PER_EPOCH = 100 44 | 45 | # Skip detections with < 90% confidence 46 | DETECTION_MIN_CONFIDENCE = 0.9 47 | 48 | 49 | ############################################################ 50 | # Dataset 51 | ############################################################ 52 | 53 | class CoffeeCupDataset(utils.Dataset): 54 | 55 | def load_cup(self, dataset_dir, subset): 56 | """Load a subset of the coffee_cup dataset. 57 | dataset_dir: Root directory of the dataset. 58 | subset: Subset to load: train or val 59 | """ 60 | # Add classes. We have only one class to add. 
61 | self.add_class("coffee_cup", 1, "coffee_cup") 62 | 63 | # Train or validation dataset? 64 | assert subset in ["train", "val"] 65 | dataset_dir = os.path.join(dataset_dir, subset) 66 | 67 | p = os.path.join(dataset_dir, 'images/') 68 | for fname in os.listdir(p): 69 | image_path = os.path.join(p, fname) 70 | image = skimage.io.imread(image_path) 71 | height, width = image.shape[:2] 72 | 73 | self.add_image( 74 | "coffee_cup", 75 | image_id=os.path.splitext(fname)[0], # use file name as a unique image id 76 | path=image_path, 77 | mask_path=os.path.join(dataset_dir, 'masks/'), 78 | width=width, height=height) 79 | 80 | def load_mask(self, image_id): 81 | """Generate instance masks for an image. 82 | Returns: 83 | masks: A bool array of shape [height, width, instance count] with 84 | one mask per instance. 85 | class_ids: a 1D array of class IDs of the instance masks. 86 | """ 87 | import cv2 88 | image_info = self.image_info[image_id] 89 | if image_info["source"] != "coffee_cup": 90 | return super(self.__class__, self).load_mask(image_id) 91 | 92 | info = self.image_info[image_id] 93 | mask_path = os.path.join(info['mask_path'], info['id'] + '_mask.png') 94 | mask = cv2.imread(mask_path, 0) 95 | mask = np.reshape(mask, [mask.shape[0], mask.shape[1], 1]) 96 | 97 | return mask.astype(np.bool), np.array([mask.shape[-1]], dtype=np.int32) 98 | 99 | def image_reference(self, image_id): 100 | """Return the path of the image.""" 101 | info = self.image_info[image_id] 102 | if info["source"] == "coffee_cup": 103 | return info["path"] 104 | else: 105 | super(self.__class__, self).image_reference(image_id) 106 | 107 | 108 | def train(model): 109 | """Train the model.""" 110 | # Training dataset. 111 | dataset_train = CoffeeCupDataset() 112 | dataset_train.load_cup(args.dataset, "train") 113 | dataset_train.prepare() 114 | 115 | # Validation dataset 116 | dataset_val = CoffeeCupDataset() 117 | dataset_val.load_cup(args.dataset, "val") 118 | dataset_val.prepare() 119 | 120 | print("Training network heads") 121 | model.train(dataset_train, dataset_val, 122 | learning_rate=config.LEARNING_RATE, 123 | epochs=30, 124 | layers='heads') 125 | 126 | ############################################################ 127 | # Training 128 | ############################################################ 129 | 130 | if __name__ == '__main__': 131 | import argparse 132 | 133 | # Parse command line arguments 134 | parser = argparse.ArgumentParser( 135 | description='Train Mask R-CNN to detect Coffee Cup.') 136 | 137 | parser.add_argument('--dataset', required=True, 138 | metavar="/path/to/coffee_cup/dataset/", 139 | help='Directory of the coffee_cup dataset') 140 | parser.add_argument('--weights', required=True, 141 | metavar="/path/to/weights.h5", 142 | help="Path to weights .h5 file or 'coco'") 143 | parser.add_argument('--logs', required=False, 144 | default=DEFAULT_LOGS_DIR, 145 | metavar="/path/to/logs/", 146 | help='Logs and checkpoints directory (default=logs/)') 147 | args = parser.parse_args() 148 | 149 | print("Weights: ", args.weights) 150 | print("Dataset: ", args.dataset) 151 | print("Logs: ", args.logs) 152 | 153 | # Configurations 154 | config = CoffeeCupConfig() 155 | config.display() 156 | 157 | # Create model 158 | model = modellib.MaskRCNN(mode="training", config=config, 159 | model_dir=args.logs) 160 | 161 | # Select weights file to load 162 | if args.weights.lower() == "coco": 163 | weights_path = COCO_WEIGHTS_PATH 164 | # Download weights file 165 | if not os.path.exists(weights_path): 166 | 
utils.download_trained_weights(weights_path) 167 | elif args.weights.lower() == "last": 168 | # Find last trained weights 169 | weights_path = model.find_last() 170 | elif args.weights.lower() == "imagenet": 171 | # Start from ImageNet trained weights 172 | weights_path = model.get_imagenet_weights() 173 | else: 174 | weights_path = args.weights 175 | 176 | # Load weights 177 | print("Loading weights ", weights_path) 178 | if args.weights.lower() == "coco": 179 | # Exclude the last layers because they require a matching 180 | # number of classes 181 | model.load_weights(weights_path, by_name=True, exclude=[ 182 | "mrcnn_class_logits", "mrcnn_bbox_fc", 183 | "mrcnn_bbox", "mrcnn_mask"]) 184 | else: 185 | model.load_weights(weights_path, by_name=True) 186 | 187 | # Train or evaluate 188 | train(model) 189 | 190 | -------------------------------------------------------------------------------- /yolo_prediction.py: -------------------------------------------------------------------------------- 1 | from darkflow.net.build import TFNet 2 | import cv2 3 | import os 4 | import time 5 | import argparse 6 | 7 | options = {'model': 'cfg/yolov2-tiny-voc-1c.cfg', 'load':4000, 'threshold':0.2, 8 | 'labels':'labels.txt'} 9 | 10 | network = TFNet(options) 11 | 12 | def make_prediction(img): 13 | result = network.return_predict(img) 14 | 15 | result_ = [] 16 | 17 | for item in result: 18 | confidence = item['confidence'] 19 | topleft_x, topleft_y = item['topleft']['x'], item['topleft']['y'] 20 | bottomright_x, bottomright_y = item['bottomright']['x'], item['bottomright']['y'] 21 | label = item['label'] 22 | 23 | rect2 = [topleft_x, topleft_y, bottomright_x, bottomright_y] 24 | 25 | result_.append([label] + rect2 + [confidence]) 26 | font = cv2.FONT_HERSHEY_SIMPLEX 27 | if not result_ == []: 28 | for bbox in result_: 29 | label = bbox[0] 30 | color = [255, 0, 0] 31 | 32 | img = cv2.rectangle(img, (bbox[1], bbox[2]), 33 | (bbox[3], bbox[4]), color, 1) 34 | 35 | cv2.putText(img, '{}:{}'.format(label, bbox[-1]), 36 | (bbox[1], bbox[2]), font, 0.6, color, 2) 37 | 38 | return img, result_ 39 | --------------------------------------------------------------------------------
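A minimal usage sketch for make_prediction() from yolo_prediction.py above, assuming darkflow has been built (e.g. python3 setup.py build_ext --inplace) and the yolov2-tiny-voc-1c checkpoint under ckpt/ is available; the image path 'sample.jpg' is only an illustrative placeholder, not a file shipped with the repository:

# illustrative only -- 'sample.jpg' is a hypothetical input path
import cv2
from yolo_prediction import make_prediction   # loads the TFNet checkpoint on import

img = cv2.imread('sample.jpg')
annotated, detections = make_prediction(img)
# each detection is [label, topleft_x, topleft_y, bottomright_x, bottomright_y, confidence]
for label, x1, y1, x2, y2, conf in detections:
    print('{}: {:.2f} at ({}, {}, {}, {})'.format(label, conf, x1, y1, x2, y2))
cv2.imshow('prediction', annotated)
cv2.waitKey(0)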