├── .gitignore
├── README.md
├── assets
│   ├── .DS_Store
│   ├── 2018-(J)- Deep Learning for Generic Object Detection: A Survey - 1809.02165.pdf
│   ├── 2018-(J)- Recent Advances in Object Detection in the Age of Deep Convolutional Neural Networks - 1809.03193.pdf
│   ├── 2019-(J)-CornerNet-Lite: Efficient Keypoint Based Object Detection - 1904.08900.pdf
│   ├── README.md
│   ├── algorithm
│   │   ├── .DS_Store
│   │   ├── 1811.04533.pdf
│   │   ├── 1904.03797v1.pdf
│   │   ├── RCNN 算法.xmind
│   │   ├── RCNN_algorithm.png
│   │   ├── SPP算法.xmind
│   │   ├── fast_rcnn.png
│   │   ├── faster_rcnn.png
│   │   ├── faster_rcnn_v2.png
│   │   ├── fpn.png
│   │   ├── overfeat.png
│   │   ├── rcnn.png
│   │   └── sppnet.png
│   ├── block_diagram
│   │   ├── SSD-architecture.png
│   │   ├── SSD-framework.png
│   │   ├── cornetnet-lite.png
│   │   ├── fcn.png
│   │   ├── fcn_architecture.png
│   │   ├── fcn_block.png
│   │   ├── fcn_upooling.jpg
│   │   ├── featurized-image-pyramid.png
│   │   ├── fpn.png
│   │   ├── fpn_rpn.jpeg
│   │   ├── lenet_alexnet.png
│   │   ├── mobilenetv1.png
│   │   ├── mobilenetv2.png
│   │   ├── object_detection_block_diagram.ep
│   │   ├── object_detection_block_diagram.pptx
│   │   ├── resnet_architecture.png
│   │   ├── resnet_block.png
│   │   ├── retina-net.png
│   │   ├── shufflenet.png
│   │   ├── vgg16.png
│   │   ├── vgg19.png
│   │   ├── yolo-network-architecture.png
│   │   └── yolo-responsible-predictor.png
│   └── code_diagram
│       ├── alexnet_revised.png
│       ├── alexnet_revised_v1.png
│       ├── lenet_revised.png
│       └── vgg16_tl.png
├── dataset
│   └── ChineseFoodDataset
│       ├── .DS_Store
│       └── chinese_food_spider.py
├── image-retrieval
│   ├── .DS_Store
│   ├── 1998-(J)-Example-Based Learning for View-Based Human Face Detection.pdf
│   ├── 2010-(J)-Object Detection with Discriminatively Trained Part Based Models.pdf
│   └── paper
│       ├── .DS_Store
│       └── 2015-(J)-CVPR- Deep Learning of Binary Hash Codes for Fast Image Retrieval.pdf
└── sample-code
    ├── .DS_Store
    ├── network
    │   ├── .DS_Store
    │   ├── .idea
    │   │   ├── misc.xml
    │   │   ├── modules.xml
    │   │   ├── network.iml
    │   │   ├── vcs.xml
    │   │   └── workspace.xml
    │   ├── alexnet_keras.py
    │   ├── cifar10_cnn.py
    │   ├── lenet_keras.py
    │   ├── resnet.py
    │   ├── resnet50.py
    │   ├── resnet_common.py
    │   ├── resnet_v2.py
    │   ├── resnext.py
    │   ├── vgg16.py
    │   ├── vgg16_keras.py
    │   ├── vgg19.py
    │   └── vgg19_keras_cifar100.py
    ├── nlp
    │   └── token_nlp.py
    └── object_detection
        ├── .DS_Store
        └── faster_rcnn
            ├── faster_rcnn_open_image_dataset.py
            └── faster_rcnn_train.py
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | dataset/.DS_Store
3 | *.h5
4 | */.idea
5 | */.pytest_cache
6 | */.vscode
7 | *.pkl
8 | *.tgz
9 | sample-code/network/*.jpg
10 | .DS_Store
11 | .idea/*
12 | .vscode/*
13 | assets/block_diagram/.DS_Store
14 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [Illustrated Object Detection & Network Frameworks](https://github.com/taylorguo/Deep-Object-Detection/blob/master/assets/README.md)
2 |
3 | Inspired by awesome-object-detection, Deep-Object-Detection offers an easy way to understand object detection, with notes in Chinese.
4 |
5 | ## Contents
6 |
7 | - [图解网络架构](#图解网络架构)
8 | - [LeNet_AlexNet](#lenet_alexnet)
9 | - [LeNet_AlexNet_Keras代码实现](#lenet_alexnet_keras代码实现)
10 | - [VGG16网络与代码实现](#vgg16网络与代码实现)
11 | - [VGG19网络与代码实现](#vgg19网络与代码实现)
12 | - [Resnet](#resnet)
13 | - [Inception-v4: 2016](#inception-v4-2016)
14 | - [SqueezeNet:2016](#squeezenet2016)
15 | - [DenseNet:2016](#densenet2016)
16 | - [Xception:2016](#xception2016)
17 | - [ResNeXt:2016](#resnext2016)
18 | - [ROR: 2016](#ror-2016)
19 | - [MobileNet-v1:2017](#mobilenet-v12017)
20 | - [ShuffleNet:2017](#shufflenet2017)
21 | - [SENet : 2017](#senet--2017)
22 | - [MobileNet-V2:2018](#mobilenet-v22018)
23 | - [ShuffleNet-V2: 2018](#shufflenet-v2-2018)
24 | - [MobileNet-V3: 2019](#mobilenet-v3-2019)
25 | - [EfficientNet: 2019](#efficientnet-2019)
26 | - [Transformer in Transformer: 2021](#transformer-in-transformer-2021)
27 | - [ViT-Image Recognition at Scale: 2021](#vit-image-recognition-at-scale-2021)
28 | - [Perceiver: 2021](#perceiver-2021)
29 | - [图解Object_Detection框架](#图解object_detection框架)
30 | - [Multi-stage Object Detection](#multi-stage-object-detection)
31 | - [RCNN : 2014](#rcnn--2014)
32 | - [SPPnet : 2014](#sppnet--2014)
33 | - [FCN : 2015](#fcn--2015)
34 | - [Fast R-CNN : 2015](#fast-r-cnn--2015)
35 | - [Faster R-CNN : 2015](#faster-r-cnn--2015)
36 | - [FPN : 2016](#fpn--2016)
37 | - [Mask R-CNN : 2017](#mask-r-cnn--2017)
38 | - [Soft-NMS : 2017](#soft-nms--2017)
39 | - [Segmentation is all you need : 2019](#segmentation-is-all-you-need--2019)
40 | - [Single Stage Object Detection](#single-stage-object-detection)
41 | - [DenseBox : 2015](#densebox--2015)
42 | - [SSD : 2016](#ssd--2016)
43 | - [YoLov2 : 2016](#yolov2--2016)
44 | - [RetinaNet : 2017](#retinanet--2017)
45 | - [YoLov3 : 2018](#yolov3--2018)
46 | - [M2Det : 2019](#m2det--2019)
47 | - [CornerNet-Lite : 2019](#cornernet-lite--2019)
48 | - [图解 Action Classification](#图解-action-classification)
49 | - [:lemon: MLAD :date: 2021.03.04v1 :blush: University of Central Florida](#lemon--mlad----date---20210304v1--blush--university-of-central-florida)
50 | - [数据集Object_Detection](#数据集object_detection)
51 | - [General Dataset](#general-dataset)
52 | - [Animal](#animal)
53 | - [Plant](#plant)
54 | - [Food](#food)
55 | - [Transportation](#transportation)
56 | - [Scene](#scene)
57 | - [Face](#face)
58 |
59 |
60 |
61 | # 图解网络架构
62 |
63 | ## LeNet_AlexNet
64 |
65 |
66 | ## LeNet_AlexNet_Keras代码实现
67 |
68 | [LeNet-Keras for mnist handwriting digital image classification](https://github.com/taylorguo/Deep-Object-Detection/blob/master/sample-code/network/lenet_keras.py)
69 |
70 | LeNet-Keras restructure
71 |
72 |
73 | Accuracy: 98.54%
74 |
75 |
76 | ===================================
77 |
78 | [AlexNet-Keras for oxflower17 image classification](https://github.com/taylorguo/Deep-Object-Detection/blob/master/sample-code/network/alexnet_keras.py)
79 |
80 | AlexNet-Keras restructure: the modified network reaches val_acc ≈ 80% and overfits
81 |
82 |
83 |
84 |
85 | ===================================
86 | ## VGG16网络与代码实现
87 |
88 |
89 |
90 | [VGG16 official Keras implementation](https://github.com/taylorguo/Deep-Object-Detection/blob/master/sample-code/network/vgg16.py)
91 |
92 | [VGG16-Keras oxflower17 object classification](https://github.com/taylorguo/Deep-Object-Detection/blob/master/sample-code/network/vgg16_keras.py): the modified network reaches val_acc ≈ 86.4% and overfits
93 |
94 |
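For the oxflower17 experiment, a common way to reuse VGG16 is transfer learning through keras.applications. This is only a sketch of that pattern (the 17-way output matches oxflower17), not necessarily how vgg16_keras.py is written.

```python
# Sketch: VGG16 as a frozen feature extractor with a new classifier head (tf.keras).
# The 17-way softmax matches oxflower17; the repo's own script may differ.
from tensorflow.keras import layers, models
from tensorflow.keras.applications import VGG16

base = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
base.trainable = False  # freeze the ImageNet weights

model = models.Sequential([
    base,
    layers.Flatten(),
    layers.Dense(256, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(17, activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
```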
95 |
96 |
97 | ## VGG19网络与代码实现
98 |
99 |
100 |
101 | [VGG19 official Keras implementation](https://github.com/taylorguo/Deep-Object-Detection/blob/master/sample-code/network/vgg19.py)
102 |
103 |
104 |
105 | ## Resnet
106 |
107 | - ResNet [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf) - CVPR
108 |
109 | - Residual block and shortcut (identity) connection, see the sketch below:
110 |
111 |
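A minimal tf.keras sketch of the identity residual block: two 3x3 convolutions whose output is merged with the shortcut by an element-wise Add. Filter counts are placeholders for illustration.

```python
# Sketch of an identity residual block: F(x) + x, with the shortcut merged by element-wise Add.
from tensorflow.keras import layers

def residual_block(x, filters):
    shortcut = x
    y = layers.Conv2D(filters, (3, 3), padding="same")(x)
    y = layers.BatchNormalization()(y)
    y = layers.Activation("relu")(y)
    y = layers.Conv2D(filters, (3, 3), padding="same")(y)
    y = layers.BatchNormalization()(y)
    y = layers.Add()([y, shortcut])   # the shortcut connection
    return layers.Activation("relu")(y)
```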
112 |
113 | - ResNet architecture:
114 |
115 |
116 |
117 | - References on shortcut connections in residual networks
118 |
119 | - 1995 - [Neural networks for pattern recognition - Bishop]()
120 | - 1996 - [Pattern recognition and neural networks - Ripley]()
121 | - 1999 - [Modern applied statistics with s-plus - Venables & Ripley]()
122 |
123 |
124 | - [Highway Networks](https://arxiv.org/pdf/1505.00387v2.pdf), [Chinese translation for reference](https://www.cnblogs.com/2008nmj/p/9104744.html)
125 |
126 | - [Convolutional Neural Networks at Constrained Time Cost](https://arxiv.org/pdf/1412.1710.pdf)
127 |
128 | - Experiments show that simply making the network deeper increases training error
129 |
130 | ===================================
131 | ## Inception-v4: 2016
132 |
133 | - [Inception-v4](https://arxiv.org/pdf/1602.07261v1.pdf), Inception-ResNet and the Impact of Residual Connections on Learning
134 |
135 |
136 |
137 | ## SqueezeNet:2016
138 |
139 | - [SqueezeNet](https://arxiv.org/pdf/1602.07360.pdf): AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size
140 |
141 |
142 | ## DenseNet:2016
143 |
144 | - [DenseNet](https://arxiv.org/pdf/1608.06993.pdf) : Densely Connected Convolutional Networks
145 |
146 | - [DenseNet- Github](https://github.com/liuzhuang13/DenseNet#results-on-imagenet-and-pretrained-models)
147 | - Layers inside a dense block are connected by concatenation rather than element-wise addition
148 |
149 |
150 | ## Xception:2016
151 |
152 | - [Xception](https://arxiv.org/pdf/1610.02357.pdf): Deep Learning with Depthwise Separable Convolutions
153 |
154 |
155 |
156 |
157 | ## ResNeXt:2016
158 |
159 | - [ResNeXt](https://arxiv.org/pdf/1611.05431.pdf): Aggregated Residual Transformations for Deep Neural Networks
160 |
161 |
162 | ## ROR: 2016
163 |
164 | - [ROR](https://arxiv.org/pdf/1608.02908.pdf) - Residual Networks of Residual Networks: Multilevel Residual Networks
165 |
166 |
167 | ## MobileNet-v1:2017
168 |
169 | - [MobileNets](https://arxiv.org/pdf/1704.04861.pdf) : Efficient Convolutional Neural Networks for Mobile Vision Applications
170 |
171 | - MobileNetV1 illustrated (a depthwise-separable convolution sketch follows below):
172 |
173 |
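A sketch of MobileNetV1's basic building block: a 3x3 depthwise convolution followed by a 1x1 pointwise convolution (tf.keras). The filter count and stride are placeholders, not values from the paper.

```python
# Sketch of MobileNetV1's basic unit: depthwise 3x3 conv followed by a pointwise 1x1 conv.
from tensorflow.keras import layers

def depthwise_separable_block(x, pointwise_filters, stride=1):
    x = layers.DepthwiseConv2D((3, 3), strides=stride, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.Conv2D(pointwise_filters, (1, 1), padding="same", use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    return layers.ReLU()(x)
```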
174 |
175 | - References:
176 | - [tensorflow layers 卷积层 Python定义](https://github.com/tensorflow/tensorflow/blob/43dcd3dc3ee4b090832455acf43e8dd483a6117b/tensorflow/python/layers/convolutional.py#L222)
177 | - [tensorflow base Layers class](https://github.com/tensorflow/tensorflow/blob/43dcd3dc3ee4b090832455acf43e8dd483a6117b/tensorflow/python/layers/base.py#L156)
178 | - [CNN中卷积层的计算细节@zhihu](https://zhuanlan.zhihu.com/p/29119239)
179 | - [CNN中卷积层的计算细节@csdn](https://blog.csdn.net/dcrmg/article/details/79652487)
180 | - [【TensorFlow】理解tf.nn.conv2d方法](https://blog.csdn.net/zuolixiangfisher/article/details/80528989)
181 | - [【tensorflow源码分析】 Conv2d卷积运算](https://www.cnblogs.com/yao62995/p/5773018.html)
182 | - [**『TensorFlow』卷积层、池化层详解**](https://www.cnblogs.com/hellcat/p/7850048.html)
183 |
184 |
185 | ## ShuffleNet:2017
186 |
187 | - [ShuffleNet](https://arxiv.org/pdf/1707.01083.pdf): An Extremely Efficient Convolutional Neural Network for Mobile Devices
188 |
189 | - The ShuffleNet unit illustrated:
190 |
191 |
192 |
193 | - Code:
194 | - [ShuffleNet Tensorflow](https://github.com/MG2033/ShuffleNet)
195 |
196 |
197 | ## SENet : 2017
198 |
199 | - [SENet](https://arxiv.org/pdf/1709.01507.pdf) Squeeze-and-Excitation Networks
200 |
201 |
202 | ## MobileNet-V2:2018
203 |
204 | - [MobileNetV2 ](https://arxiv.org/pdf/1801.04381.pdf): Inverted Residuals and Linear Bottlenecks
205 |
206 | - MobileNetV2 illustrated:
207 |
208 |
209 |
210 | ## ShuffleNet-V2: 2018
211 |
212 | - [ShuffleNet V2](https://arxiv.org/pdf/1807.11164.pdf): Practical Guidelines for Efficient CNN Architecture Design
213 |
214 |
215 | ## MobileNet-V3: 2019
216 |
217 | - [MobileNet V3](https://arxiv.org/pdf/1905.02244.pdf): Searching for MobileNetV3
218 |
219 | ## EfficientNet: 2019
220 |
221 | - [EfficientNet](https://arxiv.org/pdf/1905.11946.pdf): Rethinking Model Scaling for Convolutional Neural Networks
222 |
223 |
224 | ## Transformer in Transformer: 2021
225 |
226 | - [Transformer in Transformer](https://arxiv.org/pdf/2103.00112v1.pdf)
227 |
228 |
229 | - [TnT PyTorch code](https://github.com/lucidrains/transformer-in-transformer)
230 |
231 | ## ViT-Image Recognition at Scale: 2021
232 |
233 | - [Vision Transformers](https://arxiv.org/pdf/2010.11929.pdf): An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale
234 |
235 | - [ViT image classification - Keras code](https://github.com/keras-team/keras-io/blob/master/examples/vision/image_classification_with_vision_transformer.py)
236 |
237 |
238 |
239 | ## Perceiver: 2021
240 |
241 | - [Perceiver](https://arxiv.org/pdf/2103.03206.pdf) : General Perception with Iterative Attention
242 |
243 |
244 |
245 | [ViT image classification - Keras code](https://github.com/keras-team/keras-io/blob/master/examples/vision/image_classification_with_vision_transformer.py)
246 |
247 |
248 | =============================
249 |
250 |
251 |
252 | # [图解Object_Detection框架](https://github.com/taylorguo/Deep-Object-Detection/blob/master/assets/README.md)
253 |
254 | General references
255 |
256 | - [cs231n : Spatial Localization and Detection](http://cs231n.stanford.edu/slides/2016/winter1516_lecture8.pdf)
257 |
258 |
259 | 2010
260 |
261 | - [Object Detection with Discriminatively Trained Part Based Models](http://cs.brown.edu/people/pfelzens/papers/lsvm-pami.pdf)
262 |
263 |
264 | 2011
265 |
266 | - [Ensemble of Exemplar-SVMs for Object Detection and Beyond](http://www.cs.cmu.edu/~efros/exemplarsvm-iccv11.pdf)
267 |
268 |
269 | 2013
270 |
271 | - [OverFeat: Integrated Recognition, Localization and Detection using Convolutional Networks](https://arxiv.org/pdf/1312.6229.pdf)
272 |
273 | - [Code](https://github.com/sermanet/OverFeat)
274 |
275 | - sliding window detector on an image pyramid
276 |
277 | - OverFeat algorithm pipeline:
278 |
279 |
280 |
281 | 2014
282 |
283 | - [VGG: Very Deep Convolutional Networks for Large-Scale Image Recognition](http://www.arxiv.org/pdf/1409.1556.pdf)
284 |
285 | - SPP: [Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition](https://arxiv.org/pdf/1406.4729.pdf)
286 |
287 |
288 |
289 | 2017
290 |
291 | - [On the Origin of Deep Learning](https://arxiv.org/pdf/1702.07800.pdf)
292 |
293 | 2018
294 |
295 | - [A guide to convolution arithmetic for deep learning](https://arxiv.org/pdf/1603.07285.pdf)
296 |
297 |
298 | - [Progressive Neural Architecture Search](https://arxiv.org/pdf/1712.00559.pdf)
299 |
300 |
301 |
302 |
303 | ===========================
304 |
305 | ## Multi-stage Object Detection
306 |
307 |
308 |
309 |
310 |
311 |
312 | ### RCNN : 2014
313 |
314 | - [Region-Based Convolutional Networks for Accurate Object Detection and Segmentation](http://medialab.sjtu.edu.cn/teaching/CV/hw/related_papers/3_detection.pdf)
315 |
316 | - v5 [Rich feature hierarchies for accurate object detection and semantic segmentation](https://arxiv.org/pdf/1311.2524v3.pdf) - CVPR
317 |
318 | - scale-normalized region proposals before classification with a ConvNet
319 |
320 |
321 |
322 | - [RCNN Keras Code](https://github.com/yhenon/keras-rcnn)
323 |
324 |
325 |
326 |
327 |
328 | ### SPPnet : 2014
329 |
330 | - SPPnet [Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition](https://arxiv.org/pdf/1406.4729.pdf) - ECCV
331 |
332 |
333 | - [ROI Pooling ](http://wavelab.uwaterloo.ca/wp-content/uploads/2017/04/Lecture_6.pdf)
334 |
335 |
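A small NumPy sketch of ROI max pooling, the operation the ROI Pooling slides describe: each region of interest is divided into a fixed grid and max-pooled per cell, so the downstream fully connected layers always see the same shape. The function name, shapes, and coordinate convention are assumptions for illustration.

```python
# Sketch of ROI (max) pooling on a single feature map: any ROI is reduced to a
# fixed output_size x output_size grid, so fully connected layers see a constant shape.
import numpy as np

def roi_max_pool(feature_map, roi, output_size=7):
    """feature_map: (H, W, C) array; roi: (x1, y1, x2, y2) in feature-map coordinates."""
    x1, y1, x2, y2 = roi
    region = feature_map[y1:y2, x1:x2, :]
    h_edges = np.linspace(0, region.shape[0], output_size + 1).astype(int)
    w_edges = np.linspace(0, region.shape[1], output_size + 1).astype(int)
    out = np.zeros((output_size, output_size, feature_map.shape[2]))
    for i in range(output_size):
        for j in range(output_size):
            # Guarantee each cell covers at least one pixel, then max-pool it.
            cell = region[h_edges[i]:max(h_edges[i + 1], h_edges[i] + 1),
                          w_edges[j]:max(w_edges[j + 1], w_edges[j] + 1), :]
            out[i, j] = cell.max(axis=(0, 1))
    return out
```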
336 |
337 |
338 |
339 | ### FCN : 2015
340 |
341 | - FCN -[Fully convolutional networks for semantic segmentation](https://arxiv.org/pdf/1411.4038.pdf) - CVPR
342 | - A fully convolutional network converts the last three fully connected layers into convolutional layers (multi-channel kernels of matching size), so the input image size is no longer fixed
343 |
344 |
345 |
346 | - Network structure for semantic segmentation:
347 | - Extract feature maps from different pooling layers and upsample them
348 | - Upsampling uses deconvolution (transposed convolution), which makes the upsampled maps somewhat coarse
349 | - Skip connections fuse feature maps by element-wise addition (the Add function in Keras); see the sketch below
350 | - The fused feature map is brought back to the original image size for per-pixel prediction
351 |
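A minimal tf.keras sketch of one skip connection as described above: the deep score map is upsampled with a transposed convolution and fused with a shallower pooling feature by element-wise Add. `fcn_skip` and its arguments are illustrative names, not the official FCN code.

```python
# Sketch of one FCN skip connection: upsample a deep score map with a transposed
# convolution, then fuse it with a shallower pool feature by element-wise Add.
from tensorflow.keras import layers

def fcn_skip(deep_scores, pool_features, num_classes=21):   # 20 VOC classes + background
    up = layers.Conv2DTranspose(num_classes, (4, 4), strides=2, padding="same")(deep_scores)
    lateral = layers.Conv2D(num_classes, (1, 1), padding="same")(pool_features)
    return layers.Add()([up, lateral])
```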
352 |
353 |
354 |
355 |
356 | - Problem setup for semantic segmentation:
357 | - Per-pixel classification
358 | - The last convolution is 1x1x21 (20 VOC object classes + 1 background class)
359 |
360 |
361 |
362 | [Reference: FCN (fully convolutional networks) explained in detail (Chinese)](https://blog.csdn.net/sinat_24143931/article/details/78696442)
363 |
364 | [Reference: Understand FCN in 10 minutes, a pioneering deep model for semantic segmentation (Chinese)](http://www.sohu.com/a/270896638_633698)
365 |
366 | - code:
367 | - [FCN in tensorflow](https://github.com/MarvinTeichmann/tensorflow-fcn)
368 | - [FCN offical](https://github.com/shelhamer/fcn.berkeleyvision.org)
369 |
370 |
371 | ### Fast R-CNN : 2015
372 |
373 | - [Fast R-CNN](https://arxiv.org/pdf/1504.08083.pdf) - ICCV
374 |
375 |
376 |
377 | ### Faster R-CNN : 2015
378 |
379 | - [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/pdf/1506.01497.pdf) - NIPS
380 |
381 | - RPN (Region Proposal Network) & anchor boxes (see the anchor-generation sketch below)
382 |
383 |
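A sketch of anchor generation at a single feature-map location, in the spirit of the RPN's 3 scales x 3 aspect ratios; the base size and scale values here are common implementation defaults, not necessarily the exact settings used in any linked code.

```python
# Sketch of anchor-box generation at one feature-map location:
# one base size combined with several scales and aspect ratios (3 x 3 = 9 anchors).
import numpy as np

def anchors_at(cx, cy, base_size=16, scales=(8, 16, 32), ratios=(0.5, 1.0, 2.0)):
    boxes = []
    for s in scales:
        for r in ratios:
            area = (base_size * s) ** 2          # anchor area is preserved across ratios
            w = np.sqrt(area / r)
            h = w * r                            # height / width = ratio
            boxes.append([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2])
    return np.array(boxes)                       # (9, 4) boxes as (x1, y1, x2, y2)
```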
384 |
385 | - [Convolutional Feature Maps](http://kaiminghe.com/iccv15tutorial/iccv2015_tutorial_convolutional_feature_maps_kaiminghe.pdf)
386 |
387 |
388 | - Instance retrieval: [Faster R-CNN Features for Instance Search](https://arxiv.org/pdf/1604.08893.pdf)
389 |
390 |
391 |
392 |
393 |
394 | ### FPN : 2016
395 |
396 | - [Feature Pyramid Networks for Object Detection](https://arxiv.org/pdf/1612.03144.pdf)
397 |
398 | - The idea comes from feature pyramids in traditional computer vision; deep detectors avoided them because they are compute- and memory-intensive
399 |
400 |
401 |
402 | - Bottom-up pathway in the feed-forward pass: the deepest layer of each stage should carry the strongest features (a lateral-merge sketch follows below)
403 |
404 |
405 |
406 |
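A minimal tf.keras sketch of one top-down merge step in FPN: upsample the coarser level, project the bottom-up map with a 1x1 lateral convolution, add them element-wise, then smooth with a 3x3 convolution. `fpn_merge` is an illustrative helper, not code from any of the linked repositories.

```python
# Sketch of one FPN top-down step: upsample the coarser pyramid level and add a
# 1x1-conv "lateral" projection of the corresponding bottom-up feature map.
from tensorflow.keras import layers

def fpn_merge(top_down, bottom_up, channels=256):
    upsampled = layers.UpSampling2D(size=(2, 2))(top_down)
    lateral = layers.Conv2D(channels, (1, 1), padding="same")(bottom_up)
    merged = layers.Add()([upsampled, lateral])
    # 3x3 conv to smooth aliasing introduced by the upsampling
    return layers.Conv2D(channels, (3, 3), padding="same")(merged)
```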
407 |
408 | - [Reference: Understanding FPN](https://medium.com/@jonathan_hui/understanding-feature-pyramid-networks-for-object-detection-fpn-45b227b9106c)
409 |
410 | - Code:
411 | - [FPN in Mask-RCNN Keras Code](https://github.com/matterport/Mask_RCNN/blob/master/mrcnn/model.py)
412 | - [FPN in Tensorflow](https://github.com/yangxue0827/FPN_Tensorflow)
413 | - [FPN in Caffe](https://github.com/unsky/FPN)
414 |
415 |
416 |
417 | ### Mask R-CNN : 2017
418 |
419 | - [Mask R-CNN](https://arxiv.org/pdf/1703.06870.pdf)
420 | - Code:
421 | - [Keras matterport](https://github.com/matterport/Mask_RCNN)
422 | - [Caffe2 Facebook](https://github.com/facebookresearch/Detectron)
423 | - [PyTorch wannabeOG](https://github.com/wannabeOG/Mask-RCNN)
424 | - [MXNet TuSimple](https://github.com/TuSimple/mx-maskrcnn)
425 | - [Chainer DeNA](https://github.com/DeNA/Chainer_Mask_R-CNN)
426 |
427 |
428 | ### Soft-NMS : 2017
429 |
430 | - [Soft-NMS](https://arxiv.org/pdf/1704.04503.pdf)
431 |
432 |
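A plain NumPy sketch of the Gaussian variant of Soft-NMS: instead of discarding boxes that overlap the current top-scoring detection, their scores are decayed by exp(-IoU²/σ). Function names, the σ value, and the score threshold are illustrative choices, not taken from a particular implementation.

```python
# Sketch of Soft-NMS: instead of deleting boxes that overlap the current best
# detection, decay their scores with a Gaussian penalty on IoU.
import numpy as np

def iou(a, b):
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area = lambda t: (t[2] - t[0]) * (t[3] - t[1])
    return inter / (area(a) + area(b) - inter + 1e-9)

def soft_nms(boxes, scores, sigma=0.5, score_thresh=0.001):
    """boxes: (N, 4) array of (x1, y1, x2, y2); scores: (N,) array."""
    boxes, scores = boxes.copy(), scores.copy()
    keep = []
    while len(scores) > 0:
        best = int(np.argmax(scores))
        keep.append((boxes[best], scores[best]))
        ref = boxes[best]
        boxes, scores = np.delete(boxes, best, axis=0), np.delete(scores, best)
        for i in range(len(scores)):
            scores[i] *= np.exp(-iou(ref, boxes[i]) ** 2 / sigma)   # Gaussian decay
        mask = scores > score_thresh
        boxes, scores = boxes[mask], scores[mask]
    return keep
```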
433 |
434 | ### Segmentation is all you need : 2019
435 |
436 | - [Segmentation is All You Need](https://arxiv.org/pdf/1904.13300v1.pdf)
437 |
438 |
439 | ============================
440 | ## Single Stage Object Detection
441 |
442 |
443 | ### DenseBox : 2015
444 |
445 | - [DenseBox: Unifying Landmark Localization with End to End Object Detection](https://arxiv.org/pdf/1509.04874.pdf)
446 |
447 | ### SSD : 2016
448 |
449 | - [SSD: Single Shot MultiBox Detector](https://arxiv.org/pdf/1512.02325.pdf) - ECCV
450 |
451 | - Workflow:
452 |
453 | - VGG-16 as the feature extraction network; bounding boxes and classes are predicted from a pyramid of feature maps
454 |
455 | - Network architecture:
456 |
457 |
458 |
459 | - Loss function:
460 |
461 | - the sum of a smooth L1 localization loss and a softmax classification loss (see the sketch below)
462 |
463 |
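A NumPy sketch of the two loss ingredients mentioned above, smooth L1 for box offsets and softmax cross-entropy for class scores; this is a per-example illustration only, not the full SSD matching and hard-negative-mining pipeline.

```python
# Sketch of SSD's two loss terms: smooth L1 on box offsets for positive matches,
# plus softmax cross-entropy on class scores (per-example, in NumPy).
import numpy as np

def smooth_l1(pred, target):
    d = np.abs(pred - target)
    return np.where(d < 1.0, 0.5 * d ** 2, d - 0.5).sum()

def softmax_cross_entropy(logits, label):
    logits = logits - logits.max()                     # numerical stability
    log_probs = logits - np.log(np.exp(logits).sum())
    return -log_probs[label]
```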
464 |
465 |
466 | ### YoLov2 : 2016
467 |
468 | - YOLOv2 [YOLO9000: Better, Faster, Stronger](https://arxiv.org/pdf/1612.08242.pdf)
469 |
470 | - Workflow:
471 |
472 | - Pre-train the CNN on an image classification task
473 |
474 | - Split the image into grid cells; if an object's center falls inside a cell, that cell is "responsible" for detecting the object (see the sketch below)
475 |
476 | Each cell predicts (a) bounding-box locations, (b) confidence scores, and (c) class probabilities conditioned on an object being present in the box
477 |
478 | - Replace the last layer of the pre-trained CNN so that it outputs the prediction tensor
479 |
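A tiny sketch of the "responsible cell" rule described above: the grid cell containing the box center is the one that predicts the object. The 13x13 grid size is just a typical YOLOv2 value.

```python
# Sketch: mapping an object's box center to the grid cell that is "responsible"
# for predicting it, for an S x S grid over a normalized image.
def responsible_cell(center_x, center_y, grid_size=13):
    """center_x, center_y in [0, 1); returns (row, col) of the responsible cell."""
    col = min(int(center_x * grid_size), grid_size - 1)
    row = min(int(center_y * grid_size), grid_size - 1)
    return row, col

# Example: a box centered at (0.51, 0.34) on a 13x13 grid falls in cell (4, 6).
```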
480 | - Network architecture:
481 |
482 |
483 |
484 | - Loss function:
485 |
486 | - Two parts: bounding-box regression and conditional class probabilities, both using a sum-of-squared-errors loss
487 |
488 |
489 |
490 |
491 | ### RetinaNet : 2017
492 |
493 | - RetinaNet:[Focal Loss for Dense Object Detection](https://arxiv.org/pdf/1708.02002.pdf)
494 |
495 | - Workflow:
496 |
497 | - Focal loss gives more weight to hard, easily misclassified cases (e.g. background with noisy texture or partial objects) and down-weights easy cases (obviously empty background); see the sketch below
498 |
499 | - ResNet as the feature extraction network, with a feature pyramid to improve detection performance
500 |
501 |
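A NumPy sketch of the focal loss for one binary prediction, FL(p_t) = -α_t (1 - p_t)^γ log(p_t), with the commonly used defaults α = 0.25 and γ = 2 from the paper.

```python
# Sketch of the binary focal loss: the (1 - p_t)^gamma factor down-weights
# easy, well-classified examples so training focuses on hard ones.
import numpy as np

def focal_loss(p, y, alpha=0.25, gamma=2.0):
    """p: predicted probability of the positive class; y: 0/1 ground-truth label."""
    p_t = p if y == 1 else 1.0 - p
    alpha_t = alpha if y == 1 else 1.0 - alpha
    return -alpha_t * (1.0 - p_t) ** gamma * np.log(p_t + 1e-9)
```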
502 |
503 | - Network architecture:
504 |
505 |
506 |
507 |
508 |
509 |
510 | ### YoLov3 : 2018
511 |
512 | - [YOLOv3: An Incremental Improvement](https://arxiv.org/pdf/1804.02767.pdf)
513 |
514 | - Bounding-box priors come from dimension clusters (k-means on box sizes)
515 |
516 | - Each box has 4 coordinates
517 |
518 | - Training uses a sum-of-squared-error loss for the coordinates
519 |
520 | - The objectness score of each box is predicted with logistic regression
521 |
522 | - Classification uses independent logistic classifiers with a binary cross-entropy loss (see the sketch below)
523 |
524 | - Borrows the FPN idea for multi-scale prediction
525 |
526 | - Feature-extraction convolutional network:
527 |
528 | - Alternating 3x3 and 1x1 convolutional layers
529 |
530 | - Borrows shortcut connections from ResNet, adding skips from convolutional layers or earlier shortcut layers
531 |
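A NumPy sketch of the multi-label classification described above: independent per-class sigmoids scored with binary cross-entropy instead of a single softmax. Names and shapes are illustrative.

```python
# Sketch of YOLOv3-style multi-label classification: independent sigmoids per class
# with binary cross-entropy, instead of a single softmax over classes.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def multilabel_bce(class_logits, labels):
    """class_logits: (num_classes,) raw scores; labels: (num_classes,) 0/1 multi-hot."""
    p = sigmoid(class_logits)
    return -(labels * np.log(p + 1e-9) + (1 - labels) * np.log(1 - p + 1e-9)).sum()
```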
532 |
533 | ### M2Det : 2019
534 |
535 | - [M2Det](https://arxiv.org/pdf/1811.04533.pdf)
536 |
537 |
538 | ### CornerNet-Lite : 2019
539 |
540 | - [CornerNet-Lite](https://arxiv.org/pdf/1904.08900.pdf) : Efficient Keypoint Based Object Detection
541 | - CornerNet-Saccade: attends to feature-map pixels and produces multiple detections per crop; aimed at offline processing
542 | - CornerNet-Squeeze: a SqueezeNet-based hourglass backbone; aimed at real-time processing
543 |
544 |
545 |
546 | [Reference: a summary of object detection algorithms (Chinese)](https://www.cnblogs.com/guoyaohua/p/8994246.html)
547 |
548 |
549 |
550 |
551 | =============================
552 |
553 |
554 | # 图解 Action Classification
555 |
556 | ## :lemon: [MLAD](https://arxiv.org/pdf/2103.03027.pdf) :date: 2021.03.04v1 :blush: University of Central Florida
557 |
558 | - [Modeling Multi-Label Action Dependencies for Temporal Action Localization](https://arxiv.org/pdf/2103.03027.pdf)
559 |
560 | Network
561 |
562 |
563 |
564 |
565 |
566 | =============================
567 |
568 |
569 |
570 | # 数据集Object_Detection
571 |
572 | Not every dataset listed here necessarily includes complete object detection annotations.
573 |
574 | ## General Dataset
575 |
576 | - [Dataset Collection](http://www.escience.cn/people/lichang/Data.html)
577 |
578 | - [A brief introduction to 25 datasets (Chinese)](https://www.easemob.com/news/1433)
579 |
580 | - [CIFAR10](https://figshare.com/articles/dataset/CIFAR10-DVS_New/4724671/2)
581 |
582 | - [ImageNet, the largest image recognition database](http://www.image-net.org/)
583 |
584 | - 14,197,122 images
585 |
586 | - [PASCAL Visual Object Classes Challenge 2008 (VOC2008)](http://host.robots.ox.ac.uk/pascal/VOC/voc2008/htmldoc/voc.html), [VOC-2012](http://pjreddie.com/projects/pascal-voc-dataset-mirror/)
587 |
588 |
589 | - [Open Images dataset (with annotations)](https://github.com/openimages/dataset)
590 |
591 |
592 | - Nearly 9 million image URLs, annotated with image-level labels and bounding boxes for thousands of classes.
593 |
594 | - The dataset contains a training set of 9,011,219 images, a validation set of 41,260 images, and a test set of 125,436 images.
595 |
596 |
597 | - [Corel5K image collection](https://github.com/watersink/Corel5K)
598 |
599 | - The Corel5K collection has 5,000 images covering 50 semantic topics such as buses, dinosaurs, and beaches.
600 |
601 |
602 |
603 |
604 |
605 | ## Animal
606 |
607 |
608 | [Stanford Dogs 🐶 Dataset : Over 20,000 images of 120 dog breeds](https://www.kaggle.com/jessicali9530/stanford-dogs-dataset)
609 |
610 |
611 | - Context
612 |
613 | The Stanford Dogs dataset contains images of 120 breeds of dogs from around the world. This dataset has been built using images and annotation from ImageNet for the task of fine-grained image categorization. It was originally collected for fine-grain image categorization, a challenging problem as certain dog breeds have near identical features or differ in colour and age.
614 |
615 | Sourced from ImageNet and used for fine-grained image classification
616 |
617 |
618 | - Content
619 |
620 | - Number of categories: 120
621 | - Number of images: 20,580
622 | - Annotations: Class labels, Bounding boxes
623 |
624 |
625 | [Honey Bee pollen : High resolution images of individual bees on the ramp](https://www.kaggle.com/ivanfel/honey-bee-pollen)
626 |
627 | - Context
628 |
629 | This image dataset has been created from videos captured at the entrance of a bee colony in June 2017 at the Bee facility of the Gurabo Agricultural Experimental Station of the University of Puerto Rico.
630 |
631 | The task is to classify bees 🐝 as pollen-bearing or not
632 |
633 | - Content
634 |
635 | - images/ contains images for pollen bearing and no pollen bearing honey bees.
636 |
637 | - The prefix of the image names defines their class: e.g. NP1268-15r.jpg for non-pollen and P7797-103r.jpg for pollen-bearing bees.
638 | - The numbers correspond to the frame and item number respectively; be careful, they are not numbered sequentially.
639 |
640 |
641 |
642 | - Read-skimage.ipynb: a Jupyter notebook with a simple script that loads the data and builds the dataset using the skimage library.
643 |
644 |
645 |
646 |
647 | ## Plant
648 |
649 | [Flowers Recognition : This dataset contains labeled 4242 images of flowers.](https://www.kaggle.com/alxmamaev/flowers-recognition)
650 |
651 | - Context
652 |
653 | This dataset contains 4,242 images of flowers collected from Flickr, Google Images, and Yandex Images. You can use this dataset to recognize plants from a photo.
654 |
655 |
656 |
657 | - Content
658 |
659 | - five classes: chamomile, tulip, rose, sunflower, dandelion
660 | - about 800 photos in each class
661 | - resolution: about 320x240 pixels
662 |
663 |
664 | [VGG - 17 Category Flower Dataset](http://www.robots.ox.ac.uk/~vgg/data/flowers/17/index.html)
665 |
666 | - Context
667 |
668 | - 17 category flower dataset with 80 images for each class
669 | - 80 images for each category
670 |
671 |
672 | - Content
673 |
674 | - The datasplits used in this paper are specified in datasplits.mat
675 |
676 | - There are 3 separate splits. The results in the paper are averaged over the 3 splits.
677 |
678 | - Each split has a training file (trn1, trn2, trn3), a validation file (val1, val2, val3), and a test file (tst1, tst2 or tst3).
679 |
680 |
681 | [VGG - 102 Category Flower Dataset](http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html)
682 |
683 | - Context
684 |
685 | - 102 category dataset, consisting of 102 flower categories
686 | - Each class consists of between 40 and 258 images
687 |
688 |
689 | - Content
690 |
691 | - The datasplits used in this paper are specified in setid.mat.
692 |
693 | - The results in the paper are produced on a 103-category database. The two categories labeled Petunia have since been merged because they are the same.
694 | - There is a training file (trnid), a validation file (valid) and a test file (tstid).
695 |
696 |
697 |
698 | [Fruits 360 dataset : A dataset with 65429 images of 95 fruits](https://www.kaggle.com/moltean/fruits)
699 |
700 | - Context
701 |
702 | The following fruits are included: Apples (different varieties: Golden, Red Yellow, Granny Smith, Red, Red Delicious), Apricot, Avocado, Avocado ripe, Banana (Yellow, Red, Lady Finger), Cactus fruit, Cantaloupe (2 varieties), Carambula, Cherry (different varieties, Rainier), Cherry Wax (Yellow, Red, Black), Chestnut, Clementine, Cocos, Dates, Granadilla, Grape (Blue, Pink, White (different varieties)), Grapefruit (Pink, White), Guava, Hazelnut, Huckleberry, Kiwi, Kaki, Kumsquats, Lemon (normal, Meyer), Lime, Lychee, Mandarine, Mango, Mangostan, Maracuja, Melon Piel de Sapo, Mulberry, Nectarine, Orange, Papaya, Passion fruit, Peach (different varieties), Pepino, Pear (different varieties, Abate, Kaiser, Monster, Williams), Physalis (normal, with Husk), Pineapple (normal, Mini), Pitahaya Red, Plums (different varieties), Pomegranate, Pomelo Sweetie, Quince, Rambutan, Raspberry, Redcurrant, Salak, Strawberry (normal, Wedge), Tamarillo, Tangelo, Tomato (different varieties, Maroon, Cherry Red), Walnut.
703 |
704 |
705 | - Content
706 |
707 | - Total number of images: 65429.
708 | - Training set size: 48905 images (one fruit per image).
709 | - Test set size: 16421 images (one fruit per image).
710 | - Multi-fruits set size: 103 images (more than one fruit (or fruit class) per image)
711 | - Number of classes: 95 (fruits).
712 | - Image size: 100x100 pixels.
713 |
714 |
715 | - [GitHub download: Fruits-360 dataset](https://github.com/Horea94/Fruit-Images-Dataset)
716 |
717 |
718 |
719 | [Plant Seedlings Classification : Determine the species of a seedling from an image](https://www.kaggle.com/c/plant-seedlings-classification)
720 |
721 | - Context
722 |
723 | - a dataset containing images of approximately 960 unique plants belonging to 12 species at several growth stages
724 |
725 | - Content
726 |
727 | - [A Public Image Database for Benchmark of Plant Seedling Classification Algorithms](https://arxiv.org/abs/1711.05458)
728 |
729 |
730 | [V2 Plant Seedlings Dataset : Images of crop and weed seedlings at different growth stages](https://www.kaggle.com/vbookshelf/v2-plant-seedlings-dataset)
731 |
732 |
733 | - Context
734 | - The V1 version of this dataset was used in the Plant Seedling Classification playground competition here on Kaggle. This is the V2 version. Some samples in V1 contained multiple plants. The dataset’s creators have now removed those samples.
735 |
736 | - Content
737 |
738 | - This dataset contains 5,539 images of crop and weed seedlings.
739 | - The images are grouped into 12 classes as shown in the above pictures. These classes represent common plant species in Danish agriculture. Each class contains RGB images that show plants at different growth stages.
740 | - The images are in various sizes and are in png format.
741 |
742 |
743 |
744 |
745 |
746 | ## Food
747 |
748 | [UEC Food-256 Japan Food](http://foodcam.mobi/dataset256.html)
749 |
750 | - Context
751 |
752 | - The dataset "UEC FOOD 256" contains photos of 256 kinds of food. Each food photo has a bounding box indicating the location of the food item in the photo.
753 |
754 | - Most of the food categories in this dataset are popular foods in Japan and other countries.
755 |
756 |
757 | - Content
758 |
759 | - [1-256] : directory names correspond to food ID.
760 | - [1-256]/*.jpg : food photo files (some photos are duplicated in two or more directories, since they include two or more food items.)
761 | - [1-256]/bb_info.txt: bounding box information for the photo files in each directory
762 |
763 | - category.txt : food list including the correspondences between food IDs and food names in English
764 | - category_ja.txt : food list including the correspondences between food IDs and food names in Japanese
765 | - multiple_food.txt: the list representing food photos including two or more food items
766 |
767 | [FoodDD: Food Detection Dataset](http://www.site.uottawa.ca/~shervin/food/), [paper](http://www.site.uottawa.ca/~shervin/pubs/FoodRecognitionDataset-MadiMa.pdf)
768 |
769 | [NutriNet: A Deep Learning Food and Drink Image Recognition System for Dietary Assessment](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5537777/)
770 |
771 | [ChineseFoodNet: A large-scale Image Dataset for Chinese Food Recognition - 2017](https://arxiv.org/pdf/1705.02743.pdf)
772 |
773 | [Yummly-28K - 2017](http://isia.ict.ac.cn/dataset/)
774 |
775 | - Content
776 |
777 | - 27,638 recipes in total.
778 | - Each recipe contains one recipe image, the ingredients, the cuisine and the course information.
779 | - There are 16 kinds of cuisines (e.g,“American”,“Italian” and “Mexican”)
780 | - and 13 kinds of recipe courses (e.g, “Main Dishes”,“Desserts” and “Lunch and Snacks”).
781 |
782 | [VireoFood-172 dataset](http://vireo.cs.cityu.edu.hk/vireofood172/), [paper, 2016](http://vireo.cs.cityu.edu.hk/jingjing/papers/chen2016deep.pdf)
783 |
784 | [Dishes: a restaurant-oriented food dataset - 2015](http://isia.ict.ac.cn/dataset/Geolocation-food/)
785 |
786 |
787 |
788 |
789 | ## Transportation
790 |
791 |
792 | [Boat types recognition : About 1,500 pictures of boats classified in 9 categories](https://www.kaggle.com/clorichel/boat-types-recognition)
793 |
794 | - Context
795 |
796 | This dataset is used in the blog post https://clorichel.com/blog/2018/11/10/machine-learning-and-object-detection/ where you'll train an image recognition model with TensorFlow to find just about anything in pictures and videos.
797 |
798 |
799 |
800 | - Content
801 |
802 | 1,500 pictures of boats of various sizes, classified into these types: buoy, cruise ship, ferry boat, freight boat, gondola, inflatable boat, kayak, paper boat, sailboat.
803 |
804 |
805 |
806 |
807 |
808 | ## Scene
809 |
810 |
811 | [Intel Image Classification : Image Scene Classification of Multiclass](https://www.kaggle.com/puneet6060/intel-image-classification)
812 |
813 | - Context
814 |
815 | image data of Natural Scenes around the world
816 |
817 |
818 |
819 | - Content
820 |
821 | - This data contains around 25k images of size 150x150 distributed across 6 categories: {'buildings' -> 0, 'forest' -> 1, 'glacier' -> 2, 'mountain' -> 3, 'sea' -> 4, 'street' -> 5}
822 |
823 | - The Train, Test, and Prediction data are in separate zip files, with around 14k images in Train, 3k in Test, and 7k in Prediction. The data was originally published on https://datahack.analyticsvidhya.com by Intel to host an image classification challenge.
824 |
825 |
826 |
827 |
828 |
829 |
830 | ## Face
831 |
832 | [CelebFaces Attributes (CelebA) Dataset : Over 200K images of celebrities with 40 binary attribute annotations](https://www.kaggle.com/jessicali9530/celeba-dataset/version/2)
833 |
834 |
--------------------------------------------------------------------------------
/assets/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/.DS_Store
--------------------------------------------------------------------------------
/assets/2018-(J)- Deep Learning for Generic Object Detection: A Survey - 1809.02165.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/2018-(J)- Deep Learning for Generic Object Detection: A Survey - 1809.02165.pdf
--------------------------------------------------------------------------------
/assets/2018-(J)- Recent Advances in Object Detection in the Age of Deep Convolutional Neural Networks - 1809.03193.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/2018-(J)- Recent Advances in Object Detection in the Age of Deep Convolutional Neural Networks - 1809.03193.pdf
--------------------------------------------------------------------------------
/assets/2019-(J)-CornerNet-Lite: Efficient Keypoint Based Object Detection - 1904.08900.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/2019-(J)-CornerNet-Lite: Efficient Keypoint Based Object Detection - 1904.08900.pdf
--------------------------------------------------------------------------------
/assets/README.md:
--------------------------------------------------------------------------------
1 | # Object Detection Frameworks, Illustrated
2 |
3 | ## General Information
4 |
5 | - [cs231n : Spatial Localization and Detection](http://cs231n.stanford.edu/slides/2016/winter1516_lecture8.pdf)
6 |
7 |
8 |
9 |
10 | 2010
11 |
12 | - [Object Detection with Discriminatively Trained Part Based Models](http://cs.brown.edu/people/pfelzens/papers/lsvm-pami.pdf)
13 |
14 |
15 | 2011
16 |
17 | - [Ensemble of Exemplar-SVMs for Object Detection and Beyond](http://www.cs.cmu.edu/~efros/exemplarsvm-iccv11.pdf)
18 |
19 | 2012
20 |
21 | - [AlexNet]()
22 |
23 |
24 | 2013
25 |
26 | - [OverFeat: Integrated Recognition, Localization and Detection using Convolutional Networks](https://arxiv.org/pdf/1312.6229.pdf)
27 |
28 | - [Code](https://github.com/sermanet/OverFeat)
29 |
30 | - sliding window detector on an image pyramid
31 |
32 |
33 |
34 | 2014
35 |
36 | - [VGG]()
37 |
38 | - SPP: [Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition](https://arxiv.org/pdf/1406.4729.pdf)
39 |
40 |
41 | 2015
42 |
43 | - [Highway Networks](https://arxiv.org/pdf/1505.00387v2.pdf), [Chinese translation for reference](https://www.cnblogs.com/2008nmj/p/9104744.html)
44 |
45 | - [Convolutional Neural Networks at Constrained Time Cost](https://arxiv.org/pdf/1412.1710.pdf)
46 |
47 | - Experiments show that simply making the network deeper increases training error
48 |
49 | - ResNet [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf) - CVPR
50 |
51 | - References on shortcut connections in residual networks
52 |
53 | - 1995 - [Neural networks for pattern recognition - Bishop]()
54 | - 1996 - [Pattern recognition and neural networks - Ripley]()
55 | - 1999 - [Modern applied statistics with s-plus - Venables & Ripley]()
56 |
57 | 2017
58 |
59 | - [On the Origin of Deep Learning](https://arxiv.org/pdf/1702.07800.pdf)
60 |
61 | 2018
62 |
63 | - [A guide to convolution arithmetic for deep learning](https://arxiv.org/pdf/1603.07285.pdf)
64 |
65 |
66 | - [Progressive Neural Architecture Search](https://arxiv.org/pdf/1712.00559.pdf)
67 |
68 |
69 |
70 |
71 | ============================
72 | ## Single Stage Object Detection
73 |
74 |
75 | 2015
76 |
77 | - [DenseBox: Unifying Landmark Localization with End to End Object Detection](https://arxiv.org/pdf/1509.04874.pdf)
78 |
79 | 2016
80 |
81 | - [SSD: Single Shot MultiBox Detector](https://arxiv.org/pdf/1512.02325.pdf) - ECCV
82 |
83 | - Workflow:
84 |
85 | - VGG-16 as the feature extraction network; bounding boxes and classes are predicted from a pyramid of feature maps
86 |
87 | - Network architecture:
88 |
89 |
90 |
91 | - Loss function:
92 |
93 | - the sum of a smooth L1 localization loss and a softmax classification loss
94 |
95 |
96 |
97 |
98 | - YOLOv2 [YOLO9000: Better, Faster, Stronger](https://arxiv.org/pdf/1612.08242.pdf)
99 |
100 | - Workflow:
101 |
102 | - Pre-train the CNN on an image classification task
103 |
104 | - Split the image into grid cells; if an object's center falls inside a cell, that cell is "responsible" for detecting the object
105 |
106 | Each cell predicts (a) bounding-box locations, (b) confidence scores, and (c) class probabilities conditioned on an object being present in the box
107 |
108 | - Replace the last layer of the pre-trained CNN so that it outputs the prediction tensor
109 |
110 | - Network architecture:
111 |
112 |
113 |
114 | - Loss function:
115 |
116 | - Two parts: bounding-box regression and conditional class probabilities, both using a sum-of-squared-errors loss
117 |
118 |
119 |
120 |
121 | 2017
122 |
123 | - RetinaNet:[Focal Loss for Dense Object Detection](https://arxiv.org/pdf/1708.02002.pdf)
124 |
125 | - Workflow:
126 |
127 | - Focal loss gives more weight to hard, easily misclassified cases (e.g. background with noisy texture or partial objects) and down-weights easy cases (obviously empty background)
128 |
129 | - ResNet as the feature extraction network, with a feature pyramid to improve detection performance
130 |
131 |
132 |
133 | - Network architecture:
134 |
135 |
136 |
137 |
138 |
139 |
140 | 2018
141 |
142 | - [YOLOv3: An Incremental Improvement](https://arxiv.org/pdf/1804.02767.pdf)
143 |
144 | - Bounding-box priors come from dimension clusters (k-means on box sizes)
145 |
146 | - Each box has 4 coordinates
147 |
148 | - Training uses a sum-of-squared-error loss for the coordinates
149 |
150 | - The objectness score of each box is predicted with logistic regression
151 |
152 | - Classification uses independent logistic classifiers with a binary cross-entropy loss
153 |
154 | - Borrows the FPN idea for multi-scale prediction
155 |
156 | - Feature-extraction convolutional network:
157 |
158 | - Alternating 3x3 and 1x1 convolutional layers
159 |
160 | - Borrows shortcut connections from ResNet, adding skips from convolutional layers or earlier shortcut layers
161 |
162 |
163 |
164 | ===========================
165 | ## Multi-stage Object Detection
166 |
167 |
168 |
169 |
170 | 2014
171 |
172 | - RCNN
173 |
174 | - [Region-Based Convolutional Networks for Accurate Object Detection and Segmentation](http://medialab.sjtu.edu.cn/teaching/CV/hw/related_papers/3_detection.pdf)
175 |
176 |
177 | - v5 [Rich feature hierarchies for accurate object detection and semantic segmentation](https://arxiv.org/pdf/1311.2524v3.pdf) - CVPR
178 | - scale-normalized region proposals before classification with a ConvNet
179 |
180 |
181 |
182 | - [RCNN Keras Code](https://github.com/yhenon/keras-rcnn)
183 |
184 |
185 |
186 |
187 |
188 | - SPPnet [Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition](https://arxiv.org/pdf/1406.4729.pdf) - ECCV
189 |
190 |
191 |
192 | - [ROI Pooling ](http://wavelab.uwaterloo.ca/wp-content/uploads/2017/04/Lecture_6.pdf)
193 |
194 | 2015
195 |
196 |
197 |
198 | - FCN -[Fully convolutional networks for semantic segmentation](https://arxiv.org/pdf/1411.4038.pdf) - CVPR
199 |
200 | - [Fast R-CNN](https://arxiv.org/pdf/1504.08083.pdf) - ICCV
201 |
202 |
203 |
204 | - [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/pdf/1506.01497.pdf) - NIPS
205 |
206 | - RPN(Region Proposal Network) & Anchor Box
207 |
208 |
209 |
210 | - [Convolutional Feature Maps](http://kaiminghe.com/iccv15tutorial/iccv2015_tutorial_convolutional_feature_maps_kaiminghe.pdf)
211 |
212 |
213 | - Instance retrieval: [Faster R-CNN Features for Instance Search](https://arxiv.org/pdf/1604.08893.pdf)
214 |
215 |
216 |
217 | 2016
218 |
219 |
220 |
221 |
222 | - [Feature Pyramid Networks for Object Detection](https://arxiv.org/pdf/1612.03144.pdf)
223 |
224 | - The idea comes from feature pyramids in traditional computer vision; deep detectors avoided them because they are compute- and memory-intensive
225 |
226 |
227 |
228 | - Bottom-up pathway in the feed-forward pass: the deepest layer of each stage should carry the strongest features
229 |
230 |
231 |
232 |
233 |
234 | - [Reference: Understanding FPN](https://medium.com/@jonathan_hui/understanding-feature-pyramid-networks-for-object-detection-fpn-45b227b9106c)
--------------------------------------------------------------------------------
/assets/algorithm/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/.DS_Store
--------------------------------------------------------------------------------
/assets/algorithm/1811.04533.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/1811.04533.pdf
--------------------------------------------------------------------------------
/assets/algorithm/1904.03797v1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/1904.03797v1.pdf
--------------------------------------------------------------------------------
/assets/algorithm/RCNN 算法.xmind:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/RCNN 算法.xmind
--------------------------------------------------------------------------------
/assets/algorithm/RCNN_algorithm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/RCNN_algorithm.png
--------------------------------------------------------------------------------
/assets/algorithm/SPP算法.xmind:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/SPP算法.xmind
--------------------------------------------------------------------------------
/assets/algorithm/fast_rcnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/fast_rcnn.png
--------------------------------------------------------------------------------
/assets/algorithm/faster_rcnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/faster_rcnn.png
--------------------------------------------------------------------------------
/assets/algorithm/faster_rcnn_v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/faster_rcnn_v2.png
--------------------------------------------------------------------------------
/assets/algorithm/fpn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/fpn.png
--------------------------------------------------------------------------------
/assets/algorithm/overfeat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/overfeat.png
--------------------------------------------------------------------------------
/assets/algorithm/rcnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/rcnn.png
--------------------------------------------------------------------------------
/assets/algorithm/sppnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/algorithm/sppnet.png
--------------------------------------------------------------------------------
/assets/block_diagram/SSD-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/SSD-architecture.png
--------------------------------------------------------------------------------
/assets/block_diagram/SSD-framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/SSD-framework.png
--------------------------------------------------------------------------------
/assets/block_diagram/cornetnet-lite.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/cornetnet-lite.png
--------------------------------------------------------------------------------
/assets/block_diagram/fcn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/fcn.png
--------------------------------------------------------------------------------
/assets/block_diagram/fcn_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/fcn_architecture.png
--------------------------------------------------------------------------------
/assets/block_diagram/fcn_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/fcn_block.png
--------------------------------------------------------------------------------
/assets/block_diagram/fcn_upooling.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/fcn_upooling.jpg
--------------------------------------------------------------------------------
/assets/block_diagram/featurized-image-pyramid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/featurized-image-pyramid.png
--------------------------------------------------------------------------------
/assets/block_diagram/fpn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/fpn.png
--------------------------------------------------------------------------------
/assets/block_diagram/fpn_rpn.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/fpn_rpn.jpeg
--------------------------------------------------------------------------------
/assets/block_diagram/lenet_alexnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/lenet_alexnet.png
--------------------------------------------------------------------------------
/assets/block_diagram/mobilenetv1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/mobilenetv1.png
--------------------------------------------------------------------------------
/assets/block_diagram/mobilenetv2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/mobilenetv2.png
--------------------------------------------------------------------------------
/assets/block_diagram/object_detection_block_diagram.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/object_detection_block_diagram.pptx
--------------------------------------------------------------------------------
/assets/block_diagram/resnet_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/resnet_architecture.png
--------------------------------------------------------------------------------
/assets/block_diagram/resnet_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/resnet_block.png
--------------------------------------------------------------------------------
/assets/block_diagram/retina-net.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/retina-net.png
--------------------------------------------------------------------------------
/assets/block_diagram/shufflenet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/shufflenet.png
--------------------------------------------------------------------------------
/assets/block_diagram/vgg16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/vgg16.png
--------------------------------------------------------------------------------
/assets/block_diagram/vgg19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/vgg19.png
--------------------------------------------------------------------------------
/assets/block_diagram/yolo-network-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/yolo-network-architecture.png
--------------------------------------------------------------------------------
/assets/block_diagram/yolo-responsible-predictor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/block_diagram/yolo-responsible-predictor.png
--------------------------------------------------------------------------------
/assets/code_diagram/alexnet_revised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/code_diagram/alexnet_revised.png
--------------------------------------------------------------------------------
/assets/code_diagram/alexnet_revised_v1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/code_diagram/alexnet_revised_v1.png
--------------------------------------------------------------------------------
/assets/code_diagram/lenet_revised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/code_diagram/lenet_revised.png
--------------------------------------------------------------------------------
/assets/code_diagram/vgg16_tl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/assets/code_diagram/vgg16_tl.png
--------------------------------------------------------------------------------
/dataset/ChineseFoodDataset/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/dataset/ChineseFoodDataset/.DS_Store
--------------------------------------------------------------------------------
/dataset/ChineseFoodDataset/chinese_food_spider.py:
--------------------------------------------------------------------------------
1 | import os, requests
2 | from lxml import etree
3 |
4 |
5 | main_page = "https://www.douguo.com/caipu/"
6 | fenlei_url = "https://www.douguo.com/caipu/fenlei"
7 | test_noodle_page = "https://www.douguo.com/caipu/面条/"
8 |
9 |
10 | def get_html(url):
11 |     # Fetch a page and parse it into an lxml HTML tree.
12 |     r = requests.get(url)
13 |     r.encoding = "utf-8"
14 |
15 |     # print(r.text)
16 |
17 |     html = etree.HTML(r.text)
18 |
19 |     return html
20 |
21 |
22 | def get_classes_urls_dict(classes_page_url):
23 |     classes_page_html = get_html(classes_page_url)
24 |     classes = classes_page_html.xpath("//ul/li/a/@title")
25 |     catalog_url = classes_page_html.xpath("//ul[@class='sortlist clearfix']/li/a/@href")
26 |
27 |     classes_url = {}
28 |     for k, v in zip(classes, catalog_url):
29 |         classes_url.update({k: "".join((main_page, k))})  # category page URL is built from the category name
30 |
31 |     return classes_url
32 |
33 |
34 | def get_class_page_nums(single_class_url):
35 |     # Collect the pagination URLs of one category page.
36 |     class_page = get_html(single_class_url)
37 |     page_urls = class_page.xpath("//div[@class='pages']/a/@href")
38 |
39 |     page_url_prefix = ""
40 |     all_page_num = []
41 |     for i in list(set(page_urls)):
42 |         page_num = int(i.split("/")[-1])
43 |         all_page_num.append(page_num)
44 |
45 |         if page_url_prefix != i[:i.rfind("/")]:
46 |             page_url_prefix = i[:i.rfind("/") + 1]
47 |             # print(page_url_prefix)
48 |
49 |     all_page_num.sort()
50 |     # print(all_page_num)
51 |
52 |     new_num = []
53 |
54 |     if len(all_page_num) > 3:
55 |         d0 = all_page_num[1] - all_page_num[0]
56 |         d1 = all_page_num[2] - all_page_num[1]
57 |         if d0 == d1:
58 |             # Page numbers form an arithmetic sequence: fill in the missing pages.
59 |             p_num = all_page_num[0]
60 |             while p_num <= all_page_num[-1]:
61 |                 new_num.append(page_url_prefix + str(p_num))
62 |                 p_num += d0
63 |     if not new_num:
64 |         new_num = [page_url_prefix + str(p) for p in all_page_num]
65 |
66 |     return new_num
67 |
68 |
69 | def get_page_img(page_url):
70 |     page_html = get_html(page_url)
71 |     page_imgs = page_html.xpath("//ul[@id='jxlist']/li/a/img/@src")
72 |     return page_imgs
73 |
74 |
75 | def get_all_urls(cls_url):
76 |     # Gather every image URL of every page of every category.
77 |     all_url_list = []
78 |     cls_urls = get_classes_urls_dict(cls_url)
79 |     for i in cls_urls.values():
80 |         for j in get_class_page_nums(i):
81 |             all_url_list.extend(get_page_img(j))
82 |
83 |     return all_url_list
84 |
85 |
86 | def download_img_list(url_list):
87 |
88 |     if not os.path.exists("images"):
89 |         os.mkdir("images")
90 |
91 |     i = 1
92 |     for each in url_list:
93 |         print("Downloading image " + str(i) + ", url: " + str(each))
94 |         try:
95 |             pic = requests.get(each, timeout=10)
96 |         except requests.exceptions.ConnectionError:
97 |             print("[Error] this image could not be downloaded")
98 |             continue
99 |
100 |         file_name = each.split("/")[-1]
101 |         # img_path = 'images/' + 'douguo_{:%Y%m%dT%H%M%S}.jpg'.format(datetime.datetime.now())
102 |         img_path = 'images/' + file_name
103 |
104 |         with open(img_path, 'wb') as fp:
105 |             fp.write(pic.content)
106 |
107 |         i += 1
108 |
109 |
110 | def download_img(url_list):
111 |     if not os.path.exists("images"):
112 |         os.mkdir("images")
113 |
114 |     i = 1
115 |     for each in url_list:
116 |         # Switch from the 400x266 thumbnail to the full-size ("yuan") image.
117 |         new_each = each.replace("400x266", "yuan")
118 |         print("Downloading image " + str(i) + ", url: " + str(new_each))
119 |
120 |         try:
121 |             pic = requests.get(new_each, timeout=10)
122 |         except requests.exceptions.ConnectionError:
123 |             print("[Error] this image could not be downloaded")
124 |             continue
125 |
126 |         file_name = new_each.split("/")[-1]
127 |         # img_path = 'images/' + 'douguo_{:%Y%m%dT%H%M%S}.jpg'.format(datetime.datetime.now())
128 |         img_path = 'images/' + file_name
129 |
130 |         with open(img_path, 'wb') as fp:
131 |             fp.write(pic.content)
132 |
133 |         i += 1
134 |
135 |
136 | def main(url):
137 |     c_dict = get_classes_urls_dict(url)
138 |     for each_cls in c_dict.values():
139 |         # print(get_class_page_nums(each_cls))
140 |         for each_page in get_class_page_nums(each_cls):
141 |             # print(get_page_img(each_page))
142 |             download_img(get_page_img(each_page))
143 |             # for each_img in get_page_img(each_page):
144 |             #     new_each_img = each_img.replace("400x266", "yuan")
145 |             #     print(new_each_img)
146 |             #     download_img(each_img)
147 |
148 |
149 | if __name__ == "__main__":
150 |     main(fenlei_url)
--------------------------------------------------------------------------------
/image-retrieval/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/image-retrieval/.DS_Store
--------------------------------------------------------------------------------
/image-retrieval/1998-(J)-Example-Based Learning for View-Based Human Face Detection.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/image-retrieval/1998-(J)-Example-Based Learning for View-Based Human Face Detection.pdf
--------------------------------------------------------------------------------
/image-retrieval/2010-(J)-Object Detection with Discriminatively Trained Part Based Models.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/image-retrieval/2010-(J)-Object Detection with Discriminatively Trained Part Based Models.pdf
--------------------------------------------------------------------------------
/image-retrieval/paper/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/image-retrieval/paper/.DS_Store
--------------------------------------------------------------------------------
/image-retrieval/paper/2015-(J)-CVPR- Deep Learning of Binary Hash Codes for Fast Image Retrieval.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/image-retrieval/paper/2015-(J)-CVPR- Deep Learning of Binary Hash Codes for Fast Image Retrieval.pdf
--------------------------------------------------------------------------------
/sample-code/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/sample-code/.DS_Store
--------------------------------------------------------------------------------
/sample-code/network/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorguo/Deep-Object-Detection/76c602d1ff344c28ce63bd84c8d19242a2411839/sample-code/network/.DS_Store
--------------------------------------------------------------------------------
/sample-code/network/.idea/misc.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/sample-code/network/.idea/modules.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/sample-code/network/.idea/network.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/sample-code/network/.idea/vcs.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/sample-code/network/.idea/workspace.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/sample-code/network/alexnet_keras.py:
--------------------------------------------------------------------------------
1 | # Author: Taylor Guo, taylorguo@126.com
2 | # Python 3.6.7
3 | '''
4 | Keras 2.1.0
5 | Keras-Applications 1.0.7
6 | Keras-Preprocessing 1.0.8
7 | tensorboard 1.12.2
8 | tensorflow 1.12.0
9 | tensorflow-tensorboard 0.4.0
10 | tflearn 0.3.2
11 | numpy 1.14.5
12 | opencv-python 3.4.1.15
13 |
14 | paper: (https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)
15 | PPT: (http://vision.stanford.edu/teaching/cs231b_spring1415/slides/alexnet_tugce_kyunghee.pdf)
16 | '''
17 |
18 | # AlexNet-Keras for oxflower17 image classification
19 |
20 | from keras.models import Sequential
21 | from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense, Dropout
22 | from keras.layers.normalization import BatchNormalization
23 | from keras import backend as K
24 |
25 | from tflearn.datasets import oxflower17
26 | from keras.utils import to_categorical
27 | from keras.optimizers import SGD, Adam
28 |
29 | from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
30 |
31 | import numpy as np
32 | import os,datetime
33 |
34 | class AlexNet:
35 |
36 | @staticmethod
37 | def build(channels, height, width, classes, activation="relu", weights_path=None):
38 |
39 | input_shape = (height, width, channels)
40 |
41 | model = Sequential()
42 |
43 | model.add(Conv2D(96,(11,11), strides=(4,4), input_shape=input_shape))
44 | model.add(BatchNormalization())
45 | model.add(Activation(activation))
46 | model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
47 |
48 | model.add(Conv2D(256, (5,5), strides=(2,2)))
49 | model.add(BatchNormalization())
50 | model.add(Activation(activation))
51 | model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
52 |
53 | model.add(Conv2D(384,(3,3), strides=(1,1), padding="same"))
54 | model.add(BatchNormalization())
55 | model.add(Activation(activation))
56 |
57 | model.add(Conv2D(384, (3, 3), padding="same"))
58 | model.add(BatchNormalization())
59 | model.add(Activation(activation))
60 |
61 | model.add(Conv2D(256, (3, 3), padding="same"))
62 | model.add(BatchNormalization())
63 | model.add(Activation(activation))
64 | model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
65 |
66 | model.add(Flatten())
67 |
68 | model.add(Dense(512, activation=activation))
69 | model.add(Dropout(0.6))
70 |
71 | model.add(Dense(512, activation=activation))
72 | model.add(Dropout(0.6))
73 |
74 | model.add(Dense(128, activation=activation))
75 | model.add(Dropout(0.6))
76 |
77 | model.add(Dense(classes, activation="softmax"))
78 |
79 | if weights_path is not None:
80 | model.load_weights(weights_path)
81 |
82 | model.summary()
83 |
84 | return model
85 |
86 | @staticmethod
87 | def load_dataset_oxflower17():
88 | print("\t Downloading Oxford Flower17 dataset ...")
89 |
90 | training_data, training_labels = oxflower17.load_data(one_hot=True)
91 |
92 | return (training_data, training_labels)
93 |
94 | @staticmethod
95 | def train(weight_path=None, load_weights=False, save_weights=True):
96 |
97 | model = AlexNet.build(channels=3, height=224, width=224, classes=17)
98 |
99 | model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.0005), metrics=["accuracy"])
100 |
101 | (train_d, train_l) = AlexNet.load_dataset_oxflower17()
102 |
103 | early_stopping = EarlyStopping(monitor="val_acc", patience=200, verbose=1)
104 | reduce_lr = ReduceLROnPlateau(monitor="val_acc", factor=0.8, patience=100, verbose=1)
105 | if not os.path.exists("models"):
106 | os.mkdir("models")
107 | best_weights = "models/best_weights.h5"
108 | save_best_model = ModelCheckpoint(best_weights, monitor="val_acc", verbose=1, save_best_only=True)
109 |
110 | if load_weights==False:
111 | print("\t Start training ...")
112 | train_history= model.fit(train_d, train_l, batch_size=64, epochs=1000, verbose=1, validation_split=0.3,
113 | callbacks=[reduce_lr, save_best_model,early_stopping])
114 | else:
115 | pass
116 | # load_weights from weight_path
117 |
118 | if save_weights==True:
119 | print("\t Save trained weights to file ...")
120 | if not os.path.exists("models"):
121 | os.mkdir("models")
122 | weight_file = "AlexNet_{:%Y%m%dT%H%M%S}.h5".format(datetime.datetime.now())
123 | model.save_weights(os.path.join("models", weight_file), overwrite=True)
124 |
125 | if __name__ == '__main__':
126 | AlexNet.train(save_weights=False)
--------------------------------------------------------------------------------
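Because `AlexNet.build` accepts a `weights_path`, weights produced by `AlexNet.train` can be reloaded for inference without retraining. A minimal sketch, assuming a checkpoint file such as `models/best_weights.h5` from the `ModelCheckpoint` callback above exists (use whatever file was actually written):

```python
import numpy as np
from alexnet_keras import AlexNet

# Rebuild the same architecture and load previously trained weights
# (the path is an assumption; point it at the .h5 file saved during training).
model = AlexNet.build(channels=3, height=224, width=224, classes=17,
                      weights_path="models/best_weights.h5")

# Predict on one 224x224x3 image preprocessed the same way as the training data.
image = np.random.rand(1, 224, 224, 3).astype("float32")  # placeholder input
probs = model.predict(image)
print("predicted class:", int(np.argmax(probs[0])))
```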
/sample-code/network/cifar10_cnn.py:
--------------------------------------------------------------------------------
1 | '''
2 | #Train a simple deep CNN on the CIFAR10 small images dataset.
3 |
4 | It gets to 75% validation accuracy in 25 epochs, and 79% after 50 epochs.
5 | (it's still underfitting at that point, though).
6 | '''
7 |
8 | from __future__ import print_function
9 | import keras
10 | from keras.datasets import cifar10
11 | from keras.preprocessing.image import ImageDataGenerator
12 | from keras.models import Sequential
13 | from keras.layers import Dense, Dropout, Activation, Flatten
14 | from keras.layers import Conv2D, MaxPooling2D
15 | import os
16 |
17 | batch_size = 32
18 | num_classes = 10
19 | epochs = 100
20 | data_augmentation = True
21 | num_predictions = 20
22 | save_dir = os.path.join(os.getcwd(), 'saved_models')
23 | model_name = 'keras_cifar10_trained_model.h5'
24 |
25 | # The data, split between train and test sets:
26 | (x_train, y_train), (x_test, y_test) = cifar10.load_data()
27 | print('x_train shape:', x_train.shape)
28 | print(x_train.shape[0], 'train samples')
29 | print(x_test.shape[0], 'test samples')
30 |
31 | # Convert class vectors to binary class matrices.
32 | y_train = keras.utils.to_categorical(y_train, num_classes)
33 | y_test = keras.utils.to_categorical(y_test, num_classes)
34 |
35 | model = Sequential()
36 | model.add(Conv2D(32, (3, 3), padding='same',
37 | input_shape=x_train.shape[1:]))
38 | model.add(Activation('relu'))
39 | model.add(Conv2D(32, (3, 3)))
40 | model.add(Activation('relu'))
41 | model.add(MaxPooling2D(pool_size=(2, 2)))
42 | model.add(Dropout(0.25))
43 |
44 | model.add(Conv2D(64, (3, 3), padding='same'))
45 | model.add(Activation('relu'))
46 | model.add(Conv2D(64, (3, 3)))
47 | model.add(Activation('relu'))
48 | model.add(MaxPooling2D(pool_size=(2, 2)))
49 | model.add(Dropout(0.25))
50 |
51 | model.add(Flatten())
52 | model.add(Dense(512))
53 | model.add(Activation('relu'))
54 | model.add(Dropout(0.5))
55 | model.add(Dense(num_classes))
56 | model.add(Activation('softmax'))
57 |
58 | # initiate RMSprop optimizer
59 | opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
60 |
61 | # Let's train the model using RMSprop
62 | model.compile(loss='categorical_crossentropy',
63 | optimizer=opt,
64 | metrics=['accuracy'])
65 |
66 | x_train = x_train.astype('float32')
67 | x_test = x_test.astype('float32')
68 | x_train /= 255
69 | x_test /= 255
70 |
71 | if not data_augmentation:
72 | print('Not using data augmentation.')
73 | model.fit(x_train, y_train,
74 | batch_size=batch_size,
75 | epochs=epochs,
76 | validation_data=(x_test, y_test),
77 | shuffle=True)
78 | else:
79 | print('Using real-time data augmentation.')
80 | # This will do preprocessing and realtime data augmentation:
81 | datagen = ImageDataGenerator(
82 | featurewise_center=False, # set input mean to 0 over the dataset
83 | samplewise_center=False, # set each sample mean to 0
84 | featurewise_std_normalization=False, # divide inputs by std of the dataset
85 | samplewise_std_normalization=False, # divide each input by its std
86 | zca_whitening=False, # apply ZCA whitening
87 | zca_epsilon=1e-06, # epsilon for ZCA whitening
88 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180)
89 | # randomly shift images horizontally (fraction of total width)
90 | width_shift_range=0.1,
91 | # randomly shift images vertically (fraction of total height)
92 | height_shift_range=0.1,
93 | shear_range=0., # set range for random shear
94 | zoom_range=0., # set range for random zoom
95 | channel_shift_range=0., # set range for random channel shifts
96 | # set mode for filling points outside the input boundaries
97 | fill_mode='nearest',
98 | cval=0., # value used for fill_mode = "constant"
99 | horizontal_flip=True, # randomly flip images
100 | vertical_flip=False, # randomly flip images
101 | # set rescaling factor (applied before any other transformation)
102 | rescale=None,
103 | # set function that will be applied on each input
104 | preprocessing_function=None,
105 | # image data format, either "channels_first" or "channels_last"
106 | data_format=None,
107 | # fraction of images reserved for validation (strictly between 0 and 1)
108 | validation_split=0.0)
109 |
110 | # Compute quantities required for feature-wise normalization
111 | # (std, mean, and principal components if ZCA whitening is applied).
112 | datagen.fit(x_train)
113 |
114 | # Fit the model on the batches generated by datagen.flow().
115 | model.fit_generator(datagen.flow(x_train, y_train,
116 | batch_size=batch_size),
117 | epochs=epochs,
118 | validation_data=(x_test, y_test),
119 | workers=4)
120 |
121 | # Save model and weights
122 | if not os.path.isdir(save_dir):
123 | os.makedirs(save_dir)
124 | model_path = os.path.join(save_dir, model_name)
125 | model.save(model_path)
126 | print('Saved trained model at %s ' % model_path)
127 |
128 | # Score trained model.
129 | scores = model.evaluate(x_test, y_test, verbose=1)
130 | print('Test loss:', scores[0])
131 | print('Test accuracy:', scores[1])
132 |
--------------------------------------------------------------------------------
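Since `cifar10_cnn.py` saves the complete model with `model.save`, it can be reloaded later without rebuilding the architecture. A short sketch, assuming the default `saved_models/keras_cifar10_trained_model.h5` path used above:

```python
import keras
from keras.datasets import cifar10
from keras.models import load_model

# Reload the trained model written by cifar10_cnn.py.
model = load_model("saved_models/keras_cifar10_trained_model.h5")

# Evaluate again on the CIFAR-10 test split, preprocessed the same way as in training.
(_, _), (x_test, y_test) = cifar10.load_data()
x_test = x_test.astype("float32") / 255
y_test = keras.utils.to_categorical(y_test, 10)

loss, acc = model.evaluate(x_test, y_test, verbose=1)
print("test accuracy: {:.2%}".format(acc))
```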
/sample-code/network/lenet_keras.py:
--------------------------------------------------------------------------------
1 | # Author: Taylor Guo, taylorguo@126.com
2 |
3 | '''
4 | Keras 2.1.0
5 | Keras-Applications 1.0.7
6 | Keras-Preprocessing 1.0.8
7 | tensorboard 1.12.2
8 | tensorflow 1.12.0
9 | tensorflow-tensorboard 0.4.0
10 | numpy 1.14.5
11 | opencv-python 3.4.1.15
12 |
13 | paper: (http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf)
14 | '''
15 |
16 | # LeNet-Keras for mnist handwriting digital image classification
17 |
18 | from keras.models import Sequential
19 | from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense
20 | from keras import backend as K
21 |
22 | from keras.datasets import mnist
23 | from keras.utils import to_categorical
24 | from keras.optimizers import SGD
25 | import numpy as np
26 | import cv2, os,datetime
27 |
28 | class LeNet:
29 | @staticmethod
30 | def build(channels, height, width, classes, activation="relu", weights_path=None):
31 |
32 | input_shape = (height, width, channels)
33 |
34 | model = Sequential()
35 |
36 | model.add(Conv2D(16,(5,5),activation=activation, padding="same", input_shape=input_shape))
37 | model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
38 |
39 | model.add(Conv2D(32, (5,5), activation=activation, padding="same"))
40 | model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
41 |
42 | model.add(Flatten())
43 | model.add(Dense(128, activation=activation))
44 | model.add(Dense(classes, activation="softmax"))
45 |
46 | if weights_path is not None:
47 | model.load_weights(weights_path)
48 |
49 | model.summary()
50 |
51 | return model
52 |
53 | @staticmethod
54 | def load_dataset_mnist():
55 | print("\t Downloading MNIST dataset ...")
56 |
57 | (training_data, training_labels),(test_data, test_labels) = mnist.load_data()
58 |
59 | training_data = training_data.reshape((60000, 28,28,1))
60 | test_data = test_data.reshape((10000, 28,28,1))
61 |
62 | training_data = training_data.astype("float32") / 255.0
63 | test_data = test_data.astype("float32") / 255.0
64 |
65 | training_labels = to_categorical(training_labels)
66 | test_labels = to_categorical(test_labels)
67 |
68 | return ((training_data, training_labels),(test_data, test_labels))
69 |
70 | @staticmethod
71 | def train(weight_path=None, load_weights=False, save_weights=True):
72 |
73 | model = LeNet.build(channels=1, height=28, width=28, classes=10)
74 | model.compile(loss="categorical_crossentropy", optimizer=SGD(lr=0.01), metrics=["accuracy"])
75 |
76 | (train_d, train_l),(test_d, test_l) = LeNet.load_dataset_mnist()
77 |
78 | if load_weights==False:
79 | print("\t Start training ...")
80 | model.fit(train_d, train_l, batch_size=128, epochs=20, verbose=1)
81 |
82 | print("\t Now evaluating ...")
83 | (loss, accuracy) = model.evaluate(test_d, test_l, batch_size=128, verbose=1)
84 |
85 | print("\t Accuracy: {:.2f}%".format(accuracy*100))
86 | else:
87 | pass
88 | # load_weights from weight_path
89 |
90 | if save_weights==True:
91 | print("\t Save trained weights to file ...")
92 | if not os.path.exists("models"):
93 | os.mkdir("models")
94 | weight_file = "LeNet_{:%Y%m%dT%H%M%S}.h5".format(datetime.datetime.now())
95 | model.save_weights(os.path.join("models", weight_file), overwrite=True)
96 |
97 | @staticmethod
98 | def inference():
99 | pass
100 |
101 |
102 | if __name__ == '__main__':
103 |
104 | LeNet.train(save_weights=False)
--------------------------------------------------------------------------------
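The `LeNet.inference` method above is left as a stub. One possible implementation, sketched here under the assumption that a weights file saved by `LeNet.train` is available (the file name below is only illustrative), reuses `build(weights_path=...)` and predicts on MNIST test digits:

```python
import numpy as np
from lenet_keras import LeNet

# Rebuild LeNet and load weights written by LeNet.train (path is an assumption).
model = LeNet.build(channels=1, height=28, width=28, classes=10,
                    weights_path="models/LeNet_20190101T000000.h5")

# Run it on a few MNIST test images, preprocessed exactly as in load_dataset_mnist.
(_, _), (test_data, test_labels) = LeNet.load_dataset_mnist()
preds = model.predict(test_data[:5])
print("predicted digits:", np.argmax(preds, axis=1))
print("true digits:     ", np.argmax(test_labels[:5], axis=1))
```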
/sample-code/network/resnet.py:
--------------------------------------------------------------------------------
1 | """ResNet models for Keras.
2 |
3 | # Reference paper
4 |
5 | - [Deep Residual Learning for Image Recognition]
6 | (https://arxiv.org/abs/1512.03385) (CVPR 2016 Best Paper Award)
7 |
8 | # Reference implementations
9 |
10 | - [TensorNets]
11 | (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py)
12 | - [Caffe ResNet]
13 | (https://github.com/KaimingHe/deep-residual-networks/tree/master/prototxt)
14 |
15 | """
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | from . import imagenet_utils
21 | from .imagenet_utils import decode_predictions
22 | from .resnet_common import ResNet50
23 | from .resnet_common import ResNet101
24 | from .resnet_common import ResNet152
25 |
26 |
27 | def preprocess_input(x, **kwargs):
28 | """Preprocesses a numpy array encoding a batch of images.
29 |
30 | # Arguments
31 | x: a 4D numpy array consists of RGB values within [0, 255].
32 | data_format: data format of the image tensor.
33 |
34 | # Returns
35 | Preprocessed array.
36 | """
37 | return imagenet_utils.preprocess_input(x, mode='caffe', **kwargs)
38 |
--------------------------------------------------------------------------------
/sample-code/network/resnet50.py:
--------------------------------------------------------------------------------
1 | """ResNet50 model for Keras.
2 |
3 | # Reference:
4 |
5 | - [Deep Residual Learning for Image Recognition](
6 | https://arxiv.org/abs/1512.03385)
7 |
8 | Adapted from code contributed by BigMoyan.
9 | """
10 | from __future__ import absolute_import
11 | from __future__ import division
12 | from __future__ import print_function
13 |
14 | import os
15 | import warnings
16 |
17 | from . import get_submodules_from_kwargs
18 | from . import imagenet_utils
19 | from .imagenet_utils import decode_predictions
20 | from .imagenet_utils import _obtain_input_shape
21 |
22 | preprocess_input = imagenet_utils.preprocess_input
23 |
24 | WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/'
25 | 'releases/download/v0.2/'
26 | 'resnet50_weights_tf_dim_ordering_tf_kernels.h5')
27 | WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
28 | 'releases/download/v0.2/'
29 | 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5')
30 |
31 | backend = None
32 | layers = None
33 | models = None
34 | keras_utils = None
35 |
36 |
37 | def identity_block(input_tensor, kernel_size, filters, stage, block):
38 | """The identity block is the block that has no conv layer at shortcut.
39 |
40 | # Arguments
41 | input_tensor: input tensor
42 | kernel_size: default 3, the kernel size of
43 | middle conv layer at main path
44 | filters: list of integers, the filters of 3 conv layer at main path
45 | stage: integer, current stage label, used for generating layer names
46 | block: 'a','b'..., current block label, used for generating layer names
47 |
48 | # Returns
49 | Output tensor for the block.
50 | """
51 | filters1, filters2, filters3 = filters
52 | if backend.image_data_format() == 'channels_last':
53 | bn_axis = 3
54 | else:
55 | bn_axis = 1
56 | conv_name_base = 'res' + str(stage) + block + '_branch'
57 | bn_name_base = 'bn' + str(stage) + block + '_branch'
58 |
59 | x = layers.Conv2D(filters1, (1, 1),
60 | kernel_initializer='he_normal',
61 | name=conv_name_base + '2a')(input_tensor)
62 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
63 | x = layers.Activation('relu')(x)
64 |
65 | x = layers.Conv2D(filters2, kernel_size,
66 | padding='same',
67 | kernel_initializer='he_normal',
68 | name=conv_name_base + '2b')(x)
69 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
70 | x = layers.Activation('relu')(x)
71 |
72 | x = layers.Conv2D(filters3, (1, 1),
73 | kernel_initializer='he_normal',
74 | name=conv_name_base + '2c')(x)
75 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
76 |
77 | x = layers.add([x, input_tensor])
78 | x = layers.Activation('relu')(x)
79 | return x
80 |
81 |
82 | def conv_block(input_tensor,
83 | kernel_size,
84 | filters,
85 | stage,
86 | block,
87 | strides=(2, 2)):
88 | """A block that has a conv layer at shortcut.
89 |
90 | # Arguments
91 | input_tensor: input tensor
92 | kernel_size: default 3, the kernel size of
93 | middle conv layer at main path
94 | filters: list of integers, the filters of 3 conv layer at main path
95 | stage: integer, current stage label, used for generating layer names
96 | block: 'a','b'..., current block label, used for generating layer names
97 | strides: Strides for the first conv layer in the block.
98 |
99 | # Returns
100 | Output tensor for the block.
101 |
102 | Note that from stage 3,
103 | the first conv layer at main path is with strides=(2, 2)
104 | And the shortcut should have strides=(2, 2) as well
105 | """
106 | filters1, filters2, filters3 = filters
107 | if backend.image_data_format() == 'channels_last':
108 | bn_axis = 3
109 | else:
110 | bn_axis = 1
111 | conv_name_base = 'res' + str(stage) + block + '_branch'
112 | bn_name_base = 'bn' + str(stage) + block + '_branch'
113 |
114 | x = layers.Conv2D(filters1, (1, 1), strides=strides,
115 | kernel_initializer='he_normal',
116 | name=conv_name_base + '2a')(input_tensor)
117 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
118 | x = layers.Activation('relu')(x)
119 |
120 | x = layers.Conv2D(filters2, kernel_size, padding='same',
121 | kernel_initializer='he_normal',
122 | name=conv_name_base + '2b')(x)
123 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
124 | x = layers.Activation('relu')(x)
125 |
126 | x = layers.Conv2D(filters3, (1, 1),
127 | kernel_initializer='he_normal',
128 | name=conv_name_base + '2c')(x)
129 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
130 |
131 | shortcut = layers.Conv2D(filters3, (1, 1), strides=strides,
132 | kernel_initializer='he_normal',
133 | name=conv_name_base + '1')(input_tensor)
134 | shortcut = layers.BatchNormalization(
135 | axis=bn_axis, name=bn_name_base + '1')(shortcut)
136 |
137 | x = layers.add([x, shortcut])
138 | x = layers.Activation('relu')(x)
139 | return x
140 |
141 |
142 | def ResNet50(include_top=True,
143 | weights='imagenet',
144 | input_tensor=None,
145 | input_shape=None,
146 | pooling=None,
147 | classes=1000,
148 | **kwargs):
149 | """Instantiates the ResNet50 architecture.
150 |
151 | Optionally loads weights pre-trained on ImageNet.
152 | Note that the data format convention used by the model is
153 | the one specified in your Keras config at `~/.keras/keras.json`.
154 |
155 | # Arguments
156 | include_top: whether to include the fully-connected
157 | layer at the top of the network.
158 | weights: one of `None` (random initialization),
159 | 'imagenet' (pre-training on ImageNet),
160 | or the path to the weights file to be loaded.
161 | input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
162 | to use as image input for the model.
163 | input_shape: optional shape tuple, only to be specified
164 | if `include_top` is False (otherwise the input shape
165 | has to be `(224, 224, 3)` (with `channels_last` data format)
166 | or `(3, 224, 224)` (with `channels_first` data format).
167 | It should have exactly 3 inputs channels,
168 | and width and height should be no smaller than 32.
169 | E.g. `(200, 200, 3)` would be one valid value.
170 | pooling: Optional pooling mode for feature extraction
171 | when `include_top` is `False`.
172 | - `None` means that the output of the model will be
173 | the 4D tensor output of the
174 | last convolutional block.
175 | - `avg` means that global average pooling
176 | will be applied to the output of the
177 | last convolutional block, and thus
178 | the output of the model will be a 2D tensor.
179 | - `max` means that global max pooling will
180 | be applied.
181 | classes: optional number of classes to classify images
182 | into, only to be specified if `include_top` is True, and
183 | if no `weights` argument is specified.
184 |
185 | # Returns
186 | A Keras model instance.
187 |
188 | # Raises
189 | ValueError: in case of invalid argument for `weights`,
190 | or invalid input shape.
191 | """
192 | global backend, layers, models, keras_utils
193 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)
194 |
195 | if not (weights in {'imagenet', None} or os.path.exists(weights)):
196 | raise ValueError('The `weights` argument should be either '
197 | '`None` (random initialization), `imagenet` '
198 | '(pre-training on ImageNet), '
199 | 'or the path to the weights file to be loaded.')
200 |
201 | if weights == 'imagenet' and include_top and classes != 1000:
202 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
203 | ' as true, `classes` should be 1000')
204 |
205 | # Determine proper input shape
206 | input_shape = _obtain_input_shape(input_shape,
207 | default_size=224,
208 | min_size=32,
209 | data_format=backend.image_data_format(),
210 | require_flatten=include_top,
211 | weights=weights)
212 |
213 | if input_tensor is None:
214 | img_input = layers.Input(shape=input_shape)
215 | else:
216 | if not backend.is_keras_tensor(input_tensor):
217 | img_input = layers.Input(tensor=input_tensor, shape=input_shape)
218 | else:
219 | img_input = input_tensor
220 | if backend.image_data_format() == 'channels_last':
221 | bn_axis = 3
222 | else:
223 | bn_axis = 1
224 |
225 | x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
226 | x = layers.Conv2D(64, (7, 7),
227 | strides=(2, 2),
228 | padding='valid',
229 | kernel_initializer='he_normal',
230 | name='conv1')(x)
231 | x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
232 | x = layers.Activation('relu')(x)
233 | x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
234 | x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
235 |
236 | x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
237 | x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
238 | x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
239 |
240 | x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
241 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
242 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
243 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
244 |
245 | x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
246 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
247 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
248 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
249 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
250 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
251 |
252 | x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
253 | x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
254 | x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
255 |
256 | if include_top:
257 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
258 | x = layers.Dense(classes, activation='softmax', name='fc1000')(x)
259 | else:
260 | if pooling == 'avg':
261 | x = layers.GlobalAveragePooling2D()(x)
262 | elif pooling == 'max':
263 | x = layers.GlobalMaxPooling2D()(x)
264 | else:
265 | warnings.warn('The output shape of `ResNet50(include_top=False)` '
266 | 'has been changed since Keras 2.2.0.')
267 |
268 | # Ensure that the model takes into account
269 | # any potential predecessors of `input_tensor`.
270 | if input_tensor is not None:
271 | inputs = keras_utils.get_source_inputs(input_tensor)
272 | else:
273 | inputs = img_input
274 | # Create model.
275 | model = models.Model(inputs, x, name='resnet50')
276 |
277 | # Load weights.
278 | if weights == 'imagenet':
279 | if include_top:
280 | weights_path = keras_utils.get_file(
281 | 'resnet50_weights_tf_dim_ordering_tf_kernels.h5',
282 | WEIGHTS_PATH,
283 | cache_subdir='models',
284 | md5_hash='a7b3fe01876f51b976af0dea6bc144eb')
285 | else:
286 | weights_path = keras_utils.get_file(
287 | 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
288 | WEIGHTS_PATH_NO_TOP,
289 | cache_subdir='models',
290 | md5_hash='a268eb855778b3df3c7506639542a6af')
291 | model.load_weights(weights_path)
292 | if backend.backend() == 'theano':
293 | keras_utils.convert_all_kernels_in_model(model)
294 | elif weights is not None:
295 | model.load_weights(weights)
296 |
297 | return model
298 |
--------------------------------------------------------------------------------
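`resnet50.py` above is the keras-applications module form: it expects `backend`, `layers`, `models`, and `utils` to be injected through `get_submodules_from_kwargs`, so it is not meant to be called directly. For a quick classification test it is simpler to go through the public Keras wrapper, which exposes the same `ResNet50`, `preprocess_input`, and `decode_predictions`. A small sketch; the image path is a placeholder:

```python
import numpy as np
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.preprocessing import image

# ImageNet-pretrained ResNet50 with the classification head.
model = ResNet50(weights="imagenet")

# Load any RGB image resized to 224x224 (path is a placeholder).
img = image.load_img("example.jpg", target_size=(224, 224))
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

preds = model.predict(x)
for _, label, score in decode_predictions(preds, top=3)[0]:
    print(label, round(float(score), 3))
```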
/sample-code/network/resnet_common.py:
--------------------------------------------------------------------------------
1 | """ResNet, ResNetV2, and ResNeXt models for Keras.
2 |
3 | # Reference papers
4 |
5 | - [Deep Residual Learning for Image Recognition]
6 | (https://arxiv.org/abs/1512.03385) (CVPR 2016 Best Paper Award)
7 | - [Identity Mappings in Deep Residual Networks]
8 | (https://arxiv.org/abs/1603.05027) (ECCV 2016)
9 | - [Aggregated Residual Transformations for Deep Neural Networks]
10 | (https://arxiv.org/abs/1611.05431) (CVPR 2017)
11 |
12 | # Reference implementations
13 |
14 | - [TensorNets]
15 | (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py)
16 | - [Caffe ResNet]
17 | (https://github.com/KaimingHe/deep-residual-networks/tree/master/prototxt)
18 | - [Torch ResNetV2]
19 | (https://github.com/facebook/fb.resnet.torch/blob/master/models/preresnet.lua)
20 | - [Torch ResNeXt]
21 | (https://github.com/facebookresearch/ResNeXt/blob/master/models/resnext.lua)
22 |
23 | """
24 | from __future__ import absolute_import
25 | from __future__ import division
26 | from __future__ import print_function
27 |
28 | import os
29 |
30 | from . import get_submodules_from_kwargs
31 | from .imagenet_utils import _obtain_input_shape
32 |
33 |
34 | backend = None
35 | layers = None
36 | models = None
37 | keras_utils = None
38 |
39 |
40 | BASE_WEIGHTS_PATH = (
41 | 'https://github.com/keras-team/keras-applications/'
42 | 'releases/download/resnet/')
43 | WEIGHTS_HASHES = {
44 | 'resnet50': ('2cb95161c43110f7111970584f804107',
45 | '4d473c1dd8becc155b73f8504c6f6626'),
46 | 'resnet101': ('f1aeb4b969a6efcfb50fad2f0c20cfc5',
47 | '88cf7a10940856eca736dc7b7e228a21'),
48 | 'resnet152': ('100835be76be38e30d865e96f2aaae62',
49 | 'ee4c566cf9a93f14d82f913c2dc6dd0c'),
50 | 'resnet50v2': ('3ef43a0b657b3be2300d5770ece849e0',
51 | 'fac2f116257151a9d068a22e544a4917'),
52 | 'resnet101v2': ('6343647c601c52e1368623803854d971',
53 | 'c0ed64b8031c3730f411d2eb4eea35b5'),
54 | 'resnet152v2': ('a49b44d1979771252814e80f8ec446f9',
55 | 'ed17cf2e0169df9d443503ef94b23b33'),
56 | 'resnext50': ('67a5b30d522ed92f75a1f16eef299d1a',
57 | '62527c363bdd9ec598bed41947b379fc'),
58 | 'resnext101': ('34fb605428fcc7aa4d62f44404c11509',
59 | '0f678c91647380debd923963594981b3')
60 | }
61 |
62 |
63 | def block1(x, filters, kernel_size=3, stride=1,
64 | conv_shortcut=True, name=None):
65 | """A residual block.
66 |
67 | # Arguments
68 | x: input tensor.
69 | filters: integer, filters of the bottleneck layer.
70 | kernel_size: default 3, kernel size of the bottleneck layer.
71 | stride: default 1, stride of the first layer.
72 | conv_shortcut: default True, use convolution shortcut if True,
73 | otherwise identity shortcut.
74 | name: string, block label.
75 |
76 | # Returns
77 | Output tensor for the residual block.
78 | """
79 | bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
80 |
81 | if conv_shortcut is True:
82 | shortcut = layers.Conv2D(4 * filters, 1, strides=stride,
83 | name=name + '_0_conv')(x)
84 | shortcut = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
85 | name=name + '_0_bn')(shortcut)
86 | else:
87 | shortcut = x
88 |
89 | x = layers.Conv2D(filters, 1, strides=stride, name=name + '_1_conv')(x)
90 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
91 | name=name + '_1_bn')(x)
92 | x = layers.Activation('relu', name=name + '_1_relu')(x)
93 |
94 | x = layers.Conv2D(filters, kernel_size, padding='SAME',
95 | name=name + '_2_conv')(x)
96 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
97 | name=name + '_2_bn')(x)
98 | x = layers.Activation('relu', name=name + '_2_relu')(x)
99 |
100 | x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x)
101 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
102 | name=name + '_3_bn')(x)
103 |
104 | x = layers.Add(name=name + '_add')([shortcut, x])
105 | x = layers.Activation('relu', name=name + '_out')(x)
106 | return x
107 |
108 |
109 | def stack1(x, filters, blocks, stride1=2, name=None):
110 | """A set of stacked residual blocks.
111 |
112 | # Arguments
113 | x: input tensor.
114 | filters: integer, filters of the bottleneck layer in a block.
115 | blocks: integer, blocks in the stacked blocks.
116 | stride1: default 2, stride of the first layer in the first block.
117 | name: string, stack label.
118 |
119 | # Returns
120 | Output tensor for the stacked blocks.
121 | """
122 | x = block1(x, filters, stride=stride1, name=name + '_block1')
123 | for i in range(2, blocks + 1):
124 | x = block1(x, filters, conv_shortcut=False, name=name + '_block' + str(i))
125 | return x
126 |
127 |
128 | def block2(x, filters, kernel_size=3, stride=1,
129 | conv_shortcut=False, name=None):
130 | """A residual block.
131 |
132 | # Arguments
133 | x: input tensor.
134 | filters: integer, filters of the bottleneck layer.
135 | kernel_size: default 3, kernel size of the bottleneck layer.
136 | stride: default 1, stride of the first layer.
137 | conv_shortcut: default False, use convolution shortcut if True,
138 | otherwise identity shortcut.
139 | name: string, block label.
140 |
141 | # Returns
142 | Output tensor for the residual block.
143 | """
144 | bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
145 |
146 | preact = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
147 | name=name + '_preact_bn')(x)
148 | preact = layers.Activation('relu', name=name + '_preact_relu')(preact)
149 |
150 | if conv_shortcut is True:
151 | shortcut = layers.Conv2D(4 * filters, 1, strides=stride,
152 | name=name + '_0_conv')(preact)
153 | else:
154 | shortcut = layers.MaxPooling2D(1, strides=stride)(x) if stride > 1 else x
155 |
156 | x = layers.Conv2D(filters, 1, strides=1, use_bias=False,
157 | name=name + '_1_conv')(preact)
158 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
159 | name=name + '_1_bn')(x)
160 | x = layers.Activation('relu', name=name + '_1_relu')(x)
161 |
162 | x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
163 | x = layers.Conv2D(filters, kernel_size, strides=stride,
164 | use_bias=False, name=name + '_2_conv')(x)
165 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
166 | name=name + '_2_bn')(x)
167 | x = layers.Activation('relu', name=name + '_2_relu')(x)
168 |
169 | x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x)
170 | x = layers.Add(name=name + '_out')([shortcut, x])
171 | return x
172 |
173 |
174 | def stack2(x, filters, blocks, stride1=2, name=None):
175 | """A set of stacked residual blocks.
176 |
177 | # Arguments
178 | x: input tensor.
179 | filters: integer, filters of the bottleneck layer in a block.
180 | blocks: integer, blocks in the stacked blocks.
181 | stride1: default 2, stride of the first layer in the first block.
182 | name: string, stack label.
183 |
184 | # Returns
185 | Output tensor for the stacked blocks.
186 | """
187 | x = block2(x, filters, conv_shortcut=True, name=name + '_block1')
188 | for i in range(2, blocks):
189 | x = block2(x, filters, name=name + '_block' + str(i))
190 | x = block2(x, filters, stride=stride1, name=name + '_block' + str(blocks))
191 | return x
192 |
193 |
194 | def block3(x, filters, kernel_size=3, stride=1, groups=32,
195 | conv_shortcut=True, name=None):
196 | """A residual block.
197 |
198 | # Arguments
199 | x: input tensor.
200 | filters: integer, filters of the bottleneck layer.
201 | kernel_size: default 3, kernel size of the bottleneck layer.
202 | stride: default 1, stride of the first layer.
203 | groups: default 32, group size for grouped convolution.
204 | conv_shortcut: default True, use convolution shortcut if True,
205 | otherwise identity shortcut.
206 | name: string, block label.
207 |
208 | # Returns
209 | Output tensor for the residual block.
210 | """
211 | bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
212 |
213 | if conv_shortcut is True:
214 | shortcut = layers.Conv2D((64 // groups) * filters, 1, strides=stride,
215 | use_bias=False, name=name + '_0_conv')(x)
216 | shortcut = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
217 | name=name + '_0_bn')(shortcut)
218 | else:
219 | shortcut = x
220 |
221 | x = layers.Conv2D(filters, 1, use_bias=False, name=name + '_1_conv')(x)
222 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
223 | name=name + '_1_bn')(x)
224 | x = layers.Activation('relu', name=name + '_1_relu')(x)
225 |
226 | c = filters // groups
227 | x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
228 | x = layers.DepthwiseConv2D(kernel_size, strides=stride, depth_multiplier=c,
229 | use_bias=False, name=name + '_2_conv')(x)
230 | x_shape = backend.int_shape(x)[1:-1]
231 | x = layers.Reshape(x_shape + (groups, c, c))(x)
232 | output_shape = x_shape + (groups, c) if backend.backend() == 'theano' else None
233 | x = layers.Lambda(lambda x: sum([x[:, :, :, :, i] for i in range(c)]),
234 | output_shape=output_shape, name=name + '_2_reduce')(x)
235 | x = layers.Reshape(x_shape + (filters,))(x)
236 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
237 | name=name + '_2_bn')(x)
238 | x = layers.Activation('relu', name=name + '_2_relu')(x)
239 |
240 | x = layers.Conv2D((64 // groups) * filters, 1,
241 | use_bias=False, name=name + '_3_conv')(x)
242 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
243 | name=name + '_3_bn')(x)
244 |
245 | x = layers.Add(name=name + '_add')([shortcut, x])
246 | x = layers.Activation('relu', name=name + '_out')(x)
247 | return x
248 |
249 |
250 | def stack3(x, filters, blocks, stride1=2, groups=32, name=None):
251 | """A set of stacked residual blocks.
252 |
253 | # Arguments
254 | x: input tensor.
255 | filters: integer, filters of the bottleneck layer in a block.
256 | blocks: integer, blocks in the stacked blocks.
257 | stride1: default 2, stride of the first layer in the first block.
258 | groups: default 32, group size for grouped convolution.
259 | name: string, stack label.
260 |
261 | # Returns
262 | Output tensor for the stacked blocks.
263 | """
264 | x = block3(x, filters, stride=stride1, groups=groups, name=name + '_block1')
265 | for i in range(2, blocks + 1):
266 | x = block3(x, filters, groups=groups, conv_shortcut=False,
267 | name=name + '_block' + str(i))
268 | return x
269 |
270 |
271 | def ResNet(stack_fn,
272 | preact,
273 | use_bias,
274 | model_name='resnet',
275 | include_top=True,
276 | weights='imagenet',
277 | input_tensor=None,
278 | input_shape=None,
279 | pooling=None,
280 | classes=1000,
281 | **kwargs):
282 | """Instantiates the ResNet, ResNetV2, and ResNeXt architecture.
283 |
284 | Optionally loads weights pre-trained on ImageNet.
285 | Note that the data format convention used by the model is
286 | the one specified in your Keras config at `~/.keras/keras.json`.
287 |
288 | # Arguments
289 | stack_fn: a function that returns output tensor for the
290 | stacked residual blocks.
291 | preact: whether to use pre-activation or not
292 | (True for ResNetV2, False for ResNet and ResNeXt).
293 | use_bias: whether to use biases for convolutional layers or not
294 | (True for ResNet and ResNetV2, False for ResNeXt).
295 | model_name: string, model name.
296 | include_top: whether to include the fully-connected
297 | layer at the top of the network.
298 | weights: one of `None` (random initialization),
299 | 'imagenet' (pre-training on ImageNet),
300 | or the path to the weights file to be loaded.
301 | input_tensor: optional Keras tensor
302 | (i.e. output of `layers.Input()`)
303 | to use as image input for the model.
304 | input_shape: optional shape tuple, only to be specified
305 | if `include_top` is False (otherwise the input shape
306 | has to be `(224, 224, 3)` (with `channels_last` data format)
307 | or `(3, 224, 224)` (with `channels_first` data format).
308 | It should have exactly 3 inputs channels.
309 | pooling: optional pooling mode for feature extraction
310 | when `include_top` is `False`.
311 | - `None` means that the output of the model will be
312 | the 4D tensor output of the
313 | last convolutional layer.
314 | - `avg` means that global average pooling
315 | will be applied to the output of the
316 | last convolutional layer, and thus
317 | the output of the model will be a 2D tensor.
318 | - `max` means that global max pooling will
319 | be applied.
320 | classes: optional number of classes to classify images
321 | into, only to be specified if `include_top` is True, and
322 | if no `weights` argument is specified.
323 |
324 | # Returns
325 | A Keras model instance.
326 |
327 | # Raises
328 | ValueError: in case of invalid argument for `weights`,
329 | or invalid input shape.
330 | """
331 | global backend, layers, models, keras_utils
332 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)
333 |
334 | if not (weights in {'imagenet', None} or os.path.exists(weights)):
335 | raise ValueError('The `weights` argument should be either '
336 | '`None` (random initialization), `imagenet` '
337 | '(pre-training on ImageNet), '
338 | 'or the path to the weights file to be loaded.')
339 |
340 | if weights == 'imagenet' and include_top and classes != 1000:
341 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
342 | ' as true, `classes` should be 1000')
343 |
344 | # Determine proper input shape
345 | input_shape = _obtain_input_shape(input_shape,
346 | default_size=224,
347 | min_size=32,
348 | data_format=backend.image_data_format(),
349 | require_flatten=include_top,
350 | weights=weights)
351 |
352 | if input_tensor is None:
353 | img_input = layers.Input(shape=input_shape)
354 | else:
355 | if not backend.is_keras_tensor(input_tensor):
356 | img_input = layers.Input(tensor=input_tensor, shape=input_shape)
357 | else:
358 | img_input = input_tensor
359 |
360 | bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
361 |
362 | x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(img_input)
363 | x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x)
364 |
365 | if preact is False:
366 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
367 | name='conv1_bn')(x)
368 | x = layers.Activation('relu', name='conv1_relu')(x)
369 |
370 | x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
371 | x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x)
372 |
373 | x = stack_fn(x)
374 |
375 | if preact is True:
376 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
377 | name='post_bn')(x)
378 | x = layers.Activation('relu', name='post_relu')(x)
379 |
380 | if include_top:
381 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
382 | x = layers.Dense(classes, activation='softmax', name='probs')(x)
383 | else:
384 | if pooling == 'avg':
385 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
386 | elif pooling == 'max':
387 | x = layers.GlobalMaxPooling2D(name='max_pool')(x)
388 |
389 | # Ensure that the model takes into account
390 | # any potential predecessors of `input_tensor`.
391 | if input_tensor is not None:
392 | inputs = keras_utils.get_source_inputs(input_tensor)
393 | else:
394 | inputs = img_input
395 |
396 | # Create model.
397 | model = models.Model(inputs, x, name=model_name)
398 |
399 | # Load weights.
400 | if (weights == 'imagenet') and (model_name in WEIGHTS_HASHES):
401 | if include_top:
402 | file_name = model_name + '_weights_tf_dim_ordering_tf_kernels.h5'
403 | file_hash = WEIGHTS_HASHES[model_name][0]
404 | else:
405 | file_name = model_name + '_weights_tf_dim_ordering_tf_kernels_notop.h5'
406 | file_hash = WEIGHTS_HASHES[model_name][1]
407 | weights_path = keras_utils.get_file(file_name,
408 | BASE_WEIGHTS_PATH + file_name,
409 | cache_subdir='models',
410 | file_hash=file_hash)
411 | model.load_weights(weights_path)
412 | elif weights is not None:
413 | model.load_weights(weights)
414 |
415 | return model
416 |
417 |
418 | def ResNet50(include_top=True,
419 | weights='imagenet',
420 | input_tensor=None,
421 | input_shape=None,
422 | pooling=None,
423 | classes=1000,
424 | **kwargs):
425 | def stack_fn(x):
426 | x = stack1(x, 64, 3, stride1=1, name='conv2')
427 | x = stack1(x, 128, 4, name='conv3')
428 | x = stack1(x, 256, 6, name='conv4')
429 | x = stack1(x, 512, 3, name='conv5')
430 | return x
431 | return ResNet(stack_fn, False, True, 'resnet50',
432 | include_top, weights,
433 | input_tensor, input_shape,
434 | pooling, classes,
435 | **kwargs)
436 |
437 |
438 | def ResNet101(include_top=True,
439 | weights='imagenet',
440 | input_tensor=None,
441 | input_shape=None,
442 | pooling=None,
443 | classes=1000,
444 | **kwargs):
445 | def stack_fn(x):
446 | x = stack1(x, 64, 3, stride1=1, name='conv2')
447 | x = stack1(x, 128, 4, name='conv3')
448 | x = stack1(x, 256, 23, name='conv4')
449 | x = stack1(x, 512, 3, name='conv5')
450 | return x
451 | return ResNet(stack_fn, False, True, 'resnet101',
452 | include_top, weights,
453 | input_tensor, input_shape,
454 | pooling, classes,
455 | **kwargs)
456 |
457 |
458 | def ResNet152(include_top=True,
459 | weights='imagenet',
460 | input_tensor=None,
461 | input_shape=None,
462 | pooling=None,
463 | classes=1000,
464 | **kwargs):
465 | def stack_fn(x):
466 | x = stack1(x, 64, 3, stride1=1, name='conv2')
467 | x = stack1(x, 128, 8, name='conv3')
468 | x = stack1(x, 256, 36, name='conv4')
469 | x = stack1(x, 512, 3, name='conv5')
470 | return x
471 | return ResNet(stack_fn, False, True, 'resnet152',
472 | include_top, weights,
473 | input_tensor, input_shape,
474 | pooling, classes,
475 | **kwargs)
476 |
477 |
478 | def ResNet50V2(include_top=True,
479 | weights='imagenet',
480 | input_tensor=None,
481 | input_shape=None,
482 | pooling=None,
483 | classes=1000,
484 | **kwargs):
485 | def stack_fn(x):
486 | x = stack2(x, 64, 3, name='conv2')
487 | x = stack2(x, 128, 4, name='conv3')
488 | x = stack2(x, 256, 6, name='conv4')
489 | x = stack2(x, 512, 3, stride1=1, name='conv5')
490 | return x
491 | return ResNet(stack_fn, True, True, 'resnet50v2',
492 | include_top, weights,
493 | input_tensor, input_shape,
494 | pooling, classes,
495 | **kwargs)
496 |
497 |
498 | def ResNet101V2(include_top=True,
499 | weights='imagenet',
500 | input_tensor=None,
501 | input_shape=None,
502 | pooling=None,
503 | classes=1000,
504 | **kwargs):
505 | def stack_fn(x):
506 | x = stack2(x, 64, 3, name='conv2')
507 | x = stack2(x, 128, 4, name='conv3')
508 | x = stack2(x, 256, 23, name='conv4')
509 | x = stack2(x, 512, 3, stride1=1, name='conv5')
510 | return x
511 | return ResNet(stack_fn, True, True, 'resnet101v2',
512 | include_top, weights,
513 | input_tensor, input_shape,
514 | pooling, classes,
515 | **kwargs)
516 |
517 |
518 | def ResNet152V2(include_top=True,
519 | weights='imagenet',
520 | input_tensor=None,
521 | input_shape=None,
522 | pooling=None,
523 | classes=1000,
524 | **kwargs):
525 | def stack_fn(x):
526 | x = stack2(x, 64, 3, name='conv2')
527 | x = stack2(x, 128, 8, name='conv3')
528 | x = stack2(x, 256, 36, name='conv4')
529 | x = stack2(x, 512, 3, stride1=1, name='conv5')
530 | return x
531 | return ResNet(stack_fn, True, True, 'resnet152v2',
532 | include_top, weights,
533 | input_tensor, input_shape,
534 | pooling, classes,
535 | **kwargs)
536 |
537 |
538 | def ResNeXt50(include_top=True,
539 | weights='imagenet',
540 | input_tensor=None,
541 | input_shape=None,
542 | pooling=None,
543 | classes=1000,
544 | **kwargs):
545 | def stack_fn(x):
546 | x = stack3(x, 128, 3, stride1=1, name='conv2')
547 | x = stack3(x, 256, 4, name='conv3')
548 | x = stack3(x, 512, 6, name='conv4')
549 | x = stack3(x, 1024, 3, name='conv5')
550 | return x
551 | return ResNet(stack_fn, False, False, 'resnext50',
552 | include_top, weights,
553 | input_tensor, input_shape,
554 | pooling, classes,
555 | **kwargs)
556 |
557 |
558 | def ResNeXt101(include_top=True,
559 | weights='imagenet',
560 | input_tensor=None,
561 | input_shape=None,
562 | pooling=None,
563 | classes=1000,
564 | **kwargs):
565 | def stack_fn(x):
566 | x = stack3(x, 128, 3, stride1=1, name='conv2')
567 | x = stack3(x, 256, 4, name='conv3')
568 | x = stack3(x, 512, 23, name='conv4')
569 | x = stack3(x, 1024, 3, name='conv5')
570 | return x
571 | return ResNet(stack_fn, False, False, 'resnext101',
572 | include_top, weights,
573 | input_tensor, input_shape,
574 | pooling, classes,
575 | **kwargs)
576 |
577 |
578 | setattr(ResNet50, '__doc__', ResNet.__doc__)
579 | setattr(ResNet101, '__doc__', ResNet.__doc__)
580 | setattr(ResNet152, '__doc__', ResNet.__doc__)
581 | setattr(ResNet50V2, '__doc__', ResNet.__doc__)
582 | setattr(ResNet101V2, '__doc__', ResNet.__doc__)
583 | setattr(ResNet152V2, '__doc__', ResNet.__doc__)
584 | setattr(ResNeXt50, '__doc__', ResNet.__doc__)
585 | setattr(ResNeXt101, '__doc__', ResNet.__doc__)
586 |
--------------------------------------------------------------------------------
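The depth names in `resnet_common.py` follow directly from the block counts each `stack_fn` requests: every bottleneck block contributes three convolutions, plus the 7x7 stem convolution and the final fully-connected layer. A tiny check matching the stacks used in `ResNet50`, `ResNet101`, and `ResNet152` above:

```python
# Weighted-layer count implied by the stack definitions in resnet_common.py.
for name, blocks in [("resnet50", [3, 4, 6, 3]),
                     ("resnet101", [3, 4, 23, 3]),
                     ("resnet152", [3, 8, 36, 3])]:
    depth = 3 * sum(blocks) + 2   # 3 convs per block + stem conv + FC layer
    print(name, "->", depth, "weighted layers")
# resnet50 -> 50, resnet101 -> 101, resnet152 -> 152
```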
/sample-code/network/resnet_v2.py:
--------------------------------------------------------------------------------
1 | """ResNetV2 models for Keras.
2 |
3 | # Reference paper
4 |
5 | - [Identity Mappings in Deep Residual Networks]
6 | (https://arxiv.org/abs/1603.05027) (ECCV 2016)
7 |
8 | # Reference implementations
9 |
10 | - [TensorNets]
11 | (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py)
12 | - [Torch ResNetV2]
13 | (https://github.com/facebook/fb.resnet.torch/blob/master/models/preresnet.lua)
14 |
15 | """
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | from . import imagenet_utils
21 | from .imagenet_utils import decode_predictions
22 | from .resnet_common import ResNet50V2
23 | from .resnet_common import ResNet101V2
24 | from .resnet_common import ResNet152V2
25 |
26 |
27 | def preprocess_input(x, **kwargs):
28 | """Preprocesses a numpy array encoding a batch of images.
29 |
30 | # Arguments
31 | x: a 4D numpy array consists of RGB values within [0, 255].
32 | data_format: data format of the image tensor.
33 |
34 | # Returns
35 | Preprocessed array.
36 | """
37 | return imagenet_utils.preprocess_input(x, mode='tf', **kwargs)
38 |
--------------------------------------------------------------------------------
/sample-code/network/resnext.py:
--------------------------------------------------------------------------------
1 | """ResNeXt models for Keras.
2 |
3 | # Reference paper
4 |
5 | - [Aggregated Residual Transformations for Deep Neural Networks]
6 | (https://arxiv.org/abs/1611.05431) (CVPR 2017)
7 |
8 | # Reference implementations
9 |
10 | - [TensorNets]
11 | (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py)
12 | - [Torch ResNeXt]
13 | (https://github.com/facebookresearch/ResNeXt/blob/master/models/resnext.lua)
14 |
15 | """
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import os
21 |
22 | from . import imagenet_utils
23 | from .imagenet_utils import decode_predictions
24 | from .resnet_common import ResNeXt50
25 | from .resnet_common import ResNeXt101
26 |
27 |
28 | def preprocess_input(x, **kwargs):
29 | """Preprocesses a numpy array encoding a batch of images.
30 |
31 | # Arguments
32 | x: a 4D numpy array consists of RGB values within [0, 255].
33 | data_format: data format of the image tensor.
34 |
35 | # Returns
36 | Preprocessed array.
37 | """
38 | return imagenet_utils.preprocess_input(x, mode='torch', **kwargs)
39 |
--------------------------------------------------------------------------------
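The three `preprocess_input` wrappers above differ only in the `mode` passed to `imagenet_utils.preprocess_input`: `'caffe'` (resnet.py) converts RGB to BGR and subtracts the ImageNet channel means, `'tf'` (resnet_v2.py) scales pixels to [-1, 1], and `'torch'` (resnext.py) scales to [0, 1] and then normalizes with the ImageNet mean and std. A small illustrative sketch of the same transforms written out by hand (not the library code itself):

```python
import numpy as np

def preprocess(x, mode):
    """Approximate the three keras_applications preprocessing modes on an RGB batch."""
    x = x.astype("float32")
    if mode == "caffe":      # RGB -> BGR, subtract ImageNet channel means
        x = x[..., ::-1] - np.array([103.939, 116.779, 123.68])
    elif mode == "tf":       # scale to [-1, 1]
        x = x / 127.5 - 1.0
    elif mode == "torch":    # scale to [0, 1], then normalize per channel
        x = x / 255.0
        x = (x - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225])
    return x

batch = np.random.randint(0, 256, size=(1, 224, 224, 3))
print(preprocess(batch, "tf").min(), preprocess(batch, "tf").max())
```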
/sample-code/network/vgg16.py:
--------------------------------------------------------------------------------
1 | """VGG16 model for Keras.
2 |
3 | # Reference
4 |
5 | - [Very Deep Convolutional Networks for Large-Scale Image Recognition](
6 | https://arxiv.org/abs/1409.1556)
7 |
8 | """
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | import os
14 |
15 | from . import get_submodules_from_kwargs
16 | from . import imagenet_utils
17 | from .imagenet_utils import decode_predictions
18 | from .imagenet_utils import _obtain_input_shape
19 |
20 | preprocess_input = imagenet_utils.preprocess_input
21 |
22 | WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/'
23 | 'releases/download/v0.1/'
24 | 'vgg16_weights_tf_dim_ordering_tf_kernels.h5')
25 | WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
26 | 'releases/download/v0.1/'
27 | 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')
28 |
29 |
30 | def VGG16(include_top=True,
31 | weights='imagenet',
32 | input_tensor=None,
33 | input_shape=None,
34 | pooling=None,
35 | classes=1000,
36 | **kwargs):
37 | """Instantiates the VGG16 architecture.
38 |
39 | Optionally loads weights pre-trained on ImageNet.
40 | Note that the data format convention used by the model is
41 | the one specified in your Keras config at `~/.keras/keras.json`.
42 |
43 | # Arguments
44 | include_top: whether to include the 3 fully-connected
45 | layers at the top of the network.
46 | weights: one of `None` (random initialization),
47 | 'imagenet' (pre-training on ImageNet),
48 | or the path to the weights file to be loaded.
49 | input_tensor: optional Keras tensor
50 | (i.e. output of `layers.Input()`)
51 | to use as image input for the model.
52 | input_shape: optional shape tuple, only to be specified
53 | if `include_top` is False (otherwise the input shape
54 | has to be `(224, 224, 3)`
55 | (with `channels_last` data format)
56 | or `(3, 224, 224)` (with `channels_first` data format).
57 | It should have exactly 3 input channels,
58 | and width and height should be no smaller than 32.
59 | E.g. `(200, 200, 3)` would be one valid value.
60 | pooling: Optional pooling mode for feature extraction
61 | when `include_top` is `False`.
62 | - `None` means that the output of the model will be
63 | the 4D tensor output of the
64 | last convolutional block.
65 | - `avg` means that global average pooling
66 | will be applied to the output of the
67 | last convolutional block, and thus
68 | the output of the model will be a 2D tensor.
69 | - `max` means that global max pooling will
70 | be applied.
71 | classes: optional number of classes to classify images
72 | into, only to be specified if `include_top` is True, and
73 | if no `weights` argument is specified.
74 |
75 | # Returns
76 | A Keras model instance.
77 |
78 | # Raises
79 | ValueError: in case of invalid argument for `weights`,
80 | or invalid input shape.
81 | """
82 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)
83 |
84 | if not (weights in {'imagenet', None} or os.path.exists(weights)):
85 | raise ValueError('The `weights` argument should be either '
86 | '`None` (random initialization), `imagenet` '
87 | '(pre-training on ImageNet), '
88 | 'or the path to the weights file to be loaded.')
89 |
90 | if weights == 'imagenet' and include_top and classes != 1000:
91 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
92 | ' as true, `classes` should be 1000')
93 | # Determine proper input shape
94 | input_shape = _obtain_input_shape(input_shape,
95 | default_size=224,
96 | min_size=32,
97 | data_format=backend.image_data_format(),
98 | require_flatten=include_top,
99 | weights=weights)
100 |
101 | if input_tensor is None:
102 | img_input = layers.Input(shape=input_shape)
103 | else:
104 | if not backend.is_keras_tensor(input_tensor):
105 | img_input = layers.Input(tensor=input_tensor, shape=input_shape)
106 | else:
107 | img_input = input_tensor
108 | # Block 1
109 | x = layers.Conv2D(64, (3, 3),
110 | activation='relu',
111 | padding='same',
112 | name='block1_conv1')(img_input)
113 | x = layers.Conv2D(64, (3, 3),
114 | activation='relu',
115 | padding='same',
116 | name='block1_conv2')(x)
117 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
118 |
119 | # Block 2
120 | x = layers.Conv2D(128, (3, 3),
121 | activation='relu',
122 | padding='same',
123 | name='block2_conv1')(x)
124 | x = layers.Conv2D(128, (3, 3),
125 | activation='relu',
126 | padding='same',
127 | name='block2_conv2')(x)
128 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
129 |
130 | # Block 3
131 | x = layers.Conv2D(256, (3, 3),
132 | activation='relu',
133 | padding='same',
134 | name='block3_conv1')(x)
135 | x = layers.Conv2D(256, (3, 3),
136 | activation='relu',
137 | padding='same',
138 | name='block3_conv2')(x)
139 | x = layers.Conv2D(256, (3, 3),
140 | activation='relu',
141 | padding='same',
142 | name='block3_conv3')(x)
143 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
144 |
145 | # Block 4
146 | x = layers.Conv2D(512, (3, 3),
147 | activation='relu',
148 | padding='same',
149 | name='block4_conv1')(x)
150 | x = layers.Conv2D(512, (3, 3),
151 | activation='relu',
152 | padding='same',
153 | name='block4_conv2')(x)
154 | x = layers.Conv2D(512, (3, 3),
155 | activation='relu',
156 | padding='same',
157 | name='block4_conv3')(x)
158 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
159 |
160 | # Block 5
161 | x = layers.Conv2D(512, (3, 3),
162 | activation='relu',
163 | padding='same',
164 | name='block5_conv1')(x)
165 | x = layers.Conv2D(512, (3, 3),
166 | activation='relu',
167 | padding='same',
168 | name='block5_conv2')(x)
169 | x = layers.Conv2D(512, (3, 3),
170 | activation='relu',
171 | padding='same',
172 | name='block5_conv3')(x)
173 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
174 |
175 | if include_top:
176 | # Classification block
177 | x = layers.Flatten(name='flatten')(x)
178 | x = layers.Dense(4096, activation='relu', name='fc1')(x)
179 | x = layers.Dense(4096, activation='relu', name='fc2')(x)
180 | x = layers.Dense(classes, activation='softmax', name='predictions')(x)
181 | else:
182 | if pooling == 'avg':
183 | x = layers.GlobalAveragePooling2D()(x)
184 | elif pooling == 'max':
185 | x = layers.GlobalMaxPooling2D()(x)
186 |
187 | # Ensure that the model takes into account
188 | # any potential predecessors of `input_tensor`.
189 | if input_tensor is not None:
190 | inputs = keras_utils.get_source_inputs(input_tensor)
191 | else:
192 | inputs = img_input
193 | # Create model.
194 | model = models.Model(inputs, x, name='vgg16')
195 |
196 | # Load weights.
197 | if weights == 'imagenet':
198 | if include_top:
199 | weights_path = keras_utils.get_file(
200 | 'vgg16_weights_tf_dim_ordering_tf_kernels.h5',
201 | WEIGHTS_PATH,
202 | cache_subdir='models',
203 | file_hash='64373286793e3c8b2b4e3219cbf3544b')
204 | else:
205 | weights_path = keras_utils.get_file(
206 | 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
207 | WEIGHTS_PATH_NO_TOP,
208 | cache_subdir='models',
209 | file_hash='6d6bbae143d832006294945121d1f1fc')
210 | model.load_weights(weights_path)
211 | if backend.backend() == 'theano':
212 | keras_utils.convert_all_kernels_in_model(model)
213 | elif weights is not None:
214 | model.load_weights(weights)
215 |
216 | return model
217 |
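218 | # Usage sketch (illustrative only; assumes this module is wired up with its
219 | # backend/layers/models/utils submodules, as in keras-applications):
220 | #
221 | #   import numpy as np
222 | #   model = VGG16(include_top=True, weights='imagenet')
223 | #   x = np.random.rand(1, 224, 224, 3) * 255     # placeholder for a 224x224 RGB batch
224 | #   preds = model.predict(preprocess_input(x))
225 | #   print(decode_predictions(preds, top=5))      # top-5 ImageNet classes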
--------------------------------------------------------------------------------
/sample-code/network/vgg16_keras.py:
--------------------------------------------------------------------------------
1 | # Author: Taylor Guo, taylorguo@126.com
2 | # Python 3.6.7
3 | '''
4 | Keras 2.1.0
5 | Keras-Applications 1.0.7
6 | Keras-Preprocessing 1.0.8
7 | tensorboard 1.12.2
8 | tensorflow 1.12.0
9 | tensorflow-tensorboard 0.4.0
10 | numpy 1.14.5
11 | opencv-python 3.4.1.15
12 |
13 | paper: Very Deep Convolutional Networks for Large-Scale Image Recognition
14 |        (https://arxiv.org/abs/1409.1556)
15 | '''
16 |
17 | from keras.models import Sequential, Model
18 | from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense, Dropout
19 | from keras.layers.normalization import BatchNormalization
20 | from keras import backend as K
21 |
22 | from keras.applications.vgg16 import VGG16
23 |
24 | from tflearn.datasets import oxflower17
25 |
26 | from keras.utils import to_categorical
27 | from keras.optimizers import SGD, Adam
28 |
29 | from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
30 |
31 | import numpy as np
32 | import os,datetime
33 |
34 | class VGG16_Net:
35 |
36 | @staticmethod
37 | def build(activation="relu", classes=17):
38 | vgg16_pretrained_model = VGG16(include_top=False, weights="imagenet", input_shape=(224,224,3))
39 | for layer in vgg16_pretrained_model.layers:
40 | layer.trainable = False
41 | x = Flatten()(vgg16_pretrained_model.output)
42 | x = Dense(128, activation=activation, name="FC_1")(x)
43 | x = Dropout(0.5)(x)
44 | x = Dense(64, activation=activation, name="FC_2")(x)
45 | x = Dropout(0.5)(x)
46 | x = Dense(classes, activation="softmax", name="output")(x)
47 |
48 | model = Model(vgg16_pretrained_model.input, x, name="VGG16_imagenet_no_top")
49 | model.summary()
50 | return model
51 |
52 | @staticmethod
53 | def load_dataset_oxflower17():
54 | print("\t Downloading oxflower17 dataset ...")
55 |
56 | (x_train, y_train) = oxflower17.load_data(one_hot=True)
57 |
58 | return (x_train, y_train)
59 |
60 | @staticmethod
61 | def train(weight_path=None, load_weights=False, save_weights=True):
62 |
63 | model = VGG16_Net.build()
64 |
65 | model.compile(loss="categorical_crossentropy", optimizer=SGD(lr=0.0005), metrics=["accuracy"])
66 |
67 | train_d, train_l = VGG16_Net.load_dataset_oxflower17()
68 |
69 | early_stopping = EarlyStopping(monitor="val_acc", patience=200, verbose=1)
70 | reduce_lr = ReduceLROnPlateau(monitor="val_acc", factor=0.8, patience=100, verbose=1)
71 |
72 | if not os.path.exists("models"):
73 | os.mkdir("models")
74 | best_weights = "models/best_weights_VGG16_oxflower17_val_acc_{val_acc:.3f}.h5"
75 | save_best_model = ModelCheckpoint(best_weights, monitor="val_acc", verbose=1, save_best_only=True)
76 |
77 | if load_weights == False:
78 | print("\t Start training ...")
79 | train_history = model.fit(train_d, train_l, batch_size=64, epochs=2000, verbose=1, validation_split=0.2,
80 | callbacks=[reduce_lr, save_best_model, early_stopping])
81 | else:
82 | pass
83 | # load_weights from weight_path
84 |
85 | if save_weights == True:
86 | print("\t Save trained weights to file ...")
87 | if not os.path.exists("models"):
88 | os.mkdir("models")
89 | weight_file = "VGG16_Net_{:%Y%m%dT%H%M%S}.h5".format(datetime.datetime.now())
90 | model.save_weights(os.path.join("models", weight_file), overwrite=True)
91 |
92 |
93 | if __name__ == '__main__':
94 | VGG16_Net.train(save_weights=False)
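95 | 
96 |     # Inference sketch (illustrative; the weight file name pattern follows the
97 |     # one used in train() above, the rest is an assumption):
98 |     #   model = VGG16_Net.build()
99 |     #   model.load_weights("models/VGG16_Net_<timestamp>.h5")
100 |     #   probs = model.predict(x_batch)   # x_batch: (N, 224, 224, 3) float array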
--------------------------------------------------------------------------------
/sample-code/network/vgg19.py:
--------------------------------------------------------------------------------
1 | """VGG19 model for Keras.
2 |
3 | # Reference
4 |
5 | - [Very Deep Convolutional Networks for Large-Scale Image Recognition](
6 | https://arxiv.org/abs/1409.1556)
7 |
8 | """
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | import os
14 |
15 | from . import get_submodules_from_kwargs
16 | from . import imagenet_utils
17 | from .imagenet_utils import decode_predictions
18 | from .imagenet_utils import _obtain_input_shape
19 |
20 | preprocess_input = imagenet_utils.preprocess_input
21 |
22 | WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/'
23 | 'releases/download/v0.1/'
24 | 'vgg19_weights_tf_dim_ordering_tf_kernels.h5')
25 | WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
26 | 'releases/download/v0.1/'
27 | 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5')
28 |
29 |
30 | def VGG19(include_top=True,
31 | weights='imagenet',
32 | input_tensor=None,
33 | input_shape=None,
34 | pooling=None,
35 | classes=1000,
36 | **kwargs):
37 | """Instantiates the VGG19 architecture.
38 |
39 | Optionally loads weights pre-trained on ImageNet.
40 | Note that the data format convention used by the model is
41 | the one specified in your Keras config at `~/.keras/keras.json`.
42 |
43 | # Arguments
44 | include_top: whether to include the 3 fully-connected
45 | layers at the top of the network.
46 | weights: one of `None` (random initialization),
47 | 'imagenet' (pre-training on ImageNet),
48 | or the path to the weights file to be loaded.
49 | input_tensor: optional Keras tensor
50 | (i.e. output of `layers.Input()`)
51 | to use as image input for the model.
52 | input_shape: optional shape tuple, only to be specified
53 | if `include_top` is False (otherwise the input shape
54 | has to be `(224, 224, 3)`
55 | (with `channels_last` data format)
56 | or `(3, 224, 224)` (with `channels_first` data format).
57 |             It should have exactly 3 input channels,
58 | and width and height should be no smaller than 32.
59 | E.g. `(200, 200, 3)` would be one valid value.
60 | pooling: Optional pooling mode for feature extraction
61 | when `include_top` is `False`.
62 | - `None` means that the output of the model will be
63 | the 4D tensor output of the
64 | last convolutional block.
65 | - `avg` means that global average pooling
66 | will be applied to the output of the
67 | last convolutional block, and thus
68 | the output of the model will be a 2D tensor.
69 | - `max` means that global max pooling will
70 | be applied.
71 | classes: optional number of classes to classify images
72 | into, only to be specified if `include_top` is True, and
73 | if no `weights` argument is specified.
74 |
75 | # Returns
76 | A Keras model instance.
77 |
78 | # Raises
79 | ValueError: in case of invalid argument for `weights`,
80 | or invalid input shape.
81 | """
82 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)
83 |
84 | if not (weights in {'imagenet', None} or os.path.exists(weights)):
85 | raise ValueError('The `weights` argument should be either '
86 | '`None` (random initialization), `imagenet` '
87 | '(pre-training on ImageNet), '
88 | 'or the path to the weights file to be loaded.')
89 |
90 | if weights == 'imagenet' and include_top and classes != 1000:
91 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
92 | ' as true, `classes` should be 1000')
93 | # Determine proper input shape
94 | input_shape = _obtain_input_shape(input_shape,
95 | default_size=224,
96 | min_size=32,
97 | data_format=backend.image_data_format(),
98 | require_flatten=include_top,
99 | weights=weights)
100 |
101 | if input_tensor is None:
102 | img_input = layers.Input(shape=input_shape)
103 | else:
104 | if not backend.is_keras_tensor(input_tensor):
105 | img_input = layers.Input(tensor=input_tensor, shape=input_shape)
106 | else:
107 | img_input = input_tensor
108 | # Block 1
109 | x = layers.Conv2D(64, (3, 3),
110 | activation='relu',
111 | padding='same',
112 | name='block1_conv1')(img_input)
113 | x = layers.Conv2D(64, (3, 3),
114 | activation='relu',
115 | padding='same',
116 | name='block1_conv2')(x)
117 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
118 |
119 | # Block 2
120 | x = layers.Conv2D(128, (3, 3),
121 | activation='relu',
122 | padding='same',
123 | name='block2_conv1')(x)
124 | x = layers.Conv2D(128, (3, 3),
125 | activation='relu',
126 | padding='same',
127 | name='block2_conv2')(x)
128 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
129 |
130 | # Block 3
131 | x = layers.Conv2D(256, (3, 3),
132 | activation='relu',
133 | padding='same',
134 | name='block3_conv1')(x)
135 | x = layers.Conv2D(256, (3, 3),
136 | activation='relu',
137 | padding='same',
138 | name='block3_conv2')(x)
139 | x = layers.Conv2D(256, (3, 3),
140 | activation='relu',
141 | padding='same',
142 | name='block3_conv3')(x)
143 | x = layers.Conv2D(256, (3, 3),
144 | activation='relu',
145 | padding='same',
146 | name='block3_conv4')(x)
147 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
148 |
149 | # Block 4
150 | x = layers.Conv2D(512, (3, 3),
151 | activation='relu',
152 | padding='same',
153 | name='block4_conv1')(x)
154 | x = layers.Conv2D(512, (3, 3),
155 | activation='relu',
156 | padding='same',
157 | name='block4_conv2')(x)
158 | x = layers.Conv2D(512, (3, 3),
159 | activation='relu',
160 | padding='same',
161 | name='block4_conv3')(x)
162 | x = layers.Conv2D(512, (3, 3),
163 | activation='relu',
164 | padding='same',
165 | name='block4_conv4')(x)
166 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
167 |
168 | # Block 5
169 | x = layers.Conv2D(512, (3, 3),
170 | activation='relu',
171 | padding='same',
172 | name='block5_conv1')(x)
173 | x = layers.Conv2D(512, (3, 3),
174 | activation='relu',
175 | padding='same',
176 | name='block5_conv2')(x)
177 | x = layers.Conv2D(512, (3, 3),
178 | activation='relu',
179 | padding='same',
180 | name='block5_conv3')(x)
181 | x = layers.Conv2D(512, (3, 3),
182 | activation='relu',
183 | padding='same',
184 | name='block5_conv4')(x)
185 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
186 |
187 | if include_top:
188 | # Classification block
189 | x = layers.Flatten(name='flatten')(x)
190 | x = layers.Dense(4096, activation='relu', name='fc1')(x)
191 | x = layers.Dense(4096, activation='relu', name='fc2')(x)
192 | x = layers.Dense(classes, activation='softmax', name='predictions')(x)
193 | else:
194 | if pooling == 'avg':
195 | x = layers.GlobalAveragePooling2D()(x)
196 | elif pooling == 'max':
197 | x = layers.GlobalMaxPooling2D()(x)
198 |
199 | # Ensure that the model takes into account
200 | # any potential predecessors of `input_tensor`.
201 | if input_tensor is not None:
202 | inputs = keras_utils.get_source_inputs(input_tensor)
203 | else:
204 | inputs = img_input
205 | # Create model.
206 | model = models.Model(inputs, x, name='vgg19')
207 |
208 | # Load weights.
209 | if weights == 'imagenet':
210 | if include_top:
211 | weights_path = keras_utils.get_file(
212 | 'vgg19_weights_tf_dim_ordering_tf_kernels.h5',
213 | WEIGHTS_PATH,
214 | cache_subdir='models',
215 | file_hash='cbe5617147190e668d6c5d5026f83318')
216 | else:
217 | weights_path = keras_utils.get_file(
218 | 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
219 | WEIGHTS_PATH_NO_TOP,
220 | cache_subdir='models',
221 | file_hash='253f8cb515780f3b799900260a226db6')
222 | model.load_weights(weights_path)
223 | if backend.backend() == 'theano':
224 | keras_utils.convert_all_kernels_in_model(model)
225 | elif weights is not None:
226 | model.load_weights(weights)
227 |
228 | return model
229 |
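230 | # Feature-extraction sketch (illustrative only): with include_top=False and
231 | # pooling='avg', the model outputs one 512-d vector per image (the globally
232 | # averaged block5_pool activations), a common way to use VGG19 as a fixed
233 | # feature extractor.
234 | #
235 | #   import numpy as np
236 | #   extractor = VGG19(include_top=False, weights='imagenet', pooling='avg')
237 | #   feats = extractor.predict(preprocess_input(np.random.rand(2, 224, 224, 3) * 255))
238 | #   print(feats.shape)   # expected: (2, 512)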
--------------------------------------------------------------------------------
/sample-code/network/vgg19_keras_cifar100.py:
--------------------------------------------------------------------------------
1 | # Author: Taylor Guo, taylorguo@126.com
2 | # Python 3.6.7
3 | '''
4 | Keras 2.1.0
5 | Keras-Applications 1.0.7
6 | Keras-Preprocessing 1.0.8
7 | tensorboard 1.12.2
8 | tensorflow 1.12.0
9 | tensorflow-tensorboard 0.4.0
10 | numpy 1.14.5
11 | opencv-python 3.4.1.15
12 |
13 | paper: Very Deep Convolutional Networks for Large-Scale Image Recognition
14 |        (https://arxiv.org/abs/1409.1556)
15 | '''
16 |
17 | from keras.models import Input, Model
18 | from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense, Dropout
19 | from keras.layers.normalization import BatchNormalization
20 | from keras import backend as K
21 |
22 | from keras.applications.vgg19 import VGG19
23 |
24 | from keras.datasets import cifar100
25 |
26 | from keras.utils import to_categorical
27 | from keras.optimizers import SGD, Adam
28 |
29 | from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
30 |
31 | import numpy as np
32 | import os,datetime
33 |
34 | class VGG19_Net:
35 |
36 | @staticmethod
37 | def build(input_shape, activation="relu", padding="same", classes=100):
38 |
39 | input = Input(shape=input_shape, name="model_input")
40 |
41 | # Block 1
42 | x = Conv2D(64, (3, 3), padding=padding, activation=activation, name="block1_conv1")(input)
43 | x = Conv2D(64, (3, 3), padding=padding, activation=activation, name="block1_conv2")(x)
44 | x = MaxPooling2D((2, 2), strides=(2, 2), name="block1_pool")(x)
45 |
46 | # Block 2
47 | x = Conv2D(128, (3, 3), padding=padding, activation=activation, name="block2_conv1")(x)
48 | x = Conv2D(128, (3, 3), padding=padding, activation=activation, name="block2_conv2")(x)
49 | x = MaxPooling2D((2, 2), strides=(2, 2), name="block2_pool")(x)
50 |
51 | # Block 3
52 | x = Conv2D(256, (3, 3), padding=padding, activation=activation, name="block3_conv1")(x)
53 | x = Conv2D(256, (3, 3), padding=padding, activation=activation, name="block3_conv2")(x)
54 | x = Conv2D(256, (3, 3), padding=padding, activation=activation, name="block3_conv3")(x)
55 | x = Conv2D(256, (3, 3), padding=padding, activation=activation, name="block3_conv4")(x)
56 | x = MaxPooling2D((2, 2), strides=(2, 2), name="block3_pool")(x)
57 |
58 | # Block 4
59 | x = Conv2D(512, (3, 3), padding=padding, activation=activation, name="block4_conv1")(x)
60 | x = Conv2D(512, (3, 3), padding=padding, activation=activation, name="block4_conv2")(x)
61 | x = Conv2D(512, (3, 3), padding=padding, activation=activation, name="block4_conv3")(x)
62 | x = Conv2D(512, (3, 3), padding=padding, activation=activation, name="block4_conv4")(x)
63 | x = MaxPooling2D((2, 2), strides=(2, 2), name="block4_pool")(x)
64 |
65 | # Block 5
66 | x = Conv2D(512, (3, 3), padding=padding, activation=activation, name="block5_conv1")(x)
67 | x = Conv2D(512, (3, 3), padding=padding, activation=activation, name="block5_conv2")(x)
68 | x = Conv2D(512, (3, 3), padding=padding, activation=activation, name="block5_conv3")(x)
69 | x = Conv2D(512, (3, 3), padding=padding, activation=activation, name="block5_conv4")(x)
70 | x = MaxPooling2D((2, 2), strides=(2, 2), name="block5_pool")(x)
71 |
72 | # classification block
73 | x = Flatten(name="flatten")(x)
74 | x = Dense(512, activation=activation, name="fc1")(x)
75 | x = Dense(128, activation=activation, name="fc2")(x)
76 | x = Dense(classes, activation="softmax", name="prediction")(x)
77 |
78 | model = Model(input, x, name="VGG19_Net")
79 |
80 | model.summary()
81 |
82 | return model
83 |
84 |
85 | @staticmethod
86 | def build_from_config(input_shape, activation="relu", padding="same", classes=100):
87 |
88 | model_config = {"block1": {"layers": 2, "filter": 64},
89 | "block2": {"layers": 2, "filter": 128},
90 | "block3": {"layers": 4, "filter": 256},
91 | "block4": {"layers": 4, "filter": 512},
92 | "block5": {"layers": 4, "filter": 512}
93 | }
94 |
95 | input = Input(shape=input_shape, name="model_input")
96 |
97 | x = input
98 | for block in model_config.keys():
99 | for layer_nb in range(model_config[block]["layers"]):
100 | kernel = model_config[block]["filter"]
101 | layer_name = "%s_conv%d"%(block, layer_nb+1)
102 | # print(layer_name)
103 | x = Conv2D(kernel, (3, 3), activation=activation, padding=padding, name=layer_name)(x)
104 | x = MaxPooling2D((2, 2), strides=(2, 2), name="%s_pool"%block)(x)
105 |
106 | # classification block
107 | x = Flatten(name="flatten")(x)
108 | x = Dense(512, activation=activation, name="fc1")(x)
109 | x = Dense(128, activation=activation, name="fc2")(x)
110 | x = Dense(classes, activation="softmax", name="prediction")(x)
111 |
112 | model = Model(input, x, name="VGG19_Net")
113 |
114 | model.summary()
115 |
116 | return model
117 |
118 |
119 | @staticmethod
120 | def train():
121 |
122 | print("\t Loading CIFAR-100 dataset ...")
123 | (train_d, train_l), (test_d, test_l) = cifar100.load_data()
124 |
125 | train_l = to_categorical(train_l, num_classes=100).reshape((50000, 100))
126 | test_l = to_categorical(test_l, num_classes=100).reshape((10000, 100))
127 |
128 | input_shape = train_d.shape[1:]
129 |
130 | train_d = train_d.astype("float32") / 255
131 | test_d = test_d.astype("float32") / 255
132 |
133 | model = VGG19_Net.build(input_shape= input_shape)
134 | model.compile(loss="categorical_crossentropy", optimizer=SGD(lr=0.0005), metrics=["accuracy"])
135 |
136 | if not os.path.exists("models"):
137 | os.mkdir("models")
138 |         best_weights = "models/best_weights_VGG19_CIFAR100_{val_acc:.4f}.h5"
139 |
140 | save_best_model = ModelCheckpoint(best_weights, monitor="val_acc", verbose=1, save_best_only=True)
141 | early_stopping = EarlyStopping(monitor="val_acc", patience=200, verbose=1)
142 | reduce_lr = ReduceLROnPlateau(monitor="val_acc", factor=0.8, patience=100, verbose=1)
143 | callbacks = [reduce_lr, save_best_model, early_stopping]
144 |
145 | print("\t Start training ...")
146 | train_history = model.fit(train_d, train_l, batch_size=128, epochs=10000, verbose=1,
147 | validation_data=(test_d, test_l), callbacks=callbacks)
148 |
149 |
150 | if __name__ == '__main__':
151 | VGG19_Net.train()
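152 | 
153 |     # Evaluation sketch (illustrative; assumes a checkpoint saved by the
154 |     # ModelCheckpoint callback in train() above):
155 |     #   model = VGG19_Net.build(input_shape=(32, 32, 3))
156 |     #   model.load_weights("models/best_weights_VGG19_CIFAR100_<val_acc>.h5")
157 |     #   model.compile(loss="categorical_crossentropy", optimizer="sgd", metrics=["accuracy"])
158 |     #   loss, acc = model.evaluate(test_d, test_l, batch_size=128)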
--------------------------------------------------------------------------------
/sample-code/nlp/token_nlp.py:
--------------------------------------------------------------------------------
1 | '''
2 | import numpy as np
3 |
4 | samples = ['The cat sat on the mat.', 'The dog ate my homework.']
5 |
6 | token_index = {}
7 |
8 | for sample in samples:
9 | for word in sample.split():
10 | if word not in token_index:
11 | token_index[word] = len(token_index) + 1
12 |
13 | max_length = 10
14 |
15 | results = np.zeros(shape=(len(samples), max_length, max(token_index.values()) + 1))
16 |
17 | for i, sample in enumerate(samples):
18 | for j, word in list(enumerate(sample.split()))[: max_length]:
19 | index = token_index.get(word)
20 | results[i, j, index] = 1.
21 |
22 | print(results)
23 |
24 | '''
25 |
26 | from keras.preprocessing.text import Tokenizer
27 | samples = ['The cat sat on the mat.', 'The dog ate my homework.']
28 |
29 | tokenizer = Tokenizer(num_words=1000)
30 | tokenizer.fit_on_texts(samples)
31 |
32 | sequences = tokenizer.texts_to_sequences(samples)
33 | one_hot_results = tokenizer.texts_to_matrix(samples, mode='binary')
34 |
35 | word_index = tokenizer.word_index
36 | print('Found %s unique tokens.'%len(word_index))
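37 | 
38 | # Illustrative output for the two samples above (exact indices depend on the
39 | # Tokenizer's frequency ordering):
40 | #   sequences        -> e.g. [[1, 2, 3, 4, 1, 5], [1, 6, 7, 8, 9]]
41 | #   one_hot_results  -> binary matrix of shape (2, 1000), one row per sample
42 | #   word_index       -> e.g. {'the': 1, 'cat': 2, ...}; prints "Found 9 unique tokens."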
--------------------------------------------------------------------------------
/sample-code/object_detection/faster_rcnn/faster_rcnn_open_image_dataset.py:
--------------------------------------------------------------------------------
1 | import time, sys, os, random
2 | import numpy as np
3 | import pandas as pd
4 | from skimage import io
5 | from matplotlib import pyplot as plt
6 | from shutil import copyfile
7 |
8 | import cv2
9 | import tensorflow as tf
10 |
11 | base_path = 'Dataset/Open Images Dataset v4 (Bounding Boxes)'
12 | images_boxable_fname = 'train-images-boxable.csv'
13 | annotations_bbox_fname = 'train-annotations-bbox.csv'
14 | class_descriptions_fname = 'class-descriptions-boxable.csv'
15 |
16 | images_boxable = pd.read_csv(os.path.join(base_path, images_boxable_fname))
17 | # images_boxable.head()
18 | annotations_bbox = pd.read_csv(os.path.join(base_path, annotations_bbox_fname))
19 | # annotations_bbox.head()
20 | class_descriptions = pd.read_csv(os.path.join(base_path, class_descriptions_fname))
21 | # class_descriptions.head()
22 |
23 |
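24 | # Continuation sketch (illustrative; assumes the standard Open Images v4 CSV
25 | # layout: class-descriptions maps label ids to display names, and
26 | # annotations_bbox has a 'LabelName' column):
27 | #   person_label = class_descriptions[class_descriptions.iloc[:, 1] == 'Person'].iloc[0, 0]
28 | #   person_boxes = annotations_bbox[annotations_bbox['LabelName'] == person_label]
29 | #   print(len(person_boxes), 'bounding boxes for the Person class')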
--------------------------------------------------------------------------------
/sample-code/object_detection/faster_rcnn/faster_rcnn_train.py:
--------------------------------------------------------------------------------
1 |
2 | import random, sys, pprint, time, pickle, math, copy, os
3 | from optparse import OptionParser
4 | import cv2
5 | import numpy as np
6 | import pandas as pd
7 | from matplotlib import pyplot as plt
8 | import tensorflow as tf
9 |
10 | from sklearn.metrics import average_precision_score
11 |
12 | from keras import backend as K
13 | from keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Dropout
14 | from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed
15 | from keras.engine.topology import get_source_inputs
16 | from keras.utils import layer_utils
17 | from keras.utils.data_utils import get_file
18 | from keras.objectives import categorical_crossentropy
19 |
20 | from keras.models import Model
21 | from keras.utils import generic_utils
22 | from keras.engine import Layer, InputSpec
23 | from keras import initializers, regularizers
24 |
25 | class Config:
26 |
27 | def __init__(self):
28 |
29 | self.verbose = True
30 | self.network = "vgg"
31 |
32 |         self.use_horizontal_flips = False
33 |         self.use_vertical_flips = False
34 |         self.rot_90 = False
35 |
36 | self.anchor_box_scales = [64, 128, 256]
37 | self.anchor_box_ratios = [[1, 1], [1./math.sqrt(2), 2./math.sqrt(2)], [2./math.sqrt(2), 1./math.sqrt(2)]]
38 |
39 |         self.im_size = 300
40 |
41 | self.img_channel_mean = [103.939, 116.779, 123.68]
42 | self.img_scaling_factor = 1.0
43 |
44 | self.num_rois = 4
45 |
46 | self.rpn_stride = 16
47 |
48 | self.balanced_classes = False
49 |
50 | self.std_scaling = 4.0
51 |         self.classifier_regr_std = [8.0, 8.0, 4.0, 4.0]
52 |
53 |         self.rpn_min_overlap = 0.3
54 |         self.rpn_max_overlap = 0.7
55 |
56 | self.classifier_min_overlap = 0.1
57 | self.classifier_max_overlap = 0.5
58 |
59 | self.class_mapping = None
60 | self.model_path = None
61 |
62 |
63 | def get_data(input_path):
64 | '''
65 | Parse data from annotation file
66 | :param input_path: annotation file path
67 | :return:
68 | all_data: list(filepath, width, height, list(bboxes))
69 | classes_count: dict{key- class_name: value- count_num} -- {'Car': 2383, 'Mobile phone': 1108, 'Person': 3745}
70 | class_mapping: dict{key- class_name: value- idx} -- {'Car': 0, 'Mobile phone': 1, 'Person': 2}
71 | '''
72 | found_bg = False
73 | all_imgs = {}
74 | classes_count = {}
75 | class_mapping = {}
76 | visualise = True
77 |
78 | i = 1
79 | with open(input_path, "r") as f:
80 | print('Parsing annotation files')
81 | for line in f:
82 | sys.stdout.write("\r"+"idx="+str(i))
83 | i += 1
84 | line_split = line.strip().split(",")
85 | (filename, x1, y1, x2, y2, class_name) = line_split
86 |
87 | if class_name not in classes_count:
88 | classes_count[class_name] = 1
89 | else:
90 | classes_count[class_name] += 1
91 |
92 | if class_name not in class_mapping:
93 | if class_name == "bg" and found_bg == False:
94 |                     print("class name is bg. Will be treated as background (hard negative mining).")
95 | found_bg = True
96 | class_mapping[class_name] = len(class_mapping)
97 |
98 | if filename not in all_imgs:
99 | all_imgs[filename] = {}
100 |
101 | img = cv2.imread(filename)
102 | (rows, cols) = img.shape[:2]
103 |
104 | all_imgs[filename]["filepath"] = filename
105 | all_imgs[filename]["width"] = cols
106 | all_imgs[filename]["height"] = rows
107 | all_imgs[filename]["bboxes"] = []
108 |
109 | all_imgs[filename]["bboxes"].append({"class": class_name, "x1": int(x1), "x2": int(x2), "y1": int(y1), "y2": int(y2)})
110 |
111 | all_data = []
112 | for key in all_imgs:
113 | all_data.append(all_imgs[key])
114 |
115 | if found_bg:
116 | if class_mapping["bg"] != len(class_mapping) - 1:
117 | key_to_switch = [key for key in class_mapping.keys() if class_mapping[key] == len(class_mapping) - 1][0]
118 | val_to_switch = class_mapping["bg"]
119 | class_mapping["bg"] = len(class_mapping) - 1
120 | class_mapping[key_to_switch] = val_to_switch
121 |
122 | return all_data, classes_count, class_mapping
123 |
124 |
125 | class RoiPoolingConv(Layer):
126 |
127 | def __init__(self, pool_size, num_rois, **kwargs):
128 |
129 | self.dim_ordering = K.image_dim_ordering()
130 | self.pool_size = pool_size
131 | self.num_rois = num_rois
132 |
133 | super(RoiPoolingConv, self).__init__(**kwargs)
134 |
135 | def build(self, input_shape):
136 | self.nb_channels = input_shape[0][3]
137 |
138 | def compute_output_shape(self, input_shape):
139 | return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels
140 |
141 | def call(self, x, mask=None):
142 |
143 | assert (len(x) == 2)
144 |
145 | img = x[0]
146 | rois = x[1]
147 | input_shape = K.shape(img)
148 |
149 | outputs = []
150 |
151 | for roi_idx in range(self.num_rois):
152 |
153 | x = rois[0, roi_idx, 0]
154 | y = rois[0, roi_idx, 1]
155 | w = rois[0, roi_idx, 2]
156 | h = rois[0, roi_idx, 3]
157 |
158 | x = K.cast(x, 'int32')
159 | y = K.cast(y, 'int32')
160 | w = K.cast(w, 'int32')
161 | h = K.cast(h, 'int32')
162 |
163 |             rs = tf.image.resize_images(img[:, y: y + h, x: x + w, :], (self.pool_size, self.pool_size))
164 | outputs.append(rs)
165 |
166 | final_output = K.concatenate(outputs, axis=0)
167 |         final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
168 |
169 | return final_output
170 |
171 |
172 | def get_config(self):
173 | config = {'pool_size': self.pool_size,
174 | 'num_rois': self.num_rois}
175 | base_config = super(RoiPoolingConv, self).get_config()
176 | return dict(list(base_config.items()) + list(config.items()))
177 |
178 |
179 | # define VGG network
180 |
181 |
182 | def rpn_layer(base_layers, num_anchors):
183 | x = Conv2D(512, (3, 3), padding="same", activation="relu", kernel_initializer="normal", name="rpn_conv1")(base_layers)
184 | x_class = Conv2D(num_anchors, (1, 1), activation="sigmoid", kernel_initializer="uniform", name="rpn_out_class")(x)
185 |     x_regressor = Conv2D(num_anchors * 4, (1, 1), activation="linear", kernel_initializer="zero", name="rpn_out_regressor")(x)
186 | return [x_class, x_regressor, base_layers]
187 |
188 |
189 | def classifier_layer(base_layers, input_rois, num_rois, nb_classes = 4):
190 | input_shape = (num_rois, 7, 7, 512)
191 | pooling_regions = 7
192 | out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])
193 |
194 | out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)
195 | out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)
196 | out = TimeDistributed(Dropout(0.5))(out)
197 | out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)
198 | out = TimeDistributed(Dropout(0.5))(out)
199 |
200 | out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'),
201 | name='dense_class_{}'.format(nb_classes))(out)
202 | out_regressor = TimeDistributed(Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero'),
203 | name='dense_regressor_{}'.format(nb_classes))(out)
204 |
205 | return [out_class, out_regressor]
206 |
207 |
208 | def union(au, bu, area_intersection):
209 | area_a = (au[2]-au[0]) * (au[3]-au[1])
210 | area_b = (bu[2]-bu[0]) * (bu[3]-bu[1])
211 | area_union = area_a + area_b - area_intersection
212 | return area_union
213 |
214 | def intersection(ai, bi):
215 | x = max(ai[0], bi[0])
216 | y = max(ai[1], bi[1])
217 | w = min(ai[2], bi[2]) - x
218 | h = min(ai[3], bi[3]) - y
219 | if w < 0 or h < 0:
220 | return 0
221 | return w * h
222 |
223 | def iou(a, b):
224 | if a[0] >= a[2] or a[1] >= a[3] or b[0] >= b[2] or b[1] >= b[3]:
225 | return 0.0
226 | area_i = intersection(a, b)
227 | area_u = union(a, b, area_i)
228 | return float(area_i) / float(area_u + 1e-6)
229 |
230 |
231 | # Calculate the rpn for all anchors of all images
232 | def calc_rpn(C, img_data, width, height, resized_width, resized_height, img_length_calc_function):
233 | """(Important part!) Calculate the rpn for all anchors
234 | If feature map has shape 38x50=1900, there are 1900x9=17100 potential anchors
235 |
236 | Args:
237 | C: config
238 | img_data: augmented image data
239 | width: original image width (e.g. 600)
240 | height: original image height (e.g. 800)
241 | resized_width: resized image width according to C.im_size (e.g. 300)
242 | resized_height: resized image height according to C.im_size (e.g. 400)
243 | img_length_calc_function: function to calculate final layer's feature map (of base model) size according to input image size
244 |
245 | Returns:
246 | y_rpn_cls: list(num_bboxes, y_is_box_valid + y_rpn_overlap)
247 | y_is_box_valid: 0 or 1 (0 means the box is invalid, 1 means the box is valid)
248 | y_rpn_overlap: 0 or 1 (0 means the box is not an object, 1 means the box is an object)
249 | y_rpn_regr: list(num_bboxes, 4*y_rpn_overlap + y_rpn_regr)
250 |             y_rpn_regr: (tx, ty, tw, th) regression targets for the bounding boxes
251 | """
252 | downscale = float(C.rpn_stride)
253 |     anchor_sizes = C.anchor_box_scales   # [64, 128, 256] from the Config above
254 |     anchor_ratios = C.anchor_box_ratios  # aspect ratios 1:1, 1:2, 2:1 (area-preserving)
255 | num_anchors = len(anchor_sizes) * len(anchor_ratios) # 3x3=9
256 |
257 | # calculate the output map size based on the network architecture
258 | (output_width, output_height) = img_length_calc_function(resized_width, resized_height)
259 |
260 | n_anchratios = len(anchor_ratios) # 3
261 |
262 | # initialise empty output objectives
263 | y_rpn_overlap = np.zeros((output_height, output_width, num_anchors))
264 | y_is_box_valid = np.zeros((output_height, output_width, num_anchors))
265 |     y_rpn_regr = np.zeros((output_height, output_width, num_anchors * 4))
266 |
267 | num_bboxes = len(img_data['bboxes'])
268 | num_anchors_for_bbox = np.zeros(num_bboxes).astype(int)
269 |
270 | best_anchor_for_bbox = -1*np.ones((num_bboxes, 4)).astype(int)
271 | best_iou_for_bbox = np.zeros(num_bboxes).astype(np.float32)
272 | best_x_for_bbox = np.zeros((num_bboxes, 4)).astype(int)
273 | best_dx_for_bbox = np.zeros((num_bboxes, 4)).astype(np.float32)
274 |
275 | # get the GT box coordinates, and resize to account for image resizing
276 | gta = np.zeros((num_bboxes, 4))
277 | for bbox_num, bbox in enumerate(img_data['bboxes']):
278 | # get the GT box coordinates, and resize to account for image resizing
279 | gta[bbox_num, 0] = bbox['x1'] * (resized_width / float(width))
280 | gta[bbox_num, 1] = bbox['x2'] * (resized_width / float(width))
281 | gta[bbox_num, 2] = bbox['y1'] * (resized_height / float(height))
282 | gta[bbox_num, 3] = bbox['y2'] * (resized_height / float(height))
283 |
284 | # rpn ground truth
285 |     for anchor_size_idx in range(len(anchor_sizes)):
286 |         for anchor_ratio_idx in range(n_anchratios):
287 |             anchor_x = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][0]
288 |             anchor_y = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][1]
289 |
290 | for ix in range(output_width):
291 | # x-coordinates of the current anchor box
292 | x1_anc = downscale * (ix + 0.5) - anchor_x / 2
293 | x2_anc = downscale * (ix + 0.5) + anchor_x / 2
294 |
295 | # ignore boxes that go across image boundaries
296 | if x1_anc <0 or x2_anc > resized_width:
297 | continue
298 |
299 | for jy in range(output_height):
300 | # y-coordinates of the current anchor box
301 | y1_anc = downscale * (jy + 0.5) - anchor_y / 2
302 | y2_anc = downscale * (jy + 0.5) + anchor_y / 2
303 |
304 | # ignore boxes that go across image boundaries
305 | if y1_anc < 0 or y2_anc > resized_height:
306 | continue
307 |
308 | # bbox_type indicates whether an anchor should be a target
309 | # Initialize with 'negative'
310 | bbox_type = 'neg'
311 | # this is the best IOU for the (x,y) coord and the current anchor
312 | # note that this is different from the best IOU for a GT bbox
313 | best_iou_for_loc = 0.0
314 | for bbox_num in range(num_bboxes):
315 | # get IOU of the current GT box and the current anchor box
316 | curr_iou = iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]],
317 | [x1_anc, y1_anc, x2_anc, y2_anc])
318 | # calculate the regression targets if they will be needed
319 | if curr_iou > best_iou_for_bbox[bbox_num] or curr_iou > C.rpn_max_overlap:
320 | cx = (gta[bbox_num, 0] + gta[bbox_num, 1]) / 2.0
321 | cy = (gta[bbox_num, 2] + gta[bbox_num, 3]) / 2.0
322 | cxa = (x1_anc + x2_anc) / 2.0
323 | cya = (y1_anc + y2_anc) / 2.0
324 |
325 | # x,y are the center point of ground-truth bbox
326 | # xa,ya are the center point of anchor bbox (xa=downscale * (ix + 0.5); ya=downscale * (iy+0.5))
327 | # w,h are the width and height of ground-truth bbox
328 |                         # wa,ha are the width and height of the anchor bbox
329 | # tx = (x - xa) / wa
330 | # ty = (y - ya) / ha
331 | # tw = log(w / wa)
332 | # th = log(h / ha)
333 | tx = (cx - cxa) / (x2_anc - x1_anc)
334 | ty = (cy - cya) / (y2_anc - y1_anc)
335 | tw = np.log((gta[bbox_num, 1] - gta[bbox_num, 0]) / (x2_anc - x1_anc))
336 | th = np.log((gta[bbox_num, 3] - gta[bbox_num, 2]) / (y2_anc - y1_anc))
337 |
338 | if img_data['bboxes'][bbox_num]['class'] != 'bg':
339 |
340 | # all GT boxes should be mapped to an anchor box, so we keep track of which anchor box was best
341 | if curr_iou > best_iou_for_bbox[bbox_num]:
342 | best_anchor_for_bbox[bbox_num] = [jy, ix, anchor_ratio_idx, anchor_size_idx]
343 | best_iou_for_bbox[bbox_num] = curr_iou
344 | best_x_for_bbox[bbox_num, :] = [x1_anc, x2_anc, y1_anc, y2_anc]
345 | best_dx_for_bbox[bbox_num, :] = [tx, ty, tw, th]
346 |
347 | # we set the anchor to positive if the IOU is >0.7 (it does not matter if there was another better box, it just indicates overlap)
348 | if curr_iou > C.rpn_max_overlap:
349 | bbox_type = 'pos'
350 | num_anchors_for_bbox[bbox_num] += 1
351 | # we update the regression layer target if this IOU is the best for the current (x,y) and anchor position
352 | if curr_iou > best_iou_for_loc:
353 | best_iou_for_loc = curr_iou
354 | best_regr = (tx, ty, tw, th)
355 |
356 |                     # if the IOU is >0.3 and <0.7, it is ambiguous and not included in the objective
357 | if C.rpn_min_overlap < curr_iou < C.rpn_max_overlap:
358 | # gray zone between neg and pos
359 | if bbox_type != 'pos':
360 | bbox_type = 'neutral'
361 |
362 | # turn on or off outputs depending on IOUs
363 | if bbox_type == 'neg':
364 | y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
365 | y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
366 | elif bbox_type == 'neutral':
367 | y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
368 | y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
369 | elif bbox_type == 'pos':
370 | y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
371 | y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
372 | start = 4 * (anchor_ratio_idx + n_anchratios * anchor_size_idx)
373 | y_rpn_regr[jy, ix, start:start + 4] = best_regr
374 |
375 |
376 | # we ensure that every bbox has at least one positive RPN region
377 |
378 | for idx in range(num_anchors_for_bbox.shape[0]):
379 | if num_anchors_for_bbox[idx] == 0:
380 | # no box with an IOU greater than zero ...
381 | if best_anchor_for_bbox[idx, 0] == -1:
382 | continue
383 | y_is_box_valid[
384 | best_anchor_for_bbox[idx, 0], best_anchor_for_bbox[idx, 1], best_anchor_for_bbox[idx, 2] + n_anchratios *
385 | best_anchor_for_bbox[idx, 3]] = 1
386 | y_rpn_overlap[
387 | best_anchor_for_bbox[idx, 0], best_anchor_for_bbox[idx, 1], best_anchor_for_bbox[idx, 2] + n_anchratios *
388 | best_anchor_for_bbox[idx, 3]] = 1
389 | start = 4 * (best_anchor_for_bbox[idx, 2] + n_anchratios * best_anchor_for_bbox[idx, 3])
390 | y_rpn_regr[
391 | best_anchor_for_bbox[idx, 0], best_anchor_for_bbox[idx, 1], start:start + 4] = best_dx_for_bbox[idx, :]
392 |
393 | y_rpn_overlap = np.transpose(y_rpn_overlap, (2, 0, 1))
394 | y_rpn_overlap = np.expand_dims(y_rpn_overlap, axis=0)
395 |
396 | y_is_box_valid = np.transpose(y_is_box_valid, (2, 0, 1))
397 | y_is_box_valid = np.expand_dims(y_is_box_valid, axis=0)
398 |
399 | y_rpn_regr = np.transpose(y_rpn_regr, (2, 0, 1))
400 | y_rpn_regr = np.expand_dims(y_rpn_regr, axis=0)
401 |
402 | pos_locs = np.where(np.logical_and(y_rpn_overlap[0, :, :, :] == 1, y_is_box_valid[0, :, :, :] == 1))
403 | neg_locs = np.where(np.logical_and(y_rpn_overlap[0, :, :, :] == 0, y_is_box_valid[0, :, :, :] == 1))
404 |
405 | num_pos = len(pos_locs[0])
406 |
407 | # one issue is that the RPN has many more negative than positive regions, so we turn off some of the negative
408 | # regions. We also limit it to 256 regions.
409 | num_regions = 256
410 |
411 |     if len(pos_locs[0]) > num_regions // 2:
412 |         val_locs = random.sample(range(len(pos_locs[0])), len(pos_locs[0]) - num_regions // 2)
413 |         y_is_box_valid[0, pos_locs[0][val_locs], pos_locs[1][val_locs], pos_locs[2][val_locs]] = 0
414 |         num_pos = num_regions // 2
415 |
416 | if len(neg_locs[0]) + num_pos > num_regions:
417 | val_locs = random.sample(range(len(neg_locs[0])), len(neg_locs[0]) - num_pos)
418 | y_is_box_valid[0, neg_locs[0][val_locs], neg_locs[1][val_locs], neg_locs[2][val_locs]] = 0
419 |
420 | y_rpn_cls = np.concatenate([y_is_box_valid, y_rpn_overlap], axis=1)
421 | y_rpn_regr = np.concatenate([np.repeat(y_rpn_overlap, 4, axis=1), y_rpn_regr], axis=1)
422 |
423 | return np.copy(y_rpn_cls), np.copy(y_rpn_regr), num_pos
424 |
425 |
426 | # Get new image size and augment the image
427 | def get_new_img_size(width, height, img_min_side=300):
428 | if width <= height:
429 | f = float(img_min_side) / width
430 | resized_height = int(f * height)
431 | resized_width = img_min_side
432 | else:
433 | f = float(img_min_side) / height
434 | resized_width = int(f * width)
435 | resized_height = img_min_side
436 |
437 | return resized_width, resized_height
438 |
439 | def augment(img_data, config, augment=True):
440 | assert 'filepath' in img_data
441 | assert 'bboxes' in img_data
442 | assert 'width' in img_data
443 | assert 'height' in img_data
444 |
445 | img_data_aug = copy.deepcopy(img_data)
446 |
447 | img = cv2.imread(img_data_aug['filepath'])
448 |
449 | if augment:
450 | rows, cols = img.shape[:2]
451 |
452 | if config.use_horizontal_flips and np.random.randint(0, 2) == 0:
453 | img = cv2.flip(img, 1)
454 | for bbox in img_data_aug['bboxes']:
455 | x1 = bbox['x1']
456 | x2 = bbox['x2']
457 | bbox['x2'] = cols - x1
458 | bbox['x1'] = cols - x2
459 |
460 | if config.use_vertical_flips and np.random.randint(0, 2) == 0:
461 | img = cv2.flip(img, 0)
462 | for bbox in img_data_aug['bboxes']:
463 | y1 = bbox['y1']
464 | y2 = bbox['y2']
465 | bbox['y2'] = rows - y1
466 | bbox['y1'] = rows - y2
467 |
468 | if config.rot_90:
469 | angle = np.random.choice([0,90,180,270],1)[0]
470 | if angle == 270:
471 | img = np.transpose(img, (1,0,2))
472 | img = cv2.flip(img, 0)
473 | elif angle == 180:
474 | img = cv2.flip(img, -1)
475 | elif angle == 90:
476 | img = np.transpose(img, (1,0,2))
477 | img = cv2.flip(img, 1)
478 | elif angle == 0:
479 | pass
480 |
481 | for bbox in img_data_aug['bboxes']:
482 | x1 = bbox['x1']
483 | x2 = bbox['x2']
484 | y1 = bbox['y1']
485 | y2 = bbox['y2']
486 | if angle == 270:
487 | bbox['x1'] = y1
488 | bbox['x2'] = y2
489 | bbox['y1'] = cols - x2
490 | bbox['y2'] = cols - x1
491 | elif angle == 180:
492 | bbox['x2'] = cols - x1
493 | bbox['x1'] = cols - x2
494 | bbox['y2'] = rows - y1
495 | bbox['y1'] = rows - y2
496 | elif angle == 90:
497 | bbox['x1'] = rows - y2
498 | bbox['x2'] = rows - y1
499 | bbox['y1'] = x1
500 | bbox['y2'] = x2
501 | elif angle == 0:
502 | pass
503 |
504 | img_data_aug['width'] = img.shape[1]
505 | img_data_aug['height'] = img.shape[0]
506 | return img_data_aug, img
507 |
508 |
509 | # Generate the ground_truth anchors
510 | def get_anchor_gt(all_img_data, C, img_length_calc_function, mode='train'):
511 | """ Yield the ground-truth anchors as Y (labels)
512 |
513 | Args:
514 | all_img_data: list(filepath, width, height, list(bboxes))
515 | C: config
516 | img_length_calc_function: function to calculate final layer's feature map (of base model) size according to input image size
517 |         mode: 'train' or 'test'; 'train' mode needs augmentation
518 |
519 | Returns:
520 |         x_img: image data after resizing and scaling (smallest side = 300px)
521 | Y: [y_rpn_cls, y_rpn_regr]
522 | img_data_aug: augmented image data (original image with augmentation)
523 | debug_img: show image for debug
524 | num_pos: show number of positive anchors for debug
525 | """
526 | while True:
527 |
528 | for img_data in all_img_data:
529 | try:
530 |
531 | # read in image, and optionally add augmentation
532 |
533 | if mode == 'train':
534 | img_data_aug, x_img = augment(img_data, C, augment=True)
535 | else:
536 | img_data_aug, x_img = augment(img_data, C, augment=False)
537 |
538 | (width, height) = (img_data_aug['width'], img_data_aug['height'])
539 | (rows, cols, _) = x_img.shape
540 |
541 | assert cols == width
542 | assert rows == height
543 |
544 | # get image dimensions for resizing
545 | (resized_width, resized_height) = get_new_img_size(width, height, C.im_size)
546 |
547 |                 # resize the image so that the smallest side is C.im_size (300px)
548 | x_img = cv2.resize(x_img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC)
549 | debug_img = x_img.copy()
550 |
551 | try:
552 | y_rpn_cls, y_rpn_regr, num_pos = calc_rpn(C, img_data_aug, width, height, resized_width,
553 | resized_height, img_length_calc_function)
554 | except:
555 | continue
556 |
557 | # Zero-center by mean pixel, and preprocess image
558 |
559 | x_img = x_img[:, :, (2, 1, 0)] # BGR -> RGB
560 | x_img = x_img.astype(np.float32)
561 | x_img[:, :, 0] -= C.img_channel_mean[0]
562 | x_img[:, :, 1] -= C.img_channel_mean[1]
563 | x_img[:, :, 2] -= C.img_channel_mean[2]
564 | x_img /= C.img_scaling_factor
565 |
566 | x_img = np.transpose(x_img, (2, 0, 1))
567 | x_img = np.expand_dims(x_img, axis=0)
568 |
569 | y_rpn_regr[:, y_rpn_regr.shape[1] // 2:, :, :] *= C.std_scaling
570 |
571 | x_img = np.transpose(x_img, (0, 2, 3, 1))
572 | y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
573 | y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))
574 |
575 | yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug, debug_img, num_pos
576 |
577 | except Exception as e:
578 | print(e)
579 | continue
580 |
581 |
582 | # Define loss functions for all four outputs
583 | lambda_rpn_regr = 1.0
584 | lambda_rpn_class = 1.0
585 |
586 | lambda_cls_regr = 1.0
587 | lambda_cls_class = 1.0
588 |
589 | epsilon = 1e-4
590 |
591 | def rpn_loss_regr(num_anchors):
592 | """Loss function for rpn regression
593 | Args:
594 | num_anchors: number of anchors (9 in here)
595 | Returns:
596 | Smooth L1 loss function
597 | 0.5*x*x (if x_abs < 1)
598 |                            x_abs - 0.5 (otherwise)
599 | """
600 | def rpn_loss_regr_fixed_num(y_true, y_pred):
601 |
602 |         # x is the difference between the true value and the predicted value
603 | x = y_true[:, :, :, 4 * num_anchors:] - y_pred
604 |
605 | # absolute value of x
606 | x_abs = K.abs(x)
607 |
608 | # If x_abs <= 1.0, x_bool = 1
609 | x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)
610 |
611 | return lambda_rpn_regr * K.sum(
612 | y_true[:, :, :, :4 * num_anchors] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :, :4 * num_anchors])
613 |
614 | return rpn_loss_regr_fixed_num
615 |
616 |
617 | def rpn_loss_cls(num_anchors):
618 | """Loss function for rpn classification
619 | Args:
620 | num_anchors: number of anchors (9 in here)
621 | y_true[:, :, :, :9]: [0,1,0,0,0,0,0,1,0] means only the second and the eighth box is valid which contains pos or neg anchor => isValid
622 | y_true[:, :, :, 9:]: [0,1,0,0,0,0,0,0,0] means the second box is pos and eighth box is negative
623 | Returns:
624 | lambda * sum((binary_crossentropy(isValid*y_pred,y_true))) / N
625 | """
626 | def rpn_loss_cls_fixed_num(y_true, y_pred):
627 |
628 | return lambda_rpn_class * K.sum(y_true[:, :, :, :num_anchors] * K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, :, :, num_anchors:])) / K.sum(epsilon + y_true[:, :, :, :num_anchors])
629 |
630 | return rpn_loss_cls_fixed_num
631 |
632 |
633 | def class_loss_regr(num_classes):
634 |     """Loss function for the classifier head's bbox regression
635 |     Args:
636 |         num_classes: number of classes (excluding 'bg')
637 |     Returns:
638 |         Smooth L1 loss function
639 |                            0.5*x*x (if x_abs < 1)
640 |                            x_abs - 0.5 (otherwise)
641 | """
642 | def class_loss_regr_fixed_num(y_true, y_pred):
643 | x = y_true[:, :, 4*num_classes:] - y_pred
644 | x_abs = K.abs(x)
645 | x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32')
646 | return lambda_cls_regr * K.sum(y_true[:, :, :4*num_classes] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :4*num_classes])
647 | return class_loss_regr_fixed_num
648 |
649 |
650 | def class_loss_cls(y_true, y_pred):
651 | return lambda_cls_class * K.mean(categorical_crossentropy(y_true[0, :, :], y_pred[0, :, :]))
652 |
653 |
654 | def non_max_suppression_fast(boxes, probs, overlap_thresh=0.9, max_boxes=300):
655 | # code used from here: http://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
656 | # if there are no boxes, return an empty list
657 |
658 | # Process explanation:
659 | # Step 1: Sort the probs list
660 |     # Step 2: Find the largest prob 'Last' in the list and save it to the pick list
661 | # Step 3: Calculate the IoU with 'Last' box and other boxes in the list. If the IoU is larger than overlap_threshold, delete the box from list
662 | # Step 4: Repeat step 2 and step 3 until there is no item in the probs list
663 | if len(boxes) == 0:
664 | return []
665 |
666 | # grab the coordinates of the bounding boxes
667 | x1 = boxes[:, 0]
668 | y1 = boxes[:, 1]
669 | x2 = boxes[:, 2]
670 | y2 = boxes[:, 3]
671 |
672 | np.testing.assert_array_less(x1, x2)
673 | np.testing.assert_array_less(y1, y2)
674 |
675 |     # if the bounding boxes are integers, convert them to floats --
676 | # this is important since we'll be doing a bunch of divisions
677 | if boxes.dtype.kind == "i":
678 | boxes = boxes.astype("float")
679 |
680 | # initialize the list of picked indexes
681 | pick = []
682 |
683 | # calculate the areas
684 | area = (x2 - x1) * (y2 - y1)
685 |
686 | # sort the bounding boxes
687 | idxs = np.argsort(probs)
688 |
689 | # keep looping while some indexes still remain in the indexes
690 | # list
691 | while len(idxs) > 0:
692 | # grab the last index in the indexes list and add the
693 | # index value to the list of picked indexes
694 | last = len(idxs) - 1
695 | i = idxs[last]
696 | pick.append(i)
697 |
698 | # find the intersection
699 |
700 | xx1_int = np.maximum(x1[i], x1[idxs[:last]])
701 | yy1_int = np.maximum(y1[i], y1[idxs[:last]])
702 | xx2_int = np.minimum(x2[i], x2[idxs[:last]])
703 | yy2_int = np.minimum(y2[i], y2[idxs[:last]])
704 |
705 | ww_int = np.maximum(0, xx2_int - xx1_int)
706 | hh_int = np.maximum(0, yy2_int - yy1_int)
707 |
708 | area_int = ww_int * hh_int
709 |
710 | # find the union
711 | area_union = area[i] + area[idxs[:last]] - area_int
712 |
713 | # compute the ratio of overlap
714 | overlap = area_int / (area_union + 1e-6)
715 |
716 |         # delete all indexes from the index list that have an overlap above the threshold
717 | idxs = np.delete(idxs, np.concatenate(([last],
718 | np.where(overlap > overlap_thresh)[0])))
719 |
720 | if len(pick) >= max_boxes:
721 | break
722 |
723 | # return only the bounding boxes that were picked using the integer data type
724 | boxes = boxes[pick].astype("int")
725 | probs = probs[pick]
726 | return boxes, probs
727 |
728 |
729 | def apply_regr_np(X, T):
730 | """Apply regression layer to all anchors in one feature map
731 |
732 | Args:
733 | X: shape=(4, 18, 25) the current anchor type for all points in the feature map
734 | T: regression layer shape=(4, 18, 25)
735 |
736 | Returns:
737 | X: regressed position and size for current anchor
738 | """
739 | try:
740 | x = X[0, :, :]
741 | y = X[1, :, :]
742 | w = X[2, :, :]
743 | h = X[3, :, :]
744 |
745 | tx = T[0, :, :]
746 | ty = T[1, :, :]
747 | tw = T[2, :, :]
748 | th = T[3, :, :]
749 |
750 | cx = x + w / 2.
751 | cy = y + h / 2.
752 | cx1 = tx * w + cx
753 | cy1 = ty * h + cy
754 |
755 | w1 = np.exp(tw.astype(np.float64)) * w
756 | h1 = np.exp(th.astype(np.float64)) * h
757 | x1 = cx1 - w1 / 2.
758 | y1 = cy1 - h1 / 2.
759 |
760 | x1 = np.round(x1)
761 | y1 = np.round(y1)
762 | w1 = np.round(w1)
763 | h1 = np.round(h1)
764 | return np.stack([x1, y1, w1, h1])
765 | except Exception as e:
766 | print(e)
767 | return X
768 |
769 |
770 | def apply_regr(x, y, w, h, tx, ty, tw, th):
771 | # Apply regression to x, y, w and h
772 | try:
773 | cx = x + w / 2.
774 | cy = y + h / 2.
775 | cx1 = tx * w + cx
776 | cy1 = ty * h + cy
777 | w1 = math.exp(tw) * w
778 | h1 = math.exp(th) * h
779 | x1 = cx1 - w1 / 2.
780 | y1 = cy1 - h1 / 2.
781 | x1 = int(round(x1))
782 | y1 = int(round(y1))
783 | w1 = int(round(w1))
784 | h1 = int(round(h1))
785 |
786 | return x1, y1, w1, h1
787 |
788 | except ValueError:
789 | return x, y, w, h
790 | except OverflowError:
791 | return x, y, w, h
792 | except Exception as e:
793 | print(e)
794 | return x, y, w, h
795 |
796 |
797 | def calc_iou(R, img_data, C, class_mapping):
798 | """Converts from (x1,y1,x2,y2) to (x,y,w,h) format
799 |
800 | Args:
801 | R: bboxes, probs
802 | """
803 | bboxes = img_data['bboxes']
804 | (width, height) = (img_data['width'], img_data['height'])
805 | # get image dimensions for resizing
806 | (resized_width, resized_height) = get_new_img_size(width, height, C.im_size)
807 |
808 | gta = np.zeros((len(bboxes), 4))
809 |
810 | for bbox_num, bbox in enumerate(bboxes):
811 | # get the GT box coordinates, and resize to account for image resizing
812 | # gta[bbox_num, 0] = (40 * (600 / 800)) / 16 = int(round(1.875)) = 2 (x in feature map)
813 | gta[bbox_num, 0] = int(round(bbox['x1'] * (resized_width / float(width)) / C.rpn_stride))
814 | gta[bbox_num, 1] = int(round(bbox['x2'] * (resized_width / float(width)) / C.rpn_stride))
815 | gta[bbox_num, 2] = int(round(bbox['y1'] * (resized_height / float(height)) / C.rpn_stride))
816 | gta[bbox_num, 3] = int(round(bbox['y2'] * (resized_height / float(height)) / C.rpn_stride))
817 |
818 | x_roi = []
819 | y_class_num = []
820 | y_class_regr_coords = []
821 | y_class_regr_label = []
822 | IoUs = [] # for debugging only
823 |
824 | # R.shape[0]: number of bboxes (=300 from non_max_suppression)
825 | for ix in range(R.shape[0]):
826 | (x1, y1, x2, y2) = R[ix, :]
827 | x1 = int(round(x1))
828 | y1 = int(round(y1))
829 | x2 = int(round(x2))
830 | y2 = int(round(y2))
831 |
832 | best_iou = 0.0
833 | best_bbox = -1
834 | # Iterate through all the ground-truth bboxes to calculate the iou
835 | for bbox_num in range(len(bboxes)):
836 | curr_iou = iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]], [x1, y1, x2, y2])
837 |
838 |             # Find out the corresponding ground-truth bbox_num with the largest iou
839 | if curr_iou > best_iou:
840 | best_iou = curr_iou
841 | best_bbox = bbox_num
842 |
843 | if best_iou < C.classifier_min_overlap:
844 | continue
845 | else:
846 | w = x2 - x1
847 | h = y2 - y1
848 | x_roi.append([x1, y1, w, h])
849 | IoUs.append(best_iou)
850 |
851 | if C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
852 | # hard negative example
853 | cls_name = 'bg'
854 | elif C.classifier_max_overlap <= best_iou:
855 | cls_name = bboxes[best_bbox]['class']
856 | cxg = (gta[best_bbox, 0] + gta[best_bbox, 1]) / 2.0
857 | cyg = (gta[best_bbox, 2] + gta[best_bbox, 3]) / 2.0
858 |
859 | cx = x1 + w / 2.0
860 | cy = y1 + h / 2.0
861 |
862 | tx = (cxg - cx) / float(w)
863 | ty = (cyg - cy) / float(h)
864 | tw = np.log((gta[best_bbox, 1] - gta[best_bbox, 0]) / float(w))
865 | th = np.log((gta[best_bbox, 3] - gta[best_bbox, 2]) / float(h))
866 | else:
867 | print('roi = {}'.format(best_iou))
868 | raise RuntimeError
869 |
870 | class_num = class_mapping[cls_name]
871 | class_label = len(class_mapping) * [0]
872 | class_label[class_num] = 1
873 | y_class_num.append(copy.deepcopy(class_label))
874 | coords = [0] * 4 * (len(class_mapping) - 1)
875 | labels = [0] * 4 * (len(class_mapping) - 1)
876 | if cls_name != 'bg':
877 | label_pos = 4 * class_num
878 | sx, sy, sw, sh = C.classifier_regr_std
879 | coords[label_pos:4 + label_pos] = [sx * tx, sy * ty, sw * tw, sh * th]
880 | labels[label_pos:4 + label_pos] = [1, 1, 1, 1]
881 | y_class_regr_coords.append(copy.deepcopy(coords))
882 | y_class_regr_label.append(copy.deepcopy(labels))
883 | else:
884 | y_class_regr_coords.append(copy.deepcopy(coords))
885 | y_class_regr_label.append(copy.deepcopy(labels))
886 |
887 | if len(x_roi) == 0:
888 | return None, None, None, None
889 |
890 |     # X: RoIs (out of the 300 NMS boxes) whose best IoU with any gt bbox is >= C.classifier_min_overlap
891 |     X = np.array(x_roi)
892 |     # Y1: one-hot class labels for the RoIs in X
893 |     Y1 = np.array(y_class_num)
894 |     # Y2: per-class regression masks concatenated with the scaled regression targets
895 |     Y2 = np.concatenate([np.array(y_class_regr_label), np.array(y_class_regr_coords)], axis=1)
896 |
897 | return np.expand_dims(X, axis=0), np.expand_dims(Y1, axis=0), np.expand_dims(Y2, axis=0), IoUs
898 |
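# Output-shape sketch (added for illustration; assuming 3 object classes + 'bg', i.e.
# len(class_mapping) = 4, and N RoIs kept): X has shape (1, N, 4) in (x, y, w, h)
# format, Y1 has shape (1, N, 4) with one-hot class labels, and Y2 has shape
# (1, N, 24): the first 4*(4-1) = 12 values per RoI are the per-class regression
# masks, the last 12 the scaled regression targets [sx*tx, sy*ty, sw*tw, sh*th].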
899 |
900 | def rpn_to_roi(rpn_layer, regr_layer, C, dim_ordering, use_regr=True, max_boxes=300, overlap_thresh=0.9):
901 | """Convert rpn layer to roi bboxes
902 |
903 |     Args: (num_anchors = 9)
904 |         rpn_layer: output layer for rpn classification
905 |             shape (1, feature_map.height, feature_map.width, num_anchors)
906 |             e.g. (1, 18, 25, 9) for a resized image of width 400 and height 300
907 |         regr_layer: output layer for rpn regression
908 |             shape (1, feature_map.height, feature_map.width, num_anchors * 4)
909 |             e.g. (1, 18, 25, 36) for a resized image of width 400 and height 300
910 |         C: config
911 |         use_regr: whether to apply the rpn bbox regression
912 |         max_boxes: max number of bboxes kept by non-max-suppression (NMS)
913 |         overlap_thresh: if IoU in NMS is larger than this threshold, drop the box
914 |
915 |     Returns:
916 |         result: bboxes kept by non-max-suppression, shape=(max_boxes, 4),
917 |             given as (x1, y1, x2, y2) coordinates on the feature map
918 | """
919 | regr_layer = regr_layer / C.std_scaling
920 |
921 | anchor_sizes = C.anchor_box_scales # (3 in here)
922 | anchor_ratios = C.anchor_box_ratios # (3 in here)
923 |
924 | assert rpn_layer.shape[0] == 1
925 |
926 | (rows, cols) = rpn_layer.shape[1:3]
927 |
928 | curr_layer = 0
929 |
930 |     # A.shape = (4, feature_map.height, feature_map.width, num_anchors)
931 |     # e.g. (4, 18, 25, 9) for a resized image of width 400 and height 300
932 |     # A holds the coordinates of the 9 anchors for every point in the feature map
933 |     # => all 18x25x9 = 4050 anchor coordinates
934 | A = np.zeros((4, rpn_layer.shape[1], rpn_layer.shape[2], rpn_layer.shape[3]))
935 |
936 | for anchor_size in anchor_sizes:
937 | for anchor_ratio in anchor_ratios:
938 | # anchor_x = (128 * 1) / 16 = 8 => width of current anchor
939 | # anchor_y = (128 * 2) / 16 = 16 => height of current anchor
940 | anchor_x = (anchor_size * anchor_ratio[0]) / C.rpn_stride
941 | anchor_y = (anchor_size * anchor_ratio[1]) / C.rpn_stride
942 |
943 | # curr_layer: 0~8 (9 anchors)
944 | # the Kth anchor of all position in the feature map (9th in total)
945 | regr = regr_layer[0, :, :, 4 * curr_layer:4 * curr_layer + 4] # shape => (18, 25, 4)
946 | regr = np.transpose(regr, (2, 0, 1)) # shape => (4, 18, 25)
947 |
948 | # Create 18x25 mesh grid
949 | # For every point in x, there are all the y points and vice versa
950 | # X.shape = (18, 25)
951 | # Y.shape = (18, 25)
952 | X, Y = np.meshgrid(np.arange(cols), np.arange(rows))
953 |
954 | # Calculate anchor position and size for each feature map point
955 | A[0, :, :, curr_layer] = X - anchor_x / 2 # Top left x coordinate
956 | A[1, :, :, curr_layer] = Y - anchor_y / 2 # Top left y coordinate
957 | A[2, :, :, curr_layer] = anchor_x # width of current anchor
958 | A[3, :, :, curr_layer] = anchor_y # height of current anchor
959 |
960 | # Apply regression to x, y, w and h if there is rpn regression layer
961 | if use_regr:
962 | A[:, :, :, curr_layer] = apply_regr_np(A[:, :, :, curr_layer], regr)
963 |
964 |             # Clamp width and height to be at least 1
965 | A[2, :, :, curr_layer] = np.maximum(1, A[2, :, :, curr_layer])
966 | A[3, :, :, curr_layer] = np.maximum(1, A[3, :, :, curr_layer])
967 |
968 | # Convert (x, y , w, h) to (x1, y1, x2, y2)
969 | # x1, y1 is top left coordinate
970 | # x2, y2 is bottom right coordinate
971 | A[2, :, :, curr_layer] += A[0, :, :, curr_layer]
972 | A[3, :, :, curr_layer] += A[1, :, :, curr_layer]
973 |
974 | # Avoid bboxes drawn outside the feature map
975 | A[0, :, :, curr_layer] = np.maximum(0, A[0, :, :, curr_layer])
976 | A[1, :, :, curr_layer] = np.maximum(0, A[1, :, :, curr_layer])
977 | A[2, :, :, curr_layer] = np.minimum(cols - 1, A[2, :, :, curr_layer])
978 | A[3, :, :, curr_layer] = np.minimum(rows - 1, A[3, :, :, curr_layer])
979 |
980 | curr_layer += 1
981 |
982 | all_boxes = np.reshape(A.transpose((0, 3, 1, 2)), (4, -1)).transpose((1, 0)) # shape=(4050, 4)
983 | all_probs = rpn_layer.transpose((0, 3, 1, 2)).reshape((-1)) # shape=(4050,)
984 |
985 | x1 = all_boxes[:, 0]
986 | y1 = all_boxes[:, 1]
987 | x2 = all_boxes[:, 2]
988 | y2 = all_boxes[:, 3]
989 |
990 |     # Find the invalid bboxes (non-positive width or height) and delete them from the list
991 | idxs = np.where((x1 - x2 >= 0) | (y1 - y2 >= 0))
992 |
993 | all_boxes = np.delete(all_boxes, idxs, 0)
994 | all_probs = np.delete(all_probs, idxs, 0)
995 |
996 | # Apply non_max_suppression
997 | # Only extract the bboxes. Don't need rpn probs in the later process
998 | result = non_max_suppression_fast(all_boxes, all_probs, overlap_thresh=overlap_thresh, max_boxes=max_boxes)[0]
999 |
1000 | return result
1001 |
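# Illustration (added; not part of the original script): how the meshgrid used in
# rpn_to_roi turns into per-position anchor corners, shown for a hypothetical 2x3
# feature map and a single anchor of width 4 and height 2.
_Xd, _Yd = np.meshgrid(np.arange(3), np.arange(2))  # both of shape (2, 3)
_x1_demo = _Xd - 4 / 2  # top-left x for each feature-map position
_y1_demo = _Yd - 2 / 2  # top-left y for each feature-map position
assert _x1_demo[0, 0] == -2.0 and _y1_demo[1, 2] == 0.0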
1002 |
1003 | # Start training
1004 |
1005 | base_path = 'drive/My Drive/AI/Faster_RCNN'
1006 |
1007 | train_path = 'drive/My Drive/AI/Dataset/Open Images Dataset v4 (Bounding Boxes)/person_car_phone_train_annotation.txt' # Training data (annotation file)
1008 |
1009 | num_rois = 4 # Number of RoIs to process at once.
1010 |
1011 | # Augmentation flag
1012 | horizontal_flips = True # Augment with horizontal flips in training.
1013 | vertical_flips = True # Augment with vertical flips in training.
1014 | rot_90 = True # Augment with 90 degree rotations in training.
1015 |
1016 | output_weight_path = os.path.join(base_path, 'model/model_frcnn_vgg.hdf5')
1017 |
1018 | record_path = os.path.join(base_path, 'model/record.csv') # Record data (used to save the losses, classification accuracy and mean average precision)
1019 |
1020 | base_weight_path = os.path.join(base_path, 'model/vgg16_weights_tf_dim_ordering_tf_kernels.h5')
1021 |
1022 | config_output_filename = os.path.join(base_path, 'model_vgg_config.pickle')
1023 |
1024 |
1025 | # Create the config
1026 | C = Config()
1027 |
1028 | C.use_horizontal_flips = horizontal_flips
1029 | C.use_vertical_flips = vertical_flips
1030 | C.rot_90 = rot_90
1031 |
1032 | C.record_path = record_path
1033 | C.model_path = output_weight_path
1034 | C.num_rois = num_rois
1035 |
1036 | C.base_net_weights = base_weight_path
1037 |
1038 |
1039 | #--------------------------------------------------------#
1040 | #   Loading the data will take some time                 #
1041 | #--------------------------------------------------------#
1042 | st = time.time()
1043 | train_imgs, classes_count, class_mapping = get_data(train_path)
1044 | print()
1045 | print('Spent %0.2f mins to load the data' % ((time.time()-st)/60))
1046 |
1047 |
1048 | if 'bg' not in classes_count:
1049 | classes_count['bg'] = 0
1050 | class_mapping['bg'] = len(class_mapping)
1051 | # e.g.
1052 | # classes_count: {'Car': 2383, 'Mobile phone': 1108, 'Person': 3745, 'bg': 0}
1053 | # class_mapping: {'Person': 0, 'Car': 1, 'Mobile phone': 2, 'bg': 3}
1054 | C.class_mapping = class_mapping
1055 |
1056 | print('Training images per class:')
1057 | pprint.pprint(classes_count)
1058 | print('Num classes (including bg) = {}'.format(len(classes_count)))
1059 | print(class_mapping)
1060 |
1061 | # Save the configuration
1062 | with open(config_output_filename, 'wb') as config_f:
1063 | pickle.dump(C,config_f)
1064 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename))
1065 |
1066 |
1067 | # Shuffle the images with seed
1068 | random.seed(1)
1069 | random.shuffle(train_imgs)
1070 |
1071 | print('Num train samples (images) {}'.format(len(train_imgs)))
1072 |
1073 |
1074 | # Get the train data generator, which generates X, Y, image_data
1075 | data_gen_train = get_anchor_gt(train_imgs, C, get_img_output_length, mode='train')
1076 |
1077 | X, Y, image_data, debug_img, debug_num_pos = next(data_gen_train)
1078 |
1079 |
1080 | print('Original image: height=%d width=%d'%(image_data['height'], image_data['width']))
1081 | print('Resized image: height=%d width=%d C.im_size=%d'%(X.shape[1], X.shape[2], C.im_size))
1082 | print('Feature map size: height=%d width=%d C.rpn_stride=%d'%(Y[0].shape[1], Y[0].shape[2], C.rpn_stride))
1083 | print(X.shape)
1084 | print(str(len(Y))+" includes 'y_rpn_cls' and 'y_rpn_regr'")
1085 | print('Shape of y_rpn_cls {}'.format(Y[0].shape))
1086 | print('Shape of y_rpn_regr {}'.format(Y[1].shape))
1087 | print(image_data)
1088 |
1089 | print('Number of positive anchors for this image: %d' % (debug_num_pos))
1090 | if debug_num_pos==0:
1091 |     gt_x1, gt_x2 = image_data['bboxes'][0]['x1']*(X.shape[2]/image_data['width']), image_data['bboxes'][0]['x2']*(X.shape[2]/image_data['width'])
1092 |     gt_y1, gt_y2 = image_data['bboxes'][0]['y1']*(X.shape[1]/image_data['height']), image_data['bboxes'][0]['y2']*(X.shape[1]/image_data['height'])
1093 | gt_x1, gt_y1, gt_x2, gt_y2 = int(gt_x1), int(gt_y1), int(gt_x2), int(gt_y2)
1094 |
1095 | img = debug_img.copy()
1096 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
1097 | color = (0, 255, 0)
1098 | cv2.putText(img, 'gt bbox', (gt_x1, gt_y1-5), cv2.FONT_HERSHEY_DUPLEX, 0.7, color, 1)
1099 | cv2.rectangle(img, (gt_x1, gt_y1), (gt_x2, gt_y2), color, 2)
1100 | cv2.circle(img, (int((gt_x1+gt_x2)/2), int((gt_y1+gt_y2)/2)), 3, color, -1)
1101 |
1102 | plt.grid()
1103 | plt.imshow(img)
1104 | plt.show()
1105 | else:
1106 | cls = Y[0][0]
1107 | pos_cls = np.where(cls==1)
1108 | print(pos_cls)
1109 | regr = Y[1][0]
1110 | pos_regr = np.where(regr==1)
1111 | print(pos_regr)
1112 | print('y_rpn_cls for possible pos anchor: {}'.format(cls[pos_cls[0][0],pos_cls[1][0],:]))
1113 | print('y_rpn_regr for positive anchor: {}'.format(regr[pos_regr[0][0],pos_regr[1][0],:]))
1114 |
1115 | gt_x1, gt_x2 = image_data['bboxes'][0]['x1']*(X.shape[2]/image_data['width']), image_data['bboxes'][0]['x2']*(X.shape[2]/image_data['width'])
1116 | gt_y1, gt_y2 = image_data['bboxes'][0]['y1']*(X.shape[1]/image_data['height']), image_data['bboxes'][0]['y2']*(X.shape[1]/image_data['height'])
1117 | gt_x1, gt_y1, gt_x2, gt_y2 = int(gt_x1), int(gt_y1), int(gt_x2), int(gt_y2)
1118 |
1119 | img = debug_img.copy()
1120 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
1121 | color = (0, 255, 0)
1122 | # cv2.putText(img, 'gt bbox', (gt_x1, gt_y1-5), cv2.FONT_HERSHEY_DUPLEX, 0.7, color, 1)
1123 | cv2.rectangle(img, (gt_x1, gt_y1), (gt_x2, gt_y2), color, 2)
1124 | cv2.circle(img, (int((gt_x1+gt_x2)/2), int((gt_y1+gt_y2)/2)), 3, color, -1)
1125 |
1126 | # Add text
1127 | textLabel = 'gt bbox'
1128 | (retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,0.5,1)
1129 | textOrg = (gt_x1, gt_y1+5)
1130 | cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
1131 | cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
1132 | cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0), 1)
1133 |
1134 | # Draw positive anchors according to the y_rpn_regr
1135 | for i in range(debug_num_pos):
1136 |
1137 | color = (100+i*(155/4), 0, 100+i*(155/4))
1138 |
1139 | idx = pos_regr[2][i*4]/4
1140 | anchor_size = C.anchor_box_scales[int(idx/3)]
1141 | anchor_ratio = C.anchor_box_ratios[2-int((idx+1)%3)]
1142 |
1143 | center = (pos_regr[1][i*4]*C.rpn_stride, pos_regr[0][i*4]*C.rpn_stride)
1144 | print('Center position of positive anchor: ', center)
1145 | cv2.circle(img, center, 3, color, -1)
1146 | anc_w, anc_h = anchor_size*anchor_ratio[0], anchor_size*anchor_ratio[1]
1147 | cv2.rectangle(img, (center[0]-int(anc_w/2), center[1]-int(anc_h/2)), (center[0]+int(anc_w/2), center[1]+int(anc_h/2)), color, 2)
1148 | # cv2.putText(img, 'pos anchor bbox '+str(i+1), (center[0]-int(anc_w/2), center[1]-int(anc_h/2)-5), cv2.FONT_HERSHEY_DUPLEX, 0.5, color, 1)
1149 |
1150 |     print('The green bbox is the ground-truth bbox. The others are positive anchors')
1151 | plt.figure(figsize=(8,8))
1152 | plt.grid()
1153 | plt.imshow(img)
1154 | plt.show()
1155 |
1156 |
1157 | ####### Build the model
1158 | input_shape_img = (None, None, 3)
1159 |
1160 | img_input = Input(shape=input_shape_img)
1161 | roi_input = Input(shape=(None, 4))
1162 |
1163 | # define the base network (VGG here, can be Resnet50, Inception, etc)
1164 | shared_layers = nn_base(img_input, trainable=True)
1165 |
1166 | # define the RPN, built on the base layers
1167 | num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) # 9
1168 | rpn = rpn_layer(shared_layers, num_anchors)
1169 |
1170 | classifier = classifier_layer(shared_layers, roi_input, C.num_rois, nb_classes=len(classes_count))
1171 |
1172 | model_rpn = Model(img_input, rpn[:2])
1173 | model_classifier = Model([img_input, roi_input], classifier)
1174 |
1175 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
1176 | model_all = Model([img_input, roi_input], rpn[:2] + classifier)
1177 |
1178 | # Because a Google Colab session can only run for a few hours at a time (after which you need to reconnect),
1179 | # we save the model and reload it to continue training
1180 | if not os.path.isfile(C.model_path):
1181 |     # If this is the beginning of training, load the pre-trained base network (e.g. VGG-16)
1182 | try:
1183 |         print('This is the first training run')
1184 | print('loading weights from {}'.format(C.base_net_weights))
1185 | model_rpn.load_weights(C.base_net_weights, by_name=True)
1186 | model_classifier.load_weights(C.base_net_weights, by_name=True)
1187 | except:
1188 | print('Could not load pretrained model weights. Weights can be found in the keras application folder \
1189 | https://github.com/fchollet/keras/tree/master/keras/applications')
1190 |
1191 | # Create the record.csv file to record losses, acc and mAP
1192 | record_df = pd.DataFrame(
1193 | columns=['mean_overlapping_bboxes', 'class_acc', 'loss_rpn_cls', 'loss_rpn_regr', 'loss_class_cls',
1194 | 'loss_class_regr', 'curr_loss', 'elapsed_time', 'mAP'])
1195 | else:
1196 |     # If training is being resumed, load the previously trained model
1197 |     print('Continue training based on previously trained model')
1198 | print('Loading weights from {}'.format(C.model_path))
1199 | model_rpn.load_weights(C.model_path, by_name=True)
1200 | model_classifier.load_weights(C.model_path, by_name=True)
1201 |
1202 | # Load the records
1203 | record_df = pd.read_csv(record_path)
1204 |
1205 | r_mean_overlapping_bboxes = record_df['mean_overlapping_bboxes']
1206 | r_class_acc = record_df['class_acc']
1207 | r_loss_rpn_cls = record_df['loss_rpn_cls']
1208 | r_loss_rpn_regr = record_df['loss_rpn_regr']
1209 | r_loss_class_cls = record_df['loss_class_cls']
1210 | r_loss_class_regr = record_df['loss_class_regr']
1211 | r_curr_loss = record_df['curr_loss']
1212 | r_elapsed_time = record_df['elapsed_time']
1213 | r_mAP = record_df['mAP']
1214 |
1215 |     print('Already trained %dK batches' % (len(record_df)))
1216 |
1217 |
1218 |
1219 | optimizer = Adam(lr=1e-5)
1220 | optimizer_classifier = Adam(lr=1e-5)
1221 | model_rpn.compile(optimizer=optimizer, loss=[rpn_loss_cls(num_anchors), rpn_loss_regr(num_anchors)])
1222 | model_classifier.compile(optimizer=optimizer_classifier, loss=[class_loss_cls, class_loss_regr(len(classes_count)-1)], metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
1223 | model_all.compile(optimizer='sgd', loss='mae')
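# Note (added): model_all is only used to save/load the combined RPN + classifier
# weights (model_all.save_weights further below); the 'sgd'/'mae' compile arguments
# are placeholders and model_all is never trained directly in this script.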
1224 |
1225 |
1226 | # Training setting
1227 | total_epochs = len(record_df)
1228 | r_epochs = len(record_df)
1229 |
1230 | epoch_length = 1000
1231 | num_epochs = 40
1232 | iter_num = 0
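# Note (added): an "epoch" here is epoch_length = 1000 training iterations rather
# than a full pass over the dataset, so num_epochs = 40 amounts to 40,000 iterations.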
1233 |
1234 | total_epochs += num_epochs
1235 |
1236 | losses = np.zeros((epoch_length, 5))
1237 | rpn_accuracy_rpn_monitor = []
1238 | rpn_accuracy_for_epoch = []
1239 |
1240 | if len(record_df)==0:
1241 | best_loss = np.Inf
1242 | else:
1243 | best_loss = np.min(r_curr_loss)
1244 |
1245 |
1246 | print(len(record_df))
1247 |
1248 | start_time = time.time()
1249 | for epoch_num in range(num_epochs):
1250 |
1251 | progbar = generic_utils.Progbar(epoch_length)
1252 | print('Epoch {}/{}'.format(r_epochs + 1, total_epochs))
1253 |
1254 | r_epochs += 1
1255 |
1256 | while True:
1257 | try:
1258 |
1259 | if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
1260 | mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
1261 | rpn_accuracy_rpn_monitor = []
1262 | # print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, epoch_length))
1263 | if mean_overlapping_bboxes == 0:
1264 | print(
1265 | 'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.')
1266 |
1267 | # Generate X (x_img) and label Y ([y_rpn_cls, y_rpn_regr])
1268 | X, Y, img_data, debug_img, debug_num_pos = next(data_gen_train)
1269 |
1270 | # Train rpn model and get loss value [_, loss_rpn_cls, loss_rpn_regr]
1271 | loss_rpn = model_rpn.train_on_batch(X, Y)
1272 |
1273 | # Get predicted rpn from rpn model [rpn_cls, rpn_regr]
1274 | P_rpn = model_rpn.predict_on_batch(X)
1275 |
1276 | # R: bboxes (shape=(300,4))
1277 | # Convert rpn layer to roi bboxes
1278 | R = rpn_to_roi(P_rpn[0], P_rpn[1], C, K.image_dim_ordering(), use_regr=True, overlap_thresh=0.7,
1279 | max_boxes=300)
1280 |
1281 | # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
1282 |             # X2: RoIs (out of the 300 NMS boxes) whose best IoU with any gt bbox is >= C.classifier_min_overlap
1283 |             # Y1: one-hot class labels for the RoIs in X2
1284 |             # Y2: per-class regression masks concatenated with the scaled regression targets
1285 | X2, Y1, Y2, IouS = calc_iou(R, img_data, C, class_mapping)
1286 |
1287 |             # If X2 is None, there are no matching bboxes
1288 | if X2 is None:
1289 | rpn_accuracy_rpn_monitor.append(0)
1290 | rpn_accuracy_for_epoch.append(0)
1291 | continue
1292 |
1293 |             # Split the RoIs into negative ('bg', last class index) and positive samples
1294 |             neg_samples = np.where(Y1[0, :, -1] == 1)
1295 |             pos_samples = np.where(Y1[0, :, -1] == 0)
1296 | 
1297 |             if len(neg_samples[0]) > 0:
1298 |                 neg_samples = neg_samples[0]
1299 |             else:
1300 |                 neg_samples = []
1301 | 
1302 |             if len(pos_samples[0]) > 0:
1303 |                 pos_samples = pos_samples[0]
1304 |             else:
1305 |                 pos_samples = []
1306 |
1307 | rpn_accuracy_rpn_monitor.append(len(pos_samples))
1308 |             rpn_accuracy_for_epoch.append(len(pos_samples))
1309 |
1310 | if C.num_rois > 1:
1311 |                 # If there are at least num_rois//2 (= 2) positive samples, randomly choose 2 of them; otherwise keep them all
1312 | if len(pos_samples) < C.num_rois // 2:
1313 | selected_pos_samples = pos_samples.tolist()
1314 | else:
1315 | selected_pos_samples = np.random.choice(pos_samples, C.num_rois // 2, replace=False).tolist()
1316 |
1317 | # Randomly choose (num_rois - num_pos) neg samples
1318 | try:
1319 | selected_neg_samples = np.random.choice(neg_samples, C.num_rois - len(selected_pos_samples),
1320 | replace=False).tolist()
1321 | except:
1322 | selected_neg_samples = np.random.choice(neg_samples, C.num_rois - len(selected_pos_samples),
1323 | replace=True).tolist()
1324 |
1325 | # Save all the pos and neg samples in sel_samples
1326 | sel_samples = selected_pos_samples + selected_neg_samples
1327 | else:
1328 | # in the extreme case where num_rois = 1, we pick a random pos or neg sample
1329 | selected_pos_samples = pos_samples.tolist()
1330 | selected_neg_samples = neg_samples.tolist()
1331 | if np.random.randint(0, 2):
1332 | sel_samples = random.choice(neg_samples)
1333 | else:
1334 | sel_samples = random.choice(pos_samples)
1335 |
1336 | # training_data: [X, X2[:, sel_samples, :]]
1337 | # labels: [Y1[:, sel_samples, :], Y2[:, sel_samples, :]]
1338 | # X => img_data resized image
1339 | # X2[:, sel_samples, :] => num_rois (4 in here) bboxes which contains selected neg and pos
1340 | # Y1[:, sel_samples, :] => one hot encode for num_rois bboxes which contains selected neg and pos
1341 | # Y2[:, sel_samples, :] => labels and gt bboxes for num_rois bboxes which contains selected neg and pos
1342 | loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]],
1343 | [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])
1344 |
1345 | losses[iter_num, 0] = loss_rpn[1]
1346 | losses[iter_num, 1] = loss_rpn[2]
1347 |
1348 | losses[iter_num, 2] = loss_class[1]
1349 | losses[iter_num, 3] = loss_class[2]
1350 | losses[iter_num, 4] = loss_class[3]
1351 |
1352 | iter_num += 1
1353 |
1354 | progbar.update(iter_num,
1355 | [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])),
1356 | ('final_cls', np.mean(losses[:iter_num, 2])),
1357 | ('final_regr', np.mean(losses[:iter_num, 3]))])
1358 |
1359 | if iter_num == epoch_length:
1360 | loss_rpn_cls = np.mean(losses[:, 0])
1361 | loss_rpn_regr = np.mean(losses[:, 1])
1362 | loss_class_cls = np.mean(losses[:, 2])
1363 | loss_class_regr = np.mean(losses[:, 3])
1364 | class_acc = np.mean(losses[:, 4])
1365 |
1366 | mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
1367 | rpn_accuracy_for_epoch = []
1368 |
1369 | if C.verbose:
1370 | print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(
1371 | mean_overlapping_bboxes))
1372 | print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
1373 | print('Loss RPN classifier: {}'.format(loss_rpn_cls))
1374 | print('Loss RPN regression: {}'.format(loss_rpn_regr))
1375 | print('Loss Detector classifier: {}'.format(loss_class_cls))
1376 | print('Loss Detector regression: {}'.format(loss_class_regr))
1377 | print('Total loss: {}'.format(loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr))
1378 | print('Elapsed time: {}'.format(time.time() - start_time))
1379 | elapsed_time = (time.time() - start_time) / 60
1380 |
1381 | curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
1382 | iter_num = 0
1383 | start_time = time.time()
1384 |
1385 | if curr_loss < best_loss:
1386 | if C.verbose:
1387 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
1388 | best_loss = curr_loss
1389 | model_all.save_weights(C.model_path)
1390 |
1391 | new_row = {'mean_overlapping_bboxes': round(mean_overlapping_bboxes, 3),
1392 | 'class_acc': round(class_acc, 3),
1393 | 'loss_rpn_cls': round(loss_rpn_cls, 3),
1394 | 'loss_rpn_regr': round(loss_rpn_regr, 3),
1395 | 'loss_class_cls': round(loss_class_cls, 3),
1396 | 'loss_class_regr': round(loss_class_regr, 3),
1397 | 'curr_loss': round(curr_loss, 3),
1398 | 'elapsed_time': round(elapsed_time, 3),
1399 | 'mAP': 0}
1400 |
1401 | record_df = record_df.append(new_row, ignore_index=True)
1402 | record_df.to_csv(record_path, index=0)
1403 |
1404 | break
1405 |
1406 | except Exception as e:
1407 | print('Exception: {}'.format(e))
1408 | continue
1409 |
1410 | print('Training complete, exiting.')
1411 |
1412 |
1413 |
1414 | plt.figure(figsize=(15,5))
1415 | plt.subplot(1,2,1)
1416 | plt.plot(np.arange(0, r_epochs), record_df['mean_overlapping_bboxes'], 'r')
1417 | plt.title('mean_overlapping_bboxes')
1418 | plt.subplot(1,2,2)
1419 | plt.plot(np.arange(0, r_epochs), record_df['class_acc'], 'r')
1420 | plt.title('class_acc')
1421 |
1422 | plt.show()
1423 |
1424 | plt.figure(figsize=(15,5))
1425 | plt.subplot(1,2,1)
1426 | plt.plot(np.arange(0, r_epochs), record_df['loss_rpn_cls'], 'r')
1427 | plt.title('loss_rpn_cls')
1428 | plt.subplot(1,2,2)
1429 | plt.plot(np.arange(0, r_epochs), record_df['loss_rpn_regr'], 'r')
1430 | plt.title('loss_rpn_regr')
1431 | plt.show()
1432 |
1433 |
1434 | plt.figure(figsize=(15,5))
1435 | plt.subplot(1,2,1)
1436 | plt.plot(np.arange(0, r_epochs), record_df['loss_class_cls'], 'r')
1437 | plt.title('loss_class_cls')
1438 | plt.subplot(1,2,2)
1439 | plt.plot(np.arange(0, r_epochs), record_df['loss_class_regr'], 'r')
1440 | plt.title('loss_class_regr')
1441 | plt.show()
1442 |
1443 | plt.plot(np.arange(0, r_epochs), record_df['curr_loss'], 'r')
1444 | plt.title('total_loss')
1445 | plt.show()
1446 |
1447 |
1448 |
--------------------------------------------------------------------------------